vihangp committed on
Commit
ab40649
1 Parent(s): 95b03dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -15,7 +15,7 @@ def _grab_best_device(use_gpu=True):
15
  device = _grab_best_device()
16
 
17
  default_model_per_language = {
18
- "marathi": "facebook/mms-tts-mar"
19
  }
20
 
21
  models_per_language = {
@@ -58,26 +58,26 @@ def generate_audio(text, model_id, language):
58
  out = []
59
  # first generate original model result
60
  output = pipe_dict["original_pipe"](text)
61
- output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Non finetuned model prediction {default_model_per_language[language]}", show_label=True,
62
  visible=True)
63
  out.append(output)
64
 
65
 
66
- if num_speakers>1:
67
- for i in range(min(num_speakers, max_speakers - 1)):
68
- forward_params = {"speaker_id": i}
69
- output = pipe_dict["pipe"](text, forward_params=forward_params)
70
 
71
- output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label=f"Generated Audio - speaker {i}", show_label=True,
72
- visible=True)
73
- out.append(output)
74
- out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
75
- else:
76
- output = pipe_dict["pipe"](text)
77
- output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
78
- visible=True)
79
- out.append(output)
80
- out.extend([gr.Audio(visible=False)]*(max_speakers-2))
81
  return out
82
 
83
 
 
15
  device = _grab_best_device()
16
 
17
  default_model_per_language = {
18
+ "marathi": "ylacombe/mms-mar-finetuned-monospeaker"
19
  }
20
 
21
  models_per_language = {
 
58
  out = []
59
  # first generate original model result
60
  output = pipe_dict["original_pipe"](text)
61
+ output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Finetuned model prediction {default_model_per_language[language]}", show_label=True,
62
  visible=True)
63
  out.append(output)
64
 
65
 
66
+ # if num_speakers>1:
67
+ # for i in range(min(num_speakers, max_speakers - 1)):
68
+ # forward_params = {"speaker_id": i}
69
+ # output = pipe_dict["pipe"](text, forward_params=forward_params)
70
 
71
+ # output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label=f"Generated Audio - speaker {i}", show_label=True,
72
+ # visible=True)
73
+ # out.append(output)
74
+ # out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
75
+ # else:
76
+ # output = pipe_dict["pipe"](text)
77
+ # output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
78
+ # visible=True)
79
+ # out.append(output)
80
+ # out.extend([gr.Audio(visible=False)]*(max_speakers-2))
81
  return out
82
 
83