Hatman committed
Commit
864e9d8
1 Parent(s): 90f8c97
Files changed (2)
  1. app.py +14 -8
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,21 +1,24 @@
 import gradio as gr
 import spaces
 import torch
-import librosa
+#import librosa
+#import numpy as np
 from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
 
 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 
 model_name = "Hemg/human-emotion-detection"
-feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name).to(device)
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
 model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name).to(device)
 
 def preprocess_audio(audio):
-    audio_array, sampling_rate = librosa.load(audio, sr=16000)  # Load and resample to 16kHz
-    return {'speech': audio_array, 'sampling_rate': sampling_rate}
+    #audio_array, sampling_rate = librosa.load(audio, sr=16000)  # Load and resample to 16kHz
+    #return {'speech': audio_array, 'sampling_rate': sampling_rate}
 
 @spaces.GPU
 def inference(audio):
+    print('hello')
+    '''
     example = preprocess_audio(audio)
     inputs = feature_extractor(example['speech'], sampling_rate=16000, return_tensors="pt", padding=True)
     inputs = inputs.to(device)  # Move inputs to GPU
@@ -23,12 +26,15 @@ def inference(audio):
     logits = model(**inputs).logits
     predicted_ids = torch.argmax(logits, dim=-1)
     return model.config.id2label[predicted_ids.item()], logits, predicted_ids  # Move tensors back to CPU for further processing
+    '''
 
-iface = gr.Interface(fn=predict_sentiment,
-                     inputs=gr.inputs.Audio(source="microphone", type="filepath"),
-                     outputs="text",
+iface = gr.Interface(fn=inference,
+                     inputs=gr.Audio(type="filepath"),
+                     outputs=[gr.Label(label="Predicted Sentiment"),
+                              gr.JSON(label="Logits"),
+                              gr.JSON(label="Predicted ID")],
                      title="Audio Sentiment Analysis",
                      description="Upload an audio file or record one to analyze sentiment.")
 
 
-iface.launch()
+iface.launch(share=True)
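
As committed, preprocess_audio is left with nothing but commented-out lines for a body (a syntax error in Python), and inference only prints 'hello' and returns None even though the Interface declares three outputs, so this revision is clearly a debugging checkpoint. Below is a minimal sketch of how the commented-out path could be restored without librosa (which this commit also drops from requirements.txt), assuming torchaudio is used for loading and resampling and that the outputs are converted to plain Python objects so gr.Label and gr.JSON can render them; torchaudio and those conversions are assumptions, not part of the commit.

# Sketch only, not part of this commit: a possible restored preprocessing and
# inference path that avoids librosa. torchaudio is an assumption here; it is
# not listed in requirements.txt.
import spaces
import torch
import torchaudio
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model_name = "Hemg/human-emotion-detection"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name).to(device)

def preprocess_audio(audio):
    # Load the file, downmix to mono, and resample to the 16 kHz the model expects
    waveform, sampling_rate = torchaudio.load(audio)
    waveform = waveform.mean(dim=0)
    if sampling_rate != 16000:
        waveform = torchaudio.functional.resample(waveform, sampling_rate, 16000)
    return {'speech': waveform.numpy(), 'sampling_rate': 16000}

@spaces.GPU
def inference(audio):
    example = preprocess_audio(audio)
    inputs = feature_extractor(example['speech'], sampling_rate=16000,
                               return_tensors="pt", padding=True)
    inputs = inputs.to(device)  # Move inputs to the same device as the model
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    label = model.config.id2label[predicted_ids.item()]
    # Return plain Python objects so gr.Label and the two gr.JSON outputs can render them
    return label, logits.cpu().tolist(), predicted_ids.cpu().tolist()

The gr.Interface and iface.launch lines from this commit could stay as they are; only the two function bodies differ from the commented-out originals.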
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-gradio
 torch
 transformers
-librosa
+accelerate
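
If a sketch like the one above were adopted, requirements.txt would presumably also need torchaudio; gradio and spaces stay unlisted here, presumably because the Gradio Space runtime provides them:

torch
transformers
accelerate
torchaudio  # assumption: only needed for the sketch above, not part of this commit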