"""Gradio app that rates an uploaded audio file for emotion and toxicity.

Pipeline: identify the spoken language, transcribe the audio through a remote
Gradio Space, classify the emotion from the audio itself and classify the
transcription as toxic / neutral.
"""

import logging
from typing import Optional

import gradio as gr
from gradio_client import Client, file
from transformers import pipeline

logger = logging.getLogger(__name__)

# Remote Gradio Spaces used for language identification and transcription.
language_classifier = Client("adrien-alloreview/speechbrain-lang-id-voxlingua107-ecapa")
transcriber = Client("tensorlake/audio-extractors")

# Local audio-classification pipeline; it consumes audio (e.g. a file path),
# not text.
emotion_detector = pipeline(
    "audio-classification",
    model="HowMannyMore/wav2vec2-lg-xlsr-ur-speech-emotion-recognition",
)

model_name_rus = "IlyaGusev/rubertconv_toxic_clf"
# NOTE(review): device=0 pins the classifier to the first accelerator; this
# will fail on a CPU-only host -- confirm the deployment target.
toxic_detector = pipeline(
    "text-classification",
    model=model_name_rus,
    tokenizer=model_name_rus,
    framework="pt",
    max_length=512,
    truncation=True,
    device=0,
)

# Languages that are transcribed with the "russian" setting. Both spellings of
# Belarusian are accepted: the original code only listed the misspelled
# "belarussian" (NOTE(review): confirm the exact label the classifier emits).
_RUSSIAN_FAMILY = frozenset({"russian", "belarusian", "belarussian", "ukrainian"})

# Maps the toxicity classifier's labels onto the boolean returned to callers.
_TOXIC_LABELS = {"toxic": True, "neutral": False}


def detect_language(file_path: str) -> str:
    """Return the transcription language ("russian" or "kazakh") for an audio file.

    The remote classifier's ``label`` field is split on ``": "`` and the second
    part is taken as the language name (assumes a "<code>: <Name>" layout --
    TODO confirm against the Space). Labels without that separator are used
    as-is instead of raising ``IndexError``.
    """
    result = language_classifier.predict(param_0=file(file_path), api_name="/predict")
    parts = result["label"].split(": ")
    language_name = parts[1] if len(parts) > 1 else parts[0]
    if language_name.strip().lower() in _RUSSIAN_FAMILY:
        return "russian"
    # Everything that is not in the Russian family is treated as Kazakh.
    return "kazakh"


def request_gradio(file_path: str, language: str):
    """Transcribe ``file_path`` through the remote transcriber Space.

    Returns whatever the ``/transcribe`` endpoint returns, or ``None`` when the
    request fails. Failures are logged rather than silently discarded; the
    best-effort ``None`` contract is kept for callers.
    """
    try:
        return transcriber.predict(
            audio_filepath=file(file_path),
            task="transcribe",
            batch_size=24,
            chunk_length_s=30,
            sampling_rate=16000,
            language=language,
            num_speakers=2,
            min_speakers=2,
            max_speakers=2,
            assisted=False,
            api_name="/transcribe",
        )
    except Exception:
        # Boundary with a remote service: log the cause, keep returning None.
        logger.exception("Transcription request failed for %s", file_path)
        return None


def detect_emotion(audio) -> str:
    """Return the label of the first emotion candidate for ``audio``.

    ``audio`` must be something the audio-classification pipeline accepts,
    such as a path to an audio file. The first entry is taken as the best one
    (the pipeline is expected to order candidates by descending score).
    """
    candidates = emotion_detector(audio)
    return candidates[0]["label"]


def detect_toxic_local(text_whisper) -> Optional[bool]:
    """Classify transcribed text as toxic.

    Returns ``True`` for the "toxic" label, ``False`` for "neutral" and
    ``None`` for any other label or when there is no text to classify
    (for example after a failed transcription).
    """
    if not text_whisper:
        return None
    label = toxic_detector([text_whisper])[0]["label"]
    return _TOXIC_LABELS.get(label)


def assessment(file_path: str) -> dict:
    """Run the full analysis for one uploaded audio file.

    Returns ``{"emotion": <label>, "toxic": <True | False | None>}``.
    """
    language = detect_language(file_path)
    result_text = request_gradio(file_path, language)
    # Emotion is recognised from the audio signal, so the classifier gets the
    # audio file path -- not the transcription, which it cannot consume.
    result_emotion = detect_emotion(file_path)
    result_toxic = detect_toxic_local(result_text)
    return {"emotion": result_emotion, "toxic": result_toxic}


gradio_app = gr.Interface(
    fn=assessment,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="json",
)

gradio_app.launch()