"""Gradio app that analyses an uploaded audio file.

For each upload the app reports, as a JSON document:

* ``language``      - coarse language bucket used to drive transcription,
* ``transcription`` - result of the remote transcription endpoint,
* ``emotion``       - top label from a local audio emotion classifier,
* ``toxic``         - whether the transcript is classified as toxic.
"""

import json
from typing import Optional

import gradio as gr
from gradio_client import Client, file
from transformers import pipeline

# Remote Gradio Spaces used for language identification and transcription.
language_classifier = Client("adrien-alloreview/speechbrain-lang-id-voxlingua107-ecapa")
transcriber = Client("tensorlake/audio-extractors")

# Local audio emotion classifier.
emotion_detector = pipeline(
    "audio-classification",
    model="HowMannyMore/wav2vec2-lg-xlsr-ur-speech-emotion-recognition",
)

# Local text toxicity classifier; inputs are truncated to 512 tokens.
model_name_rus = "IlyaGusev/rubertconv_toxic_clf"
toxic_detector = pipeline(
    "text-classification",
    model=model_name_rus,
    tokenizer=model_name_rus,
    framework="pt",
    max_length=512,
    truncation=True,
)

# Detected language names (lower-cased) that are transcribed as Russian.
# "belarusian" is the standard spelling and is presumably what the language-ID
# model emits (TODO confirm against the Space's label list); the misspelled
# "belarussian" is kept so any label that matched before still matches.
_RUSSIAN_BUCKET = frozenset({"russian", "belarusian", "belarussian", "ukrainian"})


def detect_language(file_path: str) -> Optional[str]:
    """Return the transcription language bucket for the audio at *file_path*.

    The file is sent to the language-ID Space's ``/predict`` endpoint. The
    returned ``label`` is split on ``": "`` and its second part is taken as
    the language name.

    Returns:
        ``"russian"`` when the detected language is Russian, Belarusian or
        Ukrainian; ``"kazakh"`` for every other detected language; ``None``
        when the request or the parsing of its result fails (the error is
        printed).
    """
    try:
        result = language_classifier.predict(param_0=file(file_path), api_name="/predict")
        # A label without ": " raises IndexError here and is handled below.
        language_result = result["label"].split(": ")[1]
        if language_result.lower() in _RUSSIAN_BUCKET:
            return "russian"
        return "kazakh"
    except Exception as e:
        print(f"Language detection failed: {e}")
        return None


def request_gradio(file_path, language):
    """Transcribe the audio at *file_path* via the remote ``/transcribe`` endpoint.

    Args:
        file_path: Path of the audio file to upload.
        language: Language value forwarded to the endpoint unchanged.

    Returns:
        Whatever the endpoint returns on success. On failure the error is
        printed and the string ``"Transcription failed: <error>"`` is
        returned instead of raising.
    """
    try:
        return transcriber.predict(
            audio_filepath=file(file_path),
            task="transcribe",
            batch_size=24,
            chunk_length_s=30,
            sampling_rate=16000,
            language=language,
            num_speakers=2,
            min_speakers=2,
            max_speakers=2,
            assisted=False,
            api_name="/transcribe",
        )
    except Exception as e:
        print(f"Transcription failed: {e}")
        # NOTE(review): the failure message is returned in place of a
        # transcript, so callers cannot tell the two apart by type.
        return f"Transcription failed: {e}"


def detect_emotion(audio):
    """Return the label of the first result from the emotion classifier.

    Args:
        audio: Input handed directly to ``emotion_detector``.

    Returns:
        ``res[0]["label"]`` from the classifier output, or the string
        ``"Emotion detection failed: <error>"`` if anything raises.
    """
    try:
        res = emotion_detector(audio)
        # Assumes the pipeline lists its highest-scoring label first - TODO confirm.
        return res[0]["label"]
    except Exception as e:
        return f"Emotion detection failed: {e}"


def detect_toxic_local(text_whisper) -> Optional[bool]:
    """Classify *text_whisper* with the local toxicity model.

    Returns:
        ``True`` for the label ``"toxic"``, ``False`` for ``"neutral"``, and
        ``None`` for any other label or when classification fails (the error
        is printed).
    """
    verdict_by_label = {"toxic": True, "neutral": False}
    try:
        label = toxic_detector([text_whisper])[0]["label"]
        return verdict_by_label.get(label)
    except Exception as e:
        print(f"Toxicity detection failed: {e}")
        return None


def assessment(file_path: str) -> str:
    """Run every analysis step on *file_path* and return the results as JSON.

    Each step falls back to a default when it yields a falsy value:
    ``"unknown"`` for language and emotion, ``""`` for the transcription and
    ``False`` for toxicity (so an undetermined toxicity is reported as
    ``False``).

    Returns:
        A JSON string with the keys ``language``, ``transcription``,
        ``emotion`` and ``toxic``.
    """
    language = detect_language(file_path) or "unknown"
    result_text = request_gradio(file_path, language) or ""
    result_emotion = detect_emotion(file_path) or "unknown"
    result_toxic = detect_toxic_local(result_text) or False
    return json.dumps(
        {
            "language": language,
            "transcription": result_text,
            "emotion": result_emotion,
            "toxic": result_toxic,
        }
    )


# Single-input UI: upload an audio file, receive the JSON assessment.
gradio_app = gr.Interface(
    fn=assessment,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="json",
)

gradio_app.launch()