Spaces:

hon9kon9ize
/

yue-tonenet

Sleeping

yue-tonenet / app.py

Update app.py

4a4cc1f verified about 1 month ago

No virus

1.57 kB

	import gradio as gr
	import torch
	import librosa
	from huggingface_hub import hf_hub_download
	from onnxruntime import InferenceSession
	import numpy as np

	# Download model.onnx from the hon9kon9ize/yue-tonenet Hub repo and open an
	# ONNX Runtime inference session on the value hf_hub_download returns.
	model = hf_hub_download(
	    repo_id="hon9kon9ize/yue-tonenet",
	    filename="model.onnx",
	)
	session = InferenceSession(model)


	def extract_feature(filepath, sampling_rate=22050):
	    """Turn an audio file into a log-mel spectrogram of a ~0.5 s clip.

	    The audio is loaded with ``librosa.load`` at ``sampling_rate``, 0.1 s is
	    cut from each end, silence is trimmed (``top_db=40``), and the remainder
	    is time-stretched with a rate of ``duration * 2`` so that it lasts about
	    0.5 s before the mel spectrogram is computed.

	    Args:
	        filepath: Path of the audio file to load.
	        sampling_rate: Sampling rate passed to ``librosa.load`` and used for
	            all duration computations.

	    Returns:
	        The mel spectrogram (64 mel bands, fmin=50, fmax=350, hop of 16
	        samples) converted to dB relative to its maximum value.

	    Raises:
	        ValueError: If no samples remain after cutting the margins or after
	            trimming silence (for example a recording shorter than 0.2 s).
	    """
	    y = librosa.load(filepath, sr=sampling_rate)[0]

	    # Cut 0.1 s from both ends. An explicit end index is used because
	    # ``y[margin:-margin]`` would yield an empty array when margin is 0.
	    margin = int(sampling_rate * 0.1)
	    y = y[margin:len(y) - margin]
	    if len(y) == 0:
	        raise ValueError("Recording is too short to extract features.")

	    y, _ = librosa.effects.trim(
	        y, top_db=40, frame_length=1024, hop_length=256)
	    if len(y) == 0:
	        # A zero-length signal would give a stretch rate of 0 below, which
	        # is not a valid rate.
	        raise ValueError("Recording contains no audible signal.")

	    # rate = duration / 0.5, i.e. stretch or compress the clip to 0.5 s.
	    duration = len(y) / sampling_rate
	    rate = duration * 2
	    y = librosa.effects.time_stretch(y=y, rate=rate)

	    mel_feat = librosa.feature.melspectrogram(
	        y=y, sr=sampling_rate, n_mels=64, n_fft=2048, hop_length=16,
	        fmin=50, fmax=350)
	    return librosa.power_to_db(S=mel_feat, ref=np.max)


	def predict(filepath):
	    """Run the ONNX model on a recording and return per-class confidences.

	    Args:
	        filepath: Path of the recorded audio file, or ``None`` when no audio
	            was provided.

	    Returns:
	        The string ``"Input Error! Please enter one audio!"`` when
	        ``filepath`` is ``None``; otherwise a dict mapping the labels
	        ``"1"`` .. ``"6"`` (presumably the six Cantonese tones) to softmax
	        probabilities as floats rounded to two decimals.
	    """
	    if filepath is None:
	        return "Input Error! Please enter one audio!"

	    x = extract_feature(filepath)
	    # NOTE(review): librosa mel spectrograms are normally laid out as
	    # (n_mels, frames); this reshape reinterprets the buffer as
	    # (batch, 1, 690, 64) without transposing. Confirm this matches the
	    # preprocessing the model was trained with before changing it.
	    x = x.reshape(-1, 1, 690, 64)

	    input_name = session.get_inputs()[0].name
	    output_name = session.get_outputs()[0].name
	    pred = session.run([output_name], {input_name: x})[0]
	    pred = torch.softmax(torch.tensor(pred[0]), dim=0)

	    # Return numeric confidences (not formatted strings): the "label" output
	    # sorts and renders the values as numbers.
	    return {str(i + 1): round(pred[i].item(), 2) for i in range(6)}


	# Microphone recorder; the recording is handed to ``predict`` as a file path.
	inputs = gr.Audio(
	    sources="microphone",
	    type="filepath",
	    label="Recording",
	)

	# Wire the recorder to ``predict`` with a label output, then start the app.
	app = gr.Interface(
	    fn=predict,
	    inputs=inputs,
	    outputs="label",
	    title="Cantonese ToneNet",
	    allow_flagging="never",
	)

	app.launch()