"""Gradio demo app for the VoiceRestore speech-restoration model.

Loads the model from the local checkpoint directory, exposes a single
``restore_audio`` function, and wires it to a small Blocks UI with example
inputs.
"""
import tempfile

import gradio as gr
import spaces
import torch
import torchaudio
from transformers import AutoModel

checkpoint_path = "./"

# Clips longer than this many seconds are processed with ``short=False``.
SHORT_AUDIO_MAX_SECONDS = 10

# trust_remote_code=True executes the modelling code shipped with the
# checkpoint; only point checkpoint_path at a source you trust.
model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True)


@spaces.GPU()
def restore_audio(input_audio):
    """Restore a degraded audio file and return the path of the result.

    Args:
        input_audio: Filesystem path of the audio file to restore, as
            provided by the ``gr.Audio(type="filepath")`` input component.

    Returns:
        Path of a newly created ``.wav`` file that was handed to the model
        as its output location.

    Raises:
        gr.Error: If no audio was provided (the UI passes ``None`` when the
            input component is empty).
    """
    if not input_audio:
        # Surface a readable message in the UI instead of a torchaudio
        # traceback caused by trying to load ``None``.
        raise gr.Error("Please upload or select an audio file first.")

    # Load the audio only to measure its duration (in seconds).
    waveform, sample_rate = torchaudio.load(input_audio)
    duration = waveform.shape[1] / sample_rate

    # Use a unique output file per call: a single fixed file name would be
    # overwritten by concurrent requests and by cached example runs.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    if duration > SHORT_AUDIO_MAX_SECONDS:
        model(input_audio, output_path, short=False)
    else:
        model(input_audio, output_path)  # short=True by default

    return output_path


with gr.Blocks() as demo:
    gr.Markdown("# 🔊 Voice Restoration with Transformer-based Model")
    gr.Markdown(
        """
        Upload a degraded audio file or select an example, and the space will restore it using the **VoiceRestore** model!
        Based on this [repo](https://github.com/skirdey/voicerestore) by [@Stan Kirdey](https://github.com/skirdey),
        and the HF Transformers 🤗 [Model](https://huggingface.co/jadechoghari/VoiceRestore) by [@jadechoghari](https://github.com/jadechoghari).
        The model returns optimized results for audio less than 10 seconds, however, it supports unlimited duration!
        """
    )

    # Created up front (unrendered) so gr.Examples can reference it as its
    # output; it is placed in the layout by .render() in the right column.
    output_audio = gr.Audio(
        label="Restored Audio", type="filepath", render=False
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎧 Select an Example or Upload Your Audio:")
            input_audio = gr.Audio(label="Upload Degraded Audio", type="filepath")
            # cache_examples must be passed to gr.Examples itself, and
            # caching requires both `fn` and `outputs` to be supplied.
            gr.Examples(
                examples=[
                    "example_input.wav",
                    "example_16khz.wav",
                    "example-distort-16khz.wav",
                    "example-full-degrad.wav",
                    "example-reverb-16khz.wav",
                ],
                inputs=input_audio,
                outputs=output_audio,
                fn=restore_audio,
                cache_examples="lazy",
                label="Sample Degraded Audios",
            )

        with gr.Column():
            gr.Markdown("### 🎶 Restored Audio Output:")
            output_audio.render()

    with gr.Row():
        restore_btn = gr.Button("✨ Restore Audio")

    # Connect the button to the function
    restore_btn.click(restore_audio, inputs=input_audio, outputs=output_audio)

# Launch the demo
demo.launch(debug=True)