SAML100723GoodVersion

Runtime error

App Files Files Community

SAML100723GoodVersion / app.py

on1onmangoes

Update app.py

d1b9092 verified 7 months ago

raw

history blame contribute delete

5.76 kB

	import gradio as gr
	import torch
	import io
	import base64
	import numpy as np
	import scipy.io.wavfile
	from typing import Text
	from pyannote.audio import Pipeline
	from pyannote.audio import Audio
	from pyannote.core import Segment
	import gradio as gr
	import os



	import yt_dlp as youtube_dl
	from gradio_client import Client
	from transformers.pipelines.audio_utils import ffmpeg_read

	# Hugging Face access token, read from the environment (None when unset).
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Set up the diarization pipeline once at import time so every request
	# reuses the same loaded model.
	diarization_pipeline = Pipeline.from_pretrained(
	    "pyannote/speaker-diarization-3.0",
	    use_auth_token=HF_TOKEN,
	)

	# pyannote.audio reports a failed download (missing/invalid token, model
	# terms not accepted) by returning None instead of raising. Fail here with
	# an actionable message rather than with an opaque NoneType error later.
	if diarization_pipeline is None:
	    raise RuntimeError(
	        "Could not load 'pyannote/speaker-diarization-3.0'. Check that the "
	        "HF_TOKEN environment variable holds a valid Hugging Face access "
	        "token and that the model's terms of use have been accepted."
	    )

	# Run on the GPU when one is available; otherwise stay on the default device.
	if torch.cuda.is_available():
	    diarization_pipeline.to(torch.device("cuda"))


	import gradio as gr


	def transcribe(audio_path):
	    """Run speaker diarization on an audio file.

	    Args:
	        audio_path: Path of the audio file to diarize. The audio input of the
	            interface is declared optional, so this may be ``None`` when the
	            form is submitted without an upload.

	    Returns:
	        Whatever ``diarization_pipeline(audio_path)`` returns, unmodified.

	    Raises:
	        gr.Error: If no audio file was provided.
	    """
	    # Surface a readable message in the UI instead of handing None to the
	    # pipeline and failing with an internal error.
	    if audio_path is None:
	        raise gr.Error(
	            "No audio file submitted! Please upload an audio file before "
	            "submitting your request."
	        )

	    return diarization_pipeline(audio_path)



	# UI text constants. In the visible code they are only referenced (as
	# title=/description=/article=) by the commented-out gr.Interface calls
	# further down; the active gr.Interface call does not pass them.

	# Heading text.
	title = "SAML Speaker Diarization ⚡️ "

	# Markdown blurb with setup/duplication instructions.
	# NOTE(review): the text describes a Whisper JAX transcription step, but the
	# visible transcribe() only runs pyannote diarization; confirm the wording.
	description = """Combine the speed of Whisper JAX with pyannote speaker diarization to transcribe meetings in super fast time. Demo uses Whisper JAX as an [endpoint](https://twitter.com/sanchitgandhi99/status/1656665496463495168) and pyannote speaker diarization running locally. The Whisper JAX endpoint is run asynchronously, meaning speaker diarization is run in parallel to the speech transcription. The diarized timestamps are aligned with the Whisper output to give the final speaker-segmented transcription.
	To duplicate the demo, first accept the pyannote terms of use for the [speaker diarization](https://huggingface.co/pyannote/speaker-diarization) and [segmentation](https://huggingface.co/pyannote/segmentation) models. Then, click [here](https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-diarization?duplicate=true) to duplicate the demo, and enter your Hugging Face access token as a Space secret when prompted.
	"""

	# Markdown credits text.
	article = """Whisper large-v2 model by OpenAI. Speaker diarization model by pyannote. Whisper JAX backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme. Whisper JAX [code](https://github.com/sanchit-gandhi/whisper-jax) and Gradio demo by 🤗 Hugging Face."""


	import gradio as gr

	def greet(name):
	    """Build a greeting by placing ``name`` between "Hello " and "!!"."""
	    prefix = "Hello "
	    suffix = "!!"
	    return prefix + name + suffix

	# Upload-only audio input; it is optional, so the handler may be called
	# without a file.
	audio_input = gr.inputs.Audio(
	    source="upload",
	    optional=True,
	    label="Audio file",
	    type="filepath",
	)

	# Wire the audio input to transcribe() with a plain-text output and start
	# the app with the API page enabled.
	iface = gr.Interface(fn=transcribe, inputs=audio_input, outputs="text")
	iface.launch(show_api=True)







	# audio_file = gr.Interface(
	# fn=transcribe,
	# inputs=[
	# gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
	# # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
	# # gr.inputs.Checkbox(default=True, label="Group by speaker"),
	# ],
	# outputs=[
	# gr.outputs.Textbox(label="Transcription").style(show_copy_button=True)
	# ],
	# allow_flagging="auto",
	# title=title,
	# description=description,
	# article=article,
	# )

	# demo = gr.Blocks()
	# with demo:
	# gr.TabbedInterface([audio_file], ["Audio File"])

	# demo.launch()





	# def transcribe(audio_path, task="transcribe", group_by_speaker=True, progress=gr.Progress()):

	# # run diarization while we wait for Whisper JAX
	# progress(0, desc="Diarizing...")
	# diarization = diarization_pipeline(audio_path)
	# print(diarization)
	# #segments = diarization.for_json()["content"]
	# #segments = str(diarization)
	# transcription = "SAML Output"
	# return transcription

	# title = "SAML Speaker Diarization ⚡️"

	# description = """Combine the speed of Whisper JAX with pyannote speaker diarization to transcribe meetings in super fast time. Demo uses Whisper JAX as an [endpoint](https://twitter.com/sanchitgandhi99/status/1656665496463495168) and pyannote speaker diarization running locally. The Whisper JAX endpoint is run asynchronously, meaning speaker diarization is run in parallel to the speech transcription. The diarized timestamps are aligned with the Whisper output to give the final speaker-segmented transcription.
	# To duplicate the demo, first accept the pyannote terms of use for the [speaker diarization](https://huggingface.co/pyannote/speaker-diarization) and [segmentation](https://huggingface.co/pyannote/segmentation) models. Then, click [here](https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-diarization?duplicate=true) to duplicate the demo, and enter your Hugging Face access token as a Space secret when prompted.
	# """

	# article = "Whisper large-v2 model by OpenAI. Speaker diarization model by pyannote. Whisper JAX backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme. Whisper JAX [code](https://github.com/sanchit-gandhi/whisper-jax) and Gradio demo by 🤗 Hugging Face."


	# audio_file = gr.Interface(
	# fn=transcribe,
	# inputs=[
	# gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
	# gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
	# gr.inputs.Checkbox(default=True, label="Group by speaker"),
	# ],
	# outputs=[
	# gr.outputs.Textbox(label="Transcription").style(show_copy_button=True)
	# ],
	# allow_flagging="never",
	# title=title,
	# description=description,
	# article=article,
	# )

	# demo = gr.Blocks()
	# with demo:
	# gr.TabbedInterface([ audio_file], [ "Audio File"])

	# demo.queue(max_size=10)
	# demo.launch(show_api=True)


	# # def greet(name):
	# # return "Hello " + name + "!!"

	# # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
	# # iface.launch()