litagin committed on
Commit
f4fa6cb
•
1 Parent(s): d4d32c6

Use librosa

Browse files
Files changed (2) hide show
  1. app.py +7 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import os
2
  import time
 
3
 
4
  import gradio as gr
 
5
  import spaces
6
  import torch
7
  from loguru import logger
8
- from pydub import AudioSegment
9
  from transformers import pipeline
10
 
11
  is_hf = os.getenv("SYSTEM") == "spaces"
@@ -45,14 +46,16 @@ logger.success("Pipelines initialized!")
45
 
46
  @spaces.GPU
47
  def transcribe_common(audio: str, model: str) -> tuple[str, float]:
48
- logger.info(f"Transcribing {audio} with {model}")
 
 
49
  # Get duration of audio
50
- duration = AudioSegment.from_file(audio).duration_seconds
51
  logger.info(f"Duration: {duration:.2f}s")
52
  if duration > 15:
53
  return "Audio too long, limit is 15 seconds", 0
54
  start_time = time.time()
55
- result = pipe_dict[model](audio, generate_kwargs=generate_kwargs)["text"]
56
  end_time = time.time()
57
  logger.success(f"Transcribed {audio} with {model} in {end_time - start_time:.2f}s")
58
  logger.success(f"Result:\n{result}")
 
1
  import os
2
  import time
3
+ from pathlib import Path
4
 
5
  import gradio as gr
6
+ import librosa
7
  import spaces
8
  import torch
9
  from loguru import logger
 
10
  from transformers import pipeline
11
 
12
  is_hf = os.getenv("SYSTEM") == "spaces"
 
46
 
47
  @spaces.GPU
48
  def transcribe_common(audio: str, model: str) -> tuple[str, float]:
49
+ logger.info(f"Transcribing {Path(audio).name} with {model}")
50
+ # Read and resample audio to 16kHz
51
+ y, sr = librosa.load(audio, mono=True, sr=16000)
52
  # Get duration of audio
53
+ duration = librosa.get_duration(y=y, sr=sr)
54
  logger.info(f"Duration: {duration:.2f}s")
55
  if duration > 15:
56
  return "Audio too long, limit is 15 seconds", 0
57
  start_time = time.time()
58
+ result = pipe_dict[model](y, generate_kwargs=generate_kwargs)["text"]
59
  end_time = time.time()
60
  logger.success(f"Transcribed {audio} with {model} in {end_time - start_time:.2f}s")
61
  logger.success(f"Result:\n{result}")
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  gradio
 
2
  loguru
3
  numpy<2
4
  spaces
5
  torch
6
- transformers
 
1
  gradio
2
+ librosa
3
  loguru
4
  numpy<2
5
  spaces
6
  torch
7
+ transformers