Spaces:

Lookimi
/

TuberTranscript

Runtime error

App Files Files Community

TuberTranscript / app.py

Lookimi

Update app.py

4bcf60e over 1 year ago

raw

history blame

2.29 kB




	#importing the necessary modules
	import os
	import urllib.request
	import re
	import time
	import gradio as gr

	#Creating a Gradio App Menu
	# Base address that the relative links scraped from YouTube pages are joined to.
	_YOUTUBE_BASE_URL = 'https://www.youtube.com'
	# Directory (relative to the working directory) that receives the .xml files.
	_TRANSCRIPT_DIR = 'Transcripts'
	# Prefix stripped from a watch link to obtain the bare video id.
	_WATCH_PREFIX = '/watch?v='
	# Pause after each successful download so requests are not fired back to back.
	_DOWNLOAD_PAUSE_SECONDS = 3
	# Characters accepted inside a scraped query string.  '=' and '&' must be
	# included, otherwise a match stops right after the first parameter name
	# and the resulting URL is useless.
	_QUERY_CHARS = r'[A-Za-z0-9_.=&%;+,-]*'
	_WATCH_LINK_RE = re.compile(r'(\/watch\?v=[A-Za-z0-9_.-]*)')
	# NOTE(review): these two patterns describe the page layout the original
	# script targeted; confirm that YouTube still serves such links.
	_EDITOR_LINK_RE = re.compile(r'(\/timedtext_editor\?' + _QUERY_CHARS + r')')
	_DOWNLOAD_LINK_RE = re.compile(r'(\/api\/timedtext\?' + _QUERY_CHARS + r')')


	def _fetch_text(url):
	    """Return the body of *url* decoded as UTF-8, closing the response afterwards."""
	    with urllib.request.urlopen(url) as response:
	        return response.read().decode("utf-8", errors="replace")


	def _first_link(pattern, page_source):
	    """Return the first match of *pattern* in *page_source* as an absolute URL, or None."""
	    match = pattern.search(page_source)
	    if match is None:
	        return None
	    # Links copied out of HTML attributes carry '&amp;' instead of '&'.
	    return _YOUTUBE_BASE_URL + match.group(1).replace('&amp;', '&')


	def _download_transcript(watch_link, video_id):
	    """Save the transcript of one video; return True if a file was written."""
	    video_source = _fetch_text(_YOUTUBE_BASE_URL + watch_link)
	    editor_url = _first_link(_EDITOR_LINK_RE, video_source)
	    if editor_url is None:
	        return False
	    download_url = _first_link(_DOWNLOAD_LINK_RE, _fetch_text(editor_url))
	    if download_url is None:
	        return False
	    file_name = os.path.join(_TRANSCRIPT_DIR, video_id + ".xml")
	    urllib.request.urlretrieve(download_url, file_name)
	    return True


	def transcript_extract(channel_url=""):
	    """Download the transcripts of every video linked from a channel page.

	    The page at *channel_url* is fetched and scanned for ``/watch?v=`` links.
	    For each distinct video the watch page is searched for a transcript
	    link; when one is found the transcript is saved as
	    ``Transcripts/<video id>.xml``.

	    Args:
	        channel_url: Address of the channel page.  Must be an ``http://`` or
	            ``https://`` URL; anything else is rejected without any network
	            or file-system access.

	    Returns:
	        A newline-separated status report (one line per video), or a single
	        explanatory line when the input is rejected or the channel page
	        cannot be opened.  Each line is also printed.
	    """
	    channel_url = (channel_url or "").strip()
	    if not channel_url:
	        return "Please enter a YouTube channel URL."
	    # The URL comes straight from the web form: refuse schemes such as
	    # file:// that urlopen would otherwise happily read.
	    if not channel_url.lower().startswith(("http://", "https://")):
	        return "Channel URL must start with http:// or https://"

	    try:
	        channel_source = _fetch_text(channel_url)
	    except (OSError, ValueError) as err:
	        # urllib's URLError/HTTPError derive from OSError; malformed URLs
	        # surface as ValueError.
	        return "Could not open channel page: " + str(err)

	    os.makedirs(_TRANSCRIPT_DIR, exist_ok=True)

	    # A channel page links to the same video several times; keep the first
	    # occurrence of each link and drop links with an empty video id.
	    watch_links = [
	        link
	        for link in dict.fromkeys(_WATCH_LINK_RE.findall(channel_source))
	        if len(link) > len(_WATCH_PREFIX)
	    ]

	    messages = []

	    def report(message):
	        print(message)
	        messages.append(message)

	    for link in watch_links:
	        video_id = link[len(_WATCH_PREFIX):]
	        try:
	            downloaded = _download_transcript(link, video_id)
	        except (OSError, ValueError) as err:
	            # One unreachable video must not abort the remaining downloads.
	            report("Failed to fetch transcript for video " + video_id + ": " + str(err))
	            continue
	        if downloaded:
	            report("Downloading transcript for video " + video_id + "...")
	            time.sleep(_DOWNLOAD_PAUSE_SECONDS)
	        else:
	            report("Transcript not available for video " + video_id)

	    if not messages:
	        return "No videos found on the channel page."
	    return "\n".join(messages)


	# Launch the Gradio app.  The textbox component is created here, at module
	# level, and its submitted text is what Gradio passes to transcript_extract.
	# NOTE(review): gr.inputs is the legacy component namespace this file already
	# used; confirm it exists in the Gradio version pinned for the Space.
	if __name__ == "__main__":
	    gr.Interface(
	        fn=transcript_extract,
	        inputs=gr.inputs.Textbox(label="Channel URL"),
	        outputs="textbox",
	    ).launch()