kai-ytb-private-replysend

Running

App Files Files Community

kai-ytb-private-replysend / app.py

seawolf2357

Update app.py

1e3100b verified 5 months ago

raw

history blame

9.25 kB

	import os
	import google_auth_oauthlib.flow
	from google.oauth2.credentials import Credentials
	from googleapiclient.discovery import build
	from google.auth.transport.requests import Request
	import discord
	import logging
	import re
	import asyncio
	import subprocess
	from huggingface_hub import InferenceClient
	from youtube_transcript_api import YouTubeTranscriptApi
	from youtube_transcript_api.formatters import TextFormatter
	from dotenv import load_dotenv

	# Load environment variables via python-dotenv before any os.getenv() call.
	load_dotenv()

	# File locations used by the OAuth flow:
	#   credentials_path - OAuth client-secrets JSON
	#   token_path       - cached user token
	#   auth_code_path   - file the operator saves the authorization code into
	credentials_path = 'JSON_TOKEN.json'
	token_path = 'token.json'
	auth_code_path = 'auth_code.txt'

	# Logging configuration: DEBUG level, emitted through a stream handler.
	logging.basicConfig(
	    level=logging.DEBUG,
	    format='%(asctime)s:%(levelname)s:%(name)s:%(message)s',
	    handlers=[logging.StreamHandler()],
	)

	# Discord gateway intents requested by the bot.
	intents = discord.Intents.default()
	intents.message_content = True
	intents.messages = True
	intents.guilds = True
	intents.guild_messages = True

	# Inference API client used to generate the replies.
	hf_client = InferenceClient(
	    "CohereForAI/c4ai-command-r-plus",
	    token=os.getenv("HF_TOKEN"),
	)

	# OAuth 2.0 scope list and the module-level credentials slot (filled later).
	SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]
	creds = None

	def authorize():
	    """Run the manual OAuth flow and cache the resulting credentials.

	    Prints the consent URL, waits for the operator to save the
	    authorization code into ``auth_code_path`` and press Enter, exchanges
	    that code for a token, writes the credentials as JSON to
	    ``token_path`` and returns them.

	    Raises:
	        FileNotFoundError: if ``auth_code_path`` does not exist once the
	            operator has pressed Enter.
	    """
	    oauth_flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
	        credentials_path, SCOPES)
	    consent_url, _state = oauth_flow.authorization_url(prompt='consent')
	    print('Please go to this URL and finish the authentication: {}'.format(consent_url))

	    print(f'Enter the authorization code in the file {auth_code_path} and press Enter')
	    input('Press Enter after saving the authorization code...')

	    # The code is handed over through a file rather than typed at the prompt.
	    if not os.path.exists(auth_code_path):
	        raise FileNotFoundError(f"'{auth_code_path}' 파일이 존재하지 않습니다.")

	    with open(auth_code_path, 'r') as code_file:
	        raw_code = code_file.read()
	    auth_code = raw_code.strip()

	    oauth_flow.fetch_token(code=auth_code)
	    credentials = oauth_flow.credentials

	    # Cache the token so later runs can skip the interactive flow.
	    with open(token_path, 'w') as token_file:
	        token_file.write(credentials.to_json())

	    return credentials

	# Load the cached token if one exists; otherwise run the interactive flow.
	if os.path.exists(token_path):
	    creds = Credentials.from_authorized_user_file(token_path, SCOPES)
	else:
	    creds = authorize()

	# Refresh an expired token when a refresh token is available; otherwise
	# fall back to a full re-authorization.
	if not creds or not creds.valid:
	    if creds and creds.expired and creds.refresh_token:
	        creds.refresh(Request())
	        # Persist the refreshed token, mirroring what authorize() does, so
	        # the next start does not begin from the stale cached token.
	        with open(token_path, 'w') as token_file:
	            token_file.write(creds.to_json())
	    else:
	        creds = authorize()

	# YouTube Data API v3 client built on the credentials above.
	youtube_service = build('youtube', 'v3', credentials=creds)

	# Discord channel the bot listens on. Fail with an explicit message instead
	# of the opaque TypeError that int(None) would raise when the variable is
	# missing.
	_channel_id_raw = os.getenv("DISCORD_CHANNEL_ID")
	if _channel_id_raw is None:
	    raise RuntimeError("DISCORD_CHANNEL_ID environment variable is not set.")
	SPECIFIC_CHANNEL_ID = int(_channel_id_raw)

	class MyClient(discord.Client):
	    """Discord client that turns a posted YouTube URL into comment replies.

	    Messages are handled only when they arrive in ``SPECIFIC_CHANNEL_ID``
	    or in one of its threads, and only one message is processed at a time.
	    """

	    def __init__(self, *args, **kwargs):
	        # Forward positional and keyword arguments (e.g. ``intents=``)
	        # unchanged to discord.Client.
	        super().__init__(*args, **kwargs)
	        # True while a message is being processed; later messages are dropped.
	        self.is_processing = False
	        # Handle of the web.py subprocess once it has been started.
	        self._web_process = None

	    async def on_ready(self):
	        """Log the login, start web.py once, and post the usage hint."""
	        logging.info(f'{self.user}로 로그인되었습니다!')

	        # on_ready is not guaranteed to fire only once (it can run again
	        # after a reconnect), so launch the web.py server a single time.
	        if self._web_process is None:
	            self._web_process = subprocess.Popen(["python", "web.py"])
	            logging.info("Web.py server has been started.")

	        # Send the usage hint to the configured channel, if it is available.
	        channel = self.get_channel(SPECIFIC_CHANNEL_ID)
	        if channel:
	            await channel.send("유튜브 비디오 URL을 입력하면, 자막과 댓글을 기반으로 답글을 작성합니다.")

	    async def on_message(self, message):
	        """Process a YouTube URL posted in the configured channel.

	        Fetches the transcript and comments, generates one reply per
	        comment, shows them in a Discord thread and posts them to YouTube.
	        """
	        if message.author == self.user:
	            return
	        if not self.is_message_in_specific_channel(message):
	            return
	        if self.is_processing:
	            return

	        self.is_processing = True
	        try:
	            video_id = extract_video_id(message.content)
	            if not video_id:
	                await message.channel.send("유효한 유튜브 비디오 URL을 제공해 주세요.")
	                return

	            transcript = await get_best_available_transcript(video_id)
	            comments = await get_video_comments(video_id)
	            if not (comments and transcript):
	                await message.channel.send("자막이나 댓글을 가져올 수 없습니다.")
	                return

	            replies = await generate_replies(comments, transcript)
	            await create_thread_and_send_replies(message, video_id, comments, replies)
	            await post_replies_to_youtube(video_id, comments, replies)
	        finally:
	            # Always release the busy flag, even when processing raised.
	            self.is_processing = False

	    def is_message_in_specific_channel(self, message):
	        """Return True for the configured channel or a thread under it."""
	        return message.channel.id == SPECIFIC_CHANNEL_ID or (
	            isinstance(message.channel, discord.Thread)
	            and message.channel.parent_id == SPECIFIC_CHANNEL_ID
	        )

	def extract_video_id(url):
	    """Extract the 11-character video ID from text containing a YouTube URL.

	    Recognizes ``watch?v=``, ``embed/``, ``v/``, ``shorts/`` and ``live/``
	    paths on youtube.com / youtube-nocookie.com as well as youtu.be short
	    links. The URL may appear anywhere in ``url``.

	    Returns:
	        The video ID string, or None when no YouTube URL is found.
	    """
	    video_id = None
	    # All fragments are raw strings and use plain ``|`` for alternation; an
	    # escaped ``\|`` would match a literal pipe character instead.
	    youtube_regex = (
	        r'(https?://)?(www\.)?'
	        r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
	        r'(watch\?v=|embed/|v/|shorts/|live/|.+\?v=)?([^&=%\?]{11})')

	    # search() rather than match() so that surrounding message text or a
	    # host prefix such as "m." does not prevent the URL from being found.
	    match = re.search(youtube_regex, url)
	    if match:
	        video_id = match.group(6)
	        logging.debug(f'Extracted video ID: {video_id}')
	    return video_id

	async def get_best_available_transcript(video_id):
	    """Fetch the video's transcript and return it as plain text.

	    Tries Korean first, then English, then the first transcript listed for
	    the video in any language.

	    Returns:
	        The formatted transcript text, or None when no transcript could be
	        fetched.
	    """
	    transcript = None

	    # Preferred languages, in order.
	    for code, label in (('ko', 'Korean'), ('en', 'English')):
	        try:
	            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[code])
	            break
	        except Exception as e:
	            logging.warning(f'Error fetching {label} transcript: {e}')

	    if transcript is None:
	        try:
	            # Last resort: take whatever transcript the video lists first.
	            # find_manually_created_transcript() needs a language-code list,
	            # so calling it with no argument could never succeed.
	            available = YouTubeTranscriptApi.list_transcripts(video_id)
	            first = next(iter(available), None)
	            if first is None:
	                raise LookupError('no transcripts are listed for this video')
	            transcript = first.fetch()
	        except Exception as e:
	            logging.error(f'Error fetching alternative transcript: {e}')
	            return None

	    # Convert the transcript into plain text.
	    formatter = TextFormatter()
	    transcript_text = formatter.format_transcript(transcript)
	    logging.debug(f'Fetched transcript: {transcript_text}')
	    return transcript_text

	async def get_video_comments(video_id):
	    """Fetch top-level comments of a YouTube video.

	    Returns:
	        A list of ``(comment_text, comment_id)`` tuples built from a single
	        ``commentThreads().list`` call limited to 100 results.
	    """
	    request = youtube_service.commentThreads().list(
	        part='snippet',
	        videoId=video_id,
	        maxResults=100,  # request at most 100 comment threads
	    )
	    response = request.execute()

	    # Keep the text together with the comment ID for each top-level comment.
	    top_level = (item['snippet']['topLevelComment'] for item in response.get('items', []))
	    comments = [(entry['snippet']['textOriginal'], entry['id']) for entry in top_level]

	    logging.debug(f'Fetched comments: {comments}')
	    return comments

	async def generate_replies(comments, transcript):
	    """Generate one LLM reply per comment, using the transcript as context.

	    Args:
	        comments: iterable of ``(comment_text, comment_id)`` pairs.
	        transcript: transcript text placed in the system message.

	    Returns:
	        A list of reply strings, in the same order as ``comments``.
	    """
	    def _complete(chat):
	        # Blocking chat-completion call; executed in the default executor.
	        return hf_client.chat_completion(
	            chat, max_tokens=400, temperature=0.7, top_p=0.85)

	    replies = []
	    for comment_text, _comment_id in comments:
	        chat = [
	            {"role": "system", "content": f"비디오 자막: {transcript}"},
	            {"role": "user", "content": comment_text},
	        ]
	        event_loop = asyncio.get_event_loop()
	        response = await event_loop.run_in_executor(None, _complete, chat)

	        # Use a fixed fallback text when the model returned no usable message.
	        if not response.choices or not response.choices[0].message:
	            reply = "답글을 생성할 수 없습니다."
	        else:
	            reply = response.choices[0].message['content'].strip()
	        replies.append(reply)

	    logging.debug(f'Generated replies: {replies}')
	    return replies

	async def create_thread_and_send_replies(message, video_id, comments, replies):
	    """Send each comment/reply pair as an embed into a Discord thread.

	    A new thread is created on the triggering message. When the message was
	    itself sent inside a thread, that thread is reused, because thread
	    channels cannot create further threads.

	    Args:
	        message: the Discord message that triggered processing.
	        video_id: unused here; kept for interface compatibility.
	        comments: iterable of ``(comment_text, comment_id)`` pairs.
	        replies: reply strings aligned with ``comments``.
	    """
	    # Discord rejects embed descriptions longer than 4096 characters.
	    embed_description_limit = 4096

	    channel = message.channel
	    if isinstance(channel, discord.Thread):
	        thread = channel
	    else:
	        thread = await channel.create_thread(name=f"{message.author.name}의 댓글 답글", message=message)

	    for (comment, _), reply in zip(comments, replies):
	        description = f"댓글: {comment}\n답글: {reply}"
	        embed = discord.Embed(description=description[:embed_description_limit])
	        await thread.send(embed=embed)

	async def post_replies_to_youtube(video_id, comments, replies):
	    """Post each generated reply to YouTube under its parent comment.

	    A failure for one comment is logged and the remaining replies are still
	    attempted.

	    Args:
	        video_id: unused here; kept for interface compatibility.
	        comments: iterable of ``(comment_text, comment_id)`` pairs.
	        replies: reply strings aligned with ``comments``.
	    """
	    for (_, comment_id), reply in zip(comments, replies):
	        try:
	            response = youtube_service.comments().insert(
	                part='snippet',
	                body={
	                    'snippet': {
	                        'parentId': comment_id,
	                        'textOriginal': reply
	                    }
	                }
	            ).execute()
	            logging.debug(f'Posted reply to comment: {comment_id} with response: {response}')
	        except Exception as e:
	            logging.error(f'Error posting reply to comment {comment_id}: {e}')
	            # Not every exception carries resp/content. Reading them
	            # unconditionally would raise AttributeError inside this handler
	            # and abort the remaining replies.
	            resp = getattr(e, 'resp', None)
	            content = getattr(e, 'content', None)
	            logging.debug(f'Response: {resp} | Content: {content}')

	if __name__ == "__main__":
	    # Script entry point: build the client with the configured intents and
	    # run it with the token read from the DISCORD_TOKEN environment variable.
	    discord_client = MyClient(intents=intents)
	    bot_token = os.getenv('DISCORD_TOKEN')
	    discord_client.run(bot_token)