# Source: Hugging Face Space file app.py, commit 70b82a0 ("Update app.py", verified), uploaded by seawolf2357.
import discord
import logging
import os
import re
import asyncio
import subprocess
import aiohttp
from huggingface_hub import InferenceClient
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from dotenv import load_dotenv
# Load environment variables from a .env file.
load_dotenv()
# Logging configuration: DEBUG level, written through a stream handler.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s:%(message)s', handlers=[logging.StreamHandler()])
# Discord gateway intents requested by the bot.
intents = discord.Intents.default()
intents.message_content = True
intents.messages = True
intents.guilds = True
intents.guild_messages = True
# Inference API client used to generate replies.
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
# YouTube Data API setup.
API_KEY = os.getenv("YOUTUBE_API_KEY")
youtube_service = build('youtube', 'v3', developerKey=API_KEY)
# ID of the single Discord channel the bot works in.
# NOTE: int(None) raises TypeError, so DISCORD_CHANNEL_ID must be set before this module is imported.
SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
# Webhook URL that receives the generated replies.
# NOTE(review): the endpoint is hard-coded; consider reading it from the environment like the other settings.
WEBHOOK_URL = "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTY1MDYzMjA0MzA1MjY4NTUzMDUxMzUi_pc"
# Number of delivery attempts per webhook payload (loop bound in send_webhook_data).
MAX_RETRIES = 3
class MyClient(discord.Client):
    """Discord client that turns YouTube URLs into AI-generated comment replies.

    Messages posted in the configured channel (or one of its threads) are
    scanned for a YouTube video URL. For a valid URL the transcript and the
    comments are fetched, a reply is generated for every comment, and the
    results are posted in a thread and forwarded to the webhook.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # True while one video is being handled; other messages are ignored meanwhile.
        self.is_processing = False
        # Shared aiohttp session for webhook delivery; created in on_ready.
        self.session = None
        # Handle of the web.py child process; None until the first on_ready.
        self.web_process = None

    async def on_ready(self):
        """Run startup work once the client is connected.

        discord.py may dispatch on_ready more than once (for example after a
        reconnect), so launching web.py, creating the HTTP session and sending
        the greeting are guarded to happen only once per process.
        """
        logging.info(f'{self.user}๋กœ ๋กœ๊ทธ์ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!')

        first_ready = self.web_process is None
        if first_ready:
            # Start the companion web.py server; keep the handle so it is not
            # garbage-collected while the child is still running.
            self.web_process = subprocess.Popen(["python", "web.py"])
            logging.info("Web.py ์„œ๋ฒ„๊ฐ€ ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

        # Create the aiohttp session only when there is no usable one, so a
        # repeated on_ready does not leak the previous session.
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()

        if first_ready:
            # Post the usage hint in the configured channel when the bot starts.
            channel = self.get_channel(SPECIFIC_CHANNEL_ID)
            if channel:
                await channel.send("์œ ํŠœ๋ธŒ ๋น„๋””์˜ค URL์„ ์ž…๋ ฅํ•˜๋ฉด, ์ž๋ง‰๊ณผ ๋Œ“๊ธ€์„ ๊ธฐ๋ฐ˜์œผ๋กœ ๋‹ต๊ธ€์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.")

    async def on_message(self, message):
        """Handle one incoming message.

        Ignores the bot's own messages, messages outside the configured
        channel, and anything that arrives while another video is still being
        processed.
        """
        if message.author == self.user:
            return
        if not self.is_message_in_specific_channel(message):
            return
        if self.is_processing:
            return

        self.is_processing = True
        try:
            video_id = extract_video_id(message.content)
            if not video_id:
                await message.channel.send("์œ ํšจํ•œ ์œ ํŠœ๋ธŒ ๋น„๋””์˜ค URL์„ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.")
                return

            transcript = await get_best_available_transcript(video_id)
            comments = await get_video_comments(video_id)
            if comments and transcript:
                replies = await generate_replies(comments, transcript)
                await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
            else:
                await message.channel.send("์ž๋ง‰์ด๋‚˜ ๋Œ“๊ธ€์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        finally:
            # Always release the guard, even when processing raised.
            self.is_processing = False

    def is_message_in_specific_channel(self, message):
        """Return True if the message is in the configured channel or one of its threads."""
        return message.channel.id == SPECIFIC_CHANNEL_ID or (
            isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
        )

    async def close(self):
        """Close the aiohttp session, then shut the Discord client down."""
        try:
            if self.session is not None and not self.session.closed:
                await self.session.close()
        finally:
            # The client must be closed even if closing the session failed.
            await super().close()
# Compiled once at import time. Raw strings avoid the invalid "\." / "\?"
# escapes of the previous non-raw fragments.
_YOUTUBE_URL_RE = re.compile(
    r'(?:https?://)?(?:www\.)?'
    r'(?:youtube|youtu|youtube-nocookie)\.(?:com|be)/'
    r'(?:watch\?v=|embed/|v/|shorts/|live/|.+[?&]v=)?'
    # A video ID is exactly 11 URL-safe characters; the lookahead rejects
    # longer path segments such as "subscriptions".
    r'(?P<video_id>[A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])'
)


def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a piece of text.

    The URL may appear anywhere in ``url`` (for example after other words in a
    chat message). Watch, short-link, embed, ``/v/``, ``/shorts/`` and
    ``/live/`` URL forms are recognised.

    Args:
        url: Text that may contain a YouTube video URL.

    Returns:
        The video ID string, or None when no video URL is found.
    """
    video_id = None
    if url:
        # search() instead of match(): the URL does not have to start the text.
        match = _YOUTUBE_URL_RE.search(url)
        if match:
            video_id = match.group('video_id')
    logging.debug(f'์ถ”์ถœ๋œ ๋น„๋””์˜ค ID: {video_id}')
    return video_id
def _fetch_transcript_blocking(video_id):
    """Fetch a transcript synchronously: Korean, then English, then any available one.

    Returns the fetched transcript, or None when every attempt failed.
    """
    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
    except Exception as e:
        logging.warning(f'ํ•œ๊ตญ์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    except Exception as e:
        logging.warning(f'์˜์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
    try:
        # find_manually_created_transcript() requires a list of language codes,
        # so the former bare call always raised. Take the first transcript the
        # listing yields instead, whatever its language.
        transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
        return next(iter(transcripts)).fetch()
    except Exception as e:
        logging.error(f'๋Œ€์ฒด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
        return None


async def get_best_available_transcript(video_id):
    """Return the video's transcript as plain text, or None if none can be fetched.

    Korean is preferred, then English, then the first transcript listed for
    the video. The blocking fetches run in the default executor so the
    event loop stays responsive.

    Args:
        video_id: YouTube video ID.

    Returns:
        The transcript formatted by TextFormatter, or None on failure.
    """
    loop = asyncio.get_event_loop()
    transcript = await loop.run_in_executor(None, _fetch_transcript_blocking, video_id)
    if transcript is None:
        return None
    transcript_text = TextFormatter().format_transcript(transcript)
    logging.debug(f'๊ฐ€์ ธ์˜จ ์ž๋ง‰: {transcript_text}')
    return transcript_text
async def get_video_comments(video_id):
    """Fetch up to 100 top-level comments of a video.

    The blocking API request runs in the default executor. An API error (for
    example when comments are disabled) is logged and reported as an empty
    list, which lets the caller send its "cannot fetch" message.

    Args:
        video_id: YouTube video ID.

    Returns:
        A list of ``(comment_text, comment_id)`` tuples; empty on API failure.
    """
    # Local import: HttpError lives in the already-used googleapiclient package.
    from googleapiclient.errors import HttpError

    request = youtube_service.commentThreads().list(
        part='snippet',
        videoId=video_id,
        maxResults=100,  # fetch at most 100 comments
    )
    loop = asyncio.get_event_loop()
    try:
        response = await loop.run_in_executor(None, request.execute)
    except HttpError as e:
        logging.error(f'Failed to fetch comments for video {video_id}: {e}')
        return []

    top_level_comments = (item['snippet']['topLevelComment'] for item in response.get('items', []))
    comments = [(top['snippet']['textOriginal'], top['id']) for top in top_level_comments]
    logging.debug(f'๊ฐ€์ ธ์˜จ ๋Œ“๊ธ€: {comments}')
    return comments
async def generate_replies(comments, transcript):
    """Generate one reply per comment with the inference client.

    Args:
        comments: Iterable of ``(comment_text, comment_id)`` tuples.
        transcript: Transcript text that is embedded in the system prompt.

    Returns:
        A list of reply strings, in the same order as ``comments``.
    """
    loop = asyncio.get_event_loop()
    # The prompt must be an f-string: previously the literal text
    # "{transcript}" was sent and the transcript never reached the model.
    # NOTE(review): a very long transcript may exceed the model's context
    # window; confirm whether it should be truncated.
    system_prompt = f"""๋„ˆ์˜ ์ด๋ฆ„์€ OpenFreeAI์ด๋‹ค. ๋‹ต๊ธ€ ์ƒ์„ฑํ›„ ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— ๋„ˆ์˜ ์ด๋ฆ„์„ ๋ฐํžˆ๊ณ  ๊ณต์†ํ•˜๊ฒŒ ์ธ์‚ฌํ•˜๋ผ. ๋น„๋””์˜ค ์ž๋ง‰: {transcript}"""
    replies = []
    for comment, _ in comments:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": comment},
        ]
        # chat_completion blocks, so it runs in the default executor. The
        # default argument binds this iteration's message list explicitly.
        response = await loop.run_in_executor(None, lambda msgs=messages: hf_client.chat_completion(
            msgs, max_tokens=250, temperature=0.7, top_p=0.85))
        if response.choices and response.choices[0].message:
            reply = response.choices[0].message['content'].strip()
        else:
            reply = "๋‹ต๊ธ€์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
        replies.append(reply)
    logging.debug(f'์ƒ์„ฑ๋œ ๋‹ต๊ธ€: {replies}')
    return replies
async def send_webhook_data(session, chunk_data, chunk_number):
    """POST one payload to the webhook, retrying up to MAX_RETRIES times.

    Args:
        session: aiohttp client session used for the request.
        chunk_data: JSON-serialisable payload to send.
        chunk_number: 1-based number of the chunk, used in log messages only.

    Returns:
        True if the webhook answered with HTTP 200, False once all attempts failed.
    """
    for attempt in range(MAX_RETRIES):
        try:
            async with session.post(WEBHOOK_URL, json=chunk_data) as resp:
                if resp.status == 200:
                    logging.info(f"์›นํ›…์œผ๋กœ ๋ฐ์ดํ„ฐ ์ „์†ก ์„ฑ๊ณต: {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
                    return True  # stop on success
                logging.error(f"์›นํ›…์œผ๋กœ ๋ฐ์ดํ„ฐ ์ „์†ก ์‹คํŒจ: {resp.status}, {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # aiohttp timeouts raise asyncio.TimeoutError, which is not a
            # ClientError; without catching it a timeout would abort the retries.
            logging.error(f"์›นํ›… ์ „์†ก ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}, {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
        if attempt < MAX_RETRIES - 1:
            await asyncio.sleep(1)  # short pause before the next attempt only
    return False  # every attempt failed
async def create_thread_and_send_replies(message, video_id, comments, replies, session):
    """Post every comment/reply pair in a thread and forward each pair to the webhook.

    Args:
        message: The Discord message that contained the video URL.
        video_id: YouTube video ID the comments belong to.
        comments: List of ``(comment_text, comment_id)`` tuples.
        replies: Generated replies, in the same order as ``comments``.
        session: aiohttp client session used for webhook delivery.
    """
    embed_description_limit = 4096  # Discord rejects longer embed descriptions

    if isinstance(message.channel, discord.Thread):
        # Messages posted inside a thread of the channel are accepted by the
        # client, but a Thread has no create_thread(); reuse the thread itself.
        thread = message.channel
    else:
        thread = await message.channel.create_thread(name=f"{message.author.name}์˜ ๋Œ“๊ธ€ ๋‹ต๊ธ€", message=message)

    webhook_data = {"video_id": video_id, "replies": []}
    for (comment, comment_id), reply in zip(comments, replies):
        description = f"**๋Œ“๊ธ€**: {comment}\n**๋‹ต๊ธ€**: {reply}"
        embed = discord.Embed(description=description[:embed_description_limit])
        await thread.send(embed=embed)
        # Collect the webhook payload (including the comment id).
        webhook_data["replies"].append({"comment": comment, "reply": reply, "comment_id": comment_id})

    # Send the data in several requests; a chunk size of 1 delivers every
    # comment/reply pair separately.
    chunk_size = 1
    chunk_starts = range(0, len(webhook_data["replies"]), chunk_size)
    for chunk_number, start in enumerate(chunk_starts, start=1):
        chunk = webhook_data["replies"][start:start + chunk_size]
        chunk_data = {"video_id": video_id, "replies": chunk}
        success = await send_webhook_data(session, chunk_data, chunk_number)
        if not success:
            logging.error(f"๋ฐ์ดํ„ฐ ์ „์†ก ์‹คํŒจ: {chunk_number} ๋ฒˆ์งธ ์ฒญํฌ")
if __name__ == "__main__":
    # Script entry point: build the client with the module-level intents and
    # run it with the bot token read from the environment.
    discord_client = MyClient(intents=intents)
    bot_token = os.getenv('DISCORD_TOKEN')
    discord_client.run(bot_token)