"""NewsWave: generate level-adapted articles for English learners.

The module wires a LangChain tool-calling agent (Wikipedia + DuckDuckGo) to a
Gradio interface. For a topic and a CEFR level it researches the topic, writes
an article, extracts language elements, builds a takeaway section and can read
the article aloud through OpenAI's text-to-speech API.
"""

import os
import re
from datetime import datetime, timedelta
from pathlib import Path

import bs4
import gradio as gr
import openai
from bs4 import BeautifulSoup
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchResults
from langchain_community.utilities import WikipediaAPIWrapper, DuckDuckGoSearchAPIWrapper
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

from brd import brd_txt

# Set up API keys
openai.api_key = os.getenv('OPENAI_API_KEY')
username = os.getenv('USERNAME')
password = os.getenv('PASSWORD')

# Initialize LLM
llm_article = ChatOpenAI(model="gpt-4o-mini")

# Set Up Search Tools
wikipedia = WikipediaAPIWrapper()
duckduckgo_wrapper = DuckDuckGoSearchAPIWrapper(max_results=3)
tools = [
    WikipediaQueryRun(api_wrapper=wikipedia),
    DuckDuckGoSearchResults(api_wrapper=duckduckgo_wrapper),
]

# CEFR levels and their descriptions
CEFR_LEVELS = {
    "Pre-A1": "Foundation",
    "A1": "Elementary",
    "A2": "Pre-intermediate",
    "B1": "Intermediate",
    "B2": "Upper Intermediate",
    "C1": "Advanced",
    "C2": "Proficiency",
}

# Create Chat Prompt Template
prompt_article = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant for ESL learners. "
        "Use the wikipedia tool to find information about the student's chosen topics. "
        "Conduct thorough research to not be misleading. "
        "Then, use this information as the basis for an engaging magazine article "
        "ADAPTED TO THE STATED LEVEL OF ENGLISH PROFICIENCY.",
    ),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])

# Construct the Tools agent
agent_article = create_tool_calling_agent(llm_article, tools, prompt_article)

# Create an agent executor
agent_executor_article = AgentExecutor(agent=agent_article, tools=tools, verbose=True)


def get_content(topic):
    """Ask the research agent for background information on *topic*.

    Returns:
        A ``(output, intermediate_steps)`` tuple. ``intermediate_steps`` is an
        empty list when the executor result does not contain that key.
    """
    query = f"Provide comprehensive information about {topic}, including any recent developments or news if available."
    result = agent_executor_article.invoke({"input": query})
    return result['output'], result.get('intermediate_steps', [])


def get_links(keyword):
    """Run a DuckDuckGo search for *keyword* and return the result URLs.

    The URLs are pulled out of the tool's text output by looking for
    ``link: http...`` fragments; an empty list is returned when none match.
    """
    search = DuckDuckGoSearchResults(api_wrapper=duckduckgo_wrapper)
    results = search.run(tool_input=keyword)
    return re.findall(r'link:\s*(https?://[^\],\s]+)', results)


def load_documents_from_links(links):
    """Download *links* and return the text of their paragraphs and headings.

    Args:
        links: Sequence of URLs to fetch.

    Returns:
        A list with one text string per loaded document (empty for no links).
    """
    if not links:
        return []

    bs4_strainer = bs4.SoupStrainer(('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
    # The loader hands back already-extracted text in ``page_content``, so the
    # tag filter has to be applied while the loader parses the HTML. Running
    # the strainer over that plain text afterwards would discard everything.
    document_loader = WebBaseLoader(
        web_path=links,
        bs_kwargs={"parse_only": bs4_strainer},
    )
    return [doc.page_content for doc in document_loader.load()]


def generate_article(topic, content, urls, level, learning_objective=None):
    """Write an article about *topic* adapted to the given CEFR *level*.

    Args:
        topic: Topic(s) chosen by the learner.
        content: Research material the article should draw from.
        urls: Source URLs to list at the end of the article. Not modified.
        level: Key of ``CEFR_LEVELS``.
        learning_objective: Optional objective added to the instructions.

    Returns:
        The agent's output text.

    Raises:
        KeyError: If *level* is not a key of ``CEFR_LEVELS``.
    """
    objective_text = f"Focus on the following learning objective: {learning_objective}. " if learning_objective else ""

    # Work on a copy so the caller's list is left untouched.
    source_urls = list(urls)
    if 'wikipedia.org' not in " ".join(source_urls):
        source_urls.insert(0, 'https://en.wikipedia.org')

    # Prompt wording is kept verbatim; it is only split across literals here.
    prompt = (
        f" #Role -You are a writer specializing in articles for English learners "
        f"-Your role is to write an article about {topic} suitable for a {level} "
        f"({CEFR_LEVELS[level]}) English language learner. "
        f"#Instructions {objective_text} "
        "1. Use the topics to generate a guiding question for the article. "
        "If there are multiple topics, think about how to connect them meaningfully "
        "into one question, so the article has one central focus. "
        f"2. Select from the following content to find ideas for your article: {content}. "
        "Your selection will help you connect the topics and answer the question. "
        "You do not have to use all sources. Select the most appropriate ones. "
        "3. If you do not have enough information to connect the topics, answer the question, "
        "use the Wikipedia tool again for additional information. "
        "4. Combine the ideas in ONE article that reads like an article in a magazine for young adults. "
        "This includes: "
        "- A title "
        "- An introduction "
        "- A body with a continuous, chronological narrative, and a mix of information and/or news "
        "- A conclusion "
        "#Critical "
        f"- Make sure your article matches the appropriate reading level ({CEFR_LEVELS[level]}) "
        "of the reader/English learner. This means: "
        '> For "Pre-A1": Use very simple words and short sentences, focusing on basic vocabulary. '
        "Write very short articles. "
        '> For "A1": Use simple language elements (phrases, basic vocabulary). '
        "Write short articles. "
        '> For "A2": Use common language elements (vocabulary, sentence structures, grammar, conjugation). '
        "Write 400-word articles. "
        '> For "B1": Use more complex language elements (vocabulary, sentence structures, grammar, conjugation). '
        "Write 600-word articles. "
        '> For "B2": Use advanced language elements (vocabulary, sentence structures, grammar, conjugation). '
        "Write 800-word articles. "
        '> For "C1": Use refined language elements (vocabulary, sentence structures, grammar, conjugation) '
        "requiring fluency. Write 1000-word articles. "
        '> For "C2": Use highly advanced language elements (vocabulary, sentence structures, grammar, conjugation) '
        "showing complete mastery of the language. "
        "- Refuse to write articles with inappropriate content. "
        "- Make sure your article is appropriate for a school context. "
        "- Make sure your article is balanced, tells the full story, "
        "and paints the whole picture with nuance and care. "
        "#Important "
        "- The article should be engaging, informative, and at the right level of complexity "
        "for the specified language level. "
        f"- Include a 'Sources' section at the end of the article with the following URLs: {source_urls} "
        "- DO NOT use paragraphs or headings. Output a block of text with a title. "
        "- End with a note saying that the article was AI-generated by GPT-4o based on online information. "
        "Students should keep in mind that AI can make mistakes. "
    )
    result = agent_executor_article.invoke({"input": prompt})
    return result['output']


def extract_language_elements(article, level, learning_objective=None):
    """Ask the agent to list the language elements of *article* for *level*.

    Returns:
        The agent's output text.

    Raises:
        KeyError: If *level* is not a key of ``CEFR_LEVELS``.
    """
    objective_text = f"Additionally, focus on the following learning objective: {learning_objective}. " if learning_objective else ""
    prompt = (
        f" Analyze the following article for a {level} ({CEFR_LEVELS[level]}) English language learner: "
        "Extract and categorize language elements found in this article "
        "and relevant to an English learner at this level: "
        "1. Vocabulary: Important or challenging words with their definitions "
        "2. Grammar: Key grammar structures or patterns used in the article "
        "3. Conjugation: Notable verb conjugations present in the article "
        "4. Expressions: Idiomatic expressions or phrasal verbs "
        "5. Sentence structures: Complex or notable sentence structures "
        f"Ensure that the extracted elements are appropriate and relevant for a {level} learner. "
        f"{objective_text} "
        "Return the results in a structured format. "
        f"Article: {article} "
    )
    result = agent_executor_article.invoke({"input": prompt})
    return result['output']


def generate_feedback_and_takeaway(article, level, language_elements, learning_objective=None):
    """Ask the agent for a learner-facing takeaway section about *article*.

    Returns:
        The agent's output text.

    Raises:
        KeyError: If *level* is not a key of ``CEFR_LEVELS``.
    """
    objective_text = f"Additionally, focus on the following learning objective: {learning_objective}. " if learning_objective else ""
    prompt = (
        f" Based on the following article for a {level} ({CEFR_LEVELS[level]}) English language learner "
        "and the extracted language elements: "
        f"Create a takeaway section about key language elements ({language_elements}) "
        f"found in this article ({article}). "
        "This should include: "
        "- A list of important vocabulary words with their definitions "
        "- A list of important grammar rules or patterns used in the article "
        "- A list of notable conjugations "
        "- A list of idiomatic expressions or phrasal verbs "
        "- Examples of complex or notable sentence structures "
        "Use the provided language elements as a basis, but feel free to expand or adjust as needed. "
        f"{objective_text} "
        "Keep in mind that you are addressing the student! "
    )
    result = agent_executor_article.invoke({"input": prompt})
    return result['output']


def main(topic, level, learning_objective=None):
    """Run the full pipeline: research, article, language elements, takeaway.

    Returns:
        ``(article, feedback_and_takeaway, context)`` on success, where
        ``context`` is a dict holding the generated pieces and the inputs.
        On invalid input, missing content or any exception the tuple is
        ``(message, "", None)`` so the UI can display the message.
    """
    # Validate up front so the user gets a clear message instead of the
    # KeyError text produced by an unselected level.
    if not topic or not topic.strip():
        return "Please enter a topic of interest.", "", None
    if level not in CEFR_LEVELS:
        return "Please select your current English proficiency level.", "", None

    try:
        content, intermediate_steps = get_content(topic)
        if not content:
            return "No relevant content found. Please try a different topic.", "", None

        # NOTE(review): the executor above is built without
        # return_intermediate_steps, so this list looks like it is always
        # empty and the search fallback is always taken - confirm.
        urls = [
            step[1]
            for step in intermediate_steps
            if isinstance(step[1], str) and step[1].startswith('http')
        ]
        if not urls:
            urls = get_links(topic)

        additional_content = load_documents_from_links(urls)
        if additional_content:
            # Separate the agent's text from the scraped text with a space.
            content = " ".join([content, *additional_content])

        article = generate_article(topic, content, urls, level, learning_objective)
        language_elements = extract_language_elements(article, level, learning_objective)
        feedback_and_takeaway = generate_feedback_and_takeaway(
            article, level, language_elements, learning_objective
        )
        context = {
            'article': article,
            'language_elements': language_elements,
            'feedback_and_takeaway': feedback_and_takeaway,
            'level': level,
            'learning_objective': learning_objective,
        }
        return article, feedback_and_takeaway, context
    except Exception as e:
        # UI boundary: report the failure in the article pane.
        return f"An error occurred: {str(e)}", "", None


# Function to call OpenAI's Text-to-Speech API
def text_to_speech(text):
    """Synthesize *text* to ``speech.mp3`` next to this file and return its path."""
    client = openai.OpenAI()
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text,
    )
    response.stream_to_file(speech_file_path)
    return str(speech_file_path)


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.HTML("")
    gr.Markdown("# **NewsWave** 🌊🐬")
    gr.Markdown("**Generate AI-written articles based on your interests, current English level, and online information!**")

    topic_input = gr.Textbox(label="Enter your topic of interest")
    level_input = gr.Dropdown(label="Select your current English proficiency level", choices=list(CEFR_LEVELS.keys()))
    objective_input = gr.Textbox(label="Enter a specific learning objective (optional)")

    # Layout containers are context managers: components have to be created
    # inside them to be placed in the row / column.
    with gr.Row():
        generate_btn = gr.Button("Surf the Web and Generate Article")
        tts_btn = gr.Button("Read it to me")

    with gr.Column():
        audio_output = gr.Audio()
        article_output = gr.Markdown()
        feedback_output = gr.Markdown()
        crtr_rgt = gr.Markdown(brd_txt)

    def on_generate(topic, level, learning_objective):
        """Generate the article and takeaway for the two Markdown panes."""
        article, feedback, _ = main(topic, level, learning_objective)
        return article, feedback

    def on_read(article):
        """Return an audio file for *article*, or None when there is no text."""
        if not article:
            return None
        return text_to_speech(article)

    generate_btn.click(
        on_generate,
        inputs=[topic_input, level_input, objective_input],
        outputs=[article_output, feedback_output],
    )
    tts_btn.click(on_read, inputs=[article_output], outputs=[audio_output])

demo.launch(auth=(username, password))