# Spaces: Running
# (page-header residue from the hosting site; not part of the program)
import logging
import os

import cohere
import gradio as gr
import requests
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
from duckduckgo_search import DDGS
from langchain_cohere import ChatCohere
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_groq import ChatGroq
from newspaper import Article
# Ensure essential environment variables are set
def _require_env(name):
    """Return the value of environment variable *name*; raise if unset or empty."""
    value = os.environ.get(name)
    if value:
        return value
    raise EnvironmentError(f"{name} is not set in environment variables")


# Cohere is checked before Groq, so the first missing key is the one reported.
cohere_api_key = _require_env('COHERE_API_KEY')
groq_api_key = _require_env("GROQ_API_KEY")

# Initialize API clients
co = cohere.Client(cohere_api_key)
print("client ok")
def fetch_content(url):
    """Download and parse the page at *url* and return its ``text``.

    This is best-effort: any exception raised while downloading or parsing
    is printed to stdout and returned as an
    ``"Error fetching content: ..."`` string instead of propagating.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        text = page.text
    except Exception as exc:
        print(f"ERROR: {exc}")
        return f"Error fetching content: {exc}"
    else:
        return text
# Define the DuckDuckGoSearch tool
# The function is handed to the researcher Agent via `tools=[...]`, and the
# research task prompt refers to it as "DuckDuckGoSearchResults", so it is
# registered under that name with the `tool` decorator. The docstring below
# doubles as the tool description shown to the LLM, so keep it accurate.
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results.
    This tool automates the retrieval of web-based information related to a specified query.
    Args:
    - search_query (str): The query string that specifies the information to be searched on the web. This should be a clear and concise expression of the user's information needs.
    Returns:
    - list: A list of dictionaries, where each dictionary represents a search result. Each dictionary includes the 'title' of the page, a 'snippet' of the page and the 'link' with the url linking to it.
    """
    # Up to 5 results, restricted to the last month (timelimit='m').
    hits = DDGS().text(search_query, max_results=5, timelimit='m')
    # `or []` keeps the comprehension safe if the search yields nothing.
    return [
        {"title": hit['title'], "snippet": hit['body'], "link": hit['href']}
        for hit in hits or []
    ]
# Registered under the name the research task prompt uses ("WebScrapper").
# The docstring doubles as the tool description shown to the LLM.
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
    It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
    This tool is particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
    Args:
    - url (str): The URL from which to scrape content.
    - topic (str): The specific topic on which to generate a summary.
    Returns:
    - summary (str): summary of the url on the topic
    """
    # Scrape content from the specified URL
    content = fetch_content(url)
    stripped = content.strip() if content else ""

    if not stripped:
        # Nothing to summarize: skip the LLM call rather than asking it to
        # summarize an empty document.
        summary = "No readable content could be extracted from this URL."
    elif stripped.startswith("Error fetching content:"):
        # fetch_content reports failures as an "Error fetching content: ..."
        # string; pass that through instead of summarizing an error message.
        summary = stripped
    else:
        # Prepare the prompt for generating the summary
        prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + content
        # Generate the summary using Cohere
        response = co.chat(
            model='command-r-plus',
            message=prompt,
            temperature=0.4,
            max_tokens=1000,
            chat_history=[],
            prompt_truncation='AUTO',
        )
        summary = response.text

    # Same envelope for every outcome so the agent always sees the source URL.
    return f"###\nSummary:\n{summary}\nURL: {url}\n###\n"
def _build_agents(llm):
    """Create the (researcher, editor) agent pair, both driven by *llm*.

    The ``{topic}`` markers in the goals are left as literal placeholders;
    the actual topic is supplied through ``inputs`` when the crew is kicked off.
    """
    researcher = Agent(
        role='Researcher',
        goal='Search and Collect detailed information on topic ## {topic} ##',
        tools=[search_results, web_scrapper],
        llm=llm,
        backstory=(
            "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
            "Your dedication to detail ensures the reliability and thoroughness of your findings. "
            "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
        ),
        allow_delegation=False,
        max_iter=15,
        max_rpm=20,
        memory=True,
        verbose=True,
    )
    editor = Agent(
        role='Editor',
        goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
        llm=llm,
        backstory=(
            "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
            "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
            "but is also easily understandable and appealing to diverse audiences. "
        ),
        allow_delegation=False,
        max_iter=5,
        max_rpm=15,
        memory=True,
        verbose=True,
    )
    return researcher, editor


def _build_tasks(researcher, editor):
    """Create the (research_task, edit_task) pair; the edit task takes the research task as context."""
    research_task = Task(
        description=(
            "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
            "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
            "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
            "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
            "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
        ),
        expected_output=(
            "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
            "Each section should properly cite sources, providing a thorough overview of the information gathered."
        ),
        agent=researcher,
    )
    edit_task = Task(
        description=(
            "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
            "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
            "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
            "Include a section listing all sources used, formatted as bullet points following this template: "
            # The template is quoted on both sides (the original had a stray
            # trailing quote only).
            "'- title: url'."
        ),
        expected_output=(
            "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
            "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
            "Ensure the document is grammatically correct and ready for publication or presentation."
        ),
        agent=editor,
        context=[research_task],
    )
    return research_task, edit_task


def kickoff_crew(topic: str, model_choice: str) -> str:
    """Run the researcher -> editor crew on *topic* and return the final report.

    Args:
        topic: Subject to research; must contain non-whitespace text.
        model_choice: Groq model name used for both agents.

    Returns:
        The crew's result converted to ``str``. On invalid input or any
        failure, an ``"Error: ..."`` string is returned instead of raising,
        because the value is rendered directly in the UI.
    """
    # Validate up front so the user gets a clear message instead of an
    # opaque client error (the model selector can yield None).
    if not topic or not topic.strip():
        return "Error: Please enter a topic to research."
    if not model_choice:
        return "Error: Please choose a model."

    try:
        # Initialize the large language model based on user selection
        groq_llm = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name=model_choice)

        researcher, editor = _build_agents(groq_llm)
        research_task, edit_task = _build_tasks(researcher, editor)

        # Forming the Crew: tasks run in the listed order.
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential,
        )

        # Kick-off the research process
        result = crew.kickoff(inputs={'topic': topic.strip()})
        return str(result)
    except Exception as e:
        # UI boundary: keep the traceback in the server log, show only the
        # message to the user.
        logging.getLogger(__name__).exception("Research crew failed for topic %r", topic)
        return f"Error: {str(e)}"
def main():
    """Set up the Gradio interface for the CrewAI Research Tool."""
    model_names = [
        "llama3-8b-8192",
        "llama3-70b-8192",
        "llama-3.1-8b-instant",
        "llama-3.1-70b-versatile",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("## CrewAI Research Tool")
        topic_input = gr.Textbox(label="Enter Topic", placeholder="Type here...")
        # Pre-select a model so a click on "Start Research" never submits
        # an empty model choice.
        model_choice = gr.Radio(
            choices=model_names,
            value=model_names[0],
            label="Choose Model",
        )
        submit_button = gr.Button("Start Research")
        output = gr.Markdown(label="Result")

        submit_button.click(
            fn=kickoff_crew,
            inputs=[topic_input, model_choice],
            outputs=output,
        )

    # Queue at most 3 pending requests and keep the API endpoints closed.
    demo.queue(api_open=False, max_size=3).launch()


if __name__ == "__main__":
    main()