Spaces:

poscye
/

ddg-web-search-chat

Running on Zero

App Files Files Community

pabloce committed on May 26

Commit

452bb3a

•

1 Parent(s): f04b8f8

Create utils.py

Browse files

Files changed (1) hide show

utils.py +44 -0

utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import json
+import time
+from typing import List
+from datetime import datetime, timezone
+from pydantic import BaseModel, Field
+from trafilatura import fetch_url, extract
+def get_server_time():
+    utc_time = datetime.now(timezone.utc)
+    return utc_time.strftime("%Y-%m-%d %H:%M:%S")
+def get_website_content_from_url(url: str) -> str:
+    """
+    Get website content from a URL using Selenium and BeautifulSoup for improved content extraction and filtering.
+    Args:
+        url (str): URL to get website content from.
+    Returns:
+        str: Extracted content including title, main text, and tables.
+    """
+    try:
+        downloaded = fetch_url(url)
+        result = extract(downloaded, include_formatting=True, include_links=True, output_format='json', url=url)
+        if result:
+            result = json.loads(result)
+            return f'=========== Website Title: {result["title"]} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{result["raw_text"]}\n\n=========== Website Content End ===========\n\n'
+        else:
+            return ""
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+# Pydantic model holding the list of source URLs cited for an answer.
+# NOTE(review): the class docstring and the field description below are presumably
+# surfaced in the schema generated from this model - confirm before rewording them.
+class CitingSources(BaseModel):
+    """
+    This represents the citing of the sources you used to answer the user query.
+    """
+    # The Ellipsis passed as the default marks the field as required.
+    sources: List[str] = Field(
+        ...,
+        description="List of sources to cite. Should be an URL of the source. E.g. GitHub URL, Blogpost URL or Newsletter URL."
+    )