Vera-ZWY committed on
Commit
9cc1dc9
1 Parent(s): d2f7cc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -69
app.py CHANGED
@@ -65,84 +65,60 @@ def stream_chat_with_rag(
65
  return answer
66
 
67
 
68
- def format_answer_string(answer: str):
69
- """
70
- This function extracts and formats the assistant's response before document metadata.
71
- Anything after the marker `[(` (where documents are listed) is ignored.
72
- """
73
- # Step 1: Split the response at the start of the document metadata
74
- split_marker = "[("
75
- if split_marker in answer:
76
- # Everything before the marker is the relevant answer
77
- answer_before_docs = answer.split(split_marker)[0]
78
- else:
79
- # If no documents metadata, return the entire answer
80
- answer_before_docs = answer
81
 
82
- # Step 2: Clean up formatting by replacing escaped newline characters
83
- formatted_answer = answer_before_docs.replace("\\n", "\n").strip()
84
 
85
- # Step 3: Remove potential starting and ending artifacts like (' and ,) if present
86
- if formatted_answer.startswith("(\"") and formatted_answer.endswith("\","):
87
- formatted_answer = formatted_answer[2:-2].strip()
88
 
89
- # Optional: Add a prefix for clarity
90
- formatted_answer = "Co-Pilot: " + formatted_answer
91
 
92
- return formatted_answer
93
-
94
- def format_relevant_documents(relevant_docs: list):
95
- """
96
- This function formats the relevant document metadata and content for readable output.
97
- It extracts the heading, page number, and a snippet of the content from each document.
98
- """
99
- formatted_docs = "Relevant Documents:\n\n"
100
 
101
- for idx, (doc, score) in enumerate(relevant_docs):
102
- # Extract the relevant metadata
103
- heading = doc.metadata.get('heading', 'Unnamed Document')
104
- page_number = int(doc.metadata.get('page_number', -1))
105
- source = doc.metadata.get('source', 'Unknown Source')
106
- confidence = round(score, 4) # Rounding the score for cleaner output
107
 
108
- # Add the formatted details to the output string
109
- formatted_docs += f"Document {idx + 1}:\n"
110
- formatted_docs += f" - Heading: {heading}\n"
111
- formatted_docs += f" - Page Number: {page_number}\n"
112
- formatted_docs += f" - Source: {source}\n"
113
- formatted_docs += f" - Confidence Score: {confidence}\n"
114
 
115
- # Optionally include a snippet from the content
116
- content_snippet = doc.page_content[:200] # Take the first 200 characters for preview
117
- formatted_docs += f" - Content Snippet: {content_snippet}...\n\n"
118
 
119
- return formatted_docs.strip()
120
 
121
 
122
- # # ------------------------------------- Core CNI APP ----------------------------------------------------------------------------------
123
- # # Function to handle PDF processing API call
124
- # def process_pdf(pdf_file, client_name):
125
- # return client.predict(
126
- # pdf_file=handle_file(pdf_file),
127
- # #client_name=client_name, # Hardcoded client name
128
- # api_name="/process_pdf"
129
- # )[1] # Return only the result string
130
-
131
- # # # Function to handle search API call
132
- # # def search_api(query):
133
- # # return client.predict(query=query, api_name="/search_with_confidence")
134
-
135
- # # Function to handle RAG API call
136
- # def rag_api(question, client_name):
137
- # return client.predict(question=question,selected_document=client_name, api_name="/get_answer")
138
-
139
- # def delete_index():
140
- # result = client.predict(api_name="/delete_index")
141
- # return result
142
-
143
-
144
-
145
- #-------------------------------------- UX & Gradio -------------------------------------------------------------------------------
146
 
147
  # CSS for custom styling
148
  CSS = """
@@ -156,7 +132,7 @@ CSS = """
156
 
157
 
158
  # Title for the application
159
- TITLE = "<h1 style='text-align:center;'>CNI RAG AGENTIC v0.2</h1>"
160
 
161
  # Create the Gradio Blocks interface
162
  with gr.Blocks(css=CSS) as demo:
 
65
  return answer
66
 
67
 
68
+ # def format_answer_string(answer: str):
69
+ # """
70
+ # This function extracts and formats the assistant's response before document metadata.
71
+ # Anything after the marker `[(` (where documents are listed) is ignored.
72
+ # """
73
+ # # Step 1: Split the response at the start of the document metadata
74
+ # split_marker = "[("
75
+ # if split_marker in answer:
76
+ # # Everything before the marker is the relevant answer
77
+ # answer_before_docs = answer.split(split_marker)[0]
78
+ # else:
79
+ # # If no documents metadata, return the entire answer
80
+ # answer_before_docs = answer
81
 
82
+ # # Step 2: Clean up formatting by replacing escaped newline characters
83
+ # formatted_answer = answer_before_docs.replace("\\n", "\n").strip()
84
 
85
+ # # Step 3: Remove potential starting and ending artifacts like (' and ,) if present
86
+ # if formatted_answer.startswith("(\"") and formatted_answer.endswith("\","):
87
+ # formatted_answer = formatted_answer[2:-2].strip()
88
 
89
+ # # Optional: Add a prefix for clarity
90
+ # formatted_answer = "Co-Pilot: " + formatted_answer
91
 
92
+ # return formatted_answer
93
+
94
+ # def format_relevant_documents(relevant_docs: list):
95
+ # """
96
+ # This function formats the relevant document metadata and content for readable output.
97
+ # It extracts the heading, page number, and a snippet of the content from each document.
98
+ # """
99
+ # formatted_docs = "Relevant Documents:\n\n"
100
 
101
+ # for idx, (doc, score) in enumerate(relevant_docs):
102
+ # # Extract the relevant metadata
103
+ # heading = doc.metadata.get('heading', 'Unnamed Document')
104
+ # page_number = int(doc.metadata.get('page_number', -1))
105
+ # source = doc.metadata.get('source', 'Unknown Source')
106
+ # confidence = round(score, 4) # Rounding the score for cleaner output
107
 
108
+ # # Add the formatted details to the output string
109
+ # formatted_docs += f"Document {idx + 1}:\n"
110
+ # formatted_docs += f" - Heading: {heading}\n"
111
+ # formatted_docs += f" - Page Number: {page_number}\n"
112
+ # formatted_docs += f" - Source: {source}\n"
113
+ # formatted_docs += f" - Confidence Score: {confidence}\n"
114
 
115
+ # # Optionally include a snippet from the content
116
+ # content_snippet = doc.page_content[:200] # Take the first 200 characters for preview
117
+ # formatted_docs += f" - Content Snippet: {content_snippet}...\n\n"
118
 
119
+ # return formatted_docs.strip()
120
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # CSS for custom styling
124
  CSS = """
 
132
 
133
 
134
  # Title for the application
135
+ TITLE = "<h1 style='text-align:center;'>Reddit Election Q&A agent v0.1</h1>"
136
 
137
  # Create the Gradio Blocks interface
138
  with gr.Blocks(css=CSS) as demo: