Spaces:
Runtime error
Runtime error
updates
Browse files- app.py +12 -5
- tabs/run_benchmark.py +4 -4
app.py
CHANGED
@@ -17,14 +17,14 @@ from tabs.run_benchmark import run_benchmark_main
|
|
17 |
demo = gr.Blocks()
|
18 |
|
19 |
|
20 |
-
def run_benchmark_gradio(tool_name, model_name, openai_api_key, anthropic_api_key):
|
21 |
"""Run the benchmark using inputs."""
|
22 |
if tool_name is None:
|
23 |
return "Please enter the name of your tool."
|
24 |
if openai_api_key is None and anthropic_api_key is None:
|
25 |
return "Please enter either OpenAI or Anthropic API key."
|
26 |
|
27 |
-
result = run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key)
|
28 |
if result == 'completed':
|
29 |
# get the results file in the results directory
|
30 |
fns = glob('results/*.csv')
|
@@ -106,8 +106,8 @@ with demo:
|
|
106 |
"claude-prediction-offline",
|
107 |
"claude-prediction-online",
|
108 |
'prediction-request-rag',
|
109 |
-
|
110 |
-
|
111 |
"prediction-request-reasoning-claude",
|
112 |
"prediction-request-rag-claude",
|
113 |
"prediction-url-cot-claude",
|
@@ -122,6 +122,13 @@ with demo:
|
|
122 |
with gr.Row():
|
123 |
openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
|
124 |
anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
with gr.Row():
|
126 |
run_button = gr.Button("Run Benchmark")
|
127 |
with gr.Row():
|
@@ -132,7 +139,7 @@ with demo:
|
|
132 |
summary = gr.Dataframe()
|
133 |
|
134 |
run_button.click(run_benchmark_gradio,
|
135 |
-
inputs=[tool_name, model_name, openai_api_key, anthropic_api_key],
|
136 |
outputs=[result, summary])
|
137 |
|
138 |
demo.queue(default_concurrency_limit=40).launch()
|
|
|
17 |
demo = gr.Blocks()
|
18 |
|
19 |
|
20 |
+
def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
|
21 |
"""Run the benchmark using inputs."""
|
22 |
if tool_name is None:
|
23 |
return "Please enter the name of your tool."
|
24 |
if openai_api_key is None and anthropic_api_key is None:
|
25 |
return "Please enter either OpenAI or Anthropic API key."
|
26 |
|
27 |
+
result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key)
|
28 |
if result == 'completed':
|
29 |
# get the results file in the results directory
|
30 |
fns = glob('results/*.csv')
|
|
|
106 |
"claude-prediction-offline",
|
107 |
"claude-prediction-online",
|
108 |
'prediction-request-rag',
|
109 |
+
"prediction-with-research-conservative",
|
110 |
+
"prediction-with-research-bold",
|
111 |
"prediction-request-reasoning-claude",
|
112 |
"prediction-request-rag-claude",
|
113 |
"prediction-url-cot-claude",
|
|
|
122 |
with gr.Row():
|
123 |
openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
|
124 |
anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
|
125 |
+
with gr.Row():
|
126 |
+
num_questions = gr.Slider(
|
127 |
+
minimum=1,
|
128 |
+
maximum=340,
|
129 |
+
value=10,
|
130 |
+
label="Number of questions to run the benchmark on",
|
131 |
+
)
|
132 |
with gr.Row():
|
133 |
run_button = gr.Button("Run Benchmark")
|
134 |
with gr.Row():
|
|
|
139 |
summary = gr.Dataframe()
|
140 |
|
141 |
run_button.click(run_benchmark_gradio,
|
142 |
+
inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key],
|
143 |
outputs=[result, summary])
|
144 |
|
145 |
demo.queue(default_concurrency_limit=40).launch()
|
tabs/run_benchmark.py
CHANGED
@@ -2,16 +2,16 @@ import os
|
|
2 |
from benchmark.run_benchmark import run_benchmark
|
3 |
|
4 |
|
5 |
-
def run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key):
|
6 |
"""Run the benchmark using the provided function and API key."""
|
7 |
# Empty the results directory
|
8 |
os.system("rm -rf results/*")
|
9 |
|
10 |
-
print(f"Running benchmark with the following parameters: {tool_name}, {model_name}, {openai_api_key}, {anthropic_api_key}")
|
11 |
-
|
12 |
# Set the benchmark parameters
|
13 |
kwargs = {}
|
14 |
-
|
|
|
|
|
15 |
kwargs["tools"] = [tool_name]
|
16 |
if model_name:
|
17 |
kwargs["model"] = model_name
|
|
|
2 |
from benchmark.run_benchmark import run_benchmark
|
3 |
|
4 |
|
5 |
+
def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
|
6 |
"""Run the benchmark using the provided function and API key."""
|
7 |
# Empty the results directory
|
8 |
os.system("rm -rf results/*")
|
9 |
|
|
|
|
|
10 |
# Set the benchmark parameters
|
11 |
kwargs = {}
|
12 |
+
if not num_questions:
|
13 |
+
num_questions = 10
|
14 |
+
kwargs["num_questions"] = num_questions
|
15 |
kwargs["tools"] = [tool_name]
|
16 |
if model_name:
|
17 |
kwargs["model"] = model_name
|