AutoTab / app.py
Ki-Seki's picture
docs: update
c2c0cff
import json
import time
import gradio as gr
import pandas as pd
from autotab import AutoTab
def auto_tabulator_completion(
in_file_path: str,
instruction: str,
max_examples: int,
model_name: str,
generation_config: dict,
request_interval: float,
save_every: int,
str_api_keys: str,
base_url: str,
) -> tuple[str, str, str, pd.DataFrame]:
output_file_name = f"output_{time.strftime('%Y%m%d%H%M%S')}.xlsx"
autotab = AutoTab(
in_file_path=in_file_path,
out_file_path=output_file_name,
instruction=instruction,
max_examples=max_examples,
model_name=model_name,
generation_config=json.loads(generation_config),
request_interval=request_interval,
save_every=save_every,
api_keys=str_api_keys.split(),
base_url=base_url,
)
start = time.time()
autotab.run()
time_taken = time.time() - start
report = f"Total data points: {autotab.num_data}\n" + \
f"Total missing (before): {autotab.num_missing}\n" + \
f"Total missing (after): {autotab.failed_count}\n" + \
f"Total queries made: {autotab.request_count}\n" + \
f"Time taken: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}\n" + \
f"Prediction per second: {autotab.num_missing / time_taken:.2f}\n" + \
f"Query per second: {autotab.request_count / time_taken:.2f}"
query_example = autotab.query_example if autotab.request_count > 0 else "No queries made."
return report, output_file_name, query_example, autotab.data[:15]
# Gradio interface
inputs = [
gr.File(label="Input Excel File"),
gr.Textbox(
value="You are a helpful assistant. Help me finish the task.",
label="Instruction",
),
gr.Slider(value=4, minimum=1, maximum=50, step=1, label="Max Examples"),
gr.Textbox(value="Qwen/Qwen2-7B-Instruct", label="Model Name"),
gr.Textbox(
value='{"temperature": 0, "max_tokens": 128}',
label="Generation Config in Dict",
),
gr.Slider(value=0.1, minimum=0, maximum=10, label="Request Interval in Seconds"),
gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
gr.Textbox(
value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah",
label="API Key(s). One per line.",
),
gr.Textbox(value="https://public-beta-api.siliconflow.cn/v1", label="Base URL"),
]
outputs = [
gr.Textbox(label="Report"),
gr.File(label="Output Excel File"),
gr.Textbox(label="Query Example"),
gr.Dataframe(label="First 15 rows."),
]
gr.Interface(
fn=auto_tabulator_completion,
inputs=inputs,
outputs=outputs,
title="Auto Tabulator Completion",
description="Automatically complete missing output values in tabular data based on in-context learning. Check https://github.com/Ki-Seki/autotab.",
).launch()