|
import json |
|
import time |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
|
|
from autotab import AutoTab |
|
|
|
|
|
def auto_tabulator_completion(
    in_file_path: str,
    instruction: str,
    max_examples: int,
    model_name: str,
    generation_config: str,
    request_interval: float,
    save_every: int,
    str_api_keys: str,
    base_url: str,
) -> tuple[str, str, str, pd.DataFrame]:
    """Run ``AutoTab`` on an input file and summarise the run.

    Args:
        in_file_path: Input file location, forwarded to ``AutoTab``.
        instruction: Instruction text, forwarded to ``AutoTab``.
        max_examples: Forwarded to ``AutoTab`` as ``max_examples``.
        model_name: Forwarded to ``AutoTab`` as ``model_name``.
        generation_config: JSON text such as
            ``'{"temperature": 0, "max_tokens": 128}'``; it is decoded with
            ``json.loads`` before being forwarded. A value that is not
            ``str``/``bytes``/``bytearray`` (e.g. an already-decoded dict)
            is forwarded unchanged.
        request_interval: Forwarded to ``AutoTab`` as ``request_interval``.
        save_every: Forwarded to ``AutoTab`` as ``save_every``.
        str_api_keys: One or more API keys separated by any whitespace
            (spaces or newlines); split into a list before forwarding.
        base_url: Forwarded to ``AutoTab`` as ``base_url``.

    Returns:
        A 4-tuple of: the text report, the generated output file name,
        ``autotab.query_example`` (or ``"No queries made."`` when no request
        was issued), and the first 15 entries of ``autotab.data``.

    Raises:
        json.JSONDecodeError: If ``generation_config`` is text that is not
            valid JSON.
    """
    # Timestamped name so successive runs do not overwrite each other
    # (resolution is one second).
    output_file_name = f"output_{time.strftime('%Y%m%d%H%M%S')}.xlsx"

    # The UI supplies the config as JSON text; only decode when it is text so
    # that programmatic callers may hand over a ready-made mapping.
    if isinstance(generation_config, (str, bytes, bytearray)):
        generation_config = json.loads(generation_config)

    autotab = AutoTab(
        in_file_path=in_file_path,
        out_file_path=output_file_name,
        instruction=instruction,
        max_examples=max_examples,
        model_name=model_name,
        generation_config=generation_config,
        request_interval=request_interval,
        save_every=save_every,
        api_keys=str_api_keys.split(),
        base_url=base_url,
    )

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted during the run.
    started = time.perf_counter()
    autotab.run()
    time_taken = time.perf_counter() - started

    def per_second(count: float) -> float:
        # A zero-length measurement would otherwise raise ZeroDivisionError.
        return count / time_taken if time_taken > 0 else 0.0

    # Format the same measured duration that the rates below are based on.
    elapsed_text = time.strftime("%H:%M:%S", time.gmtime(time_taken))

    report = "\n".join(
        (
            f"Total data points: {autotab.num_data}",
            f"Total missing (before): {autotab.num_missing}",
            f"Total missing (after): {autotab.failed_count}",
            f"Total queries made: {autotab.request_count}",
            f"Time taken: {elapsed_text}",
            f"Prediction per second: {per_second(autotab.num_missing):.2f}",
            f"Query per second: {per_second(autotab.request_count):.2f}",
        )
    )

    query_example = (
        autotab.query_example if autotab.request_count > 0 else "No queries made."
    )
    return report, output_file_name, query_example, autotab.data[:15]
|
|
|
|
|
|
|
# Input components for the interface. They are listed in the same order as the
# parameters of auto_tabulator_completion.
inputs = [
    gr.File(label="Input Excel File"),
    gr.Textbox(
        label="Instruction",
        value="You are a helpful assistant. Help me finish the task.",
    ),
    gr.Slider(label="Max Examples", minimum=1, maximum=50, step=1, value=4),
    gr.Textbox(label="Model Name", value="Qwen/Qwen2-7B-Instruct"),
    # Entered as JSON text; the callback decodes it with json.loads.
    gr.Textbox(
        label="Generation Config in Dict",
        value='{"temperature": 0, "max_tokens": 128}',
    ),
    gr.Slider(label="Request Interval in Seconds", minimum=0, maximum=10, value=0.1),
    gr.Slider(label="Save Every N Steps", minimum=1, maximum=1000, step=1, value=100),
    # NOTE(review): the default below is a literal API key committed to the
    # source. Confirm it is an intentionally public demo key; otherwise it
    # should be rotated and supplied from configuration instead.
    gr.Textbox(
        label="API Key(s). One per line.",
        value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah",
    ),
    gr.Textbox(label="Base URL", value="https://public-beta-api.siliconflow.cn/v1"),
]
|
|
|
# Output components for the interface, one per element of the 4-tuple returned
# by auto_tabulator_completion: report text, output file, query example, and
# a preview of the data.
outputs = [
    gr.Textbox(label="Report"),
    gr.File(label="Output Excel File"),
    gr.Textbox(label="Query Example"),
    gr.Dataframe(label="First 15 rows."),
]
|
|
|
# Build the interface around auto_tabulator_completion and start serving it
# as soon as this module is executed.
_interface = gr.Interface(
    fn=auto_tabulator_completion,
    inputs=inputs,
    outputs=outputs,
    title="Auto Tabulator Completion",
    description=(
        "Automatically complete missing output values in tabular data based on "
        "in-context learning. Check https://github.com/Ki-Seki/autotab."
    ),
)
_interface.launch()
|
|