helenai committed
Commit ec62405
1 Parent(s): 23f6a23

Update preset tokenizers


Delete prompt file if exists
Log system info
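
The preset update relies on _preset_tokenizers, now imported from the package itself and used both as the Dropdown choices and to resolve a short preset name back to a full Hub model id (see the diff below). That resolution step implies a dict keyed by short names; a minimal sketch of the assumed shape, with illustrative entries taken from the old hardcoded list:

# Assumed shape of _preset_tokenizers (illustrative entries, not the
# package's actual contents): short preset name -> Hub model id.
# Iterating the dict, as a Dropdown's choices presumably would, yields the keys.
_preset_tokenizers = {
    "mistral": "mistralai/Mistral-7B-v0.1",
    "gemma": "google/gemma-7b",
    "phi-2": "microsoft/phi-2",
}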

Files changed (1)
  1. app.py +34 -27
app.py CHANGED
@@ -1,25 +1,28 @@
+import pprint
+import subprocess
+from pathlib import Path
+
 import gradio as gr
-from test_prompt_generator import generate_prompt
+from test_prompt_generator.test_prompt_generator import _preset_tokenizers, generate_prompt
 
-tokenizers = [
-    "google/gemma-7b",
-    "mistralai/Mistral-7B-v0.1",
-    "facebook/opt-2.7b",
-    "microsoft/phi-2",
-    "THUDM/chatglm3-6b",
-    "Qwen/Qwen1.5-7B-Chat",
-    "bigscience/bloom-560m",
-    "ise-uiuc/Magicoder-S-DS-6.7B",
-    "google/flan-t5-base",
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "google-bert/bert-base-uncased",
-]
+# log system info for debugging purposes
+result = subprocess.run(["lscpu"], text=True, capture_output=True)
+pprint.pprint(result.stdout)
+result = subprocess.run(["pip", "freeze"], text=True, capture_output=True)
+pprint.pprint(result.stdout)
 
 
-def generate(model_id, num_tokens, prefix=None, source_text=None):
-    output_file = f"prompt_{num_tokens}.jsonl"
-    prompt = generate_prompt(model_id, int(num_tokens), prefix=prefix, source_text=source_text, output_file=output_file)
-    return prompt, output_file
+def generate(tokenizer_id, num_tokens, prefix=None, source_text=None):
+    output_path = Path(f"prompt_{num_tokens}.jsonl")
+    if output_path.exists():
+        output_path.unlink()
+
+    prompt = generate_prompt(
+        tokenizer_id, int(num_tokens), prefix=prefix, source_text=source_text, output_file=output_path
+    )
+    if tokenizer_id in _preset_tokenizers:
+        tokenizer_id = _preset_tokenizers[tokenizer_id]
+    return prompt, str(output_path), tokenizer_id
 
 
 demo = gr.Interface(
@@ -30,8 +33,8 @@ demo = gr.Interface(
     inputs=[
         gr.Dropdown(
             label="Tokenizer",
-            choices=tokenizers,
-            value="mistralai/Mistral-7B-v0.1",
+            choices=_preset_tokenizers,
+            value="mistral",
             allow_custom_value=True,
             info="Select a tokenizer from this list or paste a model_id from a model on the Hugging Face Hub",
         ),
@@ -47,14 +50,18 @@ demo = gr.Interface(
             info="By default, prompts will be generated from Alice in Wonderland. Enter text here to use that instead.",
         ),
     ],
-    outputs=[gr.Textbox(label="prompt", show_copy_button=True), gr.File(label="Json file")],
+    outputs=[
+        gr.Textbox(label="prompt", show_copy_button=True),
+        gr.File(label="Json file"),
+        gr.Markdown(label="tokenizer"),
+    ],
     examples=[
-        ["mistralai/Mistral-7B-v0.1", 32],
-        ["mistralai/Mistral-7B-v0.1", 64],
-        ["mistralai/Mistral-7B-v0.1", 128],
-        ["mistralai/Mistral-7B-v0.1", 512],
-        ["mistralai/Mistral-7B-v0.1", 1024],
-        ["mistralai/Mistral-7B-v0.1", 2048],
+        ["mistral", 32],
+        ["mistral", 64],
+        ["mistral", 128],
+        ["mistral", 512],
+        ["mistral", 1024],
+        ["mistral", 2048],
     ],
     cache_examples=False,
     allow_flagging=False,
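
A quick sanity check of the new three-value return from generate (a hypothetical local session; assumes the test_prompt_generator package is installed and "mistral" resolves as a preset key):

from app import generate

# Writes prompt_32.jsonl (deleting any stale copy first) and returns the
# generated prompt, the output file path, and the resolved tokenizer id.
prompt, output_file, tokenizer_id = generate("mistral", 32)
print(output_file)   # prompt_32.jsonl
print(tokenizer_id)  # e.g. mistralai/Mistral-7B-v0.1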