aka7774 committed on
Commit
581f159
•
1 Parent(s): 20310a2

Upload 8 files

Files changed (8)
  1. app.py +135 -0
  2. fn.py +134 -0
  3. install.bat +56 -0
  4. main.py +33 -0
  5. models.json +79 -0
  6. models.py +19 -0
  7. requirements.txt +12 -0
  8. venv.sh +7 -0
app.py ADDED
@@ -0,0 +1,135 @@
+ import fn
+ import gradio as gr
+ import models
+
+ def fn_chat(instruction, input, model, dtype, is_messages, template, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
+     args = {
+         'instruction': instruction,
+         'input': input,
+         'model': model,
+         'dtype': dtype,
+         'is_messages': is_messages,
+         'template': template,
+         'max_new_tokens': int(max_new_tokens),
+         'temperature': float(temperature),
+         'top_p': float(top_p),
+         'top_k': int(top_k),
+         'repetition_penalty': float(repetition_penalty),
+     }
+
+     content = fn.infer(args)
+     return content
+
+ with gr.Blocks() as demo:
+     opt = models.get_head_options()
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             model = gr.Textbox(
+                 value=opt['model'],
+                 label='model',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+
+             dtype = gr.Dropdown(
+                 value=opt['dtype'],
+                 choices=['int4', 'int8', 'fp16', 'bf16'],
+                 label='dtype',
+                 show_label=True,
+                 interactive=True,
+                 allow_custom_value=True,
+             )
+             template = gr.Textbox(
+                 value=opt['template'],
+                 lines=3,
+                 label='template',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             is_messages = gr.Checkbox(
+                 value=opt['is_messages'],
+                 label='is_messages',
+                 show_label=True,
+                 interactive=True,
+             )
+
+         with gr.Column(scale=1):
+             max_new_tokens = gr.Textbox(
+                 value=opt['max_new_tokens'],
+                 label='max_new_tokens',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             temperature = gr.Textbox(
+                 value=opt['temperature'],
+                 label='temperature',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             top_p = gr.Textbox(
+                 value=opt['top_p'],
+                 label='top_p',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             top_k = gr.Textbox(
+                 value=opt['top_k'],
+                 label='top_k',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             repetition_penalty = gr.Textbox(
+                 value=opt['repetition_penalty'],
+                 label='repetition_penalty',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+
+     with gr.Accordion('Preset', open=False):
+         gr.Examples(
+             models.get_examples(),
+             [model, dtype, is_messages, template, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
+         )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             instruction = gr.Textbox(
+                 lines=20,
+                 label='instruction',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             user_input = gr.Textbox(
+                 lines=1,
+                 label='input',
+                 show_label=True,
+                 interactive=True,
+                 show_copy_button=True,
+             )
+             chat_button = gr.Button(value='chat')
+
+         with gr.Column(scale=1):
+             said = gr.Textbox(
+                 label='said',
+                 lines=15,
+                 show_label=True,
+                 show_copy_button=True,
+             )
+
+     chat_button.click(
+         fn=fn_chat,
+         inputs=[instruction, user_input, model, dtype, is_messages, template, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
+         outputs=[said],
+     )
+
+ if __name__ == '__main__':
+     demo.launch()
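
Note on the wiring above: every sampling parameter is a gr.Textbox, so fn_chat is responsible for casting the strings to int/float before handing them to fn.infer. A minimal sketch of exercising the same path without the UI (the instruction/input strings are illustrative; the model name and template come from this repo's models.json):

    # Hypothetical smoke test for fn_chat; assumes fn.py can load this model locally.
    content = fn_chat(
        instruction='You are a helpful assistant.',
        input='Hello!',
        model='cyberagent/calm2-7b-chat',
        dtype='int4',
        is_messages=False,
        template='{instruction}\nUSER: {input}\nASSISTANT: ',
        max_new_tokens='256',        # strings are fine; fn_chat casts them
        temperature='0.8',
        top_p='0.9',
        top_k='40',
        repetition_penalty='1.1',
    )
    print(content)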
fn.py ADDED
@@ -0,0 +1,134 @@
+ import os
+ import re
+ import torch
+ import datetime
+ import json
+ import csv
+ import gc
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from transformers import TextStreamer, TextIteratorStreamer
+ from transformers import GenerationConfig, AutoConfig, GPTQConfig, AwqConfig
+ from models import models
+
+ tokenizer = None
+ model = None
+ loaded_model_name = None
+ loaded_dtype = None
+
+ def load_model(model_name, dtype='int4'):
+     global tokenizer, model, loaded_model_name, loaded_dtype
+
+     if loaded_model_name == model_name and loaded_dtype == dtype:
+         return
+
+     # Release any previously loaded model before loading a new one
+     del model
+     del tokenizer
+     model = None
+     tokenizer = None
+     gc.collect()
+     torch.cuda.empty_cache()
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+     if dtype == 'int4':
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             device_map="auto",
+             trust_remote_code=True,
+             quantization_config=BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_compute_dtype=torch.bfloat16,
+             ),
+         )
+     elif dtype == 'int8':
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             device_map="auto",
+             trust_remote_code=True,
+             quantization_config=BitsAndBytesConfig(
+                 load_in_8bit=True,
+             ),
+         )
+     elif dtype == 'fp16':
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             device_map="auto",
+             trust_remote_code=True,
+             torch_dtype=torch.float16,
+         )
+     elif dtype == 'bf16':
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             device_map="auto",
+             trust_remote_code=True,
+             torch_dtype=torch.bfloat16,
+         )
+     else:
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             trust_remote_code=True,
+             device_map="auto",
+         )
+
+     loaded_model_name = model_name
+     loaded_dtype = dtype
+
+ def infer(args: dict):
+     global tokenizer, model, loaded_model_name, loaded_dtype
+
+     if 'model' in args:
+         args['model_name'] = args['model']
+
+     # Reload when nothing is loaded yet, or when the requested model or dtype differs
+     if not tokenizer \
+             or ('model_name' in args and loaded_model_name != args['model_name']) \
+             or ('dtype' in args and loaded_dtype != args['dtype']):
+         if 'dtype' in args:
+             load_model(args['model_name'], args['dtype'])
+         else:
+             load_model(args['model_name'])
+
+     # Copy the preset so per-request args do not mutate the shared models dict
+     config = {}
+     if args['model_name'] in models:
+         config = models[args['model_name']].copy()
+     config.update(args)
+
+     if config.get('is_messages'):
+         messages = []
+         messages.append({"role": "system", "content": args['instruction']})
+         if args['input']:
+             messages.append({"role": "user", "content": args['input']})
+         tprompt = tokenizer.apply_chat_template(conversation=messages, add_generation_prompt=True, tokenize=False)
+     else:
+         tprompt = config['template'].format(bos_token=tokenizer.bos_token, instruction=args['instruction'], input=args['input'])
+
+     # Keep only generation kwargs; strip prompt/metadata keys
+     kwargs = config.copy()
+     for k in ['model_name', 'template', 'instruction', 'input', 'location', 'endpoint', 'model', 'dtype', 'is_messages']:
+         if k in kwargs:
+             del kwargs[k]
+
+     with torch.no_grad():
+         token_ids = tokenizer.encode(tprompt, add_special_tokens=False, return_tensors="pt")
+         if config.get('is_messages'):
+             output_ids = model.generate(
+                 input_ids=token_ids.to(model.device),
+                 do_sample=True,
+                 **kwargs,
+             )
+         else:
+             output_ids = model.generate(
+                 input_ids=token_ids.to(model.device),
+                 do_sample=True,
+                 pad_token_id=tokenizer.pad_token_id,
+                 bos_token_id=tokenizer.bos_token_id,
+                 eos_token_id=tokenizer.eos_token_id,
+                 **kwargs,
+             )
+     # Decode only the newly generated tokens, skipping the prompt
+     out = output_ids.tolist()[0][token_ids.size(1):]
+     content = tokenizer.decode(out, skip_special_tokens=True)
+
+     return content
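
fn.infer drives everything from one flat dict: model/dtype lazily (re)load the weights, is_messages switches between tokenizer.apply_chat_template and a plain str.format template, and whatever keys remain after the metadata is stripped go straight into model.generate. A sketch of a direct call, assuming a CUDA machine with enough VRAM (values borrowed from models.json; the instruction/input strings are illustrative):

    import fn

    content = fn.infer({
        'model': 'elyza/ELYZA-japanese-Llama-2-13b-fast-instruct',
        'dtype': 'int4',
        'is_messages': False,
        'template': '{bos_token}[INST] <<SYS>>\n{instruction}\n<</SYS>>\n{input} [/INST]',
        'instruction': 'You are a helpful assistant.',
        'input': 'Explain 4-bit quantization in one sentence.',
        'max_new_tokens': 256,
        'temperature': 1.0,
        'top_p': 0.9,
        'top_k': 40,
        'repetition_penalty': 1.1,
    })
    print(content)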
install.bat ADDED
@@ -0,0 +1,56 @@
+ @echo off
+
+ rem -------------------------------------------
+ rem NOT guaranteed to work on Windows
+
+ set REPOS=https://huggingface.co/spaces/aka7774/llm
+ set APPDIR=llm
+ set VENV=venv
+
+ rem -------------------------------------------
+
+ set INSTALL_DIR=%~dp0
+ cd /d %INSTALL_DIR%
+
+ :git_clone
+ set DL_URL=%REPOS%
+ set DL_DST=%APPDIR%
+ git clone %DL_URL% %APPDIR%
+ if exist %DL_DST% goto install_python
+
+ set DL_URL=https://github.com/git-for-windows/git/releases/download/v2.41.0.windows.3/PortableGit-2.41.0.3-64-bit.7z.exe
+ set DL_DST=PortableGit-2.41.0.3-64-bit.7z.exe
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %DL_DST% -y
+ del %DL_DST%
+
+ set GIT=%INSTALL_DIR%PortableGit\bin\git
+ %GIT% clone %REPOS%
+
+ :install_python
+ set DL_URL=https://github.com/indygreg/python-build-standalone/releases/download/20240107/cpython-3.10.13+20240107-i686-pc-windows-msvc-shared-install_only.tar.gz
+ set DL_DST="%INSTALL_DIR%python.tar.gz"
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ tar -xzf %DL_DST%
+
+ set PYTHON=%INSTALL_DIR%python\python.exe
+ set PATH=%PATH%;%INSTALL_DIR%python\Scripts
+
+ :install_venv
+ cd %APPDIR%
+ %PYTHON% -m venv %VENV%
+ set PYTHON=%VENV%\Scripts\python.exe
+
+ :install_pip
+ set DL_URL=https://bootstrap.pypa.io/get-pip.py
+ set DL_DST=%INSTALL_DIR%get-pip.py
+ curl -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %PYTHON% %DL_DST%
+
+ %PYTHON% -m pip install gradio
+ %PYTHON% -m pip install -r requirements.txt
+
+ pause
main.py ADDED
@@ -0,0 +1,33 @@
+ import os
+ import sys
+ import time
+ import signal
+ import io
+
+ from fastapi import FastAPI, Request, status, Form, UploadFile
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+ from fastapi.exceptions import RequestValidationError
+ from fastapi.responses import JSONResponse
+
+ import fn
+ import gradio as gr
+ from app import demo
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=['*'],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ gr.mount_gradio_app(app, demo, path="/gradio")
+
+ @app.post("/infer")
+ async def api_infer(args: dict):
+     content = fn.infer(args)
+     return {'content': content}
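
The FastAPI wrapper exposes the same fn.infer as POST /infer, with the Gradio UI mounted at /gradio. Serve it with, for example, "uvicorn main:app --port 8000" (uvicorn is in requirements.txt; the port is illustrative). A stdlib-only client sketch, assuming that server is running and the preset fills in the remaining generation parameters:

    import json
    import urllib.request

    # Assumes the uvicorn server above is listening on localhost:8000.
    req = urllib.request.Request(
        'http://localhost:8000/infer',
        data=json.dumps({
            'model': 'cyberagent/calm2-7b-chat',
            'dtype': 'int4',
            'instruction': 'You are a helpful assistant.',
            'input': 'Hi!',
        }).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )
    with urllib.request.urlopen(req) as res:
        print(json.loads(res.read())['content'])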
models.json ADDED
@@ -0,0 +1,79 @@
+ {
+     "lightblue/qarasu-14B-chat-plus-unleashed": {
+         "model": "lightblue/qarasu-14B-chat-plus-unleashed",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "{instruction}\n\n{input}",
+         "max_new_tokens": 256,
+         "temperature": 0.1,
+         "top_p": 0.9,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "elyza/ELYZA-japanese-Llama-2-13b-fast-instruct": {
+         "model": "elyza/ELYZA-japanese-Llama-2-13b-fast-instruct",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "{bos_token}[INST] <<SYS>>\n{instruction}\n<</SYS>>\n{input} [/INST]",
+         "max_new_tokens": 256,
+         "temperature": 1.0,
+         "top_p": 0.9,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "tokyotech-llm/Swallow-13b-instruct-hf": {
+         "model": "tokyotech-llm/Swallow-13b-instruct-hf",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "以下に、あるタスクを説明する指示があり、それに付随する入力が更なる文脈を提供しています。リクエストを適切に完了するための回答を記述してください。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:",
+         "max_new_tokens": 256,
+         "temperature": 0.99,
+         "top_p": 0.95,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "rinna/nekomata-14b-instruction": {
+         "model": "rinna/nekomata-14b-instruction",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:",
+         "max_new_tokens": 256,
+         "temperature": 0.5,
+         "top_p": 0.95,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "cyberagent/calm2-7b-chat": {
+         "model": "cyberagent/calm2-7b-chat",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "{instruction}\nUSER: {input}\nASSISTANT: ",
+         "max_new_tokens": 256,
+         "temperature": 0.8,
+         "top_p": 0.9,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "llm-jp/llm-jp-13b-instruct-full-jaster-dolly-oasst-v1.0": {
+         "model": "llm-jp/llm-jp-13b-instruct-full-jaster-dolly-oasst-v1.0",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "{instruction}\n{input}\n### 回答：",
+         "max_new_tokens": 256,
+         "temperature": 0.7,
+         "top_p": 0.95,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     },
+     "stockmark/stockmark-13b-instruct": {
+         "model": "stockmark/stockmark-13b-instruct",
+         "dtype": "int4",
+         "is_messages": false,
+         "template": "{instruction}\n\n### Input:\n{input}\n\n### Output:",
+         "max_new_tokens": 256,
+         "temperature": 0.7,
+         "top_p": 0.9,
+         "top_k": 40,
+         "repetition_penalty": 1.1
+     }
+ }
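
Each top-level key in models.json is a preset; its values, in file order, populate one row of the Preset examples in app.py, so a new entry must keep the same nine keys in the same order. A sketch for appending a preset programmatically (the model name below is a placeholder, not a real repo):

    import json

    # Hypothetical preset; key order must match the gr.Examples inputs:
    # model, dtype, is_messages, template, max_new_tokens,
    # temperature, top_p, top_k, repetition_penalty
    preset = {
        "model": "my-org/my-model",  # placeholder
        "dtype": "int4",
        "is_messages": True,
        "template": "{instruction}\n{input}",
        "max_new_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "repetition_penalty": 1.1,
    }

    with open('models.json', encoding='utf-8') as f:
        models = json.load(f)
    models[preset['model']] = preset
    with open('models.json', 'w', encoding='utf-8') as f:
        json.dump(models, f, ensure_ascii=False, indent=2)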
models.py ADDED
@@ -0,0 +1,19 @@
+ import json
+
+ def load():
+     with open('models.json', 'r', encoding='utf-8') as f:
+         models = json.load(f)
+
+     return models
+
+ def get_examples():
+     examples = []
+     for model in models.keys():
+         examples.append(list(models[model].values()))
+
+     return examples
+
+ def get_head_options():
+     return models[list(models.keys())[0]]
+
+ models = load()
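
get_examples() flattens each preset's values into one row for gr.Examples, and get_head_options() returns the first preset as the UI defaults; both rely on dict insertion order, which json.load preserves from the file. A quick sanity check of that contract:

    import models

    opt = models.get_head_options()
    print(opt['model'])              # first preset in models.json
    rows = models.get_examples()
    print(len(rows), len(rows[0]))   # one row per preset, nine fields each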
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ fastapi
+ uvicorn
+ transformers
+ accelerate
+ sentencepiece
+ bitsandbytes
+ scipy
+ tiktoken
+ einops
+ transformers_stream_generator
+ protobuf
+ python-multipart
venv.sh ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/bash
+
+ python3 -m venv venv
+ curl -kL https://bootstrap.pypa.io/get-pip.py | venv/bin/python
+
+ venv/bin/python -m pip install gradio
+ venv/bin/python -m pip install -r requirements.txt