Update app.py
Browse files
app.py
CHANGED
@@ -8,17 +8,32 @@ import requests
|
|
8 |
import logging
|
9 |
from threading import Event
|
10 |
import tiktoken # 引入 tiktoken 库
|
|
|
11 |
|
12 |
def local_encoding_for_model(model_name: str):
|
|
|
|
|
|
|
13 |
local_encoding_path = '/app/cl100k_base.tiktoken'
|
14 |
if os.path.exists(local_encoding_path):
|
15 |
with open(local_encoding_path, 'rb') as f:
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
else:
|
18 |
raise FileNotFoundError(f"Local encoding file not found at {local_encoding_path}")
|
19 |
|
|
|
20 |
tiktoken.encoding_for_model = local_encoding_for_model
|
21 |
|
|
|
22 |
app = Flask(__name__)
|
23 |
logging.basicConfig(level=logging.INFO)
|
24 |
|
|
|
8 |
import logging
|
9 |
from threading import Event
|
10 |
import tiktoken # 引入 tiktoken 库
|
11 |
+
from tiktoken import Encoding
|
12 |
|
13 |
# Regex that cl100k_base uses to pre-split text into chunks before BPE merging.
_CL100K_PAT_STR = (
    r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}|"""
    r""" ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
)

# Special tokens of cl100k_base and their fixed ids.
_CL100K_SPECIAL_TOKENS = {
    "<|endoftext|>": 100257,
    "<|fim_prefix|>": 100258,
    "<|fim_middle|>": 100259,
    "<|fim_suffix|>": 100260,
    "<|endofprompt|>": 100276,
}


def _parse_tiktoken_bpe(raw):
    """Parse the bytes of a ``.tiktoken`` file into a ``{token_bytes: rank}`` dict.

    Each non-blank line of the file is ``<base64-encoded token> <integer rank>``.
    """
    import base64  # local import: only this helper needs it

    ranks = {}
    for line in raw.splitlines():
        if not line.strip():
            continue
        token_b64, rank = line.split()
        ranks[base64.b64decode(token_b64)] = int(rank)
    return ranks


def local_encoding_for_model(model_name: str):
    """Build the cl100k_base ``Encoding`` from the local vocabulary file.

    Intended as an offline replacement for ``tiktoken.encoding_for_model``.

    Args:
        model_name: Accepted for signature compatibility only; every model
            name is mapped to cl100k_base.

    Returns:
        An ``Encoding`` named ``"cl100k_base"`` whose merge ranks come from
        the local ``.tiktoken`` file.

    Raises:
        FileNotFoundError: If the local vocabulary file does not exist.
        ValueError: If the file exists but contains no token entries.
    """
    local_encoding_path = '/app/cl100k_base.tiktoken'
    if not os.path.exists(local_encoding_path):
        raise FileNotFoundError(f"Local encoding file not found at {local_encoding_path}")

    with open(local_encoding_path, 'rb') as f:
        encoding_data = f.read()

    # The previous version read the file but passed empty ranks and an empty
    # pattern to Encoding, so the resulting encoder had no vocabulary at all.
    mergeable_ranks = _parse_tiktoken_bpe(encoding_data)
    if not mergeable_ranks:
        raise ValueError(f"Local encoding file at {local_encoding_path} contains no tokens")

    # NOTE(review): this rebuilds the encoder on every call; consider caching
    # the result if callers invoke it per request.
    return Encoding(
        name="cl100k_base",
        pat_str=_CL100K_PAT_STR,
        mergeable_ranks=mergeable_ranks,
        special_tokens=dict(_CL100K_SPECIAL_TOKENS),
    )
|
32 |
|
33 |
+
# Replace tiktoken's encoding_for_model with the local loader so lookups use the local file
|
34 |
tiktoken.encoding_for_model = local_encoding_for_model
|
35 |
|
36 |
+
|
37 |
# Create the Flask application object
app = Flask(__name__)
|
38 |
# Configure the root logger to emit INFO and above
logging.basicConfig(level=logging.INFO)
|
39 |
|