moriire committed
Commit 654eaa0
Parent: fcb53b1

Update app.py

Files changed (1)
  1. app.py +15 -2
app.py CHANGED
@@ -1,12 +1,25 @@
 import fastapi
+"""
 from fastapi.responses import JSONResponse
 from llama_cpp import Llama
 from time import time
 import logging
 
+"""
+#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
 
-MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
+import llama_cpp
+import llama_cpp.llama_tokenizer
 
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q4_0.gguf",
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+    verbose=False,
+    n_ctx=4096,
+    n_threads=4,
+    n_gpu_layers=0,
+)
 # Logger setup
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -59,7 +72,7 @@ async def complete(
 ) -> dict:
     try:
         st = time()
-        output = llm.create_chat_completion(
+        output = llama.create_chat_completion(
             messages=[
                 {"role": "system", "content": system},
                 {"role": "user", "content": question},