moriire committed
Commit: 6a34b4c
Parent(s): f88f764

Update app.py

Files changed (1)
  1. app.py +9 -10
app.py CHANGED
```diff
@@ -7,6 +7,13 @@ import llama_cpp
 import llama_cpp.llama_tokenizer
 from pydantic import BaseModel
 
+
+class GenModel(BaseModel):
+    question: str
+    system: str = "You are a story writing assistant."
+    temperature: float = 0.7
+    seed: int = 42
+
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q4_0.gguf",
@@ -56,18 +63,10 @@ def index():
 @app.get("/health")
 def health():
     return {"status": "ok"}
-
-
-class GenModel(BaseModel):
-    question: str
-    system: str = "You are a story writing assistant."
-    temperature: float = 0.7
-    seed: int = 42
 
 # Chat Completion API
-@app.get("/generate_stream")
-async def complete(gen:GenModel
-) -> dict:
+@app.get("/generate_stream/")
+async def complete(gen:GenModel):
     try:
         st = time()
         output = llama.create_chat_completion(
```
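For readers who want the end state rather than the hunks, here is a minimal sketch of how the touched part of app.py plausibly reads after this commit. Only `GenModel`, the `Llama.from_pretrained` call, the `/health` route, and the `/generate_stream/` signature come from the diff; the FastAPI app setup, the `time` import, and the arguments passed to `create_chat_completion` are assumptions filled in so the sketch runs on its own.

```python
# Sketch of app.py after this commit, not the authoritative file.
# GenModel, the model load, /health, and the /generate_stream/ signature come
# from the diff; the FastAPI app object, the time import, and the body of the
# completion call are assumptions.
from time import time

import llama_cpp
import llama_cpp.llama_tokenizer
from fastapi import FastAPI
from pydantic import BaseModel


class GenModel(BaseModel):
    question: str
    system: str = "You are a story writing assistant."
    temperature: float = 0.7
    seed: int = 42


# Download the quantized Qwen chat model from the Hugging Face Hub and load it.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q4_0.gguf",
)

app = FastAPI()


@app.get("/health")
def health():
    return {"status": "ok"}


# Chat Completion API
@app.get("/generate_stream/")
async def complete(gen: GenModel):
    try:
        st = time()
        output = llama.create_chat_completion(
            messages=[
                {"role": "system", "content": gen.system},
                {"role": "user", "content": gen.question},
            ],
            temperature=gen.temperature,
            seed=gen.seed,
        )
        return {"output": output, "time_s": time() - st}
    except Exception as exc:
        return {"error": str(exc)}
```

One design note: FastAPI treats a Pydantic model parameter such as `gen` as the request body, so even though the route is registered with `@app.get`, callers have to send a JSON body; switching the decorator to `@app.post` would be the more conventional choice.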
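If you want to poke at the new route, a hypothetical client call might look like the following; the `localhost:8000` base URL, the example payload, and the use of httpx are assumptions, not part of the commit.

```python
# Hypothetical client for the /generate_stream/ route; base URL is an assumption.
import httpx

payload = {
    "question": "Write a two-sentence story about a lighthouse keeper.",
    "system": "You are a story writing assistant.",
    "temperature": 0.7,
    "seed": 42,
}

# FastAPI reads the GenModel parameter from the request body, so the JSON goes
# in the body even though the route is registered with GET.
resp = httpx.request("GET", "http://localhost:8000/generate_stream/", json=payload)
print(resp.json())
```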