moriire committed
Commit ef7bf1f
Parent: 22c75d7

Update app.py

Files changed (1): app.py (+21, -18)
app.py CHANGED
@@ -10,9 +10,12 @@ from pydantic import BaseModel
 
 class GenModel(BaseModel):
     question: str
-    system: str = "You are a helpful medical assistant."
+    system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
     temperature: float = 0.8
-    seed: int = 101
+    seed: int = 101,
+    mirostat_mode=2,
+    mirostat_tau=4.0,
+    mirostat_eta=1.1
 
 llm_chat = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
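Note on this hunk: the trailing commas on the new fields turn their defaults into one-element tuples (seed defaults to (101,), not 101), and the three mirostat_* attributes have no type annotations, which Pydantic does not accept as model fields (Pydantic v2 raises an error at class definition time). A corrected sketch, assuming the mirostat values are meant to be request fields:

    class GenModel(BaseModel):
        question: str
        system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
        temperature: float = 0.8
        seed: int = 101            # no trailing comma: "= 101," would default to the tuple (101,)
        mirostat_mode: int = 2     # annotated, so Pydantic treats it as a field
        mirostat_tau: float = 4.0
        mirostat_eta: float = 1.1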
@@ -30,6 +33,9 @@ llm_generate = llama_cpp.Llama.from_pretrained(
     verbose=False,
     n_ctx=4096,
     n_gpu_layers=0,
+    mirostat_mode=2,
+    mirostat_tau=4.0,
+    mirostat_eta=1.1
     #chat_format="llama-2"
 )
 # Logger setup
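Note: in llama-cpp-python, mirostat_mode, mirostat_tau, and mirostat_eta are sampling parameters of create_completion() / create_chat_completion(), not documented constructor options, so passing them to Llama.from_pretrained() likely has no effect. A sketch of passing them where they apply, keeping the values from this diff:

    output = llm_generate.create_chat_completion(
        messages=[{"role": "user", "content": "hello"}],
        temperature=0.8,
        mirostat_mode=2,    # enable Mirostat v2 adaptive sampling
        mirostat_tau=4.0,   # target entropy
        mirostat_eta=1.1,   # learning rate
    )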
@@ -40,7 +46,13 @@ app = fastapi.FastAPI(
     title="OpenGenAI",
     description="Your Excellect AI Physician")
 
-
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins = ["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
 @app.get("/")
 def index():
     return fastapi.responses.RedirectResponse(url="/docs")
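Note: none of the hunks adds an import for CORSMiddleware, so if it is not already present elsewhere in app.py, the new add_middleware block will raise a NameError at startup. The standard FastAPI import is:

    from fastapi.middleware.cors import CORSMiddleware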
@@ -66,17 +78,7 @@ async def chat(gen:GenModel):
         )
         messages.append({"role": "user", "content": gen.question},)
         print(output)
-        """
-        for chunk in output:
-
-            delta = chunk['choices'][0]['delta']
-            if 'role' in delta:
-                print(delta['role'], end=': ')
-            elif 'content' in delta:
-                print(delta['content'], end='')
-
-            print(chunk)
-        """
+
         et = time()
         output["time"] = et - st
         messages.append({'role': "assistant", "content": output['choices'][0]['message']})
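Note on the unchanged context line above: for a non-streaming create_chat_completion() call, output['choices'][0]['message'] is a dict with 'role' and 'content' keys, so appending it as the assistant message's content nests one dict inside another. A sketch of appending just the generated text:

    reply = output["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": reply})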
@@ -96,16 +98,17 @@ async def generate(gen:GenModel):
     gen.seed: int = 42
     try:
         st = time()
-        output = llm_generate.create_chat_completion(
+        output = llm_generate.create_completion(
             messages=[
                 {"role": "system", "content": gen.system},
                 {"role": "user", "content": gen.question},
             ],
             temperature = gen.temperature,
             seed= gen.seed,
-            stream=True
+            #stream=True,
+            #echo=True
         )
-
+        """
         for chunk in output:
             delta = chunk['choices'][0]['delta']
             if 'role' in delta:
@@ -113,7 +116,7 @@ async def generate(gen:GenModel):
             elif 'content' in delta:
                 print(delta['content'], end='')
             #print(chunk)
-
+        """
         et = time()
         #output["time"] = et - st
         return output
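Note: create_completion() takes a plain prompt string, not chat messages, so the switch from create_chat_completion() while keeping messages=[...] will not work. Also, with stream=True commented out and the chunk loop now wrapped in a bare string literal, the loop is a no-op. A sketch of the non-streaming call this hunk seems to be aiming for (the prompt template is an assumption):

    prompt = f"{gen.system}\nUser: {gen.question}\nAssistant:"
    output = llm_generate.create_completion(
        prompt=prompt,
        temperature=gen.temperature,
        seed=gen.seed,
    )
    text = output["choices"][0]["text"]   # completion responses carry 'text', not 'message'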