Spaces:
Sleeping
Sleeping
Luke Stanley
committed on
Commit
•
9475016
1
Parent(s):
976ea17
Ensure N_GPU_LAYERS is int
Browse files
utils.py
CHANGED
@@ -19,7 +19,7 @@ from llama_cpp import Llama, LlamaGrammar, json_schema_to_gbnf
|
|
19 |
URL = "http://localhost:5834/v1/chat/completions"
|
20 |
in_memory_llm = None
|
21 |
|
22 |
-
N_GPU_LAYERS = env.get("N_GPU_LAYERS",
|
23 |
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
|
24 |
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
|
25 |
USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
|
@@ -147,3 +147,27 @@ def query_ai_prompt(prompt, replacements, model_class, in_memory=True):
|
|
147 |
return llm_stream_sans_network(prompt, model_class)
|
148 |
else:
|
149 |
return llm_streaming(prompt, model_class)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
URL = "http://localhost:5834/v1/chat/completions"
|
20 |
in_memory_llm = None
|
21 |
|
22 |
+
N_GPU_LAYERS = int(env.get("N_GPU_LAYERS", 20))  # Default to 20 layers; set to -1 to use all layers if available
|
23 |
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
|
24 |
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
|
25 |
USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
|
|
|
147 |
return llm_stream_sans_network(prompt, model_class)
|
148 |
else:
|
149 |
return llm_streaming(prompt, model_class)
|
150 |
+
|
151 |
+
|
152 |
+
def llm_stream_sans_network_simple(
    prompt: str, json_schema: str
):
    """Stream a schema-constrained completion from the in-memory LLM.

    Compiles *json_schema* into a llama.cpp grammar, streams tokens from
    the module-level ``in_memory_llm``, echoing each piece to stdout as it
    arrives, and returns the full concatenated output text.

    NOTE(review): assumes the module globals ``in_memory_llm``,
    ``MAX_TOKENS`` and ``TEMPERATURE`` are initialised before this is
    called — confirm against the module's setup path.
    """
    grammar = LlamaGrammar.from_json_schema(json_schema)

    stream = in_memory_llm(
        prompt,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        grammar=grammar,
        stream=True
    )

    pieces = []
    for chunk in stream:
        piece = chunk["choices"][0]["text"]
        # Echo incrementally so progress is visible while streaming.
        print(piece, end='', flush=True)
        pieces.append(piece)
        #yield result["text"]

    print('\n')
    return "".join(pieces)