Spaces: Running on Zero
Upload llmdolphin.py
Browse files - llmdolphin.py +2 -2
llmdolphin.py
CHANGED
@@ -692,7 +692,7 @@ def dolphin_respond(
|
|
692 |
llm = Llama(
|
693 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
694 |
flash_attn=True,
|
695 |
-
n_gpu_layers=
|
696 |
n_batch=1024,
|
697 |
n_ctx=4096, #8192
|
698 |
n_threads=8,
|
@@ -787,7 +787,7 @@ def dolphin_respond_auto(
|
|
787 |
llm = Llama(
|
788 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
789 |
flash_attn=True,
|
790 |
-
n_gpu_layers=
|
791 |
n_batch=1024,
|
792 |
n_ctx=4096, #8192
|
793 |
n_threads=8,
|
|
|
692 |
llm = Llama(
|
693 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
694 |
flash_attn=True,
|
695 |
+
n_gpu_layers=40, # 81
|
696 |
n_batch=1024,
|
697 |
n_ctx=4096, #8192
|
698 |
n_threads=8,
|
|
|
787 |
llm = Llama(
|
788 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
789 |
flash_attn=True,
|
790 |
+
n_gpu_layers=40, # 81
|
791 |
n_batch=1024,
|
792 |
n_ctx=4096, #8192
|
793 |
n_threads=8,
|