Spaces: Running on Zero

Upload 2 files

Files changed:
- app.py (+1 -1)
- llmdolphin.py (+1 -3)
app.py CHANGED

@@ -42,7 +42,7 @@ with gr.Blocks(theme="NoCrypt/miku@>=1.2.2", fill_width=True, css="") as app:
     with gr.Accordion("Additional inputs", open=False):
         chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0][1]), label="Message format")
         chat_sysmsg = gr.Textbox(value=get_dolphin_sysprompt(), label="System message")
-        chat_tokens = gr.Slider(minimum=1, maximum=4096, value=
+        chat_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
         chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
         chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
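The "Additional inputs" accordion holds the sampling controls for the chat tab. As a minimal sketch (not this Space's actual wiring, which builds the components inside its own gr.Blocks layout), sliders like these are typically forwarded to the generation callback, for example via gr.ChatInterface's additional_inputs:

import gradio as gr

def respond(message, history, max_tokens, temperature, top_p, top_k):
    # A real handler would pass these sampling settings to the LLM;
    # this placeholder just echoes them back.
    return f"max_tokens={max_tokens}, temperature={temperature}, top_p={top_p}, top_k={top_k}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    ],
)

demo.launch()

ChatInterface renders additional_inputs inside a collapsible accordion by default, matching the layout in the diff above; each slider value is appended to the callback's arguments after message and history.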
llmdolphin.py CHANGED

@@ -694,8 +694,7 @@ def dolphin_respond(
         flash_attn=True,
         n_gpu_layers=81, # 81
         n_batch=1024,
-        n_ctx=
-        n_threads=8,
+        n_ctx=8192, #8192
     )
     provider = LlamaCppPythonProvider(llm)
 
@@ -790,7 +789,6 @@ def dolphin_respond_auto(
         n_gpu_layers=81, # 81
         n_batch=1024,
         n_ctx=4096, #8192
-        n_threads=8,
     )
     provider = LlamaCppPythonProvider(llm)
 
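Both hunks tune the llama-cpp-python Llama constructor: dolphin_respond now gets an 8192-token context window, and the explicit n_threads=8 is dropped from both functions. n_threads governs CPU worker threads, which matters little here since n_gpu_layers=81 offloads the whole model to the GPU. As a minimal sketch of the resulting construction (the model path and import layout are assumptions, not taken from this commit):

from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

llm = Llama(
    model_path="model.gguf",  # placeholder; the Space resolves its own model file
    flash_attn=True,          # use flash-attention kernels (CUDA builds)
    n_gpu_layers=81,          # offload all layers to the GPU
    n_batch=1024,             # prompt-processing batch size
    n_ctx=8192,               # context window in tokens
)
provider = LlamaCppPythonProvider(llm)

With n_threads omitted, llama-cpp-python falls back to its own default thread count, so generation on a fully GPU-offloaded model should behave the same.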