John6666 committed on
Commit
8f20fda
•
1 Parent(s): 7d22dbb

Upload 2 files

Files changed (2)
  1. app.py +1 -1
  2. llmdolphin.py +1 -3
app.py CHANGED
@@ -42,7 +42,7 @@ with gr.Blocks(theme="NoCrypt/miku@>=1.2.2", fill_width=True, css="") as app:
     with gr.Accordion("Additional inputs", open=False):
         chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0][1]), label="Message format")
         chat_sysmsg = gr.Textbox(value=get_dolphin_sysprompt(), label="System message")
-        chat_tokens = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max tokens")
+        chat_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
         chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
         chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
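The only change here lowers the default of the "Max tokens" slider from 1024 to 512, capping how many tokens a reply may contain unless the user raises it. Below is a minimal, self-contained sketch of how such a slider typically feeds a chat handler; the echo-style respond() is a hypothetical stand-in for the Space's real LLM call, not its actual code.

import gradio as gr

def respond(message, max_tokens):
    # Hypothetical stand-in: a real handler would forward max_tokens
    # to the generation backend as the per-reply token budget.
    words = ((message + " ") * 50).split()
    return " ".join(words[:max_tokens])

with gr.Blocks() as demo:
    msg = gr.Textbox(label="Message")
    # Same slider shape as app.py, with the new 512-token default.
    chat_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
    out = gr.Textbox(label="Reply")
    msg.submit(respond, inputs=[msg, chat_tokens], outputs=out)

if __name__ == "__main__":
    demo.launch()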
llmdolphin.py CHANGED
@@ -694,8 +694,7 @@ def dolphin_respond(
         flash_attn=True,
         n_gpu_layers=81, # 81
         n_batch=1024,
-        n_ctx=4096, #8192
-        n_threads=8,
+        n_ctx=8192, #8192
     )
     provider = LlamaCppPythonProvider(llm)
 
@@ -790,7 +789,6 @@ def dolphin_respond_auto(
         n_gpu_layers=81, # 81
         n_batch=1024,
         n_ctx=4096, #8192
-        n_threads=8,
     )
     provider = LlamaCppPythonProvider(llm)
 
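Net effect: dolphin_respond now runs with an 8192-token context window (dolphin_respond_auto keeps 4096), and neither function pins n_threads=8 any longer, so llama.cpp falls back to its own CPU-based default. A minimal sketch of the resulting constructor call, assuming llama-cpp-python plus llama-cpp-agent, with a hypothetical model path:

from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

llm = Llama(
    model_path="models/dolphin.gguf",  # hypothetical path, not from the repo
    flash_attn=True,
    n_gpu_layers=81,  # offload up to 81 layers to the GPU
    n_batch=1024,     # prompt-processing batch size
    n_ctx=8192,       # context window doubled by this commit (was 4096)
    # n_threads left unset: llama.cpp derives a thread count from the
    # available CPU cores instead of the previous hard-coded 8.
)
provider = LlamaCppPythonProvider(llm)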