inflaton committed on
Commit
7fded8d
1 Parent(s): 43e183b
Files changed (2) hide show
  1. app_modules/llm_loader.py +2 -2
  2. tgi.sh +1 -3
app_modules/llm_loader.py CHANGED
@@ -298,7 +298,7 @@ class LLMLoader:
298
  config=config,
299
  quantization_config=double_quant_config,
300
  trust_remote_code=True,
301
- token=token,
302
  )
303
  if is_t5
304
  else AutoModelForCausalLM.from_pretrained(
@@ -306,7 +306,7 @@ class LLMLoader:
306
  config=config,
307
  quantization_config=double_quant_config,
308
  trust_remote_code=True,
309
- token=token,
310
  )
311
  )
312
 
 
298
  config=config,
299
  quantization_config=double_quant_config,
300
  trust_remote_code=True,
301
+ use_auth_token=token,
302
  )
303
  if is_t5
304
  else AutoModelForCausalLM.from_pretrained(
 
306
  config=config,
307
  quantization_config=double_quant_config,
308
  trust_remote_code=True,
309
+ use_auth_token=token,
310
  )
311
  )
312
 
tgi.sh CHANGED
@@ -9,9 +9,7 @@ uname -a
9
 
10
  . env/tgi.conf
11
 
12
- export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
13
- export QUANTIZE="--quantize bitsandbytes-fp4"
14
-
15
  echo Running $MODEL_ID with TGI
16
 
17
  text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge $NGROK_EDGE $QUANTIZE
 
 
9
 
10
  . env/tgi.conf
11
 
 
 
 
12
  echo Running $MODEL_ID with TGI
13
 
14
  text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge $NGROK_EDGE $QUANTIZE
15
+