Lam-Hung committed on
Commit 1bc4eb9
Parent: 33bc3b1

Update app.py

Files changed (1): app.py (+4 -1)
app.py CHANGED
@@ -7,6 +7,8 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
 
+huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -14,11 +16,12 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_id = "google/gemma-2-9b-it"
-tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
+tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=huggingface_token)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
+    token=huggingface_token,
 )
 model.config.sliding_window = 4096
 model.eval()
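
For context: google/gemma-2-9b-it is a gated checkpoint on the Hugging Face Hub, so the tokenizer and weights can only be downloaded by an authenticated request; this commit threads a token from the environment into both from_pretrained calls. Below is a minimal sketch of the resulting loading path, assuming the token is exposed as the HUGGINGFACE_TOKEN environment variable; the fail-fast guard is an addition for illustration, not part of the commit.

import os

import torch
from transformers import AutoModelForCausalLM, GemmaTokenizerFast

# Read the access token from the environment; fail fast with a clear error
# instead of an opaque 401/403 from the Hub if it is missing (added guard,
# not in the original commit).
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if huggingface_token is None:
    raise RuntimeError("Set HUGGINGFACE_TOKEN before launching the app.")

model_id = "google/gemma-2-9b-it"
# token= authenticates the download of the gated tokenizer and weights.
tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=huggingface_token,
)
model.eval()

On a Space, the token would typically be stored as a repository secret named HUGGINGFACE_TOKEN, which the runtime exposes as an environment variable that os.getenv picks up at startup.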