bstraehle committed on
Commit
df16a07
1 Parent(s): cbf9d56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -9,11 +9,6 @@ import evaluate
9
  import numpy as np
10
  import random
11
 
12
- tokenizer = None
13
-
14
- def preprocess_function(examples):
15
- return tokenizer(examples["text"], padding="max_length", truncation=True)
16
-
17
  def process(model_id, dataset):
18
  # Step 1: Load dataset
19
  dataset_imdb = load_dataset(dataset)
@@ -39,11 +34,11 @@ def process(model_id, dataset):
39
 
40
  # Step 3: Text tokenization
41
 
42
- global tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
43
 
44
  # Step 4: Apply tokenization to dataset
45
 
46
- tokenized_imdb = dataset_imdb.map(preprocess_function, batched=True)
47
 
48
  #Step 5: Fine-tune the model
49
 
 
9
  import numpy as np
10
  import random
11
 
 
 
 
 
 
12
  def process(model_id, dataset):
13
  # Step 1: Load dataset
14
  dataset_imdb = load_dataset(dataset)
 
34
 
35
  # Step 3: Text tokenization
36
 
37
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
38
 
39
  # Step 4: Apply tokenization to dataset
40
 
41
+ tokenized_imdb = dataset_imdb.map(tokenizer(examples["text"], padding="max_length", truncation=True), batched=True)
42
 
43
  #Step 5: Fine-tune the model
44