Update app.py
app.py
CHANGED
@@ -9,11 +9,6 @@ import evaluate
 import numpy as np
 import random
 
-tokenizer = None
-
-def preprocess_function(examples):
-    return tokenizer(examples["text"], padding="max_length", truncation=True)
-
 def process(model_id, dataset):
     # Step 1: Load dataset
     dataset_imdb = load_dataset(dataset)
@@ -39,11 +34,11 @@ def process(model_id, dataset):
 
     # Step 3: Text tokenization
 
-
+    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
 
     # Step 4: Apply tokenization to dataset
 
-    tokenized_imdb = dataset_imdb.map(
+    tokenized_imdb = dataset_imdb.map(tokenizer(examples["text"], padding="max_length", truncation=True), batched=True)
 
     #Step 5: Fine-tune the model
 
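Note on the new Step 4 line: as committed, the map call passes the result of tokenizer(examples["text"], ...) rather than a callable, and examples is not defined at that point, so the script would raise a NameError. A minimal sketch of how datasets' map is normally used for this step, assuming the intent matches the removed preprocess_function helper (the standalone dataset name and "imdb" argument below are illustrative stand-ins for what process(model_id, dataset) receives):

from datasets import load_dataset
from transformers import AutoTokenizer

dataset_imdb = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def preprocess_function(examples):
    # Tokenize a batch of reviews; padding/truncation mirror the removed helper.
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# Dataset.map expects a callable; with batched=True it receives batches of examples.
tokenized_imdb = dataset_imdb.map(preprocess_function, batched=True)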