Alejadro Sanchez-Giraldo committed on
Commit 6e61bae
1 Parent(s): 0c868d2

add train model base code

Files changed (3)
  1. README.md +1 -1
  2. requirements.txt +2 -1
  3. training/train.py +41 -0
README.md CHANGED
@@ -24,6 +24,6 @@ docker run -it -p 8501:8501 --platform=linux/amd64 \
 
 ### API
 
-docker run -it -p 8501:8501 --platform=linux/amd64 \
+docker run -it -p 5001:5000 --platform=linux/amd64 \
   -e LAUNCHDARKLY_SDK_KEY="sdk-142d656c-d430-4f8c-b2f1-7275f2ec65ff" \
   registry.hf.space/asgface-sentimentai:latest python api.py
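The API change maps host port 5001 to the container's Flask default of 5000, so clients must now target localhost:5001. Below is a minimal smoke-test sketch against the running container; the /predict route and the {"text": ...} payload shape are assumptions, since api.py is not part of this commit.

# Smoke test for the API container started above (stdlib only).
# Assumption: api.py exposes a POST /predict endpoint accepting
# JSON like {"text": ...} -- adjust to the routes actually defined there.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:5001/predict",  # host port 5001 -> container port 5000
    data=json.dumps({"text": "I love this product!"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))  # expected: a sentiment label/score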
requirements.txt CHANGED
@@ -2,4 +2,5 @@ streamlit
 transformers
 launchdarkly-server-sdk
 torch
-Flask
+Flask
+datasets
training/train.py ADDED
@@ -0,0 +1,43 @@
+from transformers import AlbertForSequenceClassification, AlbertTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset
+
+# Load a dataset (replace with your dataset; the Trainer also needs a label column for classification)
+dataset = load_dataset("text", data_files={"train": "path/to/train.txt", "test": "path/to/test.txt"})
+
+# Tokenizer must exist before preprocess_function is invoked by dataset.map
+tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
+
+# Preprocess the dataset (tokenization, padding, truncation)
+def preprocess_function(examples):
+    return tokenizer(examples["text"], padding="max_length", truncation=True)
+
+tokenized_dataset = dataset.map(preprocess_function, batched=True)
+
+# Load the model
+model = AlbertForSequenceClassification.from_pretrained("albert-base-v2", num_labels=2)  # Adjust num_labels as needed
+
+# Define training arguments
+training_args = TrainingArguments(
+    output_dir="./results",
+    num_train_epochs=3,
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=8,
+    warmup_steps=500,
+    weight_decay=0.01,
+    evaluation_strategy="epoch",  # replaces the removed evaluate_during_training flag
+    logging_dir="./logs",
+)
+
+# Initialize the Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_dataset["train"],
+    eval_dataset=tokenized_dataset["test"],
+)
+
+# Train the model
+trainer.train()
+
+# Save the fine-tuned model
+model.save_pretrained("path/to/save/model")
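Once training finishes, the saved weights can be reloaded for inference through the standard transformers API. A minimal sketch follows; the save path is the placeholder from the script above, and which class index means "positive" depends on how the labels were encoded.

# Reload the fine-tuned model and classify one example.
import torch
from transformers import AlbertForSequenceClassification, AlbertTokenizer

model = AlbertForSequenceClassification.from_pretrained("path/to/save/model")
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")  # same tokenizer used for training

inputs = tokenizer("I love this product!", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(pred)  # 0 or 1; the mapping to sentiment labels is dataset-specific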