KameronB committed on
Commit
5805854
1 Parent(s): 987e9e5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +182 -2
README.md CHANGED
@@ -7,10 +7,182 @@ language:
7
  <summary>
8
  TinyBERT based model
9
  </summary>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  </details>
11
 
12
  <details>
13
  <summary>RoBERTa-based model</summary>
 
 
14
  ```python
15
  import torch
16
  from torch.utils.data import DataLoader, Dataset
@@ -22,14 +194,22 @@ import pandas as pd
22
  tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
23
 
24
  # Load RoBERTa pre-trained model
25
- model = RobertaForSequenceClassification.from_pretrained('KameronB/SITCC-Incident-Request-Classifier', num_labels=2)
26
- model = model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
27
 
28
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  ```
32
 
 
33
  ```python
34
 
35
  def predict_description(model, tokenizer, text, max_length=512):
 
7
  <summary>
8
  TinyBERT based model
9
  </summary>
10
+
11
+ ### Fetching the model
12
+ ```python
13
+ # Load the TinyBERT tokenizer and model
14
+ tokenizer = AutoTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
15
+ model = AutoModelForSequenceClassification.from_pretrained('huawei-noah/TinyBERT_General_4L_312D', num_labels=2)
16
+
17
+ # Fetch the state dict that holds the fine-tuned weights
18
+ state_dict = torch.hub.load_state_dict_from_url(f"https://huggingface.co/KameronB/SITCC-Incident-Request-Classifier/resolve/main/tiny_bert_model.bin")
19
+ # If running on CPU only, use this call instead so the weights are mapped to the CPU:
20
+ # state_dict = torch.hub.load_state_dict_from_url(f"https://huggingface.co/KameronB/SITCC-Incident-Request-Classifier/resolve/main/tiny_bert_model.bin", map_location=torch.device('cpu'))
21
+
22
+ model.load_state_dict(state_dict)
23
+
24
+ model = model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
25
+
26
+ ```
27
+
28
+
29
+ ### Using the model
30
+
31
+ ```python
32
+ def predict_description(model, tokenizer, text, max_length=512):
33
+ model.eval() # Set the model to evaluation mode
34
+
35
+ # Ensure model is on the correct device
36
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+ model = model.to(device)
38
+
39
+ # Encode the input text
40
+ inputs = tokenizer.encode_plus(
41
+ text,
42
+ None,
43
+ add_special_tokens=True,
44
+ max_length=max_length,
45
+ padding='max_length',
46
+ return_token_type_ids=False,
47
+ return_tensors='pt',
48
+ truncation=True
49
+ )
50
+
51
+ # Move tensors to the correct device
52
+ inputs = {key: value.to(device) for key, value in inputs.items()}
53
+
54
+ # Make prediction
55
+ with torch.no_grad():
56
+ outputs = model(**inputs)
57
+ logits = outputs.logits
58
+ probabilities = torch.softmax(logits, dim=-1)
59
+ predicted_class_id = torch.argmax(probabilities, dim=-1).item()
60
+
61
+ return predicted_class_id, probabilities.cpu().tolist()
62
+
63
+
64
+
65
+ # Example usage
66
+
67
+ tickets = [
68
+ """Inquiry about the possibility of customizing Docker to better meet department-specific needs.
69
+ Gathered requirements for desired customizations.""",
70
+ """We've encountered a recurring problem with DEVEnv shutting down anytime we try to save documents.
71
+ I looked over the error logs for any clues about what's going wrong. I'm passing this on to the team responsible for software upkeep."""
72
+ ]
73
+
74
+ for i, row in df.sample(frac=0.01).iterrows():
75
+ prediction, probabilities = predict_description(model, tokenizer, row['content'])
76
+ prediction = (['INCIDENT', 'TASK'])[prediction]
77
+ print(f"{prediction} ({probabilities}) <== {row['content']}")
78
+ ```
79
+
80
+ ### Additional fine-tuning
81
+
82
+ ```python
83
+
84
+ # The dataset class
85
+ class TextDataset(Dataset):
86
+ def __init__(self, descriptions, labels, tokenizer, max_len):
87
+ self.descriptions = descriptions
88
+ self.labels = labels
89
+ self.tokenizer = tokenizer
90
+ self.max_len = max_len
91
+
92
+ def __len__(self):
93
+ return len(self.descriptions)
94
+
95
+ def __getitem__(self, idx):
96
+ text = self.descriptions[idx]
97
+ inputs = self.tokenizer.encode_plus(
98
+ text,
99
+ None,
100
+ add_special_tokens=True,
101
+ max_length=self.max_len,
102
+ padding='max_length',
103
+ return_token_type_ids=False,
104
+ truncation=True
105
+ )
106
+ return {
107
+ 'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
108
+ 'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
109
+ 'labels': torch.tensor(self.labels[idx], dtype=torch.long)
110
+ }
111
+
112
+ # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
113
+ # load the data
114
+ df = pd.read_csv('..\\data\\final_data.csv')
115
+ df['label'] = df['type'].astype('category').cat.codes # Convert labels to category codes if they aren't already
116
+
117
+ # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
118
+ # create the training and validation sets and data loaders
119
+ print( "cuda is available" if torch.cuda.is_available() else "cuda is unavailable: running on cpu")
120
+
121
+ # Split the data into training and validation sets
122
+ train_df, val_df = train_test_split(df, test_size=0.15)
123
+
124
+ # Create PyTorch datasets
125
+ train_dataset = TextDataset(train_df['content'].tolist(), train_df['label'].tolist(), tokenizer, max_len=512)
126
+ val_dataset = TextDataset(val_df['content'].tolist(), val_df['label'].tolist(), tokenizer, max_len=512)
127
+
128
+ # Create data loaders
129
+ train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
130
+ val_loader = DataLoader(val_dataset, batch_size=32)
131
+
132
+ # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
133
+ # Train the model
134
+
135
+ # Only these parameters will be trained; every other parameter is frozen. Edit this list to choose which layers you want to retrain.
136
+ training_layers = [
137
+ "bert.encoder.layer.3.output.dense.weight",
138
+ "bert.encoder.layer.3.output.dense.bias",
139
+ "bert.encoder.layer.3.output.LayerNorm.weight",
140
+ "bert.encoder.layer.3.output.LayerNorm.bias",
141
+ "bert.pooler.dense.weight",
142
+ "bert.pooler.dense.bias",
143
+ "classifier.weight",
144
+ "classifier.bias",
145
+ ]
146
+
147
+ for name, param in model.named_parameters():
148
+ if name not in training_layers: # Freeze every parameter that is not listed in training_layers
149
+ param.requires_grad = False
150
+
151
+ # Training setup
152
+ optimizer = AdamW(model.parameters(), lr=5e-5)
153
+ epochs = 2
154
+
155
+ for epoch in range(epochs):
156
+ model.train()
157
+ loss_item = float('+inf')
158
+ for batch in tqdm(train_loader, desc=f"Training Loss: {loss_item}"):
159
+ batch = {k: v.to(model.device) for k, v in batch.items()}
160
+ outputs = model(**batch)
161
+ loss = outputs.loss
162
+ loss.backward()
163
+ optimizer.step()
164
+ optimizer.zero_grad()
165
+ loss_item = loss.item()
166
+
167
+ model.eval()
168
+ total_eval_accuracy = 0
169
+ for batch in tqdm(val_loader, desc=f"Validation Accuracy: {total_eval_accuracy}"):
170
+ batch = {k: v.to(model.device) for k, v in batch.items()}
171
+ with torch.no_grad():
172
+ outputs = model(**batch)
173
+ logits = outputs.logits
174
+ predictions = torch.argmax(logits, dim=-1)
175
+ accuracy = (predictions == batch['labels']).cpu().numpy().mean()
176
+ total_eval_accuracy += accuracy
177
+
178
+ print(f"Validation Accuracy: {total_eval_accuracy / len(val_loader)}")
179
+ ```
180
  </details>
181
 
182
  <details>
183
  <summary>RoBERTa-based model</summary>
184
+
185
+ ### Base model
186
  ```python
187
  import torch
188
  from torch.utils.data import DataLoader, Dataset
 
194
  tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
195
 
196
  # Load RoBERTa pre-trained model
197
+ model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
 
198
 
199
 
200
+ # Fetch the state dict that holds the fine-tuned weights
201
+ state_dict = torch.hub.load_state_dict_from_url(f"https://huggingface.co/KameronB/SITCC-Incident-Request-Classifier/resolve/main/pytorch_model.bin")
202
+ # If running on CPU only, use this call instead so the weights are mapped to the CPU:
203
+ # state_dict = torch.hub.load_state_dict_from_url(f"https://huggingface.co/KameronB/SITCC-Incident-Request-Classifier/resolve/main/pytorch_model.bin", map_location=torch.device('cpu'))
204
+
205
+ model.load_state_dict(state_dict)
206
+
207
+ model = model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
208
 
209
 
210
  ```
211
 
212
+ ### Use model to make predictions
213
  ```python
214
 
215
  def predict_description(model, tokenizer, text, max_length=512):