jenslemmens committed on
Commit
eb59411
1 Parent(s): c388d6d

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -5
  2. all_results.json +12 -11
  3. eval_results.json +8 -7
  4. train_results.json +4 -4
  5. trainer_state.json +80 -183
README.md CHANGED
@@ -19,11 +19,11 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  This model is a fine-tuned version of [DTAI-KULeuven/robbert-2023-dutch-base](https://huggingface.co/DTAI-KULeuven/robbert-2023-dutch-base) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.6600
23
- - Precision: 0.6285
24
- - Recall: 0.6293
25
- - Fscore: 0.6282
26
- - Accuracy: 0.6290
27
 
28
  ## Model description
29
 
 
19
 
20
  This model is a fine-tuned version of [DTAI-KULeuven/robbert-2023-dutch-base](https://huggingface.co/DTAI-KULeuven/robbert-2023-dutch-base) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.6591
23
+ - Precision: 0.6282
24
+ - Recall: 0.6290
25
+ - Fscore: 0.6278
26
+ - Accuracy: 0.6285
27
 
28
  ## Model description
29
 
all_results.json CHANGED
@@ -1,16 +1,17 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_fscore": 0.6321511851241228,
4
- "eval_loss": 0.6710947155952454,
5
- "eval_precision": 0.6327920074497231,
6
- "eval_recall": 0.6336155271716182,
7
- "eval_runtime": 2673.8432,
 
8
  "eval_samples": 834159,
9
- "eval_samples_per_second": 311.97,
10
- "eval_steps_per_second": 19.498,
11
- "train_loss": 0.5972718149742211,
12
- "train_runtime": 22981.0662,
13
  "train_samples": 868047,
14
- "train_samples_per_second": 37.772,
15
- "train_steps_per_second": 0.59
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.6284593225032638,
4
+ "eval_fscore": 0.6277856730232845,
5
+ "eval_loss": 0.659059464931488,
6
+ "eval_precision": 0.6282084760632409,
7
+ "eval_recall": 0.6289991449960104,
8
+ "eval_runtime": 939.6401,
9
  "eval_samples": 834159,
10
+ "eval_samples_per_second": 887.743,
11
+ "eval_steps_per_second": 13.871,
12
+ "train_loss": 0.6080337204928948,
13
+ "train_runtime": 5529.9878,
14
  "train_samples": 868047,
15
+ "train_samples_per_second": 156.971,
16
+ "train_steps_per_second": 1.226
17
  }
eval_results.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_fscore": 0.6321511851241228,
4
- "eval_loss": 0.6710947155952454,
5
- "eval_precision": 0.6327920074497231,
6
- "eval_recall": 0.6336155271716182,
7
- "eval_runtime": 2673.8432,
 
8
  "eval_samples": 834159,
9
- "eval_samples_per_second": 311.97,
10
- "eval_steps_per_second": 19.498
11
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.6284593225032638,
4
+ "eval_fscore": 0.6277856730232845,
5
+ "eval_loss": 0.659059464931488,
6
+ "eval_precision": 0.6282084760632409,
7
+ "eval_recall": 0.6289991449960104,
8
+ "eval_runtime": 939.6401,
9
  "eval_samples": 834159,
10
+ "eval_samples_per_second": 887.743,
11
+ "eval_steps_per_second": 13.871
12
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5972718149742211,
4
- "train_runtime": 22981.0662,
5
  "train_samples": 868047,
6
- "train_samples_per_second": 37.772,
7
- "train_steps_per_second": 0.59
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6080337204928948,
4
+ "train_runtime": 5529.9878,
5
  "train_samples": 868047,
6
+ "train_samples_per_second": 156.971,
7
+ "train_steps_per_second": 1.226
8
  }
trainer_state.json CHANGED
@@ -1,247 +1,144 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9999815678395665,
5
- "eval_steps": 2500,
6
- "global_step": 13563,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.04,
13
- "learning_rate": 9.634299196342994e-06,
14
- "loss": 0.6609,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.07,
19
- "learning_rate": 9.267861092678611e-06,
20
- "loss": 0.6467,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.11,
25
- "learning_rate": 8.899211088992112e-06,
26
- "loss": 0.6414,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.15,
31
- "learning_rate": 8.530561085305612e-06,
32
- "loss": 0.6322,
33
  "step": 2000
34
  },
35
- {
36
- "epoch": 0.18,
37
- "learning_rate": 8.161911081619111e-06,
38
- "loss": 0.6266,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.18,
43
- "eval_fscore": 0.6023351654273126,
44
- "eval_loss": 0.6601094007492065,
45
- "eval_precision": 0.6348626173773073,
46
- "eval_recall": 0.6216278871239599,
47
- "eval_runtime": 2683.597,
48
- "eval_samples_per_second": 310.836,
49
- "eval_steps_per_second": 19.427,
50
- "step": 2500
51
- },
52
- {
53
- "epoch": 0.22,
54
- "learning_rate": 7.793261077932612e-06,
55
- "loss": 0.6193,
56
- "step": 3000
57
- },
58
- {
59
- "epoch": 0.26,
60
- "learning_rate": 7.424611074246111e-06,
61
- "loss": 0.6134,
62
- "step": 3500
63
- },
64
  {
65
  "epoch": 0.29,
66
- "learning_rate": 7.055961070559611e-06,
67
- "loss": 0.6069,
68
- "step": 4000
69
- },
70
- {
71
- "epoch": 0.33,
72
- "learning_rate": 6.687311066873111e-06,
73
- "loss": 0.6043,
74
- "step": 4500
75
- },
76
- {
77
- "epoch": 0.37,
78
- "learning_rate": 6.318661063186611e-06,
79
- "loss": 0.6008,
80
- "step": 5000
81
  },
82
  {
83
  "epoch": 0.37,
84
- "eval_fscore": 0.6295517813114804,
85
- "eval_loss": 0.6481114625930786,
86
- "eval_precision": 0.6323627402469738,
87
- "eval_recall": 0.6327219192535836,
88
- "eval_runtime": 2681.3189,
89
- "eval_samples_per_second": 311.1,
90
- "eval_steps_per_second": 19.444,
91
- "step": 5000
92
- },
93
- {
94
- "epoch": 0.41,
95
- "learning_rate": 5.950011059500111e-06,
96
- "loss": 0.5989,
97
- "step": 5500
98
  },
99
  {
100
  "epoch": 0.44,
101
- "learning_rate": 5.581361055813611e-06,
102
- "loss": 0.6005,
103
- "step": 6000
104
- },
105
- {
106
- "epoch": 0.48,
107
- "learning_rate": 5.212711052127111e-06,
108
- "loss": 0.5894,
109
- "step": 6500
110
  },
111
  {
112
  "epoch": 0.52,
113
- "learning_rate": 4.8440610484406105e-06,
114
- "loss": 0.5871,
115
- "step": 7000
116
- },
117
- {
118
- "epoch": 0.55,
119
- "learning_rate": 4.475411044754111e-06,
120
- "loss": 0.5925,
121
- "step": 7500
122
- },
123
- {
124
- "epoch": 0.55,
125
- "eval_fscore": 0.6292509682845455,
126
- "eval_loss": 0.6569201946258545,
127
- "eval_precision": 0.6303328699646444,
128
- "eval_recall": 0.6311007791866495,
129
- "eval_runtime": 2678.5974,
130
- "eval_samples_per_second": 311.416,
131
- "eval_steps_per_second": 19.464,
132
- "step": 7500
133
  },
134
  {
135
  "epoch": 0.59,
136
- "learning_rate": 4.1074983410749835e-06,
137
- "loss": 0.5859,
138
- "step": 8000
139
  },
140
  {
141
- "epoch": 0.63,
142
- "learning_rate": 3.7388483373884836e-06,
143
- "loss": 0.5817,
144
- "step": 8500
 
 
 
 
 
 
145
  },
146
  {
147
  "epoch": 0.66,
148
- "learning_rate": 3.370935633709357e-06,
149
- "loss": 0.5819,
150
- "step": 9000
151
- },
152
- {
153
- "epoch": 0.7,
154
- "learning_rate": 3.0022856300228566e-06,
155
- "loss": 0.5812,
156
- "step": 9500
157
- },
158
- {
159
- "epoch": 0.74,
160
- "learning_rate": 2.6336356263363567e-06,
161
- "loss": 0.5824,
162
- "step": 10000
163
  },
164
  {
165
  "epoch": 0.74,
166
- "eval_fscore": 0.6275101578247589,
167
- "eval_loss": 0.6671798825263977,
168
- "eval_precision": 0.6346007218237036,
169
- "eval_recall": 0.6333714931104397,
170
- "eval_runtime": 2680.7422,
171
- "eval_samples_per_second": 311.167,
172
- "eval_steps_per_second": 19.448,
173
- "step": 10000
174
- },
175
- {
176
- "epoch": 0.77,
177
- "learning_rate": 2.2649856226498564e-06,
178
- "loss": 0.5762,
179
- "step": 10500
180
  },
181
  {
182
  "epoch": 0.81,
183
- "learning_rate": 1.8963356189633564e-06,
184
- "loss": 0.5709,
185
- "step": 11000
186
- },
187
- {
188
- "epoch": 0.85,
189
- "learning_rate": 1.5276856152768563e-06,
190
- "loss": 0.5717,
191
- "step": 11500
192
  },
193
  {
194
  "epoch": 0.88,
195
- "learning_rate": 1.159035611590356e-06,
196
- "loss": 0.5687,
197
- "step": 12000
198
- },
199
- {
200
- "epoch": 0.92,
201
- "learning_rate": 7.903856079038562e-07,
202
- "loss": 0.5661,
203
- "step": 12500
204
  },
205
  {
206
- "epoch": 0.92,
207
- "eval_fscore": 0.6327639400151716,
208
- "eval_loss": 0.672634482383728,
209
- "eval_precision": 0.6340222342544792,
210
- "eval_recall": 0.6347830614243537,
211
- "eval_runtime": 2680.9501,
212
- "eval_samples_per_second": 311.143,
213
- "eval_steps_per_second": 19.446,
214
- "step": 12500
 
215
  },
216
  {
217
  "epoch": 0.96,
218
- "learning_rate": 4.2247290422472906e-07,
219
- "loss": 0.5723,
220
- "step": 13000
221
- },
222
- {
223
- "epoch": 1.0,
224
- "learning_rate": 5.3822900538229014e-08,
225
- "loss": 0.5705,
226
- "step": 13500
227
  },
228
  {
229
  "epoch": 1.0,
230
- "step": 13563,
231
- "total_flos": 2.0223656129308262e+17,
232
- "train_loss": 0.5972718149742211,
233
- "train_runtime": 22981.0662,
234
- "train_samples_per_second": 37.772,
235
- "train_steps_per_second": 0.59
236
  }
237
  ],
238
  "logging_steps": 500,
239
- "max_steps": 13563,
240
  "num_input_tokens_seen": 0,
241
  "num_train_epochs": 1,
242
  "save_steps": 500,
243
- "total_flos": 2.0223656129308262e+17,
244
- "train_batch_size": 16,
245
  "trial_name": null,
246
  "trial_params": null
247
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 2000,
6
+ "global_step": 6782,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07,
13
+ "learning_rate": 9.26570333235034e-06,
14
+ "loss": 0.6511,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.15,
19
+ "learning_rate": 8.529932173400177e-06,
20
+ "loss": 0.6334,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.22,
25
+ "learning_rate": 7.792686523149514e-06,
26
+ "loss": 0.6255,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.29,
31
+ "learning_rate": 7.05544087289885e-06,
32
+ "loss": 0.616,
33
  "step": 2000
34
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  "epoch": 0.29,
37
+ "eval_accuracy": 0.6266611041779805,
38
+ "eval_fscore": 0.6266318238581281,
39
+ "eval_loss": 0.6498423218727112,
40
+ "eval_precision": 0.6295307854237061,
41
+ "eval_recall": 0.629852593471248,
42
+ "eval_runtime": 927.8313,
43
+ "eval_samples_per_second": 899.042,
44
+ "eval_steps_per_second": 14.048,
45
+ "step": 2000
 
 
 
 
 
 
46
  },
47
  {
48
  "epoch": 0.37,
49
+ "learning_rate": 6.318195222648187e-06,
50
+ "loss": 0.6115,
51
+ "step": 2500
 
 
 
 
 
 
 
 
 
 
 
52
  },
53
  {
54
  "epoch": 0.44,
55
+ "learning_rate": 5.580949572397524e-06,
56
+ "loss": 0.6085,
57
+ "step": 3000
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 0.52,
61
+ "learning_rate": 4.84370392214686e-06,
62
+ "loss": 0.6009,
63
+ "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  },
65
  {
66
  "epoch": 0.59,
67
+ "learning_rate": 4.106458271896196e-06,
68
+ "loss": 0.6033,
69
+ "step": 4000
70
  },
71
  {
72
+ "epoch": 0.59,
73
+ "eval_accuracy": 0.6227865430931033,
74
+ "eval_fscore": 0.6227524751090208,
75
+ "eval_loss": 0.6584447622299194,
76
+ "eval_precision": 0.627835745995615,
77
+ "eval_recall": 0.627405168660975,
78
+ "eval_runtime": 922.5319,
79
+ "eval_samples_per_second": 904.206,
80
+ "eval_steps_per_second": 14.129,
81
+ "step": 4000
82
  },
83
  {
84
  "epoch": 0.66,
85
+ "learning_rate": 3.3692126216455325e-06,
86
+ "loss": 0.5969,
87
+ "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
88
  },
89
  {
90
  "epoch": 0.74,
91
+ "learning_rate": 2.6334414626953703e-06,
92
+ "loss": 0.5968,
93
+ "step": 5000
 
 
 
 
 
 
 
 
 
 
 
94
  },
95
  {
96
  "epoch": 0.81,
97
+ "learning_rate": 1.897670303745208e-06,
98
+ "loss": 0.5922,
99
+ "step": 5500
 
 
 
 
 
 
100
  },
101
  {
102
  "epoch": 0.88,
103
+ "learning_rate": 1.1604246534945445e-06,
104
+ "loss": 0.5896,
105
+ "step": 6000
 
 
 
 
 
 
106
  },
107
  {
108
+ "epoch": 0.88,
109
+ "eval_accuracy": 0.6289963903764151,
110
+ "eval_fscore": 0.6282163376402503,
111
+ "eval_loss": 0.659950852394104,
112
+ "eval_precision": 0.6285056225987752,
113
+ "eval_recall": 0.6292816037736055,
114
+ "eval_runtime": 919.1018,
115
+ "eval_samples_per_second": 907.581,
116
+ "eval_steps_per_second": 14.181,
117
+ "step": 6000
118
  },
119
  {
120
  "epoch": 0.96,
121
+ "learning_rate": 4.231790032438809e-07,
122
+ "loss": 0.5894,
123
+ "step": 6500
 
 
 
 
 
 
124
  },
125
  {
126
  "epoch": 1.0,
127
+ "step": 6782,
128
+ "total_flos": 5.709819056802048e+16,
129
+ "train_loss": 0.6080337204928948,
130
+ "train_runtime": 5529.9878,
131
+ "train_samples_per_second": 156.971,
132
+ "train_steps_per_second": 1.226
133
  }
134
  ],
135
  "logging_steps": 500,
136
+ "max_steps": 6782,
137
  "num_input_tokens_seen": 0,
138
  "num_train_epochs": 1,
139
  "save_steps": 500,
140
+ "total_flos": 5.709819056802048e+16,
141
+ "train_batch_size": 64,
142
  "trial_name": null,
143
  "trial_params": null
144
  }