batterydata commited on
Commit
000ab3d
1 Parent(s): e2eab22
Files changed (3) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +237 -180
  3. training_args.bin +2 -2
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2501b95940948e0e7914b12893a4db2e51e91522e86608321b56de7825d840c
3
  size 431110641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4fb75cd82c67cea63b9bb08734e851e4e0e2c023be07ce4b9534b582b90862
3
  size 431110641
trainer_state.json CHANGED
@@ -1,349 +1,406 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 12240,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.61,
12
- "learning_rate": 1.9183006535947716e-05,
13
- "loss": 0.4329,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.9725273278019229,
19
- "eval_f1": 0.9629807384654764,
20
- "eval_loss": 0.10430943965911865,
21
- "eval_precision": 0.9637516411028211,
22
- "eval_recall": 0.9622110681287765,
23
- "eval_runtime": 2.936,
24
- "eval_samples_per_second": 494.214,
25
- "eval_steps_per_second": 30.995,
26
  "step": 816
27
  },
28
  {
29
  "epoch": 1.23,
30
- "learning_rate": 1.8366013071895427e-05,
31
- "loss": 0.0811,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.84,
36
- "learning_rate": 1.7549019607843138e-05,
37
- "loss": 0.0607,
38
  "step": 1500
39
  },
40
  {
41
  "epoch": 2.0,
42
- "eval_accuracy": 0.9751349927564862,
43
- "eval_f1": 0.9635150914122107,
44
- "eval_loss": 0.09400150179862976,
45
- "eval_precision": 0.9641165172855314,
46
- "eval_recall": 0.9629144154224879,
47
- "eval_runtime": 2.7493,
48
- "eval_samples_per_second": 527.777,
49
- "eval_steps_per_second": 33.1,
50
  "step": 1632
51
  },
52
  {
53
  "epoch": 2.45,
54
- "learning_rate": 1.6732026143790852e-05,
55
- "loss": 0.0428,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 3.0,
60
- "eval_accuracy": 0.9755037534571316,
61
- "eval_f1": 0.9632957125043962,
62
- "eval_loss": 0.09555820375680923,
63
- "eval_precision": 0.9633573141486811,
64
- "eval_recall": 0.9632341187378113,
65
- "eval_runtime": 2.7415,
66
- "eval_samples_per_second": 529.269,
67
- "eval_steps_per_second": 33.193,
68
  "step": 2448
69
  },
70
  {
71
  "epoch": 3.06,
72
- "learning_rate": 1.5915032679738563e-05,
73
- "loss": 0.0363,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 3.68,
78
- "learning_rate": 1.5098039215686276e-05,
79
- "loss": 0.0249,
80
  "step": 3000
81
  },
82
  {
83
  "epoch": 4.0,
84
- "eval_accuracy": 0.9765046753588832,
85
- "eval_f1": 0.9649540551338394,
86
- "eval_loss": 0.0989251509308815,
87
- "eval_precision": 0.9646919734151329,
88
- "eval_recall": 0.9652162792928163,
89
- "eval_runtime": 2.7759,
90
- "eval_samples_per_second": 522.708,
91
- "eval_steps_per_second": 32.782,
92
  "step": 3264
93
  },
94
  {
95
  "epoch": 4.29,
96
- "learning_rate": 1.4281045751633989e-05,
97
- "loss": 0.0221,
98
  "step": 3500
99
  },
100
  {
101
  "epoch": 4.9,
102
- "learning_rate": 1.3464052287581701e-05,
103
- "loss": 0.0177,
104
  "step": 4000
105
  },
106
  {
107
  "epoch": 5.0,
108
- "eval_accuracy": 0.976451995258791,
109
- "eval_f1": 0.9646675296035285,
110
- "eval_loss": 0.10913769155740738,
111
- "eval_precision": 0.9643747204294204,
112
- "eval_recall": 0.9649605166405576,
113
- "eval_runtime": 3.7551,
114
- "eval_samples_per_second": 386.403,
115
- "eval_steps_per_second": 24.233,
116
  "step": 4080
117
  },
118
  {
119
  "epoch": 5.51,
120
- "learning_rate": 1.2647058823529412e-05,
121
- "loss": 0.0129,
122
  "step": 4500
123
  },
124
  {
125
  "epoch": 6.0,
126
- "eval_accuracy": 0.9780060582115107,
127
- "eval_f1": 0.9675017575254042,
128
- "eval_loss": 0.11124212294816971,
129
- "eval_precision": 0.967038231818327,
130
- "eval_recall": 0.9679657278045973,
131
- "eval_runtime": 3.8249,
132
- "eval_samples_per_second": 379.357,
133
- "eval_steps_per_second": 23.792,
134
  "step": 4896
135
  },
136
  {
137
  "epoch": 6.13,
138
- "learning_rate": 1.1830065359477125e-05,
139
- "loss": 0.0119,
140
  "step": 5000
141
  },
142
  {
143
  "epoch": 6.74,
144
- "learning_rate": 1.1013071895424838e-05,
145
- "loss": 0.0084,
146
  "step": 5500
147
  },
148
  {
149
  "epoch": 7.0,
150
- "eval_accuracy": 0.9783221388120638,
151
- "eval_f1": 0.9677996994212259,
152
- "eval_loss": 0.12139276415109634,
153
- "eval_precision": 0.967985415933732,
154
- "eval_recall": 0.9676140541577416,
155
- "eval_runtime": 3.747,
156
- "eval_samples_per_second": 387.246,
157
- "eval_steps_per_second": 24.286,
158
  "step": 5712
159
  },
160
  {
161
  "epoch": 7.35,
162
- "learning_rate": 1.0196078431372549e-05,
163
- "loss": 0.0076,
164
  "step": 6000
165
  },
166
  {
167
  "epoch": 7.97,
168
- "learning_rate": 9.379084967320261e-06,
169
- "loss": 0.0061,
170
  "step": 6500
171
  },
172
  {
173
  "epoch": 8.0,
174
- "eval_accuracy": 0.9775319373106809,
175
- "eval_f1": 0.9664444551020734,
176
- "eval_loss": 0.13343702256679535,
177
- "eval_precision": 0.9665217113257019,
178
- "eval_recall": 0.9663672112279804,
179
- "eval_runtime": 2.7649,
180
- "eval_samples_per_second": 524.787,
181
- "eval_steps_per_second": 32.912,
182
  "step": 6528
183
  },
184
  {
185
  "epoch": 8.58,
186
- "learning_rate": 8.562091503267974e-06,
187
- "loss": 0.0048,
188
  "step": 7000
189
  },
190
  {
191
  "epoch": 9.0,
192
- "eval_accuracy": 0.9775055972606348,
193
- "eval_f1": 0.9661577811525858,
194
- "eval_loss": 0.13892702758312225,
195
- "eval_precision": 0.9662041181736795,
196
- "eval_recall": 0.9661114485757217,
197
- "eval_runtime": 3.8371,
198
- "eval_samples_per_second": 378.146,
199
- "eval_steps_per_second": 23.716,
200
  "step": 7344
201
  },
202
  {
203
  "epoch": 9.19,
204
- "learning_rate": 7.745098039215687e-06,
205
- "loss": 0.0047,
206
  "step": 7500
207
  },
208
  {
209
  "epoch": 9.8,
210
- "learning_rate": 6.928104575163399e-06,
211
- "loss": 0.0044,
212
  "step": 8000
213
  },
214
  {
215
  "epoch": 10.0,
216
- "eval_accuracy": 0.9781640985117872,
217
- "eval_f1": 0.967271564473579,
218
- "eval_loss": 0.14489226043224335,
219
- "eval_precision": 0.9674726540011515,
220
- "eval_recall": 0.9670705585216919,
221
- "eval_runtime": 3.7406,
222
- "eval_samples_per_second": 387.909,
223
- "eval_steps_per_second": 24.328,
224
  "step": 8160
225
  },
226
  {
227
  "epoch": 10.42,
228
- "learning_rate": 6.111111111111112e-06,
229
- "loss": 0.0034,
230
  "step": 8500
231
  },
232
  {
233
  "epoch": 11.0,
234
- "eval_accuracy": 0.9779797181614646,
235
- "eval_f1": 0.9672149490880608,
236
- "eval_loss": 0.14611582458019257,
237
- "eval_precision": 0.9671994884910486,
238
- "eval_recall": 0.9672304101793535,
239
- "eval_runtime": 2.8969,
240
- "eval_samples_per_second": 500.886,
241
- "eval_steps_per_second": 31.413,
242
  "step": 8976
243
  },
244
  {
245
  "epoch": 11.03,
246
- "learning_rate": 5.294117647058824e-06,
247
- "loss": 0.0034,
248
  "step": 9000
249
  },
250
  {
251
  "epoch": 11.64,
252
- "learning_rate": 4.477124183006537e-06,
253
- "loss": 0.0025,
254
  "step": 9500
255
  },
256
  {
257
  "epoch": 12.0,
258
- "eval_accuracy": 0.9784274990122481,
259
- "eval_f1": 0.9676501159350764,
260
- "eval_loss": 0.14966140687465668,
261
- "eval_precision": 0.9680061428205784,
262
- "eval_recall": 0.9672943508424182,
263
- "eval_runtime": 2.7972,
264
- "eval_samples_per_second": 518.731,
265
- "eval_steps_per_second": 32.532,
266
  "step": 9792
267
  },
268
  {
269
  "epoch": 12.25,
270
- "learning_rate": 3.6601307189542484e-06,
271
- "loss": 0.0022,
272
  "step": 10000
273
  },
274
  {
275
  "epoch": 12.87,
276
- "learning_rate": 2.843137254901961e-06,
277
- "loss": 0.0024,
278
  "step": 10500
279
  },
280
  {
281
  "epoch": 13.0,
282
- "eval_accuracy": 0.977848017911234,
283
- "eval_f1": 0.9670199670679264,
284
- "eval_loss": 0.15780866146087646,
285
- "eval_precision": 0.9670972692971798,
286
- "eval_recall": 0.9669426771955625,
287
- "eval_runtime": 3.8111,
288
- "eval_samples_per_second": 380.734,
289
- "eval_steps_per_second": 23.878,
290
  "step": 10608
291
  },
292
  {
293
  "epoch": 13.48,
294
- "learning_rate": 2.0261437908496734e-06,
295
- "loss": 0.0014,
296
  "step": 11000
297
  },
298
  {
299
  "epoch": 14.0,
300
- "eval_accuracy": 0.978111418411695,
301
- "eval_f1": 0.9674521213671389,
302
- "eval_loss": 0.15866732597351074,
303
- "eval_precision": 0.9675139888089528,
304
- "eval_recall": 0.9673902618370153,
305
- "eval_runtime": 2.8382,
306
- "eval_samples_per_second": 511.243,
307
- "eval_steps_per_second": 32.063,
308
  "step": 11424
309
  },
310
  {
311
  "epoch": 14.09,
312
- "learning_rate": 1.2091503267973858e-06,
313
- "loss": 0.0019,
314
  "step": 11500
315
  },
316
  {
317
  "epoch": 14.71,
318
- "learning_rate": 3.921568627450981e-07,
319
- "loss": 0.0016,
320
  "step": 12000
321
  },
322
  {
323
  "epoch": 15.0,
324
- "eval_accuracy": 0.978401158962202,
325
- "eval_f1": 0.96785434549785,
326
- "eval_loss": 0.15796583890914917,
327
- "eval_precision": 0.9678388746803069,
328
- "eval_recall": 0.9678698168100003,
329
- "eval_runtime": 3.7576,
330
- "eval_samples_per_second": 386.151,
331
- "eval_steps_per_second": 24.218,
332
  "step": 12240
333
  },
334
  {
335
- "epoch": 15.0,
336
- "step": 12240,
337
- "total_flos": 5.119363918698906e+16,
338
- "train_loss": 0.03262665262993644,
339
- "train_runtime": 785.1159,
340
- "train_samples_per_second": 249.403,
341
- "train_steps_per_second": 15.59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
343
  ],
344
- "max_steps": 12240,
345
- "num_train_epochs": 15,
346
- "total_flos": 5.119363918698906e+16,
347
  "trial_name": null,
348
  "trial_params": null
349
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 18.0,
5
+ "global_step": 14688,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.61,
12
+ "learning_rate": 1.9387254901960785e-05,
13
+ "loss": 0.4753,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.9720334966029388,
19
+ "eval_f1": 0.9606774786290644,
20
+ "eval_loss": 0.10676991939544678,
21
+ "eval_precision": 0.9602632083306715,
22
+ "eval_recall": 0.9610921065251447,
23
+ "eval_runtime": 4.3302,
24
+ "eval_samples_per_second": 335.092,
25
+ "eval_steps_per_second": 21.015,
26
  "step": 816
27
  },
28
  {
29
  "epoch": 1.23,
30
+ "learning_rate": 1.877450980392157e-05,
31
+ "loss": 0.0981,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.84,
36
+ "learning_rate": 1.8161764705882355e-05,
37
+ "loss": 0.0753,
38
  "step": 1500
39
  },
40
  {
41
  "epoch": 2.0,
42
+ "eval_accuracy": 0.9730605150892716,
43
+ "eval_f1": 0.9629144363665415,
44
+ "eval_loss": 0.10854744911193848,
45
+ "eval_precision": 0.9624992014310356,
46
+ "eval_recall": 0.9633300297324083,
47
+ "eval_runtime": 4.0978,
48
+ "eval_samples_per_second": 354.094,
49
+ "eval_steps_per_second": 22.207,
50
  "step": 1632
51
  },
52
  {
53
  "epoch": 2.45,
54
+ "learning_rate": 1.7549019607843138e-05,
55
+ "loss": 0.0535,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 3.0,
60
+ "eval_accuracy": 0.9732580186443356,
61
+ "eval_f1": 0.9634966531942426,
62
+ "eval_loss": 0.10905417799949646,
63
+ "eval_precision": 0.9628967367009388,
64
+ "eval_recall": 0.9640973176891844,
65
+ "eval_runtime": 4.1017,
66
+ "eval_samples_per_second": 353.751,
67
+ "eval_steps_per_second": 22.186,
68
  "step": 2448
69
  },
70
  {
71
  "epoch": 3.06,
72
+ "learning_rate": 1.693627450980392e-05,
73
+ "loss": 0.046,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 3.68,
78
+ "learning_rate": 1.6323529411764708e-05,
79
+ "loss": 0.0306,
80
  "step": 3000
81
  },
82
  {
83
  "epoch": 4.0,
84
+ "eval_accuracy": 0.9720927476694581,
85
+ "eval_f1": 0.9631510853418946,
86
+ "eval_loss": 0.1200980469584465,
87
+ "eval_precision": 0.9623978549540347,
88
+ "eval_recall": 0.9639054956999904,
89
+ "eval_runtime": 4.0941,
90
+ "eval_samples_per_second": 354.409,
91
+ "eval_steps_per_second": 22.227,
92
  "step": 3264
93
  },
94
  {
95
  "epoch": 4.29,
96
+ "learning_rate": 1.571078431372549e-05,
97
+ "loss": 0.0271,
98
  "step": 3500
99
  },
100
  {
101
  "epoch": 4.9,
102
+ "learning_rate": 1.5098039215686276e-05,
103
+ "loss": 0.0222,
104
  "step": 4000
105
  },
106
  {
107
  "epoch": 5.0,
108
+ "eval_accuracy": 0.9717372412703429,
109
+ "eval_f1": 0.9625902498242924,
110
+ "eval_loss": 0.1297336369752884,
111
+ "eval_precision": 0.9618834796488428,
112
+ "eval_recall": 0.963298059400876,
113
+ "eval_runtime": 3.1221,
114
+ "eval_samples_per_second": 464.754,
115
+ "eval_steps_per_second": 29.147,
116
  "step": 4080
117
  },
118
  {
119
  "epoch": 5.51,
120
+ "learning_rate": 1.448529411764706e-05,
121
+ "loss": 0.0165,
122
  "step": 4500
123
  },
124
  {
125
  "epoch": 6.0,
126
+ "eval_accuracy": 0.9724285037130669,
127
+ "eval_f1": 0.9634232047022744,
128
+ "eval_loss": 0.1293230503797531,
129
+ "eval_precision": 0.9626544096523988,
130
+ "eval_recall": 0.9641932286837814,
131
+ "eval_runtime": 4.1611,
132
+ "eval_samples_per_second": 348.706,
133
+ "eval_steps_per_second": 21.869,
134
  "step": 4896
135
  },
136
  {
137
  "epoch": 6.13,
138
+ "learning_rate": 1.3872549019607844e-05,
139
+ "loss": 0.0157,
140
  "step": 5000
141
  },
142
  {
143
  "epoch": 6.74,
144
+ "learning_rate": 1.3259803921568627e-05,
145
+ "loss": 0.0106,
146
  "step": 5500
147
  },
148
  {
149
  "epoch": 7.0,
150
+ "eval_accuracy": 0.9736727761099699,
151
+ "eval_f1": 0.9648310228071295,
152
+ "eval_loss": 0.14171898365020752,
153
+ "eval_precision": 0.9639996170172023,
154
+ "eval_recall": 0.965663863934269,
155
+ "eval_runtime": 3.07,
156
+ "eval_samples_per_second": 472.638,
157
+ "eval_steps_per_second": 29.642,
158
  "step": 5712
159
  },
160
  {
161
  "epoch": 7.35,
162
+ "learning_rate": 1.2647058823529412e-05,
163
+ "loss": 0.0099,
164
  "step": 6000
165
  },
166
  {
167
  "epoch": 7.97,
168
+ "learning_rate": 1.2034313725490197e-05,
169
+ "loss": 0.0084,
170
  "step": 6500
171
  },
172
  {
173
  "epoch": 8.0,
174
+ "eval_accuracy": 0.9737912782430084,
175
+ "eval_f1": 0.9657231348883248,
176
+ "eval_loss": 0.15885183215141296,
177
+ "eval_precision": 0.9645066649658779,
178
+ "eval_recall": 0.9669426771955625,
179
+ "eval_runtime": 3.0514,
180
+ "eval_samples_per_second": 475.516,
181
+ "eval_steps_per_second": 29.822,
182
  "step": 6528
183
  },
184
  {
185
  "epoch": 8.58,
186
+ "learning_rate": 1.142156862745098e-05,
187
+ "loss": 0.007,
188
  "step": 7000
189
  },
190
  {
191
  "epoch": 9.0,
192
+ "eval_accuracy": 0.9737320271764892,
193
+ "eval_f1": 0.9660194174757282,
194
+ "eval_loss": 0.16605724394321442,
195
+ "eval_precision": 0.9650023927261127,
196
+ "eval_recall": 0.9670385881901595,
197
+ "eval_runtime": 4.1246,
198
+ "eval_samples_per_second": 351.79,
199
+ "eval_steps_per_second": 22.063,
200
  "step": 7344
201
  },
202
  {
203
  "epoch": 9.19,
204
+ "learning_rate": 1.0808823529411765e-05,
205
+ "loss": 0.0066,
206
  "step": 7500
207
  },
208
  {
209
  "epoch": 9.8,
210
+ "learning_rate": 1.0196078431372549e-05,
211
+ "loss": 0.0051,
212
  "step": 8000
213
  },
214
  {
215
  "epoch": 10.0,
216
+ "eval_accuracy": 0.9731592668668037,
217
+ "eval_f1": 0.964738573573094,
218
+ "eval_loss": 0.17197225987911224,
219
+ "eval_precision": 0.9638150547241456,
220
+ "eval_recall": 0.965663863934269,
221
+ "eval_runtime": 3.0554,
222
+ "eval_samples_per_second": 474.895,
223
+ "eval_steps_per_second": 29.783,
224
  "step": 8160
225
  },
226
  {
227
  "epoch": 10.42,
228
+ "learning_rate": 9.583333333333335e-06,
229
+ "loss": 0.0046,
230
  "step": 8500
231
  },
232
  {
233
  "epoch": 11.0,
234
+ "eval_accuracy": 0.9732382682888292,
235
+ "eval_f1": 0.9645942795087595,
236
+ "eval_loss": 0.1706034392118454,
237
+ "eval_precision": 0.9636862594932669,
238
+ "eval_recall": 0.9655040122766073,
239
+ "eval_runtime": 4.2231,
240
+ "eval_samples_per_second": 343.586,
241
+ "eval_steps_per_second": 21.548,
242
  "step": 8976
243
  },
244
  {
245
  "epoch": 11.03,
246
+ "learning_rate": 8.970588235294119e-06,
247
+ "loss": 0.0051,
248
  "step": 9000
249
  },
250
  {
251
  "epoch": 11.64,
252
+ "learning_rate": 8.357843137254903e-06,
253
+ "loss": 0.0032,
254
  "step": 9500
255
  },
256
  {
257
  "epoch": 12.0,
258
+ "eval_accuracy": 0.973475272554906,
259
+ "eval_f1": 0.9657750934713834,
260
+ "eval_loss": 0.1721695363521576,
261
+ "eval_precision": 0.9653432139777046,
262
+ "eval_recall": 0.9662073595703188,
263
+ "eval_runtime": 4.1749,
264
+ "eval_samples_per_second": 347.551,
265
+ "eval_steps_per_second": 21.797,
266
  "step": 9792
267
  },
268
  {
269
  "epoch": 12.25,
270
+ "learning_rate": 7.745098039215687e-06,
271
+ "loss": 0.0037,
272
  "step": 10000
273
  },
274
  {
275
  "epoch": 12.87,
276
+ "learning_rate": 7.132352941176472e-06,
277
+ "loss": 0.0031,
278
  "step": 10500
279
  },
280
  {
281
  "epoch": 13.0,
282
+ "eval_accuracy": 0.9734357718438932,
283
+ "eval_f1": 0.9649974450689831,
284
+ "eval_loss": 0.18985576927661896,
285
+ "eval_precision": 0.9639814962513957,
286
+ "eval_recall": 0.9660155375811247,
287
+ "eval_runtime": 3.0596,
288
+ "eval_samples_per_second": 474.243,
289
+ "eval_steps_per_second": 29.742,
290
  "step": 10608
291
  },
292
  {
293
  "epoch": 13.48,
294
+ "learning_rate": 6.519607843137256e-06,
295
+ "loss": 0.0028,
296
  "step": 11000
297
  },
298
  {
299
  "epoch": 14.0,
300
+ "eval_accuracy": 0.9733962711328804,
301
+ "eval_f1": 0.9652715654952077,
302
+ "eval_loss": 0.1955101639032364,
303
+ "eval_precision": 0.9646243734235816,
304
+ "eval_recall": 0.9659196265865277,
305
+ "eval_runtime": 3.05,
306
+ "eval_samples_per_second": 475.739,
307
+ "eval_steps_per_second": 29.836,
308
  "step": 11424
309
  },
310
  {
311
  "epoch": 14.09,
312
+ "learning_rate": 5.90686274509804e-06,
313
+ "loss": 0.0028,
314
  "step": 11500
315
  },
316
  {
317
  "epoch": 14.71,
318
+ "learning_rate": 5.294117647058824e-06,
319
+ "loss": 0.0026,
320
  "step": 12000
321
  },
322
  {
323
  "epoch": 15.0,
324
+ "eval_accuracy": 0.9738110285985148,
325
+ "eval_f1": 0.9658246656760773,
326
+ "eval_loss": 0.19060379266738892,
327
+ "eval_precision": 0.9653465346534653,
328
+ "eval_recall": 0.9663032705649157,
329
+ "eval_runtime": 4.1019,
330
+ "eval_samples_per_second": 353.738,
331
+ "eval_steps_per_second": 22.185,
332
  "step": 12240
333
  },
334
  {
335
+ "epoch": 15.32,
336
+ "learning_rate": 4.681372549019608e-06,
337
+ "loss": 0.0019,
338
+ "step": 12500
339
+ },
340
+ {
341
+ "epoch": 15.93,
342
+ "learning_rate": 4.068627450980392e-06,
343
+ "loss": 0.0021,
344
+ "step": 13000
345
+ },
346
+ {
347
+ "epoch": 16.0,
348
+ "eval_accuracy": 0.973969031442566,
349
+ "eval_f1": 0.9657132819112408,
350
+ "eval_loss": 0.2030537873506546,
351
+ "eval_precision": 0.964773452456924,
352
+ "eval_recall": 0.9666549442117714,
353
+ "eval_runtime": 4.0654,
354
+ "eval_samples_per_second": 356.916,
355
+ "eval_steps_per_second": 22.384,
356
+ "step": 13056
357
+ },
358
+ {
359
+ "epoch": 16.54,
360
+ "learning_rate": 3.4558823529411766e-06,
361
+ "loss": 0.0017,
362
+ "step": 13500
363
+ },
364
+ {
365
+ "epoch": 17.0,
366
+ "eval_accuracy": 0.9744035392637067,
367
+ "eval_f1": 0.9668322128359497,
368
+ "eval_loss": 0.1976209431886673,
369
+ "eval_precision": 0.9658605066683683,
370
+ "eval_recall": 0.9678058761469357,
371
+ "eval_runtime": 4.0709,
372
+ "eval_samples_per_second": 356.434,
373
+ "eval_steps_per_second": 22.354,
374
+ "step": 13872
375
+ },
376
+ {
377
+ "epoch": 17.16,
378
+ "learning_rate": 2.843137254901961e-06,
379
+ "loss": 0.0015,
380
+ "step": 14000
381
+ },
382
+ {
383
+ "epoch": 17.77,
384
+ "learning_rate": 2.2303921568627456e-06,
385
+ "loss": 0.0015,
386
+ "step": 14500
387
+ },
388
+ {
389
+ "epoch": 18.0,
390
+ "eval_accuracy": 0.9746997945963027,
391
+ "eval_f1": 0.9669350201265093,
392
+ "eval_loss": 0.19767752289772034,
393
+ "eval_precision": 0.9662250598563448,
394
+ "eval_recall": 0.967646024489274,
395
+ "eval_runtime": 4.1567,
396
+ "eval_samples_per_second": 349.074,
397
+ "eval_steps_per_second": 21.892,
398
+ "step": 14688
399
  }
400
  ],
401
+ "max_steps": 16320,
402
+ "num_train_epochs": 20,
403
+ "total_flos": 6.14323669958656e+16,
404
  "trial_name": null,
405
  "trial_params": null
406
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2ad2280933fd2dfde8ee13fc62c6b195937dddc4b2505fce56231e549d3c475
3
- size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3f5d9fbb7dcb238c7722f2a11d35ccddcf96b37c8ed9a1d879458d25efd5245
3
+ size 3183