sbaner24 commited on
Commit
413f12a
1 Parent(s): c20fecb

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +306 -450
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 50.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.19366493821144104,
5
- "eval_runtime": 0.2357,
6
- "eval_samples_per_second": 114.572,
7
- "eval_steps_per_second": 4.243,
8
- "train_loss": 0.3985473644733429,
9
- "train_runtime": 860.9181,
10
- "train_samples_per_second": 13.648,
11
- "train_steps_per_second": 0.058
12
  }
 
1
  {
2
+ "epoch": 33.33,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.059336088597774506,
5
+ "eval_runtime": 0.1802,
6
+ "eval_samples_per_second": 94.35,
7
+ "eval_steps_per_second": 5.55,
8
+ "train_loss": 0.1723680231720209,
9
+ "train_runtime": 293.6769,
10
+ "train_samples_per_second": 25.709,
11
+ "train_steps_per_second": 0.17
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.19366493821144104,
5
- "eval_runtime": 0.2357,
6
- "eval_samples_per_second": 114.572,
7
- "eval_steps_per_second": 4.243
8
  }
 
1
  {
2
+ "epoch": 33.33,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.059336088597774506,
5
+ "eval_runtime": 0.1802,
6
+ "eval_samples_per_second": 94.35,
7
+ "eval_steps_per_second": 5.55
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 0.3985473644733429,
4
- "train_runtime": 860.9181,
5
- "train_samples_per_second": 13.648,
6
- "train_steps_per_second": 0.058
7
  }
 
1
  {
2
+ "epoch": 33.33,
3
+ "train_loss": 0.1723680231720209,
4
+ "train_runtime": 293.6769,
5
+ "train_samples_per_second": 25.709,
6
+ "train_steps_per_second": 0.17
7
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "vit-base-patch16-224-Trial008-YEL_STEM/checkpoint-13",
4
- "epoch": 50.0,
5
  "global_step": 50,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
@@ -10,766 +10,622 @@
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 1e-05,
13
- "loss": 0.7185,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.5925925925925926,
19
- "eval_loss": 0.7705853581428528,
20
- "eval_runtime": 0.1712,
21
- "eval_samples_per_second": 157.732,
22
- "eval_steps_per_second": 5.842,
23
  "step": 1
24
  },
25
  {
26
- "epoch": 2.0,
27
  "learning_rate": 2e-05,
28
- "loss": 0.7119,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_accuracy": 0.5555555555555556,
34
- "eval_loss": 0.6863459944725037,
35
- "eval_runtime": 0.1662,
36
- "eval_samples_per_second": 162.481,
37
- "eval_steps_per_second": 6.018,
38
- "step": 2
39
- },
40
- {
41
- "epoch": 3.0,
42
  "learning_rate": 3e-05,
43
- "loss": 0.6949,
44
  "step": 3
45
  },
46
  {
47
- "epoch": 3.0,
48
- "eval_accuracy": 0.6296296296296297,
49
- "eval_loss": 0.6335163116455078,
50
- "eval_runtime": 0.1656,
51
- "eval_samples_per_second": 163.044,
52
- "eval_steps_per_second": 6.039,
53
  "step": 3
54
  },
55
  {
56
- "epoch": 4.0,
57
  "learning_rate": 4e-05,
58
- "loss": 0.6605,
59
- "step": 4
60
- },
61
- {
62
- "epoch": 4.0,
63
- "eval_accuracy": 0.7037037037037037,
64
- "eval_loss": 0.5916621088981628,
65
- "eval_runtime": 0.1676,
66
- "eval_samples_per_second": 161.123,
67
- "eval_steps_per_second": 5.968,
68
  "step": 4
69
  },
70
  {
71
- "epoch": 5.0,
72
  "learning_rate": 5e-05,
73
- "loss": 0.6505,
74
  "step": 5
75
  },
76
  {
77
- "epoch": 5.0,
78
- "eval_accuracy": 0.7407407407407407,
79
- "eval_loss": 0.5240380764007568,
80
- "eval_runtime": 0.1669,
81
- "eval_samples_per_second": 161.734,
82
- "eval_steps_per_second": 5.99,
83
  "step": 5
84
  },
85
  {
86
- "epoch": 6.0,
87
  "learning_rate": 4.888888888888889e-05,
88
- "loss": 0.59,
89
  "step": 6
90
  },
91
  {
92
- "epoch": 6.0,
93
- "eval_accuracy": 0.7037037037037037,
94
- "eval_loss": 0.5366202592849731,
95
- "eval_runtime": 0.1664,
96
- "eval_samples_per_second": 162.276,
97
- "eval_steps_per_second": 6.01,
98
  "step": 6
99
  },
100
  {
101
- "epoch": 7.0,
102
  "learning_rate": 4.7777777777777784e-05,
103
- "loss": 0.6172,
104
  "step": 7
105
  },
106
  {
107
- "epoch": 7.0,
108
- "eval_accuracy": 0.7407407407407407,
109
- "eval_loss": 0.460934042930603,
110
- "eval_runtime": 0.1699,
111
- "eval_samples_per_second": 158.909,
112
- "eval_steps_per_second": 5.886,
113
  "step": 7
114
  },
115
  {
116
- "epoch": 8.0,
117
  "learning_rate": 4.666666666666667e-05,
118
- "loss": 0.5515,
119
  "step": 8
120
  },
121
  {
122
- "epoch": 8.0,
123
- "eval_accuracy": 0.8888888888888888,
124
- "eval_loss": 0.36247193813323975,
125
- "eval_runtime": 0.1698,
126
- "eval_samples_per_second": 158.965,
127
- "eval_steps_per_second": 5.888,
128
- "step": 8
129
- },
130
- {
131
- "epoch": 9.0,
132
  "learning_rate": 4.555555555555556e-05,
133
- "loss": 0.5121,
134
  "step": 9
135
  },
136
  {
137
- "epoch": 9.0,
138
- "eval_accuracy": 0.8888888888888888,
139
- "eval_loss": 0.32386109232902527,
140
- "eval_runtime": 0.1742,
141
- "eval_samples_per_second": 154.975,
142
- "eval_steps_per_second": 5.74,
143
  "step": 9
144
  },
145
  {
146
- "epoch": 10.0,
147
  "learning_rate": 4.4444444444444447e-05,
148
- "loss": 0.5379,
149
- "step": 10
150
- },
151
- {
152
- "epoch": 10.0,
153
- "eval_accuracy": 0.8518518518518519,
154
- "eval_loss": 0.3688213527202606,
155
- "eval_runtime": 0.1675,
156
- "eval_samples_per_second": 161.227,
157
- "eval_steps_per_second": 5.971,
158
  "step": 10
159
  },
160
  {
161
- "epoch": 11.0,
162
  "learning_rate": 4.3333333333333334e-05,
163
- "loss": 0.4648,
164
  "step": 11
165
  },
166
  {
167
- "epoch": 11.0,
168
- "eval_accuracy": 0.8148148148148148,
169
- "eval_loss": 0.35447680950164795,
170
- "eval_runtime": 0.1732,
171
- "eval_samples_per_second": 155.859,
172
- "eval_steps_per_second": 5.773,
173
  "step": 11
174
  },
175
  {
176
- "epoch": 12.0,
177
  "learning_rate": 4.222222222222222e-05,
178
- "loss": 0.4653,
179
  "step": 12
180
  },
181
  {
182
- "epoch": 12.0,
183
- "eval_accuracy": 0.9259259259259259,
184
- "eval_loss": 0.24839486181735992,
185
- "eval_runtime": 0.1653,
186
- "eval_samples_per_second": 163.353,
187
- "eval_steps_per_second": 6.05,
188
  "step": 12
189
  },
190
  {
191
- "epoch": 13.0,
192
  "learning_rate": 4.111111111111111e-05,
193
- "loss": 0.4433,
194
  "step": 13
195
  },
196
  {
197
- "epoch": 13.0,
198
- "eval_accuracy": 1.0,
199
- "eval_loss": 0.19366493821144104,
200
- "eval_runtime": 0.1654,
201
- "eval_samples_per_second": 163.25,
202
- "eval_steps_per_second": 6.046,
203
  "step": 13
204
  },
205
  {
206
- "epoch": 14.0,
207
  "learning_rate": 4e-05,
208
- "loss": 0.4537,
209
  "step": 14
210
  },
211
  {
212
- "epoch": 14.0,
213
- "eval_accuracy": 0.9629629629629629,
214
- "eval_loss": 0.18937674164772034,
215
- "eval_runtime": 0.1655,
216
- "eval_samples_per_second": 163.171,
217
- "eval_steps_per_second": 6.043,
218
- "step": 14
219
- },
220
- {
221
- "epoch": 15.0,
222
  "learning_rate": 3.888888888888889e-05,
223
- "loss": 0.427,
224
  "step": 15
225
  },
226
  {
227
- "epoch": 15.0,
228
- "eval_accuracy": 0.8518518518518519,
229
- "eval_loss": 0.22488076984882355,
230
- "eval_runtime": 0.1664,
231
- "eval_samples_per_second": 162.218,
232
- "eval_steps_per_second": 6.008,
233
  "step": 15
234
  },
235
  {
236
- "epoch": 16.0,
237
  "learning_rate": 3.777777777777778e-05,
238
- "loss": 0.4154,
239
  "step": 16
240
  },
241
  {
242
- "epoch": 16.0,
243
- "eval_accuracy": 0.9629629629629629,
244
- "eval_loss": 0.15886831283569336,
245
- "eval_runtime": 0.1726,
246
- "eval_samples_per_second": 156.432,
247
- "eval_steps_per_second": 5.794,
248
- "step": 16
249
- },
250
- {
251
- "epoch": 17.0,
252
  "learning_rate": 3.6666666666666666e-05,
253
- "loss": 0.3895,
254
  "step": 17
255
  },
256
  {
257
- "epoch": 17.0,
258
- "eval_accuracy": 1.0,
259
- "eval_loss": 0.10410090535879135,
260
- "eval_runtime": 0.1666,
261
- "eval_samples_per_second": 162.095,
262
- "eval_steps_per_second": 6.004,
263
  "step": 17
264
  },
265
  {
266
- "epoch": 18.0,
267
  "learning_rate": 3.555555555555556e-05,
268
- "loss": 0.3994,
269
  "step": 18
270
  },
271
  {
272
- "epoch": 18.0,
273
- "eval_accuracy": 1.0,
274
- "eval_loss": 0.09158260375261307,
275
- "eval_runtime": 0.1684,
276
- "eval_samples_per_second": 160.318,
277
- "eval_steps_per_second": 5.938,
278
  "step": 18
279
  },
280
  {
281
- "epoch": 19.0,
282
  "learning_rate": 3.444444444444445e-05,
283
- "loss": 0.3692,
284
  "step": 19
285
  },
286
  {
287
- "epoch": 19.0,
288
- "eval_accuracy": 1.0,
289
- "eval_loss": 0.09137348085641861,
290
- "eval_runtime": 0.1719,
291
- "eval_samples_per_second": 157.043,
292
- "eval_steps_per_second": 5.816,
293
  "step": 19
294
  },
295
  {
296
- "epoch": 20.0,
297
  "learning_rate": 3.3333333333333335e-05,
298
- "loss": 0.3647,
299
  "step": 20
300
  },
301
  {
302
- "epoch": 20.0,
303
- "eval_accuracy": 0.8888888888888888,
304
- "eval_loss": 0.14642660319805145,
305
- "eval_runtime": 0.1664,
306
- "eval_samples_per_second": 162.284,
307
- "eval_steps_per_second": 6.011,
308
- "step": 20
309
- },
310
- {
311
- "epoch": 21.0,
312
  "learning_rate": 3.222222222222223e-05,
313
- "loss": 0.3789,
314
  "step": 21
315
  },
316
  {
317
- "epoch": 21.0,
318
- "eval_accuracy": 0.9259259259259259,
319
- "eval_loss": 0.15246732532978058,
320
- "eval_runtime": 0.1668,
321
- "eval_samples_per_second": 161.905,
322
- "eval_steps_per_second": 5.996,
323
  "step": 21
324
  },
325
  {
326
- "epoch": 22.0,
327
  "learning_rate": 3.111111111111111e-05,
328
- "loss": 0.3889,
329
- "step": 22
330
- },
331
- {
332
- "epoch": 22.0,
333
- "eval_accuracy": 1.0,
334
- "eval_loss": 0.09971807897090912,
335
- "eval_runtime": 0.1672,
336
- "eval_samples_per_second": 161.509,
337
- "eval_steps_per_second": 5.982,
338
  "step": 22
339
  },
340
  {
341
- "epoch": 23.0,
342
  "learning_rate": 3e-05,
343
- "loss": 0.3312,
344
  "step": 23
345
  },
346
  {
347
- "epoch": 23.0,
348
- "eval_accuracy": 1.0,
349
- "eval_loss": 0.0697513148188591,
350
- "eval_runtime": 0.1661,
351
- "eval_samples_per_second": 162.597,
352
- "eval_steps_per_second": 6.022,
353
  "step": 23
354
  },
355
  {
356
- "epoch": 24.0,
357
  "learning_rate": 2.8888888888888888e-05,
358
- "loss": 0.3653,
359
  "step": 24
360
  },
361
  {
362
- "epoch": 24.0,
363
- "eval_accuracy": 1.0,
364
- "eval_loss": 0.06495417654514313,
365
- "eval_runtime": 0.1655,
366
- "eval_samples_per_second": 163.146,
367
- "eval_steps_per_second": 6.042,
368
  "step": 24
369
  },
370
  {
371
- "epoch": 25.0,
372
  "learning_rate": 2.777777777777778e-05,
373
- "loss": 0.3499,
374
  "step": 25
375
  },
376
  {
377
- "epoch": 25.0,
378
- "eval_accuracy": 1.0,
379
- "eval_loss": 0.06258516013622284,
380
- "eval_runtime": 0.1682,
381
- "eval_samples_per_second": 160.522,
382
- "eval_steps_per_second": 5.945,
383
  "step": 25
384
  },
385
  {
386
- "epoch": 26.0,
387
  "learning_rate": 2.6666666666666667e-05,
388
- "loss": 0.3602,
389
  "step": 26
390
  },
391
  {
392
- "epoch": 26.0,
393
- "eval_accuracy": 1.0,
394
- "eval_loss": 0.07324908673763275,
395
- "eval_runtime": 0.1705,
396
- "eval_samples_per_second": 158.4,
397
- "eval_steps_per_second": 5.867,
398
- "step": 26
399
- },
400
- {
401
- "epoch": 27.0,
402
  "learning_rate": 2.5555555555555554e-05,
403
- "loss": 0.3209,
404
  "step": 27
405
  },
406
  {
407
- "epoch": 27.0,
408
- "eval_accuracy": 1.0,
409
- "eval_loss": 0.06223699823021889,
410
- "eval_runtime": 0.1675,
411
- "eval_samples_per_second": 161.197,
412
- "eval_steps_per_second": 5.97,
413
  "step": 27
414
  },
415
  {
416
- "epoch": 28.0,
417
  "learning_rate": 2.4444444444444445e-05,
418
- "loss": 0.3,
419
- "step": 28
420
- },
421
- {
422
- "epoch": 28.0,
423
- "eval_accuracy": 1.0,
424
- "eval_loss": 0.054389819502830505,
425
- "eval_runtime": 0.1713,
426
- "eval_samples_per_second": 157.638,
427
- "eval_steps_per_second": 5.838,
428
  "step": 28
429
  },
430
  {
431
- "epoch": 29.0,
432
  "learning_rate": 2.3333333333333336e-05,
433
- "loss": 0.2738,
434
  "step": 29
435
  },
436
  {
437
- "epoch": 29.0,
438
- "eval_accuracy": 1.0,
439
- "eval_loss": 0.04480647295713425,
440
- "eval_runtime": 0.1694,
441
- "eval_samples_per_second": 159.352,
442
- "eval_steps_per_second": 5.902,
443
  "step": 29
444
  },
445
  {
446
- "epoch": 30.0,
447
  "learning_rate": 2.2222222222222223e-05,
448
- "loss": 0.3283,
449
  "step": 30
450
  },
451
  {
452
- "epoch": 30.0,
453
- "eval_accuracy": 1.0,
454
- "eval_loss": 0.04304824769496918,
455
- "eval_runtime": 0.1678,
456
- "eval_samples_per_second": 160.864,
457
- "eval_steps_per_second": 5.958,
458
  "step": 30
459
  },
460
  {
461
- "epoch": 31.0,
462
  "learning_rate": 2.111111111111111e-05,
463
- "loss": 0.3162,
464
  "step": 31
465
  },
466
  {
467
- "epoch": 31.0,
468
- "eval_accuracy": 1.0,
469
- "eval_loss": 0.040174148976802826,
470
- "eval_runtime": 0.168,
471
- "eval_samples_per_second": 160.743,
472
- "eval_steps_per_second": 5.953,
473
  "step": 31
474
  },
475
  {
476
- "epoch": 32.0,
477
  "learning_rate": 2e-05,
478
- "loss": 0.3411,
479
- "step": 32
480
- },
481
- {
482
- "epoch": 32.0,
483
- "eval_accuracy": 1.0,
484
- "eval_loss": 0.03942341357469559,
485
- "eval_runtime": 0.1668,
486
- "eval_samples_per_second": 161.85,
487
- "eval_steps_per_second": 5.994,
488
  "step": 32
489
  },
490
  {
491
- "epoch": 33.0,
492
  "learning_rate": 1.888888888888889e-05,
493
- "loss": 0.3195,
494
  "step": 33
495
  },
496
  {
497
- "epoch": 33.0,
498
  "eval_accuracy": 1.0,
499
- "eval_loss": 0.03807567059993744,
500
- "eval_runtime": 0.1706,
501
- "eval_samples_per_second": 158.261,
502
- "eval_steps_per_second": 5.862,
503
  "step": 33
504
  },
505
  {
506
- "epoch": 34.0,
507
  "learning_rate": 1.777777777777778e-05,
508
- "loss": 0.3111,
509
  "step": 34
510
  },
511
  {
512
- "epoch": 34.0,
513
- "eval_accuracy": 1.0,
514
- "eval_loss": 0.0349883958697319,
515
- "eval_runtime": 0.166,
516
- "eval_samples_per_second": 162.686,
517
- "eval_steps_per_second": 6.025,
518
- "step": 34
519
- },
520
- {
521
- "epoch": 35.0,
522
  "learning_rate": 1.6666666666666667e-05,
523
- "loss": 0.2816,
524
  "step": 35
525
  },
526
  {
527
- "epoch": 35.0,
528
  "eval_accuracy": 1.0,
529
- "eval_loss": 0.034967441111803055,
530
- "eval_runtime": 0.1673,
531
- "eval_samples_per_second": 161.39,
532
- "eval_steps_per_second": 5.977,
533
  "step": 35
534
  },
535
  {
536
- "epoch": 36.0,
537
  "learning_rate": 1.5555555555555555e-05,
538
- "loss": 0.2602,
539
  "step": 36
540
  },
541
  {
542
- "epoch": 36.0,
543
  "eval_accuracy": 1.0,
544
- "eval_loss": 0.03582083806395531,
545
- "eval_runtime": 0.1664,
546
- "eval_samples_per_second": 162.232,
547
- "eval_steps_per_second": 6.009,
548
  "step": 36
549
  },
550
  {
551
- "epoch": 37.0,
552
  "learning_rate": 1.4444444444444444e-05,
553
- "loss": 0.3128,
554
  "step": 37
555
  },
556
  {
557
- "epoch": 37.0,
558
  "eval_accuracy": 1.0,
559
- "eval_loss": 0.038833122700452805,
560
- "eval_runtime": 0.1722,
561
- "eval_samples_per_second": 156.817,
562
- "eval_steps_per_second": 5.808,
563
  "step": 37
564
  },
565
  {
566
- "epoch": 38.0,
567
  "learning_rate": 1.3333333333333333e-05,
568
- "loss": 0.326,
569
  "step": 38
570
  },
571
  {
572
- "epoch": 38.0,
573
- "eval_accuracy": 1.0,
574
- "eval_loss": 0.04981035739183426,
575
- "eval_runtime": 0.1656,
576
- "eval_samples_per_second": 163.003,
577
- "eval_steps_per_second": 6.037,
578
- "step": 38
579
- },
580
- {
581
- "epoch": 39.0,
582
  "learning_rate": 1.2222222222222222e-05,
583
- "loss": 0.3228,
584
  "step": 39
585
  },
586
  {
587
- "epoch": 39.0,
588
- "eval_accuracy": 1.0,
589
- "eval_loss": 0.0702390968799591,
590
- "eval_runtime": 0.1726,
591
- "eval_samples_per_second": 156.474,
592
- "eval_steps_per_second": 5.795,
593
  "step": 39
594
  },
595
  {
596
- "epoch": 40.0,
597
  "learning_rate": 1.1111111111111112e-05,
598
- "loss": 0.3073,
599
- "step": 40
600
- },
601
- {
602
- "epoch": 40.0,
603
- "eval_accuracy": 0.9629629629629629,
604
- "eval_loss": 0.07822608947753906,
605
- "eval_runtime": 0.1726,
606
- "eval_samples_per_second": 156.457,
607
- "eval_steps_per_second": 5.795,
608
  "step": 40
609
  },
610
  {
611
- "epoch": 41.0,
612
  "learning_rate": 1e-05,
613
- "loss": 0.3266,
614
  "step": 41
615
  },
616
  {
617
- "epoch": 41.0,
618
- "eval_accuracy": 0.9629629629629629,
619
- "eval_loss": 0.07207214087247849,
620
- "eval_runtime": 0.1667,
621
- "eval_samples_per_second": 161.996,
622
- "eval_steps_per_second": 6.0,
623
  "step": 41
624
  },
625
  {
626
- "epoch": 42.0,
627
  "learning_rate": 8.88888888888889e-06,
628
- "loss": 0.3546,
629
  "step": 42
630
  },
631
  {
632
- "epoch": 42.0,
633
- "eval_accuracy": 1.0,
634
- "eval_loss": 0.05794690176844597,
635
- "eval_runtime": 0.1682,
636
- "eval_samples_per_second": 160.517,
637
- "eval_steps_per_second": 5.945,
638
  "step": 42
639
  },
640
  {
641
- "epoch": 43.0,
642
  "learning_rate": 7.777777777777777e-06,
643
- "loss": 0.2832,
644
  "step": 43
645
  },
646
  {
647
- "epoch": 43.0,
648
- "eval_accuracy": 1.0,
649
- "eval_loss": 0.04870154336094856,
650
- "eval_runtime": 0.1649,
651
- "eval_samples_per_second": 163.704,
652
- "eval_steps_per_second": 6.063,
653
  "step": 43
654
  },
655
  {
656
- "epoch": 44.0,
657
  "learning_rate": 6.666666666666667e-06,
658
- "loss": 0.2872,
659
  "step": 44
660
  },
661
  {
662
- "epoch": 44.0,
663
- "eval_accuracy": 1.0,
664
- "eval_loss": 0.04281274601817131,
665
- "eval_runtime": 0.1667,
666
- "eval_samples_per_second": 162.007,
667
- "eval_steps_per_second": 6.0,
668
- "step": 44
669
- },
670
- {
671
- "epoch": 45.0,
672
  "learning_rate": 5.555555555555556e-06,
673
- "loss": 0.2699,
674
  "step": 45
675
  },
676
  {
677
- "epoch": 45.0,
678
- "eval_accuracy": 1.0,
679
- "eval_loss": 0.03951677307486534,
680
- "eval_runtime": 0.1669,
681
- "eval_samples_per_second": 161.806,
682
- "eval_steps_per_second": 5.993,
683
  "step": 45
684
  },
685
  {
686
- "epoch": 46.0,
687
  "learning_rate": 4.444444444444445e-06,
688
- "loss": 0.3002,
689
  "step": 46
690
  },
691
  {
692
- "epoch": 46.0,
693
- "eval_accuracy": 1.0,
694
- "eval_loss": 0.03907058387994766,
695
- "eval_runtime": 0.1677,
696
- "eval_samples_per_second": 161.001,
697
- "eval_steps_per_second": 5.963,
698
- "step": 46
699
- },
700
- {
701
- "epoch": 47.0,
702
  "learning_rate": 3.3333333333333333e-06,
703
- "loss": 0.327,
704
  "step": 47
705
  },
706
  {
707
- "epoch": 47.0,
708
- "eval_accuracy": 1.0,
709
- "eval_loss": 0.03901884704828262,
710
- "eval_runtime": 0.168,
711
- "eval_samples_per_second": 160.751,
712
- "eval_steps_per_second": 5.954,
713
  "step": 47
714
  },
715
  {
716
- "epoch": 48.0,
717
  "learning_rate": 2.2222222222222225e-06,
718
- "loss": 0.2746,
719
  "step": 48
720
  },
721
  {
722
- "epoch": 48.0,
723
- "eval_accuracy": 1.0,
724
- "eval_loss": 0.0386575423181057,
725
- "eval_runtime": 0.1697,
726
- "eval_samples_per_second": 159.095,
727
- "eval_steps_per_second": 5.892,
728
  "step": 48
729
  },
730
  {
731
- "epoch": 49.0,
732
  "learning_rate": 1.1111111111111112e-06,
733
- "loss": 0.2781,
734
  "step": 49
735
  },
736
  {
737
- "epoch": 49.0,
738
- "eval_accuracy": 1.0,
739
- "eval_loss": 0.03857966884970665,
740
- "eval_runtime": 0.1664,
741
- "eval_samples_per_second": 162.219,
742
- "eval_steps_per_second": 6.008,
743
  "step": 49
744
  },
745
  {
746
- "epoch": 50.0,
747
  "learning_rate": 0.0,
748
- "loss": 0.2925,
749
  "step": 50
750
  },
751
  {
752
- "epoch": 50.0,
753
- "eval_accuracy": 1.0,
754
- "eval_loss": 0.03861464560031891,
755
- "eval_runtime": 0.1753,
756
- "eval_samples_per_second": 153.994,
757
- "eval_steps_per_second": 5.703,
758
  "step": 50
759
  },
760
  {
761
- "epoch": 50.0,
762
  "step": 50,
763
- "total_flos": 9.10530877971456e+17,
764
- "train_loss": 0.3985473644733429,
765
- "train_runtime": 860.9181,
766
- "train_samples_per_second": 13.648,
767
- "train_steps_per_second": 0.058
768
  }
769
  ],
770
  "max_steps": 50,
771
  "num_train_epochs": 50,
772
- "total_flos": 9.10530877971456e+17,
773
  "trial_name": null,
774
  "trial_params": null
775
  }
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "vit-base-patch16-224-Trial008-YEL_STEM/checkpoint-33",
4
+ "epoch": 33.333333333333336,
5
  "global_step": 50,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 1e-05,
13
+ "loss": 0.544,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.4117647058823529,
19
+ "eval_loss": 0.8179483413696289,
20
+ "eval_runtime": 0.1414,
21
+ "eval_samples_per_second": 120.252,
22
+ "eval_steps_per_second": 7.074,
23
  "step": 1
24
  },
25
  {
26
+ "epoch": 1.33,
27
  "learning_rate": 2e-05,
28
+ "loss": 0.189,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 2.0,
 
 
 
 
 
 
 
 
 
33
  "learning_rate": 3e-05,
34
+ "loss": 0.3416,
35
  "step": 3
36
  },
37
  {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.5294117647058824,
40
+ "eval_loss": 0.7448362112045288,
41
+ "eval_runtime": 0.1314,
42
+ "eval_samples_per_second": 129.417,
43
+ "eval_steps_per_second": 7.613,
44
  "step": 3
45
  },
46
  {
47
+ "epoch": 2.67,
48
  "learning_rate": 4e-05,
49
+ "loss": 0.3467,
 
 
 
 
 
 
 
 
 
50
  "step": 4
51
  },
52
  {
53
+ "epoch": 3.0,
54
  "learning_rate": 5e-05,
55
+ "loss": 0.1412,
56
  "step": 5
57
  },
58
  {
59
+ "epoch": 3.0,
60
+ "eval_accuracy": 0.5294117647058824,
61
+ "eval_loss": 0.7606351375579834,
62
+ "eval_runtime": 0.1335,
63
+ "eval_samples_per_second": 127.334,
64
+ "eval_steps_per_second": 7.49,
65
  "step": 5
66
  },
67
  {
68
+ "epoch": 4.0,
69
  "learning_rate": 4.888888888888889e-05,
70
+ "loss": 0.4868,
71
  "step": 6
72
  },
73
  {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.6470588235294118,
76
+ "eval_loss": 0.5646853446960449,
77
+ "eval_runtime": 0.1292,
78
+ "eval_samples_per_second": 131.608,
79
+ "eval_steps_per_second": 7.742,
80
  "step": 6
81
  },
82
  {
83
+ "epoch": 5.0,
84
  "learning_rate": 4.7777777777777784e-05,
85
+ "loss": 0.3852,
86
  "step": 7
87
  },
88
  {
89
+ "epoch": 5.0,
90
+ "eval_accuracy": 0.8235294117647058,
91
+ "eval_loss": 0.46457329392433167,
92
+ "eval_runtime": 0.1294,
93
+ "eval_samples_per_second": 131.34,
94
+ "eval_steps_per_second": 7.726,
95
  "step": 7
96
  },
97
  {
98
+ "epoch": 5.33,
99
  "learning_rate": 4.666666666666667e-05,
100
+ "loss": 0.1219,
101
  "step": 8
102
  },
103
  {
104
+ "epoch": 6.0,
 
 
 
 
 
 
 
 
 
105
  "learning_rate": 4.555555555555556e-05,
106
+ "loss": 0.284,
107
  "step": 9
108
  },
109
  {
110
+ "epoch": 6.0,
111
+ "eval_accuracy": 0.8235294117647058,
112
+ "eval_loss": 0.42997172474861145,
113
+ "eval_runtime": 0.1289,
114
+ "eval_samples_per_second": 131.916,
115
+ "eval_steps_per_second": 7.76,
116
  "step": 9
117
  },
118
  {
119
+ "epoch": 6.67,
120
  "learning_rate": 4.4444444444444447e-05,
121
+ "loss": 0.2402,
 
 
 
 
 
 
 
 
 
122
  "step": 10
123
  },
124
  {
125
+ "epoch": 7.0,
126
  "learning_rate": 4.3333333333333334e-05,
127
+ "loss": 0.1075,
128
  "step": 11
129
  },
130
  {
131
+ "epoch": 7.0,
132
+ "eval_accuracy": 0.8235294117647058,
133
+ "eval_loss": 0.4628201127052307,
134
+ "eval_runtime": 0.1339,
135
+ "eval_samples_per_second": 126.943,
136
+ "eval_steps_per_second": 7.467,
137
  "step": 11
138
  },
139
  {
140
+ "epoch": 8.0,
141
  "learning_rate": 4.222222222222222e-05,
142
+ "loss": 0.3243,
143
  "step": 12
144
  },
145
  {
146
+ "epoch": 8.0,
147
+ "eval_accuracy": 0.7647058823529411,
148
+ "eval_loss": 0.4686700999736786,
149
+ "eval_runtime": 0.1311,
150
+ "eval_samples_per_second": 129.643,
151
+ "eval_steps_per_second": 7.626,
152
  "step": 12
153
  },
154
  {
155
+ "epoch": 9.0,
156
  "learning_rate": 4.111111111111111e-05,
157
+ "loss": 0.3317,
158
  "step": 13
159
  },
160
  {
161
+ "epoch": 9.0,
162
+ "eval_accuracy": 0.8235294117647058,
163
+ "eval_loss": 0.40888315439224243,
164
+ "eval_runtime": 0.1345,
165
+ "eval_samples_per_second": 126.352,
166
+ "eval_steps_per_second": 7.432,
167
  "step": 13
168
  },
169
  {
170
+ "epoch": 9.33,
171
  "learning_rate": 4e-05,
172
+ "loss": 0.078,
173
  "step": 14
174
  },
175
  {
176
+ "epoch": 10.0,
 
 
 
 
 
 
 
 
 
177
  "learning_rate": 3.888888888888889e-05,
178
+ "loss": 0.146,
179
  "step": 15
180
  },
181
  {
182
+ "epoch": 10.0,
183
+ "eval_accuracy": 0.8823529411764706,
184
+ "eval_loss": 0.3329775929450989,
185
+ "eval_runtime": 0.1287,
186
+ "eval_samples_per_second": 132.111,
187
+ "eval_steps_per_second": 7.771,
188
  "step": 15
189
  },
190
  {
191
+ "epoch": 10.67,
192
  "learning_rate": 3.777777777777778e-05,
193
+ "loss": 0.1816,
194
  "step": 16
195
  },
196
  {
197
+ "epoch": 11.0,
 
 
 
 
 
 
 
 
 
198
  "learning_rate": 3.6666666666666666e-05,
199
+ "loss": 0.0762,
200
  "step": 17
201
  },
202
  {
203
+ "epoch": 11.0,
204
+ "eval_accuracy": 0.8823529411764706,
205
+ "eval_loss": 0.2941233217716217,
206
+ "eval_runtime": 0.1341,
207
+ "eval_samples_per_second": 126.788,
208
+ "eval_steps_per_second": 7.458,
209
  "step": 17
210
  },
211
  {
212
+ "epoch": 12.0,
213
  "learning_rate": 3.555555555555556e-05,
214
+ "loss": 0.2351,
215
  "step": 18
216
  },
217
  {
218
+ "epoch": 12.0,
219
+ "eval_accuracy": 0.8823529411764706,
220
+ "eval_loss": 0.3217422664165497,
221
+ "eval_runtime": 0.1302,
222
+ "eval_samples_per_second": 130.557,
223
+ "eval_steps_per_second": 7.68,
224
  "step": 18
225
  },
226
  {
227
+ "epoch": 13.0,
228
  "learning_rate": 3.444444444444445e-05,
229
+ "loss": 0.2458,
230
  "step": 19
231
  },
232
  {
233
+ "epoch": 13.0,
234
+ "eval_accuracy": 0.8823529411764706,
235
+ "eval_loss": 0.37050262093544006,
236
+ "eval_runtime": 0.1312,
237
+ "eval_samples_per_second": 129.619,
238
+ "eval_steps_per_second": 7.625,
239
  "step": 19
240
  },
241
  {
242
+ "epoch": 13.33,
243
  "learning_rate": 3.3333333333333335e-05,
244
+ "loss": 0.096,
245
  "step": 20
246
  },
247
  {
248
+ "epoch": 14.0,
 
 
 
 
 
 
 
 
 
249
  "learning_rate": 3.222222222222223e-05,
250
+ "loss": 0.1431,
251
  "step": 21
252
  },
253
  {
254
+ "epoch": 14.0,
255
+ "eval_accuracy": 0.8823529411764706,
256
+ "eval_loss": 0.3138301968574524,
257
+ "eval_runtime": 0.1384,
258
+ "eval_samples_per_second": 122.875,
259
+ "eval_steps_per_second": 7.228,
260
  "step": 21
261
  },
262
  {
263
+ "epoch": 14.67,
264
  "learning_rate": 3.111111111111111e-05,
265
+ "loss": 0.1592,
 
 
 
 
 
 
 
 
 
266
  "step": 22
267
  },
268
  {
269
+ "epoch": 15.0,
270
  "learning_rate": 3e-05,
271
+ "loss": 0.0883,
272
  "step": 23
273
  },
274
  {
275
+ "epoch": 15.0,
276
+ "eval_accuracy": 0.9411764705882353,
277
+ "eval_loss": 0.15097980201244354,
278
+ "eval_runtime": 0.1309,
279
+ "eval_samples_per_second": 129.914,
280
+ "eval_steps_per_second": 7.642,
281
  "step": 23
282
  },
283
  {
284
+ "epoch": 16.0,
285
  "learning_rate": 2.8888888888888888e-05,
286
+ "loss": 0.1601,
287
  "step": 24
288
  },
289
  {
290
+ "epoch": 16.0,
291
+ "eval_accuracy": 0.9411764705882353,
292
+ "eval_loss": 0.13728952407836914,
293
+ "eval_runtime": 0.131,
294
+ "eval_samples_per_second": 129.734,
295
+ "eval_steps_per_second": 7.631,
296
  "step": 24
297
  },
298
  {
299
+ "epoch": 17.0,
300
  "learning_rate": 2.777777777777778e-05,
301
+ "loss": 0.2212,
302
  "step": 25
303
  },
304
  {
305
+ "epoch": 17.0,
306
+ "eval_accuracy": 0.9411764705882353,
307
+ "eval_loss": 0.11754892766475677,
308
+ "eval_runtime": 0.1434,
309
+ "eval_samples_per_second": 118.566,
310
+ "eval_steps_per_second": 6.974,
311
  "step": 25
312
  },
313
  {
314
+ "epoch": 17.33,
315
  "learning_rate": 2.6666666666666667e-05,
316
+ "loss": 0.0634,
317
  "step": 26
318
  },
319
  {
320
+ "epoch": 18.0,
 
 
 
 
 
 
 
 
 
321
  "learning_rate": 2.5555555555555554e-05,
322
+ "loss": 0.1311,
323
  "step": 27
324
  },
325
  {
326
+ "epoch": 18.0,
327
+ "eval_accuracy": 0.9411764705882353,
328
+ "eval_loss": 0.11296124756336212,
329
+ "eval_runtime": 0.134,
330
+ "eval_samples_per_second": 126.873,
331
+ "eval_steps_per_second": 7.463,
332
  "step": 27
333
  },
334
  {
335
+ "epoch": 18.67,
336
  "learning_rate": 2.4444444444444445e-05,
337
+ "loss": 0.1254,
 
 
 
 
 
 
 
 
 
338
  "step": 28
339
  },
340
  {
341
+ "epoch": 19.0,
342
  "learning_rate": 2.3333333333333336e-05,
343
+ "loss": 0.0801,
344
  "step": 29
345
  },
346
  {
347
+ "epoch": 19.0,
348
+ "eval_accuracy": 0.9411764705882353,
349
+ "eval_loss": 0.15056446194648743,
350
+ "eval_runtime": 0.1358,
351
+ "eval_samples_per_second": 125.162,
352
+ "eval_steps_per_second": 7.362,
353
  "step": 29
354
  },
355
  {
356
+ "epoch": 20.0,
357
  "learning_rate": 2.2222222222222223e-05,
358
+ "loss": 0.1857,
359
  "step": 30
360
  },
361
  {
362
+ "epoch": 20.0,
363
+ "eval_accuracy": 0.9411764705882353,
364
+ "eval_loss": 0.1271848976612091,
365
+ "eval_runtime": 0.1323,
366
+ "eval_samples_per_second": 128.474,
367
+ "eval_steps_per_second": 7.557,
368
  "step": 30
369
  },
370
  {
371
+ "epoch": 21.0,
372
  "learning_rate": 2.111111111111111e-05,
373
+ "loss": 0.241,
374
  "step": 31
375
  },
376
  {
377
+ "epoch": 21.0,
378
+ "eval_accuracy": 0.9411764705882353,
379
+ "eval_loss": 0.0974147617816925,
380
+ "eval_runtime": 0.1295,
381
+ "eval_samples_per_second": 131.238,
382
+ "eval_steps_per_second": 7.72,
383
  "step": 31
384
  },
385
  {
386
+ "epoch": 21.33,
387
  "learning_rate": 2e-05,
388
+ "loss": 0.055,
 
 
 
 
 
 
 
 
 
389
  "step": 32
390
  },
391
  {
392
+ "epoch": 22.0,
393
  "learning_rate": 1.888888888888889e-05,
394
+ "loss": 0.1098,
395
  "step": 33
396
  },
397
  {
398
+ "epoch": 22.0,
399
  "eval_accuracy": 1.0,
400
+ "eval_loss": 0.059336088597774506,
401
+ "eval_runtime": 0.1296,
402
+ "eval_samples_per_second": 131.212,
403
+ "eval_steps_per_second": 7.718,
404
  "step": 33
405
  },
406
  {
407
+ "epoch": 22.67,
408
  "learning_rate": 1.777777777777778e-05,
409
+ "loss": 0.1536,
410
  "step": 34
411
  },
412
  {
413
+ "epoch": 23.0,
 
 
 
 
 
 
 
 
 
414
  "learning_rate": 1.6666666666666667e-05,
415
+ "loss": 0.0464,
416
  "step": 35
417
  },
418
  {
419
+ "epoch": 23.0,
420
  "eval_accuracy": 1.0,
421
+ "eval_loss": 0.05742228403687477,
422
+ "eval_runtime": 0.1389,
423
+ "eval_samples_per_second": 122.425,
424
+ "eval_steps_per_second": 7.201,
425
  "step": 35
426
  },
427
  {
428
+ "epoch": 24.0,
429
  "learning_rate": 1.5555555555555555e-05,
430
+ "loss": 0.1757,
431
  "step": 36
432
  },
433
  {
434
+ "epoch": 24.0,
435
  "eval_accuracy": 1.0,
436
+ "eval_loss": 0.055365510284900665,
437
+ "eval_runtime": 0.1445,
438
+ "eval_samples_per_second": 117.645,
439
+ "eval_steps_per_second": 6.92,
440
  "step": 36
441
  },
442
  {
443
+ "epoch": 25.0,
444
  "learning_rate": 1.4444444444444444e-05,
445
+ "loss": 0.1992,
446
  "step": 37
447
  },
448
  {
449
+ "epoch": 25.0,
450
  "eval_accuracy": 1.0,
451
+ "eval_loss": 0.060540929436683655,
452
+ "eval_runtime": 0.1309,
453
+ "eval_samples_per_second": 129.86,
454
+ "eval_steps_per_second": 7.639,
455
  "step": 37
456
  },
457
  {
458
+ "epoch": 25.33,
459
  "learning_rate": 1.3333333333333333e-05,
460
+ "loss": 0.0717,
461
  "step": 38
462
  },
463
  {
464
+ "epoch": 26.0,
 
 
 
 
 
 
 
 
 
465
  "learning_rate": 1.2222222222222222e-05,
466
+ "loss": 0.1167,
467
  "step": 39
468
  },
469
  {
470
+ "epoch": 26.0,
471
+ "eval_accuracy": 0.9411764705882353,
472
+ "eval_loss": 0.08181151747703552,
473
+ "eval_runtime": 0.1335,
474
+ "eval_samples_per_second": 127.361,
475
+ "eval_steps_per_second": 7.492,
476
  "step": 39
477
  },
478
  {
479
+ "epoch": 26.67,
480
  "learning_rate": 1.1111111111111112e-05,
481
+ "loss": 0.0992,
 
 
 
 
 
 
 
 
 
482
  "step": 40
483
  },
484
  {
485
+ "epoch": 27.0,
486
  "learning_rate": 1e-05,
487
+ "loss": 0.0703,
488
  "step": 41
489
  },
490
  {
491
+ "epoch": 27.0,
492
+ "eval_accuracy": 0.9411764705882353,
493
+ "eval_loss": 0.11766067892313004,
494
+ "eval_runtime": 0.1308,
495
+ "eval_samples_per_second": 129.989,
496
+ "eval_steps_per_second": 7.646,
497
  "step": 41
498
  },
499
  {
500
+ "epoch": 28.0,
501
  "learning_rate": 8.88888888888889e-06,
502
+ "loss": 0.1382,
503
  "step": 42
504
  },
505
  {
506
+ "epoch": 28.0,
507
+ "eval_accuracy": 0.9411764705882353,
508
+ "eval_loss": 0.1280849128961563,
509
+ "eval_runtime": 0.1329,
510
+ "eval_samples_per_second": 127.963,
511
+ "eval_steps_per_second": 7.527,
512
  "step": 42
513
  },
514
  {
515
+ "epoch": 29.0,
516
  "learning_rate": 7.777777777777777e-06,
517
+ "loss": 0.1563,
518
  "step": 43
519
  },
520
  {
521
+ "epoch": 29.0,
522
+ "eval_accuracy": 0.9411764705882353,
523
+ "eval_loss": 0.1357402503490448,
524
+ "eval_runtime": 0.1332,
525
+ "eval_samples_per_second": 127.623,
526
+ "eval_steps_per_second": 7.507,
527
  "step": 43
528
  },
529
  {
530
+ "epoch": 29.33,
531
  "learning_rate": 6.666666666666667e-06,
532
+ "loss": 0.0491,
533
  "step": 44
534
  },
535
  {
536
+ "epoch": 30.0,
 
 
 
 
 
 
 
 
 
537
  "learning_rate": 5.555555555555556e-06,
538
+ "loss": 0.1113,
539
  "step": 45
540
  },
541
  {
542
+ "epoch": 30.0,
543
+ "eval_accuracy": 0.8823529411764706,
544
+ "eval_loss": 0.14173081517219543,
545
+ "eval_runtime": 0.1315,
546
+ "eval_samples_per_second": 129.236,
547
+ "eval_steps_per_second": 7.602,
548
  "step": 45
549
  },
550
  {
551
+ "epoch": 30.67,
552
  "learning_rate": 4.444444444444445e-06,
553
+ "loss": 0.0964,
554
  "step": 46
555
  },
556
  {
557
+ "epoch": 31.0,
 
 
 
 
 
 
 
 
 
558
  "learning_rate": 3.3333333333333333e-06,
559
+ "loss": 0.0639,
560
  "step": 47
561
  },
562
  {
563
+ "epoch": 31.0,
564
+ "eval_accuracy": 0.9411764705882353,
565
+ "eval_loss": 0.12498356401920319,
566
+ "eval_runtime": 0.135,
567
+ "eval_samples_per_second": 125.956,
568
+ "eval_steps_per_second": 7.409,
569
  "step": 47
570
  },
571
  {
572
+ "epoch": 32.0,
573
  "learning_rate": 2.2222222222222225e-06,
574
+ "loss": 0.1564,
575
  "step": 48
576
  },
577
  {
578
+ "epoch": 32.0,
579
+ "eval_accuracy": 0.9411764705882353,
580
+ "eval_loss": 0.11074606329202652,
581
+ "eval_runtime": 0.1319,
582
+ "eval_samples_per_second": 128.928,
583
+ "eval_steps_per_second": 7.584,
584
  "step": 48
585
  },
586
  {
587
+ "epoch": 33.0,
588
  "learning_rate": 1.1111111111111112e-06,
589
+ "loss": 0.1877,
590
  "step": 49
591
  },
592
  {
593
+ "epoch": 33.0,
594
+ "eval_accuracy": 0.9411764705882353,
595
+ "eval_loss": 0.10016259551048279,
596
+ "eval_runtime": 0.137,
597
+ "eval_samples_per_second": 124.112,
598
+ "eval_steps_per_second": 7.301,
599
  "step": 49
600
  },
601
  {
602
+ "epoch": 33.33,
603
  "learning_rate": 0.0,
604
+ "loss": 0.06,
605
  "step": 50
606
  },
607
  {
608
+ "epoch": 33.33,
609
+ "eval_accuracy": 0.9411764705882353,
610
+ "eval_loss": 0.09584321826696396,
611
+ "eval_runtime": 0.1456,
612
+ "eval_samples_per_second": 116.79,
613
+ "eval_steps_per_second": 6.87,
614
  "step": 50
615
  },
616
  {
617
+ "epoch": 33.33,
618
  "step": 50,
619
+ "total_flos": 3.9079210362638746e+17,
620
+ "train_loss": 0.1723680231720209,
621
+ "train_runtime": 293.6769,
622
+ "train_samples_per_second": 25.709,
623
+ "train_steps_per_second": 0.17
624
  }
625
  ],
626
  "max_steps": 50,
627
  "num_train_epochs": 50,
628
+ "total_flos": 3.9079210362638746e+17,
629
  "trial_name": null,
630
  "trial_params": null
631
  }