ChrisZeng commited on
Commit
c30db38
1 Parent(s): 53d6ab5

Training in progress, epoch 7

Browse files
checkpoint-1141/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:645b21030c95ae40ac3efa58aeb77c516129c1e21a7b6f96040b285dab281f95
3
- size 2681490814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4397c0871aa8e40f1ccb350534d7ab81ba3fccb978f0d348c29c46fbebd3f4
3
+ size 2681485310
checkpoint-1141/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a8a61834396ae4adc1129d107763a962535223b9f1f71f8fd0280f439a14cc6
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a64b29d4c457a46664b7f17b3612c04de19d524459d4eb507a1824de9a48d8b
3
  size 1340743917
checkpoint-1141/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8ba5811f005ed35376bd706a06398b37e12b1ccc81171222e0877b0309aae66
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127190e9a123a5ee5c7cae85536b7ea27da0902983a65fced8591323b53379bf
3
  size 14503
checkpoint-1141/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5cbabc59ca6629b30ec20ef4a37b261f5583c218af1ab376085065bb9a7ec5b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6c415d3c794e7691e5da96fc4ee5df698b9430d4ddd779b2851bfafcde2856
3
  size 623
checkpoint-1141/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.2981628179550171,
3
- "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-978",
4
  "epoch": 6.997323135755258,
5
  "global_step": 1141,
6
  "is_hyper_param_search": false,
@@ -9,120 +9,120 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.499999999999999e-07,
13
- "loss": 0.4384,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.7444061962134251,
19
- "eval_f1": 0.7308261375858633,
20
- "eval_loss": 0.39615127444267273,
21
- "eval_runtime": 9.0599,
22
- "eval_samples_per_second": 256.514,
23
- "eval_steps_per_second": 32.119,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 9e-07,
29
- "loss": 0.3447,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.76592082616179,
35
- "eval_f1": 0.7552159046464709,
36
- "eval_loss": 0.3409559428691864,
37
- "eval_runtime": 9.4378,
38
- "eval_samples_per_second": 246.244,
39
- "eval_steps_per_second": 30.833,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 8.499999999999999e-07,
45
- "loss": 0.3057,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_accuracy": 0.7749569707401033,
51
- "eval_f1": 0.768808341108185,
52
- "eval_loss": 0.32338443398475647,
53
- "eval_runtime": 9.0418,
54
- "eval_samples_per_second": 257.028,
55
- "eval_steps_per_second": 32.184,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 8e-07,
61
- "loss": 0.287,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
- "eval_accuracy": 0.7857142857142857,
67
- "eval_f1": 0.7778970154753132,
68
- "eval_loss": 0.3068828284740448,
69
- "eval_runtime": 9.3662,
70
- "eval_samples_per_second": 248.128,
71
- "eval_steps_per_second": 31.069,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
- "learning_rate": 7.5e-07,
77
- "loss": 0.2742,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
- "eval_accuracy": 0.7887263339070568,
83
- "eval_f1": 0.7821763089027973,
84
- "eval_loss": 0.30303624272346497,
85
- "eval_runtime": 8.908,
86
- "eval_samples_per_second": 260.89,
87
- "eval_steps_per_second": 32.667,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
- "learning_rate": 7e-07,
93
- "loss": 0.2676,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
- "eval_accuracy": 0.7938898450946644,
99
- "eval_f1": 0.7850614050415754,
100
- "eval_loss": 0.2981628179550171,
101
- "eval_runtime": 9.0189,
102
- "eval_samples_per_second": 257.681,
103
- "eval_steps_per_second": 32.266,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
- "learning_rate": 6.5e-07,
109
- "loss": 0.2585,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
- "eval_accuracy": 0.7908777969018933,
115
- "eval_f1": 0.7821955847641968,
116
- "eval_loss": 0.3001907467842102,
117
- "eval_runtime": 8.8872,
118
- "eval_samples_per_second": 261.499,
119
- "eval_steps_per_second": 32.744,
120
  "step": 1141
121
  }
122
  ],
123
- "max_steps": 3260,
124
- "num_train_epochs": 20,
125
- "total_flos": 2.131077947245901e+16,
126
  "trial_name": null,
127
  "trial_params": null
128
  }
 
1
  {
2
+ "best_metric": 0.3031200170516968,
3
+ "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-1141",
4
  "epoch": 6.997323135755258,
5
  "global_step": 1141,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9e-07,
13
+ "loss": 0.439,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.745697074010327,
19
+ "eval_f1": 0.732240056847258,
20
+ "eval_loss": 0.3982622027397156,
21
+ "eval_runtime": 8.9237,
22
+ "eval_samples_per_second": 260.431,
23
+ "eval_steps_per_second": 32.61,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 8e-07,
29
+ "loss": 0.3465,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7620481927710844,
35
+ "eval_f1": 0.750740067157349,
36
+ "eval_loss": 0.3448249399662018,
37
+ "eval_runtime": 9.0059,
38
+ "eval_samples_per_second": 258.053,
39
+ "eval_steps_per_second": 32.312,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 7e-07,
45
+ "loss": 0.3089,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_accuracy": 0.7693631669535284,
51
+ "eval_f1": 0.7633651185887134,
52
+ "eval_loss": 0.3303545117378235,
53
+ "eval_runtime": 9.3737,
54
+ "eval_samples_per_second": 247.927,
55
+ "eval_steps_per_second": 31.044,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 6e-07,
61
+ "loss": 0.2916,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_accuracy": 0.7839931153184165,
67
+ "eval_f1": 0.7736843738845695,
68
+ "eval_loss": 0.30892524123191833,
69
+ "eval_runtime": 9.0737,
70
+ "eval_samples_per_second": 256.125,
71
+ "eval_steps_per_second": 32.071,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
+ "learning_rate": 5e-07,
77
+ "loss": 0.2826,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
+ "eval_accuracy": 0.7839931153184165,
83
+ "eval_f1": 0.7744514636272033,
84
+ "eval_loss": 0.3055116832256317,
85
+ "eval_runtime": 9.1262,
86
+ "eval_samples_per_second": 254.651,
87
+ "eval_steps_per_second": 31.886,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
+ "learning_rate": 4e-07,
93
+ "loss": 0.2747,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
+ "eval_accuracy": 0.7857142857142857,
99
+ "eval_f1": 0.7771340101366444,
100
+ "eval_loss": 0.30407363176345825,
101
+ "eval_runtime": 9.1656,
102
+ "eval_samples_per_second": 253.557,
103
+ "eval_steps_per_second": 31.749,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "learning_rate": 3e-07,
109
+ "loss": 0.2678,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
+ "eval_accuracy": 0.7865748709122203,
115
+ "eval_f1": 0.7768983029852594,
116
+ "eval_loss": 0.3031200170516968,
117
+ "eval_runtime": 9.6529,
118
+ "eval_samples_per_second": 240.756,
119
+ "eval_steps_per_second": 30.146,
120
  "step": 1141
121
  }
122
  ],
123
+ "max_steps": 1630,
124
+ "num_train_epochs": 10,
125
+ "total_flos": 2.131107070199405e+16,
126
  "trial_name": null,
127
  "trial_params": null
128
  }
checkpoint-1141/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
3
  size 3119
checkpoint-1304/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a995198957991a4778c9b0a4ff2acc3f852248799a63f63a112918961d52dec7
3
- size 2681490814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef7936ae326c4eaaaf3a99ec813dac11f0e26791ed08a0d5bcb6ff80d2857ff
3
+ size 2681485310
checkpoint-1304/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffff6f838e3747eee33fd07b6b5738e8124d6bf65b8019b93dc2b67e1e73aaec
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a424c985030a874a9e4bfc84349addc7b4ac2efb4d1b6b44956f1f85a1ff96e1
3
  size 1340743917
checkpoint-1304/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a7cdc490809685b8f3f1a14763799335b83c7009894dafbcb3f054ef356ff5d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a7aa61e17bfe1f7d2cd89233eb4f54a7784fec415bcbcb0b0ac32e158b1d30
3
  size 14503
checkpoint-1304/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0acd2889d07b3f459da1f99892f4b8cf79ba4808ac88cb08ca68b0256d9cd814
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e4a97a3a66b98bf4e23a9d653c834f95839beaa653ab7298c03a6f2b4965f0
3
  size 623
checkpoint-1304/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.2981628179550171,
3
- "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-978",
4
  "epoch": 7.997323135755258,
5
  "global_step": 1304,
6
  "is_hyper_param_search": false,
@@ -9,136 +9,136 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.499999999999999e-07,
13
- "loss": 0.4384,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.7444061962134251,
19
- "eval_f1": 0.7308261375858633,
20
- "eval_loss": 0.39615127444267273,
21
- "eval_runtime": 9.0599,
22
- "eval_samples_per_second": 256.514,
23
- "eval_steps_per_second": 32.119,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 9e-07,
29
- "loss": 0.3447,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.76592082616179,
35
- "eval_f1": 0.7552159046464709,
36
- "eval_loss": 0.3409559428691864,
37
- "eval_runtime": 9.4378,
38
- "eval_samples_per_second": 246.244,
39
- "eval_steps_per_second": 30.833,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 8.499999999999999e-07,
45
- "loss": 0.3057,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_accuracy": 0.7749569707401033,
51
- "eval_f1": 0.768808341108185,
52
- "eval_loss": 0.32338443398475647,
53
- "eval_runtime": 9.0418,
54
- "eval_samples_per_second": 257.028,
55
- "eval_steps_per_second": 32.184,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 8e-07,
61
- "loss": 0.287,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
- "eval_accuracy": 0.7857142857142857,
67
- "eval_f1": 0.7778970154753132,
68
- "eval_loss": 0.3068828284740448,
69
- "eval_runtime": 9.3662,
70
- "eval_samples_per_second": 248.128,
71
- "eval_steps_per_second": 31.069,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
- "learning_rate": 7.5e-07,
77
- "loss": 0.2742,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
- "eval_accuracy": 0.7887263339070568,
83
- "eval_f1": 0.7821763089027973,
84
- "eval_loss": 0.30303624272346497,
85
- "eval_runtime": 8.908,
86
- "eval_samples_per_second": 260.89,
87
- "eval_steps_per_second": 32.667,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
- "learning_rate": 7e-07,
93
- "loss": 0.2676,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
- "eval_accuracy": 0.7938898450946644,
99
- "eval_f1": 0.7850614050415754,
100
- "eval_loss": 0.2981628179550171,
101
- "eval_runtime": 9.0189,
102
- "eval_samples_per_second": 257.681,
103
- "eval_steps_per_second": 32.266,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
- "learning_rate": 6.5e-07,
109
- "loss": 0.2585,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
- "eval_accuracy": 0.7908777969018933,
115
- "eval_f1": 0.7821955847641968,
116
- "eval_loss": 0.3001907467842102,
117
- "eval_runtime": 8.8872,
118
- "eval_samples_per_second": 261.499,
119
- "eval_steps_per_second": 32.744,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
- "learning_rate": 6e-07,
125
- "loss": 0.2526,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
- "eval_accuracy": 0.7943201376936316,
131
- "eval_f1": 0.7876461988304093,
132
- "eval_loss": 0.30516260862350464,
133
- "eval_runtime": 9.0042,
134
- "eval_samples_per_second": 258.103,
135
- "eval_steps_per_second": 32.318,
136
  "step": 1304
137
  }
138
  ],
139
- "max_steps": 3260,
140
- "num_train_epochs": 20,
141
- "total_flos": 2.435494355632512e+16,
142
  "trial_name": null,
143
  "trial_params": null
144
  }
 
1
  {
2
+ "best_metric": 0.3010457754135132,
3
+ "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-1304",
4
  "epoch": 7.997323135755258,
5
  "global_step": 1304,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9e-07,
13
+ "loss": 0.439,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.745697074010327,
19
+ "eval_f1": 0.732240056847258,
20
+ "eval_loss": 0.3982622027397156,
21
+ "eval_runtime": 8.9237,
22
+ "eval_samples_per_second": 260.431,
23
+ "eval_steps_per_second": 32.61,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 8e-07,
29
+ "loss": 0.3465,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7620481927710844,
35
+ "eval_f1": 0.750740067157349,
36
+ "eval_loss": 0.3448249399662018,
37
+ "eval_runtime": 9.0059,
38
+ "eval_samples_per_second": 258.053,
39
+ "eval_steps_per_second": 32.312,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 7e-07,
45
+ "loss": 0.3089,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_accuracy": 0.7693631669535284,
51
+ "eval_f1": 0.7633651185887134,
52
+ "eval_loss": 0.3303545117378235,
53
+ "eval_runtime": 9.3737,
54
+ "eval_samples_per_second": 247.927,
55
+ "eval_steps_per_second": 31.044,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 6e-07,
61
+ "loss": 0.2916,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_accuracy": 0.7839931153184165,
67
+ "eval_f1": 0.7736843738845695,
68
+ "eval_loss": 0.30892524123191833,
69
+ "eval_runtime": 9.0737,
70
+ "eval_samples_per_second": 256.125,
71
+ "eval_steps_per_second": 32.071,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
+ "learning_rate": 5e-07,
77
+ "loss": 0.2826,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
+ "eval_accuracy": 0.7839931153184165,
83
+ "eval_f1": 0.7744514636272033,
84
+ "eval_loss": 0.3055116832256317,
85
+ "eval_runtime": 9.1262,
86
+ "eval_samples_per_second": 254.651,
87
+ "eval_steps_per_second": 31.886,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
+ "learning_rate": 4e-07,
93
+ "loss": 0.2747,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
+ "eval_accuracy": 0.7857142857142857,
99
+ "eval_f1": 0.7771340101366444,
100
+ "eval_loss": 0.30407363176345825,
101
+ "eval_runtime": 9.1656,
102
+ "eval_samples_per_second": 253.557,
103
+ "eval_steps_per_second": 31.749,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "learning_rate": 3e-07,
109
+ "loss": 0.2678,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
+ "eval_accuracy": 0.7865748709122203,
115
+ "eval_f1": 0.7768983029852594,
116
+ "eval_loss": 0.3031200170516968,
117
+ "eval_runtime": 9.6529,
118
+ "eval_samples_per_second": 240.756,
119
+ "eval_steps_per_second": 30.146,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
+ "learning_rate": 2e-07,
125
+ "loss": 0.2641,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
+ "eval_accuracy": 0.7874354561101549,
131
+ "eval_f1": 0.7772050769924619,
132
+ "eval_loss": 0.3010457754135132,
133
+ "eval_runtime": 9.1465,
134
+ "eval_samples_per_second": 254.087,
135
+ "eval_steps_per_second": 31.816,
136
  "step": 1304
137
  }
138
  ],
139
+ "max_steps": 1630,
140
+ "num_train_epochs": 10,
141
+ "total_flos": 2.435546776948819e+16,
142
  "trial_name": null,
143
  "trial_params": null
144
  }
checkpoint-1304/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
3
  size 3119
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0655cfc868a392ac2b0850468debd2161b60753b0a1dc8c0e61845d86a8b31a
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a424c985030a874a9e4bfc84349addc7b4ac2efb4d1b6b44956f1f85a1ff96e1
3
  size 1340743917