boumehdi commited on
Commit
31999ff
1 Parent(s): efd8959

Upload 12 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +32 -128
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bed063b4186f03787edb99c074dbb586b99f5162085d5c16e0db4cbcb0a4f632
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5acf3b5453e7ae9cf209df888fb143d9f2da40adf070d6a3d025caa438f8739
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a5e177135e49a5cecca9645170889d50c62923d9a76429da7138d06c224d1da
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71bda9f90725233cac8670bf36b60117f18383968a3933b4a0555a23b7c15279
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b5477abcf468982052803c535d0ca7e85d3b5529a3db76f938926b09683583b
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7602cd5e8794b2d59d7a8b65b5ec6bf6159ef05734d0ad6928196a70d07daf
3
+ size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25b49edd22c6273e0961ea9beb65cba08c556a16b608c3406a5432bac4cb2a57
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8cbeae9563088ca9bba5c5c22688b1f3ca3cf93bb99af3f85f8c6cc9cab6fa6
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d955e432a5975234ea4c4b68a9fcfc654a3b35487b3555ca89a82642063ca66b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b46c84ae4fe317e348faba65da33d430fc6f16849dfa16b90ea095086de29d
3
  size 627
trainer_state.json CHANGED
@@ -1,163 +1,67 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.314754098360655,
5
- "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.31,
12
- "learning_rate": 9.99957013773138e-06,
13
- "loss": 0.029,
14
  "step": 150
15
  },
16
  {
17
- "epoch": 2.63,
18
- "learning_rate": 9.998254232827442e-06,
19
- "loss": 0.0249,
20
  "step": 300
21
  },
22
  {
23
- "epoch": 3.94,
24
- "learning_rate": 9.996938327923503e-06,
25
- "loss": 0.0239,
26
  "step": 450
27
  },
28
  {
29
- "epoch": 5.26,
30
- "learning_rate": 9.995622423019565e-06,
31
- "loss": 0.0223,
32
  "step": 600
33
  },
34
  {
35
- "epoch": 6.58,
36
- "learning_rate": 9.994306518115625e-06,
37
- "loss": 0.0219,
38
  "step": 750
39
  },
40
  {
41
- "epoch": 7.89,
42
- "learning_rate": 9.992990613211686e-06,
43
- "loss": 0.0215,
44
  "step": 900
45
  },
46
  {
47
- "epoch": 8.77,
48
- "eval_loss": 0.21139128506183624,
49
- "eval_runtime": 432.0782,
50
- "eval_samples_per_second": 9.443,
51
- "eval_steps_per_second": 1.18,
52
- "eval_wer": 0.1666585139167441,
53
- "step": 1000
54
- },
55
- {
56
- "epoch": 9.21,
57
- "learning_rate": 9.991674708307746e-06,
58
- "loss": 0.0207,
59
  "step": 1050
60
  },
61
  {
62
- "epoch": 10.52,
63
- "learning_rate": 9.990358803403808e-06,
64
- "loss": 0.0209,
65
- "step": 1200
66
- },
67
- {
68
- "epoch": 11.84,
69
- "learning_rate": 9.989042898499869e-06,
70
- "loss": 0.0193,
71
- "step": 1350
72
- },
73
- {
74
- "epoch": 13.16,
75
- "learning_rate": 9.98772699359593e-06,
76
- "loss": 0.02,
77
- "step": 1500
78
- },
79
- {
80
- "epoch": 14.47,
81
- "learning_rate": 9.986411088691991e-06,
82
- "loss": 0.0191,
83
- "step": 1650
84
- },
85
- {
86
- "epoch": 15.79,
87
- "learning_rate": 9.985095183788054e-06,
88
- "loss": 0.0189,
89
- "step": 1800
90
- },
91
- {
92
- "epoch": 17.1,
93
- "learning_rate": 9.983779278884114e-06,
94
- "loss": 0.0185,
95
- "step": 1950
96
- },
97
- {
98
- "epoch": 17.54,
99
- "eval_loss": 0.2301694005727768,
100
- "eval_runtime": 231.0348,
101
- "eval_samples_per_second": 17.66,
102
- "eval_steps_per_second": 2.207,
103
- "eval_wer": 0.16558235092696766,
104
- "step": 2000
105
- },
106
- {
107
- "epoch": 18.42,
108
- "learning_rate": 9.982463373980174e-06,
109
- "loss": 0.0184,
110
- "step": 2100
111
- },
112
- {
113
- "epoch": 19.73,
114
- "learning_rate": 9.981147469076235e-06,
115
- "loss": 0.0186,
116
- "step": 2250
117
- },
118
- {
119
- "epoch": 21.05,
120
- "learning_rate": 9.979831564172297e-06,
121
- "loss": 0.0174,
122
- "step": 2400
123
- },
124
- {
125
- "epoch": 22.37,
126
- "learning_rate": 9.978515659268357e-06,
127
- "loss": 0.0175,
128
- "step": 2550
129
- },
130
- {
131
- "epoch": 23.68,
132
- "learning_rate": 9.977199754364418e-06,
133
- "loss": 0.0169,
134
- "step": 2700
135
- },
136
- {
137
- "epoch": 25.0,
138
- "learning_rate": 9.97588384946048e-06,
139
- "loss": 0.0174,
140
- "step": 2850
141
- },
142
- {
143
- "epoch": 26.31,
144
- "learning_rate": 9.974567944556542e-06,
145
- "loss": 0.0175,
146
- "step": 3000
147
- },
148
- {
149
- "epoch": 26.31,
150
- "eval_loss": 0.23709918558597565,
151
- "eval_runtime": 224.4917,
152
- "eval_samples_per_second": 18.174,
153
- "eval_steps_per_second": 2.272,
154
- "eval_wer": 0.16504426943207945,
155
- "step": 3000
156
  }
157
  ],
158
- "max_steps": 1140000,
159
  "num_train_epochs": 10000,
160
- "total_flos": 8.535940964167849e+19,
161
  "trial_name": null,
162
  "trial_params": null
163
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.087431693989071,
5
+ "global_step": 1100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.64,
12
+ "learning_rate": 4.999725244532367e-06,
13
+ "loss": 0.0243,
14
  "step": 150
15
  },
16
  {
17
+ "epoch": 3.3,
18
+ "learning_rate": 4.998900978129465e-06,
19
+ "loss": 0.0192,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 4.94,
24
+ "learning_rate": 4.998076711726564e-06,
25
+ "loss": 0.0175,
26
  "step": 450
27
  },
28
  {
29
+ "epoch": 6.59,
30
+ "learning_rate": 4.997252445323663e-06,
31
+ "loss": 0.0174,
32
  "step": 600
33
  },
34
  {
35
+ "epoch": 8.24,
36
+ "learning_rate": 4.996428178920761e-06,
37
+ "loss": 0.0168,
38
  "step": 750
39
  },
40
  {
41
+ "epoch": 9.89,
42
+ "learning_rate": 4.995609407627212e-06,
43
+ "loss": 0.0175,
44
  "step": 900
45
  },
46
  {
47
+ "epoch": 11.54,
48
+ "learning_rate": 4.994785141224311e-06,
49
+ "loss": 0.0168,
 
 
 
 
 
 
 
 
 
50
  "step": 1050
51
  },
52
  {
53
+ "epoch": 12.09,
54
+ "eval_loss": 0.23020002245903015,
55
+ "eval_runtime": 433.719,
56
+ "eval_samples_per_second": 9.407,
57
+ "eval_steps_per_second": 1.176,
58
+ "eval_wer": 0.16092909535452324,
59
+ "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
  ],
62
+ "max_steps": 910000,
63
  "num_train_epochs": 10000,
64
+ "total_flos": 3.930570221142893e+19,
65
  "trial_name": null,
66
  "trial_params": null
67
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38fe6647c74d4bfa083d20aefd98944f22f415066e1f9f366d882e66134054bf
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac89f9984dd853509f158f5c63d67654b0bc1d76d097b923ea0fef638f790ce5
3
  size 3323