boumehdi committed on
Commit
763c7d2
1 Parent(s): 379b387

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +54 -54
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a895d3647e473dba8b32f7496e84d2b35bf2e8a9da7924568994ea0e23c8d96
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6d4d7630b4d191f4e60f7290221bb7b841c866908affb66c3acb430d72b52b
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a8f6e6b312fa1b113a4c77869f2bcb7cc23f4abbc0d1ca811079f79220ece74
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b0001be4bcbd360b155ff6b09197534e32acf1ad9d35f43b70b6045b9156c67
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6c46fdc39595e4492274898b6b3506defdd41d1ceec77870a782f04c4492b4d
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c1299279db2fbc4f7c5ae0b3d9988647a93090b85d0673f16c255280a4e8e1c
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:940e29fecbb01cce370af89051d4b7e269c9468bdcab1fb705f4b1715779ddb5
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c904deb17584498a1a86bb508e6bbd68684aa572ee60c23f77b77d0b8129fa4d
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0606add1ed65078f140d7e321f11e032f96a3f3c5f6aec240beb600a5f0782b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ac577c49911bc9457ae001e29dcf4084465a3c73435eba449acf103e171f5e
3
  size 627
trainer_state.json CHANGED
@@ -1,112 +1,112 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.491862567811935,
5
  "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.09,
12
- "learning_rate": 9.99963765490253e-06,
13
- "loss": 0.009,
14
  "step": 150
15
  },
16
  {
17
- "epoch": 2.17,
18
- "learning_rate": 9.998557866512066e-06,
19
- "loss": 0.0075,
20
  "step": 300
21
  },
22
  {
23
- "epoch": 3.26,
24
- "learning_rate": 9.997470831219655e-06,
25
- "loss": 0.0088,
26
  "step": 450
27
  },
28
  {
29
- "epoch": 4.35,
30
- "learning_rate": 9.996383795927241e-06,
31
- "loss": 0.0139,
32
  "step": 600
33
  },
34
  {
35
- "epoch": 5.43,
36
- "learning_rate": 9.995296760634829e-06,
37
- "loss": 0.0151,
38
  "step": 750
39
  },
40
  {
41
- "epoch": 6.52,
42
- "learning_rate": 9.994209725342417e-06,
43
- "loss": 0.0148,
44
  "step": 900
45
  },
46
  {
47
- "epoch": 7.25,
48
- "eval_loss": 0.25712448358535767,
49
- "eval_runtime": 419.2266,
50
- "eval_samples_per_second": 9.415,
51
- "eval_steps_per_second": 1.178,
52
- "eval_wer": 0.1661549302864046,
53
  "step": 1000
54
  },
55
  {
56
- "epoch": 7.61,
57
- "learning_rate": 9.993122690050005e-06,
58
- "loss": 0.0151,
59
  "step": 1050
60
  },
61
  {
62
- "epoch": 8.69,
63
- "learning_rate": 9.99203565475759e-06,
64
- "loss": 0.0144,
65
  "step": 1200
66
  },
67
  {
68
- "epoch": 9.78,
69
- "learning_rate": 9.99094861946518e-06,
70
- "loss": 0.0131,
71
  "step": 1350
72
  },
73
  {
74
- "epoch": 10.87,
75
- "learning_rate": 9.989861584172766e-06,
76
- "loss": 0.0143,
77
  "step": 1500
78
  },
79
  {
80
- "epoch": 11.95,
81
- "learning_rate": 9.988774548880354e-06,
82
- "loss": 0.0141,
83
  "step": 1650
84
  },
85
  {
86
- "epoch": 13.04,
87
- "learning_rate": 9.987687513587942e-06,
88
- "loss": 0.0136,
89
  "step": 1800
90
  },
91
  {
92
- "epoch": 14.13,
93
- "learning_rate": 9.98660772519748e-06,
94
- "loss": 0.0137,
95
  "step": 1950
96
  },
97
  {
98
- "epoch": 14.49,
99
- "eval_loss": 0.26535430550575256,
100
- "eval_runtime": 234.4518,
101
- "eval_samples_per_second": 16.835,
102
- "eval_steps_per_second": 2.107,
103
- "eval_wer": 0.16665827754567877,
104
  "step": 2000
105
  }
106
  ],
107
- "max_steps": 1380000,
108
  "num_train_epochs": 10000,
109
- "total_flos": 4.548899267190198e+19,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.54207650273224,
5
  "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.31,
12
+ "learning_rate": 9.99957013773138e-06,
13
+ "loss": 0.029,
14
  "step": 150
15
  },
16
  {
17
+ "epoch": 2.63,
18
+ "learning_rate": 9.998254232827442e-06,
19
+ "loss": 0.0249,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 3.94,
24
+ "learning_rate": 9.996938327923503e-06,
25
+ "loss": 0.0239,
26
  "step": 450
27
  },
28
  {
29
+ "epoch": 5.26,
30
+ "learning_rate": 9.995622423019565e-06,
31
+ "loss": 0.0223,
32
  "step": 600
33
  },
34
  {
35
+ "epoch": 6.58,
36
+ "learning_rate": 9.994306518115625e-06,
37
+ "loss": 0.0219,
38
  "step": 750
39
  },
40
  {
41
+ "epoch": 7.89,
42
+ "learning_rate": 9.992990613211686e-06,
43
+ "loss": 0.0215,
44
  "step": 900
45
  },
46
  {
47
+ "epoch": 8.77,
48
+ "eval_loss": 0.21139128506183624,
49
+ "eval_runtime": 432.0782,
50
+ "eval_samples_per_second": 9.443,
51
+ "eval_steps_per_second": 1.18,
52
+ "eval_wer": 0.1666585139167441,
53
  "step": 1000
54
  },
55
  {
56
+ "epoch": 9.21,
57
+ "learning_rate": 9.991674708307746e-06,
58
+ "loss": 0.0207,
59
  "step": 1050
60
  },
61
  {
62
+ "epoch": 10.52,
63
+ "learning_rate": 9.990358803403808e-06,
64
+ "loss": 0.0209,
65
  "step": 1200
66
  },
67
  {
68
+ "epoch": 11.84,
69
+ "learning_rate": 9.989042898499869e-06,
70
+ "loss": 0.0193,
71
  "step": 1350
72
  },
73
  {
74
+ "epoch": 13.16,
75
+ "learning_rate": 9.98772699359593e-06,
76
+ "loss": 0.02,
77
  "step": 1500
78
  },
79
  {
80
+ "epoch": 14.47,
81
+ "learning_rate": 9.986411088691991e-06,
82
+ "loss": 0.0191,
83
  "step": 1650
84
  },
85
  {
86
+ "epoch": 15.79,
87
+ "learning_rate": 9.985095183788054e-06,
88
+ "loss": 0.0189,
89
  "step": 1800
90
  },
91
  {
92
+ "epoch": 17.1,
93
+ "learning_rate": 9.983779278884114e-06,
94
+ "loss": 0.0185,
95
  "step": 1950
96
  },
97
  {
98
+ "epoch": 17.54,
99
+ "eval_loss": 0.2301694005727768,
100
+ "eval_runtime": 231.0348,
101
+ "eval_samples_per_second": 17.66,
102
+ "eval_steps_per_second": 2.207,
103
+ "eval_wer": 0.16558235092696766,
104
  "step": 2000
105
  }
106
  ],
107
+ "max_steps": 1140000,
108
  "num_train_epochs": 10000,
109
+ "total_flos": 5.692337203030269e+19,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67fe320fc812e3797fe9b29ebf41e5b8a98264c8f1cd5c379b532ee8b5a418ed
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38fe6647c74d4bfa083d20aefd98944f22f415066e1f9f366d882e66134054bf
3
  size 3323