boumehdi committed on
Commit
43c7d8d
1 Parent(s): dca3f64

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +50 -41
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da38f3bc2fe6927f77f1e949dacaaa5c88d1ff1e7b5b630565b9bad55cdc8037
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a895d3647e473dba8b32f7496e84d2b35bf2e8a9da7924568994ea0e23c8d96
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0e93138c7536f15d081a2517332243e990720ea7e4c526912b3a5cff073cf49
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8f6e6b312fa1b113a4c77869f2bcb7cc23f4abbc0d1ca811079f79220ece74
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:061193452a858228d1c66af4f1074191f8ac9e88dfc0f2e16c6ccd6079a33e18
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c46fdc39595e4492274898b6b3506defdd41d1ceec77870a782f04c4492b4d
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7076cb3d5134abd5db45f65ae43a71e92139309531dd802207a13ea550dea80b
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940e29fecbb01cce370af89051d4b7e269c9468bdcab1fb705f4b1715779ddb5
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c024168d11f393cebc0d70778a05dbba26ed981c876446f516b34a4b4b7f2cc
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0606add1ed65078f140d7e321f11e032f96a3f3c5f6aec240beb600a5f0782b
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.867992766726944,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,94 +10,103 @@
10
  {
11
  "epoch": 1.09,
12
  "learning_rate": 9.99963765490253e-06,
13
- "loss": 0.0157,
14
  "step": 150
15
  },
16
  {
17
  "epoch": 2.17,
18
- "learning_rate": 9.998550619610118e-06,
19
- "loss": 0.0123,
20
  "step": 300
21
  },
22
  {
23
  "epoch": 3.26,
24
- "learning_rate": 9.997463584317706e-06,
25
- "loss": 0.0152,
26
  "step": 450
27
  },
28
- {
29
- "epoch": 3.62,
30
- "eval_loss": 0.23832739889621735,
31
- "eval_runtime": 297.425,
32
- "eval_samples_per_second": 13.271,
33
- "eval_steps_per_second": 1.661,
34
- "eval_wer": 0.17043338199023506,
35
- "step": 500
36
- },
37
  {
38
  "epoch": 4.35,
39
- "learning_rate": 9.996376549025293e-06,
40
- "loss": 0.0167,
41
  "step": 600
42
  },
43
  {
44
  "epoch": 5.43,
45
- "learning_rate": 9.99528951373288e-06,
46
- "loss": 0.0148,
47
  "step": 750
48
  },
49
  {
50
  "epoch": 6.52,
51
- "learning_rate": 9.994202478440467e-06,
52
- "loss": 0.0153,
53
  "step": 900
54
  },
55
  {
56
  "epoch": 7.25,
57
- "eval_loss": 0.24725446105003357,
58
- "eval_runtime": 201.8594,
59
- "eval_samples_per_second": 19.553,
60
- "eval_steps_per_second": 2.447,
61
- "eval_wer": 0.16867166658277546,
62
  "step": 1000
63
  },
64
  {
65
  "epoch": 7.61,
66
- "learning_rate": 9.993115443148055e-06,
67
- "loss": 0.0163,
68
  "step": 1050
69
  },
70
  {
71
  "epoch": 8.69,
72
- "learning_rate": 9.992028407855643e-06,
73
- "loss": 0.0158,
74
  "step": 1200
75
  },
76
  {
77
  "epoch": 9.78,
78
  "learning_rate": 9.99094861946518e-06,
79
- "loss": 0.0146,
80
  "step": 1350
81
  },
82
  {
83
  "epoch": 10.87,
84
  "learning_rate": 9.989861584172766e-06,
85
- "loss": 0.0154,
86
  "step": 1500
87
  },
88
  {
89
- "epoch": 10.87,
90
- "eval_loss": 0.24820923805236816,
91
- "eval_runtime": 211.4153,
92
- "eval_samples_per_second": 18.669,
93
- "eval_steps_per_second": 2.337,
94
- "eval_wer": 0.1679669804197916,
95
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "max_steps": 1380000,
99
  "num_train_epochs": 10000,
100
- "total_flos": 3.4217202294237536e+19,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.491862567811935,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.09,
12
  "learning_rate": 9.99963765490253e-06,
13
+ "loss": 0.009,
14
  "step": 150
15
  },
16
  {
17
  "epoch": 2.17,
18
+ "learning_rate": 9.998557866512066e-06,
19
+ "loss": 0.0075,
20
  "step": 300
21
  },
22
  {
23
  "epoch": 3.26,
24
+ "learning_rate": 9.997470831219655e-06,
25
+ "loss": 0.0088,
26
  "step": 450
27
  },
 
 
 
 
 
 
 
 
 
28
  {
29
  "epoch": 4.35,
30
+ "learning_rate": 9.996383795927241e-06,
31
+ "loss": 0.0139,
32
  "step": 600
33
  },
34
  {
35
  "epoch": 5.43,
36
+ "learning_rate": 9.995296760634829e-06,
37
+ "loss": 0.0151,
38
  "step": 750
39
  },
40
  {
41
  "epoch": 6.52,
42
+ "learning_rate": 9.994209725342417e-06,
43
+ "loss": 0.0148,
44
  "step": 900
45
  },
46
  {
47
  "epoch": 7.25,
48
+ "eval_loss": 0.25712448358535767,
49
+ "eval_runtime": 419.2266,
50
+ "eval_samples_per_second": 9.415,
51
+ "eval_steps_per_second": 1.178,
52
+ "eval_wer": 0.1661549302864046,
53
  "step": 1000
54
  },
55
  {
56
  "epoch": 7.61,
57
+ "learning_rate": 9.993122690050005e-06,
58
+ "loss": 0.0151,
59
  "step": 1050
60
  },
61
  {
62
  "epoch": 8.69,
63
+ "learning_rate": 9.99203565475759e-06,
64
+ "loss": 0.0144,
65
  "step": 1200
66
  },
67
  {
68
  "epoch": 9.78,
69
  "learning_rate": 9.99094861946518e-06,
70
+ "loss": 0.0131,
71
  "step": 1350
72
  },
73
  {
74
  "epoch": 10.87,
75
  "learning_rate": 9.989861584172766e-06,
76
+ "loss": 0.0143,
77
  "step": 1500
78
  },
79
  {
80
+ "epoch": 11.95,
81
+ "learning_rate": 9.988774548880354e-06,
82
+ "loss": 0.0141,
83
+ "step": 1650
84
+ },
85
+ {
86
+ "epoch": 13.04,
87
+ "learning_rate": 9.987687513587942e-06,
88
+ "loss": 0.0136,
89
+ "step": 1800
90
+ },
91
+ {
92
+ "epoch": 14.13,
93
+ "learning_rate": 9.98660772519748e-06,
94
+ "loss": 0.0137,
95
+ "step": 1950
96
+ },
97
+ {
98
+ "epoch": 14.49,
99
+ "eval_loss": 0.26535430550575256,
100
+ "eval_runtime": 234.4518,
101
+ "eval_samples_per_second": 16.835,
102
+ "eval_steps_per_second": 2.107,
103
+ "eval_wer": 0.16665827754567877,
104
+ "step": 2000
105
  }
106
  ],
107
  "max_steps": 1380000,
108
  "num_train_epochs": 10000,
109
+ "total_flos": 4.548899267190198e+19,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8a48147752c3a8083ebf5cdd854e3c3535a7d951355ce96dd9e8d52de7d94ab
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fe320fc812e3797fe9b29ebf41e5b8a98264c8f1cd5c379b532ee8b5a418ed
3
  size 3323