Upload 9 files

Browse files

Files changed (7) hide show

optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +54 -54
training_args.bin +1 -1

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a895d3647e473dba8b32f7496e84d2b35bf2e8a9da7924568994ea0e23c8d96
 size 2490594117

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b6d4d7630b4d191f4e60f7290221bb7b841c866908affb66c3acb430d72b52b
 size 2490594117

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a8f6e6b312fa1b113a4c77869f2bcb7cc23f4abbc0d1ca811079f79220ece74
 size 1262168365

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b0001be4bcbd360b155ff6b09197534e32acf1ad9d35f43b70b6045b9156c67
 size 1262168365

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6c46fdc39595e4492274898b6b3506defdd41d1ceec77870a782f04c4492b4d
 size 14639

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c1299279db2fbc4f7c5ae0b3d9988647a93090b85d0673f16c255280a4e8e1c
 size 14639

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:940e29fecbb01cce370af89051d4b7e269c9468bdcab1fb705f4b1715779ddb5
 size 557

 version https://git-lfs.github.com/spec/v1
+oid sha256:c904deb17584498a1a86bb508e6bbd68684aa572ee60c23f77b77d0b8129fa4d
 size 557

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0606add1ed65078f140d7e321f11e032f96a3f3c5f6aec240beb600a5f0782b
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:54ac577c49911bc9457ae001e29dcf4084465a3c73435eba449acf103e171f5e
 size 627

trainer_state.json CHANGED Viewed

@@ -1,112 +1,112 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 14.491862567811935,
   "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.09,
-      "learning_rate": 9.99963765490253e-06,
-      "loss": 0.009,
       "step": 150
     },
     {
-      "epoch": 2.17,
-      "learning_rate": 9.998557866512066e-06,
-      "loss": 0.0075,
       "step": 300
     },
     {
-      "epoch": 3.26,
-      "learning_rate": 9.997470831219655e-06,
-      "loss": 0.0088,
       "step": 450
     },
     {
-      "epoch": 4.35,
-      "learning_rate": 9.996383795927241e-06,
-      "loss": 0.0139,
       "step": 600
     },
     {
-      "epoch": 5.43,
-      "learning_rate": 9.995296760634829e-06,
-      "loss": 0.0151,
       "step": 750
     },
     {
-      "epoch": 6.52,
-      "learning_rate": 9.994209725342417e-06,
-      "loss": 0.0148,
       "step": 900
     },
     {
-      "epoch": 7.25,
-      "eval_loss": 0.25712448358535767,
-      "eval_runtime": 419.2266,
-      "eval_samples_per_second": 9.415,
-      "eval_steps_per_second": 1.178,
-      "eval_wer": 0.1661549302864046,
       "step": 1000
     },
     {
-      "epoch": 7.61,
-      "learning_rate": 9.993122690050005e-06,
-      "loss": 0.0151,
       "step": 1050
     },
     {
-      "epoch": 8.69,
-      "learning_rate": 9.99203565475759e-06,
-      "loss": 0.0144,
       "step": 1200
     },
     {
-      "epoch": 9.78,
-      "learning_rate": 9.99094861946518e-06,
-      "loss": 0.0131,
       "step": 1350
     },
     {
-      "epoch": 10.87,
-      "learning_rate": 9.989861584172766e-06,
-      "loss": 0.0143,
       "step": 1500
     },
     {
-      "epoch": 11.95,
-      "learning_rate": 9.988774548880354e-06,
-      "loss": 0.0141,
       "step": 1650
     },
     {
-      "epoch": 13.04,
-      "learning_rate": 9.987687513587942e-06,
-      "loss": 0.0136,
       "step": 1800
     },
     {
-      "epoch": 14.13,
-      "learning_rate": 9.98660772519748e-06,
-      "loss": 0.0137,
       "step": 1950
     },
     {
-      "epoch": 14.49,
-      "eval_loss": 0.26535430550575256,
-      "eval_runtime": 234.4518,
-      "eval_samples_per_second": 16.835,
-      "eval_steps_per_second": 2.107,
-      "eval_wer": 0.16665827754567877,
       "step": 2000
     }
   ],
-  "max_steps": 1380000,
   "num_train_epochs": 10000,
-  "total_flos": 4.548899267190198e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 17.54207650273224,
   "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 1.31,
+      "learning_rate": 9.99957013773138e-06,
+      "loss": 0.029,
       "step": 150
     },
     {
+      "epoch": 2.63,
+      "learning_rate": 9.998254232827442e-06,
+      "loss": 0.0249,
       "step": 300
     },
     {
+      "epoch": 3.94,
+      "learning_rate": 9.996938327923503e-06,
+      "loss": 0.0239,
       "step": 450
     },
     {
+      "epoch": 5.26,
+      "learning_rate": 9.995622423019565e-06,
+      "loss": 0.0223,
       "step": 600
     },
     {
+      "epoch": 6.58,
+      "learning_rate": 9.994306518115625e-06,
+      "loss": 0.0219,
       "step": 750
     },
     {
+      "epoch": 7.89,
+      "learning_rate": 9.992990613211686e-06,
+      "loss": 0.0215,
       "step": 900
     },
     {
+      "epoch": 8.77,
+      "eval_loss": 0.21139128506183624,
+      "eval_runtime": 432.0782,
+      "eval_samples_per_second": 9.443,
+      "eval_steps_per_second": 1.18,
+      "eval_wer": 0.1666585139167441,
       "step": 1000
     },
     {
+      "epoch": 9.21,
+      "learning_rate": 9.991674708307746e-06,
+      "loss": 0.0207,
       "step": 1050
     },
     {
+      "epoch": 10.52,
+      "learning_rate": 9.990358803403808e-06,
+      "loss": 0.0209,
       "step": 1200
     },
     {
+      "epoch": 11.84,
+      "learning_rate": 9.989042898499869e-06,
+      "loss": 0.0193,
       "step": 1350
     },
     {
+      "epoch": 13.16,
+      "learning_rate": 9.98772699359593e-06,
+      "loss": 0.02,
       "step": 1500
     },
     {
+      "epoch": 14.47,
+      "learning_rate": 9.986411088691991e-06,
+      "loss": 0.0191,
       "step": 1650
     },
     {
+      "epoch": 15.79,
+      "learning_rate": 9.985095183788054e-06,
+      "loss": 0.0189,
       "step": 1800
     },
     {
+      "epoch": 17.1,
+      "learning_rate": 9.983779278884114e-06,
+      "loss": 0.0185,
       "step": 1950
     },
     {
+      "epoch": 17.54,
+      "eval_loss": 0.2301694005727768,
+      "eval_runtime": 231.0348,
+      "eval_samples_per_second": 17.66,
+      "eval_steps_per_second": 2.207,
+      "eval_wer": 0.16558235092696766,
       "step": 2000
     }
   ],
+  "max_steps": 1140000,
   "num_train_epochs": 10000,
+  "total_flos": 5.692337203030269e+19,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67fe320fc812e3797fe9b29ebf41e5b8a98264c8f1cd5c379b532ee8b5a418ed
 size 3323

 version https://git-lfs.github.com/spec/v1
+oid sha256:38fe6647c74d4bfa083d20aefd98944f22f415066e1f9f366d882e66134054bf
 size 3323