Training in progress, step 2500, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1355a0456315e38ddd4e5bb14d13f3e60daf4a83eb0200f06f8ad70651e2671e
 size 1342238560

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e113b4f4f7380ea91be321d7eb224a1892c29690b499cf104edbc1033fe5b04
 size 1342238560

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7573892481b9fc959cbab3bd079cfad71612c44c3de992eb02dd0ddb993d15b
 size 2852574238

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa6dff968ebc536d935f20500dfd3266e442e6914d65f9452ccde2523c539a55
 size 2852574238

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2de1ff0ec7b4f73410478352e0bc1e361ac7d425afa69cd020451718a9b7ad4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d23adb5f8b168d9bd8854c7c24a982a4c7bb6c53373bc21471b5f28d834d745
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15a8ae873ae1ad723f4506548bbe03017c25276924eb215dbc4422a986eb89bc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ac3968049235612001c3413dd650a780dde8f9dab2897c02f639abe7251f8ed
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1601067378252168,
   "eval_steps": 500,
-  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -207,6 +207,21 @@
       "learning_rate": 0.00019999787116827252,
       "loss": 2.4247,
       "step": 2400
     }
   ],
   "logging_steps": 100,
@@ -226,7 +241,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.866978990698496e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1667778519012675,
   "eval_steps": 500,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019999787116827252,
       "loss": 2.4247,
       "step": 2400
+    },
+    {
+      "epoch": 0.1667778519012675,
+      "grad_norm": 6.1392998695373535,
+      "learning_rate": 0.00019999411790321993,
+      "loss": 2.4662,
+      "step": 2500
+    },
+    {
+      "epoch": 0.1667778519012675,
+      "eval_loss": 2.4712343215942383,
+      "eval_runtime": 4209.0065,
+      "eval_samples_per_second": 2.353,
+      "eval_steps_per_second": 1.177,
+      "step": 2500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 5.065745562277478e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null