Training in progress, step 18580, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3ead06ca64c097b468e4848a48dfe313e10e066882daadec4733b392f7d027f
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:97e8843eb054c3ad74976f86442c8477d9387361a93f5b5aa761f197a5bc9063
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7406298021aee6db4278f2ee9543df6b8e99529b9e938b2035da9248006c804c
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c96818bbdf189c58ef600d3a975e4800cffb8d3a7249ab57a213e50b7404224
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47f34dc38cec7b95d2822f7dac5953198d47e6fd7a3a22a93769d88071de8848
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5412b1babcd6e1b6448c556f65ec1c55adcb2b9230a5765d4a15f21fea9e3a6f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b581f3feb3bd9bc8f3fdf9e12a5bd849da1459f5b42fe0464a7236b0311e0814
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c99a29b95a012bd096c718ca62d3947568602c45a86a3f3f0f2a57277532305
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2626551377665822,
   "eval_steps": 500,
-  "global_step": 18560,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6503,6 +6503,13 @@
       "learning_rate": 6.766473629355452e-07,
       "loss": 3.5451,
       "step": 18560
     }
   ],
   "logging_steps": 20,
@@ -6510,7 +6517,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 4.033483670627942e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.26293817132021,
   "eval_steps": 500,
+  "global_step": 18580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.766473629355452e-07,
       "loss": 3.5451,
       "step": 18560
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 19.47591209411621,
+      "learning_rate": 6.580644556884702e-07,
+      "loss": 3.5458,
+      "step": 18580
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 4.037882943504384e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null