Training in progress, step 26800, checkpoint

Files changed (8) hide show

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "o_proj",
-    "gate_proj",
     "k_proj",
     "up_proj",
     "q_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "down_proj",
     "up_proj",
+    "v_proj",
     "q_proj",
+    "o_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a66649ed360eedf7f38576a034cb81a9f0d0d85b4e760a08a7af7c644b300130
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:781ebd998a71a8a8dd274340aa1124e66d3e00c0fe5093f9222bc4cbcd86aea4
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a84728d3e6388a2e313a0b930591e1caa03457e9373fa5a9018a74d4f76a4dd
 size 85736914

 version https://git-lfs.github.com/spec/v1
+oid sha256:25f27f39f3f676a65d1a0f1adedcb02f1c57c226e2ebe8e8d1fa52b1fce80bd5
 size 85736914

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e52e46a86dee0df6899d8616f9314a071547c40c5f77f9c7869ec679d05b2712
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9677b30ce0049dd05e6c506abb4b29c5c6375c70f6e47b50d2834249967a3544
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:034fc0fef9516d254e4e7d37234cc827f307f60cceb9a55e70e7d8869181a03e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:72cff663de036e0467d57060700e3479f0afb43e1a6ff1b8ccc5306bd9e04e29
 size 1064

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -2412,6 +2412,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
       "!": 0,
       "\"": 1,

     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
+    "ignore_merges": true,
     "vocab": {
       "!": 0,
       "\"": 1,

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.31650299882129407,
   "eval_steps": 2000,
-  "global_step": 26600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -938,6 +938,13 @@
       "learning_rate": 1.9803262560054603e-05,
       "loss": 1.5271,
       "step": 26600
     }
   ],
   "logging_steps": 200,
@@ -945,7 +952,19 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
-  "total_flos": 7.664136678128517e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3188827206169429,
   "eval_steps": 2000,
+  "global_step": 26800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9803262560054603e-05,
       "loss": 1.5271,
       "step": 26600
+    },
+    {
+      "epoch": 0.3188827206169429,
+      "grad_norm": 0.9575275182723999,
+      "learning_rate": 1.980030022063824e-05,
+      "loss": 1.5308,
+      "step": 26800
     }
   ],
   "logging_steps": 200,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.07870830933975e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c09e4d0caa94d3d4acabbf865ee69cc816a077a03533c04bf2c5e62bf08ec171
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:99da71cc1761188ab44f1fc840fa4a8d321bbf1910892d2eb8c5958220e9679a
+size 5112