MohamedAhmedAE committed on
Commit
cb8a059
1 Parent(s): 537d708

Training in progress, step 26800, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "v_proj",
24
- "o_proj",
25
- "gate_proj",
26
  "k_proj",
 
27
  "up_proj",
 
28
  "q_proj",
29
- "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "k_proj",
24
+ "down_proj",
25
  "up_proj",
26
+ "v_proj",
27
  "q_proj",
28
+ "o_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a66649ed360eedf7f38576a034cb81a9f0d0d85b4e760a08a7af7c644b300130
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781ebd998a71a8a8dd274340aa1124e66d3e00c0fe5093f9222bc4cbcd86aea4
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a84728d3e6388a2e313a0b930591e1caa03457e9373fa5a9018a74d4f76a4dd
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f27f39f3f676a65d1a0f1adedcb02f1c57c226e2ebe8e8d1fa52b1fce80bd5
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e52e46a86dee0df6899d8616f9314a071547c40c5f77f9c7869ec679d05b2712
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9677b30ce0049dd05e6c506abb4b29c5c6375c70f6e47b50d2834249967a3544
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034fc0fef9516d254e4e7d37234cc827f307f60cceb9a55e70e7d8869181a03e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72cff663de036e0467d57060700e3479f0afb43e1a6ff1b8ccc5306bd9e04e29
3
  size 1064
last-checkpoint/tokenizer.json CHANGED
@@ -2412,6 +2412,7 @@
2412
  "end_of_word_suffix": null,
2413
  "fuse_unk": false,
2414
  "byte_fallback": false,
 
2415
  "vocab": {
2416
  "!": 0,
2417
  "\"": 1,
 
2412
  "end_of_word_suffix": null,
2413
  "fuse_unk": false,
2414
  "byte_fallback": false,
2415
+ "ignore_merges": true,
2416
  "vocab": {
2417
  "!": 0,
2418
  "\"": 1,
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.31650299882129407,
5
  "eval_steps": 2000,
6
- "global_step": 26600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -938,6 +938,13 @@
938
  "learning_rate": 1.9803262560054603e-05,
939
  "loss": 1.5271,
940
  "step": 26600
 
 
 
 
 
 
 
941
  }
942
  ],
943
  "logging_steps": 200,
@@ -945,7 +952,19 @@
945
  "num_input_tokens_seen": 0,
946
  "num_train_epochs": 5,
947
  "save_steps": 200,
948
- "total_flos": 7.664136678128517e+17,
 
 
 
 
 
 
 
 
 
 
 
 
949
  "train_batch_size": 1,
950
  "trial_name": null,
951
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3188827206169429,
5
  "eval_steps": 2000,
6
+ "global_step": 26800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
938
  "learning_rate": 1.9803262560054603e-05,
939
  "loss": 1.5271,
940
  "step": 26600
941
+ },
942
+ {
943
+ "epoch": 0.3188827206169429,
944
+ "grad_norm": 0.9575275182723999,
945
+ "learning_rate": 1.980030022063824e-05,
946
+ "loss": 1.5308,
947
+ "step": 26800
948
  }
949
  ],
950
  "logging_steps": 200,
 
952
  "num_input_tokens_seen": 0,
953
  "num_train_epochs": 5,
954
  "save_steps": 200,
955
+ "stateful_callbacks": {
956
+ "TrainerControl": {
957
+ "args": {
958
+ "should_epoch_stop": false,
959
+ "should_evaluate": false,
960
+ "should_log": false,
961
+ "should_save": true,
962
+ "should_training_stop": false
963
+ },
964
+ "attributes": {}
965
+ }
966
+ },
967
+ "total_flos": 8.07870830933975e+17,
968
  "train_batch_size": 1,
969
  "trial_name": null,
970
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09e4d0caa94d3d4acabbf865ee69cc816a077a03533c04bf2c5e62bf08ec171
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99da71cc1761188ab44f1fc840fa4a8d321bbf1910892d2eb8c5958220e9679a
3
+ size 5112