antonpolishko
commited on
Commit
•
c97cda5
1
Parent(s):
8a84567
Training in progress, step 101745, checkpoint
Browse files- last-checkpoint/global_step101745/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step101745/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00003.safetensors +1 -1
- last-checkpoint/model-00002-of-00003.safetensors +1 -1
- last-checkpoint/model-00003-of-00003.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1055 -4
last-checkpoint/global_step101745/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02a312d808a4c172a56abcf2f40299903b1ec005555ff4afaa353ae861d7928f
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afe89340e864a632169aad6dbd0fcfb068a456c1abac5c390c7ca79bfdad12e4
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e8ec624ae4d73c9a944cea9a483bd5d654a8c01ddbda5023a6cc4977d9dbf59
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2b04266d96f81665468b05b72ee80d30f80a76dbb06f9475402426f1e72feff
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e8cda0a6e4c008262c71ed483b24d3979ad1116b60dc0be92e0d06856b9c705
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e0009d61898a300b5c09522870f4308e87bbac2df7f91199077fda4404432b7
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bec8bddc4488c2e51ee7e0d4c237f1bc7e872e69c611319cd7de830d7fcaca5
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b65ef960cacd5b70f9a94edca82899ea137421e370a4176186755bdead703815
|
3 |
+
size 10872039004
|
last-checkpoint/global_step101745/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:330ac7739e9ce88431a3a6420364de47b7d467118dbd996d3159d374ab495b17
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f65c55a330b36a156910e7c5ada38ced5e061fd2b4acf25fe82250512b3972a
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8104744f25b575a26698f052dcb081ede4f1d1d5bf9c6d4d41371b55d50c7da
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a48c001725bbb6dacefd41fc7503de7cff3c4b53cdb0a710a98f3913fe7a5f69
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f95559154cad8f5e2dabd64c971235da76b9e7ae8f75eec6f94b336aca5c724
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78cc2b8deaeb103e46a198afc385fba8bdb111bd774d5de46aa00fea2299335f
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:076929eac3f8ce949414796524cbd1b61b6dab0e13b44ae8910225e297af2a99
|
3 |
+
size 150629
|
last-checkpoint/global_step101745/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ae3702d2fb41778c438fa5f6efeeebc2e5bd97eec9d176b6019aa3d461de198
|
3 |
+
size 150629
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step101745
|
last-checkpoint/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4949453792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fee95e5476cb17c76bfb897d09991439a890fd23748ccbce8b5f8895286f3c71
|
3 |
size 4949453792
|
last-checkpoint/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3539ebc92f72f2b5ace7845c2be62ee407e51ec15cb60ee54ba6c5bd8ea7ae0b
|
3 |
size 4999819336
|
last-checkpoint/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4546807800
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:794124481b74e42a87aa50938feaa7fc3af205392714321ed70afe96d50db44f
|
3 |
size 4546807800
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9969fc360b910120505cfd1a5391d0621f19b1148c5bcf6c652a3372995c8ad
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:366dbd6034f944308663d42453222c57415716e792194be691d878d87ea586ed
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5985705fd83509240a89b3ff7eeafa57c5afbf06c196e7c4c2969bbe9c7bbb14
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:298dcb66ee0fdd37ee48627d56d853dbc0c02896347b8c3855b4518862c2439c
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34993850cad8ac78e1ecea51711c55f1667a4914a7137efd0e4aef004a80b55b
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3438b9131ad5e67c3acb845aaad9ff3a23b1bd067fb8d8034e0950c740b9681a
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19e1c720c8ffd6aa3f02f7316c8fce68c3ab8b53eddeb28c138f5c59218da3fd
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5c5e4ef5311c5309e896f1e33453c4fd04ece9185eff8119289c947570133cf
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d948e55a76e95b0e17e5fe88a19415512c589fc23d487fad5f8729bd18d3d07
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -143030,6 +143030,1057 @@
|
|
143030 |
"eval_samples_per_second": 94.4,
|
143031 |
"eval_steps_per_second": 3.099,
|
143032 |
"step": 101000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143033 |
}
|
143034 |
],
|
143035 |
"logging_steps": 5,
|
@@ -143044,12 +144095,12 @@
|
|
143044 |
"should_evaluate": false,
|
143045 |
"should_log": false,
|
143046 |
"should_save": true,
|
143047 |
-
"should_training_stop":
|
143048 |
},
|
143049 |
"attributes": {}
|
143050 |
}
|
143051 |
},
|
143052 |
-
"total_flos": 1.
|
143053 |
"train_batch_size": 4,
|
143054 |
"trial_name": null,
|
143055 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 101745,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
143030 |
"eval_samples_per_second": 94.4,
|
143031 |
"eval_steps_per_second": 3.099,
|
143032 |
"step": 101000
|
143033 |
+
},
|
143034 |
+
{
|
143035 |
+
"epoch": 2.9781807459826037,
|
143036 |
+
"grad_norm": 3.4780764844811576,
|
143037 |
+
"learning_rate": 1.208466738100572e-09,
|
143038 |
+
"loss": 1.0177,
|
143039 |
+
"step": 101005
|
143040 |
+
},
|
143041 |
+
{
|
143042 |
+
"epoch": 2.9783281733746128,
|
143043 |
+
"grad_norm": 3.578505302309684,
|
143044 |
+
"learning_rate": 1.1921921399893343e-09,
|
143045 |
+
"loss": 1.0343,
|
143046 |
+
"step": 101010
|
143047 |
+
},
|
143048 |
+
{
|
143049 |
+
"epoch": 2.9784756007666227,
|
143050 |
+
"grad_norm": 3.501341482963037,
|
143051 |
+
"learning_rate": 1.1760278548285374e-09,
|
143052 |
+
"loss": 1.0141,
|
143053 |
+
"step": 101015
|
143054 |
+
},
|
143055 |
+
{
|
143056 |
+
"epoch": 2.9786230281586317,
|
143057 |
+
"grad_norm": 3.4322692165815014,
|
143058 |
+
"learning_rate": 1.1599738830936346e-09,
|
143059 |
+
"loss": 1.0386,
|
143060 |
+
"step": 101020
|
143061 |
+
},
|
143062 |
+
{
|
143063 |
+
"epoch": 2.9787704555506416,
|
143064 |
+
"grad_norm": 3.5124242055636423,
|
143065 |
+
"learning_rate": 1.1440302252571643e-09,
|
143066 |
+
"loss": 1.0724,
|
143067 |
+
"step": 101025
|
143068 |
+
},
|
143069 |
+
{
|
143070 |
+
"epoch": 2.9789178829426506,
|
143071 |
+
"grad_norm": 3.441267717096939,
|
143072 |
+
"learning_rate": 1.128196881788751e-09,
|
143073 |
+
"loss": 0.9757,
|
143074 |
+
"step": 101030
|
143075 |
+
},
|
143076 |
+
{
|
143077 |
+
"epoch": 2.97906531033466,
|
143078 |
+
"grad_norm": 3.605028591840654,
|
143079 |
+
"learning_rate": 1.1124738531534395e-09,
|
143080 |
+
"loss": 1.0721,
|
143081 |
+
"step": 101035
|
143082 |
+
},
|
143083 |
+
{
|
143084 |
+
"epoch": 2.9792127377266695,
|
143085 |
+
"grad_norm": 3.5182178585874224,
|
143086 |
+
"learning_rate": 1.096861139814609e-09,
|
143087 |
+
"loss": 1.0114,
|
143088 |
+
"step": 101040
|
143089 |
+
},
|
143090 |
+
{
|
143091 |
+
"epoch": 2.979360165118679,
|
143092 |
+
"grad_norm": 3.6112919384262434,
|
143093 |
+
"learning_rate": 1.0813587422314753e-09,
|
143094 |
+
"loss": 1.0871,
|
143095 |
+
"step": 101045
|
143096 |
+
},
|
143097 |
+
{
|
143098 |
+
"epoch": 2.9795075925106884,
|
143099 |
+
"grad_norm": 3.6427611754999476,
|
143100 |
+
"learning_rate": 1.0659666608603402e-09,
|
143101 |
+
"loss": 1.0705,
|
143102 |
+
"step": 101050
|
143103 |
+
},
|
143104 |
+
{
|
143105 |
+
"epoch": 2.979655019902698,
|
143106 |
+
"grad_norm": 3.4410974273060737,
|
143107 |
+
"learning_rate": 1.0506848961537585e-09,
|
143108 |
+
"loss": 1.0303,
|
143109 |
+
"step": 101055
|
143110 |
+
},
|
143111 |
+
{
|
143112 |
+
"epoch": 2.9798024472947073,
|
143113 |
+
"grad_norm": 3.4255610359334896,
|
143114 |
+
"learning_rate": 1.0355134485613705e-09,
|
143115 |
+
"loss": 1.0165,
|
143116 |
+
"step": 101060
|
143117 |
+
},
|
143118 |
+
{
|
143119 |
+
"epoch": 2.979949874686717,
|
143120 |
+
"grad_norm": 3.518099437282749,
|
143121 |
+
"learning_rate": 1.0204523185303183e-09,
|
143122 |
+
"loss": 1.0463,
|
143123 |
+
"step": 101065
|
143124 |
+
},
|
143125 |
+
{
|
143126 |
+
"epoch": 2.9800973020787263,
|
143127 |
+
"grad_norm": 3.6946436726446112,
|
143128 |
+
"learning_rate": 1.0055015065031647e-09,
|
143129 |
+
"loss": 1.0527,
|
143130 |
+
"step": 101070
|
143131 |
+
},
|
143132 |
+
{
|
143133 |
+
"epoch": 2.9802447294707357,
|
143134 |
+
"grad_norm": 3.448413224342889,
|
143135 |
+
"learning_rate": 9.906610129199744e-10,
|
143136 |
+
"loss": 1.0322,
|
143137 |
+
"step": 101075
|
143138 |
+
},
|
143139 |
+
{
|
143140 |
+
"epoch": 2.980392156862745,
|
143141 |
+
"grad_norm": 3.402023415946032,
|
143142 |
+
"learning_rate": 9.759308382174814e-10,
|
143143 |
+
"loss": 0.9987,
|
143144 |
+
"step": 101080
|
143145 |
+
},
|
143146 |
+
{
|
143147 |
+
"epoch": 2.9805395842547546,
|
143148 |
+
"grad_norm": 3.342541522760159,
|
143149 |
+
"learning_rate": 9.613109828290889e-10,
|
143150 |
+
"loss": 1.0905,
|
143151 |
+
"step": 101085
|
143152 |
+
},
|
143153 |
+
{
|
143154 |
+
"epoch": 2.980687011646764,
|
143155 |
+
"grad_norm": 3.454606506925015,
|
143156 |
+
"learning_rate": 9.468014471852859e-10,
|
143157 |
+
"loss": 1.0602,
|
143158 |
+
"step": 101090
|
143159 |
+
},
|
143160 |
+
{
|
143161 |
+
"epoch": 2.9808344390387735,
|
143162 |
+
"grad_norm": 3.564885357788969,
|
143163 |
+
"learning_rate": 9.324022317128144e-10,
|
143164 |
+
"loss": 1.0696,
|
143165 |
+
"step": 101095
|
143166 |
+
},
|
143167 |
+
{
|
143168 |
+
"epoch": 2.980981866430783,
|
143169 |
+
"grad_norm": 3.526371490811913,
|
143170 |
+
"learning_rate": 9.181133368350858e-10,
|
143171 |
+
"loss": 1.0453,
|
143172 |
+
"step": 101100
|
143173 |
+
},
|
143174 |
+
{
|
143175 |
+
"epoch": 2.981129293822792,
|
143176 |
+
"grad_norm": 3.4591675287004926,
|
143177 |
+
"learning_rate": 9.039347629730133e-10,
|
143178 |
+
"loss": 1.0324,
|
143179 |
+
"step": 101105
|
143180 |
+
},
|
143181 |
+
{
|
143182 |
+
"epoch": 2.981276721214802,
|
143183 |
+
"grad_norm": 3.61466606312567,
|
143184 |
+
"learning_rate": 8.898665105437631e-10,
|
143185 |
+
"loss": 1.0631,
|
143186 |
+
"step": 101110
|
143187 |
+
},
|
143188 |
+
{
|
143189 |
+
"epoch": 2.981424148606811,
|
143190 |
+
"grad_norm": 3.4959713738858214,
|
143191 |
+
"learning_rate": 8.75908579961171e-10,
|
143192 |
+
"loss": 1.0306,
|
143193 |
+
"step": 101115
|
143194 |
+
},
|
143195 |
+
{
|
143196 |
+
"epoch": 2.981571575998821,
|
143197 |
+
"grad_norm": 3.3763188408032794,
|
143198 |
+
"learning_rate": 8.620609716361583e-10,
|
143199 |
+
"loss": 1.0512,
|
143200 |
+
"step": 101120
|
143201 |
+
},
|
143202 |
+
{
|
143203 |
+
"epoch": 2.98171900339083,
|
143204 |
+
"grad_norm": 3.7657892330093325,
|
143205 |
+
"learning_rate": 8.483236859758991e-10,
|
143206 |
+
"loss": 1.0523,
|
143207 |
+
"step": 101125
|
143208 |
+
},
|
143209 |
+
{
|
143210 |
+
"epoch": 2.9818664307828393,
|
143211 |
+
"grad_norm": 3.4594441903672095,
|
143212 |
+
"learning_rate": 8.3469672338507e-10,
|
143213 |
+
"loss": 1.0465,
|
143214 |
+
"step": 101130
|
143215 |
+
},
|
143216 |
+
{
|
143217 |
+
"epoch": 2.9820138581748488,
|
143218 |
+
"grad_norm": 3.4597460711912955,
|
143219 |
+
"learning_rate": 8.211800842641837e-10,
|
143220 |
+
"loss": 1.0685,
|
143221 |
+
"step": 101135
|
143222 |
+
},
|
143223 |
+
{
|
143224 |
+
"epoch": 2.982161285566858,
|
143225 |
+
"grad_norm": 3.478891287082443,
|
143226 |
+
"learning_rate": 8.077737690112552e-10,
|
143227 |
+
"loss": 1.0099,
|
143228 |
+
"step": 101140
|
143229 |
+
},
|
143230 |
+
{
|
143231 |
+
"epoch": 2.9823087129588677,
|
143232 |
+
"grad_norm": 3.5790413855179763,
|
143233 |
+
"learning_rate": 7.944777780205525e-10,
|
143234 |
+
"loss": 1.0418,
|
143235 |
+
"step": 101145
|
143236 |
+
},
|
143237 |
+
{
|
143238 |
+
"epoch": 2.982456140350877,
|
143239 |
+
"grad_norm": 3.5825051688671548,
|
143240 |
+
"learning_rate": 7.812921116834293e-10,
|
143241 |
+
"loss": 1.0199,
|
143242 |
+
"step": 101150
|
143243 |
+
},
|
143244 |
+
{
|
143245 |
+
"epoch": 2.9826035677428866,
|
143246 |
+
"grad_norm": 3.72426110666006,
|
143247 |
+
"learning_rate": 7.682167703883247e-10,
|
143248 |
+
"loss": 1.0118,
|
143249 |
+
"step": 101155
|
143250 |
+
},
|
143251 |
+
{
|
143252 |
+
"epoch": 2.982750995134896,
|
143253 |
+
"grad_norm": 3.2327017015211887,
|
143254 |
+
"learning_rate": 7.552517545195148e-10,
|
143255 |
+
"loss": 1.0427,
|
143256 |
+
"step": 101160
|
143257 |
+
},
|
143258 |
+
{
|
143259 |
+
"epoch": 2.9828984225269055,
|
143260 |
+
"grad_norm": 3.491633347366196,
|
143261 |
+
"learning_rate": 7.423970644583611e-10,
|
143262 |
+
"loss": 1.0209,
|
143263 |
+
"step": 101165
|
143264 |
+
},
|
143265 |
+
{
|
143266 |
+
"epoch": 2.983045849918915,
|
143267 |
+
"grad_norm": 3.453507886484639,
|
143268 |
+
"learning_rate": 7.296527005833109e-10,
|
143269 |
+
"loss": 1.0415,
|
143270 |
+
"step": 101170
|
143271 |
+
},
|
143272 |
+
{
|
143273 |
+
"epoch": 2.9831932773109244,
|
143274 |
+
"grad_norm": 3.5454516722266076,
|
143275 |
+
"learning_rate": 7.170186632698972e-10,
|
143276 |
+
"loss": 1.0758,
|
143277 |
+
"step": 101175
|
143278 |
+
},
|
143279 |
+
{
|
143280 |
+
"epoch": 2.983340704702934,
|
143281 |
+
"grad_norm": 3.6460624808611453,
|
143282 |
+
"learning_rate": 7.044949528890732e-10,
|
143283 |
+
"loss": 1.0672,
|
143284 |
+
"step": 101180
|
143285 |
+
},
|
143286 |
+
{
|
143287 |
+
"epoch": 2.9834881320949433,
|
143288 |
+
"grad_norm": 3.5711970795526264,
|
143289 |
+
"learning_rate": 6.920815698097105e-10,
|
143290 |
+
"loss": 1.0506,
|
143291 |
+
"step": 101185
|
143292 |
+
},
|
143293 |
+
{
|
143294 |
+
"epoch": 2.983635559486953,
|
143295 |
+
"grad_norm": 3.273061716783228,
|
143296 |
+
"learning_rate": 6.7977851439735e-10,
|
143297 |
+
"loss": 1.0261,
|
143298 |
+
"step": 101190
|
143299 |
+
},
|
143300 |
+
{
|
143301 |
+
"epoch": 2.9837829868789623,
|
143302 |
+
"grad_norm": 3.4808884364076214,
|
143303 |
+
"learning_rate": 6.675857870137858e-10,
|
143304 |
+
"loss": 1.0396,
|
143305 |
+
"step": 101195
|
143306 |
+
},
|
143307 |
+
{
|
143308 |
+
"epoch": 2.9839304142709713,
|
143309 |
+
"grad_norm": 3.5016438609583447,
|
143310 |
+
"learning_rate": 6.55503388017481e-10,
|
143311 |
+
"loss": 1.0476,
|
143312 |
+
"step": 101200
|
143313 |
+
},
|
143314 |
+
{
|
143315 |
+
"epoch": 2.984077841662981,
|
143316 |
+
"grad_norm": 3.5703386916353916,
|
143317 |
+
"learning_rate": 6.435313177644009e-10,
|
143318 |
+
"loss": 1.0052,
|
143319 |
+
"step": 101205
|
143320 |
+
},
|
143321 |
+
{
|
143322 |
+
"epoch": 2.98422526905499,
|
143323 |
+
"grad_norm": 3.4695232862227834,
|
143324 |
+
"learning_rate": 6.316695766071801e-10,
|
143325 |
+
"loss": 1.0615,
|
143326 |
+
"step": 101210
|
143327 |
+
},
|
143328 |
+
{
|
143329 |
+
"epoch": 2.984372696447,
|
143330 |
+
"grad_norm": 3.4624847737998463,
|
143331 |
+
"learning_rate": 6.199181648938734e-10,
|
143332 |
+
"loss": 1.0163,
|
143333 |
+
"step": 101215
|
143334 |
+
},
|
143335 |
+
{
|
143336 |
+
"epoch": 2.984520123839009,
|
143337 |
+
"grad_norm": 3.4876069721782663,
|
143338 |
+
"learning_rate": 6.082770829708706e-10,
|
143339 |
+
"loss": 1.0602,
|
143340 |
+
"step": 101220
|
143341 |
+
},
|
143342 |
+
{
|
143343 |
+
"epoch": 2.9846675512310186,
|
143344 |
+
"grad_norm": 3.532602947885916,
|
143345 |
+
"learning_rate": 5.967463311808141e-10,
|
143346 |
+
"loss": 1.0071,
|
143347 |
+
"step": 101225
|
143348 |
+
},
|
143349 |
+
{
|
143350 |
+
"epoch": 2.984814978623028,
|
143351 |
+
"grad_norm": 3.4541837850505153,
|
143352 |
+
"learning_rate": 5.85325909863016e-10,
|
143353 |
+
"loss": 1.0531,
|
143354 |
+
"step": 101230
|
143355 |
+
},
|
143356 |
+
{
|
143357 |
+
"epoch": 2.9849624060150375,
|
143358 |
+
"grad_norm": 3.4316843411771694,
|
143359 |
+
"learning_rate": 5.740158193530409e-10,
|
143360 |
+
"loss": 1.0539,
|
143361 |
+
"step": 101235
|
143362 |
+
},
|
143363 |
+
{
|
143364 |
+
"epoch": 2.985109833407047,
|
143365 |
+
"grad_norm": 3.6204892058067792,
|
143366 |
+
"learning_rate": 5.628160599843723e-10,
|
143367 |
+
"loss": 1.0215,
|
143368 |
+
"step": 101240
|
143369 |
+
},
|
143370 |
+
{
|
143371 |
+
"epoch": 2.9852572607990564,
|
143372 |
+
"grad_norm": 3.4593649070013583,
|
143373 |
+
"learning_rate": 5.517266320859138e-10,
|
143374 |
+
"loss": 1.0409,
|
143375 |
+
"step": 101245
|
143376 |
+
},
|
143377 |
+
{
|
143378 |
+
"epoch": 2.985404688191066,
|
143379 |
+
"grad_norm": 3.4946329775197937,
|
143380 |
+
"learning_rate": 5.407475359844871e-10,
|
143381 |
+
"loss": 1.0042,
|
143382 |
+
"step": 101250
|
143383 |
+
},
|
143384 |
+
{
|
143385 |
+
"epoch": 2.9855521155830753,
|
143386 |
+
"grad_norm": 3.440147903718295,
|
143387 |
+
"learning_rate": 5.298787720031673e-10,
|
143388 |
+
"loss": 1.0216,
|
143389 |
+
"step": 101255
|
143390 |
+
},
|
143391 |
+
{
|
143392 |
+
"epoch": 2.9856995429750848,
|
143393 |
+
"grad_norm": 3.641877983015855,
|
143394 |
+
"learning_rate": 5.191203404612821e-10,
|
143395 |
+
"loss": 0.9965,
|
143396 |
+
"step": 101260
|
143397 |
+
},
|
143398 |
+
{
|
143399 |
+
"epoch": 2.9858469703670942,
|
143400 |
+
"grad_norm": 3.382978868155368,
|
143401 |
+
"learning_rate": 5.084722416760779e-10,
|
143402 |
+
"loss": 1.0572,
|
143403 |
+
"step": 101265
|
143404 |
+
},
|
143405 |
+
{
|
143406 |
+
"epoch": 2.9859943977591037,
|
143407 |
+
"grad_norm": 3.525243679247285,
|
143408 |
+
"learning_rate": 4.979344759602212e-10,
|
143409 |
+
"loss": 1.0051,
|
143410 |
+
"step": 101270
|
143411 |
+
},
|
143412 |
+
{
|
143413 |
+
"epoch": 2.986141825151113,
|
143414 |
+
"grad_norm": 3.4344871646063306,
|
143415 |
+
"learning_rate": 4.875070436242968e-10,
|
143416 |
+
"loss": 1.0362,
|
143417 |
+
"step": 101275
|
143418 |
+
},
|
143419 |
+
{
|
143420 |
+
"epoch": 2.9862892525431226,
|
143421 |
+
"grad_norm": 3.482552862049224,
|
143422 |
+
"learning_rate": 4.771899449751427e-10,
|
143423 |
+
"loss": 1.0216,
|
143424 |
+
"step": 101280
|
143425 |
+
},
|
143426 |
+
{
|
143427 |
+
"epoch": 2.986436679935132,
|
143428 |
+
"grad_norm": 3.4315918878145184,
|
143429 |
+
"learning_rate": 4.669831803158498e-10,
|
143430 |
+
"loss": 0.9928,
|
143431 |
+
"step": 101285
|
143432 |
+
},
|
143433 |
+
{
|
143434 |
+
"epoch": 2.9865841073271415,
|
143435 |
+
"grad_norm": 3.640120820952677,
|
143436 |
+
"learning_rate": 4.568867499474272e-10,
|
143437 |
+
"loss": 1.0284,
|
143438 |
+
"step": 101290
|
143439 |
+
},
|
143440 |
+
{
|
143441 |
+
"epoch": 2.986731534719151,
|
143442 |
+
"grad_norm": 3.5196349297443454,
|
143443 |
+
"learning_rate": 4.4690065416630456e-10,
|
143444 |
+
"loss": 1.0819,
|
143445 |
+
"step": 101295
|
143446 |
+
},
|
143447 |
+
{
|
143448 |
+
"epoch": 2.9868789621111604,
|
143449 |
+
"grad_norm": 3.5351800333635204,
|
143450 |
+
"learning_rate": 4.370248932668297e-10,
|
143451 |
+
"loss": 1.0291,
|
143452 |
+
"step": 101300
|
143453 |
+
},
|
143454 |
+
{
|
143455 |
+
"epoch": 2.9870263895031695,
|
143456 |
+
"grad_norm": 3.4792705042417307,
|
143457 |
+
"learning_rate": 4.2725946753960353e-10,
|
143458 |
+
"loss": 1.0562,
|
143459 |
+
"step": 101305
|
143460 |
+
},
|
143461 |
+
{
|
143462 |
+
"epoch": 2.9871738168951794,
|
143463 |
+
"grad_norm": 3.6087924082531595,
|
143464 |
+
"learning_rate": 4.176043772714799e-10,
|
143465 |
+
"loss": 1.0518,
|
143466 |
+
"step": 101310
|
143467 |
+
},
|
143468 |
+
{
|
143469 |
+
"epoch": 2.9873212442871884,
|
143470 |
+
"grad_norm": 3.6417532312929315,
|
143471 |
+
"learning_rate": 4.0805962274723106e-10,
|
143472 |
+
"loss": 1.0671,
|
143473 |
+
"step": 101315
|
143474 |
+
},
|
143475 |
+
{
|
143476 |
+
"epoch": 2.987468671679198,
|
143477 |
+
"grad_norm": 3.598314667206711,
|
143478 |
+
"learning_rate": 3.9862520424746586e-10,
|
143479 |
+
"loss": 1.0464,
|
143480 |
+
"step": 101320
|
143481 |
+
},
|
143482 |
+
{
|
143483 |
+
"epoch": 2.9876160990712073,
|
143484 |
+
"grad_norm": 3.630617656529265,
|
143485 |
+
"learning_rate": 3.8930112204946246e-10,
|
143486 |
+
"loss": 1.0709,
|
143487 |
+
"step": 101325
|
143488 |
+
},
|
143489 |
+
{
|
143490 |
+
"epoch": 2.9877635264632167,
|
143491 |
+
"grad_norm": 3.5212189965174616,
|
143492 |
+
"learning_rate": 3.8008737642800105e-10,
|
143493 |
+
"loss": 1.0562,
|
143494 |
+
"step": 101330
|
143495 |
+
},
|
143496 |
+
{
|
143497 |
+
"epoch": 2.987910953855226,
|
143498 |
+
"grad_norm": 3.5690933880773175,
|
143499 |
+
"learning_rate": 3.709839676541149e-10,
|
143500 |
+
"loss": 1.0734,
|
143501 |
+
"step": 101335
|
143502 |
+
},
|
143503 |
+
{
|
143504 |
+
"epoch": 2.9880583812472357,
|
143505 |
+
"grad_norm": 3.467680786941367,
|
143506 |
+
"learning_rate": 3.619908959955065e-10,
|
143507 |
+
"loss": 1.0557,
|
143508 |
+
"step": 101340
|
143509 |
+
},
|
143510 |
+
{
|
143511 |
+
"epoch": 2.988205808639245,
|
143512 |
+
"grad_norm": 3.581249403310713,
|
143513 |
+
"learning_rate": 3.53108161716964e-10,
|
143514 |
+
"loss": 1.0339,
|
143515 |
+
"step": 101345
|
143516 |
+
},
|
143517 |
+
{
|
143518 |
+
"epoch": 2.9883532360312546,
|
143519 |
+
"grad_norm": 3.5472784568206666,
|
143520 |
+
"learning_rate": 3.44335765079945e-10,
|
143521 |
+
"loss": 1.0384,
|
143522 |
+
"step": 101350
|
143523 |
+
},
|
143524 |
+
{
|
143525 |
+
"epoch": 2.988500663423264,
|
143526 |
+
"grad_norm": 3.451518254268168,
|
143527 |
+
"learning_rate": 3.3567370634257633e-10,
|
143528 |
+
"loss": 1.0677,
|
143529 |
+
"step": 101355
|
143530 |
+
},
|
143531 |
+
{
|
143532 |
+
"epoch": 2.9886480908152735,
|
143533 |
+
"grad_norm": 3.5719134626976357,
|
143534 |
+
"learning_rate": 3.2712198575965414e-10,
|
143535 |
+
"loss": 1.0635,
|
143536 |
+
"step": 101360
|
143537 |
+
},
|
143538 |
+
{
|
143539 |
+
"epoch": 2.988795518207283,
|
143540 |
+
"grad_norm": 3.432616193253789,
|
143541 |
+
"learning_rate": 3.186806035830603e-10,
|
143542 |
+
"loss": 1.043,
|
143543 |
+
"step": 101365
|
143544 |
+
},
|
143545 |
+
{
|
143546 |
+
"epoch": 2.9889429455992924,
|
143547 |
+
"grad_norm": 3.3624266843713224,
|
143548 |
+
"learning_rate": 3.103495600605133e-10,
|
143549 |
+
"loss": 1.045,
|
143550 |
+
"step": 101370
|
143551 |
+
},
|
143552 |
+
{
|
143553 |
+
"epoch": 2.989090372991302,
|
143554 |
+
"grad_norm": 3.466143176735262,
|
143555 |
+
"learning_rate": 3.0212885543806636e-10,
|
143556 |
+
"loss": 1.0477,
|
143557 |
+
"step": 101375
|
143558 |
+
},
|
143559 |
+
{
|
143560 |
+
"epoch": 2.9892378003833113,
|
143561 |
+
"grad_norm": 3.4021280419706437,
|
143562 |
+
"learning_rate": 2.940184899567766e-10,
|
143563 |
+
"loss": 1.0231,
|
143564 |
+
"step": 101380
|
143565 |
+
},
|
143566 |
+
{
|
143567 |
+
"epoch": 2.989385227775321,
|
143568 |
+
"grad_norm": 3.482349740258068,
|
143569 |
+
"learning_rate": 2.8601846385603594e-10,
|
143570 |
+
"loss": 1.013,
|
143571 |
+
"step": 101385
|
143572 |
+
},
|
143573 |
+
{
|
143574 |
+
"epoch": 2.9895326551673302,
|
143575 |
+
"grad_norm": 3.3774775631369285,
|
143576 |
+
"learning_rate": 2.7812877737065644e-10,
|
143577 |
+
"loss": 1.061,
|
143578 |
+
"step": 101390
|
143579 |
+
},
|
143580 |
+
{
|
143581 |
+
"epoch": 2.9896800825593397,
|
143582 |
+
"grad_norm": 3.392442747807713,
|
143583 |
+
"learning_rate": 2.703494307333687e-10,
|
143584 |
+
"loss": 1.0186,
|
143585 |
+
"step": 101395
|
143586 |
+
},
|
143587 |
+
{
|
143588 |
+
"epoch": 2.9898275099513487,
|
143589 |
+
"grad_norm": 3.460299618114275,
|
143590 |
+
"learning_rate": 2.6268042417232354e-10,
|
143591 |
+
"loss": 1.0409,
|
143592 |
+
"step": 101400
|
143593 |
+
},
|
143594 |
+
{
|
143595 |
+
"epoch": 2.9899749373433586,
|
143596 |
+
"grad_norm": 3.344860649341353,
|
143597 |
+
"learning_rate": 2.551217579140064e-10,
|
143598 |
+
"loss": 1.0053,
|
143599 |
+
"step": 101405
|
143600 |
+
},
|
143601 |
+
{
|
143602 |
+
"epoch": 2.9901223647353676,
|
143603 |
+
"grad_norm": 3.4736930796092076,
|
143604 |
+
"learning_rate": 2.476734321803231e-10,
|
143605 |
+
"loss": 1.029,
|
143606 |
+
"step": 101410
|
143607 |
+
},
|
143608 |
+
{
|
143609 |
+
"epoch": 2.9902697921273775,
|
143610 |
+
"grad_norm": 3.551410509978409,
|
143611 |
+
"learning_rate": 2.403354471910979e-10,
|
143612 |
+
"loss": 1.0635,
|
143613 |
+
"step": 101415
|
143614 |
+
},
|
143615 |
+
{
|
143616 |
+
"epoch": 2.9904172195193865,
|
143617 |
+
"grad_norm": 3.467831834220946,
|
143618 |
+
"learning_rate": 2.3310780316115886e-10,
|
143619 |
+
"loss": 1.0249,
|
143620 |
+
"step": 101420
|
143621 |
+
},
|
143622 |
+
{
|
143623 |
+
"epoch": 2.990564646911396,
|
143624 |
+
"grad_norm": 3.5589342007969487,
|
143625 |
+
"learning_rate": 2.2599050030408518e-10,
|
143626 |
+
"loss": 1.0117,
|
143627 |
+
"step": 101425
|
143628 |
+
},
|
143629 |
+
{
|
143630 |
+
"epoch": 2.9907120743034055,
|
143631 |
+
"grad_norm": 3.417192542375895,
|
143632 |
+
"learning_rate": 2.1898353882887635e-10,
|
143633 |
+
"loss": 1.0377,
|
143634 |
+
"step": 101430
|
143635 |
+
},
|
143636 |
+
{
|
143637 |
+
"epoch": 2.990859501695415,
|
143638 |
+
"grad_norm": 3.4742568728720356,
|
143639 |
+
"learning_rate": 2.120869189420338e-10,
|
143640 |
+
"loss": 1.0835,
|
143641 |
+
"step": 101435
|
143642 |
+
},
|
143643 |
+
{
|
143644 |
+
"epoch": 2.9910069290874244,
|
143645 |
+
"grad_norm": 3.4750269372761315,
|
143646 |
+
"learning_rate": 2.0530064084631207e-10,
|
143647 |
+
"loss": 1.0402,
|
143648 |
+
"step": 101440
|
143649 |
+
},
|
143650 |
+
{
|
143651 |
+
"epoch": 2.991154356479434,
|
143652 |
+
"grad_norm": 3.4408230104144435,
|
143653 |
+
"learning_rate": 1.9862470474155125e-10,
|
143654 |
+
"loss": 1.0565,
|
143655 |
+
"step": 101445
|
143656 |
+
},
|
143657 |
+
{
|
143658 |
+
"epoch": 2.9913017838714433,
|
143659 |
+
"grad_norm": 3.4402703797857574,
|
143660 |
+
"learning_rate": 1.9205911082384454e-10,
|
143661 |
+
"loss": 1.0445,
|
143662 |
+
"step": 101450
|
143663 |
+
},
|
143664 |
+
{
|
143665 |
+
"epoch": 2.9914492112634528,
|
143666 |
+
"grad_norm": 3.5966249365895306,
|
143667 |
+
"learning_rate": 1.8560385928678703e-10,
|
143668 |
+
"loss": 1.0588,
|
143669 |
+
"step": 101455
|
143670 |
+
},
|
143671 |
+
{
|
143672 |
+
"epoch": 2.991596638655462,
|
143673 |
+
"grad_norm": 3.598684815000747,
|
143674 |
+
"learning_rate": 1.7925895032022688e-10,
|
143675 |
+
"loss": 1.0179,
|
143676 |
+
"step": 101460
|
143677 |
+
},
|
143678 |
+
{
|
143679 |
+
"epoch": 2.9917440660474717,
|
143680 |
+
"grad_norm": 3.351735520513655,
|
143681 |
+
"learning_rate": 1.7302438411068156e-10,
|
143682 |
+
"loss": 1.0233,
|
143683 |
+
"step": 101465
|
143684 |
+
},
|
143685 |
+
{
|
143686 |
+
"epoch": 2.991891493439481,
|
143687 |
+
"grad_norm": 3.4684775161292056,
|
143688 |
+
"learning_rate": 1.669001608417542e-10,
|
143689 |
+
"loss": 1.0367,
|
143690 |
+
"step": 101470
|
143691 |
+
},
|
143692 |
+
{
|
143693 |
+
"epoch": 2.9920389208314906,
|
143694 |
+
"grad_norm": 3.4699842103572895,
|
143695 |
+
"learning_rate": 1.6088628069371724e-10,
|
143696 |
+
"loss": 1.0699,
|
143697 |
+
"step": 101475
|
143698 |
+
},
|
143699 |
+
{
|
143700 |
+
"epoch": 2.9921863482235,
|
143701 |
+
"grad_norm": 3.607935043815634,
|
143702 |
+
"learning_rate": 1.5498274384351252e-10,
|
143703 |
+
"loss": 1.0481,
|
143704 |
+
"step": 101480
|
143705 |
+
},
|
143706 |
+
{
|
143707 |
+
"epoch": 2.9923337756155095,
|
143708 |
+
"grad_norm": 3.7157662384137633,
|
143709 |
+
"learning_rate": 1.4918955046475113e-10,
|
143710 |
+
"loss": 1.031,
|
143711 |
+
"step": 101485
|
143712 |
+
},
|
143713 |
+
{
|
143714 |
+
"epoch": 2.992481203007519,
|
143715 |
+
"grad_norm": 3.4233429587255833,
|
143716 |
+
"learning_rate": 1.4350670072812988e-10,
|
143717 |
+
"loss": 1.0738,
|
143718 |
+
"step": 101490
|
143719 |
+
},
|
143720 |
+
{
|
143721 |
+
"epoch": 2.992628630399528,
|
143722 |
+
"grad_norm": 3.45468407123128,
|
143723 |
+
"learning_rate": 1.3793419480059856e-10,
|
143724 |
+
"loss": 1.074,
|
143725 |
+
"step": 101495
|
143726 |
+
},
|
143727 |
+
{
|
143728 |
+
"epoch": 2.992776057791538,
|
143729 |
+
"grad_norm": 3.583597140930575,
|
143730 |
+
"learning_rate": 1.3247203284619258e-10,
|
143731 |
+
"loss": 1.0432,
|
143732 |
+
"step": 101500
|
143733 |
+
},
|
143734 |
+
{
|
143735 |
+
"epoch": 2.992776057791538,
|
143736 |
+
"eval_loss": 1.0764915943145752,
|
143737 |
+
"eval_runtime": 4.254,
|
143738 |
+
"eval_samples_per_second": 93.088,
|
143739 |
+
"eval_steps_per_second": 3.056,
|
143740 |
+
"step": 101500
|
143741 |
+
},
|
143742 |
+
{
|
143743 |
+
"epoch": 2.992923485183547,
|
143744 |
+
"grad_norm": 3.5355851788415706,
|
143745 |
+
"learning_rate": 1.2712021502561677e-10,
|
143746 |
+
"loss": 1.0772,
|
143747 |
+
"step": 101505
|
143748 |
+
},
|
143749 |
+
{
|
143750 |
+
"epoch": 2.993070912575557,
|
143751 |
+
"grad_norm": 3.4939571531411224,
|
143752 |
+
"learning_rate": 1.2187874149666156e-10,
|
143753 |
+
"loss": 1.0127,
|
143754 |
+
"step": 101510
|
143755 |
+
},
|
143756 |
+
{
|
143757 |
+
"epoch": 2.993218339967566,
|
143758 |
+
"grad_norm": 3.470063825606587,
|
143759 |
+
"learning_rate": 1.1674761241295405e-10,
|
143760 |
+
"loss": 1.0239,
|
143761 |
+
"step": 101515
|
143762 |
+
},
|
143763 |
+
{
|
143764 |
+
"epoch": 2.9933657673595753,
|
143765 |
+
"grad_norm": 3.560649499716591,
|
143766 |
+
"learning_rate": 1.1172682792603972e-10,
|
143767 |
+
"loss": 1.0152,
|
143768 |
+
"step": 101520
|
143769 |
+
},
|
143770 |
+
{
|
143771 |
+
"epoch": 2.9935131947515847,
|
143772 |
+
"grad_norm": 3.4247028373360533,
|
143773 |
+
"learning_rate": 1.0681638818371698e-10,
|
143774 |
+
"loss": 1.0436,
|
143775 |
+
"step": 101525
|
143776 |
+
},
|
143777 |
+
{
|
143778 |
+
"epoch": 2.993660622143594,
|
143779 |
+
"grad_norm": 3.4755116301999167,
|
143780 |
+
"learning_rate": 1.0201629333003726e-10,
|
143781 |
+
"loss": 1.0526,
|
143782 |
+
"step": 101530
|
143783 |
+
},
|
143784 |
+
{
|
143785 |
+
"epoch": 2.9938080495356036,
|
143786 |
+
"grad_norm": 3.481811389027158,
|
143787 |
+
"learning_rate": 9.732654350655401e-11,
|
143788 |
+
"loss": 1.0692,
|
143789 |
+
"step": 101535
|
143790 |
+
},
|
143791 |
+
{
|
143792 |
+
"epoch": 2.993955476927613,
|
143793 |
+
"grad_norm": 3.5753634136086236,
|
143794 |
+
"learning_rate": 9.274713885107366e-11,
|
143795 |
+
"loss": 1.0479,
|
143796 |
+
"step": 101540
|
143797 |
+
},
|
143798 |
+
{
|
143799 |
+
"epoch": 2.9941029043196226,
|
143800 |
+
"grad_norm": 3.5325899938280814,
|
143801 |
+
"learning_rate": 8.827807949848831e-11,
|
143802 |
+
"loss": 1.0252,
|
143803 |
+
"step": 101545
|
143804 |
+
},
|
143805 |
+
{
|
143806 |
+
"epoch": 2.994250331711632,
|
143807 |
+
"grad_norm": 3.4004667810107727,
|
143808 |
+
"learning_rate": 8.39193655803594e-11,
|
143809 |
+
"loss": 1.0304,
|
143810 |
+
"step": 101550
|
143811 |
+
},
|
143812 |
+
{
|
143813 |
+
"epoch": 2.9943977591036415,
|
143814 |
+
"grad_norm": 3.5140789472054865,
|
143815 |
+
"learning_rate": 7.967099722491767e-11,
|
143816 |
+
"loss": 1.0391,
|
143817 |
+
"step": 101555
|
143818 |
+
},
|
143819 |
+
{
|
143820 |
+
"epoch": 2.994545186495651,
|
143821 |
+
"grad_norm": 3.5834466636312294,
|
143822 |
+
"learning_rate": 7.553297455664687e-11,
|
143823 |
+
"loss": 1.0463,
|
143824 |
+
"step": 101560
|
143825 |
+
},
|
143826 |
+
{
|
143827 |
+
"epoch": 2.9946926138876604,
|
143828 |
+
"grad_norm": 3.5101558172953538,
|
143829 |
+
"learning_rate": 7.150529769836544e-11,
|
143830 |
+
"loss": 1.0687,
|
143831 |
+
"step": 101565
|
143832 |
+
},
|
143833 |
+
{
|
143834 |
+
"epoch": 2.99484004127967,
|
143835 |
+
"grad_norm": 3.4503968065645565,
|
143836 |
+
"learning_rate": 6.75879667678958e-11,
|
143837 |
+
"loss": 1.0365,
|
143838 |
+
"step": 101570
|
143839 |
+
},
|
143840 |
+
{
|
143841 |
+
"epoch": 2.9949874686716793,
|
143842 |
+
"grad_norm": 3.4777642071518486,
|
143843 |
+
"learning_rate": 6.378098188014602e-11,
|
143844 |
+
"loss": 1.0362,
|
143845 |
+
"step": 101575
|
143846 |
+
},
|
143847 |
+
{
|
143848 |
+
"epoch": 2.9951348960636888,
|
143849 |
+
"grad_norm": 3.496130635484323,
|
143850 |
+
"learning_rate": 6.008434314835887e-11,
|
143851 |
+
"loss": 1.0588,
|
143852 |
+
"step": 101580
|
143853 |
+
},
|
143854 |
+
{
|
143855 |
+
"epoch": 2.9952823234556982,
|
143856 |
+
"grad_norm": 3.645770543256753,
|
143857 |
+
"learning_rate": 5.64980506799484e-11,
|
143858 |
+
"loss": 1.0623,
|
143859 |
+
"step": 101585
|
143860 |
+
},
|
143861 |
+
{
|
143862 |
+
"epoch": 2.9954297508477072,
|
143863 |
+
"grad_norm": 3.3236397532043496,
|
143864 |
+
"learning_rate": 5.3022104581496036e-11,
|
143865 |
+
"loss": 0.9774,
|
143866 |
+
"step": 101590
|
143867 |
+
},
|
143868 |
+
{
|
143869 |
+
"epoch": 2.995577178239717,
|
143870 |
+
"grad_norm": 3.400033187644708,
|
143871 |
+
"learning_rate": 4.965650495458718e-11,
|
143872 |
+
"loss": 1.071,
|
143873 |
+
"step": 101595
|
143874 |
+
},
|
143875 |
+
{
|
143876 |
+
"epoch": 2.995724605631726,
|
143877 |
+
"grad_norm": 3.520058189013029,
|
143878 |
+
"learning_rate": 4.640125189872557e-11,
|
143879 |
+
"loss": 1.0006,
|
143880 |
+
"step": 101600
|
143881 |
+
},
|
143882 |
+
{
|
143883 |
+
"epoch": 2.995872033023736,
|
143884 |
+
"grad_norm": 3.39774628663836,
|
143885 |
+
"learning_rate": 4.3256345509251616e-11,
|
143886 |
+
"loss": 1.0381,
|
143887 |
+
"step": 101605
|
143888 |
+
},
|
143889 |
+
{
|
143890 |
+
"epoch": 2.996019460415745,
|
143891 |
+
"grad_norm": 3.3864876142348836,
|
143892 |
+
"learning_rate": 4.022178587900771e-11,
|
143893 |
+
"loss": 1.0383,
|
143894 |
+
"step": 101610
|
143895 |
+
},
|
143896 |
+
{
|
143897 |
+
"epoch": 2.9961668878077545,
|
143898 |
+
"grad_norm": 3.385010992472616,
|
143899 |
+
"learning_rate": 3.7297573097505586e-11,
|
143900 |
+
"loss": 1.0082,
|
143901 |
+
"step": 101615
|
143902 |
+
},
|
143903 |
+
{
|
143904 |
+
"epoch": 2.996314315199764,
|
143905 |
+
"grad_norm": 3.552251985271931,
|
143906 |
+
"learning_rate": 3.4483707250093645e-11,
|
143907 |
+
"loss": 1.036,
|
143908 |
+
"step": 101620
|
143909 |
+
},
|
143910 |
+
{
|
143911 |
+
"epoch": 2.9964617425917734,
|
143912 |
+
"grad_norm": 3.4661072663723482,
|
143913 |
+
"learning_rate": 3.1780188420454934e-11,
|
143914 |
+
"loss": 1.0048,
|
143915 |
+
"step": 101625
|
143916 |
+
},
|
143917 |
+
{
|
143918 |
+
"epoch": 2.996609169983783,
|
143919 |
+
"grad_norm": 3.551376018059265,
|
143920 |
+
"learning_rate": 2.9187016687692856e-11,
|
143921 |
+
"loss": 1.0143,
|
143922 |
+
"step": 101630
|
143923 |
+
},
|
143924 |
+
{
|
143925 |
+
"epoch": 2.9967565973757924,
|
143926 |
+
"grad_norm": 3.588832998457119,
|
143927 |
+
"learning_rate": 2.670419212758013e-11,
|
143928 |
+
"loss": 1.0488,
|
143929 |
+
"step": 101635
|
143930 |
+
},
|
143931 |
+
{
|
143932 |
+
"epoch": 2.996904024767802,
|
143933 |
+
"grad_norm": 3.396140604846354,
|
143934 |
+
"learning_rate": 2.4331714814224136e-11,
|
143935 |
+
"loss": 1.0508,
|
143936 |
+
"step": 101640
|
143937 |
+
},
|
143938 |
+
{
|
143939 |
+
"epoch": 2.9970514521598113,
|
143940 |
+
"grad_norm": 3.4687608645192762,
|
143941 |
+
"learning_rate": 2.2069584816736265e-11,
|
143942 |
+
"loss": 1.0623,
|
143943 |
+
"step": 101645
|
143944 |
+
},
|
143945 |
+
{
|
143946 |
+
"epoch": 2.9971988795518207,
|
143947 |
+
"grad_norm": 3.4540543392812477,
|
143948 |
+
"learning_rate": 1.991780220214623e-11,
|
143949 |
+
"loss": 1.0335,
|
143950 |
+
"step": 101650
|
143951 |
+
},
|
143952 |
+
{
|
143953 |
+
"epoch": 2.99734630694383,
|
143954 |
+
"grad_norm": 3.4548007843882336,
|
143955 |
+
"learning_rate": 1.7876367033320406e-11,
|
143956 |
+
"loss": 1.0271,
|
143957 |
+
"step": 101655
|
143958 |
+
},
|
143959 |
+
{
|
143960 |
+
"epoch": 2.9974937343358397,
|
143961 |
+
"grad_norm": 3.5303449387835713,
|
143962 |
+
"learning_rate": 1.594527937021084e-11,
|
143963 |
+
"loss": 1.0264,
|
143964 |
+
"step": 101660
|
143965 |
+
},
|
143966 |
+
{
|
143967 |
+
"epoch": 2.997641161727849,
|
143968 |
+
"grad_norm": 3.498365409650255,
|
143969 |
+
"learning_rate": 1.4124539270271575e-11,
|
143970 |
+
"loss": 1.0587,
|
143971 |
+
"step": 101665
|
143972 |
+
},
|
143973 |
+
{
|
143974 |
+
"epoch": 2.9977885891198586,
|
143975 |
+
"grad_norm": 3.5036397863956443,
|
143976 |
+
"learning_rate": 1.2414146786793312e-11,
|
143977 |
+
"loss": 1.0159,
|
143978 |
+
"step": 101670
|
143979 |
+
},
|
143980 |
+
{
|
143981 |
+
"epoch": 2.997936016511868,
|
143982 |
+
"grad_norm": 3.5737974450898156,
|
143983 |
+
"learning_rate": 1.0814101970152424e-11,
|
143984 |
+
"loss": 1.0514,
|
143985 |
+
"step": 101675
|
143986 |
+
},
|
143987 |
+
{
|
143988 |
+
"epoch": 2.9980834439038775,
|
143989 |
+
"grad_norm": 3.426289934547559,
|
143990 |
+
"learning_rate": 9.324404866978276e-12,
|
143991 |
+
"loss": 1.048,
|
143992 |
+
"step": 101680
|
143993 |
+
},
|
143994 |
+
{
|
143995 |
+
"epoch": 2.9982308712958865,
|
143996 |
+
"grad_norm": 3.537525912002454,
|
143997 |
+
"learning_rate": 7.945055521818567e-12,
|
143998 |
+
"loss": 1.058,
|
143999 |
+
"step": 101685
|
144000 |
+
},
|
144001 |
+
{
|
144002 |
+
"epoch": 2.9983782986878964,
|
144003 |
+
"grad_norm": 3.422937332568118,
|
144004 |
+
"learning_rate": 6.6760539746413274e-12,
|
144005 |
+
"loss": 1.0753,
|
144006 |
+
"step": 101690
|
144007 |
+
},
|
144008 |
+
{
|
144009 |
+
"epoch": 2.9985257260799054,
|
144010 |
+
"grad_norm": 3.46618341561677,
|
144011 |
+
"learning_rate": 5.517400262916583e-12,
|
144012 |
+
"loss": 1.0251,
|
144013 |
+
"step": 101695
|
144014 |
+
},
|
144015 |
+
{
|
144016 |
+
"epoch": 2.9986731534719153,
|
144017 |
+
"grad_norm": 3.473010086771127,
|
144018 |
+
"learning_rate": 4.469094421200026e-12,
|
144019 |
+
"loss": 1.061,
|
144020 |
+
"step": 101700
|
144021 |
+
},
|
144022 |
+
{
|
144023 |
+
"epoch": 2.9988205808639243,
|
144024 |
+
"grad_norm": 3.498755935582447,
|
144025 |
+
"learning_rate": 3.5311364803003454e-12,
|
144026 |
+
"loss": 1.0218,
|
144027 |
+
"step": 101705
|
144028 |
+
},
|
144029 |
+
{
|
144030 |
+
"epoch": 2.998968008255934,
|
144031 |
+
"grad_norm": 3.391075897936316,
|
144032 |
+
"learning_rate": 2.7035264668628933e-12,
|
144033 |
+
"loss": 1.0147,
|
144034 |
+
"step": 101710
|
144035 |
+
},
|
144036 |
+
{
|
144037 |
+
"epoch": 2.9991154356479433,
|
144038 |
+
"grad_norm": 3.625795522250744,
|
144039 |
+
"learning_rate": 1.986264406284022e-12,
|
144040 |
+
"loss": 0.9831,
|
144041 |
+
"step": 101715
|
144042 |
+
},
|
144043 |
+
{
|
144044 |
+
"epoch": 2.9992628630399527,
|
144045 |
+
"grad_norm": 3.563423605835511,
|
144046 |
+
"learning_rate": 1.3793503193804124e-12,
|
144047 |
+
"loss": 1.0555,
|
144048 |
+
"step": 101720
|
144049 |
+
},
|
144050 |
+
{
|
144051 |
+
"epoch": 2.999410290431962,
|
144052 |
+
"grad_norm": 3.567012479198932,
|
144053 |
+
"learning_rate": 8.827842240544115e-13,
|
144054 |
+
"loss": 1.0615,
|
144055 |
+
"step": 101725
|
144056 |
+
},
|
144057 |
+
{
|
144058 |
+
"epoch": 2.9995577178239716,
|
144059 |
+
"grad_norm": 3.4603339435714893,
|
144060 |
+
"learning_rate": 4.965661344613626e-13,
|
144061 |
+
"loss": 1.0368,
|
144062 |
+
"step": 101730
|
144063 |
+
},
|
144064 |
+
{
|
144065 |
+
"epoch": 2.999705145215981,
|
144066 |
+
"grad_norm": 3.5829890417701162,
|
144067 |
+
"learning_rate": 2.2069606267494103e-13,
|
144068 |
+
"loss": 0.9983,
|
144069 |
+
"step": 101735
|
144070 |
+
},
|
144071 |
+
{
|
144072 |
+
"epoch": 2.9998525726079905,
|
144073 |
+
"grad_norm": 3.451722998370578,
|
144074 |
+
"learning_rate": 5.51740161891523e-14,
|
144075 |
+
"loss": 1.0379,
|
144076 |
+
"step": 101740
|
144077 |
+
},
|
144078 |
+
{
|
144079 |
+
"epoch": 3.0,
|
144080 |
+
"grad_norm": 3.447175611271937,
|
144081 |
+
"learning_rate": 0.0,
|
144082 |
+
"loss": 1.0307,
|
144083 |
+
"step": 101745
|
144084 |
}
|
144085 |
],
|
144086 |
"logging_steps": 5,
|
|
|
144095 |
"should_evaluate": false,
|
144096 |
"should_log": false,
|
144097 |
"should_save": true,
|
144098 |
+
"should_training_stop": true
|
144099 |
},
|
144100 |
"attributes": {}
|
144101 |
}
|
144102 |
},
|
144103 |
+
"total_flos": 1.06516665335808e+16,
|
144104 |
"train_batch_size": 4,
|
144105 |
"trial_name": null,
|
144106 |
"trial_params": null
|