Upload 13 files
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +83 -267
- training_args.bin +1 -1
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999863872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36f4082807a4a91ad2133d752fd587814a29439888c9a3db8d97583619044e9f
|
3 |
size 4999863872
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 482809696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fb02e929fe4112ec14b7703127a33f20068fba2973643efa9ba4ddf3fce3aea
|
3 |
size 482809696
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:529fe7f11f242c337c0f1a103ac077595ea1499207d09d52224640ca121e2a94
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e165e1f46e8a630f28d55072c8f17768fb98c886bdc1313ee5c85479b03c11ab
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "nllb_200_distilled_1.3B_ENtoFO_bsz_64_epochs_10lr0.0001/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -45,13 +45,13 @@
|
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.17572569219448442,
|
48 |
-
"eval_bleu": 39.
|
49 |
-
"eval_chrf++": 58.
|
50 |
-
"eval_gen_len": 17.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_runtime":
|
53 |
-
"eval_samples_per_second": 2.
|
54 |
-
"eval_steps_per_second": 1.
|
55 |
"step": 500
|
56 |
},
|
57 |
{
|
@@ -91,13 +91,13 @@
|
|
91 |
},
|
92 |
{
|
93 |
"epoch": 0.35145138438896883,
|
94 |
-
"eval_bleu": 41.
|
95 |
-
"eval_chrf++": 60.
|
96 |
-
"eval_gen_len": 17.
|
97 |
-
"eval_loss": 0.
|
98 |
-
"eval_runtime":
|
99 |
-
"eval_samples_per_second": 2.
|
100 |
-
"eval_steps_per_second": 1.
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
@@ -137,13 +137,13 @@
|
|
137 |
},
|
138 |
{
|
139 |
"epoch": 0.5271770765834533,
|
140 |
-
"eval_bleu": 41.
|
141 |
-
"eval_chrf++": 60.
|
142 |
-
"eval_gen_len": 17.
|
143 |
-
"eval_loss": 0.
|
144 |
-
"eval_runtime":
|
145 |
-
"eval_samples_per_second": 2.
|
146 |
-
"eval_steps_per_second": 1.
|
147 |
"step": 1500
|
148 |
},
|
149 |
{
|
@@ -183,13 +183,13 @@
|
|
183 |
},
|
184 |
{
|
185 |
"epoch": 0.7029027687779377,
|
186 |
-
"eval_bleu": 42.
|
187 |
-
"eval_chrf++": 61.
|
188 |
-
"eval_gen_len": 17.
|
189 |
-
"eval_loss": 0.
|
190 |
-
"eval_runtime":
|
191 |
-
"eval_samples_per_second": 2.
|
192 |
-
"eval_steps_per_second": 1.
|
193 |
"step": 2000
|
194 |
},
|
195 |
{
|
@@ -229,13 +229,13 @@
|
|
229 |
},
|
230 |
{
|
231 |
"epoch": 0.8786284609724221,
|
232 |
-
"eval_bleu": 43.
|
233 |
-
"eval_chrf++": 61.
|
234 |
-
"eval_gen_len": 17.
|
235 |
-
"eval_loss": 0.
|
236 |
-
"eval_runtime":
|
237 |
-
"eval_samples_per_second": 2.
|
238 |
-
"eval_steps_per_second": 1.
|
239 |
"step": 2500
|
240 |
},
|
241 |
{
|
@@ -275,13 +275,13 @@
|
|
275 |
},
|
276 |
{
|
277 |
"epoch": 1.0543541531669065,
|
278 |
-
"eval_bleu": 43.
|
279 |
-
"eval_chrf++": 62.
|
280 |
-
"eval_gen_len": 17.
|
281 |
-
"eval_loss": 0.
|
282 |
-
"eval_runtime":
|
283 |
-
"eval_samples_per_second": 2.
|
284 |
-
"eval_steps_per_second": 1.
|
285 |
"step": 3000
|
286 |
},
|
287 |
{
|
@@ -321,13 +321,13 @@
|
|
321 |
},
|
322 |
{
|
323 |
"epoch": 1.2300798453613908,
|
324 |
-
"eval_bleu":
|
325 |
-
"eval_chrf++": 62.
|
326 |
-
"eval_gen_len": 17.
|
327 |
-
"eval_loss": 0.
|
328 |
-
"eval_runtime":
|
329 |
-
"eval_samples_per_second": 2.
|
330 |
-
"eval_steps_per_second": 1.
|
331 |
"step": 3500
|
332 |
},
|
333 |
{
|
@@ -367,13 +367,13 @@
|
|
367 |
},
|
368 |
{
|
369 |
"epoch": 1.4058055375558753,
|
370 |
-
"eval_bleu":
|
371 |
-
"eval_chrf++": 62.
|
372 |
-
"eval_gen_len": 17.
|
373 |
-
"eval_loss": 0.
|
374 |
-
"eval_runtime":
|
375 |
-
"eval_samples_per_second": 2.
|
376 |
-
"eval_steps_per_second": 1.
|
377 |
"step": 4000
|
378 |
},
|
379 |
{
|
@@ -413,13 +413,13 @@
|
|
413 |
},
|
414 |
{
|
415 |
"epoch": 1.5815312297503596,
|
416 |
-
"eval_bleu": 43.
|
417 |
-
"eval_chrf++": 62.
|
418 |
-
"eval_gen_len": 17.
|
419 |
-
"eval_loss": 0.
|
420 |
-
"eval_runtime":
|
421 |
-
"eval_samples_per_second": 2.
|
422 |
-
"eval_steps_per_second": 1.
|
423 |
"step": 4500
|
424 |
},
|
425 |
{
|
@@ -459,13 +459,13 @@
|
|
459 |
},
|
460 |
{
|
461 |
"epoch": 1.757256921944844,
|
462 |
-
"eval_bleu": 44.
|
463 |
-
"eval_chrf++": 62.
|
464 |
-
"eval_gen_len": 17.
|
465 |
-
"eval_loss": 0.
|
466 |
-
"eval_runtime":
|
467 |
-
"eval_samples_per_second": 2.
|
468 |
-
"eval_steps_per_second": 1.
|
469 |
"step": 5000
|
470 |
},
|
471 |
{
|
@@ -505,205 +505,21 @@
|
|
505 |
},
|
506 |
{
|
507 |
"epoch": 1.9329826141393285,
|
508 |
-
"eval_bleu": 44.
|
509 |
-
"eval_chrf++": 62.
|
510 |
-
"eval_gen_len": 17.
|
511 |
-
"eval_loss": 0.
|
512 |
-
"eval_runtime":
|
513 |
-
"eval_samples_per_second": 2.
|
514 |
-
"eval_steps_per_second": 1.
|
515 |
"step": 5500
|
516 |
-
},
|
517 |
-
{
|
518 |
-
"epoch": 1.9681277525782255,
|
519 |
-
"grad_norm": 0.5488588213920593,
|
520 |
-
"learning_rate": 8.17531305903399e-05,
|
521 |
-
"loss": 0.5845,
|
522 |
-
"step": 5600
|
523 |
-
},
|
524 |
-
{
|
525 |
-
"epoch": 2.0032728910171222,
|
526 |
-
"grad_norm": 0.49017634987831116,
|
527 |
-
"learning_rate": 8.139534883720931e-05,
|
528 |
-
"loss": 0.59,
|
529 |
-
"step": 5700
|
530 |
-
},
|
531 |
-
{
|
532 |
-
"epoch": 2.038418029456019,
|
533 |
-
"grad_norm": 0.5274912118911743,
|
534 |
-
"learning_rate": 8.103756708407871e-05,
|
535 |
-
"loss": 0.4667,
|
536 |
-
"step": 5800
|
537 |
-
},
|
538 |
-
{
|
539 |
-
"epoch": 2.0735631678949162,
|
540 |
-
"grad_norm": 1.4353556632995605,
|
541 |
-
"learning_rate": 8.067978533094812e-05,
|
542 |
-
"loss": 0.4706,
|
543 |
-
"step": 5900
|
544 |
-
},
|
545 |
-
{
|
546 |
-
"epoch": 2.108708306333813,
|
547 |
-
"grad_norm": 0.5296390056610107,
|
548 |
-
"learning_rate": 8.032200357781753e-05,
|
549 |
-
"loss": 0.4697,
|
550 |
-
"step": 6000
|
551 |
-
},
|
552 |
-
{
|
553 |
-
"epoch": 2.108708306333813,
|
554 |
-
"eval_bleu": 44.1342,
|
555 |
-
"eval_chrf++": 62.5401,
|
556 |
-
"eval_gen_len": 17.7677,
|
557 |
-
"eval_loss": 0.6582108736038208,
|
558 |
-
"eval_runtime": 3425.6987,
|
559 |
-
"eval_samples_per_second": 2.137,
|
560 |
-
"eval_steps_per_second": 1.069,
|
561 |
-
"step": 6000
|
562 |
-
},
|
563 |
-
{
|
564 |
-
"epoch": 2.14385344477271,
|
565 |
-
"grad_norm": 0.6416345238685608,
|
566 |
-
"learning_rate": 7.996422182468695e-05,
|
567 |
-
"loss": 0.4882,
|
568 |
-
"step": 6100
|
569 |
-
},
|
570 |
-
{
|
571 |
-
"epoch": 2.1789985832116066,
|
572 |
-
"grad_norm": 0.5234227180480957,
|
573 |
-
"learning_rate": 7.960644007155635e-05,
|
574 |
-
"loss": 0.4835,
|
575 |
-
"step": 6200
|
576 |
-
},
|
577 |
-
{
|
578 |
-
"epoch": 2.2141437216505038,
|
579 |
-
"grad_norm": 0.4757489860057831,
|
580 |
-
"learning_rate": 7.924865831842576e-05,
|
581 |
-
"loss": 0.4771,
|
582 |
-
"step": 6300
|
583 |
-
},
|
584 |
-
{
|
585 |
-
"epoch": 2.2492888600894005,
|
586 |
-
"grad_norm": 0.5438205599784851,
|
587 |
-
"learning_rate": 7.889087656529517e-05,
|
588 |
-
"loss": 0.4829,
|
589 |
-
"step": 6400
|
590 |
-
},
|
591 |
-
{
|
592 |
-
"epoch": 2.2844339985282973,
|
593 |
-
"grad_norm": 0.5392005443572998,
|
594 |
-
"learning_rate": 7.853309481216459e-05,
|
595 |
-
"loss": 0.474,
|
596 |
-
"step": 6500
|
597 |
-
},
|
598 |
-
{
|
599 |
-
"epoch": 2.2844339985282973,
|
600 |
-
"eval_bleu": 44.2923,
|
601 |
-
"eval_chrf++": 62.5586,
|
602 |
-
"eval_gen_len": 17.7492,
|
603 |
-
"eval_loss": 0.6481789350509644,
|
604 |
-
"eval_runtime": 3422.6338,
|
605 |
-
"eval_samples_per_second": 2.139,
|
606 |
-
"eval_steps_per_second": 1.07,
|
607 |
-
"step": 6500
|
608 |
-
},
|
609 |
-
{
|
610 |
-
"epoch": 2.319579136967194,
|
611 |
-
"grad_norm": 0.5046759843826294,
|
612 |
-
"learning_rate": 7.8175313059034e-05,
|
613 |
-
"loss": 0.4802,
|
614 |
-
"step": 6600
|
615 |
-
},
|
616 |
-
{
|
617 |
-
"epoch": 2.354724275406091,
|
618 |
-
"grad_norm": 0.49111250042915344,
|
619 |
-
"learning_rate": 7.78175313059034e-05,
|
620 |
-
"loss": 0.4916,
|
621 |
-
"step": 6700
|
622 |
-
},
|
623 |
-
{
|
624 |
-
"epoch": 2.389869413844988,
|
625 |
-
"grad_norm": 0.5712496638298035,
|
626 |
-
"learning_rate": 7.745974955277281e-05,
|
627 |
-
"loss": 0.4845,
|
628 |
-
"step": 6800
|
629 |
-
},
|
630 |
-
{
|
631 |
-
"epoch": 2.425014552283885,
|
632 |
-
"grad_norm": 0.6314510703086853,
|
633 |
-
"learning_rate": 7.710196779964223e-05,
|
634 |
-
"loss": 0.484,
|
635 |
-
"step": 6900
|
636 |
-
},
|
637 |
-
{
|
638 |
-
"epoch": 2.4601596907227816,
|
639 |
-
"grad_norm": 0.6166778802871704,
|
640 |
-
"learning_rate": 7.674418604651163e-05,
|
641 |
-
"loss": 0.488,
|
642 |
-
"step": 7000
|
643 |
-
},
|
644 |
-
{
|
645 |
-
"epoch": 2.4601596907227816,
|
646 |
-
"eval_bleu": 44.7709,
|
647 |
-
"eval_chrf++": 62.9298,
|
648 |
-
"eval_gen_len": 17.7547,
|
649 |
-
"eval_loss": 0.6452430486679077,
|
650 |
-
"eval_runtime": 3438.1607,
|
651 |
-
"eval_samples_per_second": 2.129,
|
652 |
-
"eval_steps_per_second": 1.065,
|
653 |
-
"step": 7000
|
654 |
-
},
|
655 |
-
{
|
656 |
-
"epoch": 2.495304829161679,
|
657 |
-
"grad_norm": 0.5143587589263916,
|
658 |
-
"learning_rate": 7.638640429338104e-05,
|
659 |
-
"loss": 0.4875,
|
660 |
-
"step": 7100
|
661 |
-
},
|
662 |
-
{
|
663 |
-
"epoch": 2.5304499676005756,
|
664 |
-
"grad_norm": 0.5172815322875977,
|
665 |
-
"learning_rate": 7.602862254025045e-05,
|
666 |
-
"loss": 0.4805,
|
667 |
-
"step": 7200
|
668 |
-
},
|
669 |
-
{
|
670 |
-
"epoch": 2.5655951060394724,
|
671 |
-
"grad_norm": 0.49376818537712097,
|
672 |
-
"learning_rate": 7.567084078711986e-05,
|
673 |
-
"loss": 0.488,
|
674 |
-
"step": 7300
|
675 |
-
},
|
676 |
-
{
|
677 |
-
"epoch": 2.600740244478369,
|
678 |
-
"grad_norm": 0.5714296102523804,
|
679 |
-
"learning_rate": 7.531305903398927e-05,
|
680 |
-
"loss": 0.4893,
|
681 |
-
"step": 7400
|
682 |
-
},
|
683 |
-
{
|
684 |
-
"epoch": 2.635885382917266,
|
685 |
-
"grad_norm": 0.47455132007598877,
|
686 |
-
"learning_rate": 7.495527728085868e-05,
|
687 |
-
"loss": 0.4767,
|
688 |
-
"step": 7500
|
689 |
-
},
|
690 |
-
{
|
691 |
-
"epoch": 2.635885382917266,
|
692 |
-
"eval_bleu": 44.8961,
|
693 |
-
"eval_chrf++": 63.0641,
|
694 |
-
"eval_gen_len": 17.7427,
|
695 |
-
"eval_loss": 0.6402289867401123,
|
696 |
-
"eval_runtime": 3447.7653,
|
697 |
-
"eval_samples_per_second": 2.123,
|
698 |
-
"eval_steps_per_second": 1.062,
|
699 |
-
"step": 7500
|
700 |
}
|
701 |
],
|
702 |
"logging_steps": 100,
|
703 |
"max_steps": 28450,
|
704 |
"num_input_tokens_seen": 0,
|
705 |
"num_train_epochs": 10,
|
706 |
-
"save_steps":
|
707 |
"stateful_callbacks": {
|
708 |
"TrainerControl": {
|
709 |
"args": {
|
@@ -716,7 +532,7 @@
|
|
716 |
"attributes": {}
|
717 |
}
|
718 |
},
|
719 |
-
"total_flos":
|
720 |
"train_batch_size": 2,
|
721 |
"trial_name": null,
|
722 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6349581480026245,
|
3 |
+
"best_model_checkpoint": "nllb_200_distilled_1.3B_ENtoFO_bsz_64_epochs_10lr0.0001/checkpoint-5500",
|
4 |
+
"epoch": 1.9329826141393285,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.17572569219448442,
|
48 |
+
"eval_bleu": 39.5852,
|
49 |
+
"eval_chrf++": 58.6112,
|
50 |
+
"eval_gen_len": 17.7742,
|
51 |
+
"eval_loss": 0.8167479634284973,
|
52 |
+
"eval_runtime": 3590.7671,
|
53 |
+
"eval_samples_per_second": 2.039,
|
54 |
+
"eval_steps_per_second": 1.02,
|
55 |
"step": 500
|
56 |
},
|
57 |
{
|
|
|
91 |
},
|
92 |
{
|
93 |
"epoch": 0.35145138438896883,
|
94 |
+
"eval_bleu": 41.4406,
|
95 |
+
"eval_chrf++": 60.2627,
|
96 |
+
"eval_gen_len": 17.9384,
|
97 |
+
"eval_loss": 0.7587867975234985,
|
98 |
+
"eval_runtime": 3432.3677,
|
99 |
+
"eval_samples_per_second": 2.133,
|
100 |
+
"eval_steps_per_second": 1.067,
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
|
|
137 |
},
|
138 |
{
|
139 |
"epoch": 0.5271770765834533,
|
140 |
+
"eval_bleu": 41.9609,
|
141 |
+
"eval_chrf++": 60.633,
|
142 |
+
"eval_gen_len": 17.8868,
|
143 |
+
"eval_loss": 0.7227240800857544,
|
144 |
+
"eval_runtime": 3519.7091,
|
145 |
+
"eval_samples_per_second": 2.08,
|
146 |
+
"eval_steps_per_second": 1.04,
|
147 |
"step": 1500
|
148 |
},
|
149 |
{
|
|
|
183 |
},
|
184 |
{
|
185 |
"epoch": 0.7029027687779377,
|
186 |
+
"eval_bleu": 42.6374,
|
187 |
+
"eval_chrf++": 61.2613,
|
188 |
+
"eval_gen_len": 17.7972,
|
189 |
+
"eval_loss": 0.7011950612068176,
|
190 |
+
"eval_runtime": 3436.4901,
|
191 |
+
"eval_samples_per_second": 2.13,
|
192 |
+
"eval_steps_per_second": 1.065,
|
193 |
"step": 2000
|
194 |
},
|
195 |
{
|
|
|
229 |
},
|
230 |
{
|
231 |
"epoch": 0.8786284609724221,
|
232 |
+
"eval_bleu": 43.3601,
|
233 |
+
"eval_chrf++": 61.7425,
|
234 |
+
"eval_gen_len": 17.935,
|
235 |
+
"eval_loss": 0.6797980666160583,
|
236 |
+
"eval_runtime": 3463.8249,
|
237 |
+
"eval_samples_per_second": 2.114,
|
238 |
+
"eval_steps_per_second": 1.057,
|
239 |
"step": 2500
|
240 |
},
|
241 |
{
|
|
|
275 |
},
|
276 |
{
|
277 |
"epoch": 1.0543541531669065,
|
278 |
+
"eval_bleu": 43.6802,
|
279 |
+
"eval_chrf++": 62.0773,
|
280 |
+
"eval_gen_len": 17.9802,
|
281 |
+
"eval_loss": 0.6721383929252625,
|
282 |
+
"eval_runtime": 3607.5103,
|
283 |
+
"eval_samples_per_second": 2.029,
|
284 |
+
"eval_steps_per_second": 1.015,
|
285 |
"step": 3000
|
286 |
},
|
287 |
{
|
|
|
321 |
},
|
322 |
{
|
323 |
"epoch": 1.2300798453613908,
|
324 |
+
"eval_bleu": 43.9272,
|
325 |
+
"eval_chrf++": 62.2949,
|
326 |
+
"eval_gen_len": 17.8805,
|
327 |
+
"eval_loss": 0.6639961004257202,
|
328 |
+
"eval_runtime": 3511.7691,
|
329 |
+
"eval_samples_per_second": 2.085,
|
330 |
+
"eval_steps_per_second": 1.042,
|
331 |
"step": 3500
|
332 |
},
|
333 |
{
|
|
|
367 |
},
|
368 |
{
|
369 |
"epoch": 1.4058055375558753,
|
370 |
+
"eval_bleu": 43.7946,
|
371 |
+
"eval_chrf++": 62.1568,
|
372 |
+
"eval_gen_len": 17.8172,
|
373 |
+
"eval_loss": 0.6595008373260498,
|
374 |
+
"eval_runtime": 3539.6035,
|
375 |
+
"eval_samples_per_second": 2.068,
|
376 |
+
"eval_steps_per_second": 1.034,
|
377 |
"step": 4000
|
378 |
},
|
379 |
{
|
|
|
413 |
},
|
414 |
{
|
415 |
"epoch": 1.5815312297503596,
|
416 |
+
"eval_bleu": 43.8068,
|
417 |
+
"eval_chrf++": 62.1665,
|
418 |
+
"eval_gen_len": 17.8271,
|
419 |
+
"eval_loss": 0.64792400598526,
|
420 |
+
"eval_runtime": 3653.4406,
|
421 |
+
"eval_samples_per_second": 2.004,
|
422 |
+
"eval_steps_per_second": 1.002,
|
423 |
"step": 4500
|
424 |
},
|
425 |
{
|
|
|
459 |
},
|
460 |
{
|
461 |
"epoch": 1.757256921944844,
|
462 |
+
"eval_bleu": 44.0163,
|
463 |
+
"eval_chrf++": 62.4374,
|
464 |
+
"eval_gen_len": 17.8788,
|
465 |
+
"eval_loss": 0.6403423547744751,
|
466 |
+
"eval_runtime": 3440.4393,
|
467 |
+
"eval_samples_per_second": 2.128,
|
468 |
+
"eval_steps_per_second": 1.064,
|
469 |
"step": 5000
|
470 |
},
|
471 |
{
|
|
|
505 |
},
|
506 |
{
|
507 |
"epoch": 1.9329826141393285,
|
508 |
+
"eval_bleu": 44.5286,
|
509 |
+
"eval_chrf++": 62.728,
|
510 |
+
"eval_gen_len": 17.8899,
|
511 |
+
"eval_loss": 0.6349581480026245,
|
512 |
+
"eval_runtime": 3524.4692,
|
513 |
+
"eval_samples_per_second": 2.077,
|
514 |
+
"eval_steps_per_second": 1.039,
|
515 |
"step": 5500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
}
|
517 |
],
|
518 |
"logging_steps": 100,
|
519 |
"max_steps": 28450,
|
520 |
"num_input_tokens_seen": 0,
|
521 |
"num_train_epochs": 10,
|
522 |
+
"save_steps": 500,
|
523 |
"stateful_callbacks": {
|
524 |
"TrainerControl": {
|
525 |
"args": {
|
|
|
532 |
"attributes": {}
|
533 |
}
|
534 |
},
|
535 |
+
"total_flos": 2.3968643831845356e+18,
|
536 |
"train_batch_size": 2,
|
537 |
"trial_name": null,
|
538 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8cf5c96d2df8bf58fd891f9907effe03685c6ca5d8073324d1011c8629e1259
|
3 |
size 5304
|