{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998720081914757,
  "global_step": 3906,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.0,
      "loss": 9.2076,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 2e-05,
      "loss": 9.3403,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5e-05,
      "loss": 7.9144,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.500000000000001e-05,
      "loss": 6.7554,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00012,
      "loss": 5.7716,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017,
      "loss": 5.0089,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019999947171819797,
      "loss": 4.383,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019999352861202634,
      "loss": 4.1286,
      "step": 70
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001999809824411913,
      "loss": 3.7428,
      "step": 80
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001999618340341782,
      "loss": 3.4558,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019993608465545054,
      "loss": 3.3284,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019990373600536657,
      "loss": 3.202,
      "step": 110
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019986479022006677,
      "loss": 3.1471,
      "step": 120
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019981924987133289,
      "loss": 3.0477,
      "step": 130
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019976711796641832,
      "loss": 2.9572,
      "step": 140
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019970839794784917,
      "loss": 2.9283,
      "step": 150
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019964309369319722,
      "loss": 2.882,
      "step": 160
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019957120951482363,
      "loss": 2.799,
      "step": 170
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019949275015959442,
      "loss": 2.7808,
      "step": 180
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001994077208085668,
      "loss": 2.7251,
      "step": 190
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001993161270766472,
      "loss": 2.6977,
      "step": 200
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019921797501222036,
      "loss": 2.7366,
      "step": 210
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019911327109675003,
      "loss": 2.7007,
      "step": 220
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019900202224435086,
      "loss": 2.6237,
      "step": 230
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019888423580133194,
      "loss": 2.4928,
      "step": 240
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001987599195457116,
      "loss": 2.6081,
      "step": 250
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019862908168670384,
      "loss": 2.59,
      "step": 260
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019849173086417622,
      "loss": 2.5477,
      "step": 270
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001983478761480793,
      "loss": 2.5678,
      "step": 280
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019819752703784777,
      "loss": 2.4678,
      "step": 290
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0001980406934617731,
      "loss": 2.4486,
      "step": 300
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019787738577634794,
      "loss": 2.5024,
      "step": 310
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019770761476558223,
      "loss": 2.5042,
      "step": 320
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019753139164029108,
      "loss": 2.4463,
      "step": 330
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019734872803735444,
      "loss": 2.4035,
      "step": 340
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001971596360189488,
      "loss": 2.4444,
      "step": 350
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001969641280717504,
      "loss": 2.4404,
      "step": 360
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019676221710611093,
      "loss": 2.4378,
      "step": 370
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019655391645520486,
      "loss": 2.3842,
      "step": 380
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.000196339239874149,
      "loss": 2.3743,
      "step": 390
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019611820153909418,
      "loss": 2.3983,
      "step": 400
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001958908160462892,
      "loss": 2.3224,
      "step": 410
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0001956570984111169,
      "loss": 2.3779,
      "step": 420
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019541706406710256,
      "loss": 2.3706,
      "step": 430
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.000195170728864895,
      "loss": 2.3213,
      "step": 440
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0001949181090712195,
      "loss": 2.3222,
      "step": 450
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019465922136780396,
      "loss": 2.3018,
      "step": 460
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019439408285027717,
      "loss": 2.3268,
      "step": 470
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019412271102703992,
      "loss": 2.2956,
      "step": 480
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019384512381810887,
      "loss": 2.2676,
      "step": 490
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019356133955393312,
      "loss": 2.2877,
      "step": 500
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001932713769741839,
      "loss": 2.3348,
      "step": 510
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001929752552265169,
      "loss": 2.3026,
      "step": 520
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019267299386530813,
      "loss": 2.3344,
      "step": 530
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019236461285036233,
      "loss": 2.258,
      "step": 540
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001920501325455952,
      "loss": 2.2424,
      "step": 550
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019172957371768848,
      "loss": 2.2824,
      "step": 560
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019140295753471872,
      "loss": 2.2618,
      "step": 570
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001910703055647595,
      "loss": 2.243,
      "step": 580
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019073163977445696,
      "loss": 2.2295,
      "step": 590
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019038698252757952,
      "loss": 2.2533,
      "step": 600
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019003635658354094,
      "loss": 2.2098,
      "step": 610
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0001896797850958973,
      "loss": 2.3026,
      "step": 620
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00018931729161081835,
      "loss": 2.19,
      "step": 630
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00018894890006553237,
      "loss": 2.1298,
      "step": 640
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00018857463478674552,
      "loss": 2.1882,
      "step": 650
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00018819452048903561,
      "loss": 2.1378,
      "step": 660
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00018780858227321988,
      "loss": 2.1886,
      "step": 670
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001874168456246975,
      "loss": 2.1542,
      "step": 680
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00018701933641176676,
      "loss": 2.1299,
      "step": 690
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00018661608088391671,
      "loss": 2.164,
      "step": 700
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001862071056700939,
      "loss": 2.1497,
      "step": 710
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00018579243777694387,
      "loss": 2.1869,
      "step": 720
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018537210458702773,
      "loss": 2.1317,
      "step": 730
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018494613385701408,
      "loss": 2.138,
      "step": 740
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018451455371584603,
      "loss": 2.1337,
      "step": 750
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018407739266288365,
      "loss": 2.1062,
      "step": 760
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00018363467956602206,
      "loss": 2.0968,
      "step": 770
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001831864436597853,
      "loss": 2.095,
      "step": 780
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00018273271454339552,
      "loss": 2.1348,
      "step": 790
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001822735221788186,
      "loss": 2.0977,
      "step": 800
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001818088968887857,
      "loss": 2.1029,
      "step": 810
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018133886935479057,
      "loss": 2.1493,
      "step": 820
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001808634706150639,
      "loss": 2.088,
      "step": 830
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001803827320625234,
      "loss": 2.1212,
      "step": 840
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00017989668544270097,
      "loss": 2.0288,
      "step": 850
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001794053628516462,
      "loss": 2.0432,
      "step": 860
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00017890879673380719,
      "loss": 2.0355,
      "step": 870
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00017840701987988772,
      "loss": 2.0755,
      "step": 880
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001779000654246823,
      "loss": 2.0453,
      "step": 890
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00017738796684488772,
      "loss": 2.0582,
      "step": 900
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00017687075795689278,
      "loss": 2.0768,
      "step": 910
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017634847291454503,
      "loss": 2.091,
      "step": 920
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001758211462068955,
      "loss": 2.0577,
      "step": 930
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017528881265592108,
      "loss": 2.0704,
      "step": 940
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017475150741422528,
      "loss": 2.0305,
      "step": 950
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0001742092659627167,
      "loss": 2.0256,
      "step": 960
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0001736621241082663,
      "loss": 2.0357,
      "step": 970
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00017311011798134263,
      "loss": 1.9873,
      "step": 980
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00017255328403362606,
      "loss": 2.0248,
      "step": 990
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017199165903560192,
      "loss": 1.9927,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "eval_loss": 1.7968560457229614,
      "eval_runtime": 2.032,
      "eval_samples_per_second": 49.213,
      "eval_steps_per_second": 1.969,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017142528007413192,
      "loss": 1.9916,
      "step": 1010
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017085418455000553,
      "loss": 2.0123,
      "step": 1020
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017027841017546998,
      "loss": 2.0141,
      "step": 1030
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00016969799497174005,
      "loss": 1.976,
      "step": 1040
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001691129772664873,
      "loss": 1.9943,
      "step": 1050
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00016852339569130905,
      "loss": 1.9607,
      "step": 1060
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00016792928917917755,
      "loss": 1.9793,
      "step": 1070
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00016733069696186868,
      "loss": 1.988,
      "step": 1080
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00016672765856737178,
      "loss": 1.9143,
      "step": 1090
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00016612021381727887,
      "loss": 1.9971,
      "step": 1100
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0001655084028241555,
      "loss": 1.9675,
      "step": 1110
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0001648922659888916,
      "loss": 2.0046,
      "step": 1120
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00016427184399803383,
      "loss": 2.018,
      "step": 1130
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0001636471778210988,
      "loss": 1.999,
      "step": 1140
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00016301830870786742,
      "loss": 1.9143,
      "step": 1150
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00016238527818566138,
      "loss": 1.9324,
      "step": 1160
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001617481280566005,
      "loss": 1.9493,
      "step": 1170
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00016110690039484267,
      "loss": 1.9507,
      "step": 1180
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00016046163754380514,
      "loss": 1.9408,
      "step": 1190
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00015981238211336873,
      "loss": 2.0009,
      "step": 1200
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00015915917697706386,
      "loss": 1.9684,
      "step": 1210
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0001585020652692394,
      "loss": 1.9373,
      "step": 1220
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0001578410903822145,
      "loss": 1.9038,
      "step": 1230
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00015717629596341288,
      "loss": 1.9065,
      "step": 1240
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00015650772591248085,
      "loss": 1.9327,
      "step": 1250
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0001558354243783882,
      "loss": 1.935,
      "step": 1260
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.000155159435756513,
      "loss": 1.9339,
      "step": 1270
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00015447980468570979,
      "loss": 1.929,
      "step": 1280
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00015379657604536203,
      "loss": 1.9184,
      "step": 1290
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00015310979495241825,
      "loss": 1.9242,
      "step": 1300
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00015241950675841306,
      "loss": 1.9133,
      "step": 1310
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001517257570464721,
      "loss": 1.9014,
      "step": 1320
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00015102859162830209,
      "loss": 1.9283,
      "step": 1330
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00015032805654116566,
      "loss": 1.8821,
      "step": 1340
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00014962419804484127,
      "loss": 1.8956,
      "step": 1350
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00014891706261856844,
      "loss": 1.9166,
      "step": 1360
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00014820669695797843,
      "loss": 1.9385,
      "step": 1370
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00014749314797201084,
      "loss": 1.9325,
      "step": 1380
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00014677646277981593,
      "loss": 1.8642,
      "step": 1390
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00014605668870764293,
      "loss": 1.8964,
      "step": 1400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0001453338732857152,
      "loss": 1.8727,
      "step": 1410
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00014460806424509132,
      "loss": 1.8644,
      "step": 1420
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001438793095145132,
      "loss": 1.8591,
      "step": 1430
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00014314765721724118,
      "loss": 1.8931,
      "step": 1440
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00014241315566787617,
      "loss": 1.8953,
      "step": 1450
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00014167585336916926,
      "loss": 1.8672,
      "step": 1460
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0001409357990088188,
      "loss": 1.8414,
      "step": 1470
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00014019304145625517,
      "loss": 1.8838,
      "step": 1480
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00013944762975941403,
      "loss": 1.856,
      "step": 1490
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00013877453061830693,
      "loss": 1.8715,
      "step": 1500
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00013802421179949775,
      "loss": 1.8323,
      "step": 1510
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00013727138205490392,
      "loss": 1.898,
      "step": 1520
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00013651609109757744,
      "loss": 1.8455,
      "step": 1530
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00013575838880309623,
      "loss": 1.8788,
      "step": 1540
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00013499832520627076,
      "loss": 1.8881,
      "step": 1550
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00013423595049783974,
      "loss": 1.8326,
      "step": 1560
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00013347131502115616,
      "loss": 1.845,
      "step": 1570
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00013270446926886252,
      "loss": 1.8768,
      "step": 1580
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00013193546387955672,
      "loss": 1.8571,
      "step": 1590
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00013116434963444815,
      "loss": 1.8596,
      "step": 1600
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00013039117745400426,
      "loss": 1.8515,
      "step": 1610
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00012961599839458825,
      "loss": 1.8281,
      "step": 1620
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00012883886364508718,
      "loss": 1.7872,
      "step": 1630
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00012805982452353213,
      "loss": 1.8333,
      "step": 1640
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00012727893247370918,
      "loss": 1.7989,
      "step": 1650
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00012657458799214414,
      "loss": 1.8662,
      "step": 1660
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00012579031754172398,
      "loss": 1.8667,
      "step": 1670
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001250043440297479,
      "loss": 1.821,
      "step": 1680
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001242167193579139,
      "loss": 1.8333,
      "step": 1690
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00012342749553695423,
      "loss": 1.8554,
      "step": 1700
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001226367246832007,
      "loss": 1.8308,
      "step": 1710
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00012184445901514343,
      "loss": 1.8215,
      "step": 1720
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00012105075084998242,
      "loss": 1.8347,
      "step": 1730
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00012025565260017291,
      "loss": 1.7671,
      "step": 1740
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00011945921676996417,
      "loss": 1.8035,
      "step": 1750
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00011866149595193254,
      "loss": 1.8008,
      "step": 1760
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0001178625428235085,
      "loss": 1.8057,
      "step": 1770
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00011706241014349788,
      "loss": 1.8286,
      "step": 1780
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00011626115074859829,
      "loss": 1.7838,
      "step": 1790
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00011545881754990972,
      "loss": 1.7678,
      "step": 1800
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00011465546352944083,
      "loss": 1.8015,
      "step": 1810
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00011385114173661003,
      "loss": 1.773,
      "step": 1820
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00011304590528474257,
      "loss": 1.7528,
      "step": 1830
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00011223980734756319,
      "loss": 1.7651,
      "step": 1840
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00011143290115568473,
      "loss": 1.7817,
      "step": 1850
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00011062523999309291,
      "loss": 1.7699,
      "step": 1860
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00010981687719362807,
      "loss": 1.7672,
      "step": 1870
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00010900786613746299,
      "loss": 1.789,
      "step": 1880
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00010819826024757807,
      "loss": 1.7622,
      "step": 1890
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00010738811298623348,
      "loss": 1.7543,
      "step": 1900
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00010657747785143882,
      "loss": 1.7432,
      "step": 1910
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00010576640837342036,
      "loss": 1.7765,
      "step": 1920
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00010495495811108622,
      "loss": 1.768,
      "step": 1930
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00010414318064848956,
      "loss": 1.7852,
      "step": 1940
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001033311295912902,
      "loss": 1.7551,
      "step": 1950
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001025188585632147,
      "loss": 1.7474,
      "step": 1960
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00010186891940623151,
      "loss": 1.7737,
      "step": 1970
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00010105638760647513,
      "loss": 1.7802,
      "step": 1980
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00010024378604824765,
      "loss": 1.7723,
      "step": 1990
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.943116839162797e-05,
      "loss": 1.7412,
      "step": 2000
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.5315768718719482,
      "eval_runtime": 2.0299,
      "eval_samples_per_second": 49.264,
      "eval_steps_per_second": 1.971,
      "step": 2000
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.869984308751394e-05,
      "loss": 1.7468,
      "step": 2010
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.788734267841828e-05,
      "loss": 1.7681,
      "step": 2020
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.707498177847988e-05,
      "loss": 1.8109,
      "step": 2030
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.626281403188578e-05,
      "loss": 1.72,
      "step": 2040
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.545089307006811e-05,
      "loss": 1.703,
      "step": 2050
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.463927250816272e-05,
      "loss": 1.7624,
      "step": 2060
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.382800594146841e-05,
      "loss": 1.7587,
      "step": 2070
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.301714694190808e-05,
      "loss": 1.7375,
      "step": 2080
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.220674905449091e-05,
      "loss": 1.7579,
      "step": 2090
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.139686579377649e-05,
      "loss": 1.7396,
      "step": 2100
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.058755064034127e-05,
      "loss": 1.6666,
      "step": 2110
    },
    {
      "epoch": 0.54,
      "learning_rate": 8.977885703724658e-05,
      "loss": 1.7319,
      "step": 2120
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.897083838650984e-05,
      "loss": 1.7387,
      "step": 2130
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.816354804557807e-05,
      "loss": 1.7204,
      "step": 2140
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.743765350485347e-05,
      "loss": 1.7183,
      "step": 2150
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.671243090320367e-05,
      "loss": 1.7173,
      "step": 2160
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.590746326848647e-05,
      "loss": 1.7185,
      "step": 2170
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.510342623330503e-05,
      "loss": 1.7228,
      "step": 2180
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.430037289218072e-05,
      "loss": 1.7542,
      "step": 2190
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.349835627467664e-05,
      "loss": 1.7005,
      "step": 2200
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.269742934189604e-05,
      "loss": 1.7211,
      "step": 2210
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.189764498298483e-05,
      "loss": 1.7455,
      "step": 2220
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.109905601163912e-05,
      "loss": 1.6729,
      "step": 2230
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.030171516261782e-05,
      "loss": 1.7341,
      "step": 2240
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.950567508826012e-05,
      "loss": 1.7286,
      "step": 2250
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.871098835500859e-05,
      "loss": 1.7079,
      "step": 2260
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.791770743993817e-05,
      "loss": 1.7001,
      "step": 2270
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.712588472729058e-05,
      "loss": 1.7239,
      "step": 2280
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.633557250501531e-05,
      "loss": 1.7032,
      "step": 2290
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.55468229613168e-05,
      "loss": 1.6624,
      "step": 2300
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.475968818120798e-05,
      "loss": 1.7258,
      "step": 2310
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.405269046437083e-05,
      "loss": 1.6995,
      "step": 2320
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.342541200785587e-05,
      "loss": 1.6715,
      "step": 2330
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.26428964082281e-05,
      "loss": 1.7005,
      "step": 2340
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.186218733274769e-05,
      "loss": 1.6575,
      "step": 2350
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.1083336335476e-05,
      "loss": 1.7001,
      "step": 2360
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.030639484777641e-05,
      "loss": 1.6679,
      "step": 2370
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.953141417491781e-05,
      "loss": 1.7034,
      "step": 2380
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.875844549268706e-05,
      "loss": 1.6804,
      "step": 2390
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.798753984400916e-05,
      "loss": 1.6844,
      "step": 2400
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.721874813557699e-05,
      "loss": 1.7038,
      "step": 2410
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.645212113448953e-05,
      "loss": 1.6728,
      "step": 2420
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.568770946489948e-05,
      "loss": 1.682,
      "step": 2430
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.492556360467025e-05,
      "loss": 1.6799,
      "step": 2440
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.416573388204282e-05,
      "loss": 1.66,
      "step": 2450
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.340827047231211e-05,
      "loss": 1.6806,
      "step": 2460
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.265322339451376e-05,
      "loss": 1.6661,
      "step": 2470
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.190064250812124e-05,
      "loss": 1.6696,
      "step": 2480
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.115057750975312e-05,
      "loss": 1.6153,
      "step": 2490
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.040307792989157e-05,
      "loss": 1.6824,
      "step": 2500
    },
    {
      "epoch": 0.64,
      "learning_rate": 5.9658193129611604e-05,
      "loss": 1.6886,
      "step": 2510
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.891597229732135e-05,
      "loss": 1.6358,
      "step": 2520
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.8176464445514166e-05,
      "loss": 1.6462,
      "step": 2530
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.7439718407531906e-05,
      "loss": 1.6434,
      "step": 2540
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.670578283434016e-05,
      "loss": 1.6459,
      "step": 2550
    },
    {
      "epoch": 0.66,
      "learning_rate": 5.5974706191315884e-05,
      "loss": 1.6705,
      "step": 2560
    },
    {
      "epoch": 0.66,
      "learning_rate": 5.5246536755046706e-05,
      "loss": 1.6638,
      "step": 2570
    },
    {
      "epoch": 0.66,
      "learning_rate": 5.452132261014304e-05,
      "loss": 1.6656,
      "step": 2580
    },
    {
      "epoch": 0.66,
      "learning_rate": 5.379911164606304e-05,
      "loss": 1.6572,
      "step": 2590
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.315172891887351e-05,
      "loss": 1.643,
      "step": 2600
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.2435355221012797e-05,
      "loss": 1.6544,
      "step": 2610
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.172212245066537e-05,
      "loss": 1.628,
      "step": 2620
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.1012077706100125e-05,
      "loss": 1.6378,
      "step": 2630
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.0305267875065087e-05,
      "loss": 1.6475,
      "step": 2640
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.9601739631690836e-05,
      "loss": 1.5959,
      "step": 2650
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.897140837169796e-05,
      "loss": 1.657,
      "step": 2660
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.827424295352793e-05,
      "loss": 1.6716,
      "step": 2670
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.758049324158693e-05,
      "loss": 1.59,
      "step": 2680
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.6890205047581745e-05,
      "loss": 1.6442,
      "step": 2690
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.6203423954637995e-05,
      "loss": 1.6152,
      "step": 2700
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.552019531429019e-05,
      "loss": 1.6446,
      "step": 2710
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.484056424348703e-05,
      "loss": 1.6216,
      "step": 2720
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.416457562161184e-05,
      "loss": 1.6534,
      "step": 2730
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.349227408751919e-05,
      "loss": 1.6474,
      "step": 2740
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.282370403658717e-05,
      "loss": 1.6338,
      "step": 2750
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.2158909617785525e-05,
      "loss": 1.6473,
      "step": 2760
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.149793473076058e-05,
      "loss": 1.6315,
      "step": 2770
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.084082302293617e-05,
      "loss": 1.6516,
      "step": 2780
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.018761788663127e-05,
      "loss": 1.6112,
      "step": 2790
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.953836245619488e-05,
      "loss": 1.6077,
      "step": 2800
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.889309960515738e-05,
      "loss": 1.6182,
      "step": 2810
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.82518719433995e-05,
      "loss": 1.6072,
      "step": 2820
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.761472181433865e-05,
      "loss": 1.6062,
      "step": 2830
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.6981691292132604e-05,
      "loss": 1.6332,
      "step": 2840
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.6352822178901235e-05,
      "loss": 1.6393,
      "step": 2850
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.5728156001966154e-05,
      "loss": 1.6401,
      "step": 2860
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.5169584051980575e-05,
      "loss": 1.6234,
      "step": 2870
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.461447977339909e-05,
      "loss": 1.5814,
      "step": 2880
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.4001800370596834e-05,
      "loss": 1.6018,
      "step": 2890
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.339347915362796e-05,
      "loss": 1.6172,
      "step": 2900
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.278955629293534e-05,
      "loss": 1.6042,
      "step": 2910
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.219007166851673e-05,
      "loss": 1.6119,
      "step": 2920
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.1595064867291394e-05,
      "loss": 1.621,
      "step": 2930
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.1004575180485885e-05,
      "loss": 1.6046,
      "step": 2940
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.0418641601039366e-05,
      "loss": 1.5811,
      "step": 2950
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.9837302821028956e-05,
      "loss": 1.5635,
      "step": 2960
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.926059722911447e-05,
      "loss": 1.6193,
      "step": 2970
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.86885629080035e-05,
      "loss": 1.6067,
      "step": 2980
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.823432416081132e-05,
      "loss": 1.5795,
      "step": 2990
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.7670793109350358e-05,
      "loss": 1.5891,
      "step": 3000
    },
    {
      "epoch": 0.77,
      "eval_loss": 1.3908636569976807,
      "eval_runtime": 2.0291,
      "eval_samples_per_second": 49.282,
      "eval_steps_per_second": 1.971,
      "step": 3000
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.7112038311280828e-05,
      "loss": 1.599,
      "step": 3010
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.655809666393112e-05,
      "loss": 1.5877,
      "step": 3020
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.600900474679364e-05,
      "loss": 1.6096,
      "step": 3030
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.546479881910918e-05,
      "loss": 1.6317,
      "step": 3040
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.4925514817472618e-05,
      "loss": 1.5218,
      "step": 3050
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.4391188353459925e-05,
      "loss": 1.5526,
      "step": 3060
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.3861854711276378e-05,
      "loss": 1.5753,
      "step": 3070
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.333754884542667e-05,
      "loss": 1.6214,
      "step": 3080
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.281830537840678e-05,
      "loss": 1.591,
      "step": 3090
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.2355342955230186e-05,
      "loss": 1.5578,
      "step": 3100
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.189653361595686e-05,
      "loss": 1.5684,
      "step": 3110
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.1391646203159456e-05,
      "loss": 1.5654,
      "step": 3120
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.089194968671713e-05,
      "loss": 1.5803,
      "step": 3130
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.039747706404943e-05,
      "loss": 1.5737,
      "step": 3140
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.99082609876164e-05,
      "loss": 1.5444,
      "step": 3150
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.9472487573431274e-05,
      "loss": 1.5995,
      "step": 3160
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.8993347647457706e-05,
      "loss": 1.5803,
      "step": 3170
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.8519556989292508e-05,
      "loss": 1.5892,
      "step": 3180
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8051146885663938e-05,
      "loss": 1.6006,
      "step": 3190
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7588148267995695e-05,
      "loss": 1.567,
      "step": 3200
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7130591710364486e-05,
      "loss": 1.5557,
      "step": 3210
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.6678507427480983e-05,
      "loss": 1.5794,
      "step": 3220
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6231925272694615e-05,
      "loss": 1.5858,
      "step": 3230
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.5790874736022287e-05,
      "loss": 1.5791,
      "step": 3240
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.535538494220089e-05,
      "loss": 1.5721,
      "step": 3250
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4925484648764131e-05,
      "loss": 1.5537,
      "step": 3260
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.450120224414352e-05,
      "loss": 1.5698,
      "step": 3270
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4082565745793686e-05,
      "loss": 1.5529,
      "step": 3280
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3669602798342296e-05,
      "loss": 1.5702,
      "step": 3290
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3262340671764584e-05,
      "loss": 1.5273,
      "step": 3300
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2860806259582492e-05,
      "loss": 1.5401,
      "step": 3310
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2504344407159785e-05,
      "loss": 1.5753,
      "step": 3320
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2113765387943211e-05,
      "loss": 1.5564,
      "step": 3330
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.172898992919923e-05,
      "loss": 1.5189,
      "step": 3340
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1350043439544521e-05,
      "loss": 1.5607,
      "step": 3350
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.0976950942680197e-05,
      "loss": 1.539,
      "step": 3360
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.0609737075739412e-05,
      "loss": 1.5593,
      "step": 3370
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.0248426087660557e-05,
      "loss": 1.5345,
      "step": 3380
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.89304183758577e-06,
      "loss": 1.5988,
      "step": 3390
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.543607793285626e-06,
      "loss": 1.5306,
      "step": 3400
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.200147029609264e-06,
      "loss": 1.5702,
      "step": 3410
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.896158250762244e-06,
      "loss": 1.5378,
      "step": 3420
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.59704246528129e-06,
      "loss": 1.5693,
      "step": 3430
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.270426282311539e-06,
      "loss": 1.5517,
      "step": 3440
    },
    {
      "epoch": 0.88,
      "learning_rate": 7.949867454404824e-06,
      "loss": 1.5576,
      "step": 3450
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.635387149637685e-06,
      "loss": 1.5763,
      "step": 3460
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.327006134691883e-06,
      "loss": 1.5768,
      "step": 3470
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.024744773483105e-06,
      "loss": 1.5393,
      "step": 3480
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.7286230258161385e-06,
      "loss": 1.5617,
      "step": 3490
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.438660446066891e-06,
      "loss": 1.5404,
      "step": 3500
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.154876181891145e-06,
      "loss": 1.5765,
      "step": 3510
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.877288972960071e-06,
      "loss": 1.5942,
      "step": 3520
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.632774125747675e-06,
      "loss": 1.5557,
      "step": 3530
    },
    {
      "epoch": 0.91,
      "learning_rate": 5.367011482971008e-06,
      "loss": 1.5438,
      "step": 3540
    },
    {
      "epoch": 0.91,
      "learning_rate": 5.107497922021364e-06,
      "loss": 1.5351,
      "step": 3550
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.854250579856034e-06,
      "loss": 1.5304,
      "step": 3560
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.6072861796429665e-06,
      "loss": 1.554,
      "step": 3570
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.366621029656582e-06,
      "loss": 1.5185,
      "step": 3580
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.1322710222008065e-06,
      "loss": 1.5746,
      "step": 3590
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.904251632559652e-06,
      "loss": 1.5413,
      "step": 3600
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.6825779179752716e-06,
      "loss": 1.5533,
      "step": 3610
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.467264516653668e-06,
      "loss": 1.5432,
      "step": 3620
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.2583256467980773e-06,
      "loss": 1.5869,
      "step": 3630
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.055775105670056e-06,
      "loss": 1.5374,
      "step": 3640
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.8596262686783837e-06,
      "loss": 1.5425,
      "step": 3650
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.6698920884958177e-06,
      "loss": 1.5906,
      "step": 3660
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.486585094203786e-06,
      "loss": 1.5787,
      "step": 3670
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.309717390464983e-06,
      "loss": 1.5579,
      "step": 3680
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.1393006567240635e-06,
      "loss": 1.5391,
      "step": 3690
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.9753461464364408e-06,
      "loss": 1.5478,
      "step": 3700
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.8178646863250548e-06,
      "loss": 1.5777,
      "step": 3710
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.6668666756655572e-06,
      "loss": 1.5239,
      "step": 3720
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5365200653588708e-06,
      "loss": 1.4992,
      "step": 3730
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.397867724769042e-06,
      "loss": 1.5272,
      "step": 3740
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.2657265680968589e-06,
      "loss": 1.541,
      "step": 3750
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1523738102167225e-06,
      "loss": 1.5219,
      "step": 3760
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.044308247886483e-06,
      "loss": 1.5524,
      "step": 3770
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.30443453495422e-07,
      "loss": 1.5508,
      "step": 3780
    },
    {
      "epoch": 0.97,
      "learning_rate": 8.231207093463699e-07,
      "loss": 1.5758,
      "step": 3790
    },
    {
      "epoch": 0.97,
      "learning_rate": 7.223471024881412e-07,
      "loss": 1.5658,
      "step": 3800
    },
    {
      "epoch": 0.98,
      "learning_rate": 6.281292874978029e-07,
      "loss": 1.5232,
      "step": 3810
    },
    {
      "epoch": 0.98,
      "learning_rate": 5.404734860412375e-07,
      "loss": 1.5646,
      "step": 3820
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.5938548646227154e-07,
      "loss": 1.5771,
      "step": 3830
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.8487064340047006e-07,
      "loss": 1.5611,
      "step": 3840
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.16933877437553e-07,
      "loss": 1.6229,
      "step": 3850
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.555796747724104e-07,
      "loss": 1.5496,
      "step": 3860
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.0081208692490638e-07,
      "loss": 1.5312,
      "step": 3870
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5263473046833732e-07,
      "loss": 1.5681,
      "step": 3880
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1105078679056747e-07,
      "loss": 1.5128,
      "step": 3890
    },
    {
      "epoch": 1.0,
      "learning_rate": 7.606300188400805e-08,
      "loss": 1.5764,
      "step": 3900
    },
    {
      "epoch": 1.0,
      "step": 3906,
      "total_flos": 331952415375360.0,
      "train_loss": 1.9416432221974707,
      "train_runtime": 74872.2082,
      "train_samples_per_second": 6.678,
      "train_steps_per_second": 0.052
    }
  ],
  "max_steps": 3906,
  "num_train_epochs": 1,
  "total_flos": 331952415375360.0,
  "trial_name": null,
  "trial_params": null
}