|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 36806, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0013584741618214422, |
|
"grad_norm": 0.336629520848516, |
|
"learning_rate": 8.868778280542987e-05, |
|
"loss": 0.9342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0027169483236428845, |
|
"grad_norm": 0.41112101024853426, |
|
"learning_rate": 0.00017918552036199096, |
|
"loss": 0.5198, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004075422485464326, |
|
"grad_norm": 0.38040867145053625, |
|
"learning_rate": 0.0002696832579185521, |
|
"loss": 0.4962, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.005433896647285769, |
|
"grad_norm": 0.5231995312551354, |
|
"learning_rate": 0.00036018099547511313, |
|
"loss": 0.5198, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.006792370809107211, |
|
"grad_norm": 0.37419434381789457, |
|
"learning_rate": 0.00039999942189133334, |
|
"loss": 0.5615, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.008150844970928653, |
|
"grad_norm": 0.5051525763054373, |
|
"learning_rate": 0.00039999551377337605, |
|
"loss": 0.5629, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.009509319132750094, |
|
"grad_norm": 0.37940069661143105, |
|
"learning_rate": 0.0003999879188244911, |
|
"loss": 0.5715, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.010867793294571538, |
|
"grad_norm": 0.4187127030134641, |
|
"learning_rate": 0.0003999766371846881, |
|
"loss": 0.5191, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01222626745639298, |
|
"grad_norm": 0.4287344223835902, |
|
"learning_rate": 0.00039996166906193926, |
|
"loss": 0.5266, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.013584741618214421, |
|
"grad_norm": 0.42083341057731205, |
|
"learning_rate": 0.00039994301473217543, |
|
"loss": 0.4993, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.014943215780035863, |
|
"grad_norm": 0.44004507818862004, |
|
"learning_rate": 0.00039992067453928115, |
|
"loss": 0.5008, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.016301689941857305, |
|
"grad_norm": 0.3947637329018398, |
|
"learning_rate": 0.0003998946488950882, |
|
"loss": 0.5199, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01766016410367875, |
|
"grad_norm": 0.35666971296762606, |
|
"learning_rate": 0.0003998649382793681, |
|
"loss": 0.5024, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.01901863826550019, |
|
"grad_norm": 0.3864531806325715, |
|
"learning_rate": 0.0003998315432398232, |
|
"loss": 0.5058, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.020377112427321632, |
|
"grad_norm": 0.3979644110719079, |
|
"learning_rate": 0.00039979446439207663, |
|
"loss": 0.4833, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.021735586589143076, |
|
"grad_norm": 0.32463557091237133, |
|
"learning_rate": 0.000399753702419661, |
|
"loss": 0.5181, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.023094060750964516, |
|
"grad_norm": 0.36185137659998867, |
|
"learning_rate": 0.0003997092580740055, |
|
"loss": 0.4731, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.02445253491278596, |
|
"grad_norm": 0.36585435226850893, |
|
"learning_rate": 0.00039966113217442266, |
|
"loss": 0.5028, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0258110090746074, |
|
"grad_norm": 0.41125601490924335, |
|
"learning_rate": 0.00039960932560809256, |
|
"loss": 0.4907, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.027169483236428843, |
|
"grad_norm": 0.3848215921898568, |
|
"learning_rate": 0.0003995538393300469, |
|
"loss": 0.483, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.028527957398250287, |
|
"grad_norm": 0.33951726776456204, |
|
"learning_rate": 0.0003994946743631513, |
|
"loss": 0.4567, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.029886431560071727, |
|
"grad_norm": 0.3234265221555652, |
|
"learning_rate": 0.0003994318317980862, |
|
"loss": 0.4663, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03124490572189317, |
|
"grad_norm": 0.3997241882392808, |
|
"learning_rate": 0.0003993666791947118, |
|
"loss": 0.4709, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.03260337988371461, |
|
"grad_norm": 0.45209570419286, |
|
"learning_rate": 0.00039929799689193896, |
|
"loss": 0.4836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.033961854045536054, |
|
"grad_norm": 0.31674750892690945, |
|
"learning_rate": 0.000399224275685374, |
|
"loss": 0.4708, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0353203282073575, |
|
"grad_norm": 0.3336127152054193, |
|
"learning_rate": 0.0003991468818653226, |
|
"loss": 0.48, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03667880236917894, |
|
"grad_norm": 0.30959090922038635, |
|
"learning_rate": 0.0003990658168585062, |
|
"loss": 0.4568, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.03803727653100038, |
|
"grad_norm": 0.37908103624960793, |
|
"learning_rate": 0.0003989810821593234, |
|
"loss": 0.4867, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.03939575069282182, |
|
"grad_norm": 0.3746655333602711, |
|
"learning_rate": 0.0003988944833236597, |
|
"loss": 0.4685, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.040754224854643264, |
|
"grad_norm": 0.3841226468141105, |
|
"learning_rate": 0.0003988043631486177, |
|
"loss": 0.4788, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04211269901646471, |
|
"grad_norm": 0.4460643403291581, |
|
"learning_rate": 0.0003987087755737732, |
|
"loss": 0.4502, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04347117317828615, |
|
"grad_norm": 0.3671013360773688, |
|
"learning_rate": 0.00039860952488846415, |
|
"loss": 0.4534, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04482964734010759, |
|
"grad_norm": 0.34747090480175297, |
|
"learning_rate": 0.000398506612922334, |
|
"loss": 0.4405, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.04618812150192903, |
|
"grad_norm": 0.36167597296572834, |
|
"learning_rate": 0.00039840004157252047, |
|
"loss": 0.4778, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.047546595663750475, |
|
"grad_norm": 0.4009770951011953, |
|
"learning_rate": 0.0003982898128036203, |
|
"loss": 0.479, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.04890506982557192, |
|
"grad_norm": 0.5968187908901367, |
|
"learning_rate": 0.00039818055417120837, |
|
"loss": 0.5444, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05026354398739336, |
|
"grad_norm": 1.8105646301472897, |
|
"learning_rate": 0.0003980655464341668, |
|
"loss": 0.5653, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.0516220181492148, |
|
"grad_norm": 0.789226312089056, |
|
"learning_rate": 0.0003979470320337101, |
|
"loss": 0.7458, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05298049231103624, |
|
"grad_norm": 0.9263531743492742, |
|
"learning_rate": 0.0003978224864094713, |
|
"loss": 0.5415, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.054338966472857686, |
|
"grad_norm": 0.47720194915599734, |
|
"learning_rate": 0.00039769429401312583, |
|
"loss": 0.5115, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05569744063467913, |
|
"grad_norm": 0.6259624884089505, |
|
"learning_rate": 0.0003975624572078452, |
|
"loss": 0.4968, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.05705591479650057, |
|
"grad_norm": 0.6070556198109234, |
|
"learning_rate": 0.00039742697842398407, |
|
"loss": 0.4721, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05841438895832201, |
|
"grad_norm": 0.9495402483045952, |
|
"learning_rate": 0.00039728786015903527, |
|
"loss": 0.5384, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.05977286312014345, |
|
"grad_norm": 0.5094390312707457, |
|
"learning_rate": 0.0003971451049775838, |
|
"loss": 0.4768, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0611313372819649, |
|
"grad_norm": 0.4164209148361273, |
|
"learning_rate": 0.00039699871551126, |
|
"loss": 0.4745, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.06248981144378634, |
|
"grad_norm": 0.4096404988646368, |
|
"learning_rate": 0.0003968486944586903, |
|
"loss": 0.4571, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06384828560560778, |
|
"grad_norm": 0.36680803179529065, |
|
"learning_rate": 0.00039669504458544815, |
|
"loss": 0.4542, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.06520675976742922, |
|
"grad_norm": 0.4436494287681541, |
|
"learning_rate": 0.00039653776872400245, |
|
"loss": 0.4526, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06656523392925066, |
|
"grad_norm": 0.632255456628572, |
|
"learning_rate": 0.0003963768697736659, |
|
"loss": 0.4614, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.06792370809107211, |
|
"grad_norm": 0.27294306772696525, |
|
"learning_rate": 0.0003962123507005411, |
|
"loss": 0.4247, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06928218225289355, |
|
"grad_norm": 0.4922506411666624, |
|
"learning_rate": 0.00039604421453746615, |
|
"loss": 0.4336, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.070640656414715, |
|
"grad_norm": 0.3836605187611845, |
|
"learning_rate": 0.00039587246438395866, |
|
"loss": 0.4308, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.07199913057653644, |
|
"grad_norm": 0.30031785058018967, |
|
"learning_rate": 0.0003956971034061584, |
|
"loss": 0.4336, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.07335760473835788, |
|
"grad_norm": 0.30896010198949975, |
|
"learning_rate": 0.00039551813483676944, |
|
"loss": 0.4282, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07471607890017933, |
|
"grad_norm": 0.3264468040624618, |
|
"learning_rate": 0.0003953355619749999, |
|
"loss": 0.4466, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.07607455306200075, |
|
"grad_norm": 0.47417679268203056, |
|
"learning_rate": 0.0003951493881865018, |
|
"loss": 0.455, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0774330272238222, |
|
"grad_norm": 0.467248267974456, |
|
"learning_rate": 0.0003949596169033084, |
|
"loss": 0.436, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.07879150138564364, |
|
"grad_norm": 0.2574312755334094, |
|
"learning_rate": 0.0003947662516237714, |
|
"loss": 0.4263, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.08014997554746509, |
|
"grad_norm": 0.40465633762609327, |
|
"learning_rate": 0.0003945692959124962, |
|
"loss": 0.4275, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.08150844970928653, |
|
"grad_norm": 0.3105964962014554, |
|
"learning_rate": 0.0003943687534002764, |
|
"loss": 0.4063, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08286692387110797, |
|
"grad_norm": 0.36079130494736616, |
|
"learning_rate": 0.00039416462778402644, |
|
"loss": 0.4291, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.08422539803292942, |
|
"grad_norm": 0.27991323241801247, |
|
"learning_rate": 0.0003939569228267139, |
|
"loss": 0.4294, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.08558387219475086, |
|
"grad_norm": 0.28381836513166353, |
|
"learning_rate": 0.00039374564235729017, |
|
"loss": 0.4198, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.0869423463565723, |
|
"grad_norm": 0.3299204189197327, |
|
"learning_rate": 0.00039353079027061935, |
|
"loss": 0.4103, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08830082051839375, |
|
"grad_norm": 0.326311631143589, |
|
"learning_rate": 0.0003933123705274068, |
|
"loss": 0.4297, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.08965929468021518, |
|
"grad_norm": 0.3302186726703939, |
|
"learning_rate": 0.0003930903871541262, |
|
"loss": 0.4129, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.09101776884203662, |
|
"grad_norm": 0.24354600292418105, |
|
"learning_rate": 0.00039286484424294534, |
|
"loss": 0.4178, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.09237624300385806, |
|
"grad_norm": 0.33427871787687957, |
|
"learning_rate": 0.00039263574595165007, |
|
"loss": 0.4229, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.09373471716567951, |
|
"grad_norm": 0.3922838514063193, |
|
"learning_rate": 0.00039240309650356874, |
|
"loss": 0.416, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.09509319132750095, |
|
"grad_norm": 0.37066104429189983, |
|
"learning_rate": 0.0003921669001874933, |
|
"loss": 0.4359, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0964516654893224, |
|
"grad_norm": 0.29257797493644805, |
|
"learning_rate": 0.0003919271613576008, |
|
"loss": 0.4286, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.09781013965114384, |
|
"grad_norm": 0.45431531124773644, |
|
"learning_rate": 0.0003916838844333732, |
|
"loss": 0.4291, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09916861381296528, |
|
"grad_norm": 0.31092097963579685, |
|
"learning_rate": 0.0003914370738995154, |
|
"loss": 0.431, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.10052708797478672, |
|
"grad_norm": 0.2895204897106821, |
|
"learning_rate": 0.00039118673430587307, |
|
"loss": 0.4372, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.10188556213660817, |
|
"grad_norm": 0.2576792476027214, |
|
"learning_rate": 0.0003909328702673485, |
|
"loss": 0.4527, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.1032440362984296, |
|
"grad_norm": 0.30963702592618436, |
|
"learning_rate": 0.0003906754864638156, |
|
"loss": 0.4121, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.10460251046025104, |
|
"grad_norm": 0.6475657508703404, |
|
"learning_rate": 0.0003904145876400337, |
|
"loss": 0.4224, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.10596098462207248, |
|
"grad_norm": 0.3256994801441109, |
|
"learning_rate": 0.00039015017860555984, |
|
"loss": 0.4363, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.10731945878389393, |
|
"grad_norm": 0.3471706882794347, |
|
"learning_rate": 0.0003898822642346604, |
|
"loss": 0.4252, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.10867793294571537, |
|
"grad_norm": 0.29742285163537485, |
|
"learning_rate": 0.00038961084946622114, |
|
"loss": 0.41, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11003640710753682, |
|
"grad_norm": 0.42367795916225637, |
|
"learning_rate": 0.0003893359393036561, |
|
"loss": 0.4047, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.11139488126935826, |
|
"grad_norm": 0.3766799857882688, |
|
"learning_rate": 0.0003890575388148154, |
|
"loss": 0.4142, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.1127533554311797, |
|
"grad_norm": 0.4163460710146468, |
|
"learning_rate": 0.00038877565313189184, |
|
"loss": 0.4467, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.11411182959300115, |
|
"grad_norm": 0.2826407429662945, |
|
"learning_rate": 0.00038849028745132627, |
|
"loss": 0.4149, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.11547030375482259, |
|
"grad_norm": 0.3265965959677555, |
|
"learning_rate": 0.0003882014470337117, |
|
"loss": 0.4358, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.11682877791664402, |
|
"grad_norm": 0.24777830179313484, |
|
"learning_rate": 0.00038790913720369657, |
|
"loss": 0.4012, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.11818725207846546, |
|
"grad_norm": 0.2915403708081659, |
|
"learning_rate": 0.00038761336334988634, |
|
"loss": 0.4069, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.1195457262402869, |
|
"grad_norm": 0.3326202807353683, |
|
"learning_rate": 0.00038731413092474423, |
|
"loss": 0.3902, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.12090420040210835, |
|
"grad_norm": 0.3965527555219645, |
|
"learning_rate": 0.00038701144544449085, |
|
"loss": 0.3894, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.1222626745639298, |
|
"grad_norm": 0.36617448279834447, |
|
"learning_rate": 0.0003867053124890022, |
|
"loss": 0.3993, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.12362114872575124, |
|
"grad_norm": 0.2978802526091461, |
|
"learning_rate": 0.0003863957377017073, |
|
"loss": 0.3934, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.12497962288757268, |
|
"grad_norm": 0.3199935306648141, |
|
"learning_rate": 0.0003860827267894834, |
|
"loss": 0.4015, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.1263380970493941, |
|
"grad_norm": 0.28870094415921566, |
|
"learning_rate": 0.00038576628552255173, |
|
"loss": 0.4242, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.12769657121121555, |
|
"grad_norm": 0.3368662682662353, |
|
"learning_rate": 0.00038544641973437026, |
|
"loss": 0.4078, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.129055045373037, |
|
"grad_norm": 0.34171191785593347, |
|
"learning_rate": 0.0003851231353215267, |
|
"loss": 0.4184, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.13041351953485844, |
|
"grad_norm": 0.3456350837751831, |
|
"learning_rate": 0.00038479643824362956, |
|
"loss": 0.4011, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.13177199369667988, |
|
"grad_norm": 0.44062561750629825, |
|
"learning_rate": 0.00038446633452319845, |
|
"loss": 0.4179, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.13313046785850133, |
|
"grad_norm": 0.30494197906676423, |
|
"learning_rate": 0.00038413283024555284, |
|
"loss": 0.3987, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.13448894202032277, |
|
"grad_norm": 0.26281770974778434, |
|
"learning_rate": 0.00038379593155870006, |
|
"loss": 0.3745, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.13584741618214421, |
|
"grad_norm": 0.33576603130586835, |
|
"learning_rate": 0.00038345564467322197, |
|
"loss": 0.3981, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.13720589034396566, |
|
"grad_norm": 0.3395300651756381, |
|
"learning_rate": 0.00038311197586216023, |
|
"loss": 0.3908, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.1385643645057871, |
|
"grad_norm": 0.3641141735123601, |
|
"learning_rate": 0.0003827649314609011, |
|
"loss": 0.4156, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.13992283866760855, |
|
"grad_norm": 0.3880481801495523, |
|
"learning_rate": 0.00038241451786705824, |
|
"loss": 0.4225, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.14128131282943, |
|
"grad_norm": 0.33587356117829287, |
|
"learning_rate": 0.0003820607415403548, |
|
"loss": 0.4322, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.14263978699125143, |
|
"grad_norm": 0.2651951238410833, |
|
"learning_rate": 0.0003817036090025046, |
|
"loss": 0.3882, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.14399826115307288, |
|
"grad_norm": 0.3108835459594419, |
|
"learning_rate": 0.0003813431268370919, |
|
"loss": 0.3962, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.14535673531489432, |
|
"grad_norm": 0.5822321494392535, |
|
"learning_rate": 0.0003809793016894496, |
|
"loss": 0.4092, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.14671520947671576, |
|
"grad_norm": 0.37563297659114, |
|
"learning_rate": 0.0003806121402665372, |
|
"loss": 0.4168, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.1480736836385372, |
|
"grad_norm": 0.3315292653141971, |
|
"learning_rate": 0.00038024164933681703, |
|
"loss": 0.4094, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.14943215780035865, |
|
"grad_norm": 0.3989400802203129, |
|
"learning_rate": 0.00037986783573012935, |
|
"loss": 0.4068, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.15079063196218007, |
|
"grad_norm": 0.3194388411256492, |
|
"learning_rate": 0.0003794907063375666, |
|
"loss": 0.4003, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.1521491061240015, |
|
"grad_norm": 0.30240424166641394, |
|
"learning_rate": 0.00037911026811134616, |
|
"loss": 0.407, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.15350758028582295, |
|
"grad_norm": 0.351737936530188, |
|
"learning_rate": 0.0003787265280646825, |
|
"loss": 0.4107, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.1548660544476444, |
|
"grad_norm": 0.3836451635236239, |
|
"learning_rate": 0.0003783394932716577, |
|
"loss": 0.3999, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.15622452860946584, |
|
"grad_norm": 0.25767700494067813, |
|
"learning_rate": 0.0003779491708670909, |
|
"loss": 0.388, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.15758300277128728, |
|
"grad_norm": 0.35195866802663683, |
|
"learning_rate": 0.00037755556804640723, |
|
"loss": 0.3986, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.15894147693310873, |
|
"grad_norm": 0.37789059875509895, |
|
"learning_rate": 0.00037715869206550467, |
|
"loss": 0.4124, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.16029995109493017, |
|
"grad_norm": 0.29714505650099465, |
|
"learning_rate": 0.0003767585502406204, |
|
"loss": 0.382, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.16165842525675161, |
|
"grad_norm": 0.36569606019843054, |
|
"learning_rate": 0.0003763551499481964, |
|
"loss": 0.4091, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.16301689941857306, |
|
"grad_norm": 0.5124251442347727, |
|
"learning_rate": 0.0003759484986247426, |
|
"loss": 0.3957, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1643753735803945, |
|
"grad_norm": 0.42061839044447147, |
|
"learning_rate": 0.0003755386037667007, |
|
"loss": 0.3939, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.16573384774221595, |
|
"grad_norm": 0.278996914346875, |
|
"learning_rate": 0.0003751254729303053, |
|
"loss": 0.4171, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.1670923219040374, |
|
"grad_norm": 0.22931110142699168, |
|
"learning_rate": 0.0003747091137314451, |
|
"loss": 0.4037, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.16845079606585883, |
|
"grad_norm": 0.4632003674215028, |
|
"learning_rate": 0.00037428953384552197, |
|
"loss": 0.3856, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.16980927022768028, |
|
"grad_norm": 0.3456285538182738, |
|
"learning_rate": 0.00037386674100730986, |
|
"loss": 0.3887, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.17116774438950172, |
|
"grad_norm": 0.4299448792360789, |
|
"learning_rate": 0.0003734407430108124, |
|
"loss": 0.3802, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.17252621855132316, |
|
"grad_norm": 0.26911515847901823, |
|
"learning_rate": 0.0003730115477091185, |
|
"loss": 0.3906, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.1738846927131446, |
|
"grad_norm": 0.24894262795042146, |
|
"learning_rate": 0.00037257916301425823, |
|
"loss": 0.3743, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.17524316687496605, |
|
"grad_norm": 0.36696930956387547, |
|
"learning_rate": 0.00037214359689705676, |
|
"loss": 0.3977, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.1766016410367875, |
|
"grad_norm": 0.4978113756154764, |
|
"learning_rate": 0.0003717048573869873, |
|
"loss": 0.3782, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.1779601151986089, |
|
"grad_norm": 0.3180001579838143, |
|
"learning_rate": 0.00037126295257202324, |
|
"loss": 0.3975, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.17931858936043035, |
|
"grad_norm": 0.408582540741261, |
|
"learning_rate": 0.0003708178905984891, |
|
"loss": 0.3763, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.1806770635222518, |
|
"grad_norm": 0.27659616140789006, |
|
"learning_rate": 0.00037036967967091005, |
|
"loss": 0.4013, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.18203553768407324, |
|
"grad_norm": 0.16425635963494883, |
|
"learning_rate": 0.00036991832805186107, |
|
"loss": 0.3865, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.18339401184589468, |
|
"grad_norm": 0.37646449133193777, |
|
"learning_rate": 0.00036946384406181425, |
|
"loss": 0.3892, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.18475248600771613, |
|
"grad_norm": 0.3427684491705063, |
|
"learning_rate": 0.0003690062360789858, |
|
"loss": 0.3969, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.18611096016953757, |
|
"grad_norm": 0.392156716422773, |
|
"learning_rate": 0.0003685455125391811, |
|
"loss": 0.3709, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.18746943433135901, |
|
"grad_norm": 0.3626113579097081, |
|
"learning_rate": 0.0003680816819356398, |
|
"loss": 0.3929, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.18882790849318046, |
|
"grad_norm": 0.3652111299862884, |
|
"learning_rate": 0.00036761475281887863, |
|
"loss": 0.3941, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.1901863826550019, |
|
"grad_norm": 0.3038051956143636, |
|
"learning_rate": 0.0003671541644021072, |
|
"loss": 0.4019, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.19154485681682334, |
|
"grad_norm": 0.34844140332875567, |
|
"learning_rate": 0.00036668112567831633, |
|
"loss": 0.3666, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.1929033309786448, |
|
"grad_norm": 0.31453187134210386, |
|
"learning_rate": 0.0003662050142599555, |
|
"loss": 0.4062, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.19426180514046623, |
|
"grad_norm": 0.29627297234275446, |
|
"learning_rate": 0.00036572583892393305, |
|
"loss": 0.3807, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.19562027930228768, |
|
"grad_norm": 0.37160716135610367, |
|
"learning_rate": 0.0003652436085036393, |
|
"loss": 0.3936, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.19697875346410912, |
|
"grad_norm": 0.3178371792735983, |
|
"learning_rate": 0.0003647583318887839, |
|
"loss": 0.3942, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.19833722762593056, |
|
"grad_norm": 0.36580162938243826, |
|
"learning_rate": 0.0003642700180252315, |
|
"loss": 0.3932, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.199695701787752, |
|
"grad_norm": 0.2503536243665017, |
|
"learning_rate": 0.0003637786759148375, |
|
"loss": 0.3835, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.20105417594957345, |
|
"grad_norm": 0.3422541205714727, |
|
"learning_rate": 0.0003632942313704729, |
|
"loss": 0.3869, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.2024126501113949, |
|
"grad_norm": 0.2635673669764689, |
|
"learning_rate": 0.00036279692010693837, |
|
"loss": 0.374, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.20377112427321634, |
|
"grad_norm": 0.32133691105349677, |
|
"learning_rate": 0.0003622966077524861, |
|
"loss": 0.3829, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.20512959843503775, |
|
"grad_norm": 0.2963373700211773, |
|
"learning_rate": 0.0003617933035301583, |
|
"loss": 0.3784, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.2064880725968592, |
|
"grad_norm": 0.34175259284676457, |
|
"learning_rate": 0.000361287016718151, |
|
"loss": 0.3634, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.20784654675868064, |
|
"grad_norm": 0.21695916175140184, |
|
"learning_rate": 0.0003607777566496428, |
|
"loss": 0.3913, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.20920502092050208, |
|
"grad_norm": 0.38835064397473545, |
|
"learning_rate": 0.00036027580617629013, |
|
"loss": 0.3937, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.21056349508232353, |
|
"grad_norm": 0.33577145967062894, |
|
"learning_rate": 0.00035976068680901367, |
|
"loss": 0.4041, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.21192196924414497, |
|
"grad_norm": 0.2829507495162838, |
|
"learning_rate": 0.0003592426223224691, |
|
"loss": 0.3885, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.2132804434059664, |
|
"grad_norm": 0.30282305495180506, |
|
"learning_rate": 0.00035872162226695156, |
|
"loss": 0.425, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.21463891756778786, |
|
"grad_norm": 0.26572704020691806, |
|
"learning_rate": 0.000358197696246872, |
|
"loss": 0.3719, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.2159973917296093, |
|
"grad_norm": 0.4362525108778928, |
|
"learning_rate": 0.0003576708539205804, |
|
"loss": 0.3751, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.21735586589143074, |
|
"grad_norm": 0.25828364696271605, |
|
"learning_rate": 0.0003571411050001875, |
|
"loss": 0.3863, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2187143400532522, |
|
"grad_norm": 0.3678501770422065, |
|
"learning_rate": 0.00035660845925138585, |
|
"loss": 0.3931, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.22007281421507363, |
|
"grad_norm": 0.27819454488467, |
|
"learning_rate": 0.00035607292649326983, |
|
"loss": 0.3633, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.22143128837689507, |
|
"grad_norm": 0.32104809246529153, |
|
"learning_rate": 0.00035553451659815457, |
|
"loss": 0.3914, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.22278976253871652, |
|
"grad_norm": 0.32593288782577173, |
|
"learning_rate": 0.000354993239491394, |
|
"loss": 0.3721, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.22414823670053796, |
|
"grad_norm": 0.2740573766851601, |
|
"learning_rate": 0.00035444910515119776, |
|
"loss": 0.3725, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.2255067108623594, |
|
"grad_norm": 0.6372146103791719, |
|
"learning_rate": 0.00035390212360844744, |
|
"loss": 0.3786, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.22686518502418085, |
|
"grad_norm": 0.3847972604563355, |
|
"learning_rate": 0.00035335230494651165, |
|
"loss": 0.3807, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.2282236591860023, |
|
"grad_norm": 0.3297830679777594, |
|
"learning_rate": 0.00035279965930105987, |
|
"loss": 0.3757, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.22958213334782374, |
|
"grad_norm": 0.27775568777577214, |
|
"learning_rate": 0.00035224419685987593, |
|
"loss": 0.3796, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.23094060750964518, |
|
"grad_norm": 0.2985445199508582, |
|
"learning_rate": 0.0003516859278626702, |
|
"loss": 0.385, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.2322990816714666, |
|
"grad_norm": 0.2822498403343061, |
|
"learning_rate": 0.00035112486260089026, |
|
"loss": 0.3654, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.23365755583328804, |
|
"grad_norm": 0.29173659614651143, |
|
"learning_rate": 0.0003505610114175323, |
|
"loss": 0.3693, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.23501602999510948, |
|
"grad_norm": 0.37316571053101977, |
|
"learning_rate": 0.00034999438470694903, |
|
"loss": 0.3624, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.23637450415693093, |
|
"grad_norm": 0.2991870149215513, |
|
"learning_rate": 0.0003494249929146593, |
|
"loss": 0.3944, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.23773297831875237, |
|
"grad_norm": 0.2626766731044403, |
|
"learning_rate": 0.000348852846537155, |
|
"loss": 0.3562, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.2390914524805738, |
|
"grad_norm": 0.3742270372557048, |
|
"learning_rate": 0.0003482779561217074, |
|
"loss": 0.3737, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.24044992664239526, |
|
"grad_norm": 0.33975911074890713, |
|
"learning_rate": 0.000347700332266173, |
|
"loss": 0.3673, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.2418084008042167, |
|
"grad_norm": 0.24493733544746576, |
|
"learning_rate": 0.00034711998561879823, |
|
"loss": 0.3863, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.24316687496603814, |
|
"grad_norm": 0.24367293507823867, |
|
"learning_rate": 0.00034653692687802295, |
|
"loss": 0.3597, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.2445253491278596, |
|
"grad_norm": 0.4992858322232737, |
|
"learning_rate": 0.0003459511667922831, |
|
"loss": 0.3759, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.24588382328968103, |
|
"grad_norm": 0.34074243370950325, |
|
"learning_rate": 0.000345362716159813, |
|
"loss": 0.3704, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.24724229745150247, |
|
"grad_norm": 0.37589710424283923, |
|
"learning_rate": 0.0003447715858284458, |
|
"loss": 0.3605, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.24860077161332392, |
|
"grad_norm": 0.2954917370820438, |
|
"learning_rate": 0.00034417778669541414, |
|
"loss": 0.3619, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.24995924577514536, |
|
"grad_norm": 0.2682683326827451, |
|
"learning_rate": 0.00034358132970714833, |
|
"loss": 0.3548, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.2513177199369668, |
|
"grad_norm": 0.3334186903450503, |
|
"learning_rate": 0.00034298222585907556, |
|
"loss": 0.3582, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.2526761940987882, |
|
"grad_norm": 0.3037548456565183, |
|
"learning_rate": 0.0003423804861954165, |
|
"loss": 0.3598, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.25403466826060966, |
|
"grad_norm": 0.3415172861536472, |
|
"learning_rate": 0.00034177612180898186, |
|
"loss": 0.3596, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.2553931424224311, |
|
"grad_norm": 0.2778208976878839, |
|
"learning_rate": 0.0003411691438409683, |
|
"loss": 0.3557, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.25675161658425255, |
|
"grad_norm": 0.2722662895108223, |
|
"learning_rate": 0.0003405595634807524, |
|
"loss": 0.3568, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.258110090746074, |
|
"grad_norm": 0.2470305006609605, |
|
"learning_rate": 0.00033994739196568485, |
|
"loss": 0.3693, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.25946856490789544, |
|
"grad_norm": 0.3204976945527186, |
|
"learning_rate": 0.00033933264058088323, |
|
"loss": 0.3744, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.2608270390697169, |
|
"grad_norm": 0.29419054157347835, |
|
"learning_rate": 0.0003387153206590238, |
|
"loss": 0.3578, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.2621855132315383, |
|
"grad_norm": 0.285418283786098, |
|
"learning_rate": 0.0003380954435801327, |
|
"loss": 0.3666, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.26354398739335977, |
|
"grad_norm": 0.40228260917678826, |
|
"learning_rate": 0.0003374730207713763, |
|
"loss": 0.3642, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.2649024615551812, |
|
"grad_norm": 0.30226961110995426, |
|
"learning_rate": 0.0003368480637068501, |
|
"loss": 0.3955, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.26626093571700266, |
|
"grad_norm": 0.445566495529195, |
|
"learning_rate": 0.00033622058390736785, |
|
"loss": 0.3756, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.2676194098788241, |
|
"grad_norm": 0.387905313007887, |
|
"learning_rate": 0.00033559059294024864, |
|
"loss": 0.3657, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.26897788404064554, |
|
"grad_norm": 0.22352584167074716, |
|
"learning_rate": 0.00033495810241910385, |
|
"loss": 0.3452, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.270336358202467, |
|
"grad_norm": 0.2556859512831143, |
|
"learning_rate": 0.00033432312400362305, |
|
"loss": 0.3463, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.27169483236428843, |
|
"grad_norm": 0.5717326212718582, |
|
"learning_rate": 0.00033368566939935925, |
|
"loss": 0.3731, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2730533065261099, |
|
"grad_norm": 0.32255486027652513, |
|
"learning_rate": 0.0003330457503575127, |
|
"loss": 0.3698, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.2744117806879313, |
|
"grad_norm": 0.25946616692420554, |
|
"learning_rate": 0.0003324033786747145, |
|
"loss": 0.3637, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.27577025484975276, |
|
"grad_norm": 0.3530893683862247, |
|
"learning_rate": 0.0003317585661928094, |
|
"loss": 0.3646, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.2771287290115742, |
|
"grad_norm": 0.2853492397892913, |
|
"learning_rate": 0.000331111324798637, |
|
"loss": 0.3295, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.27848720317339565, |
|
"grad_norm": 0.19156693197511587, |
|
"learning_rate": 0.0003304616664238127, |
|
"loss": 0.359, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.2798456773352171, |
|
"grad_norm": 0.26150890027393986, |
|
"learning_rate": 0.00032980960304450834, |
|
"loss": 0.3665, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.28120415149703853, |
|
"grad_norm": 0.3656668619278649, |
|
"learning_rate": 0.00032915514668123056, |
|
"loss": 0.3498, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.28256262565886, |
|
"grad_norm": 0.36465329584026973, |
|
"learning_rate": 0.00032849830939859977, |
|
"loss": 0.3722, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.2839210998206814, |
|
"grad_norm": 0.28300896811439313, |
|
"learning_rate": 0.00032783910330512776, |
|
"loss": 0.3583, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.28527957398250287, |
|
"grad_norm": 0.2470361242697161, |
|
"learning_rate": 0.000327177540552994, |
|
"loss": 0.3462, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.2866380481443243, |
|
"grad_norm": 0.36918740507242426, |
|
"learning_rate": 0.0003265136333378223, |
|
"loss": 0.3699, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.28799652230614575, |
|
"grad_norm": 0.32934977838919777, |
|
"learning_rate": 0.0003258473938984554, |
|
"loss": 0.3625, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.2893549964679672, |
|
"grad_norm": 0.2087576531023101, |
|
"learning_rate": 0.0003251788345167296, |
|
"loss": 0.3568, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.29071347062978864, |
|
"grad_norm": 0.39857663501798557, |
|
"learning_rate": 0.00032450796751724837, |
|
"loss": 0.3591, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.2920719447916101, |
|
"grad_norm": 0.32871619749282505, |
|
"learning_rate": 0.00032383480526715526, |
|
"loss": 0.3603, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.2934304189534315, |
|
"grad_norm": 0.3062057976969262, |
|
"learning_rate": 0.00032315936017590554, |
|
"loss": 0.3575, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.29478889311525297, |
|
"grad_norm": 0.35676299616191043, |
|
"learning_rate": 0.0003224816446950378, |
|
"loss": 0.3406, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.2961473672770744, |
|
"grad_norm": 0.268494161462533, |
|
"learning_rate": 0.00032180167131794425, |
|
"loss": 0.3356, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.29750584143889586, |
|
"grad_norm": 0.39304660551244835, |
|
"learning_rate": 0.0003211194525796404, |
|
"loss": 0.3681, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.2988643156007173, |
|
"grad_norm": 0.36242243481768954, |
|
"learning_rate": 0.00032043500105653414, |
|
"loss": 0.3624, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.3002227897625387, |
|
"grad_norm": 0.32191025299969356, |
|
"learning_rate": 0.0003197483293661937, |
|
"loss": 0.3639, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.30158126392436013, |
|
"grad_norm": 0.3819533916481645, |
|
"learning_rate": 0.0003190594501671151, |
|
"loss": 0.3639, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.3029397380861816, |
|
"grad_norm": 0.3051700734949664, |
|
"learning_rate": 0.000318368376158489, |
|
"loss": 0.3495, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.304298212248003, |
|
"grad_norm": 0.25353257677490404, |
|
"learning_rate": 0.00031768900650322744, |
|
"loss": 0.3424, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.30565668640982446, |
|
"grad_norm": 0.2971513332547502, |
|
"learning_rate": 0.000316993624394983, |
|
"loss": 0.3465, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.3070151605716459, |
|
"grad_norm": 0.3393454172568527, |
|
"learning_rate": 0.00031629608555979686, |
|
"loss": 0.357, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.30837363473346735, |
|
"grad_norm": 0.3079000714041467, |
|
"learning_rate": 0.0003155964028564964, |
|
"loss": 0.3315, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.3097321088952888, |
|
"grad_norm": 0.236457076827118, |
|
"learning_rate": 0.00031489458918342993, |
|
"loss": 0.3586, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.31109058305711024, |
|
"grad_norm": 0.37187670656153765, |
|
"learning_rate": 0.0003141906574782295, |
|
"loss": 0.3479, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.3124490572189317, |
|
"grad_norm": 0.2858744824646288, |
|
"learning_rate": 0.0003134846207175722, |
|
"loss": 0.359, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.3138075313807531, |
|
"grad_norm": 0.29954433740207526, |
|
"learning_rate": 0.00031277649191694063, |
|
"loss": 0.3466, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.31516600554257457, |
|
"grad_norm": 0.25530263584194796, |
|
"learning_rate": 0.0003120662841303836, |
|
"loss": 0.3488, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.316524479704396, |
|
"grad_norm": 0.22413446350946586, |
|
"learning_rate": 0.0003113540104502747, |
|
"loss": 0.3471, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.31788295386621745, |
|
"grad_norm": 0.40702625283242805, |
|
"learning_rate": 0.000310639684007072, |
|
"loss": 0.3382, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.3192414280280389, |
|
"grad_norm": 0.2866280004893114, |
|
"learning_rate": 0.0003099233179690746, |
|
"loss": 0.3779, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.32059990218986034, |
|
"grad_norm": 0.313655190983661, |
|
"learning_rate": 0.0003092049255421813, |
|
"loss": 0.3646, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.3219583763516818, |
|
"grad_norm": 0.34970496506197146, |
|
"learning_rate": 0.00030848451996964615, |
|
"loss": 0.3628, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.32331685051350323, |
|
"grad_norm": 0.36130996602692567, |
|
"learning_rate": 0.00030776211453183475, |
|
"loss": 0.3608, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.3246753246753247, |
|
"grad_norm": 0.22850628919525512, |
|
"learning_rate": 0.00030703772254597945, |
|
"loss": 0.326, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.3260337988371461, |
|
"grad_norm": 0.3620511895369416, |
|
"learning_rate": 0.00030631135736593364, |
|
"loss": 0.349, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.32739227299896756, |
|
"grad_norm": 0.2122923442045741, |
|
"learning_rate": 0.0003055830323819257, |
|
"loss": 0.3734, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.328750747160789, |
|
"grad_norm": 0.24737840068319314, |
|
"learning_rate": 0.00030485276102031235, |
|
"loss": 0.358, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.33010922132261045, |
|
"grad_norm": 0.3610838024240164, |
|
"learning_rate": 0.0003041205567433305, |
|
"loss": 0.3513, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.3314676954844319, |
|
"grad_norm": 0.33939684182516894, |
|
"learning_rate": 0.0003033864330488499, |
|
"loss": 0.3555, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.33282616964625333, |
|
"grad_norm": 0.2666873772787006, |
|
"learning_rate": 0.00030265040347012397, |
|
"loss": 0.3469, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.3341846438080748, |
|
"grad_norm": 0.21914168729339542, |
|
"learning_rate": 0.00030191248157554, |
|
"loss": 0.3323, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.3355431179698962, |
|
"grad_norm": 0.3499432909434212, |
|
"learning_rate": 0.0003011726809683694, |
|
"loss": 0.3321, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.33690159213171766, |
|
"grad_norm": 0.2752315627002723, |
|
"learning_rate": 0.0003004310152865169, |
|
"loss": 0.366, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3382600662935391, |
|
"grad_norm": 0.3224312997036977, |
|
"learning_rate": 0.0002996874982022692, |
|
"loss": 0.3363, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.33961854045536055, |
|
"grad_norm": 0.2614682577027786, |
|
"learning_rate": 0.00029894214342204243, |
|
"loss": 0.3364, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.340977014617182, |
|
"grad_norm": 0.35386811908507626, |
|
"learning_rate": 0.00029819496468613024, |
|
"loss": 0.3468, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.34233548877900344, |
|
"grad_norm": 0.3451776698004379, |
|
"learning_rate": 0.00029744597576844995, |
|
"loss": 0.3457, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.3436939629408249, |
|
"grad_norm": 0.2075635034305044, |
|
"learning_rate": 0.00029669519047628874, |
|
"loss": 0.3217, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.3450524371026463, |
|
"grad_norm": 0.35938341724916706, |
|
"learning_rate": 0.0002959426226500493, |
|
"loss": 0.3518, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.34641091126446777, |
|
"grad_norm": 0.2028404175509972, |
|
"learning_rate": 0.0002951882861629944, |
|
"loss": 0.3464, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.3477693854262892, |
|
"grad_norm": 0.3092038953376563, |
|
"learning_rate": 0.00029443219492099153, |
|
"loss": 0.3565, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.34912785958811066, |
|
"grad_norm": 0.29068071721416333, |
|
"learning_rate": 0.0002936743628622562, |
|
"loss": 0.3315, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.3504863337499321, |
|
"grad_norm": 0.20779330405236773, |
|
"learning_rate": 0.0002929148039570951, |
|
"loss": 0.3174, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.35184480791175354, |
|
"grad_norm": 0.31923873979474604, |
|
"learning_rate": 0.00029215353220764863, |
|
"loss": 0.3441, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.353203282073575, |
|
"grad_norm": 0.2745041226462606, |
|
"learning_rate": 0.00029139056164763274, |
|
"loss": 0.3467, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.35456175623539643, |
|
"grad_norm": 0.4368395215278957, |
|
"learning_rate": 0.0002906259063420803, |
|
"loss": 0.3517, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.3559202303972178, |
|
"grad_norm": 0.30792463599025904, |
|
"learning_rate": 0.0002898595803870815, |
|
"loss": 0.3442, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.35727870455903926, |
|
"grad_norm": 0.3611952448865168, |
|
"learning_rate": 0.0002890915979095244, |
|
"loss": 0.3204, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.3586371787208607, |
|
"grad_norm": 0.23056033787481225, |
|
"learning_rate": 0.0002883219730668345, |
|
"loss": 0.3239, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.35999565288268215, |
|
"grad_norm": 0.2530394826085691, |
|
"learning_rate": 0.00028755072004671314, |
|
"loss": 0.3473, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.3613541270445036, |
|
"grad_norm": 0.33962698046120804, |
|
"learning_rate": 0.000286793326131175, |
|
"loss": 0.3416, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.36271260120632504, |
|
"grad_norm": 0.21053853436821962, |
|
"learning_rate": 0.0002860188912935213, |
|
"loss": 0.3278, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.3640710753681465, |
|
"grad_norm": 0.3129818212559564, |
|
"learning_rate": 0.00028524287073475416, |
|
"loss": 0.3541, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.3654295495299679, |
|
"grad_norm": 0.2699867150782398, |
|
"learning_rate": 0.0002844652787604775, |
|
"loss": 0.3403, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.36678802369178937, |
|
"grad_norm": 0.28737860107629143, |
|
"learning_rate": 0.00028368612970526357, |
|
"loss": 0.3323, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.3681464978536108, |
|
"grad_norm": 0.3515960746260734, |
|
"learning_rate": 0.00028290543793238867, |
|
"loss": 0.3293, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.36950497201543225, |
|
"grad_norm": 0.20401533840321576, |
|
"learning_rate": 0.0002821232178335684, |
|
"loss": 0.3316, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.3708634461772537, |
|
"grad_norm": 0.19995437318728543, |
|
"learning_rate": 0.0002813551732516669, |
|
"loss": 0.3427, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.37222192033907514, |
|
"grad_norm": 0.2545451160615089, |
|
"learning_rate": 0.00028056996963593105, |
|
"loss": 0.3246, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.3735803945008966, |
|
"grad_norm": 0.29065996361482416, |
|
"learning_rate": 0.0002797832807475994, |
|
"loss": 0.3377, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.37493886866271803, |
|
"grad_norm": 0.3334762345639782, |
|
"learning_rate": 0.00027899512108894186, |
|
"loss": 0.3281, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.37629734282453947, |
|
"grad_norm": 0.18363139112462235, |
|
"learning_rate": 0.00027820550518934127, |
|
"loss": 0.3498, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.3776558169863609, |
|
"grad_norm": 0.303677922590966, |
|
"learning_rate": 0.00027741444760502593, |
|
"loss": 0.3282, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.37901429114818236, |
|
"grad_norm": 0.33021307742532524, |
|
"learning_rate": 0.0002766378265036753, |
|
"loss": 0.3612, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.3803727653100038, |
|
"grad_norm": 0.3855197948015209, |
|
"learning_rate": 0.00027584395743117087, |
|
"loss": 0.326, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.38173123947182525, |
|
"grad_norm": 0.17305752786285836, |
|
"learning_rate": 0.0002750486902080647, |
|
"loss": 0.3306, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.3830897136336467, |
|
"grad_norm": 0.3557889572340088, |
|
"learning_rate": 0.0002742520394947646, |
|
"loss": 0.3363, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.38444818779546813, |
|
"grad_norm": 0.269254653829798, |
|
"learning_rate": 0.0002734540199771824, |
|
"loss": 0.3509, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.3858066619572896, |
|
"grad_norm": 0.46153677475953025, |
|
"learning_rate": 0.00027265464636646333, |
|
"loss": 0.3423, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.387165136119111, |
|
"grad_norm": 0.25450280604338793, |
|
"learning_rate": 0.0002718539333987147, |
|
"loss": 0.3344, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.38852361028093246, |
|
"grad_norm": 0.24854855950361845, |
|
"learning_rate": 0.00027105189583473416, |
|
"loss": 0.317, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.3898820844427539, |
|
"grad_norm": 0.25191512294105933, |
|
"learning_rate": 0.00027024854845973797, |
|
"loss": 0.3343, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.39124055860457535, |
|
"grad_norm": 0.3399094367009323, |
|
"learning_rate": 0.000269443906083088, |
|
"loss": 0.3141, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.3925990327663968, |
|
"grad_norm": 0.27297702861099216, |
|
"learning_rate": 0.00026863798353801905, |
|
"loss": 0.344, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.39395750692821824, |
|
"grad_norm": 0.3089505317673794, |
|
"learning_rate": 0.000267830795681365, |
|
"loss": 0.3248, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.3953159810900397, |
|
"grad_norm": 0.28407318632921835, |
|
"learning_rate": 0.0002670223573932857, |
|
"loss": 0.3218, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.3966744552518611, |
|
"grad_norm": 0.27517856010825675, |
|
"learning_rate": 0.0002662126835769916, |
|
"loss": 0.3207, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.39803292941368257, |
|
"grad_norm": 0.2209431864475645, |
|
"learning_rate": 0.00026540178915847003, |
|
"loss": 0.3213, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.399391403575504, |
|
"grad_norm": 0.3012179785372981, |
|
"learning_rate": 0.0002645896890862093, |
|
"loss": 0.3031, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.40074987773732546, |
|
"grad_norm": 0.35758174495742123, |
|
"learning_rate": 0.0002637763983309235, |
|
"loss": 0.3244, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.4021083518991469, |
|
"grad_norm": 0.20197976836253828, |
|
"learning_rate": 0.00026296193188527655, |
|
"loss": 0.3211, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.40346682606096834, |
|
"grad_norm": 0.2784592655459722, |
|
"learning_rate": 0.0002621463047636057, |
|
"loss": 0.3233, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.4048253002227898, |
|
"grad_norm": 0.3125528717241462, |
|
"learning_rate": 0.0002613295320016445, |
|
"loss": 0.324, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.40618377438461123, |
|
"grad_norm": 0.3316178107391592, |
|
"learning_rate": 0.00026051162865624636, |
|
"loss": 0.3358, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.4075422485464327, |
|
"grad_norm": 0.18439640337971394, |
|
"learning_rate": 0.00025969260980510605, |
|
"loss": 0.3031, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.4089007227082541, |
|
"grad_norm": 0.2963162351967641, |
|
"learning_rate": 0.00025887249054648245, |
|
"loss": 0.3276, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.4102591968700755, |
|
"grad_norm": 0.22466036509634918, |
|
"learning_rate": 0.00025805128599891994, |
|
"loss": 0.3364, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.41161767103189695, |
|
"grad_norm": 0.2956284294357639, |
|
"learning_rate": 0.00025722901130096975, |
|
"loss": 0.3314, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.4129761451937184, |
|
"grad_norm": 0.36079018502753485, |
|
"learning_rate": 0.00025642215844549676, |
|
"loss": 0.3351, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.41433461935553983, |
|
"grad_norm": 0.3005152688044544, |
|
"learning_rate": 0.00025559780958847773, |
|
"loss": 0.3202, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.4156930935173613, |
|
"grad_norm": 0.33773262295043566, |
|
"learning_rate": 0.00025477243580984904, |
|
"loss": 0.3089, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.4170515676791827, |
|
"grad_norm": 0.3045253275707874, |
|
"learning_rate": 0.00025394605232501987, |
|
"loss": 0.32, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"grad_norm": 0.34532028635872886, |
|
"learning_rate": 0.0002531186743680128, |
|
"loss": 0.3449, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.4197685160028256, |
|
"grad_norm": 0.14134553464927366, |
|
"learning_rate": 0.0002522903171911834, |
|
"loss": 0.3184, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.42112699016464705, |
|
"grad_norm": 0.2761208223745771, |
|
"learning_rate": 0.00025146099606493817, |
|
"loss": 0.3133, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.4224854643264685, |
|
"grad_norm": 0.26549068110573254, |
|
"learning_rate": 0.0002506307262774542, |
|
"loss": 0.3205, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.42384393848828994, |
|
"grad_norm": 0.2773532590377829, |
|
"learning_rate": 0.00024979952313439636, |
|
"loss": 0.3064, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.4252024126501114, |
|
"grad_norm": 0.3073493622335924, |
|
"learning_rate": 0.0002489674019586356, |
|
"loss": 0.3188, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.4265608868119328, |
|
"grad_norm": 0.21684387112528378, |
|
"learning_rate": 0.0002481343780899665, |
|
"loss": 0.3198, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.42791936097375427, |
|
"grad_norm": 0.25977297083107986, |
|
"learning_rate": 0.00024730046688482436, |
|
"loss": 0.3065, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.4292778351355757, |
|
"grad_norm": 0.3308957326333168, |
|
"learning_rate": 0.000246465683716002, |
|
"loss": 0.3085, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.43063630929739716, |
|
"grad_norm": 0.25944521132960924, |
|
"learning_rate": 0.0002456300439723668, |
|
"loss": 0.3136, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.4319947834592186, |
|
"grad_norm": 0.22121128637476822, |
|
"learning_rate": 0.0002447935630585764, |
|
"loss": 0.322, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.43335325762104004, |
|
"grad_norm": 0.32019002146360315, |
|
"learning_rate": 0.0002439562563947953, |
|
"loss": 0.3103, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.4347117317828615, |
|
"grad_norm": 0.26761100791647713, |
|
"learning_rate": 0.0002431181394164103, |
|
"loss": 0.3114, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.43607020594468293, |
|
"grad_norm": 0.22262870758692213, |
|
"learning_rate": 0.00024227922757374582, |
|
"loss": 0.3069, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.4374286801065044, |
|
"grad_norm": 0.18940890843015826, |
|
"learning_rate": 0.00024143953633177937, |
|
"loss": 0.327, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.4387871542683258, |
|
"grad_norm": 0.27459192854267717, |
|
"learning_rate": 0.00024059908116985654, |
|
"loss": 0.3183, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.44014562843014726, |
|
"grad_norm": 0.36514373383887516, |
|
"learning_rate": 0.00023975787758140525, |
|
"loss": 0.2878, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.4415041025919687, |
|
"grad_norm": 0.30714779342945764, |
|
"learning_rate": 0.00023891594107365024, |
|
"loss": 0.3173, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.44286257675379015, |
|
"grad_norm": 0.24572160078772548, |
|
"learning_rate": 0.0002380732871673276, |
|
"loss": 0.3169, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.4442210509156116, |
|
"grad_norm": 0.22451585676228034, |
|
"learning_rate": 0.00023722993139639806, |
|
"loss": 0.2982, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.44557952507743304, |
|
"grad_norm": 0.4312837719351318, |
|
"learning_rate": 0.000236436550903555, |
|
"loss": 0.3126, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.4469379992392545, |
|
"grad_norm": 0.23649698073314787, |
|
"learning_rate": 0.00023559187786324523, |
|
"loss": 0.3229, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.4482964734010759, |
|
"grad_norm": 0.21885398793120167, |
|
"learning_rate": 0.00023474654870203753, |
|
"loss": 0.3066, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.44965494756289737, |
|
"grad_norm": 0.2522766751448378, |
|
"learning_rate": 0.00023390057900320987, |
|
"loss": 0.3121, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.4510134217247188, |
|
"grad_norm": 0.2023032182722522, |
|
"learning_rate": 0.0002330539843618484, |
|
"loss": 0.295, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.45237189588654025, |
|
"grad_norm": 0.3093016288187825, |
|
"learning_rate": 0.00023220678038455975, |
|
"loss": 0.2962, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.4537303700483617, |
|
"grad_norm": 0.2805332120341892, |
|
"learning_rate": 0.00023135898268918323, |
|
"loss": 0.313, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.45508884421018314, |
|
"grad_norm": 0.25366173411593823, |
|
"learning_rate": 0.00023051060690450337, |
|
"loss": 0.308, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.4564473183720046, |
|
"grad_norm": 0.2848859608687515, |
|
"learning_rate": 0.00022966166866996134, |
|
"loss": 0.2966, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.45780579253382603, |
|
"grad_norm": 0.3400405221454168, |
|
"learning_rate": 0.0002288121836353669, |
|
"loss": 0.313, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.45916426669564747, |
|
"grad_norm": 0.25178053552917457, |
|
"learning_rate": 0.0002279621674606098, |
|
"loss": 0.3008, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.4605227408574689, |
|
"grad_norm": 0.2173042023564375, |
|
"learning_rate": 0.00022711163581537106, |
|
"loss": 0.3062, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.46188121501929036, |
|
"grad_norm": 0.23983156392471572, |
|
"learning_rate": 0.00022626060437883435, |
|
"loss": 0.3055, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.4632396891811118, |
|
"grad_norm": 0.21031049569805663, |
|
"learning_rate": 0.00022540908883939668, |
|
"loss": 0.311, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.4645981633429332, |
|
"grad_norm": 0.1989362043985782, |
|
"learning_rate": 0.00022455710489437927, |
|
"loss": 0.3259, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.46595663750475463, |
|
"grad_norm": 0.1900668237823236, |
|
"learning_rate": 0.00022370466824973812, |
|
"loss": 0.2797, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.4673151116665761, |
|
"grad_norm": 0.20840494435322787, |
|
"learning_rate": 0.00022285179461977483, |
|
"loss": 0.3079, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.4686735858283975, |
|
"grad_norm": 0.4022346673956682, |
|
"learning_rate": 0.00022199849972684633, |
|
"loss": 0.2958, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.47003205999021896, |
|
"grad_norm": 0.3270990206921089, |
|
"learning_rate": 0.0002211447993010755, |
|
"loss": 0.3313, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.4713905341520404, |
|
"grad_norm": 0.249803246986443, |
|
"learning_rate": 0.00022029070908006096, |
|
"loss": 0.3104, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.47274900831386185, |
|
"grad_norm": 0.2813145656422356, |
|
"learning_rate": 0.0002194362448085872, |
|
"loss": 0.3039, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.4741074824756833, |
|
"grad_norm": 0.20904103519051825, |
|
"learning_rate": 0.00021858142223833395, |
|
"loss": 0.3093, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.47546595663750474, |
|
"grad_norm": 0.2476519540180904, |
|
"learning_rate": 0.00021772625712758624, |
|
"loss": 0.3133, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.4768244307993262, |
|
"grad_norm": 0.2897735958185, |
|
"learning_rate": 0.00021687076524094353, |
|
"loss": 0.3184, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.4781829049611476, |
|
"grad_norm": 0.36797022439353905, |
|
"learning_rate": 0.0002160149623490293, |
|
"loss": 0.2982, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.47954137912296907, |
|
"grad_norm": 0.22151406862910683, |
|
"learning_rate": 0.0002151588642282003, |
|
"loss": 0.3031, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.4808998532847905, |
|
"grad_norm": 0.24573689529627643, |
|
"learning_rate": 0.00021430248666025561, |
|
"loss": 0.2927, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.48225832744661196, |
|
"grad_norm": 0.25110843175386494, |
|
"learning_rate": 0.0002134458454321459, |
|
"loss": 0.2984, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.4836168016084334, |
|
"grad_norm": 0.26920027208505604, |
|
"learning_rate": 0.00021258895633568238, |
|
"loss": 0.2869, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.48497527577025484, |
|
"grad_norm": 0.3111889899596438, |
|
"learning_rate": 0.0002117318351672454, |
|
"loss": 0.3215, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.4863337499320763, |
|
"grad_norm": 0.20320042839557148, |
|
"learning_rate": 0.00021087449772749347, |
|
"loss": 0.3019, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.48769222409389773, |
|
"grad_norm": 0.29026043340389285, |
|
"learning_rate": 0.00021001695982107217, |
|
"loss": 0.3087, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.4890506982557192, |
|
"grad_norm": 0.26193168931031524, |
|
"learning_rate": 0.00020915923725632244, |
|
"loss": 0.3036, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.4904091724175406, |
|
"grad_norm": 0.23673083795318206, |
|
"learning_rate": 0.0002083013458449893, |
|
"loss": 0.3111, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.49176764657936206, |
|
"grad_norm": 0.2259659757224692, |
|
"learning_rate": 0.00020744330140193046, |
|
"loss": 0.2883, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.4931261207411835, |
|
"grad_norm": 0.2902171908048496, |
|
"learning_rate": 0.00020658511974482475, |
|
"loss": 0.2898, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.49448459490300495, |
|
"grad_norm": 0.31472212166057917, |
|
"learning_rate": 0.0002057268166938803, |
|
"loss": 0.3111, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.4958430690648264, |
|
"grad_norm": 0.27417754560735935, |
|
"learning_rate": 0.00020486840807154325, |
|
"loss": 0.3013, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.49720154322664784, |
|
"grad_norm": 0.24533216444780298, |
|
"learning_rate": 0.0002040099097022059, |
|
"loss": 0.3073, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.4985600173884693, |
|
"grad_norm": 0.2597365406230817, |
|
"learning_rate": 0.0002031513374119148, |
|
"loss": 0.2918, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.4999184915502907, |
|
"grad_norm": 0.23849823607914308, |
|
"learning_rate": 0.00020229270702807952, |
|
"loss": 0.3044, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.5012769657121121, |
|
"grad_norm": 0.40233301575689023, |
|
"learning_rate": 0.0002014340343791802, |
|
"loss": 0.3086, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.5026354398739336, |
|
"grad_norm": 0.24678497017149986, |
|
"learning_rate": 0.00020057533529447647, |
|
"loss": 0.2947, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.503993914035755, |
|
"grad_norm": 0.18418790064404403, |
|
"learning_rate": 0.000199716625603715, |
|
"loss": 0.2802, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.5053523881975764, |
|
"grad_norm": 0.20614362466496808, |
|
"learning_rate": 0.00019887509507259376, |
|
"loss": 0.3082, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.5067108623593979, |
|
"grad_norm": 0.3176004501620565, |
|
"learning_rate": 0.0001980164110832425, |
|
"loss": 0.2946, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.5080693365212193, |
|
"grad_norm": 0.24434247355813202, |
|
"learning_rate": 0.00019715776366049622, |
|
"loss": 0.2852, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.5094278106830408, |
|
"grad_norm": 0.2632819823395696, |
|
"learning_rate": 0.00019629916863314945, |
|
"loss": 0.3119, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.5107862848448622, |
|
"grad_norm": 0.36866015249871253, |
|
"learning_rate": 0.00019544064182903077, |
|
"loss": 0.3064, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.5121447590066837, |
|
"grad_norm": 0.28334197775915865, |
|
"learning_rate": 0.000194582199074711, |
|
"loss": 0.2982, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.5135032331685051, |
|
"grad_norm": 0.29353450964831995, |
|
"learning_rate": 0.00019372385619521155, |
|
"loss": 0.2997, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.5148617073303265, |
|
"grad_norm": 0.30235983080661416, |
|
"learning_rate": 0.00019286562901371282, |
|
"loss": 0.2953, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.516220181492148, |
|
"grad_norm": 0.24006103860300088, |
|
"learning_rate": 0.0001920075333512621, |
|
"loss": 0.312, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.5175786556539694, |
|
"grad_norm": 0.25401074594196943, |
|
"learning_rate": 0.00019114958502648258, |
|
"loss": 0.2928, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.5189371298157909, |
|
"grad_norm": 0.3126940882002115, |
|
"learning_rate": 0.00019029179985528095, |
|
"loss": 0.2881, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.5202956039776123, |
|
"grad_norm": 0.244186090338719, |
|
"learning_rate": 0.00018945134391851735, |
|
"loss": 0.2844, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.5216540781394338, |
|
"grad_norm": 0.2620555496999193, |
|
"learning_rate": 0.00018861107474107164, |
|
"loss": 0.3033, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.5230125523012552, |
|
"grad_norm": 0.29660068432502984, |
|
"learning_rate": 0.00018775386516779982, |
|
"loss": 0.2815, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.5243710264630767, |
|
"grad_norm": 0.24636353127452668, |
|
"learning_rate": 0.0001868968813467351, |
|
"loss": 0.2982, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.5257295006248981, |
|
"grad_norm": 0.3036729051937609, |
|
"learning_rate": 0.00018604013907600413, |
|
"loss": 0.2697, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.5270879747867195, |
|
"grad_norm": 0.25151244998729483, |
|
"learning_rate": 0.00018518365414928073, |
|
"loss": 0.3005, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.528446448948541, |
|
"grad_norm": 0.3900757856018299, |
|
"learning_rate": 0.00018432744235549457, |
|
"loss": 0.3163, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.5298049231103624, |
|
"grad_norm": 0.3209166901430777, |
|
"learning_rate": 0.0001834715194785403, |
|
"loss": 0.2946, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.5311633972721839, |
|
"grad_norm": 0.20611000381285643, |
|
"learning_rate": 0.00018261590129698663, |
|
"loss": 0.2877, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.5325218714340053, |
|
"grad_norm": 0.21332069721707292, |
|
"learning_rate": 0.00018176060358378503, |
|
"loss": 0.2916, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.5338803455958268, |
|
"grad_norm": 0.34732582027624836, |
|
"learning_rate": 0.00018090564210597975, |
|
"loss": 0.3057, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.5352388197576482, |
|
"grad_norm": 0.23660042062818817, |
|
"learning_rate": 0.00018005103262441622, |
|
"loss": 0.2746, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.5365972939194696, |
|
"grad_norm": 0.23653513107119012, |
|
"learning_rate": 0.00017919679089345122, |
|
"loss": 0.295, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.5379557680812911, |
|
"grad_norm": 0.2066174691631555, |
|
"learning_rate": 0.00017834293266066222, |
|
"loss": 0.2896, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.5393142422431125, |
|
"grad_norm": 0.26332165957058984, |
|
"learning_rate": 0.00017748947366655687, |
|
"loss": 0.2811, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.540672716404934, |
|
"grad_norm": 0.22960074466120436, |
|
"learning_rate": 0.00017663642964428318, |
|
"loss": 0.2846, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.5420311905667554, |
|
"grad_norm": 0.3090166915756585, |
|
"learning_rate": 0.00017578381631933946, |
|
"loss": 0.2924, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.5433896647285769, |
|
"grad_norm": 0.36568571497107416, |
|
"learning_rate": 0.00017493164940928402, |
|
"loss": 0.2865, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.5447481388903983, |
|
"grad_norm": 0.29059486954556535, |
|
"learning_rate": 0.00017407994462344584, |
|
"loss": 0.2785, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.5461066130522197, |
|
"grad_norm": 0.27957466708084117, |
|
"learning_rate": 0.00017322871766263487, |
|
"loss": 0.2935, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.5474650872140412, |
|
"grad_norm": 0.2151461608605068, |
|
"learning_rate": 0.00017237798421885253, |
|
"loss": 0.2841, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.5488235613758626, |
|
"grad_norm": 0.24819887268532007, |
|
"learning_rate": 0.00017152775997500238, |
|
"loss": 0.285, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.5501820355376841, |
|
"grad_norm": 0.20284647935207317, |
|
"learning_rate": 0.0001706780606046013, |
|
"loss": 0.2927, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.5515405096995055, |
|
"grad_norm": 0.19244100345976062, |
|
"learning_rate": 0.0001698289017714902, |
|
"loss": 0.2645, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.552898983861327, |
|
"grad_norm": 0.22539860380829246, |
|
"learning_rate": 0.00016898029912954546, |
|
"loss": 0.2939, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.5542574580231484, |
|
"grad_norm": 0.2619800733732195, |
|
"learning_rate": 0.00016813226832239025, |
|
"loss": 0.2836, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.5556159321849699, |
|
"grad_norm": 0.23393114722266678, |
|
"learning_rate": 0.00016728482498310637, |
|
"loss": 0.2736, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.5569744063467913, |
|
"grad_norm": 0.30087081995833126, |
|
"learning_rate": 0.00016643798473394566, |
|
"loss": 0.2794, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.5583328805086127, |
|
"grad_norm": 0.308240444312431, |
|
"learning_rate": 0.00016559176318604258, |
|
"loss": 0.2671, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.5596913546704342, |
|
"grad_norm": 0.24052215603123736, |
|
"learning_rate": 0.00016474617593912583, |
|
"loss": 0.2874, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.5610498288322556, |
|
"grad_norm": 0.2750519886277399, |
|
"learning_rate": 0.00016390123858123118, |
|
"loss": 0.2732, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.5624083029940771, |
|
"grad_norm": 0.2175806661894403, |
|
"learning_rate": 0.0001630569666884139, |
|
"loss": 0.2885, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.5637667771558985, |
|
"grad_norm": 0.2923956849374819, |
|
"learning_rate": 0.00016221337582446172, |
|
"loss": 0.2924, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.56512525131772, |
|
"grad_norm": 0.2708091098394788, |
|
"learning_rate": 0.00016137048154060785, |
|
"loss": 0.2705, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5664837254795414, |
|
"grad_norm": 0.260062882274282, |
|
"learning_rate": 0.0001605282993752446, |
|
"loss": 0.2833, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.5678421996413628, |
|
"grad_norm": 0.28046003747194964, |
|
"learning_rate": 0.00015968684485363635, |
|
"loss": 0.2875, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.5692006738031843, |
|
"grad_norm": 0.18648990278831484, |
|
"learning_rate": 0.0001588461334876338, |
|
"loss": 0.2788, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.5705591479650057, |
|
"grad_norm": 0.26108175409809964, |
|
"learning_rate": 0.000158006180775388, |
|
"loss": 0.2809, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5719176221268272, |
|
"grad_norm": 0.15533902511877934, |
|
"learning_rate": 0.0001571670022010644, |
|
"loss": 0.2808, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.5732760962886486, |
|
"grad_norm": 0.17785716374013105, |
|
"learning_rate": 0.0001563286132345576, |
|
"loss": 0.2854, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5746345704504701, |
|
"grad_norm": 0.2493856351979774, |
|
"learning_rate": 0.00015549102933120625, |
|
"loss": 0.2672, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.5759930446122915, |
|
"grad_norm": 0.37551758591172574, |
|
"learning_rate": 0.00015467099305876942, |
|
"loss": 0.2883, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.577351518774113, |
|
"grad_norm": 0.21750010428694388, |
|
"learning_rate": 0.00015383504871844582, |
|
"loss": 0.2779, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.5787099929359344, |
|
"grad_norm": 0.19042627120914027, |
|
"learning_rate": 0.00015299995540906267, |
|
"loss": 0.2764, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.5800684670977558, |
|
"grad_norm": 0.2797732165932674, |
|
"learning_rate": 0.0001521657285252044, |
|
"loss": 0.2922, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.5814269412595773, |
|
"grad_norm": 0.3591848479346681, |
|
"learning_rate": 0.00015133238344548327, |
|
"loss": 0.2884, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.5827854154213987, |
|
"grad_norm": 0.21764914836042967, |
|
"learning_rate": 0.00015049993553225608, |
|
"loss": 0.2715, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.5841438895832202, |
|
"grad_norm": 0.26727180336133755, |
|
"learning_rate": 0.0001496684001313406, |
|
"loss": 0.2753, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.5855023637450416, |
|
"grad_norm": 0.21915535565528904, |
|
"learning_rate": 0.00014883779257173285, |
|
"loss": 0.265, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.586860837906863, |
|
"grad_norm": 0.25668689734119876, |
|
"learning_rate": 0.0001480081281653244, |
|
"loss": 0.2762, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.5882193120686845, |
|
"grad_norm": 0.2834782294538094, |
|
"learning_rate": 0.00014717942220662038, |
|
"loss": 0.28, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.5895777862305059, |
|
"grad_norm": 0.24516802954697497, |
|
"learning_rate": 0.00014635168997245712, |
|
"loss": 0.2755, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.5909362603923274, |
|
"grad_norm": 0.22053403799293927, |
|
"learning_rate": 0.00014552494672172113, |
|
"loss": 0.2732, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.5922947345541488, |
|
"grad_norm": 0.297493134455997, |
|
"learning_rate": 0.00014469920769506704, |
|
"loss": 0.2819, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.5936532087159703, |
|
"grad_norm": 0.26448034669148435, |
|
"learning_rate": 0.00014387448811463722, |
|
"loss": 0.2947, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.5950116828777917, |
|
"grad_norm": 0.1887478278727578, |
|
"learning_rate": 0.00014305080318378105, |
|
"loss": 0.2573, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.5963701570396132, |
|
"grad_norm": 0.24486372742215648, |
|
"learning_rate": 0.0001422281680867744, |
|
"loss": 0.2762, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.5977286312014346, |
|
"grad_norm": 0.22891270758035537, |
|
"learning_rate": 0.00014140659798854012, |
|
"loss": 0.2816, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.599087105363256, |
|
"grad_norm": 0.25531740500430156, |
|
"learning_rate": 0.00014058610803436813, |
|
"loss": 0.2544, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.6004455795250774, |
|
"grad_norm": 0.2198360405690994, |
|
"learning_rate": 0.00013976671334963648, |
|
"loss": 0.27, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.6018040536868988, |
|
"grad_norm": 0.22767226535607382, |
|
"learning_rate": 0.0001389484290395323, |
|
"loss": 0.2869, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.6031625278487203, |
|
"grad_norm": 0.2694860139304321, |
|
"learning_rate": 0.00013813127018877331, |
|
"loss": 0.2752, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.6045210020105417, |
|
"grad_norm": 0.19898660564261053, |
|
"learning_rate": 0.00013731525186133026, |
|
"loss": 0.2624, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.6058794761723632, |
|
"grad_norm": 0.23150351646391246, |
|
"learning_rate": 0.00013653296123522198, |
|
"loss": 0.2718, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.6072379503341846, |
|
"grad_norm": 0.24064115266253058, |
|
"learning_rate": 0.00013571922195028266, |
|
"loss": 0.2812, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.608596424496006, |
|
"grad_norm": 0.25687846535740555, |
|
"learning_rate": 0.0001349066676537268, |
|
"loss": 0.262, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.6099548986578275, |
|
"grad_norm": 0.20024379738006956, |
|
"learning_rate": 0.00013409531332464196, |
|
"loss": 0.2796, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.6113133728196489, |
|
"grad_norm": 0.30669943060449323, |
|
"learning_rate": 0.00013328517391999483, |
|
"loss": 0.2748, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.6126718469814704, |
|
"grad_norm": 0.26517225209707274, |
|
"learning_rate": 0.00013247626437435539, |
|
"loss": 0.2641, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.6140303211432918, |
|
"grad_norm": 0.23089105114814368, |
|
"learning_rate": 0.0001316685995996218, |
|
"loss": 0.2716, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.6153887953051133, |
|
"grad_norm": 0.3141172219746477, |
|
"learning_rate": 0.0001308621944847455, |
|
"loss": 0.2601, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.6167472694669347, |
|
"grad_norm": 0.2290976880794265, |
|
"learning_rate": 0.0001300570638954565, |
|
"loss": 0.2805, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.6181057436287561, |
|
"grad_norm": 0.21218409171582492, |
|
"learning_rate": 0.0001292532226739894, |
|
"loss": 0.2686, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.6194642177905776, |
|
"grad_norm": 0.22628948026088308, |
|
"learning_rate": 0.0001284506856388101, |
|
"loss": 0.2688, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.620822691952399, |
|
"grad_norm": 0.2948337400203754, |
|
"learning_rate": 0.00012764946758434225, |
|
"loss": 0.2655, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.6221811661142205, |
|
"grad_norm": 0.3340188815254344, |
|
"learning_rate": 0.00012684958328069453, |
|
"loss": 0.2754, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.6235396402760419, |
|
"grad_norm": 0.2767372638913053, |
|
"learning_rate": 0.0001260510474733888, |
|
"loss": 0.2602, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.6248981144378634, |
|
"grad_norm": 0.270894988791611, |
|
"learning_rate": 0.00012525387488308783, |
|
"loss": 0.2564, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.6262565885996848, |
|
"grad_norm": 0.20130647702859084, |
|
"learning_rate": 0.000124458080205324, |
|
"loss": 0.2699, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.6276150627615062, |
|
"grad_norm": 0.2606352685620501, |
|
"learning_rate": 0.0001236795524100573, |
|
"loss": 0.2777, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.6289735369233277, |
|
"grad_norm": 0.26862575508349007, |
|
"learning_rate": 0.00012288652925419885, |
|
"loss": 0.27, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.6303320110851491, |
|
"grad_norm": 0.2264767237464518, |
|
"learning_rate": 0.00012209492765187177, |
|
"loss": 0.2717, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.6316904852469706, |
|
"grad_norm": 0.3116565801871334, |
|
"learning_rate": 0.00012130476219590986, |
|
"loss": 0.2595, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.633048959408792, |
|
"grad_norm": 0.2778393951264189, |
|
"learning_rate": 0.00012051604745267213, |
|
"loss": 0.2791, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.6344074335706135, |
|
"grad_norm": 0.1850696129101786, |
|
"learning_rate": 0.00011972879796177415, |
|
"loss": 0.2717, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.6357659077324349, |
|
"grad_norm": 0.24958891669063782, |
|
"learning_rate": 0.00011894302823582031, |
|
"loss": 0.2638, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.6371243818942564, |
|
"grad_norm": 0.3700870104750999, |
|
"learning_rate": 0.00011815875276013624, |
|
"loss": 0.2742, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.6384828560560778, |
|
"grad_norm": 0.33264994031715317, |
|
"learning_rate": 0.0001173759859925015, |
|
"loss": 0.2774, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.6398413302178992, |
|
"grad_norm": 0.31037389441035956, |
|
"learning_rate": 0.00011659474236288361, |
|
"loss": 0.2403, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.6411998043797207, |
|
"grad_norm": 0.2731125175831413, |
|
"learning_rate": 0.00011581503627317138, |
|
"loss": 0.2568, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.6425582785415421, |
|
"grad_norm": 0.31542476581603357, |
|
"learning_rate": 0.00011503688209690988, |
|
"loss": 0.2405, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.6439167527033636, |
|
"grad_norm": 0.2856271842999882, |
|
"learning_rate": 0.00011426029417903521, |
|
"loss": 0.2594, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.645275226865185, |
|
"grad_norm": 0.304609790388205, |
|
"learning_rate": 0.00011348528683561044, |
|
"loss": 0.2617, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.6466337010270065, |
|
"grad_norm": 0.24926409052563817, |
|
"learning_rate": 0.00011271187435356107, |
|
"loss": 0.2624, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.6479921751888279, |
|
"grad_norm": 0.29444243889916777, |
|
"learning_rate": 0.00011194007099041242, |
|
"loss": 0.267, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.6493506493506493, |
|
"grad_norm": 0.251174398975187, |
|
"learning_rate": 0.00011116989097402601, |
|
"loss": 0.2745, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.6507091235124708, |
|
"grad_norm": 0.26364700269491465, |
|
"learning_rate": 0.0001104013485023379, |
|
"loss": 0.2695, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.6520675976742922, |
|
"grad_norm": 0.1408465902411862, |
|
"learning_rate": 0.00010963445774309668, |
|
"loss": 0.2423, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.6534260718361137, |
|
"grad_norm": 0.1933859329942763, |
|
"learning_rate": 0.00010886923283360217, |
|
"loss": 0.2359, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.6547845459979351, |
|
"grad_norm": 0.2614195528425062, |
|
"learning_rate": 0.00010810568788044524, |
|
"loss": 0.2673, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.6561430201597566, |
|
"grad_norm": 0.24091031620062864, |
|
"learning_rate": 0.00010734383695924741, |
|
"loss": 0.2493, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.657501494321578, |
|
"grad_norm": 0.2697615824186297, |
|
"learning_rate": 0.00010658369411440134, |
|
"loss": 0.2729, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.6588599684833994, |
|
"grad_norm": 0.20653067849872642, |
|
"learning_rate": 0.00010582527335881209, |
|
"loss": 0.274, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.6602184426452209, |
|
"grad_norm": 0.2589626095489949, |
|
"learning_rate": 0.0001050685886736388, |
|
"loss": 0.2609, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.6615769168070423, |
|
"grad_norm": 0.2672837103760092, |
|
"learning_rate": 0.00010431365400803682, |
|
"loss": 0.2524, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.6629353909688638, |
|
"grad_norm": 0.1824225008155396, |
|
"learning_rate": 0.00010356048327890064, |
|
"loss": 0.2702, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.6642938651306852, |
|
"grad_norm": 0.22598161309206102, |
|
"learning_rate": 0.00010280909037060747, |
|
"loss": 0.2601, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.6656523392925067, |
|
"grad_norm": 0.3087441379489739, |
|
"learning_rate": 0.00010205948913476113, |
|
"loss": 0.2645, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.6670108134543281, |
|
"grad_norm": 0.26641776561733793, |
|
"learning_rate": 0.00010131169338993662, |
|
"loss": 0.2572, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.6683692876161496, |
|
"grad_norm": 0.16642889477958095, |
|
"learning_rate": 0.00010056571692142558, |
|
"loss": 0.2437, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.669727761777971, |
|
"grad_norm": 0.33006631915049106, |
|
"learning_rate": 9.982157348098204e-05, |
|
"loss": 0.2557, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.6710862359397924, |
|
"grad_norm": 0.25184518346403906, |
|
"learning_rate": 9.907927678656888e-05, |
|
"loss": 0.2481, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.6724447101016139, |
|
"grad_norm": 0.21781761609625996, |
|
"learning_rate": 9.833884052210525e-05, |
|
"loss": 0.2474, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.6738031842634353, |
|
"grad_norm": 0.2707646383120265, |
|
"learning_rate": 9.760027833721379e-05, |
|
"loss": 0.2652, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.6751616584252568, |
|
"grad_norm": 0.2069393478176125, |
|
"learning_rate": 9.686360384696958e-05, |
|
"loss": 0.2595, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.6765201325870782, |
|
"grad_norm": 0.24428561492811254, |
|
"learning_rate": 9.614350737579221e-05, |
|
"loss": 0.2501, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.6778786067488997, |
|
"grad_norm": 0.26915065049504966, |
|
"learning_rate": 9.541061055170308e-05, |
|
"loss": 0.2595, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.6792370809107211, |
|
"grad_norm": 0.32715798463519263, |
|
"learning_rate": 9.467964178784106e-05, |
|
"loss": 0.249, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6805955550725425, |
|
"grad_norm": 0.2188565278615699, |
|
"learning_rate": 9.395061455929976e-05, |
|
"loss": 0.2644, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.681954029234364, |
|
"grad_norm": 0.1464346234966987, |
|
"learning_rate": 9.32235423053812e-05, |
|
"loss": 0.2489, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.6833125033961854, |
|
"grad_norm": 0.23268193269727472, |
|
"learning_rate": 9.249843842934851e-05, |
|
"loss": 0.2524, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.6846709775580069, |
|
"grad_norm": 0.2823606594876491, |
|
"learning_rate": 9.177531629817841e-05, |
|
"loss": 0.2734, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.6860294517198283, |
|
"grad_norm": 0.24467058752217685, |
|
"learning_rate": 9.105418924231516e-05, |
|
"loss": 0.2579, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.6873879258816498, |
|
"grad_norm": 0.2721349790032047, |
|
"learning_rate": 9.034943316134114e-05, |
|
"loss": 0.2501, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.6887464000434712, |
|
"grad_norm": 0.2063496873982564, |
|
"learning_rate": 8.96322955378789e-05, |
|
"loss": 0.2546, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.6901048742052927, |
|
"grad_norm": 0.1388645672356858, |
|
"learning_rate": 8.891719249538568e-05, |
|
"loss": 0.2481, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.6914633483671141, |
|
"grad_norm": 0.26747348762140405, |
|
"learning_rate": 8.820413721647738e-05, |
|
"loss": 0.2406, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.6928218225289355, |
|
"grad_norm": 0.24773718412732226, |
|
"learning_rate": 8.749314284602002e-05, |
|
"loss": 0.2345, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.694180296690757, |
|
"grad_norm": 0.1875176742846847, |
|
"learning_rate": 8.67842224908878e-05, |
|
"loss": 0.2697, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.6955387708525784, |
|
"grad_norm": 0.34193810804953745, |
|
"learning_rate": 8.607738921972125e-05, |
|
"loss": 0.2499, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.6968972450143999, |
|
"grad_norm": 0.19405847865847933, |
|
"learning_rate": 8.537265606268663e-05, |
|
"loss": 0.2469, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.6982557191762213, |
|
"grad_norm": 0.1829187140282853, |
|
"learning_rate": 8.467003601123527e-05, |
|
"loss": 0.2519, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.6996141933380428, |
|
"grad_norm": 0.20733927044724373, |
|
"learning_rate": 8.396954201786429e-05, |
|
"loss": 0.2655, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.7009726674998642, |
|
"grad_norm": 0.2261928459658941, |
|
"learning_rate": 8.32711869958781e-05, |
|
"loss": 0.2593, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.7023311416616856, |
|
"grad_norm": 0.2718188963862619, |
|
"learning_rate": 8.25749838191499e-05, |
|
"loss": 0.2415, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.7036896158235071, |
|
"grad_norm": 0.3565494856705099, |
|
"learning_rate": 8.18809453218845e-05, |
|
"loss": 0.2586, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.7050480899853285, |
|
"grad_norm": 0.23853635314623642, |
|
"learning_rate": 8.118908429838201e-05, |
|
"loss": 0.2495, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.70640656414715, |
|
"grad_norm": 0.12974546184866537, |
|
"learning_rate": 8.049941350280157e-05, |
|
"loss": 0.241, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.7077650383089714, |
|
"grad_norm": 0.15240966539892364, |
|
"learning_rate": 7.981194564892645e-05, |
|
"loss": 0.2642, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.7091235124707929, |
|
"grad_norm": 0.18578994346470928, |
|
"learning_rate": 7.912669340992957e-05, |
|
"loss": 0.2561, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.7104819866326142, |
|
"grad_norm": 0.2354542836489054, |
|
"learning_rate": 7.844366941814016e-05, |
|
"loss": 0.2433, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.7118404607944356, |
|
"grad_norm": 0.32359876529310133, |
|
"learning_rate": 7.776288626481043e-05, |
|
"loss": 0.2589, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.7131989349562571, |
|
"grad_norm": 0.21721518409143126, |
|
"learning_rate": 7.708435649988394e-05, |
|
"loss": 0.248, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.7145574091180785, |
|
"grad_norm": 0.2413841328575766, |
|
"learning_rate": 7.640809263176381e-05, |
|
"loss": 0.2495, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.7159158832799, |
|
"grad_norm": 0.1937874091125614, |
|
"learning_rate": 7.57341071270824e-05, |
|
"loss": 0.2379, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.7172743574417214, |
|
"grad_norm": 0.29670499546178025, |
|
"learning_rate": 7.507582377492124e-05, |
|
"loss": 0.2481, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.7186328316035429, |
|
"grad_norm": 0.2733674523937474, |
|
"learning_rate": 7.44063860443e-05, |
|
"loss": 0.24, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.7199913057653643, |
|
"grad_norm": 0.24849432830004892, |
|
"learning_rate": 7.373926357771387e-05, |
|
"loss": 0.2518, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.7213497799271857, |
|
"grad_norm": 0.3217997284475769, |
|
"learning_rate": 7.307446867327764e-05, |
|
"loss": 0.2558, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.7227082540890072, |
|
"grad_norm": 0.1903670555116767, |
|
"learning_rate": 7.241201358619814e-05, |
|
"loss": 0.2459, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.7240667282508286, |
|
"grad_norm": 0.1308938028529946, |
|
"learning_rate": 7.175191052854886e-05, |
|
"loss": 0.2507, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.7254252024126501, |
|
"grad_norm": 0.2795123652476836, |
|
"learning_rate": 7.109417166904457e-05, |
|
"loss": 0.2518, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.7267836765744715, |
|
"grad_norm": 0.16091370835854293, |
|
"learning_rate": 7.043880913281707e-05, |
|
"loss": 0.2554, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.728142150736293, |
|
"grad_norm": 0.16950014605111838, |
|
"learning_rate": 6.978583500119171e-05, |
|
"loss": 0.2451, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.7295006248981144, |
|
"grad_norm": 0.19788089327913239, |
|
"learning_rate": 6.913526131146473e-05, |
|
"loss": 0.2456, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.7308590990599358, |
|
"grad_norm": 0.23996422423355868, |
|
"learning_rate": 6.848710005668106e-05, |
|
"loss": 0.2372, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.7322175732217573, |
|
"grad_norm": 0.30447979386999535, |
|
"learning_rate": 6.784136318541352e-05, |
|
"loss": 0.2507, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.7335760473835787, |
|
"grad_norm": 0.2442932467375467, |
|
"learning_rate": 6.719806260154248e-05, |
|
"loss": 0.2499, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.7349345215454002, |
|
"grad_norm": 0.2053301139703188, |
|
"learning_rate": 6.655721016403638e-05, |
|
"loss": 0.2351, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.7362929957072216, |
|
"grad_norm": 0.28412523900572856, |
|
"learning_rate": 6.591881768673309e-05, |
|
"loss": 0.2463, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.7376514698690431, |
|
"grad_norm": 0.2102789887873736, |
|
"learning_rate": 6.52828969381223e-05, |
|
"loss": 0.2469, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.7390099440308645, |
|
"grad_norm": 0.37446012395142053, |
|
"learning_rate": 6.464945964112845e-05, |
|
"loss": 0.2381, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.740368418192686, |
|
"grad_norm": 0.16201575759035203, |
|
"learning_rate": 6.401851747289451e-05, |
|
"loss": 0.2349, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.7417268923545074, |
|
"grad_norm": 0.2489903791806012, |
|
"learning_rate": 6.339008206456684e-05, |
|
"loss": 0.2482, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.7430853665163288, |
|
"grad_norm": 0.21608399737617504, |
|
"learning_rate": 6.276416500108084e-05, |
|
"loss": 0.2446, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.7444438406781503, |
|
"grad_norm": 0.2704960434877356, |
|
"learning_rate": 6.215322069728647e-05, |
|
"loss": 0.2424, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.7458023148399717, |
|
"grad_norm": 0.2267608806933957, |
|
"learning_rate": 6.153232395255646e-05, |
|
"loss": 0.2441, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.7471607890017932, |
|
"grad_norm": 0.183167292044454, |
|
"learning_rate": 6.0913979799636686e-05, |
|
"loss": 0.2445, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.7485192631636146, |
|
"grad_norm": 0.15376761881823003, |
|
"learning_rate": 6.0298199637434525e-05, |
|
"loss": 0.2253, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.7498777373254361, |
|
"grad_norm": 0.19314678658445544, |
|
"learning_rate": 5.9684994817591334e-05, |
|
"loss": 0.2383, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.7512362114872575, |
|
"grad_norm": 0.2544241890699629, |
|
"learning_rate": 5.907437664427311e-05, |
|
"loss": 0.2391, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.7525946856490789, |
|
"grad_norm": 0.21989322641900727, |
|
"learning_rate": 5.846635637396216e-05, |
|
"loss": 0.2332, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.7539531598109004, |
|
"grad_norm": 0.22618174199432453, |
|
"learning_rate": 5.7860945215249696e-05, |
|
"loss": 0.2337, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.7553116339727218, |
|
"grad_norm": 0.18764808609541392, |
|
"learning_rate": 5.725815432862887e-05, |
|
"loss": 0.2482, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.7566701081345433, |
|
"grad_norm": 0.4380376298961902, |
|
"learning_rate": 5.6657994826289465e-05, |
|
"loss": 0.2262, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.7580285822963647, |
|
"grad_norm": 0.23698867029895784, |
|
"learning_rate": 5.606047777191268e-05, |
|
"loss": 0.2409, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.7593870564581862, |
|
"grad_norm": 0.20034127068488122, |
|
"learning_rate": 5.546561418046736e-05, |
|
"loss": 0.2419, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.7607455306200076, |
|
"grad_norm": 0.2949286374600259, |
|
"learning_rate": 5.4873415018006867e-05, |
|
"loss": 0.2261, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.762104004781829, |
|
"grad_norm": 0.25152518852471184, |
|
"learning_rate": 5.428389120146715e-05, |
|
"loss": 0.2375, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.7634624789436505, |
|
"grad_norm": 0.1611737332419803, |
|
"learning_rate": 5.369705359846511e-05, |
|
"loss": 0.2318, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.7648209531054719, |
|
"grad_norm": 0.24055178507097832, |
|
"learning_rate": 5.311291302709844e-05, |
|
"loss": 0.2373, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.7661794272672934, |
|
"grad_norm": 0.1897183193395996, |
|
"learning_rate": 5.2531480255746476e-05, |
|
"loss": 0.245, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.7675379014291148, |
|
"grad_norm": 0.17982933996634243, |
|
"learning_rate": 5.195276600287118e-05, |
|
"loss": 0.2369, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.7688963755909363, |
|
"grad_norm": 0.25848505633412666, |
|
"learning_rate": 5.137678093681983e-05, |
|
"loss": 0.2319, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.7702548497527577, |
|
"grad_norm": 0.20072845241494364, |
|
"learning_rate": 5.0803535675628497e-05, |
|
"loss": 0.2306, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.7716133239145792, |
|
"grad_norm": 0.20242303091668362, |
|
"learning_rate": 5.0233040786825935e-05, |
|
"loss": 0.2422, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.7729717980764006, |
|
"grad_norm": 0.2519217142033256, |
|
"learning_rate": 4.9665306787239086e-05, |
|
"loss": 0.25, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.774330272238222, |
|
"grad_norm": 0.20962707780239995, |
|
"learning_rate": 4.910034414279902e-05, |
|
"loss": 0.2253, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.7756887464000435, |
|
"grad_norm": 0.22029442635682775, |
|
"learning_rate": 4.853816326834808e-05, |
|
"loss": 0.2411, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.7770472205618649, |
|
"grad_norm": 0.2324490253296684, |
|
"learning_rate": 4.797877452744792e-05, |
|
"loss": 0.2373, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.7784056947236864, |
|
"grad_norm": 0.25563113455518527, |
|
"learning_rate": 4.742218823218851e-05, |
|
"loss": 0.2363, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.7797641688855078, |
|
"grad_norm": 0.2147681728902342, |
|
"learning_rate": 4.686841464299776e-05, |
|
"loss": 0.2474, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.7811226430473293, |
|
"grad_norm": 0.25383739922693677, |
|
"learning_rate": 4.6317463968452624e-05, |
|
"loss": 0.2212, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.7824811172091507, |
|
"grad_norm": 0.2679584758554305, |
|
"learning_rate": 4.5769346365090894e-05, |
|
"loss": 0.252, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.7838395913709721, |
|
"grad_norm": 0.2088581606519284, |
|
"learning_rate": 4.522407193722382e-05, |
|
"loss": 0.2277, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.7851980655327936, |
|
"grad_norm": 0.26508761902628303, |
|
"learning_rate": 4.4681650736750016e-05, |
|
"loss": 0.2277, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.786556539694615, |
|
"grad_norm": 0.377399622435696, |
|
"learning_rate": 4.416361998302716e-05, |
|
"loss": 0.2278, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.7879150138564365, |
|
"grad_norm": 0.15962057847455954, |
|
"learning_rate": 4.3626820065221566e-05, |
|
"loss": 0.2242, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.7892734880182579, |
|
"grad_norm": 0.32929537012542076, |
|
"learning_rate": 4.309290281945775e-05, |
|
"loss": 0.228, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.7906319621800794, |
|
"grad_norm": 0.21846014602740327, |
|
"learning_rate": 4.256187808826948e-05, |
|
"loss": 0.2446, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.7919904363419008, |
|
"grad_norm": 0.21017757165369907, |
|
"learning_rate": 4.203375566086851e-05, |
|
"loss": 0.2401, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.7933489105037222, |
|
"grad_norm": 0.24137967337019786, |
|
"learning_rate": 4.15085452729636e-05, |
|
"loss": 0.2465, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.7947073846655437, |
|
"grad_norm": 0.25590967007232035, |
|
"learning_rate": 4.098625660658151e-05, |
|
"loss": 0.2375, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.7960658588273651, |
|
"grad_norm": 0.279864351972487, |
|
"learning_rate": 4.0466899289888205e-05, |
|
"loss": 0.2374, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.7974243329891866, |
|
"grad_norm": 0.2607946144900689, |
|
"learning_rate": 3.995048289701155e-05, |
|
"loss": 0.222, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.798782807151008, |
|
"grad_norm": 0.3426303124257882, |
|
"learning_rate": 3.9437016947864745e-05, |
|
"loss": 0.2367, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.8001412813128295, |
|
"grad_norm": 0.27770053634270586, |
|
"learning_rate": 3.892651090797075e-05, |
|
"loss": 0.2417, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.8014997554746509, |
|
"grad_norm": 0.24470556593709372, |
|
"learning_rate": 3.841897418828797e-05, |
|
"loss": 0.219, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.8028582296364724, |
|
"grad_norm": 0.24604839411340365, |
|
"learning_rate": 3.791441614503675e-05, |
|
"loss": 0.2382, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.8042167037982938, |
|
"grad_norm": 0.47026656573464204, |
|
"learning_rate": 3.7412846079526644e-05, |
|
"loss": 0.2196, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.8055751779601152, |
|
"grad_norm": 0.26091598553410145, |
|
"learning_rate": 3.691427323798522e-05, |
|
"loss": 0.2268, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.8069336521219367, |
|
"grad_norm": 0.21960674238700215, |
|
"learning_rate": 3.6418706811387504e-05, |
|
"loss": 0.2356, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.8082921262837581, |
|
"grad_norm": 0.2181680329611913, |
|
"learning_rate": 3.592615593528652e-05, |
|
"loss": 0.2261, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.8096506004455796, |
|
"grad_norm": 0.1831881150211827, |
|
"learning_rate": 3.543662968964496e-05, |
|
"loss": 0.2306, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.811009074607401, |
|
"grad_norm": 0.24753134638996258, |
|
"learning_rate": 3.4950137098667836e-05, |
|
"loss": 0.2459, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.8123675487692225, |
|
"grad_norm": 0.3847831369965376, |
|
"learning_rate": 3.4466687130635856e-05, |
|
"loss": 0.2201, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.8137260229310439, |
|
"grad_norm": 0.2975898486391868, |
|
"learning_rate": 3.39862886977405e-05, |
|
"loss": 0.2166, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.8150844970928653, |
|
"grad_norm": 0.20045687385866154, |
|
"learning_rate": 3.3508950655919394e-05, |
|
"loss": 0.228, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.8164429712546868, |
|
"grad_norm": 0.303426868110847, |
|
"learning_rate": 3.3034681804693204e-05, |
|
"loss": 0.22, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.8178014454165082, |
|
"grad_norm": 0.20754051862810569, |
|
"learning_rate": 3.25634908870033e-05, |
|
"loss": 0.2301, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.8191599195783296, |
|
"grad_norm": 0.24422546737008857, |
|
"learning_rate": 3.209538658905087e-05, |
|
"loss": 0.2367, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.820518393740151, |
|
"grad_norm": 0.3027570941981141, |
|
"learning_rate": 3.163037754013647e-05, |
|
"loss": 0.2417, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.8218768679019725, |
|
"grad_norm": 0.280254043533181, |
|
"learning_rate": 3.116847231250104e-05, |
|
"loss": 0.2266, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.8232353420637939, |
|
"grad_norm": 0.28505781897752897, |
|
"learning_rate": 3.070967942116807e-05, |
|
"loss": 0.2307, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.8245938162256153, |
|
"grad_norm": 0.22370571404265266, |
|
"learning_rate": 3.0254007323786338e-05, |
|
"loss": 0.2292, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.8259522903874368, |
|
"grad_norm": 0.20314542315669792, |
|
"learning_rate": 2.9801464420474135e-05, |
|
"loss": 0.2384, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.8273107645492582, |
|
"grad_norm": 0.3091717822159854, |
|
"learning_rate": 2.9352059053664515e-05, |
|
"loss": 0.2252, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.8286692387110797, |
|
"grad_norm": 0.2850647955523155, |
|
"learning_rate": 2.8905799507951314e-05, |
|
"loss": 0.2228, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.8300277128729011, |
|
"grad_norm": 0.256986010255855, |
|
"learning_rate": 2.846269400993655e-05, |
|
"loss": 0.2176, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.8313861870347226, |
|
"grad_norm": 0.19662900160930957, |
|
"learning_rate": 2.802275072807865e-05, |
|
"loss": 0.2271, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.832744661196544, |
|
"grad_norm": 0.2745095935502404, |
|
"learning_rate": 2.7585977772542126e-05, |
|
"loss": 0.2254, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.8341031353583654, |
|
"grad_norm": 0.20724780113594732, |
|
"learning_rate": 2.715238319504769e-05, |
|
"loss": 0.2415, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.8354616095201869, |
|
"grad_norm": 0.21591886814918512, |
|
"learning_rate": 2.6721974988724264e-05, |
|
"loss": 0.2305, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 0.4269961594661858, |
|
"learning_rate": 2.629476108796114e-05, |
|
"loss": 0.2344, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.8381785578438298, |
|
"grad_norm": 0.3444022954836087, |
|
"learning_rate": 2.587074936826215e-05, |
|
"loss": 0.2355, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.8395370320056512, |
|
"grad_norm": 0.2810532430087154, |
|
"learning_rate": 2.5449947646100202e-05, |
|
"loss": 0.2333, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.8408955061674727, |
|
"grad_norm": 0.2016521391071412, |
|
"learning_rate": 2.5032363678773284e-05, |
|
"loss": 0.2345, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.8422539803292941, |
|
"grad_norm": 0.26305838948166177, |
|
"learning_rate": 2.4626260675610046e-05, |
|
"loss": 0.2229, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.8436124544911155, |
|
"grad_norm": 0.3334236797955835, |
|
"learning_rate": 2.4223262587394115e-05, |
|
"loss": 0.2414, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.844970928652937, |
|
"grad_norm": 0.1995390748027635, |
|
"learning_rate": 2.381524806289641e-05, |
|
"loss": 0.229, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.8463294028147584, |
|
"grad_norm": 0.24559427709489323, |
|
"learning_rate": 2.3410481428214603e-05, |
|
"loss": 0.2139, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.8476878769765799, |
|
"grad_norm": 0.20659570740849767, |
|
"learning_rate": 2.300897014504688e-05, |
|
"loss": 0.227, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.8490463511384013, |
|
"grad_norm": 0.21220065960007847, |
|
"learning_rate": 2.261072161508033e-05, |
|
"loss": 0.2374, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.8504048253002228, |
|
"grad_norm": 0.24984661749787465, |
|
"learning_rate": 2.2215743179854577e-05, |
|
"loss": 0.2266, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.8517632994620442, |
|
"grad_norm": 0.3258474284548005, |
|
"learning_rate": 2.1824042120626543e-05, |
|
"loss": 0.2231, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.8531217736238657, |
|
"grad_norm": 0.3217882648747335, |
|
"learning_rate": 2.143562565823609e-05, |
|
"loss": 0.2313, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.8544802477856871, |
|
"grad_norm": 0.2660872932773675, |
|
"learning_rate": 2.1050500952972985e-05, |
|
"loss": 0.2443, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.8558387219475085, |
|
"grad_norm": 0.2552800785940888, |
|
"learning_rate": 2.0668675104444745e-05, |
|
"loss": 0.2282, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.85719719610933, |
|
"grad_norm": 0.23195860174218688, |
|
"learning_rate": 2.0290155151446145e-05, |
|
"loss": 0.2375, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.8585556702711514, |
|
"grad_norm": 0.29024757595999545, |
|
"learning_rate": 1.9914948071828922e-05, |
|
"loss": 0.2222, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.8599141444329729, |
|
"grad_norm": 0.2658334720707247, |
|
"learning_rate": 1.9543060782373667e-05, |
|
"loss": 0.2351, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.8612726185947943, |
|
"grad_norm": 0.26827491653104296, |
|
"learning_rate": 1.917450013866189e-05, |
|
"loss": 0.2397, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.8626310927566158, |
|
"grad_norm": 0.2739389172356759, |
|
"learning_rate": 1.880927293494994e-05, |
|
"loss": 0.233, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.8639895669184372, |
|
"grad_norm": 0.2784269064173774, |
|
"learning_rate": 1.8447385904043534e-05, |
|
"loss": 0.2418, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.8653480410802586, |
|
"grad_norm": 0.3614092162288049, |
|
"learning_rate": 1.808884571717384e-05, |
|
"loss": 0.2257, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.8667065152420801, |
|
"grad_norm": 0.24413648696026682, |
|
"learning_rate": 1.7733658983874336e-05, |
|
"loss": 0.2389, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.8680649894039015, |
|
"grad_norm": 0.1967545842426806, |
|
"learning_rate": 1.7381832251859075e-05, |
|
"loss": 0.2191, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.869423463565723, |
|
"grad_norm": 0.23747418190146288, |
|
"learning_rate": 1.7033372006901982e-05, |
|
"loss": 0.223, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.8707819377275444, |
|
"grad_norm": 0.28815680461600557, |
|
"learning_rate": 1.6702023326195593e-05, |
|
"loss": 0.2242, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.8721404118893659, |
|
"grad_norm": 0.2868186834763779, |
|
"learning_rate": 1.636017997206618e-05, |
|
"loss": 0.2155, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.8734988860511873, |
|
"grad_norm": 0.20903132836326485, |
|
"learning_rate": 1.6021721938713497e-05, |
|
"loss": 0.2258, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.8748573602130088, |
|
"grad_norm": 0.22192290777199325, |
|
"learning_rate": 1.568665546546517e-05, |
|
"loss": 0.2322, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.8762158343748302, |
|
"grad_norm": 0.27939346399599835, |
|
"learning_rate": 1.5354986729126963e-05, |
|
"loss": 0.2166, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.8775743085366516, |
|
"grad_norm": 0.23760881910404164, |
|
"learning_rate": 1.5026721843868797e-05, |
|
"loss": 0.2231, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.8789327826984731, |
|
"grad_norm": 0.2384919572031985, |
|
"learning_rate": 1.4701866861112057e-05, |
|
"loss": 0.2115, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.8802912568602945, |
|
"grad_norm": 0.18196788330053723, |
|
"learning_rate": 1.4380427769418081e-05, |
|
"loss": 0.2214, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.881649731022116, |
|
"grad_norm": 0.11092114968179356, |
|
"learning_rate": 1.4062410494377642e-05, |
|
"loss": 0.2136, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.8830082051839374, |
|
"grad_norm": 0.24840886469424456, |
|
"learning_rate": 1.3747820898501929e-05, |
|
"loss": 0.228, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.8843666793457589, |
|
"grad_norm": 0.24015308441997552, |
|
"learning_rate": 1.3436664781114295e-05, |
|
"loss": 0.2225, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.8857251535075803, |
|
"grad_norm": 0.2836665970861565, |
|
"learning_rate": 1.3128947878243392e-05, |
|
"loss": 0.2203, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.8870836276694017, |
|
"grad_norm": 0.21821870263137624, |
|
"learning_rate": 1.2824675862517388e-05, |
|
"loss": 0.2236, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.8884421018312232, |
|
"grad_norm": 0.24628179490379828, |
|
"learning_rate": 1.2523854343059538e-05, |
|
"loss": 0.2224, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.8898005759930446, |
|
"grad_norm": 0.3785043038913189, |
|
"learning_rate": 1.2226488865384622e-05, |
|
"loss": 0.2328, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.8911590501548661, |
|
"grad_norm": 0.27390465067758646, |
|
"learning_rate": 1.1932584911296762e-05, |
|
"loss": 0.2409, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.8925175243166875, |
|
"grad_norm": 0.21777850622413425, |
|
"learning_rate": 1.164214789878848e-05, |
|
"loss": 0.2113, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.893875998478509, |
|
"grad_norm": 0.31282081336865486, |
|
"learning_rate": 1.1355183181940688e-05, |
|
"loss": 0.2294, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.8952344726403304, |
|
"grad_norm": 0.21847851398276275, |
|
"learning_rate": 1.1071696050823988e-05, |
|
"loss": 0.2176, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.8965929468021518, |
|
"grad_norm": 0.1797782169135658, |
|
"learning_rate": 1.0791691731401221e-05, |
|
"loss": 0.2197, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.8979514209639733, |
|
"grad_norm": 0.236661072817734, |
|
"learning_rate": 1.0526169006027186e-05, |
|
"loss": 0.2287, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.8993098951257947, |
|
"grad_norm": 0.24545168612281812, |
|
"learning_rate": 1.0253005911068837e-05, |
|
"loss": 0.2248, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.9006683692876162, |
|
"grad_norm": 0.2998814580945627, |
|
"learning_rate": 9.98334072000362e-06, |
|
"loss": 0.2279, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.9020268434494376, |
|
"grad_norm": 0.25372185228970084, |
|
"learning_rate": 9.717178403992866e-06, |
|
"loss": 0.2296, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.9033853176112591, |
|
"grad_norm": 0.22808552163017606, |
|
"learning_rate": 9.454523869623889e-06, |
|
"loss": 0.2142, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.9047437917730805, |
|
"grad_norm": 0.22587780733166465, |
|
"learning_rate": 9.195381958819637e-06, |
|
"loss": 0.2332, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.906102265934902, |
|
"grad_norm": 0.40072192174734456, |
|
"learning_rate": 8.939757448749286e-06, |
|
"loss": 0.2294, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.9074607400967234, |
|
"grad_norm": 0.207716936686938, |
|
"learning_rate": 8.687655051740318e-06, |
|
"loss": 0.2296, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.9088192142585448, |
|
"grad_norm": 0.2779354186342964, |
|
"learning_rate": 8.439079415191532e-06, |
|
"loss": 0.2219, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.9101776884203663, |
|
"grad_norm": 0.2723935977374799, |
|
"learning_rate": 8.194035121487465e-06, |
|
"loss": 0.2153, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.9115361625821877, |
|
"grad_norm": 0.19697799899707563, |
|
"learning_rate": 7.952526687913842e-06, |
|
"loss": 0.2146, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.9128946367440092, |
|
"grad_norm": 0.2085749517551805, |
|
"learning_rate": 7.714558566574325e-06, |
|
"loss": 0.2136, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.9142531109058306, |
|
"grad_norm": 0.21717861835570473, |
|
"learning_rate": 7.480135144308475e-06, |
|
"loss": 0.2148, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.9156115850676521, |
|
"grad_norm": 0.19269266158892198, |
|
"learning_rate": 7.2492607426108305e-06, |
|
"loss": 0.2257, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.9169700592294735, |
|
"grad_norm": 0.24145080749460107, |
|
"learning_rate": 7.02193961755131e-06, |
|
"loss": 0.2207, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.9183285333912949, |
|
"grad_norm": 0.38368673649347684, |
|
"learning_rate": 6.798175959696629e-06, |
|
"loss": 0.2277, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.9196870075531164, |
|
"grad_norm": 0.23667938172589895, |
|
"learning_rate": 6.577973894033274e-06, |
|
"loss": 0.2175, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.9210454817149378, |
|
"grad_norm": 0.3161307046290422, |
|
"learning_rate": 6.3613374798911605e-06, |
|
"loss": 0.2343, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.9224039558767593, |
|
"grad_norm": 0.21360593050098195, |
|
"learning_rate": 6.148270710869053e-06, |
|
"loss": 0.2239, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.9237624300385807, |
|
"grad_norm": 0.28937490357759443, |
|
"learning_rate": 5.938777514760796e-06, |
|
"loss": 0.2124, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.9251209042004022, |
|
"grad_norm": 0.2795740945530124, |
|
"learning_rate": 5.732861753483043e-06, |
|
"loss": 0.2204, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.9264793783622236, |
|
"grad_norm": 0.2537546094067306, |
|
"learning_rate": 5.538551797587777e-06, |
|
"loss": 0.2112, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.927837852524045, |
|
"grad_norm": 0.33705155604910225, |
|
"learning_rate": 5.339658758640753e-06, |
|
"loss": 0.2199, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.9291963266858664, |
|
"grad_norm": 0.3270077837617184, |
|
"learning_rate": 5.14435419901973e-06, |
|
"loss": 0.2297, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.9305548008476878, |
|
"grad_norm": 0.22117872354331958, |
|
"learning_rate": 4.95264171907992e-06, |
|
"loss": 0.22, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.9319132750095093, |
|
"grad_norm": 0.2091602624721329, |
|
"learning_rate": 4.7645248529581076e-06, |
|
"loss": 0.2107, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.9332717491713307, |
|
"grad_norm": 0.19828806364167093, |
|
"learning_rate": 4.580007068507497e-06, |
|
"loss": 0.2215, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.9346302233331522, |
|
"grad_norm": 0.2922062157376195, |
|
"learning_rate": 4.399091767233743e-06, |
|
"loss": 0.234, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.9359886974949736, |
|
"grad_norm": 0.2833929720260013, |
|
"learning_rate": 4.221782284232312e-06, |
|
"loss": 0.2358, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.937347171656795, |
|
"grad_norm": 0.10391453032816735, |
|
"learning_rate": 4.048081888126931e-06, |
|
"loss": 0.2194, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.9387056458186165, |
|
"grad_norm": 0.31685765392965615, |
|
"learning_rate": 3.877993781009415e-06, |
|
"loss": 0.2237, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.9400641199804379, |
|
"grad_norm": 0.32028046645415253, |
|
"learning_rate": 3.7115210983805326e-06, |
|
"loss": 0.2296, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.9414225941422594, |
|
"grad_norm": 0.2275872271102818, |
|
"learning_rate": 3.548666909092324e-06, |
|
"loss": 0.2237, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.9427810683040808, |
|
"grad_norm": 0.28672637661803746, |
|
"learning_rate": 3.3894342152914092e-06, |
|
"loss": 0.2129, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.9441395424659023, |
|
"grad_norm": 0.17447544388507297, |
|
"learning_rate": 3.233825952363767e-06, |
|
"loss": 0.2156, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.9454980166277237, |
|
"grad_norm": 0.27717659551061696, |
|
"learning_rate": 3.081844988880511e-06, |
|
"loss": 0.2325, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.9468564907895451, |
|
"grad_norm": 0.19384754194181844, |
|
"learning_rate": 2.9334941265450666e-06, |
|
"loss": 0.2264, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.9482149649513666, |
|
"grad_norm": 0.20190545639653648, |
|
"learning_rate": 2.788776100141499e-06, |
|
"loss": 0.2162, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.949573439113188, |
|
"grad_norm": 0.13741628116355178, |
|
"learning_rate": 2.647693577484156e-06, |
|
"loss": 0.2175, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.9509319132750095, |
|
"grad_norm": 0.30120083790962326, |
|
"learning_rate": 2.5102491593684164e-06, |
|
"loss": 0.2098, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.9522903874368309, |
|
"grad_norm": 0.18521977588022978, |
|
"learning_rate": 2.3764453795227737e-06, |
|
"loss": 0.2232, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.9536488615986524, |
|
"grad_norm": 0.31542769729636866, |
|
"learning_rate": 2.2462847045620737e-06, |
|
"loss": 0.2223, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.9550073357604738, |
|
"grad_norm": 0.27121032732352324, |
|
"learning_rate": 2.1247601176086262e-06, |
|
"loss": 0.2167, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.9563658099222953, |
|
"grad_norm": 0.23513329499397734, |
|
"learning_rate": 2.0017468261825268e-06, |
|
"loss": 0.2118, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.9577242840841167, |
|
"grad_norm": 0.15376591935080916, |
|
"learning_rate": 1.8823835470474395e-06, |
|
"loss": 0.2247, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.9590827582459381, |
|
"grad_norm": 0.3234625822847048, |
|
"learning_rate": 1.766672480613818e-06, |
|
"loss": 0.2229, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.9604412324077596, |
|
"grad_norm": 0.17001324587245673, |
|
"learning_rate": 1.6546157599652613e-06, |
|
"loss": 0.2148, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.961799706569581, |
|
"grad_norm": 0.11359209160906845, |
|
"learning_rate": 1.5462154508190108e-06, |
|
"loss": 0.214, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.9631581807314025, |
|
"grad_norm": 0.2409519071516936, |
|
"learning_rate": 1.4414735514879373e-06, |
|
"loss": 0.2118, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.9645166548932239, |
|
"grad_norm": 0.21439695271092557, |
|
"learning_rate": 1.3403919928437036e-06, |
|
"loss": 0.2219, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.9658751290550454, |
|
"grad_norm": 0.21370470770092295, |
|
"learning_rate": 1.2429726382812368e-06, |
|
"loss": 0.2147, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.9672336032168668, |
|
"grad_norm": 0.2311514348226777, |
|
"learning_rate": 1.149217283684223e-06, |
|
"loss": 0.23, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.9685920773786882, |
|
"grad_norm": 0.29860147103327, |
|
"learning_rate": 1.059127657392156e-06, |
|
"loss": 0.2313, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.9699505515405097, |
|
"grad_norm": 0.1814371857599918, |
|
"learning_rate": 9.72705420168407e-07, |
|
"loss": 0.2241, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.9713090257023311, |
|
"grad_norm": 0.31658685339190534, |
|
"learning_rate": 8.899521651695831e-07, |
|
"loss": 0.2207, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.9726674998641526, |
|
"grad_norm": 0.19909543685451667, |
|
"learning_rate": 8.124150923443096e-07, |
|
"loss": 0.2351, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.974025974025974, |
|
"grad_norm": 0.24098233089589674, |
|
"learning_rate": 7.369308575313927e-07, |
|
"loss": 0.2192, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.9753844481877955, |
|
"grad_norm": 0.23547731000564504, |
|
"learning_rate": 6.651199513456607e-07, |
|
"loss": 0.2268, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.9767429223496169, |
|
"grad_norm": 0.14817468589671154, |
|
"learning_rate": 5.969836975901366e-07, |
|
"loss": 0.2175, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.9781013965114383, |
|
"grad_norm": 0.140443367773756, |
|
"learning_rate": 5.3252335232723e-07, |
|
"loss": 0.2214, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.9794598706732598, |
|
"grad_norm": 0.3119367682303709, |
|
"learning_rate": 4.71740103855578e-07, |
|
"loss": 0.2249, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.9808183448350812, |
|
"grad_norm": 0.311395190548215, |
|
"learning_rate": 4.146350726881076e-07, |
|
"loss": 0.2222, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.9821768189969027, |
|
"grad_norm": 0.37040140534262844, |
|
"learning_rate": 3.6120931153138525e-07, |
|
"loss": 0.2361, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.9835352931587241, |
|
"grad_norm": 0.2988864815826086, |
|
"learning_rate": 3.114638052662988e-07, |
|
"loss": 0.2207, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.9848937673205456, |
|
"grad_norm": 0.21108645711904683, |
|
"learning_rate": 2.6539947092976135e-07, |
|
"loss": 0.2247, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.986252241482367, |
|
"grad_norm": 0.17869396874533416, |
|
"learning_rate": 2.2301715769783572e-07, |
|
"loss": 0.2231, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.9876107156441885, |
|
"grad_norm": 0.30165958371764556, |
|
"learning_rate": 1.8431764687021347e-07, |
|
"loss": 0.2232, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 0.9889691898060099, |
|
"grad_norm": 0.24564888452021394, |
|
"learning_rate": 1.4930165185564894e-07, |
|
"loss": 0.2135, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.9903276639678313, |
|
"grad_norm": 0.2710306063596811, |
|
"learning_rate": 1.1796981815888064e-07, |
|
"loss": 0.2099, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 0.9916861381296528, |
|
"grad_norm": 0.2109106341757756, |
|
"learning_rate": 9.032272336875203e-08, |
|
"loss": 0.21, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.9930446122914742, |
|
"grad_norm": 0.27834177280791966, |
|
"learning_rate": 6.636087714748662e-08, |
|
"loss": 0.2265, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 0.9944030864532957, |
|
"grad_norm": 0.30415917234664425, |
|
"learning_rate": 4.608472122138441e-08, |
|
"loss": 0.232, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.9957615606151171, |
|
"grad_norm": 0.24438726040214506, |
|
"learning_rate": 2.949462937262837e-08, |
|
"loss": 0.221, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 0.9971200347769386, |
|
"grad_norm": 0.26240483157432476, |
|
"learning_rate": 1.6590907432401104e-08, |
|
"loss": 0.2276, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.99847850893876, |
|
"grad_norm": 0.24193448916634921, |
|
"learning_rate": 7.3737932752226955e-09, |
|
"loss": 0.2229, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.9998369831005814, |
|
"grad_norm": 0.308625063910033, |
|
"learning_rate": 1.843456814643041e-09, |
|
"loss": 0.219, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 36806, |
|
"total_flos": 244684510289920.0, |
|
"train_loss": 0.3143004386741001, |
|
"train_runtime": 666350.8913, |
|
"train_samples_per_second": 1.326, |
|
"train_steps_per_second": 0.055 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 36806, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 244684510289920.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|