{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2096774193548387, "eval_steps": 500, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010080645161290322, "grad_norm": 0.9473515748977661, "learning_rate": 4.000000000000001e-06, "loss": 1.9769, "step": 1 }, { "epoch": 0.0020161290322580645, "grad_norm": 0.9036028981208801, "learning_rate": 8.000000000000001e-06, "loss": 1.9331, "step": 2 }, { "epoch": 0.0030241935483870967, "grad_norm": 0.9499556422233582, "learning_rate": 1.2e-05, "loss": 1.9852, "step": 3 }, { "epoch": 0.004032258064516129, "grad_norm": 0.903069019317627, "learning_rate": 1.6000000000000003e-05, "loss": 1.9668, "step": 4 }, { "epoch": 0.005040322580645161, "grad_norm": 0.5635794997215271, "learning_rate": 2e-05, "loss": 1.9327, "step": 5 }, { "epoch": 0.006048387096774193, "grad_norm": 0.9521661996841431, "learning_rate": 2.4e-05, "loss": 2.0026, "step": 6 }, { "epoch": 0.007056451612903226, "grad_norm": 0.4393383860588074, "learning_rate": 2.8000000000000003e-05, "loss": 1.8885, "step": 7 }, { "epoch": 0.008064516129032258, "grad_norm": 0.36857879161834717, "learning_rate": 3.2000000000000005e-05, "loss": 1.8537, "step": 8 }, { "epoch": 0.009072580645161291, "grad_norm": 0.3844268321990967, "learning_rate": 3.6e-05, "loss": 1.8874, "step": 9 }, { "epoch": 0.010080645161290322, "grad_norm": 0.41415101289749146, "learning_rate": 4e-05, "loss": 1.9386, "step": 10 }, { "epoch": 0.011088709677419355, "grad_norm": 0.3869949281215668, "learning_rate": 4.4000000000000006e-05, "loss": 1.9359, "step": 11 }, { "epoch": 0.012096774193548387, "grad_norm": 0.3345952033996582, "learning_rate": 4.8e-05, "loss": 1.903, "step": 12 }, { "epoch": 0.01310483870967742, "grad_norm": 0.3590312600135803, "learning_rate": 5.2000000000000004e-05, "loss": 1.9024, "step": 13 }, { "epoch": 0.014112903225806451, "grad_norm": 0.2288215309381485, "learning_rate": 5.6000000000000006e-05, "loss": 1.8431, "step": 14 }, { "epoch": 0.015120967741935484, "grad_norm": 0.20984530448913574, "learning_rate": 6e-05, "loss": 1.8522, "step": 15 }, { "epoch": 0.016129032258064516, "grad_norm": 0.2080329954624176, "learning_rate": 6.400000000000001e-05, "loss": 1.9895, "step": 16 }, { "epoch": 0.017137096774193547, "grad_norm": 0.20060451328754425, "learning_rate": 6.800000000000001e-05, "loss": 1.8289, "step": 17 }, { "epoch": 0.018145161290322582, "grad_norm": 0.16062042117118835, "learning_rate": 7.2e-05, "loss": 1.8823, "step": 18 }, { "epoch": 0.019153225806451613, "grad_norm": 0.15423905849456787, "learning_rate": 7.6e-05, "loss": 1.7997, "step": 19 }, { "epoch": 0.020161290322580645, "grad_norm": 0.15496863424777985, "learning_rate": 8e-05, "loss": 1.8237, "step": 20 }, { "epoch": 0.021169354838709676, "grad_norm": 0.16305851936340332, "learning_rate": 8.4e-05, "loss": 1.7973, "step": 21 }, { "epoch": 0.02217741935483871, "grad_norm": 0.1680663675069809, "learning_rate": 8.800000000000001e-05, "loss": 1.82, "step": 22 }, { "epoch": 0.023185483870967742, "grad_norm": 0.16471807658672333, "learning_rate": 9.200000000000001e-05, "loss": 1.8314, "step": 23 }, { "epoch": 0.024193548387096774, "grad_norm": 0.13601982593536377, "learning_rate": 9.6e-05, "loss": 1.8488, "step": 24 }, { "epoch": 0.025201612903225805, "grad_norm": 0.12553684413433075, "learning_rate": 0.0001, "loss": 1.839, "step": 25 }, { "epoch": 0.02620967741935484, "grad_norm": 0.12679991126060486, "learning_rate": 0.00010400000000000001, "loss": 1.8615, "step": 26 }, { "epoch": 0.02721774193548387, "grad_norm": 0.1284348964691162, "learning_rate": 0.00010800000000000001, "loss": 1.8215, "step": 27 }, { "epoch": 0.028225806451612902, "grad_norm": 0.11629381030797958, "learning_rate": 0.00011200000000000001, "loss": 1.8536, "step": 28 }, { "epoch": 0.029233870967741934, "grad_norm": 0.10016848891973495, "learning_rate": 0.000116, "loss": 1.8095, "step": 29 }, { "epoch": 0.03024193548387097, "grad_norm": 0.10154619067907333, "learning_rate": 0.00012, "loss": 1.8355, "step": 30 }, { "epoch": 0.03125, "grad_norm": 0.11825895309448242, "learning_rate": 0.000124, "loss": 1.7984, "step": 31 }, { "epoch": 0.03225806451612903, "grad_norm": 0.104405976831913, "learning_rate": 0.00012800000000000002, "loss": 1.7673, "step": 32 }, { "epoch": 0.03326612903225806, "grad_norm": 0.09943860024213791, "learning_rate": 0.000132, "loss": 1.813, "step": 33 }, { "epoch": 0.034274193548387094, "grad_norm": 0.10970743000507355, "learning_rate": 0.00013600000000000003, "loss": 1.9213, "step": 34 }, { "epoch": 0.03528225806451613, "grad_norm": 0.1049584224820137, "learning_rate": 0.00014, "loss": 1.7818, "step": 35 }, { "epoch": 0.036290322580645164, "grad_norm": 0.08986247330904007, "learning_rate": 0.000144, "loss": 1.7944, "step": 36 }, { "epoch": 0.037298387096774195, "grad_norm": 0.09243710339069366, "learning_rate": 0.000148, "loss": 1.7158, "step": 37 }, { "epoch": 0.038306451612903226, "grad_norm": 0.10768643021583557, "learning_rate": 0.000152, "loss": 1.8295, "step": 38 }, { "epoch": 0.03931451612903226, "grad_norm": 0.07883578538894653, "learning_rate": 0.00015600000000000002, "loss": 1.757, "step": 39 }, { "epoch": 0.04032258064516129, "grad_norm": 0.10219922661781311, "learning_rate": 0.00016, "loss": 1.7423, "step": 40 }, { "epoch": 0.04133064516129032, "grad_norm": 0.08045803755521774, "learning_rate": 0.000164, "loss": 1.7649, "step": 41 }, { "epoch": 0.04233870967741935, "grad_norm": 0.07191110402345657, "learning_rate": 0.000168, "loss": 1.7441, "step": 42 }, { "epoch": 0.04334677419354839, "grad_norm": 0.08571028709411621, "learning_rate": 0.000172, "loss": 1.8094, "step": 43 }, { "epoch": 0.04435483870967742, "grad_norm": 0.08775891363620758, "learning_rate": 0.00017600000000000002, "loss": 1.817, "step": 44 }, { "epoch": 0.04536290322580645, "grad_norm": 0.08328275382518768, "learning_rate": 0.00018, "loss": 1.7753, "step": 45 }, { "epoch": 0.046370967741935484, "grad_norm": 0.08221882581710815, "learning_rate": 0.00018400000000000003, "loss": 1.7824, "step": 46 }, { "epoch": 0.047379032258064516, "grad_norm": 0.0885847732424736, "learning_rate": 0.000188, "loss": 1.7423, "step": 47 }, { "epoch": 0.04838709677419355, "grad_norm": 0.08126149326562881, "learning_rate": 0.000192, "loss": 1.7495, "step": 48 }, { "epoch": 0.04939516129032258, "grad_norm": 0.08296285569667816, "learning_rate": 0.000196, "loss": 1.6909, "step": 49 }, { "epoch": 0.05040322580645161, "grad_norm": 0.09005258232355118, "learning_rate": 0.0002, "loss": 1.8159, "step": 50 }, { "epoch": 0.05141129032258065, "grad_norm": 0.08956532180309296, "learning_rate": 0.00019999986806600454, "loss": 1.6662, "step": 51 }, { "epoch": 0.05241935483870968, "grad_norm": 0.08471240848302841, "learning_rate": 0.00019999947226436628, "loss": 1.8274, "step": 52 }, { "epoch": 0.05342741935483871, "grad_norm": 0.09117641299962997, "learning_rate": 0.00019999881259612963, "loss": 1.7027, "step": 53 }, { "epoch": 0.05443548387096774, "grad_norm": 0.08552085608243942, "learning_rate": 0.00019999788906303518, "loss": 1.7738, "step": 54 }, { "epoch": 0.055443548387096774, "grad_norm": 0.07708004862070084, "learning_rate": 0.00019999670166751993, "loss": 1.7821, "step": 55 }, { "epoch": 0.056451612903225805, "grad_norm": 0.07826384156942368, "learning_rate": 0.000199995250412717, "loss": 1.7579, "step": 56 }, { "epoch": 0.057459677419354836, "grad_norm": 0.0721641331911087, "learning_rate": 0.00019999353530245572, "loss": 1.7372, "step": 57 }, { "epoch": 0.05846774193548387, "grad_norm": 0.07667742669582367, "learning_rate": 0.0001999915563412618, "loss": 1.7323, "step": 58 }, { "epoch": 0.059475806451612906, "grad_norm": 0.10455285757780075, "learning_rate": 0.00019998931353435709, "loss": 1.8221, "step": 59 }, { "epoch": 0.06048387096774194, "grad_norm": 0.07621350884437561, "learning_rate": 0.00019998680688765959, "loss": 1.7305, "step": 60 }, { "epoch": 0.06149193548387097, "grad_norm": 0.08454013615846634, "learning_rate": 0.00019998403640778358, "loss": 1.7558, "step": 61 }, { "epoch": 0.0625, "grad_norm": 0.08005455136299133, "learning_rate": 0.00019998100210203942, "loss": 1.6703, "step": 62 }, { "epoch": 0.06350806451612903, "grad_norm": 0.09527427703142166, "learning_rate": 0.0001999777039784337, "loss": 1.7896, "step": 63 }, { "epoch": 0.06451612903225806, "grad_norm": 0.10536834597587585, "learning_rate": 0.00019997414204566915, "loss": 1.7909, "step": 64 }, { "epoch": 0.0655241935483871, "grad_norm": 0.08326593041419983, "learning_rate": 0.0001999703163131445, "loss": 1.7501, "step": 65 }, { "epoch": 0.06653225806451613, "grad_norm": 0.0823182687163353, "learning_rate": 0.00019996622679095468, "loss": 1.7625, "step": 66 }, { "epoch": 0.06754032258064516, "grad_norm": 0.07878896594047546, "learning_rate": 0.00019996187348989063, "loss": 1.7235, "step": 67 }, { "epoch": 0.06854838709677419, "grad_norm": 0.0899212434887886, "learning_rate": 0.0001999572564214393, "loss": 1.7685, "step": 68 }, { "epoch": 0.06955645161290322, "grad_norm": 0.07247278839349747, "learning_rate": 0.00019995237559778363, "loss": 1.6281, "step": 69 }, { "epoch": 0.07056451612903226, "grad_norm": 0.08588135987520218, "learning_rate": 0.00019994723103180265, "loss": 1.7785, "step": 70 }, { "epoch": 0.0715725806451613, "grad_norm": 0.12004637718200684, "learning_rate": 0.00019994182273707107, "loss": 1.7552, "step": 71 }, { "epoch": 0.07258064516129033, "grad_norm": 0.1002095490694046, "learning_rate": 0.00019993615072785978, "loss": 1.715, "step": 72 }, { "epoch": 0.07358870967741936, "grad_norm": 0.07339724153280258, "learning_rate": 0.00019993021501913536, "loss": 1.7019, "step": 73 }, { "epoch": 0.07459677419354839, "grad_norm": 0.1305348128080368, "learning_rate": 0.00019992401562656022, "loss": 1.8078, "step": 74 }, { "epoch": 0.07560483870967742, "grad_norm": 0.09164395183324814, "learning_rate": 0.0001999175525664926, "loss": 1.6756, "step": 75 }, { "epoch": 0.07661290322580645, "grad_norm": 0.0749751552939415, "learning_rate": 0.0001999108258559864, "loss": 1.7616, "step": 76 }, { "epoch": 0.07762096774193548, "grad_norm": 0.1132885217666626, "learning_rate": 0.00019990383551279136, "loss": 1.8232, "step": 77 }, { "epoch": 0.07862903225806452, "grad_norm": 0.0832655057311058, "learning_rate": 0.00019989658155535262, "loss": 1.7371, "step": 78 }, { "epoch": 0.07963709677419355, "grad_norm": 0.09641417115926743, "learning_rate": 0.00019988906400281116, "loss": 1.7989, "step": 79 }, { "epoch": 0.08064516129032258, "grad_norm": 0.08800283074378967, "learning_rate": 0.00019988128287500335, "loss": 1.7235, "step": 80 }, { "epoch": 0.08165322580645161, "grad_norm": 0.0772438570857048, "learning_rate": 0.00019987323819246108, "loss": 1.7488, "step": 81 }, { "epoch": 0.08266129032258064, "grad_norm": 0.09178374707698822, "learning_rate": 0.00019986492997641175, "loss": 1.7018, "step": 82 }, { "epoch": 0.08366935483870967, "grad_norm": 0.09313932806253433, "learning_rate": 0.00019985635824877802, "loss": 1.7914, "step": 83 }, { "epoch": 0.0846774193548387, "grad_norm": 0.0906209945678711, "learning_rate": 0.00019984752303217797, "loss": 1.7197, "step": 84 }, { "epoch": 0.08568548387096774, "grad_norm": 0.09081698209047318, "learning_rate": 0.0001998384243499249, "loss": 1.7666, "step": 85 }, { "epoch": 0.08669354838709678, "grad_norm": 0.07680635154247284, "learning_rate": 0.0001998290622260273, "loss": 1.6946, "step": 86 }, { "epoch": 0.08770161290322581, "grad_norm": 0.0743766576051712, "learning_rate": 0.00019981943668518888, "loss": 1.7588, "step": 87 }, { "epoch": 0.08870967741935484, "grad_norm": 0.07674787193536758, "learning_rate": 0.00019980954775280832, "loss": 1.6896, "step": 88 }, { "epoch": 0.08971774193548387, "grad_norm": 0.07708673924207687, "learning_rate": 0.00019979939545497933, "loss": 1.6944, "step": 89 }, { "epoch": 0.0907258064516129, "grad_norm": 0.07248947024345398, "learning_rate": 0.00019978897981849056, "loss": 1.7114, "step": 90 }, { "epoch": 0.09173387096774194, "grad_norm": 0.07939179986715317, "learning_rate": 0.0001997783008708256, "loss": 1.7552, "step": 91 }, { "epoch": 0.09274193548387097, "grad_norm": 0.09288234263658524, "learning_rate": 0.00019976735864016276, "loss": 1.7554, "step": 92 }, { "epoch": 0.09375, "grad_norm": 0.08074582368135452, "learning_rate": 0.00019975615315537506, "loss": 1.7209, "step": 93 }, { "epoch": 0.09475806451612903, "grad_norm": 0.08087307959794998, "learning_rate": 0.0001997446844460302, "loss": 1.7118, "step": 94 }, { "epoch": 0.09576612903225806, "grad_norm": 0.08976717293262482, "learning_rate": 0.00019973295254239044, "loss": 1.7384, "step": 95 }, { "epoch": 0.0967741935483871, "grad_norm": 0.08545631170272827, "learning_rate": 0.0001997209574754125, "loss": 1.7524, "step": 96 }, { "epoch": 0.09778225806451613, "grad_norm": 0.07703512907028198, "learning_rate": 0.00019970869927674753, "loss": 1.6947, "step": 97 }, { "epoch": 0.09879032258064516, "grad_norm": 0.07614375650882721, "learning_rate": 0.000199696177978741, "loss": 1.7135, "step": 98 }, { "epoch": 0.09979838709677419, "grad_norm": 0.0809471607208252, "learning_rate": 0.0001996833936144326, "loss": 1.727, "step": 99 }, { "epoch": 0.10080645161290322, "grad_norm": 0.1023879274725914, "learning_rate": 0.00019967034621755622, "loss": 1.7297, "step": 100 }, { "epoch": 0.10181451612903226, "grad_norm": 0.07705037295818329, "learning_rate": 0.00019965703582253965, "loss": 1.6571, "step": 101 }, { "epoch": 0.1028225806451613, "grad_norm": 0.08601151406764984, "learning_rate": 0.00019964346246450487, "loss": 1.7404, "step": 102 }, { "epoch": 0.10383064516129033, "grad_norm": 0.0756453350186348, "learning_rate": 0.00019962962617926756, "loss": 1.7311, "step": 103 }, { "epoch": 0.10483870967741936, "grad_norm": 0.10456051677465439, "learning_rate": 0.00019961552700333734, "loss": 1.7517, "step": 104 }, { "epoch": 0.10584677419354839, "grad_norm": 0.07731463760137558, "learning_rate": 0.00019960116497391733, "loss": 1.716, "step": 105 }, { "epoch": 0.10685483870967742, "grad_norm": 0.0789295881986618, "learning_rate": 0.00019958654012890435, "loss": 1.7233, "step": 106 }, { "epoch": 0.10786290322580645, "grad_norm": 0.08179011940956116, "learning_rate": 0.0001995716525068887, "loss": 1.6556, "step": 107 }, { "epoch": 0.10887096774193548, "grad_norm": 0.08565866947174072, "learning_rate": 0.00019955650214715406, "loss": 1.7512, "step": 108 }, { "epoch": 0.10987903225806452, "grad_norm": 0.08556907624006271, "learning_rate": 0.00019954108908967736, "loss": 1.7522, "step": 109 }, { "epoch": 0.11088709677419355, "grad_norm": 0.08097026497125626, "learning_rate": 0.00019952541337512868, "loss": 1.6656, "step": 110 }, { "epoch": 0.11189516129032258, "grad_norm": 0.07853402197360992, "learning_rate": 0.0001995094750448713, "loss": 1.7299, "step": 111 }, { "epoch": 0.11290322580645161, "grad_norm": 0.07205012440681458, "learning_rate": 0.00019949327414096134, "loss": 1.7118, "step": 112 }, { "epoch": 0.11391129032258064, "grad_norm": 0.0683959424495697, "learning_rate": 0.00019947681070614777, "loss": 1.6742, "step": 113 }, { "epoch": 0.11491935483870967, "grad_norm": 0.07890711724758148, "learning_rate": 0.00019946008478387238, "loss": 1.6962, "step": 114 }, { "epoch": 0.1159274193548387, "grad_norm": 0.08321288973093033, "learning_rate": 0.00019944309641826947, "loss": 1.7552, "step": 115 }, { "epoch": 0.11693548387096774, "grad_norm": 0.0974084734916687, "learning_rate": 0.0001994258456541659, "loss": 1.7971, "step": 116 }, { "epoch": 0.11794354838709678, "grad_norm": 0.08591660857200623, "learning_rate": 0.00019940833253708097, "loss": 1.7644, "step": 117 }, { "epoch": 0.11895161290322581, "grad_norm": 0.07388189435005188, "learning_rate": 0.00019939055711322616, "loss": 1.6513, "step": 118 }, { "epoch": 0.11995967741935484, "grad_norm": 0.07635471969842911, "learning_rate": 0.00019937251942950512, "loss": 1.7005, "step": 119 }, { "epoch": 0.12096774193548387, "grad_norm": 0.08252502232789993, "learning_rate": 0.0001993542195335135, "loss": 1.7267, "step": 120 }, { "epoch": 0.1219758064516129, "grad_norm": 0.10845799744129181, "learning_rate": 0.0001993356574735389, "loss": 1.7756, "step": 121 }, { "epoch": 0.12298387096774194, "grad_norm": 0.07942607253789902, "learning_rate": 0.00019931683329856066, "loss": 1.6849, "step": 122 }, { "epoch": 0.12399193548387097, "grad_norm": 0.08841695636510849, "learning_rate": 0.00019929774705824973, "loss": 1.7343, "step": 123 }, { "epoch": 0.125, "grad_norm": 0.09001098573207855, "learning_rate": 0.0001992783988029686, "loss": 1.7534, "step": 124 }, { "epoch": 0.12600806451612903, "grad_norm": 0.07412228733301163, "learning_rate": 0.00019925878858377113, "loss": 1.7125, "step": 125 }, { "epoch": 0.12701612903225806, "grad_norm": 0.09205227345228195, "learning_rate": 0.00019923891645240238, "loss": 1.6712, "step": 126 }, { "epoch": 0.1280241935483871, "grad_norm": 0.07850176095962524, "learning_rate": 0.00019921878246129858, "loss": 1.6747, "step": 127 }, { "epoch": 0.12903225806451613, "grad_norm": 0.07801543176174164, "learning_rate": 0.00019919838666358688, "loss": 1.6799, "step": 128 }, { "epoch": 0.13004032258064516, "grad_norm": 0.08263793587684631, "learning_rate": 0.00019917772911308524, "loss": 1.7368, "step": 129 }, { "epoch": 0.1310483870967742, "grad_norm": 0.10233369469642639, "learning_rate": 0.00019915680986430233, "loss": 1.7377, "step": 130 }, { "epoch": 0.13205645161290322, "grad_norm": 0.08960834890604019, "learning_rate": 0.00019913562897243736, "loss": 1.7146, "step": 131 }, { "epoch": 0.13306451612903225, "grad_norm": 0.07425748556852341, "learning_rate": 0.00019911418649337997, "loss": 1.6796, "step": 132 }, { "epoch": 0.13407258064516128, "grad_norm": 0.11380482465028763, "learning_rate": 0.00019909248248370988, "loss": 1.7688, "step": 133 }, { "epoch": 0.1350806451612903, "grad_norm": 0.09946684539318085, "learning_rate": 0.00019907051700069714, "loss": 1.7016, "step": 134 }, { "epoch": 0.13608870967741934, "grad_norm": 0.07686997205018997, "learning_rate": 0.0001990482901023016, "loss": 1.7209, "step": 135 }, { "epoch": 0.13709677419354838, "grad_norm": 0.08980387449264526, "learning_rate": 0.0001990258018471729, "loss": 1.6922, "step": 136 }, { "epoch": 0.1381048387096774, "grad_norm": 0.08946418762207031, "learning_rate": 0.00019900305229465036, "loss": 1.7231, "step": 137 }, { "epoch": 0.13911290322580644, "grad_norm": 0.07228976488113403, "learning_rate": 0.00019898004150476278, "loss": 1.6864, "step": 138 }, { "epoch": 0.14012096774193547, "grad_norm": 0.09577012807130814, "learning_rate": 0.00019895676953822822, "loss": 1.6812, "step": 139 }, { "epoch": 0.14112903225806453, "grad_norm": 0.08688167482614517, "learning_rate": 0.00019893323645645404, "loss": 1.738, "step": 140 }, { "epoch": 0.14213709677419356, "grad_norm": 0.07488682866096497, "learning_rate": 0.00019890944232153643, "loss": 1.6202, "step": 141 }, { "epoch": 0.1431451612903226, "grad_norm": 0.09752912074327469, "learning_rate": 0.00019888538719626053, "loss": 1.7006, "step": 142 }, { "epoch": 0.14415322580645162, "grad_norm": 0.08033961057662964, "learning_rate": 0.0001988610711441001, "loss": 1.7119, "step": 143 }, { "epoch": 0.14516129032258066, "grad_norm": 0.07507845759391785, "learning_rate": 0.00019883649422921745, "loss": 1.6504, "step": 144 }, { "epoch": 0.1461693548387097, "grad_norm": 0.07756344974040985, "learning_rate": 0.00019881165651646317, "loss": 1.7107, "step": 145 }, { "epoch": 0.14717741935483872, "grad_norm": 0.07581036537885666, "learning_rate": 0.00019878655807137603, "loss": 1.6777, "step": 146 }, { "epoch": 0.14818548387096775, "grad_norm": 0.06943333894014359, "learning_rate": 0.0001987611989601828, "loss": 1.6282, "step": 147 }, { "epoch": 0.14919354838709678, "grad_norm": 0.07314992696046829, "learning_rate": 0.00019873557924979804, "loss": 1.6773, "step": 148 }, { "epoch": 0.1502016129032258, "grad_norm": 0.08181635290384293, "learning_rate": 0.000198709699007824, "loss": 1.668, "step": 149 }, { "epoch": 0.15120967741935484, "grad_norm": 0.07046262919902802, "learning_rate": 0.00019868355830255033, "loss": 1.6857, "step": 150 }, { "epoch": 0.15221774193548387, "grad_norm": 0.07162804901599884, "learning_rate": 0.00019865715720295397, "loss": 1.6299, "step": 151 }, { "epoch": 0.1532258064516129, "grad_norm": 0.0785004273056984, "learning_rate": 0.00019863049577869898, "loss": 1.6651, "step": 152 }, { "epoch": 0.15423387096774194, "grad_norm": 0.06895990669727325, "learning_rate": 0.00019860357410013638, "loss": 1.636, "step": 153 }, { "epoch": 0.15524193548387097, "grad_norm": 0.0736781507730484, "learning_rate": 0.00019857639223830377, "loss": 1.6859, "step": 154 }, { "epoch": 0.15625, "grad_norm": 0.07190602272748947, "learning_rate": 0.00019854895026492545, "loss": 1.706, "step": 155 }, { "epoch": 0.15725806451612903, "grad_norm": 0.07781372219324112, "learning_rate": 0.00019852124825241201, "loss": 1.7015, "step": 156 }, { "epoch": 0.15826612903225806, "grad_norm": 0.08466929197311401, "learning_rate": 0.0001984932862738601, "loss": 1.6684, "step": 157 }, { "epoch": 0.1592741935483871, "grad_norm": 0.08189702033996582, "learning_rate": 0.00019846506440305257, "loss": 1.6914, "step": 158 }, { "epoch": 0.16028225806451613, "grad_norm": 0.08032141625881195, "learning_rate": 0.00019843658271445776, "loss": 1.6574, "step": 159 }, { "epoch": 0.16129032258064516, "grad_norm": 0.08438081294298172, "learning_rate": 0.00019840784128322985, "loss": 1.7503, "step": 160 }, { "epoch": 0.1622983870967742, "grad_norm": 0.10350456833839417, "learning_rate": 0.0001983788401852082, "loss": 1.697, "step": 161 }, { "epoch": 0.16330645161290322, "grad_norm": 0.08714311569929123, "learning_rate": 0.00019834957949691747, "loss": 1.7595, "step": 162 }, { "epoch": 0.16431451612903225, "grad_norm": 0.08562017232179642, "learning_rate": 0.00019832005929556722, "loss": 1.7502, "step": 163 }, { "epoch": 0.16532258064516128, "grad_norm": 0.0961882621049881, "learning_rate": 0.00019829027965905186, "loss": 1.6875, "step": 164 }, { "epoch": 0.1663306451612903, "grad_norm": 0.09505471587181091, "learning_rate": 0.00019826024066595027, "loss": 1.6958, "step": 165 }, { "epoch": 0.16733870967741934, "grad_norm": 0.07493823021650314, "learning_rate": 0.00019822994239552573, "loss": 1.6677, "step": 166 }, { "epoch": 0.16834677419354838, "grad_norm": 0.09159812331199646, "learning_rate": 0.00019819938492772568, "loss": 1.6994, "step": 167 }, { "epoch": 0.1693548387096774, "grad_norm": 0.1118432804942131, "learning_rate": 0.00019816856834318155, "loss": 1.7143, "step": 168 }, { "epoch": 0.17036290322580644, "grad_norm": 0.09199640899896622, "learning_rate": 0.0001981374927232084, "loss": 1.6896, "step": 169 }, { "epoch": 0.17137096774193547, "grad_norm": 0.0801042765378952, "learning_rate": 0.00019810615814980483, "loss": 1.7292, "step": 170 }, { "epoch": 0.17237903225806453, "grad_norm": 0.1115993857383728, "learning_rate": 0.00019807456470565283, "loss": 1.6995, "step": 171 }, { "epoch": 0.17338709677419356, "grad_norm": 0.10155931115150452, "learning_rate": 0.00019804271247411727, "loss": 1.6984, "step": 172 }, { "epoch": 0.1743951612903226, "grad_norm": 0.07809167355298996, "learning_rate": 0.00019801060153924608, "loss": 1.7152, "step": 173 }, { "epoch": 0.17540322580645162, "grad_norm": 0.08765136450529099, "learning_rate": 0.0001979782319857697, "loss": 1.6451, "step": 174 }, { "epoch": 0.17641129032258066, "grad_norm": 0.07360592484474182, "learning_rate": 0.00019794560389910102, "loss": 1.6889, "step": 175 }, { "epoch": 0.1774193548387097, "grad_norm": 0.09308324754238129, "learning_rate": 0.00019791271736533512, "loss": 1.7225, "step": 176 }, { "epoch": 0.17842741935483872, "grad_norm": 0.08810586482286453, "learning_rate": 0.00019787957247124907, "loss": 1.6808, "step": 177 }, { "epoch": 0.17943548387096775, "grad_norm": 0.07750339061021805, "learning_rate": 0.00019784616930430157, "loss": 1.6324, "step": 178 }, { "epoch": 0.18044354838709678, "grad_norm": 0.08474040031433105, "learning_rate": 0.00019781250795263295, "loss": 1.6858, "step": 179 }, { "epoch": 0.1814516129032258, "grad_norm": 0.08277326822280884, "learning_rate": 0.0001977785885050647, "loss": 1.7043, "step": 180 }, { "epoch": 0.18245967741935484, "grad_norm": 0.07668858766555786, "learning_rate": 0.00019774441105109943, "loss": 1.6599, "step": 181 }, { "epoch": 0.18346774193548387, "grad_norm": 0.07402200996875763, "learning_rate": 0.00019770997568092046, "loss": 1.6524, "step": 182 }, { "epoch": 0.1844758064516129, "grad_norm": 0.08267819881439209, "learning_rate": 0.0001976752824853917, "loss": 1.6973, "step": 183 }, { "epoch": 0.18548387096774194, "grad_norm": 0.0688646137714386, "learning_rate": 0.00019764033155605747, "loss": 1.63, "step": 184 }, { "epoch": 0.18649193548387097, "grad_norm": 0.0818399116396904, "learning_rate": 0.00019760512298514198, "loss": 1.6773, "step": 185 }, { "epoch": 0.1875, "grad_norm": 0.08086924254894257, "learning_rate": 0.0001975696568655494, "loss": 1.7037, "step": 186 }, { "epoch": 0.18850806451612903, "grad_norm": 0.08136597275733948, "learning_rate": 0.00019753393329086354, "loss": 1.6634, "step": 187 }, { "epoch": 0.18951612903225806, "grad_norm": 0.10008742660284042, "learning_rate": 0.00019749795235534737, "loss": 1.7139, "step": 188 }, { "epoch": 0.1905241935483871, "grad_norm": 0.08657586574554443, "learning_rate": 0.0001974617141539432, "loss": 1.6877, "step": 189 }, { "epoch": 0.19153225806451613, "grad_norm": 0.09825193136930466, "learning_rate": 0.0001974252187822719, "loss": 1.7274, "step": 190 }, { "epoch": 0.19254032258064516, "grad_norm": 0.06964825093746185, "learning_rate": 0.00019738846633663318, "loss": 1.6431, "step": 191 }, { "epoch": 0.1935483870967742, "grad_norm": 0.07197541743516922, "learning_rate": 0.0001973514569140049, "loss": 1.6532, "step": 192 }, { "epoch": 0.19455645161290322, "grad_norm": 0.07691382616758347, "learning_rate": 0.00019731419061204316, "loss": 1.6816, "step": 193 }, { "epoch": 0.19556451612903225, "grad_norm": 0.08229187875986099, "learning_rate": 0.00019727666752908173, "loss": 1.6471, "step": 194 }, { "epoch": 0.19657258064516128, "grad_norm": 0.0788332000374794, "learning_rate": 0.00019723888776413206, "loss": 1.6745, "step": 195 }, { "epoch": 0.1975806451612903, "grad_norm": 0.08446817100048065, "learning_rate": 0.00019720085141688285, "loss": 1.6863, "step": 196 }, { "epoch": 0.19858870967741934, "grad_norm": 0.0747678205370903, "learning_rate": 0.00019716255858769982, "loss": 1.6553, "step": 197 }, { "epoch": 0.19959677419354838, "grad_norm": 0.08248293399810791, "learning_rate": 0.0001971240093776255, "loss": 1.7021, "step": 198 }, { "epoch": 0.2006048387096774, "grad_norm": 0.0832241103053093, "learning_rate": 0.00019708520388837897, "loss": 1.6832, "step": 199 }, { "epoch": 0.20161290322580644, "grad_norm": 0.10792431235313416, "learning_rate": 0.00019704614222235543, "loss": 1.7196, "step": 200 }, { "epoch": 0.20262096774193547, "grad_norm": 0.09173596650362015, "learning_rate": 0.0001970068244826261, "loss": 1.7039, "step": 201 }, { "epoch": 0.20362903225806453, "grad_norm": 0.07657129317522049, "learning_rate": 0.00019696725077293796, "loss": 1.6614, "step": 202 }, { "epoch": 0.20463709677419356, "grad_norm": 0.08881079405546188, "learning_rate": 0.00019692742119771338, "loss": 1.7062, "step": 203 }, { "epoch": 0.2056451612903226, "grad_norm": 0.11070767790079117, "learning_rate": 0.00019688733586204976, "loss": 1.7759, "step": 204 }, { "epoch": 0.20665322580645162, "grad_norm": 0.07556972652673721, "learning_rate": 0.00019684699487171957, "loss": 1.6664, "step": 205 }, { "epoch": 0.20766129032258066, "grad_norm": 0.11293460428714752, "learning_rate": 0.00019680639833316975, "loss": 1.7476, "step": 206 }, { "epoch": 0.2086693548387097, "grad_norm": 0.08948105573654175, "learning_rate": 0.00019676554635352154, "loss": 1.6933, "step": 207 }, { "epoch": 0.20967741935483872, "grad_norm": 0.1004069596529007, "learning_rate": 0.00019672443904057024, "loss": 1.6909, "step": 208 }, { "epoch": 0.21068548387096775, "grad_norm": 0.0815928652882576, "learning_rate": 0.00019668307650278492, "loss": 1.6881, "step": 209 }, { "epoch": 0.21169354838709678, "grad_norm": 0.10198971629142761, "learning_rate": 0.00019664145884930808, "loss": 1.6653, "step": 210 }, { "epoch": 0.2127016129032258, "grad_norm": 0.07174786180257797, "learning_rate": 0.00019659958618995532, "loss": 1.6204, "step": 211 }, { "epoch": 0.21370967741935484, "grad_norm": 0.09819284826517105, "learning_rate": 0.0001965574586352153, "loss": 1.6574, "step": 212 }, { "epoch": 0.21471774193548387, "grad_norm": 0.07578348368406296, "learning_rate": 0.00019651507629624902, "loss": 1.7096, "step": 213 }, { "epoch": 0.2157258064516129, "grad_norm": 0.09160558879375458, "learning_rate": 0.00019647243928489, "loss": 1.673, "step": 214 }, { "epoch": 0.21673387096774194, "grad_norm": 0.07697172462940216, "learning_rate": 0.00019642954771364362, "loss": 1.7069, "step": 215 }, { "epoch": 0.21774193548387097, "grad_norm": 0.0956280305981636, "learning_rate": 0.00019638640169568702, "loss": 1.6727, "step": 216 }, { "epoch": 0.21875, "grad_norm": 0.0775306299328804, "learning_rate": 0.00019634300134486877, "loss": 1.6846, "step": 217 }, { "epoch": 0.21975806451612903, "grad_norm": 0.11724736541509628, "learning_rate": 0.00019629934677570848, "loss": 1.6723, "step": 218 }, { "epoch": 0.22076612903225806, "grad_norm": 0.08374209702014923, "learning_rate": 0.00019625543810339652, "loss": 1.6552, "step": 219 }, { "epoch": 0.2217741935483871, "grad_norm": 0.09895430505275726, "learning_rate": 0.00019621127544379392, "loss": 1.6843, "step": 220 }, { "epoch": 0.22278225806451613, "grad_norm": 0.07595435529947281, "learning_rate": 0.00019616685891343173, "loss": 1.6878, "step": 221 }, { "epoch": 0.22379032258064516, "grad_norm": 0.10327397286891937, "learning_rate": 0.00019612218862951098, "loss": 1.641, "step": 222 }, { "epoch": 0.2247983870967742, "grad_norm": 0.08979543298482895, "learning_rate": 0.00019607726470990229, "loss": 1.7116, "step": 223 }, { "epoch": 0.22580645161290322, "grad_norm": 0.08411210030317307, "learning_rate": 0.00019603208727314543, "loss": 1.6503, "step": 224 }, { "epoch": 0.22681451612903225, "grad_norm": 0.08849965780973434, "learning_rate": 0.00019598665643844924, "loss": 1.7119, "step": 225 }, { "epoch": 0.22782258064516128, "grad_norm": 0.08358252048492432, "learning_rate": 0.00019594097232569118, "loss": 1.7034, "step": 226 }, { "epoch": 0.2288306451612903, "grad_norm": 0.08862830698490143, "learning_rate": 0.0001958950350554169, "loss": 1.6937, "step": 227 }, { "epoch": 0.22983870967741934, "grad_norm": 0.09029026329517365, "learning_rate": 0.00019584884474884025, "loss": 1.6537, "step": 228 }, { "epoch": 0.23084677419354838, "grad_norm": 0.0766313225030899, "learning_rate": 0.00019580240152784265, "loss": 1.6399, "step": 229 }, { "epoch": 0.2318548387096774, "grad_norm": 0.09331216663122177, "learning_rate": 0.00019575570551497287, "loss": 1.6876, "step": 230 }, { "epoch": 0.23286290322580644, "grad_norm": 0.07506153732538223, "learning_rate": 0.00019570875683344672, "loss": 1.6339, "step": 231 }, { "epoch": 0.23387096774193547, "grad_norm": 0.08822404593229294, "learning_rate": 0.0001956615556071468, "loss": 1.6883, "step": 232 }, { "epoch": 0.23487903225806453, "grad_norm": 0.07617950439453125, "learning_rate": 0.000195614101960622, "loss": 1.6845, "step": 233 }, { "epoch": 0.23588709677419356, "grad_norm": 0.0857347846031189, "learning_rate": 0.00019556639601908728, "loss": 1.6769, "step": 234 }, { "epoch": 0.2368951612903226, "grad_norm": 0.08155297487974167, "learning_rate": 0.00019551843790842338, "loss": 1.7275, "step": 235 }, { "epoch": 0.23790322580645162, "grad_norm": 0.08427773416042328, "learning_rate": 0.00019547022775517645, "loss": 1.627, "step": 236 }, { "epoch": 0.23891129032258066, "grad_norm": 0.0765247493982315, "learning_rate": 0.00019542176568655757, "loss": 1.6719, "step": 237 }, { "epoch": 0.2399193548387097, "grad_norm": 0.07752780616283417, "learning_rate": 0.00019537305183044268, "loss": 1.6307, "step": 238 }, { "epoch": 0.24092741935483872, "grad_norm": 0.07956812530755997, "learning_rate": 0.00019532408631537203, "loss": 1.6466, "step": 239 }, { "epoch": 0.24193548387096775, "grad_norm": 0.07456839084625244, "learning_rate": 0.00019527486927054994, "loss": 1.6692, "step": 240 }, { "epoch": 0.24294354838709678, "grad_norm": 0.08381907641887665, "learning_rate": 0.00019522540082584443, "loss": 1.679, "step": 241 }, { "epoch": 0.2439516129032258, "grad_norm": 0.07443513721227646, "learning_rate": 0.0001951756811117869, "loss": 1.6867, "step": 242 }, { "epoch": 0.24495967741935484, "grad_norm": 0.08541234582662582, "learning_rate": 0.00019512571025957182, "loss": 1.6424, "step": 243 }, { "epoch": 0.24596774193548387, "grad_norm": 0.07867056876420975, "learning_rate": 0.00019507548840105618, "loss": 1.6847, "step": 244 }, { "epoch": 0.2469758064516129, "grad_norm": 0.11804165691137314, "learning_rate": 0.00019502501566875943, "loss": 1.783, "step": 245 }, { "epoch": 0.24798387096774194, "grad_norm": 0.0737847164273262, "learning_rate": 0.00019497429219586296, "loss": 1.6644, "step": 246 }, { "epoch": 0.24899193548387097, "grad_norm": 0.08608712255954742, "learning_rate": 0.00019492331811620976, "loss": 1.6763, "step": 247 }, { "epoch": 0.25, "grad_norm": 0.09786904603242874, "learning_rate": 0.00019487209356430413, "loss": 1.7245, "step": 248 }, { "epoch": 0.25100806451612906, "grad_norm": 0.10795535892248154, "learning_rate": 0.00019482061867531127, "loss": 1.7183, "step": 249 }, { "epoch": 0.25201612903225806, "grad_norm": 0.0815276950597763, "learning_rate": 0.0001947688935850569, "loss": 1.7026, "step": 250 }, { "epoch": 0.2530241935483871, "grad_norm": 0.09202085435390472, "learning_rate": 0.00019471691843002701, "loss": 1.6327, "step": 251 }, { "epoch": 0.2540322580645161, "grad_norm": 0.08682993054389954, "learning_rate": 0.00019466469334736739, "loss": 1.6532, "step": 252 }, { "epoch": 0.2550403225806452, "grad_norm": 0.08007092773914337, "learning_rate": 0.00019461221847488333, "loss": 1.6587, "step": 253 }, { "epoch": 0.2560483870967742, "grad_norm": 0.12094767391681671, "learning_rate": 0.0001945594939510392, "loss": 1.7491, "step": 254 }, { "epoch": 0.25705645161290325, "grad_norm": 0.10074511170387268, "learning_rate": 0.00019450651991495812, "loss": 1.7363, "step": 255 }, { "epoch": 0.25806451612903225, "grad_norm": 0.0891348272562027, "learning_rate": 0.00019445329650642163, "loss": 1.6925, "step": 256 }, { "epoch": 0.2590725806451613, "grad_norm": 0.1022176444530487, "learning_rate": 0.00019439982386586932, "loss": 1.6419, "step": 257 }, { "epoch": 0.2600806451612903, "grad_norm": 0.08925571292638779, "learning_rate": 0.00019434610213439832, "loss": 1.6575, "step": 258 }, { "epoch": 0.2610887096774194, "grad_norm": 0.07562322169542313, "learning_rate": 0.0001942921314537631, "loss": 1.6187, "step": 259 }, { "epoch": 0.2620967741935484, "grad_norm": 0.09982999414205551, "learning_rate": 0.000194237911966375, "loss": 1.6341, "step": 260 }, { "epoch": 0.26310483870967744, "grad_norm": 0.08155392110347748, "learning_rate": 0.0001941834438153019, "loss": 1.7189, "step": 261 }, { "epoch": 0.26411290322580644, "grad_norm": 0.08979921042919159, "learning_rate": 0.00019412872714426782, "loss": 1.6556, "step": 262 }, { "epoch": 0.2651209677419355, "grad_norm": 0.08493686467409134, "learning_rate": 0.00019407376209765255, "loss": 1.6919, "step": 263 }, { "epoch": 0.2661290322580645, "grad_norm": 0.0822565034031868, "learning_rate": 0.0001940185488204912, "loss": 1.6205, "step": 264 }, { "epoch": 0.26713709677419356, "grad_norm": 0.08931294083595276, "learning_rate": 0.00019396308745847402, "loss": 1.6848, "step": 265 }, { "epoch": 0.26814516129032256, "grad_norm": 0.08736932277679443, "learning_rate": 0.00019390737815794574, "loss": 1.6882, "step": 266 }, { "epoch": 0.2691532258064516, "grad_norm": 0.09153414517641068, "learning_rate": 0.00019385142106590535, "loss": 1.7596, "step": 267 }, { "epoch": 0.2701612903225806, "grad_norm": 0.07890645414590836, "learning_rate": 0.00019379521633000572, "loss": 1.6987, "step": 268 }, { "epoch": 0.2711693548387097, "grad_norm": 0.08790858089923859, "learning_rate": 0.0001937387640985532, "loss": 1.6744, "step": 269 }, { "epoch": 0.2721774193548387, "grad_norm": 0.0803663581609726, "learning_rate": 0.00019368206452050713, "loss": 1.6846, "step": 270 }, { "epoch": 0.27318548387096775, "grad_norm": 0.09086322039365768, "learning_rate": 0.00019362511774547955, "loss": 1.6878, "step": 271 }, { "epoch": 0.27419354838709675, "grad_norm": 0.07199586182832718, "learning_rate": 0.00019356792392373479, "loss": 1.6316, "step": 272 }, { "epoch": 0.2752016129032258, "grad_norm": 0.08460623025894165, "learning_rate": 0.00019351048320618896, "loss": 1.6558, "step": 273 }, { "epoch": 0.2762096774193548, "grad_norm": 0.0732608363032341, "learning_rate": 0.0001934527957444098, "loss": 1.6752, "step": 274 }, { "epoch": 0.2772177419354839, "grad_norm": 0.0906132385134697, "learning_rate": 0.00019339486169061608, "loss": 1.7395, "step": 275 }, { "epoch": 0.2782258064516129, "grad_norm": 0.07827211916446686, "learning_rate": 0.00019333668119767716, "loss": 1.6681, "step": 276 }, { "epoch": 0.27923387096774194, "grad_norm": 0.08276840299367905, "learning_rate": 0.00019327825441911275, "loss": 1.6645, "step": 277 }, { "epoch": 0.28024193548387094, "grad_norm": 0.09114561229944229, "learning_rate": 0.00019321958150909243, "loss": 1.6857, "step": 278 }, { "epoch": 0.28125, "grad_norm": 0.08729056268930435, "learning_rate": 0.00019316066262243525, "loss": 1.6483, "step": 279 }, { "epoch": 0.28225806451612906, "grad_norm": 0.08572946488857269, "learning_rate": 0.00019310149791460925, "loss": 1.6872, "step": 280 }, { "epoch": 0.28326612903225806, "grad_norm": 0.10044838488101959, "learning_rate": 0.00019304208754173117, "loss": 1.6935, "step": 281 }, { "epoch": 0.2842741935483871, "grad_norm": 0.0785636454820633, "learning_rate": 0.000192982431660566, "loss": 1.6613, "step": 282 }, { "epoch": 0.2852822580645161, "grad_norm": 0.08499724417924881, "learning_rate": 0.00019292253042852648, "loss": 1.6208, "step": 283 }, { "epoch": 0.2862903225806452, "grad_norm": 0.09399082511663437, "learning_rate": 0.00019286238400367277, "loss": 1.619, "step": 284 }, { "epoch": 0.2872983870967742, "grad_norm": 0.07334808260202408, "learning_rate": 0.0001928019925447121, "loss": 1.6813, "step": 285 }, { "epoch": 0.28830645161290325, "grad_norm": 0.09035395085811615, "learning_rate": 0.00019274135621099813, "loss": 1.6265, "step": 286 }, { "epoch": 0.28931451612903225, "grad_norm": 0.07861501723527908, "learning_rate": 0.00019268047516253077, "loss": 1.6808, "step": 287 }, { "epoch": 0.2903225806451613, "grad_norm": 0.09788773208856583, "learning_rate": 0.00019261934955995563, "loss": 1.708, "step": 288 }, { "epoch": 0.2913306451612903, "grad_norm": 0.07571721822023392, "learning_rate": 0.00019255797956456357, "loss": 1.6612, "step": 289 }, { "epoch": 0.2923387096774194, "grad_norm": 0.0836874321103096, "learning_rate": 0.00019249636533829042, "loss": 1.6804, "step": 290 }, { "epoch": 0.2933467741935484, "grad_norm": 0.08373916894197464, "learning_rate": 0.00019243450704371632, "loss": 1.6317, "step": 291 }, { "epoch": 0.29435483870967744, "grad_norm": 0.08029752969741821, "learning_rate": 0.00019237240484406561, "loss": 1.6782, "step": 292 }, { "epoch": 0.29536290322580644, "grad_norm": 0.08353215456008911, "learning_rate": 0.00019231005890320602, "loss": 1.6517, "step": 293 }, { "epoch": 0.2963709677419355, "grad_norm": 0.09467596560716629, "learning_rate": 0.00019224746938564859, "loss": 1.6862, "step": 294 }, { "epoch": 0.2973790322580645, "grad_norm": 0.10909095406532288, "learning_rate": 0.000192184636456547, "loss": 1.6579, "step": 295 }, { "epoch": 0.29838709677419356, "grad_norm": 0.08434964716434479, "learning_rate": 0.00019212156028169724, "loss": 1.6516, "step": 296 }, { "epoch": 0.29939516129032256, "grad_norm": 0.09146866202354431, "learning_rate": 0.00019205824102753717, "loss": 1.6754, "step": 297 }, { "epoch": 0.3004032258064516, "grad_norm": 0.10936370491981506, "learning_rate": 0.00019199467886114603, "loss": 1.6495, "step": 298 }, { "epoch": 0.3014112903225806, "grad_norm": 0.08099015057086945, "learning_rate": 0.00019193087395024397, "loss": 1.6656, "step": 299 }, { "epoch": 0.3024193548387097, "grad_norm": 0.09252738207578659, "learning_rate": 0.0001918668264631918, "loss": 1.6711, "step": 300 }, { "epoch": 0.3034274193548387, "grad_norm": 0.08917499333620071, "learning_rate": 0.0001918025365689903, "loss": 1.6356, "step": 301 }, { "epoch": 0.30443548387096775, "grad_norm": 0.088597372174263, "learning_rate": 0.00019173800443727994, "loss": 1.6659, "step": 302 }, { "epoch": 0.30544354838709675, "grad_norm": 0.09308971464633942, "learning_rate": 0.00019167323023834033, "loss": 1.7218, "step": 303 }, { "epoch": 0.3064516129032258, "grad_norm": 0.07813969999551773, "learning_rate": 0.00019160821414308988, "loss": 1.6042, "step": 304 }, { "epoch": 0.3074596774193548, "grad_norm": 0.08843039721250534, "learning_rate": 0.0001915429563230853, "loss": 1.6409, "step": 305 }, { "epoch": 0.3084677419354839, "grad_norm": 0.09537311643362045, "learning_rate": 0.00019147745695052097, "loss": 1.6723, "step": 306 }, { "epoch": 0.3094758064516129, "grad_norm": 0.08754942566156387, "learning_rate": 0.00019141171619822882, "loss": 1.643, "step": 307 }, { "epoch": 0.31048387096774194, "grad_norm": 0.07768256217241287, "learning_rate": 0.0001913457342396777, "loss": 1.6109, "step": 308 }, { "epoch": 0.31149193548387094, "grad_norm": 0.09593945741653442, "learning_rate": 0.00019127951124897283, "loss": 1.6756, "step": 309 }, { "epoch": 0.3125, "grad_norm": 0.07348258048295975, "learning_rate": 0.00019121304740085546, "loss": 1.623, "step": 310 }, { "epoch": 0.31350806451612906, "grad_norm": 0.08579769730567932, "learning_rate": 0.0001911463428707025, "loss": 1.658, "step": 311 }, { "epoch": 0.31451612903225806, "grad_norm": 0.08485422283411026, "learning_rate": 0.00019107939783452577, "loss": 1.655, "step": 312 }, { "epoch": 0.3155241935483871, "grad_norm": 0.08101114630699158, "learning_rate": 0.00019101221246897184, "loss": 1.6391, "step": 313 }, { "epoch": 0.3165322580645161, "grad_norm": 0.08206996321678162, "learning_rate": 0.00019094478695132138, "loss": 1.6131, "step": 314 }, { "epoch": 0.3175403225806452, "grad_norm": 0.07818609476089478, "learning_rate": 0.00019087712145948868, "loss": 1.6632, "step": 315 }, { "epoch": 0.3185483870967742, "grad_norm": 0.09414539486169815, "learning_rate": 0.0001908092161720214, "loss": 1.6717, "step": 316 }, { "epoch": 0.31955645161290325, "grad_norm": 0.08382460474967957, "learning_rate": 0.00019074107126809984, "loss": 1.6867, "step": 317 }, { "epoch": 0.32056451612903225, "grad_norm": 0.07750436663627625, "learning_rate": 0.00019067268692753655, "loss": 1.6311, "step": 318 }, { "epoch": 0.3215725806451613, "grad_norm": 0.08067768812179565, "learning_rate": 0.00019060406333077596, "loss": 1.6681, "step": 319 }, { "epoch": 0.3225806451612903, "grad_norm": 0.074059396982193, "learning_rate": 0.00019053520065889375, "loss": 1.6408, "step": 320 }, { "epoch": 0.3235887096774194, "grad_norm": 0.10559958219528198, "learning_rate": 0.00019046609909359648, "loss": 1.7342, "step": 321 }, { "epoch": 0.3245967741935484, "grad_norm": 0.08121935278177261, "learning_rate": 0.00019039675881722104, "loss": 1.6808, "step": 322 }, { "epoch": 0.32560483870967744, "grad_norm": 0.08211352676153183, "learning_rate": 0.00019032718001273427, "loss": 1.6127, "step": 323 }, { "epoch": 0.32661290322580644, "grad_norm": 0.07450398057699203, "learning_rate": 0.0001902573628637323, "loss": 1.6555, "step": 324 }, { "epoch": 0.3276209677419355, "grad_norm": 0.0976330116391182, "learning_rate": 0.0001901873075544403, "loss": 1.6775, "step": 325 }, { "epoch": 0.3286290322580645, "grad_norm": 0.08012880384922028, "learning_rate": 0.00019011701426971178, "loss": 1.6213, "step": 326 }, { "epoch": 0.32963709677419356, "grad_norm": 0.08508668839931488, "learning_rate": 0.00019004648319502824, "loss": 1.5809, "step": 327 }, { "epoch": 0.33064516129032256, "grad_norm": 0.08622655272483826, "learning_rate": 0.00018997571451649856, "loss": 1.666, "step": 328 }, { "epoch": 0.3316532258064516, "grad_norm": 0.09803669154644012, "learning_rate": 0.00018990470842085867, "loss": 1.6784, "step": 329 }, { "epoch": 0.3326612903225806, "grad_norm": 0.08453961461782455, "learning_rate": 0.0001898334650954709, "loss": 1.6109, "step": 330 }, { "epoch": 0.3336693548387097, "grad_norm": 0.07246208935976028, "learning_rate": 0.00018976198472832364, "loss": 1.6117, "step": 331 }, { "epoch": 0.3346774193548387, "grad_norm": 0.08284757286310196, "learning_rate": 0.00018969026750803063, "loss": 1.6094, "step": 332 }, { "epoch": 0.33568548387096775, "grad_norm": 0.08026500046253204, "learning_rate": 0.00018961831362383067, "loss": 1.6555, "step": 333 }, { "epoch": 0.33669354838709675, "grad_norm": 0.08912428468465805, "learning_rate": 0.00018954612326558707, "loss": 1.6602, "step": 334 }, { "epoch": 0.3377016129032258, "grad_norm": 0.08738451451063156, "learning_rate": 0.00018947369662378704, "loss": 1.6125, "step": 335 }, { "epoch": 0.3387096774193548, "grad_norm": 0.07017836719751358, "learning_rate": 0.00018940103388954133, "loss": 1.6173, "step": 336 }, { "epoch": 0.3397177419354839, "grad_norm": 0.08264176547527313, "learning_rate": 0.00018932813525458363, "loss": 1.6716, "step": 337 }, { "epoch": 0.3407258064516129, "grad_norm": 0.08516332507133484, "learning_rate": 0.00018925500091127007, "loss": 1.6752, "step": 338 }, { "epoch": 0.34173387096774194, "grad_norm": 0.07101423293352127, "learning_rate": 0.00018918163105257883, "loss": 1.6393, "step": 339 }, { "epoch": 0.34274193548387094, "grad_norm": 0.07172892987728119, "learning_rate": 0.00018910802587210942, "loss": 1.6116, "step": 340 }, { "epoch": 0.34375, "grad_norm": 0.07889813184738159, "learning_rate": 0.0001890341855640824, "loss": 1.6107, "step": 341 }, { "epoch": 0.34475806451612906, "grad_norm": 0.07734905183315277, "learning_rate": 0.0001889601103233387, "loss": 1.6686, "step": 342 }, { "epoch": 0.34576612903225806, "grad_norm": 0.09568161517381668, "learning_rate": 0.00018888580034533915, "loss": 1.6914, "step": 343 }, { "epoch": 0.3467741935483871, "grad_norm": 0.0727929100394249, "learning_rate": 0.000188811255826164, "loss": 1.6271, "step": 344 }, { "epoch": 0.3477822580645161, "grad_norm": 0.07241855561733246, "learning_rate": 0.0001887364769625124, "loss": 1.6514, "step": 345 }, { "epoch": 0.3487903225806452, "grad_norm": 0.07215382158756256, "learning_rate": 0.00018866146395170178, "loss": 1.6578, "step": 346 }, { "epoch": 0.3497983870967742, "grad_norm": 0.07429207116365433, "learning_rate": 0.00018858621699166755, "loss": 1.6176, "step": 347 }, { "epoch": 0.35080645161290325, "grad_norm": 0.07516060024499893, "learning_rate": 0.00018851073628096225, "loss": 1.6735, "step": 348 }, { "epoch": 0.35181451612903225, "grad_norm": 0.08864877372980118, "learning_rate": 0.0001884350220187554, "loss": 1.6044, "step": 349 }, { "epoch": 0.3528225806451613, "grad_norm": 0.0749056488275528, "learning_rate": 0.00018835907440483267, "loss": 1.6316, "step": 350 }, { "epoch": 0.3538306451612903, "grad_norm": 0.09181974828243256, "learning_rate": 0.0001882828936395955, "loss": 1.6834, "step": 351 }, { "epoch": 0.3548387096774194, "grad_norm": 0.08013599365949631, "learning_rate": 0.00018820647992406054, "loss": 1.6367, "step": 352 }, { "epoch": 0.3558467741935484, "grad_norm": 0.0809824988245964, "learning_rate": 0.00018812983345985914, "loss": 1.658, "step": 353 }, { "epoch": 0.35685483870967744, "grad_norm": 0.1000952199101448, "learning_rate": 0.0001880529544492368, "loss": 1.6571, "step": 354 }, { "epoch": 0.35786290322580644, "grad_norm": 0.074663445353508, "learning_rate": 0.00018797584309505254, "loss": 1.6358, "step": 355 }, { "epoch": 0.3588709677419355, "grad_norm": 0.0898260623216629, "learning_rate": 0.00018789849960077864, "loss": 1.6496, "step": 356 }, { "epoch": 0.3598790322580645, "grad_norm": 0.08878135681152344, "learning_rate": 0.00018782092417049979, "loss": 1.6819, "step": 357 }, { "epoch": 0.36088709677419356, "grad_norm": 0.07256605476140976, "learning_rate": 0.00018774311700891269, "loss": 1.6521, "step": 358 }, { "epoch": 0.36189516129032256, "grad_norm": 0.07939675450325012, "learning_rate": 0.00018766507832132558, "loss": 1.6898, "step": 359 }, { "epoch": 0.3629032258064516, "grad_norm": 0.07508337497711182, "learning_rate": 0.00018758680831365755, "loss": 1.6204, "step": 360 }, { "epoch": 0.3639112903225806, "grad_norm": 0.07679913192987442, "learning_rate": 0.00018750830719243812, "loss": 1.597, "step": 361 }, { "epoch": 0.3649193548387097, "grad_norm": 0.07900839298963547, "learning_rate": 0.00018742957516480657, "loss": 1.6197, "step": 362 }, { "epoch": 0.3659274193548387, "grad_norm": 0.08279551565647125, "learning_rate": 0.00018735061243851158, "loss": 1.7151, "step": 363 }, { "epoch": 0.36693548387096775, "grad_norm": 0.10616319626569748, "learning_rate": 0.00018727141922191047, "loss": 1.7228, "step": 364 }, { "epoch": 0.36794354838709675, "grad_norm": 0.08777708560228348, "learning_rate": 0.00018719199572396882, "loss": 1.6661, "step": 365 }, { "epoch": 0.3689516129032258, "grad_norm": 0.0981433242559433, "learning_rate": 0.00018711234215425978, "loss": 1.6331, "step": 366 }, { "epoch": 0.3699596774193548, "grad_norm": 0.07754123210906982, "learning_rate": 0.00018703245872296365, "loss": 1.6757, "step": 367 }, { "epoch": 0.3709677419354839, "grad_norm": 0.09494742751121521, "learning_rate": 0.00018695234564086724, "loss": 1.6565, "step": 368 }, { "epoch": 0.3719758064516129, "grad_norm": 0.100984126329422, "learning_rate": 0.00018687200311936328, "loss": 1.6879, "step": 369 }, { "epoch": 0.37298387096774194, "grad_norm": 0.08996261656284332, "learning_rate": 0.00018679143137045006, "loss": 1.6579, "step": 370 }, { "epoch": 0.37399193548387094, "grad_norm": 0.0966666117310524, "learning_rate": 0.00018671063060673055, "loss": 1.5853, "step": 371 }, { "epoch": 0.375, "grad_norm": 0.07991211116313934, "learning_rate": 0.00018662960104141215, "loss": 1.6355, "step": 372 }, { "epoch": 0.37600806451612906, "grad_norm": 0.09592580795288086, "learning_rate": 0.00018654834288830591, "loss": 1.6172, "step": 373 }, { "epoch": 0.37701612903225806, "grad_norm": 0.07976924628019333, "learning_rate": 0.00018646685636182614, "loss": 1.641, "step": 374 }, { "epoch": 0.3780241935483871, "grad_norm": 0.08822676539421082, "learning_rate": 0.00018638514167698965, "loss": 1.6267, "step": 375 }, { "epoch": 0.3790322580645161, "grad_norm": 0.07680735737085342, "learning_rate": 0.00018630319904941535, "loss": 1.6484, "step": 376 }, { "epoch": 0.3800403225806452, "grad_norm": 0.09095903486013412, "learning_rate": 0.0001862210286953236, "loss": 1.6041, "step": 377 }, { "epoch": 0.3810483870967742, "grad_norm": 0.07204829901456833, "learning_rate": 0.0001861386308315357, "loss": 1.6058, "step": 378 }, { "epoch": 0.38205645161290325, "grad_norm": 0.12447134405374527, "learning_rate": 0.00018605600567547318, "loss": 1.6528, "step": 379 }, { "epoch": 0.38306451612903225, "grad_norm": 0.08234449476003647, "learning_rate": 0.00018597315344515744, "loss": 1.6408, "step": 380 }, { "epoch": 0.3840725806451613, "grad_norm": 0.0997692123055458, "learning_rate": 0.00018589007435920892, "loss": 1.631, "step": 381 }, { "epoch": 0.3850806451612903, "grad_norm": 0.10275771468877792, "learning_rate": 0.0001858067686368468, "loss": 1.6979, "step": 382 }, { "epoch": 0.3860887096774194, "grad_norm": 0.07703027874231339, "learning_rate": 0.00018572323649788822, "loss": 1.6037, "step": 383 }, { "epoch": 0.3870967741935484, "grad_norm": 0.08485141396522522, "learning_rate": 0.0001856394781627477, "loss": 1.6027, "step": 384 }, { "epoch": 0.38810483870967744, "grad_norm": 0.09312494099140167, "learning_rate": 0.00018555549385243674, "loss": 1.6757, "step": 385 }, { "epoch": 0.38911290322580644, "grad_norm": 0.09300917387008667, "learning_rate": 0.000185471283788563, "loss": 1.6615, "step": 386 }, { "epoch": 0.3901209677419355, "grad_norm": 0.07911553978919983, "learning_rate": 0.0001853868481933299, "loss": 1.6214, "step": 387 }, { "epoch": 0.3911290322580645, "grad_norm": 0.07960621267557144, "learning_rate": 0.00018530218728953597, "loss": 1.6709, "step": 388 }, { "epoch": 0.39213709677419356, "grad_norm": 0.0723830983042717, "learning_rate": 0.0001852173013005742, "loss": 1.6287, "step": 389 }, { "epoch": 0.39314516129032256, "grad_norm": 0.08178212493658066, "learning_rate": 0.00018513219045043156, "loss": 1.5888, "step": 390 }, { "epoch": 0.3941532258064516, "grad_norm": 0.07604778558015823, "learning_rate": 0.00018504685496368838, "loss": 1.6097, "step": 391 }, { "epoch": 0.3951612903225806, "grad_norm": 0.07833520323038101, "learning_rate": 0.00018496129506551763, "loss": 1.6119, "step": 392 }, { "epoch": 0.3961693548387097, "grad_norm": 0.0738687738776207, "learning_rate": 0.00018487551098168452, "loss": 1.646, "step": 393 }, { "epoch": 0.3971774193548387, "grad_norm": 0.08156421035528183, "learning_rate": 0.0001847895029385458, "loss": 1.612, "step": 394 }, { "epoch": 0.39818548387096775, "grad_norm": 0.0760064423084259, "learning_rate": 0.00018470327116304916, "loss": 1.6556, "step": 395 }, { "epoch": 0.39919354838709675, "grad_norm": 0.07635514438152313, "learning_rate": 0.0001846168158827326, "loss": 1.5948, "step": 396 }, { "epoch": 0.4002016129032258, "grad_norm": 0.07415641099214554, "learning_rate": 0.00018453013732572403, "loss": 1.6379, "step": 397 }, { "epoch": 0.4012096774193548, "grad_norm": 0.07627629488706589, "learning_rate": 0.00018444323572074035, "loss": 1.6067, "step": 398 }, { "epoch": 0.4022177419354839, "grad_norm": 0.08279147744178772, "learning_rate": 0.00018435611129708713, "loss": 1.6152, "step": 399 }, { "epoch": 0.4032258064516129, "grad_norm": 0.07391797006130219, "learning_rate": 0.00018426876428465777, "loss": 1.6568, "step": 400 }, { "epoch": 0.40423387096774194, "grad_norm": 0.07815629243850708, "learning_rate": 0.00018418119491393312, "loss": 1.6301, "step": 401 }, { "epoch": 0.40524193548387094, "grad_norm": 0.07491758465766907, "learning_rate": 0.0001840934034159807, "loss": 1.6668, "step": 402 }, { "epoch": 0.40625, "grad_norm": 0.07878877222537994, "learning_rate": 0.0001840053900224542, "loss": 1.6305, "step": 403 }, { "epoch": 0.40725806451612906, "grad_norm": 0.07592154294252396, "learning_rate": 0.00018391715496559273, "loss": 1.6853, "step": 404 }, { "epoch": 0.40826612903225806, "grad_norm": 0.082845039665699, "learning_rate": 0.00018382869847822044, "loss": 1.6918, "step": 405 }, { "epoch": 0.4092741935483871, "grad_norm": 0.07842651754617691, "learning_rate": 0.00018374002079374569, "loss": 1.65, "step": 406 }, { "epoch": 0.4102822580645161, "grad_norm": 0.07326355576515198, "learning_rate": 0.0001836511221461604, "loss": 1.6157, "step": 407 }, { "epoch": 0.4112903225806452, "grad_norm": 0.08537916839122772, "learning_rate": 0.00018356200277003975, "loss": 1.5959, "step": 408 }, { "epoch": 0.4122983870967742, "grad_norm": 0.09612290561199188, "learning_rate": 0.00018347266290054116, "loss": 1.6876, "step": 409 }, { "epoch": 0.41330645161290325, "grad_norm": 0.07688483595848083, "learning_rate": 0.00018338310277340406, "loss": 1.6094, "step": 410 }, { "epoch": 0.41431451612903225, "grad_norm": 0.09224136173725128, "learning_rate": 0.00018329332262494887, "loss": 1.616, "step": 411 }, { "epoch": 0.4153225806451613, "grad_norm": 0.09629214555025101, "learning_rate": 0.00018320332269207667, "loss": 1.6197, "step": 412 }, { "epoch": 0.4163306451612903, "grad_norm": 0.0956406518816948, "learning_rate": 0.00018311310321226853, "loss": 1.6939, "step": 413 }, { "epoch": 0.4173387096774194, "grad_norm": 0.11505012959241867, "learning_rate": 0.00018302266442358472, "loss": 1.6692, "step": 414 }, { "epoch": 0.4183467741935484, "grad_norm": 0.08150719106197357, "learning_rate": 0.0001829320065646643, "loss": 1.6428, "step": 415 }, { "epoch": 0.41935483870967744, "grad_norm": 0.10705471783876419, "learning_rate": 0.0001828411298747243, "loss": 1.7328, "step": 416 }, { "epoch": 0.42036290322580644, "grad_norm": 0.10280334204435349, "learning_rate": 0.00018275003459355924, "loss": 1.6245, "step": 417 }, { "epoch": 0.4213709677419355, "grad_norm": 0.07620084285736084, "learning_rate": 0.00018265872096154043, "loss": 1.6317, "step": 418 }, { "epoch": 0.4223790322580645, "grad_norm": 0.09292726963758469, "learning_rate": 0.00018256718921961525, "loss": 1.6555, "step": 419 }, { "epoch": 0.42338709677419356, "grad_norm": 0.07884904742240906, "learning_rate": 0.00018247543960930672, "loss": 1.6325, "step": 420 }, { "epoch": 0.42439516129032256, "grad_norm": 0.1114020049571991, "learning_rate": 0.00018238347237271266, "loss": 1.6861, "step": 421 }, { "epoch": 0.4254032258064516, "grad_norm": 0.08363789319992065, "learning_rate": 0.00018229128775250523, "loss": 1.6398, "step": 422 }, { "epoch": 0.4264112903225806, "grad_norm": 0.10317594558000565, "learning_rate": 0.00018219888599193008, "loss": 1.5966, "step": 423 }, { "epoch": 0.4274193548387097, "grad_norm": 0.09324808418750763, "learning_rate": 0.00018210626733480593, "loss": 1.6463, "step": 424 }, { "epoch": 0.4284274193548387, "grad_norm": 0.0866997167468071, "learning_rate": 0.00018201343202552367, "loss": 1.5802, "step": 425 }, { "epoch": 0.42943548387096775, "grad_norm": 0.09528562426567078, "learning_rate": 0.00018192038030904608, "loss": 1.6768, "step": 426 }, { "epoch": 0.43044354838709675, "grad_norm": 0.08449150621891022, "learning_rate": 0.00018182711243090678, "loss": 1.6323, "step": 427 }, { "epoch": 0.4314516129032258, "grad_norm": 0.07713552564382553, "learning_rate": 0.00018173362863720986, "loss": 1.6264, "step": 428 }, { "epoch": 0.4324596774193548, "grad_norm": 0.08549489825963974, "learning_rate": 0.00018163992917462918, "loss": 1.6628, "step": 429 }, { "epoch": 0.4334677419354839, "grad_norm": 0.07783807069063187, "learning_rate": 0.00018154601429040757, "loss": 1.6892, "step": 430 }, { "epoch": 0.4344758064516129, "grad_norm": 0.09653409570455551, "learning_rate": 0.00018145188423235634, "loss": 1.6651, "step": 431 }, { "epoch": 0.43548387096774194, "grad_norm": 0.08650687336921692, "learning_rate": 0.00018135753924885465, "loss": 1.6113, "step": 432 }, { "epoch": 0.43649193548387094, "grad_norm": 0.08643219619989395, "learning_rate": 0.00018126297958884866, "loss": 1.6111, "step": 433 }, { "epoch": 0.4375, "grad_norm": 0.08586744964122772, "learning_rate": 0.00018116820550185107, "loss": 1.643, "step": 434 }, { "epoch": 0.43850806451612906, "grad_norm": 0.09063699096441269, "learning_rate": 0.00018107321723794036, "loss": 1.6422, "step": 435 }, { "epoch": 0.43951612903225806, "grad_norm": 0.07849163562059402, "learning_rate": 0.00018097801504776012, "loss": 1.6183, "step": 436 }, { "epoch": 0.4405241935483871, "grad_norm": 0.07795203477144241, "learning_rate": 0.00018088259918251846, "loss": 1.6267, "step": 437 }, { "epoch": 0.4415322580645161, "grad_norm": 0.08508776873350143, "learning_rate": 0.00018078696989398734, "loss": 1.6581, "step": 438 }, { "epoch": 0.4425403225806452, "grad_norm": 0.08001305162906647, "learning_rate": 0.00018069112743450183, "loss": 1.6287, "step": 439 }, { "epoch": 0.4435483870967742, "grad_norm": 0.07482777535915375, "learning_rate": 0.0001805950720569595, "loss": 1.6426, "step": 440 }, { "epoch": 0.44455645161290325, "grad_norm": 0.07578035444021225, "learning_rate": 0.00018049880401481972, "loss": 1.6294, "step": 441 }, { "epoch": 0.44556451612903225, "grad_norm": 0.07782859355211258, "learning_rate": 0.00018040232356210308, "loss": 1.5935, "step": 442 }, { "epoch": 0.4465725806451613, "grad_norm": 0.07492804527282715, "learning_rate": 0.00018030563095339062, "loss": 1.5769, "step": 443 }, { "epoch": 0.4475806451612903, "grad_norm": 0.07825621962547302, "learning_rate": 0.00018020872644382313, "loss": 1.5786, "step": 444 }, { "epoch": 0.4485887096774194, "grad_norm": 0.09208081662654877, "learning_rate": 0.0001801116102891006, "loss": 1.6649, "step": 445 }, { "epoch": 0.4495967741935484, "grad_norm": 0.07900070399045944, "learning_rate": 0.00018001428274548156, "loss": 1.6529, "step": 446 }, { "epoch": 0.45060483870967744, "grad_norm": 0.07847368717193604, "learning_rate": 0.00017991674406978215, "loss": 1.6133, "step": 447 }, { "epoch": 0.45161290322580644, "grad_norm": 0.0754162147641182, "learning_rate": 0.00017981899451937573, "loss": 1.6478, "step": 448 }, { "epoch": 0.4526209677419355, "grad_norm": 0.08314093947410583, "learning_rate": 0.0001797210343521921, "loss": 1.5926, "step": 449 }, { "epoch": 0.4536290322580645, "grad_norm": 0.07506029307842255, "learning_rate": 0.00017962286382671678, "loss": 1.6031, "step": 450 }, { "epoch": 0.45463709677419356, "grad_norm": 0.09021966904401779, "learning_rate": 0.00017952448320199035, "loss": 1.5805, "step": 451 }, { "epoch": 0.45564516129032256, "grad_norm": 0.07435688376426697, "learning_rate": 0.00017942589273760783, "loss": 1.6291, "step": 452 }, { "epoch": 0.4566532258064516, "grad_norm": 0.07785916328430176, "learning_rate": 0.00017932709269371784, "loss": 1.6525, "step": 453 }, { "epoch": 0.4576612903225806, "grad_norm": 0.07916136831045151, "learning_rate": 0.00017922808333102207, "loss": 1.6301, "step": 454 }, { "epoch": 0.4586693548387097, "grad_norm": 0.08399738371372223, "learning_rate": 0.00017912886491077462, "loss": 1.6915, "step": 455 }, { "epoch": 0.4596774193548387, "grad_norm": 0.08618689328432083, "learning_rate": 0.000179029437694781, "loss": 1.6718, "step": 456 }, { "epoch": 0.46068548387096775, "grad_norm": 0.07570008933544159, "learning_rate": 0.00017892980194539798, "loss": 1.6588, "step": 457 }, { "epoch": 0.46169354838709675, "grad_norm": 0.09821120649576187, "learning_rate": 0.00017882995792553228, "loss": 1.6914, "step": 458 }, { "epoch": 0.4627016129032258, "grad_norm": 0.07994726300239563, "learning_rate": 0.00017872990589864034, "loss": 1.6077, "step": 459 }, { "epoch": 0.4637096774193548, "grad_norm": 0.08893134444952011, "learning_rate": 0.00017862964612872748, "loss": 1.6447, "step": 460 }, { "epoch": 0.4647177419354839, "grad_norm": 0.08347106724977493, "learning_rate": 0.00017852917888034706, "loss": 1.6501, "step": 461 }, { "epoch": 0.4657258064516129, "grad_norm": 0.07879969477653503, "learning_rate": 0.00017842850441860005, "loss": 1.643, "step": 462 }, { "epoch": 0.46673387096774194, "grad_norm": 0.08305401355028152, "learning_rate": 0.00017832762300913413, "loss": 1.677, "step": 463 }, { "epoch": 0.46774193548387094, "grad_norm": 0.0827251598238945, "learning_rate": 0.00017822653491814304, "loss": 1.6432, "step": 464 }, { "epoch": 0.46875, "grad_norm": 0.08472172170877457, "learning_rate": 0.00017812524041236586, "loss": 1.654, "step": 465 }, { "epoch": 0.46975806451612906, "grad_norm": 0.07689754664897919, "learning_rate": 0.0001780237397590864, "loss": 1.5642, "step": 466 }, { "epoch": 0.47076612903225806, "grad_norm": 0.10658534616231918, "learning_rate": 0.00017792203322613236, "loss": 1.6561, "step": 467 }, { "epoch": 0.4717741935483871, "grad_norm": 0.08347711712121964, "learning_rate": 0.0001778201210818748, "loss": 1.6595, "step": 468 }, { "epoch": 0.4727822580645161, "grad_norm": 0.08595866709947586, "learning_rate": 0.0001777180035952272, "loss": 1.6185, "step": 469 }, { "epoch": 0.4737903225806452, "grad_norm": 0.08824612945318222, "learning_rate": 0.00017761568103564487, "loss": 1.6779, "step": 470 }, { "epoch": 0.4747983870967742, "grad_norm": 0.07452390342950821, "learning_rate": 0.0001775131536731244, "loss": 1.6252, "step": 471 }, { "epoch": 0.47580645161290325, "grad_norm": 0.09783647954463959, "learning_rate": 0.00017741042177820258, "loss": 1.6417, "step": 472 }, { "epoch": 0.47681451612903225, "grad_norm": 0.07527977973222733, "learning_rate": 0.0001773074856219561, "loss": 1.6128, "step": 473 }, { "epoch": 0.4778225806451613, "grad_norm": 0.07836946099996567, "learning_rate": 0.00017720434547600043, "loss": 1.625, "step": 474 }, { "epoch": 0.4788306451612903, "grad_norm": 0.07427874952554703, "learning_rate": 0.00017710100161248945, "loss": 1.6261, "step": 475 }, { "epoch": 0.4798387096774194, "grad_norm": 0.09168553352355957, "learning_rate": 0.0001769974543041145, "loss": 1.702, "step": 476 }, { "epoch": 0.4808467741935484, "grad_norm": 0.0791415199637413, "learning_rate": 0.00017689370382410386, "loss": 1.6129, "step": 477 }, { "epoch": 0.48185483870967744, "grad_norm": 0.07638856768608093, "learning_rate": 0.00017678975044622174, "loss": 1.593, "step": 478 }, { "epoch": 0.48286290322580644, "grad_norm": 0.08905162662267685, "learning_rate": 0.00017668559444476793, "loss": 1.6803, "step": 479 }, { "epoch": 0.4838709677419355, "grad_norm": 0.08039755374193192, "learning_rate": 0.00017658123609457668, "loss": 1.6624, "step": 480 }, { "epoch": 0.4848790322580645, "grad_norm": 0.07831753045320511, "learning_rate": 0.00017647667567101632, "loss": 1.6602, "step": 481 }, { "epoch": 0.48588709677419356, "grad_norm": 0.07645969092845917, "learning_rate": 0.00017637191344998837, "loss": 1.6462, "step": 482 }, { "epoch": 0.48689516129032256, "grad_norm": 0.0790887251496315, "learning_rate": 0.00017626694970792673, "loss": 1.581, "step": 483 }, { "epoch": 0.4879032258064516, "grad_norm": 0.07644886523485184, "learning_rate": 0.00017616178472179715, "loss": 1.6035, "step": 484 }, { "epoch": 0.4889112903225806, "grad_norm": 0.08160758763551712, "learning_rate": 0.0001760564187690964, "loss": 1.6169, "step": 485 }, { "epoch": 0.4899193548387097, "grad_norm": 0.09234445542097092, "learning_rate": 0.00017595085212785146, "loss": 1.5878, "step": 486 }, { "epoch": 0.4909274193548387, "grad_norm": 0.09042947739362717, "learning_rate": 0.0001758450850766189, "loss": 1.6629, "step": 487 }, { "epoch": 0.49193548387096775, "grad_norm": 0.08583879470825195, "learning_rate": 0.00017573911789448414, "loss": 1.6398, "step": 488 }, { "epoch": 0.49294354838709675, "grad_norm": 0.07878076285123825, "learning_rate": 0.00017563295086106063, "loss": 1.64, "step": 489 }, { "epoch": 0.4939516129032258, "grad_norm": 0.08849604427814484, "learning_rate": 0.00017552658425648923, "loss": 1.6015, "step": 490 }, { "epoch": 0.4949596774193548, "grad_norm": 0.07961837202310562, "learning_rate": 0.00017542001836143731, "loss": 1.6392, "step": 491 }, { "epoch": 0.4959677419354839, "grad_norm": 0.08883430808782578, "learning_rate": 0.00017531325345709816, "loss": 1.6417, "step": 492 }, { "epoch": 0.4969758064516129, "grad_norm": 0.07420235127210617, "learning_rate": 0.00017520628982519023, "loss": 1.635, "step": 493 }, { "epoch": 0.49798387096774194, "grad_norm": 0.08477555215358734, "learning_rate": 0.0001750991277479563, "loss": 1.6264, "step": 494 }, { "epoch": 0.49899193548387094, "grad_norm": 0.07410185784101486, "learning_rate": 0.00017499176750816276, "loss": 1.6414, "step": 495 }, { "epoch": 0.5, "grad_norm": 0.08427213877439499, "learning_rate": 0.00017488420938909893, "loss": 1.6546, "step": 496 }, { "epoch": 0.501008064516129, "grad_norm": 0.0739702582359314, "learning_rate": 0.00017477645367457628, "loss": 1.6316, "step": 497 }, { "epoch": 0.5020161290322581, "grad_norm": 0.08044146001338959, "learning_rate": 0.00017466850064892762, "loss": 1.6256, "step": 498 }, { "epoch": 0.5030241935483871, "grad_norm": 0.08690078556537628, "learning_rate": 0.0001745603505970064, "loss": 1.589, "step": 499 }, { "epoch": 0.5040322580645161, "grad_norm": 0.07842793315649033, "learning_rate": 0.00017445200380418607, "loss": 1.6352, "step": 500 }, { "epoch": 0.5050403225806451, "grad_norm": 0.08214239776134491, "learning_rate": 0.00017434346055635912, "loss": 1.6244, "step": 501 }, { "epoch": 0.5060483870967742, "grad_norm": 0.07770374417304993, "learning_rate": 0.00017423472113993634, "loss": 1.65, "step": 502 }, { "epoch": 0.5070564516129032, "grad_norm": 0.08378950506448746, "learning_rate": 0.00017412578584184637, "loss": 1.6129, "step": 503 }, { "epoch": 0.5080645161290323, "grad_norm": 0.07839113473892212, "learning_rate": 0.00017401665494953453, "loss": 1.6479, "step": 504 }, { "epoch": 0.5090725806451613, "grad_norm": 0.0775337815284729, "learning_rate": 0.00017390732875096227, "loss": 1.6005, "step": 505 }, { "epoch": 0.5100806451612904, "grad_norm": 0.08532094955444336, "learning_rate": 0.00017379780753460654, "loss": 1.6669, "step": 506 }, { "epoch": 0.5110887096774194, "grad_norm": 0.07484716176986694, "learning_rate": 0.00017368809158945872, "loss": 1.6786, "step": 507 }, { "epoch": 0.5120967741935484, "grad_norm": 0.08861152827739716, "learning_rate": 0.00017357818120502402, "loss": 1.6753, "step": 508 }, { "epoch": 0.5131048387096774, "grad_norm": 0.08586420863866806, "learning_rate": 0.00017346807667132085, "loss": 1.6483, "step": 509 }, { "epoch": 0.5141129032258065, "grad_norm": 0.08970779180526733, "learning_rate": 0.00017335777827887978, "loss": 1.6776, "step": 510 }, { "epoch": 0.5151209677419355, "grad_norm": 0.08755983412265778, "learning_rate": 0.00017324728631874298, "loss": 1.6666, "step": 511 }, { "epoch": 0.5161290322580645, "grad_norm": 0.08634518831968307, "learning_rate": 0.00017313660108246337, "loss": 1.6195, "step": 512 }, { "epoch": 0.5171370967741935, "grad_norm": 0.08298657834529877, "learning_rate": 0.00017302572286210382, "loss": 1.5564, "step": 513 }, { "epoch": 0.5181451612903226, "grad_norm": 0.07834544777870178, "learning_rate": 0.00017291465195023653, "loss": 1.6109, "step": 514 }, { "epoch": 0.5191532258064516, "grad_norm": 0.09181385487318039, "learning_rate": 0.000172803388639942, "loss": 1.6387, "step": 515 }, { "epoch": 0.5201612903225806, "grad_norm": 0.07698329538106918, "learning_rate": 0.00017269193322480856, "loss": 1.6223, "step": 516 }, { "epoch": 0.5211693548387096, "grad_norm": 0.10118810087442398, "learning_rate": 0.00017258028599893136, "loss": 1.6365, "step": 517 }, { "epoch": 0.5221774193548387, "grad_norm": 0.08565083891153336, "learning_rate": 0.00017246844725691166, "loss": 1.5905, "step": 518 }, { "epoch": 0.5231854838709677, "grad_norm": 0.08563411980867386, "learning_rate": 0.00017235641729385615, "loss": 1.6141, "step": 519 }, { "epoch": 0.5241935483870968, "grad_norm": 0.07669138163328171, "learning_rate": 0.00017224419640537598, "loss": 1.6278, "step": 520 }, { "epoch": 0.5252016129032258, "grad_norm": 0.09773047268390656, "learning_rate": 0.00017213178488758622, "loss": 1.7324, "step": 521 }, { "epoch": 0.5262096774193549, "grad_norm": 0.07799120247364044, "learning_rate": 0.00017201918303710482, "loss": 1.5967, "step": 522 }, { "epoch": 0.5272177419354839, "grad_norm": 0.0810832753777504, "learning_rate": 0.0001719063911510521, "loss": 1.6204, "step": 523 }, { "epoch": 0.5282258064516129, "grad_norm": 0.08055137097835541, "learning_rate": 0.0001717934095270497, "loss": 1.6138, "step": 524 }, { "epoch": 0.5292338709677419, "grad_norm": 0.08200159668922424, "learning_rate": 0.0001716802384632199, "loss": 1.6211, "step": 525 }, { "epoch": 0.530241935483871, "grad_norm": 0.0793243944644928, "learning_rate": 0.00017156687825818504, "loss": 1.579, "step": 526 }, { "epoch": 0.53125, "grad_norm": 0.08332548290491104, "learning_rate": 0.00017145332921106633, "loss": 1.5874, "step": 527 }, { "epoch": 0.532258064516129, "grad_norm": 0.07582446932792664, "learning_rate": 0.00017133959162148336, "loss": 1.5871, "step": 528 }, { "epoch": 0.5332661290322581, "grad_norm": 0.0803590714931488, "learning_rate": 0.00017122566578955324, "loss": 1.6451, "step": 529 }, { "epoch": 0.5342741935483871, "grad_norm": 0.07705288380384445, "learning_rate": 0.00017111155201588978, "loss": 1.5892, "step": 530 }, { "epoch": 0.5352822580645161, "grad_norm": 0.08003994822502136, "learning_rate": 0.0001709972506016027, "loss": 1.6701, "step": 531 }, { "epoch": 0.5362903225806451, "grad_norm": 0.07644215226173401, "learning_rate": 0.00017088276184829685, "loss": 1.6271, "step": 532 }, { "epoch": 0.5372983870967742, "grad_norm": 0.08193427324295044, "learning_rate": 0.00017076808605807138, "loss": 1.5906, "step": 533 }, { "epoch": 0.5383064516129032, "grad_norm": 0.08339913934469223, "learning_rate": 0.00017065322353351903, "loss": 1.6452, "step": 534 }, { "epoch": 0.5393145161290323, "grad_norm": 0.08375068008899689, "learning_rate": 0.0001705381745777252, "loss": 1.6573, "step": 535 }, { "epoch": 0.5403225806451613, "grad_norm": 0.07980147749185562, "learning_rate": 0.00017042293949426726, "loss": 1.5999, "step": 536 }, { "epoch": 0.5413306451612904, "grad_norm": 0.07945246994495392, "learning_rate": 0.00017030751858721375, "loss": 1.6372, "step": 537 }, { "epoch": 0.5423387096774194, "grad_norm": 0.07931476086378098, "learning_rate": 0.00017019191216112342, "loss": 1.6244, "step": 538 }, { "epoch": 0.5433467741935484, "grad_norm": 0.07984746247529984, "learning_rate": 0.00017007612052104474, "loss": 1.5592, "step": 539 }, { "epoch": 0.5443548387096774, "grad_norm": 0.09376467764377594, "learning_rate": 0.00016996014397251466, "loss": 1.6774, "step": 540 }, { "epoch": 0.5453629032258065, "grad_norm": 0.08642607182264328, "learning_rate": 0.00016984398282155825, "loss": 1.6101, "step": 541 }, { "epoch": 0.5463709677419355, "grad_norm": 0.07891902327537537, "learning_rate": 0.00016972763737468758, "loss": 1.6109, "step": 542 }, { "epoch": 0.5473790322580645, "grad_norm": 0.07893992215394974, "learning_rate": 0.00016961110793890108, "loss": 1.643, "step": 543 }, { "epoch": 0.5483870967741935, "grad_norm": 0.08107249438762665, "learning_rate": 0.00016949439482168255, "loss": 1.6093, "step": 544 }, { "epoch": 0.5493951612903226, "grad_norm": 0.08450604975223541, "learning_rate": 0.00016937749833100064, "loss": 1.6406, "step": 545 }, { "epoch": 0.5504032258064516, "grad_norm": 0.08088622242212296, "learning_rate": 0.0001692604187753077, "loss": 1.6293, "step": 546 }, { "epoch": 0.5514112903225806, "grad_norm": 0.09227669984102249, "learning_rate": 0.0001691431564635392, "loss": 1.6022, "step": 547 }, { "epoch": 0.5524193548387096, "grad_norm": 0.08562039583921432, "learning_rate": 0.00016902571170511292, "loss": 1.6341, "step": 548 }, { "epoch": 0.5534274193548387, "grad_norm": 0.09240545332431793, "learning_rate": 0.0001689080848099279, "loss": 1.643, "step": 549 }, { "epoch": 0.5544354838709677, "grad_norm": 0.09082893282175064, "learning_rate": 0.00016879027608836394, "loss": 1.6132, "step": 550 }, { "epoch": 0.5554435483870968, "grad_norm": 0.08730785548686981, "learning_rate": 0.00016867228585128047, "loss": 1.631, "step": 551 }, { "epoch": 0.5564516129032258, "grad_norm": 0.08937687426805496, "learning_rate": 0.000168554114410016, "loss": 1.7034, "step": 552 }, { "epoch": 0.5574596774193549, "grad_norm": 0.07652641087770462, "learning_rate": 0.0001684357620763872, "loss": 1.6019, "step": 553 }, { "epoch": 0.5584677419354839, "grad_norm": 0.08145558089017868, "learning_rate": 0.00016831722916268787, "loss": 1.6705, "step": 554 }, { "epoch": 0.5594758064516129, "grad_norm": 0.09578656405210495, "learning_rate": 0.0001681985159816885, "loss": 1.6889, "step": 555 }, { "epoch": 0.5604838709677419, "grad_norm": 0.085781030356884, "learning_rate": 0.00016807962284663518, "loss": 1.6362, "step": 556 }, { "epoch": 0.561491935483871, "grad_norm": 0.07998887449502945, "learning_rate": 0.0001679605500712488, "loss": 1.6045, "step": 557 }, { "epoch": 0.5625, "grad_norm": 0.09279566258192062, "learning_rate": 0.00016784129796972431, "loss": 1.5786, "step": 558 }, { "epoch": 0.563508064516129, "grad_norm": 0.08150017261505127, "learning_rate": 0.0001677218668567299, "loss": 1.6313, "step": 559 }, { "epoch": 0.5645161290322581, "grad_norm": 0.08562348783016205, "learning_rate": 0.00016760225704740594, "loss": 1.6047, "step": 560 }, { "epoch": 0.5655241935483871, "grad_norm": 0.09371492266654968, "learning_rate": 0.00016748246885736452, "loss": 1.6599, "step": 561 }, { "epoch": 0.5665322580645161, "grad_norm": 0.08150923997163773, "learning_rate": 0.00016736250260268828, "loss": 1.6556, "step": 562 }, { "epoch": 0.5675403225806451, "grad_norm": 0.08109602332115173, "learning_rate": 0.0001672423585999298, "loss": 1.6143, "step": 563 }, { "epoch": 0.5685483870967742, "grad_norm": 0.07796693593263626, "learning_rate": 0.0001671220371661106, "loss": 1.6046, "step": 564 }, { "epoch": 0.5695564516129032, "grad_norm": 0.08694635331630707, "learning_rate": 0.0001670015386187205, "loss": 1.6564, "step": 565 }, { "epoch": 0.5705645161290323, "grad_norm": 0.08142531663179398, "learning_rate": 0.00016688086327571648, "loss": 1.6406, "step": 566 }, { "epoch": 0.5715725806451613, "grad_norm": 0.07907096296548843, "learning_rate": 0.00016676001145552228, "loss": 1.5948, "step": 567 }, { "epoch": 0.5725806451612904, "grad_norm": 0.08147318661212921, "learning_rate": 0.0001666389834770271, "loss": 1.5789, "step": 568 }, { "epoch": 0.5735887096774194, "grad_norm": 0.08041603118181229, "learning_rate": 0.00016651777965958503, "loss": 1.6229, "step": 569 }, { "epoch": 0.5745967741935484, "grad_norm": 0.07601971924304962, "learning_rate": 0.00016639640032301413, "loss": 1.5722, "step": 570 }, { "epoch": 0.5756048387096774, "grad_norm": 0.08111369609832764, "learning_rate": 0.0001662748457875957, "loss": 1.6485, "step": 571 }, { "epoch": 0.5766129032258065, "grad_norm": 0.07956349104642868, "learning_rate": 0.00016615311637407316, "loss": 1.6118, "step": 572 }, { "epoch": 0.5776209677419355, "grad_norm": 0.08260063081979752, "learning_rate": 0.00016603121240365152, "loss": 1.6618, "step": 573 }, { "epoch": 0.5786290322580645, "grad_norm": 0.077680803835392, "learning_rate": 0.00016590913419799633, "loss": 1.6316, "step": 574 }, { "epoch": 0.5796370967741935, "grad_norm": 0.08391865342855453, "learning_rate": 0.00016578688207923289, "loss": 1.6273, "step": 575 }, { "epoch": 0.5806451612903226, "grad_norm": 0.08210872858762741, "learning_rate": 0.0001656644563699454, "loss": 1.6222, "step": 576 }, { "epoch": 0.5816532258064516, "grad_norm": 0.07796725630760193, "learning_rate": 0.00016554185739317616, "loss": 1.5981, "step": 577 }, { "epoch": 0.5826612903225806, "grad_norm": 0.0765356495976448, "learning_rate": 0.00016541908547242459, "loss": 1.6164, "step": 578 }, { "epoch": 0.5836693548387096, "grad_norm": 0.090540811419487, "learning_rate": 0.00016529614093164648, "loss": 1.6994, "step": 579 }, { "epoch": 0.5846774193548387, "grad_norm": 0.08444759249687195, "learning_rate": 0.00016517302409525315, "loss": 1.6154, "step": 580 }, { "epoch": 0.5856854838709677, "grad_norm": 0.0766877606511116, "learning_rate": 0.0001650497352881105, "loss": 1.6046, "step": 581 }, { "epoch": 0.5866935483870968, "grad_norm": 0.0797574445605278, "learning_rate": 0.00016492627483553822, "loss": 1.6298, "step": 582 }, { "epoch": 0.5877016129032258, "grad_norm": 0.07783927023410797, "learning_rate": 0.00016480264306330898, "loss": 1.5702, "step": 583 }, { "epoch": 0.5887096774193549, "grad_norm": 0.08371485024690628, "learning_rate": 0.0001646788402976474, "loss": 1.6215, "step": 584 }, { "epoch": 0.5897177419354839, "grad_norm": 0.08839402347803116, "learning_rate": 0.0001645548668652294, "loss": 1.5996, "step": 585 }, { "epoch": 0.5907258064516129, "grad_norm": 0.07832740247249603, "learning_rate": 0.0001644307230931811, "loss": 1.6281, "step": 586 }, { "epoch": 0.5917338709677419, "grad_norm": 0.07553452998399734, "learning_rate": 0.00016430640930907827, "loss": 1.6147, "step": 587 }, { "epoch": 0.592741935483871, "grad_norm": 0.07809963822364807, "learning_rate": 0.00016418192584094515, "loss": 1.5993, "step": 588 }, { "epoch": 0.59375, "grad_norm": 0.07688596844673157, "learning_rate": 0.00016405727301725377, "loss": 1.6019, "step": 589 }, { "epoch": 0.594758064516129, "grad_norm": 0.07611083984375, "learning_rate": 0.00016393245116692304, "loss": 1.5689, "step": 590 }, { "epoch": 0.5957661290322581, "grad_norm": 0.08132312446832657, "learning_rate": 0.00016380746061931786, "loss": 1.6307, "step": 591 }, { "epoch": 0.5967741935483871, "grad_norm": 0.07959824800491333, "learning_rate": 0.00016368230170424826, "loss": 1.5851, "step": 592 }, { "epoch": 0.5977822580645161, "grad_norm": 0.08210327476263046, "learning_rate": 0.0001635569747519686, "loss": 1.6139, "step": 593 }, { "epoch": 0.5987903225806451, "grad_norm": 0.1014091745018959, "learning_rate": 0.00016343148009317657, "loss": 1.564, "step": 594 }, { "epoch": 0.5997983870967742, "grad_norm": 0.08163224905729294, "learning_rate": 0.00016330581805901239, "loss": 1.5896, "step": 595 }, { "epoch": 0.6008064516129032, "grad_norm": 0.08205213397741318, "learning_rate": 0.00016317998898105797, "loss": 1.6271, "step": 596 }, { "epoch": 0.6018145161290323, "grad_norm": 0.07970026135444641, "learning_rate": 0.00016305399319133595, "loss": 1.6024, "step": 597 }, { "epoch": 0.6028225806451613, "grad_norm": 0.07718155533075333, "learning_rate": 0.00016292783102230888, "loss": 1.5951, "step": 598 }, { "epoch": 0.6038306451612904, "grad_norm": 0.09728401899337769, "learning_rate": 0.00016280150280687834, "loss": 1.6838, "step": 599 }, { "epoch": 0.6048387096774194, "grad_norm": 0.08184093236923218, "learning_rate": 0.00016267500887838412, "loss": 1.5902, "step": 600 }, { "epoch": 0.6058467741935484, "grad_norm": 0.08744041621685028, "learning_rate": 0.00016254834957060309, "loss": 1.6292, "step": 601 }, { "epoch": 0.6068548387096774, "grad_norm": 0.09200835227966309, "learning_rate": 0.00016242152521774874, "loss": 1.6393, "step": 602 }, { "epoch": 0.6078629032258065, "grad_norm": 0.08810313045978546, "learning_rate": 0.0001622945361544699, "loss": 1.6201, "step": 603 }, { "epoch": 0.6088709677419355, "grad_norm": 0.09700248390436172, "learning_rate": 0.00016216738271584999, "loss": 1.5638, "step": 604 }, { "epoch": 0.6098790322580645, "grad_norm": 0.08686663955450058, "learning_rate": 0.00016204006523740634, "loss": 1.5734, "step": 605 }, { "epoch": 0.6108870967741935, "grad_norm": 0.07873237133026123, "learning_rate": 0.00016191258405508896, "loss": 1.5469, "step": 606 }, { "epoch": 0.6118951612903226, "grad_norm": 0.08019126206636429, "learning_rate": 0.0001617849395052799, "loss": 1.6431, "step": 607 }, { "epoch": 0.6129032258064516, "grad_norm": 0.08971964567899704, "learning_rate": 0.00016165713192479227, "loss": 1.6535, "step": 608 }, { "epoch": 0.6139112903225806, "grad_norm": 0.07752855867147446, "learning_rate": 0.00016152916165086936, "loss": 1.5829, "step": 609 }, { "epoch": 0.6149193548387096, "grad_norm": 0.08348417282104492, "learning_rate": 0.00016140102902118377, "loss": 1.6305, "step": 610 }, { "epoch": 0.6159274193548387, "grad_norm": 0.0761261060833931, "learning_rate": 0.0001612727343738365, "loss": 1.5835, "step": 611 }, { "epoch": 0.6169354838709677, "grad_norm": 0.11013983935117722, "learning_rate": 0.00016114427804735603, "loss": 1.6364, "step": 612 }, { "epoch": 0.6179435483870968, "grad_norm": 0.086505226790905, "learning_rate": 0.00016101566038069756, "loss": 1.61, "step": 613 }, { "epoch": 0.6189516129032258, "grad_norm": 0.08692600578069687, "learning_rate": 0.00016088688171324184, "loss": 1.6153, "step": 614 }, { "epoch": 0.6199596774193549, "grad_norm": 0.09537503123283386, "learning_rate": 0.0001607579423847946, "loss": 1.6053, "step": 615 }, { "epoch": 0.6209677419354839, "grad_norm": 0.08204115927219391, "learning_rate": 0.00016062884273558545, "loss": 1.5939, "step": 616 }, { "epoch": 0.6219758064516129, "grad_norm": 0.08595214784145355, "learning_rate": 0.00016049958310626708, "loss": 1.6162, "step": 617 }, { "epoch": 0.6229838709677419, "grad_norm": 0.08318503201007843, "learning_rate": 0.00016037016383791425, "loss": 1.6401, "step": 618 }, { "epoch": 0.623991935483871, "grad_norm": 0.08207780867815018, "learning_rate": 0.00016024058527202298, "loss": 1.6226, "step": 619 }, { "epoch": 0.625, "grad_norm": 0.08268122375011444, "learning_rate": 0.00016011084775050959, "loss": 1.6522, "step": 620 }, { "epoch": 0.626008064516129, "grad_norm": 0.07751034945249557, "learning_rate": 0.00015998095161570995, "loss": 1.5455, "step": 621 }, { "epoch": 0.6270161290322581, "grad_norm": 0.08539839088916779, "learning_rate": 0.00015985089721037832, "loss": 1.6116, "step": 622 }, { "epoch": 0.6280241935483871, "grad_norm": 0.08065900206565857, "learning_rate": 0.00015972068487768665, "loss": 1.6102, "step": 623 }, { "epoch": 0.6290322580645161, "grad_norm": 0.07968778163194656, "learning_rate": 0.00015959031496122364, "loss": 1.6065, "step": 624 }, { "epoch": 0.6300403225806451, "grad_norm": 0.08040513843297958, "learning_rate": 0.00015945978780499375, "loss": 1.5974, "step": 625 }, { "epoch": 0.6310483870967742, "grad_norm": 0.0841718390583992, "learning_rate": 0.00015932910375341639, "loss": 1.5943, "step": 626 }, { "epoch": 0.6320564516129032, "grad_norm": 0.07834211736917496, "learning_rate": 0.0001591982631513249, "loss": 1.5856, "step": 627 }, { "epoch": 0.6330645161290323, "grad_norm": 0.08371677994728088, "learning_rate": 0.00015906726634396575, "loss": 1.5972, "step": 628 }, { "epoch": 0.6340725806451613, "grad_norm": 0.09251397848129272, "learning_rate": 0.00015893611367699762, "loss": 1.6529, "step": 629 }, { "epoch": 0.6350806451612904, "grad_norm": 0.080534428358078, "learning_rate": 0.00015880480549649038, "loss": 1.5786, "step": 630 }, { "epoch": 0.6360887096774194, "grad_norm": 0.09134898334741592, "learning_rate": 0.00015867334214892436, "loss": 1.6303, "step": 631 }, { "epoch": 0.6370967741935484, "grad_norm": 0.08673352748155594, "learning_rate": 0.00015854172398118913, "loss": 1.6281, "step": 632 }, { "epoch": 0.6381048387096774, "grad_norm": 0.11661474406719208, "learning_rate": 0.000158409951340583, "loss": 1.6826, "step": 633 }, { "epoch": 0.6391129032258065, "grad_norm": 0.08508265018463135, "learning_rate": 0.0001582780245748118, "loss": 1.5785, "step": 634 }, { "epoch": 0.6401209677419355, "grad_norm": 0.09865213930606842, "learning_rate": 0.00015814594403198794, "loss": 1.619, "step": 635 }, { "epoch": 0.6411290322580645, "grad_norm": 0.08882018178701401, "learning_rate": 0.00015801371006062982, "loss": 1.6076, "step": 636 }, { "epoch": 0.6421370967741935, "grad_norm": 0.10395356267690659, "learning_rate": 0.00015788132300966046, "loss": 1.6193, "step": 637 }, { "epoch": 0.6431451612903226, "grad_norm": 0.08556309342384338, "learning_rate": 0.00015774878322840694, "loss": 1.6313, "step": 638 }, { "epoch": 0.6441532258064516, "grad_norm": 0.08463555574417114, "learning_rate": 0.00015761609106659935, "loss": 1.5852, "step": 639 }, { "epoch": 0.6451612903225806, "grad_norm": 0.08253596723079681, "learning_rate": 0.0001574832468743698, "loss": 1.65, "step": 640 }, { "epoch": 0.6461693548387096, "grad_norm": 0.09345366060733795, "learning_rate": 0.0001573502510022516, "loss": 1.5869, "step": 641 }, { "epoch": 0.6471774193548387, "grad_norm": 0.08240879327058792, "learning_rate": 0.00015721710380117826, "loss": 1.6057, "step": 642 }, { "epoch": 0.6481854838709677, "grad_norm": 0.08767805248498917, "learning_rate": 0.0001570838056224827, "loss": 1.5864, "step": 643 }, { "epoch": 0.6491935483870968, "grad_norm": 0.08595956861972809, "learning_rate": 0.0001569503568178961, "loss": 1.593, "step": 644 }, { "epoch": 0.6502016129032258, "grad_norm": 0.0859324112534523, "learning_rate": 0.0001568167577395471, "loss": 1.6248, "step": 645 }, { "epoch": 0.6512096774193549, "grad_norm": 0.07949813455343246, "learning_rate": 0.00015668300873996095, "loss": 1.6269, "step": 646 }, { "epoch": 0.6522177419354839, "grad_norm": 0.08270735293626785, "learning_rate": 0.00015654911017205846, "loss": 1.6161, "step": 647 }, { "epoch": 0.6532258064516129, "grad_norm": 0.08057011663913727, "learning_rate": 0.000156415062389155, "loss": 1.615, "step": 648 }, { "epoch": 0.6542338709677419, "grad_norm": 0.07924232631921768, "learning_rate": 0.00015628086574495992, "loss": 1.5898, "step": 649 }, { "epoch": 0.655241935483871, "grad_norm": 0.08501306176185608, "learning_rate": 0.00015614652059357508, "loss": 1.6709, "step": 650 }, { "epoch": 0.65625, "grad_norm": 0.08682959526777267, "learning_rate": 0.00015601202728949436, "loss": 1.6214, "step": 651 }, { "epoch": 0.657258064516129, "grad_norm": 0.08149803429841995, "learning_rate": 0.00015587738618760258, "loss": 1.6337, "step": 652 }, { "epoch": 0.6582661290322581, "grad_norm": 0.09022454917430878, "learning_rate": 0.00015574259764317448, "loss": 1.5809, "step": 653 }, { "epoch": 0.6592741935483871, "grad_norm": 0.08189895004034042, "learning_rate": 0.00015560766201187386, "loss": 1.6188, "step": 654 }, { "epoch": 0.6602822580645161, "grad_norm": 0.080174021422863, "learning_rate": 0.00015547257964975273, "loss": 1.5991, "step": 655 }, { "epoch": 0.6612903225806451, "grad_norm": 0.08346089720726013, "learning_rate": 0.0001553373509132501, "loss": 1.5734, "step": 656 }, { "epoch": 0.6622983870967742, "grad_norm": 0.07657915353775024, "learning_rate": 0.00015520197615919145, "loss": 1.5422, "step": 657 }, { "epoch": 0.6633064516129032, "grad_norm": 0.08029603213071823, "learning_rate": 0.0001550664557447873, "loss": 1.5886, "step": 658 }, { "epoch": 0.6643145161290323, "grad_norm": 0.08529450744390488, "learning_rate": 0.0001549307900276327, "loss": 1.629, "step": 659 }, { "epoch": 0.6653225806451613, "grad_norm": 0.07882041484117508, "learning_rate": 0.0001547949793657061, "loss": 1.66, "step": 660 }, { "epoch": 0.6663306451612904, "grad_norm": 0.08514705300331116, "learning_rate": 0.00015465902411736828, "loss": 1.6113, "step": 661 }, { "epoch": 0.6673387096774194, "grad_norm": 0.07738941162824631, "learning_rate": 0.00015452292464136167, "loss": 1.5959, "step": 662 }, { "epoch": 0.6683467741935484, "grad_norm": 0.08031867444515228, "learning_rate": 0.0001543866812968092, "loss": 1.601, "step": 663 }, { "epoch": 0.6693548387096774, "grad_norm": 0.08055873215198517, "learning_rate": 0.00015425029444321347, "loss": 1.5731, "step": 664 }, { "epoch": 0.6703629032258065, "grad_norm": 0.08486857265233994, "learning_rate": 0.0001541137644404557, "loss": 1.5703, "step": 665 }, { "epoch": 0.6713709677419355, "grad_norm": 0.07934212684631348, "learning_rate": 0.0001539770916487949, "loss": 1.6163, "step": 666 }, { "epoch": 0.6723790322580645, "grad_norm": 0.08954691141843796, "learning_rate": 0.0001538402764288668, "loss": 1.6139, "step": 667 }, { "epoch": 0.6733870967741935, "grad_norm": 0.08842763304710388, "learning_rate": 0.00015370331914168296, "loss": 1.6322, "step": 668 }, { "epoch": 0.6743951612903226, "grad_norm": 0.08686459064483643, "learning_rate": 0.00015356622014862988, "loss": 1.59, "step": 669 }, { "epoch": 0.6754032258064516, "grad_norm": 0.07980991154909134, "learning_rate": 0.00015342897981146785, "loss": 1.576, "step": 670 }, { "epoch": 0.6764112903225806, "grad_norm": 0.08613515645265579, "learning_rate": 0.00015329159849233022, "loss": 1.6328, "step": 671 }, { "epoch": 0.6774193548387096, "grad_norm": 0.10668696463108063, "learning_rate": 0.0001531540765537223, "loss": 1.6482, "step": 672 }, { "epoch": 0.6784274193548387, "grad_norm": 0.07826445251703262, "learning_rate": 0.00015301641435852046, "loss": 1.5984, "step": 673 }, { "epoch": 0.6794354838709677, "grad_norm": 0.09749854356050491, "learning_rate": 0.00015287861226997125, "loss": 1.586, "step": 674 }, { "epoch": 0.6804435483870968, "grad_norm": 0.09301649779081345, "learning_rate": 0.00015274067065169017, "loss": 1.6806, "step": 675 }, { "epoch": 0.6814516129032258, "grad_norm": 0.08719351887702942, "learning_rate": 0.00015260258986766104, "loss": 1.5568, "step": 676 }, { "epoch": 0.6824596774193549, "grad_norm": 0.08005709946155548, "learning_rate": 0.00015246437028223486, "loss": 1.6252, "step": 677 }, { "epoch": 0.6834677419354839, "grad_norm": 0.08304545283317566, "learning_rate": 0.00015232601226012886, "loss": 1.6137, "step": 678 }, { "epoch": 0.6844758064516129, "grad_norm": 0.07949443906545639, "learning_rate": 0.0001521875161664256, "loss": 1.5808, "step": 679 }, { "epoch": 0.6854838709677419, "grad_norm": 0.08979618549346924, "learning_rate": 0.00015204888236657188, "loss": 1.6164, "step": 680 }, { "epoch": 0.686491935483871, "grad_norm": 0.07843173295259476, "learning_rate": 0.00015191011122637796, "loss": 1.6246, "step": 681 }, { "epoch": 0.6875, "grad_norm": 0.09026903659105301, "learning_rate": 0.00015177120311201647, "loss": 1.6352, "step": 682 }, { "epoch": 0.688508064516129, "grad_norm": 0.09385894238948822, "learning_rate": 0.00015163215839002146, "loss": 1.622, "step": 683 }, { "epoch": 0.6895161290322581, "grad_norm": 0.07961908727884293, "learning_rate": 0.0001514929774272874, "loss": 1.5745, "step": 684 }, { "epoch": 0.6905241935483871, "grad_norm": 0.08670490235090256, "learning_rate": 0.00015135366059106832, "loss": 1.5945, "step": 685 }, { "epoch": 0.6915322580645161, "grad_norm": 0.08476680517196655, "learning_rate": 0.00015121420824897678, "loss": 1.6316, "step": 686 }, { "epoch": 0.6925403225806451, "grad_norm": 0.0937148854136467, "learning_rate": 0.00015107462076898289, "loss": 1.6054, "step": 687 }, { "epoch": 0.6935483870967742, "grad_norm": 0.08981835842132568, "learning_rate": 0.00015093489851941328, "loss": 1.6683, "step": 688 }, { "epoch": 0.6945564516129032, "grad_norm": 0.08677362650632858, "learning_rate": 0.0001507950418689503, "loss": 1.6306, "step": 689 }, { "epoch": 0.6955645161290323, "grad_norm": 0.07769922912120819, "learning_rate": 0.00015065505118663078, "loss": 1.6164, "step": 690 }, { "epoch": 0.6965725806451613, "grad_norm": 0.08614321053028107, "learning_rate": 0.00015051492684184546, "loss": 1.5615, "step": 691 }, { "epoch": 0.6975806451612904, "grad_norm": 0.09230528026819229, "learning_rate": 0.00015037466920433753, "loss": 1.6901, "step": 692 }, { "epoch": 0.6985887096774194, "grad_norm": 0.09350752830505371, "learning_rate": 0.00015023427864420202, "loss": 1.6465, "step": 693 }, { "epoch": 0.6995967741935484, "grad_norm": 0.09468571841716766, "learning_rate": 0.00015009375553188468, "loss": 1.6485, "step": 694 }, { "epoch": 0.7006048387096774, "grad_norm": 0.08464954793453217, "learning_rate": 0.00014995310023818107, "loss": 1.5865, "step": 695 }, { "epoch": 0.7016129032258065, "grad_norm": 0.09060323238372803, "learning_rate": 0.00014981231313423545, "loss": 1.6074, "step": 696 }, { "epoch": 0.7026209677419355, "grad_norm": 0.08714771270751953, "learning_rate": 0.00014967139459153993, "loss": 1.5824, "step": 697 }, { "epoch": 0.7036290322580645, "grad_norm": 0.0776834785938263, "learning_rate": 0.00014953034498193341, "loss": 1.5689, "step": 698 }, { "epoch": 0.7046370967741935, "grad_norm": 0.08315813541412354, "learning_rate": 0.0001493891646776007, "loss": 1.6187, "step": 699 }, { "epoch": 0.7056451612903226, "grad_norm": 0.07914920896291733, "learning_rate": 0.00014924785405107143, "loss": 1.5417, "step": 700 }, { "epoch": 0.7066532258064516, "grad_norm": 0.08314627408981323, "learning_rate": 0.00014910641347521907, "loss": 1.6298, "step": 701 }, { "epoch": 0.7076612903225806, "grad_norm": 0.07665257155895233, "learning_rate": 0.0001489648433232601, "loss": 1.5464, "step": 702 }, { "epoch": 0.7086693548387096, "grad_norm": 0.09670589119195938, "learning_rate": 0.00014882314396875274, "loss": 1.654, "step": 703 }, { "epoch": 0.7096774193548387, "grad_norm": 0.08459917455911636, "learning_rate": 0.00014868131578559633, "loss": 1.6326, "step": 704 }, { "epoch": 0.7106854838709677, "grad_norm": 0.08236029744148254, "learning_rate": 0.00014853935914802994, "loss": 1.59, "step": 705 }, { "epoch": 0.7116935483870968, "grad_norm": 0.07780009508132935, "learning_rate": 0.0001483972744306318, "loss": 1.5801, "step": 706 }, { "epoch": 0.7127016129032258, "grad_norm": 0.0835953950881958, "learning_rate": 0.00014825506200831794, "loss": 1.5765, "step": 707 }, { "epoch": 0.7137096774193549, "grad_norm": 0.08014727383852005, "learning_rate": 0.00014811272225634145, "loss": 1.6156, "step": 708 }, { "epoch": 0.7147177419354839, "grad_norm": 0.08108653128147125, "learning_rate": 0.00014797025555029133, "loss": 1.5825, "step": 709 }, { "epoch": 0.7157258064516129, "grad_norm": 0.08455085754394531, "learning_rate": 0.00014782766226609166, "loss": 1.6218, "step": 710 }, { "epoch": 0.7167338709677419, "grad_norm": 0.07630985975265503, "learning_rate": 0.00014768494278000048, "loss": 1.5889, "step": 711 }, { "epoch": 0.717741935483871, "grad_norm": 0.08318428695201874, "learning_rate": 0.00014754209746860878, "loss": 1.5827, "step": 712 }, { "epoch": 0.71875, "grad_norm": 0.08248715102672577, "learning_rate": 0.00014739912670883967, "loss": 1.621, "step": 713 }, { "epoch": 0.719758064516129, "grad_norm": 0.07857991755008698, "learning_rate": 0.00014725603087794716, "loss": 1.5605, "step": 714 }, { "epoch": 0.7207661290322581, "grad_norm": 0.08540824055671692, "learning_rate": 0.0001471128103535154, "loss": 1.5471, "step": 715 }, { "epoch": 0.7217741935483871, "grad_norm": 0.0777583196759224, "learning_rate": 0.00014696946551345747, "loss": 1.5029, "step": 716 }, { "epoch": 0.7227822580645161, "grad_norm": 0.08295831829309464, "learning_rate": 0.00014682599673601458, "loss": 1.5709, "step": 717 }, { "epoch": 0.7237903225806451, "grad_norm": 0.08069245517253876, "learning_rate": 0.00014668240439975482, "loss": 1.5601, "step": 718 }, { "epoch": 0.7247983870967742, "grad_norm": 0.08142071962356567, "learning_rate": 0.00014653868888357249, "loss": 1.6004, "step": 719 }, { "epoch": 0.7258064516129032, "grad_norm": 0.09048129618167877, "learning_rate": 0.0001463948505666868, "loss": 1.6614, "step": 720 }, { "epoch": 0.7268145161290323, "grad_norm": 0.09065764397382736, "learning_rate": 0.00014625088982864098, "loss": 1.6612, "step": 721 }, { "epoch": 0.7278225806451613, "grad_norm": 0.0859372541308403, "learning_rate": 0.00014610680704930142, "loss": 1.5914, "step": 722 }, { "epoch": 0.7288306451612904, "grad_norm": 0.0821571797132492, "learning_rate": 0.0001459626026088564, "loss": 1.5458, "step": 723 }, { "epoch": 0.7298387096774194, "grad_norm": 0.08414388447999954, "learning_rate": 0.0001458182768878153, "loss": 1.5608, "step": 724 }, { "epoch": 0.7308467741935484, "grad_norm": 0.08222994953393936, "learning_rate": 0.00014567383026700752, "loss": 1.5943, "step": 725 }, { "epoch": 0.7318548387096774, "grad_norm": 0.08996201306581497, "learning_rate": 0.0001455292631275814, "loss": 1.5524, "step": 726 }, { "epoch": 0.7328629032258065, "grad_norm": 0.08061891794204712, "learning_rate": 0.0001453845758510034, "loss": 1.6428, "step": 727 }, { "epoch": 0.7338709677419355, "grad_norm": 0.09720771759748459, "learning_rate": 0.0001452397688190569, "loss": 1.6538, "step": 728 }, { "epoch": 0.7348790322580645, "grad_norm": 0.08087541162967682, "learning_rate": 0.00014509484241384134, "loss": 1.6078, "step": 729 }, { "epoch": 0.7358870967741935, "grad_norm": 0.09106358885765076, "learning_rate": 0.00014494979701777102, "loss": 1.589, "step": 730 }, { "epoch": 0.7368951612903226, "grad_norm": 0.07827623188495636, "learning_rate": 0.00014480463301357445, "loss": 1.5937, "step": 731 }, { "epoch": 0.7379032258064516, "grad_norm": 0.09681122750043869, "learning_rate": 0.00014465935078429286, "loss": 1.6308, "step": 732 }, { "epoch": 0.7389112903225806, "grad_norm": 0.0876043364405632, "learning_rate": 0.00014451395071327964, "loss": 1.6136, "step": 733 }, { "epoch": 0.7399193548387096, "grad_norm": 0.10326588153839111, "learning_rate": 0.00014436843318419896, "loss": 1.5964, "step": 734 }, { "epoch": 0.7409274193548387, "grad_norm": 0.08790312707424164, "learning_rate": 0.00014422279858102504, "loss": 1.5992, "step": 735 }, { "epoch": 0.7419354838709677, "grad_norm": 0.0805894061923027, "learning_rate": 0.00014407704728804097, "loss": 1.5503, "step": 736 }, { "epoch": 0.7429435483870968, "grad_norm": 0.0813809409737587, "learning_rate": 0.00014393117968983777, "loss": 1.5807, "step": 737 }, { "epoch": 0.7439516129032258, "grad_norm": 0.0871429443359375, "learning_rate": 0.0001437851961713133, "loss": 1.6493, "step": 738 }, { "epoch": 0.7449596774193549, "grad_norm": 0.08929460495710373, "learning_rate": 0.0001436390971176714, "loss": 1.58, "step": 739 }, { "epoch": 0.7459677419354839, "grad_norm": 0.08278234302997589, "learning_rate": 0.0001434928829144206, "loss": 1.6442, "step": 740 }, { "epoch": 0.7469758064516129, "grad_norm": 0.09997319430112839, "learning_rate": 0.00014334655394737355, "loss": 1.5756, "step": 741 }, { "epoch": 0.7479838709677419, "grad_norm": 0.07914005219936371, "learning_rate": 0.0001432001106026454, "loss": 1.5642, "step": 742 }, { "epoch": 0.748991935483871, "grad_norm": 0.09618489444255829, "learning_rate": 0.00014305355326665339, "loss": 1.6108, "step": 743 }, { "epoch": 0.75, "grad_norm": 0.09149473160505295, "learning_rate": 0.00014290688232611526, "loss": 1.6007, "step": 744 }, { "epoch": 0.751008064516129, "grad_norm": 0.08550098538398743, "learning_rate": 0.00014276009816804885, "loss": 1.588, "step": 745 }, { "epoch": 0.7520161290322581, "grad_norm": 0.08285672217607498, "learning_rate": 0.00014261320117977042, "loss": 1.5845, "step": 746 }, { "epoch": 0.7530241935483871, "grad_norm": 0.09440962970256805, "learning_rate": 0.00014246619174889422, "loss": 1.7127, "step": 747 }, { "epoch": 0.7540322580645161, "grad_norm": 0.08045286685228348, "learning_rate": 0.00014231907026333098, "loss": 1.6066, "step": 748 }, { "epoch": 0.7550403225806451, "grad_norm": 0.08301718533039093, "learning_rate": 0.0001421718371112873, "loss": 1.5732, "step": 749 }, { "epoch": 0.7560483870967742, "grad_norm": 0.08225584775209427, "learning_rate": 0.00014202449268126426, "loss": 1.563, "step": 750 }, { "epoch": 0.7570564516129032, "grad_norm": 0.08871738612651825, "learning_rate": 0.00014187703736205667, "loss": 1.6364, "step": 751 }, { "epoch": 0.7580645161290323, "grad_norm": 0.08189701288938522, "learning_rate": 0.00014172947154275195, "loss": 1.5972, "step": 752 }, { "epoch": 0.7590725806451613, "grad_norm": 0.08560924977064133, "learning_rate": 0.00014158179561272907, "loss": 1.5971, "step": 753 }, { "epoch": 0.7600806451612904, "grad_norm": 0.08616410940885544, "learning_rate": 0.00014143400996165746, "loss": 1.6331, "step": 754 }, { "epoch": 0.7610887096774194, "grad_norm": 0.08963197469711304, "learning_rate": 0.00014128611497949626, "loss": 1.5887, "step": 755 }, { "epoch": 0.7620967741935484, "grad_norm": 0.09272851049900055, "learning_rate": 0.0001411381110564929, "loss": 1.5692, "step": 756 }, { "epoch": 0.7631048387096774, "grad_norm": 0.08667407929897308, "learning_rate": 0.0001409899985831824, "loss": 1.5852, "step": 757 }, { "epoch": 0.7641129032258065, "grad_norm": 0.08354497700929642, "learning_rate": 0.00014084177795038613, "loss": 1.6024, "step": 758 }, { "epoch": 0.7651209677419355, "grad_norm": 0.09121601283550262, "learning_rate": 0.00014069344954921096, "loss": 1.5896, "step": 759 }, { "epoch": 0.7661290322580645, "grad_norm": 0.09622003138065338, "learning_rate": 0.00014054501377104797, "loss": 1.5781, "step": 760 }, { "epoch": 0.7671370967741935, "grad_norm": 0.08506747335195541, "learning_rate": 0.00014039647100757177, "loss": 1.5752, "step": 761 }, { "epoch": 0.7681451612903226, "grad_norm": 0.09725549817085266, "learning_rate": 0.00014024782165073912, "loss": 1.599, "step": 762 }, { "epoch": 0.7691532258064516, "grad_norm": 0.08023160696029663, "learning_rate": 0.00014009906609278806, "loss": 1.5503, "step": 763 }, { "epoch": 0.7701612903225806, "grad_norm": 0.092674620449543, "learning_rate": 0.00013995020472623693, "loss": 1.6196, "step": 764 }, { "epoch": 0.7711693548387096, "grad_norm": 0.07756571471691132, "learning_rate": 0.0001398012379438832, "loss": 1.599, "step": 765 }, { "epoch": 0.7721774193548387, "grad_norm": 0.09609861671924591, "learning_rate": 0.00013965216613880257, "loss": 1.6356, "step": 766 }, { "epoch": 0.7731854838709677, "grad_norm": 0.08073242753744125, "learning_rate": 0.00013950298970434775, "loss": 1.5975, "step": 767 }, { "epoch": 0.7741935483870968, "grad_norm": 0.08342421054840088, "learning_rate": 0.00013935370903414768, "loss": 1.594, "step": 768 }, { "epoch": 0.7752016129032258, "grad_norm": 0.07886181771755219, "learning_rate": 0.00013920432452210619, "loss": 1.5947, "step": 769 }, { "epoch": 0.7762096774193549, "grad_norm": 0.08256496489048004, "learning_rate": 0.00013905483656240125, "loss": 1.5772, "step": 770 }, { "epoch": 0.7772177419354839, "grad_norm": 0.08527923375368118, "learning_rate": 0.0001389052455494837, "loss": 1.5936, "step": 771 }, { "epoch": 0.7782258064516129, "grad_norm": 0.08340179920196533, "learning_rate": 0.00013875555187807637, "loss": 1.5786, "step": 772 }, { "epoch": 0.7792338709677419, "grad_norm": 0.07682585716247559, "learning_rate": 0.00013860575594317292, "loss": 1.542, "step": 773 }, { "epoch": 0.780241935483871, "grad_norm": 0.08884165436029434, "learning_rate": 0.00013845585814003684, "loss": 1.5969, "step": 774 }, { "epoch": 0.78125, "grad_norm": 0.07785353809595108, "learning_rate": 0.00013830585886420054, "loss": 1.5671, "step": 775 }, { "epoch": 0.782258064516129, "grad_norm": 0.08034134656190872, "learning_rate": 0.000138155758511464, "loss": 1.5774, "step": 776 }, { "epoch": 0.7832661290322581, "grad_norm": 0.0796407014131546, "learning_rate": 0.0001380055574778941, "loss": 1.5606, "step": 777 }, { "epoch": 0.7842741935483871, "grad_norm": 0.07933478057384491, "learning_rate": 0.00013785525615982319, "loss": 1.5651, "step": 778 }, { "epoch": 0.7852822580645161, "grad_norm": 0.08734553307294846, "learning_rate": 0.00013770485495384843, "loss": 1.6262, "step": 779 }, { "epoch": 0.7862903225806451, "grad_norm": 0.08349025249481201, "learning_rate": 0.0001375543542568304, "loss": 1.5835, "step": 780 }, { "epoch": 0.7872983870967742, "grad_norm": 0.09640732407569885, "learning_rate": 0.00013740375446589232, "loss": 1.586, "step": 781 }, { "epoch": 0.7883064516129032, "grad_norm": 0.09520639479160309, "learning_rate": 0.00013725305597841878, "loss": 1.6521, "step": 782 }, { "epoch": 0.7893145161290323, "grad_norm": 0.07939834147691727, "learning_rate": 0.00013710225919205484, "loss": 1.5062, "step": 783 }, { "epoch": 0.7903225806451613, "grad_norm": 0.08648645132780075, "learning_rate": 0.000136951364504705, "loss": 1.6303, "step": 784 }, { "epoch": 0.7913306451612904, "grad_norm": 0.09467138350009918, "learning_rate": 0.00013680037231453203, "loss": 1.6333, "step": 785 }, { "epoch": 0.7923387096774194, "grad_norm": 0.08505504578351974, "learning_rate": 0.000136649283019956, "loss": 1.5953, "step": 786 }, { "epoch": 0.7933467741935484, "grad_norm": 0.0903257429599762, "learning_rate": 0.00013649809701965311, "loss": 1.5841, "step": 787 }, { "epoch": 0.7943548387096774, "grad_norm": 0.08327475190162659, "learning_rate": 0.00013634681471255493, "loss": 1.578, "step": 788 }, { "epoch": 0.7953629032258065, "grad_norm": 0.09311467409133911, "learning_rate": 0.000136195436497847, "loss": 1.5911, "step": 789 }, { "epoch": 0.7963709677419355, "grad_norm": 0.09214780479669571, "learning_rate": 0.00013604396277496796, "loss": 1.6009, "step": 790 }, { "epoch": 0.7973790322580645, "grad_norm": 0.08812731504440308, "learning_rate": 0.00013589239394360848, "loss": 1.6141, "step": 791 }, { "epoch": 0.7983870967741935, "grad_norm": 0.11389174312353134, "learning_rate": 0.00013574073040371022, "loss": 1.6369, "step": 792 }, { "epoch": 0.7993951612903226, "grad_norm": 0.08469700813293457, "learning_rate": 0.00013558897255546473, "loss": 1.6009, "step": 793 }, { "epoch": 0.8004032258064516, "grad_norm": 0.08306135982275009, "learning_rate": 0.0001354371207993123, "loss": 1.5556, "step": 794 }, { "epoch": 0.8014112903225806, "grad_norm": 0.08287226408720016, "learning_rate": 0.00013528517553594124, "loss": 1.571, "step": 795 }, { "epoch": 0.8024193548387096, "grad_norm": 0.0797332376241684, "learning_rate": 0.00013513313716628637, "loss": 1.5679, "step": 796 }, { "epoch": 0.8034274193548387, "grad_norm": 0.07978206872940063, "learning_rate": 0.0001349810060915283, "loss": 1.5865, "step": 797 }, { "epoch": 0.8044354838709677, "grad_norm": 0.07792511582374573, "learning_rate": 0.00013482878271309226, "loss": 1.5849, "step": 798 }, { "epoch": 0.8054435483870968, "grad_norm": 0.07994278520345688, "learning_rate": 0.000134676467432647, "loss": 1.6026, "step": 799 }, { "epoch": 0.8064516129032258, "grad_norm": 0.08317188918590546, "learning_rate": 0.00013452406065210382, "loss": 1.6333, "step": 800 }, { "epoch": 0.8074596774193549, "grad_norm": 0.09058106690645218, "learning_rate": 0.00013437156277361538, "loss": 1.5936, "step": 801 }, { "epoch": 0.8084677419354839, "grad_norm": 0.08963512629270554, "learning_rate": 0.00013421897419957482, "loss": 1.6422, "step": 802 }, { "epoch": 0.8094758064516129, "grad_norm": 0.09142173826694489, "learning_rate": 0.0001340662953326145, "loss": 1.6779, "step": 803 }, { "epoch": 0.8104838709677419, "grad_norm": 0.08868789672851562, "learning_rate": 0.00013391352657560513, "loss": 1.6594, "step": 804 }, { "epoch": 0.811491935483871, "grad_norm": 0.08746343106031418, "learning_rate": 0.0001337606683316545, "loss": 1.5312, "step": 805 }, { "epoch": 0.8125, "grad_norm": 0.07589108496904373, "learning_rate": 0.00013360772100410665, "loss": 1.5462, "step": 806 }, { "epoch": 0.813508064516129, "grad_norm": 0.0817432850599289, "learning_rate": 0.00013345468499654056, "loss": 1.5393, "step": 807 }, { "epoch": 0.8145161290322581, "grad_norm": 0.07965264469385147, "learning_rate": 0.00013330156071276932, "loss": 1.5687, "step": 808 }, { "epoch": 0.8155241935483871, "grad_norm": 0.08861200511455536, "learning_rate": 0.00013314834855683886, "loss": 1.6412, "step": 809 }, { "epoch": 0.8165322580645161, "grad_norm": 0.07894746214151382, "learning_rate": 0.00013299504893302705, "loss": 1.5738, "step": 810 }, { "epoch": 0.8175403225806451, "grad_norm": 0.07987947016954422, "learning_rate": 0.00013284166224584253, "loss": 1.6212, "step": 811 }, { "epoch": 0.8185483870967742, "grad_norm": 0.09027516096830368, "learning_rate": 0.0001326881889000236, "loss": 1.6113, "step": 812 }, { "epoch": 0.8195564516129032, "grad_norm": 0.11448541283607483, "learning_rate": 0.00013253462930053742, "loss": 1.6315, "step": 813 }, { "epoch": 0.8205645161290323, "grad_norm": 0.08771926164627075, "learning_rate": 0.00013238098385257848, "loss": 1.5919, "step": 814 }, { "epoch": 0.8215725806451613, "grad_norm": 0.09016083925962448, "learning_rate": 0.00013222725296156807, "loss": 1.5629, "step": 815 }, { "epoch": 0.8225806451612904, "grad_norm": 0.08411089330911636, "learning_rate": 0.0001320734370331527, "loss": 1.6037, "step": 816 }, { "epoch": 0.8235887096774194, "grad_norm": 0.09559720754623413, "learning_rate": 0.0001319195364732034, "loss": 1.5463, "step": 817 }, { "epoch": 0.8245967741935484, "grad_norm": 0.10408146679401398, "learning_rate": 0.00013176555168781451, "loss": 1.5768, "step": 818 }, { "epoch": 0.8256048387096774, "grad_norm": 0.09700962156057358, "learning_rate": 0.00013161148308330257, "loss": 1.5739, "step": 819 }, { "epoch": 0.8266129032258065, "grad_norm": 0.10024348646402359, "learning_rate": 0.00013145733106620532, "loss": 1.6281, "step": 820 }, { "epoch": 0.8276209677419355, "grad_norm": 0.09777159988880157, "learning_rate": 0.00013130309604328057, "loss": 1.6059, "step": 821 }, { "epoch": 0.8286290322580645, "grad_norm": 0.0887807309627533, "learning_rate": 0.00013114877842150516, "loss": 1.5857, "step": 822 }, { "epoch": 0.8296370967741935, "grad_norm": 0.09031641483306885, "learning_rate": 0.000130994378608074, "loss": 1.5523, "step": 823 }, { "epoch": 0.8306451612903226, "grad_norm": 0.0985943153500557, "learning_rate": 0.00013083989701039868, "loss": 1.5464, "step": 824 }, { "epoch": 0.8316532258064516, "grad_norm": 0.09250693768262863, "learning_rate": 0.0001306853340361067, "loss": 1.5564, "step": 825 }, { "epoch": 0.8326612903225806, "grad_norm": 0.10353913903236389, "learning_rate": 0.0001305306900930403, "loss": 1.6126, "step": 826 }, { "epoch": 0.8336693548387096, "grad_norm": 0.10408423840999603, "learning_rate": 0.00013037596558925532, "loss": 1.5946, "step": 827 }, { "epoch": 0.8346774193548387, "grad_norm": 0.09186139702796936, "learning_rate": 0.00013022116093302022, "loss": 1.5692, "step": 828 }, { "epoch": 0.8356854838709677, "grad_norm": 0.08551473915576935, "learning_rate": 0.00013006627653281493, "loss": 1.5486, "step": 829 }, { "epoch": 0.8366935483870968, "grad_norm": 0.0928485244512558, "learning_rate": 0.0001299113127973298, "loss": 1.5435, "step": 830 }, { "epoch": 0.8377016129032258, "grad_norm": 0.08251947164535522, "learning_rate": 0.00012975627013546453, "loss": 1.5519, "step": 831 }, { "epoch": 0.8387096774193549, "grad_norm": 0.09292181581258774, "learning_rate": 0.0001296011489563271, "loss": 1.6129, "step": 832 }, { "epoch": 0.8397177419354839, "grad_norm": 0.07900629937648773, "learning_rate": 0.00012944594966923263, "loss": 1.5951, "step": 833 }, { "epoch": 0.8407258064516129, "grad_norm": 0.08966945856809616, "learning_rate": 0.00012929067268370234, "loss": 1.5484, "step": 834 }, { "epoch": 0.8417338709677419, "grad_norm": 0.08244184404611588, "learning_rate": 0.00012913531840946248, "loss": 1.5852, "step": 835 }, { "epoch": 0.842741935483871, "grad_norm": 0.0986471101641655, "learning_rate": 0.00012897988725644335, "loss": 1.5797, "step": 836 }, { "epoch": 0.84375, "grad_norm": 0.09217972308397293, "learning_rate": 0.0001288243796347779, "loss": 1.6433, "step": 837 }, { "epoch": 0.844758064516129, "grad_norm": 0.07959865033626556, "learning_rate": 0.00012866879595480098, "loss": 1.5639, "step": 838 }, { "epoch": 0.8457661290322581, "grad_norm": 0.08987965434789658, "learning_rate": 0.0001285131366270482, "loss": 1.567, "step": 839 }, { "epoch": 0.8467741935483871, "grad_norm": 0.08139210939407349, "learning_rate": 0.00012835740206225464, "loss": 1.5881, "step": 840 }, { "epoch": 0.8477822580645161, "grad_norm": 0.09342298656702042, "learning_rate": 0.00012820159267135396, "loss": 1.6147, "step": 841 }, { "epoch": 0.8487903225806451, "grad_norm": 0.08475241810083389, "learning_rate": 0.0001280457088654773, "loss": 1.6063, "step": 842 }, { "epoch": 0.8497983870967742, "grad_norm": 0.0910174772143364, "learning_rate": 0.00012788975105595214, "loss": 1.6055, "step": 843 }, { "epoch": 0.8508064516129032, "grad_norm": 0.08082278817892075, "learning_rate": 0.00012773371965430115, "loss": 1.5668, "step": 844 }, { "epoch": 0.8518145161290323, "grad_norm": 0.0862516313791275, "learning_rate": 0.00012757761507224132, "loss": 1.5415, "step": 845 }, { "epoch": 0.8528225806451613, "grad_norm": 0.07902859151363373, "learning_rate": 0.00012742143772168264, "loss": 1.5333, "step": 846 }, { "epoch": 0.8538306451612904, "grad_norm": 0.090780109167099, "learning_rate": 0.00012726518801472718, "loss": 1.6311, "step": 847 }, { "epoch": 0.8548387096774194, "grad_norm": 0.08239061385393143, "learning_rate": 0.0001271088663636679, "loss": 1.5331, "step": 848 }, { "epoch": 0.8558467741935484, "grad_norm": 0.08999927341938019, "learning_rate": 0.0001269524731809875, "loss": 1.5775, "step": 849 }, { "epoch": 0.8568548387096774, "grad_norm": 0.07954005897045135, "learning_rate": 0.00012679600887935768, "loss": 1.5969, "step": 850 }, { "epoch": 0.8578629032258065, "grad_norm": 0.08286864310503006, "learning_rate": 0.00012663947387163755, "loss": 1.551, "step": 851 }, { "epoch": 0.8588709677419355, "grad_norm": 0.08236175030469894, "learning_rate": 0.00012648286857087294, "loss": 1.5575, "step": 852 }, { "epoch": 0.8598790322580645, "grad_norm": 0.08063997328281403, "learning_rate": 0.00012632619339029508, "loss": 1.5899, "step": 853 }, { "epoch": 0.8608870967741935, "grad_norm": 0.08329153805971146, "learning_rate": 0.00012616944874331963, "loss": 1.5523, "step": 854 }, { "epoch": 0.8618951612903226, "grad_norm": 0.08181768655776978, "learning_rate": 0.00012601263504354555, "loss": 1.5743, "step": 855 }, { "epoch": 0.8629032258064516, "grad_norm": 0.07989370822906494, "learning_rate": 0.00012585575270475402, "loss": 1.5629, "step": 856 }, { "epoch": 0.8639112903225806, "grad_norm": 0.0804544985294342, "learning_rate": 0.00012569880214090726, "loss": 1.5573, "step": 857 }, { "epoch": 0.8649193548387096, "grad_norm": 0.08739953488111496, "learning_rate": 0.0001255417837661476, "loss": 1.5705, "step": 858 }, { "epoch": 0.8659274193548387, "grad_norm": 0.08386445045471191, "learning_rate": 0.00012538469799479627, "loss": 1.6106, "step": 859 }, { "epoch": 0.8669354838709677, "grad_norm": 0.10252925008535385, "learning_rate": 0.00012522754524135228, "loss": 1.5472, "step": 860 }, { "epoch": 0.8679435483870968, "grad_norm": 0.08197301626205444, "learning_rate": 0.0001250703259204916, "loss": 1.5955, "step": 861 }, { "epoch": 0.8689516129032258, "grad_norm": 0.09445837140083313, "learning_rate": 0.00012491304044706553, "loss": 1.5536, "step": 862 }, { "epoch": 0.8699596774193549, "grad_norm": 0.0779092088341713, "learning_rate": 0.00012475568923610015, "loss": 1.5235, "step": 863 }, { "epoch": 0.8709677419354839, "grad_norm": 0.08657954633235931, "learning_rate": 0.00012459827270279499, "loss": 1.5306, "step": 864 }, { "epoch": 0.8719758064516129, "grad_norm": 0.08000969886779785, "learning_rate": 0.0001244407912625218, "loss": 1.5451, "step": 865 }, { "epoch": 0.8729838709677419, "grad_norm": 0.1217707023024559, "learning_rate": 0.00012428324533082376, "loss": 1.5896, "step": 866 }, { "epoch": 0.873991935483871, "grad_norm": 0.09770061075687408, "learning_rate": 0.00012412563532341413, "loss": 1.5649, "step": 867 }, { "epoch": 0.875, "grad_norm": 0.08925329893827438, "learning_rate": 0.0001239679616561753, "loss": 1.59, "step": 868 }, { "epoch": 0.876008064516129, "grad_norm": 0.0919514149427414, "learning_rate": 0.0001238102247451575, "loss": 1.6517, "step": 869 }, { "epoch": 0.8770161290322581, "grad_norm": 0.0922718271613121, "learning_rate": 0.0001236524250065781, "loss": 1.6104, "step": 870 }, { "epoch": 0.8780241935483871, "grad_norm": 0.08782748132944107, "learning_rate": 0.00012349456285682002, "loss": 1.6027, "step": 871 }, { "epoch": 0.8790322580645161, "grad_norm": 0.08689384907484055, "learning_rate": 0.00012333663871243094, "loss": 1.5969, "step": 872 }, { "epoch": 0.8800403225806451, "grad_norm": 0.08294008672237396, "learning_rate": 0.00012317865299012212, "loss": 1.5852, "step": 873 }, { "epoch": 0.8810483870967742, "grad_norm": 0.1106681302189827, "learning_rate": 0.00012302060610676737, "loss": 1.622, "step": 874 }, { "epoch": 0.8820564516129032, "grad_norm": 0.10415118932723999, "learning_rate": 0.00012286249847940178, "loss": 1.6416, "step": 875 }, { "epoch": 0.8830645161290323, "grad_norm": 0.08293262124061584, "learning_rate": 0.00012270433052522073, "loss": 1.5963, "step": 876 }, { "epoch": 0.8840725806451613, "grad_norm": 0.09230700880289078, "learning_rate": 0.0001225461026615789, "loss": 1.6242, "step": 877 }, { "epoch": 0.8850806451612904, "grad_norm": 0.08799263834953308, "learning_rate": 0.00012238781530598896, "loss": 1.5607, "step": 878 }, { "epoch": 0.8860887096774194, "grad_norm": 0.08640427887439728, "learning_rate": 0.00012222946887612056, "loss": 1.6114, "step": 879 }, { "epoch": 0.8870967741935484, "grad_norm": 0.08553026616573334, "learning_rate": 0.0001220710637897992, "loss": 1.5549, "step": 880 }, { "epoch": 0.8881048387096774, "grad_norm": 0.0878986194729805, "learning_rate": 0.00012191260046500525, "loss": 1.5697, "step": 881 }, { "epoch": 0.8891129032258065, "grad_norm": 0.08509572595357895, "learning_rate": 0.00012175407931987273, "loss": 1.6237, "step": 882 }, { "epoch": 0.8901209677419355, "grad_norm": 0.09629905223846436, "learning_rate": 0.0001215955007726881, "loss": 1.5869, "step": 883 }, { "epoch": 0.8911290322580645, "grad_norm": 0.07942201942205429, "learning_rate": 0.00012143686524188954, "loss": 1.5933, "step": 884 }, { "epoch": 0.8921370967741935, "grad_norm": 0.0878920629620552, "learning_rate": 0.00012127817314606526, "loss": 1.5485, "step": 885 }, { "epoch": 0.8931451612903226, "grad_norm": 0.07961869984865189, "learning_rate": 0.00012111942490395305, "loss": 1.571, "step": 886 }, { "epoch": 0.8941532258064516, "grad_norm": 0.08690143376588821, "learning_rate": 0.00012096062093443863, "loss": 1.5437, "step": 887 }, { "epoch": 0.8951612903225806, "grad_norm": 0.08331328630447388, "learning_rate": 0.00012080176165655488, "loss": 1.5967, "step": 888 }, { "epoch": 0.8961693548387096, "grad_norm": 0.08849766850471497, "learning_rate": 0.00012064284748948053, "loss": 1.6156, "step": 889 }, { "epoch": 0.8971774193548387, "grad_norm": 0.08413555473089218, "learning_rate": 0.00012048387885253925, "loss": 1.5603, "step": 890 }, { "epoch": 0.8981854838709677, "grad_norm": 0.08616600930690765, "learning_rate": 0.0001203248561651984, "loss": 1.5682, "step": 891 }, { "epoch": 0.8991935483870968, "grad_norm": 0.08520584553480148, "learning_rate": 0.00012016577984706792, "loss": 1.6327, "step": 892 }, { "epoch": 0.9002016129032258, "grad_norm": 0.08620157837867737, "learning_rate": 0.0001200066503178993, "loss": 1.6143, "step": 893 }, { "epoch": 0.9012096774193549, "grad_norm": 0.07895144820213318, "learning_rate": 0.00011984746799758442, "loss": 1.5533, "step": 894 }, { "epoch": 0.9022177419354839, "grad_norm": 0.08743470162153244, "learning_rate": 0.0001196882333061545, "loss": 1.6004, "step": 895 }, { "epoch": 0.9032258064516129, "grad_norm": 0.08172673732042313, "learning_rate": 0.0001195289466637789, "loss": 1.6032, "step": 896 }, { "epoch": 0.9042338709677419, "grad_norm": 0.09668843448162079, "learning_rate": 0.00011936960849076411, "loss": 1.6198, "step": 897 }, { "epoch": 0.905241935483871, "grad_norm": 0.08503922075033188, "learning_rate": 0.00011921021920755253, "loss": 1.5638, "step": 898 }, { "epoch": 0.90625, "grad_norm": 0.0889093279838562, "learning_rate": 0.00011905077923472146, "loss": 1.624, "step": 899 }, { "epoch": 0.907258064516129, "grad_norm": 0.08409906178712845, "learning_rate": 0.00011889128899298198, "loss": 1.5562, "step": 900 }, { "epoch": 0.9082661290322581, "grad_norm": 0.08293265849351883, "learning_rate": 0.00011873174890317775, "loss": 1.5709, "step": 901 }, { "epoch": 0.9092741935483871, "grad_norm": 0.09479732066392899, "learning_rate": 0.00011857215938628403, "loss": 1.6222, "step": 902 }, { "epoch": 0.9102822580645161, "grad_norm": 0.08044169843196869, "learning_rate": 0.00011841252086340649, "loss": 1.5862, "step": 903 }, { "epoch": 0.9112903225806451, "grad_norm": 0.08543860912322998, "learning_rate": 0.00011825283375578005, "loss": 1.5764, "step": 904 }, { "epoch": 0.9122983870967742, "grad_norm": 0.08160272240638733, "learning_rate": 0.0001180930984847679, "loss": 1.5204, "step": 905 }, { "epoch": 0.9133064516129032, "grad_norm": 0.10486453771591187, "learning_rate": 0.00011793331547186026, "loss": 1.5921, "step": 906 }, { "epoch": 0.9143145161290323, "grad_norm": 0.0780840739607811, "learning_rate": 0.00011777348513867341, "loss": 1.5173, "step": 907 }, { "epoch": 0.9153225806451613, "grad_norm": 0.08347219228744507, "learning_rate": 0.00011761360790694837, "loss": 1.5543, "step": 908 }, { "epoch": 0.9163306451612904, "grad_norm": 0.09629109501838684, "learning_rate": 0.00011745368419855005, "loss": 1.6039, "step": 909 }, { "epoch": 0.9173387096774194, "grad_norm": 0.08534412831068039, "learning_rate": 0.00011729371443546587, "loss": 1.5787, "step": 910 }, { "epoch": 0.9183467741935484, "grad_norm": 0.08703077584505081, "learning_rate": 0.00011713369903980485, "loss": 1.6218, "step": 911 }, { "epoch": 0.9193548387096774, "grad_norm": 0.08057136088609695, "learning_rate": 0.00011697363843379641, "loss": 1.5475, "step": 912 }, { "epoch": 0.9203629032258065, "grad_norm": 0.09287240356206894, "learning_rate": 0.00011681353303978924, "loss": 1.5587, "step": 913 }, { "epoch": 0.9213709677419355, "grad_norm": 0.08380912989377975, "learning_rate": 0.00011665338328025027, "loss": 1.6194, "step": 914 }, { "epoch": 0.9223790322580645, "grad_norm": 0.08018894493579865, "learning_rate": 0.00011649318957776336, "loss": 1.545, "step": 915 }, { "epoch": 0.9233870967741935, "grad_norm": 0.07932014018297195, "learning_rate": 0.00011633295235502851, "loss": 1.5688, "step": 916 }, { "epoch": 0.9243951612903226, "grad_norm": 0.08409032970666885, "learning_rate": 0.0001161726720348604, "loss": 1.5354, "step": 917 }, { "epoch": 0.9254032258064516, "grad_norm": 0.07981358468532562, "learning_rate": 0.00011601234904018751, "loss": 1.5604, "step": 918 }, { "epoch": 0.9264112903225806, "grad_norm": 0.0860762745141983, "learning_rate": 0.00011585198379405092, "loss": 1.5857, "step": 919 }, { "epoch": 0.9274193548387096, "grad_norm": 0.09491165727376938, "learning_rate": 0.00011569157671960316, "loss": 1.5479, "step": 920 }, { "epoch": 0.9284274193548387, "grad_norm": 0.08277281373739243, "learning_rate": 0.00011553112824010716, "loss": 1.5773, "step": 921 }, { "epoch": 0.9294354838709677, "grad_norm": 0.08350729942321777, "learning_rate": 0.00011537063877893513, "loss": 1.5508, "step": 922 }, { "epoch": 0.9304435483870968, "grad_norm": 0.08306790888309479, "learning_rate": 0.00011521010875956734, "loss": 1.5807, "step": 923 }, { "epoch": 0.9314516129032258, "grad_norm": 0.07756998389959335, "learning_rate": 0.00011504953860559116, "loss": 1.546, "step": 924 }, { "epoch": 0.9324596774193549, "grad_norm": 0.08689188212156296, "learning_rate": 0.00011488892874069981, "loss": 1.5929, "step": 925 }, { "epoch": 0.9334677419354839, "grad_norm": 0.08053242415189743, "learning_rate": 0.00011472827958869133, "loss": 1.5578, "step": 926 }, { "epoch": 0.9344758064516129, "grad_norm": 0.08326185494661331, "learning_rate": 0.0001145675915734674, "loss": 1.544, "step": 927 }, { "epoch": 0.9354838709677419, "grad_norm": 0.08363624662160873, "learning_rate": 0.00011440686511903223, "loss": 1.5564, "step": 928 }, { "epoch": 0.936491935483871, "grad_norm": 0.08229418098926544, "learning_rate": 0.00011424610064949153, "loss": 1.5542, "step": 929 }, { "epoch": 0.9375, "grad_norm": 0.09776529669761658, "learning_rate": 0.00011408529858905126, "loss": 1.5353, "step": 930 }, { "epoch": 0.938508064516129, "grad_norm": 0.09616075456142426, "learning_rate": 0.0001139244593620166, "loss": 1.6193, "step": 931 }, { "epoch": 0.9395161290322581, "grad_norm": 0.1000729650259018, "learning_rate": 0.00011376358339279076, "loss": 1.633, "step": 932 }, { "epoch": 0.9405241935483871, "grad_norm": 0.08457247912883759, "learning_rate": 0.00011360267110587393, "loss": 1.5798, "step": 933 }, { "epoch": 0.9415322580645161, "grad_norm": 0.07730599492788315, "learning_rate": 0.00011344172292586217, "loss": 1.5163, "step": 934 }, { "epoch": 0.9425403225806451, "grad_norm": 0.09660627692937851, "learning_rate": 0.00011328073927744616, "loss": 1.6322, "step": 935 }, { "epoch": 0.9435483870967742, "grad_norm": 0.08001486957073212, "learning_rate": 0.00011311972058541023, "loss": 1.5755, "step": 936 }, { "epoch": 0.9445564516129032, "grad_norm": 0.08265230059623718, "learning_rate": 0.0001129586672746312, "loss": 1.61, "step": 937 }, { "epoch": 0.9455645161290323, "grad_norm": 0.09588516503572464, "learning_rate": 0.00011279757977007717, "loss": 1.6023, "step": 938 }, { "epoch": 0.9465725806451613, "grad_norm": 0.0791090875864029, "learning_rate": 0.0001126364584968065, "loss": 1.5158, "step": 939 }, { "epoch": 0.9475806451612904, "grad_norm": 0.09306017309427261, "learning_rate": 0.00011247530387996668, "loss": 1.5724, "step": 940 }, { "epoch": 0.9485887096774194, "grad_norm": 0.08578615635633469, "learning_rate": 0.00011231411634479316, "loss": 1.5692, "step": 941 }, { "epoch": 0.9495967741935484, "grad_norm": 0.0851496234536171, "learning_rate": 0.00011215289631660823, "loss": 1.5677, "step": 942 }, { "epoch": 0.9506048387096774, "grad_norm": 0.08048581331968307, "learning_rate": 0.00011199164422081995, "loss": 1.5537, "step": 943 }, { "epoch": 0.9516129032258065, "grad_norm": 0.08106819540262222, "learning_rate": 0.000111830360482921, "loss": 1.5296, "step": 944 }, { "epoch": 0.9526209677419355, "grad_norm": 0.07831558585166931, "learning_rate": 0.00011166904552848749, "loss": 1.5503, "step": 945 }, { "epoch": 0.9536290322580645, "grad_norm": 0.07931654155254364, "learning_rate": 0.000111507699783178, "loss": 1.5592, "step": 946 }, { "epoch": 0.9546370967741935, "grad_norm": 0.07992593944072723, "learning_rate": 0.0001113463236727323, "loss": 1.5671, "step": 947 }, { "epoch": 0.9556451612903226, "grad_norm": 0.08474520593881607, "learning_rate": 0.00011118491762297027, "loss": 1.5699, "step": 948 }, { "epoch": 0.9566532258064516, "grad_norm": 0.08235491812229156, "learning_rate": 0.0001110234820597908, "loss": 1.5671, "step": 949 }, { "epoch": 0.9576612903225806, "grad_norm": 0.09822028130292892, "learning_rate": 0.00011086201740917075, "loss": 1.6389, "step": 950 }, { "epoch": 0.9586693548387096, "grad_norm": 0.08909379690885544, "learning_rate": 0.00011070052409716354, "loss": 1.6273, "step": 951 }, { "epoch": 0.9596774193548387, "grad_norm": 0.08938673883676529, "learning_rate": 0.00011053900254989837, "loss": 1.5894, "step": 952 }, { "epoch": 0.9606854838709677, "grad_norm": 0.08622390776872635, "learning_rate": 0.00011037745319357893, "loss": 1.6217, "step": 953 }, { "epoch": 0.9616935483870968, "grad_norm": 0.08985532820224762, "learning_rate": 0.00011021587645448222, "loss": 1.6432, "step": 954 }, { "epoch": 0.9627016129032258, "grad_norm": 0.08598313480615616, "learning_rate": 0.00011005427275895756, "loss": 1.54, "step": 955 }, { "epoch": 0.9637096774193549, "grad_norm": 0.0815306007862091, "learning_rate": 0.00010989264253342538, "loss": 1.5172, "step": 956 }, { "epoch": 0.9647177419354839, "grad_norm": 0.09671612083911896, "learning_rate": 0.00010973098620437609, "loss": 1.6054, "step": 957 }, { "epoch": 0.9657258064516129, "grad_norm": 0.0809609442949295, "learning_rate": 0.00010956930419836899, "loss": 1.528, "step": 958 }, { "epoch": 0.9667338709677419, "grad_norm": 0.08456597477197647, "learning_rate": 0.0001094075969420312, "loss": 1.5383, "step": 959 }, { "epoch": 0.967741935483871, "grad_norm": 0.09076231718063354, "learning_rate": 0.00010924586486205632, "loss": 1.5948, "step": 960 }, { "epoch": 0.96875, "grad_norm": 0.08709228038787842, "learning_rate": 0.00010908410838520362, "loss": 1.5425, "step": 961 }, { "epoch": 0.969758064516129, "grad_norm": 0.09060946106910706, "learning_rate": 0.00010892232793829659, "loss": 1.57, "step": 962 }, { "epoch": 0.9707661290322581, "grad_norm": 0.0881752297282219, "learning_rate": 0.0001087605239482221, "loss": 1.5874, "step": 963 }, { "epoch": 0.9717741935483871, "grad_norm": 0.086030974984169, "learning_rate": 0.00010859869684192907, "loss": 1.5792, "step": 964 }, { "epoch": 0.9727822580645161, "grad_norm": 0.0817110538482666, "learning_rate": 0.00010843684704642744, "loss": 1.5506, "step": 965 }, { "epoch": 0.9737903225806451, "grad_norm": 0.08721321821212769, "learning_rate": 0.00010827497498878703, "loss": 1.5907, "step": 966 }, { "epoch": 0.9747983870967742, "grad_norm": 0.07887570559978485, "learning_rate": 0.00010811308109613634, "loss": 1.578, "step": 967 }, { "epoch": 0.9758064516129032, "grad_norm": 0.11064060032367706, "learning_rate": 0.00010795116579566158, "loss": 1.6, "step": 968 }, { "epoch": 0.9768145161290323, "grad_norm": 0.08672841638326645, "learning_rate": 0.00010778922951460537, "loss": 1.5872, "step": 969 }, { "epoch": 0.9778225806451613, "grad_norm": 0.08424878865480423, "learning_rate": 0.00010762727268026571, "loss": 1.5698, "step": 970 }, { "epoch": 0.9788306451612904, "grad_norm": 0.08876322209835052, "learning_rate": 0.00010746529571999491, "loss": 1.5775, "step": 971 }, { "epoch": 0.9798387096774194, "grad_norm": 0.08440111577510834, "learning_rate": 0.00010730329906119822, "loss": 1.5574, "step": 972 }, { "epoch": 0.9808467741935484, "grad_norm": 0.08397315442562103, "learning_rate": 0.00010714128313133307, "loss": 1.6166, "step": 973 }, { "epoch": 0.9818548387096774, "grad_norm": 0.09894799441099167, "learning_rate": 0.00010697924835790758, "loss": 1.6352, "step": 974 }, { "epoch": 0.9828629032258065, "grad_norm": 0.08329147845506668, "learning_rate": 0.00010681719516847968, "loss": 1.555, "step": 975 }, { "epoch": 0.9838709677419355, "grad_norm": 0.08748366683721542, "learning_rate": 0.00010665512399065582, "loss": 1.5519, "step": 976 }, { "epoch": 0.9848790322580645, "grad_norm": 0.08558699488639832, "learning_rate": 0.00010649303525209005, "loss": 1.5762, "step": 977 }, { "epoch": 0.9858870967741935, "grad_norm": 0.11034592986106873, "learning_rate": 0.00010633092938048257, "loss": 1.5972, "step": 978 }, { "epoch": 0.9868951612903226, "grad_norm": 0.08514732867479324, "learning_rate": 0.00010616880680357892, "loss": 1.5625, "step": 979 }, { "epoch": 0.9879032258064516, "grad_norm": 0.09123446047306061, "learning_rate": 0.00010600666794916871, "loss": 1.5516, "step": 980 }, { "epoch": 0.9889112903225806, "grad_norm": 0.08317586034536362, "learning_rate": 0.00010584451324508444, "loss": 1.6043, "step": 981 }, { "epoch": 0.9899193548387096, "grad_norm": 0.09369304031133652, "learning_rate": 0.00010568234311920051, "loss": 1.5575, "step": 982 }, { "epoch": 0.9909274193548387, "grad_norm": 0.08730312436819077, "learning_rate": 0.00010552015799943193, "loss": 1.5848, "step": 983 }, { "epoch": 0.9919354838709677, "grad_norm": 0.08520778268575668, "learning_rate": 0.00010535795831373337, "loss": 1.5697, "step": 984 }, { "epoch": 0.9929435483870968, "grad_norm": 0.08985403925180435, "learning_rate": 0.00010519574449009784, "loss": 1.546, "step": 985 }, { "epoch": 0.9939516129032258, "grad_norm": 0.08371421694755554, "learning_rate": 0.0001050335169565557, "loss": 1.5724, "step": 986 }, { "epoch": 0.9949596774193549, "grad_norm": 0.08613915741443634, "learning_rate": 0.00010487127614117352, "loss": 1.5974, "step": 987 }, { "epoch": 0.9959677419354839, "grad_norm": 0.09249399602413177, "learning_rate": 0.00010470902247205283, "loss": 1.6173, "step": 988 }, { "epoch": 0.9969758064516129, "grad_norm": 0.09915943443775177, "learning_rate": 0.00010454675637732916, "loss": 1.5947, "step": 989 }, { "epoch": 0.9979838709677419, "grad_norm": 0.0846395492553711, "learning_rate": 0.00010438447828517077, "loss": 1.5243, "step": 990 }, { "epoch": 0.998991935483871, "grad_norm": 0.08313705772161484, "learning_rate": 0.00010422218862377764, "loss": 1.5333, "step": 991 }, { "epoch": 1.0, "grad_norm": 0.08256080746650696, "learning_rate": 0.00010405988782138019, "loss": 1.5527, "step": 992 }, { "epoch": 1.001008064516129, "grad_norm": 0.09215422719717026, "learning_rate": 0.00010389757630623831, "loss": 1.5035, "step": 993 }, { "epoch": 1.002016129032258, "grad_norm": 0.08784796297550201, "learning_rate": 0.00010373525450664016, "loss": 1.5397, "step": 994 }, { "epoch": 1.003024193548387, "grad_norm": 0.08578605949878693, "learning_rate": 0.000103572922850901, "loss": 1.5449, "step": 995 }, { "epoch": 1.0040322580645162, "grad_norm": 0.09281399846076965, "learning_rate": 0.00010341058176736207, "loss": 1.4507, "step": 996 }, { "epoch": 1.0050403225806452, "grad_norm": 0.09404852986335754, "learning_rate": 0.00010324823168438953, "loss": 1.4817, "step": 997 }, { "epoch": 1.0060483870967742, "grad_norm": 0.0944603756070137, "learning_rate": 0.00010308587303037334, "loss": 1.536, "step": 998 }, { "epoch": 1.0070564516129032, "grad_norm": 0.11103025823831558, "learning_rate": 0.00010292350623372598, "loss": 1.5278, "step": 999 }, { "epoch": 1.0080645161290323, "grad_norm": 0.0859605222940445, "learning_rate": 0.00010276113172288144, "loss": 1.4855, "step": 1000 }, { "epoch": 1.0090725806451613, "grad_norm": 0.08268768340349197, "learning_rate": 0.0001025987499262941, "loss": 1.4975, "step": 1001 }, { "epoch": 1.0100806451612903, "grad_norm": 0.09450601041316986, "learning_rate": 0.00010243636127243754, "loss": 1.5052, "step": 1002 }, { "epoch": 1.0110887096774193, "grad_norm": 0.10182943195104599, "learning_rate": 0.00010227396618980344, "loss": 1.5889, "step": 1003 }, { "epoch": 1.0120967741935485, "grad_norm": 0.10887010395526886, "learning_rate": 0.00010211156510690043, "loss": 1.5387, "step": 1004 }, { "epoch": 1.0131048387096775, "grad_norm": 0.09432150423526764, "learning_rate": 0.00010194915845225304, "loss": 1.51, "step": 1005 }, { "epoch": 1.0141129032258065, "grad_norm": 0.0892212763428688, "learning_rate": 0.00010178674665440034, "loss": 1.4975, "step": 1006 }, { "epoch": 1.0151209677419355, "grad_norm": 0.08749305456876755, "learning_rate": 0.00010162433014189519, "loss": 1.5303, "step": 1007 }, { "epoch": 1.0161290322580645, "grad_norm": 0.09416648000478745, "learning_rate": 0.00010146190934330268, "loss": 1.499, "step": 1008 }, { "epoch": 1.0171370967741935, "grad_norm": 0.10288472473621368, "learning_rate": 0.00010129948468719939, "loss": 1.4785, "step": 1009 }, { "epoch": 1.0181451612903225, "grad_norm": 0.08718498051166534, "learning_rate": 0.00010113705660217197, "loss": 1.5045, "step": 1010 }, { "epoch": 1.0191532258064515, "grad_norm": 0.08473226428031921, "learning_rate": 0.00010097462551681612, "loss": 1.4799, "step": 1011 }, { "epoch": 1.0201612903225807, "grad_norm": 0.09531670063734055, "learning_rate": 0.00010081219185973552, "loss": 1.545, "step": 1012 }, { "epoch": 1.0211693548387097, "grad_norm": 0.08223138749599457, "learning_rate": 0.00010064975605954054, "loss": 1.4807, "step": 1013 }, { "epoch": 1.0221774193548387, "grad_norm": 0.08815553784370422, "learning_rate": 0.00010048731854484735, "loss": 1.47, "step": 1014 }, { "epoch": 1.0231854838709677, "grad_norm": 0.09323311597108841, "learning_rate": 0.00010032487974427645, "loss": 1.5823, "step": 1015 }, { "epoch": 1.0241935483870968, "grad_norm": 0.1007145345211029, "learning_rate": 0.00010016244008645195, "loss": 1.4864, "step": 1016 }, { "epoch": 1.0252016129032258, "grad_norm": 0.09309312701225281, "learning_rate": 0.0001, "loss": 1.5118, "step": 1017 }, { "epoch": 1.0262096774193548, "grad_norm": 0.08557573705911636, "learning_rate": 9.983755991354809e-05, "loss": 1.5165, "step": 1018 }, { "epoch": 1.0272177419354838, "grad_norm": 0.10075996816158295, "learning_rate": 9.967512025572356e-05, "loss": 1.5106, "step": 1019 }, { "epoch": 1.028225806451613, "grad_norm": 0.08483249694108963, "learning_rate": 9.951268145515269e-05, "loss": 1.4974, "step": 1020 }, { "epoch": 1.029233870967742, "grad_norm": 0.11874374747276306, "learning_rate": 9.935024394045948e-05, "loss": 1.5622, "step": 1021 }, { "epoch": 1.030241935483871, "grad_norm": 0.11608150601387024, "learning_rate": 9.918780814026452e-05, "loss": 1.5636, "step": 1022 }, { "epoch": 1.03125, "grad_norm": 0.11097010225057602, "learning_rate": 9.90253744831839e-05, "loss": 1.5388, "step": 1023 }, { "epoch": 1.032258064516129, "grad_norm": 0.08821584284305573, "learning_rate": 9.886294339782805e-05, "loss": 1.4808, "step": 1024 }, { "epoch": 1.033266129032258, "grad_norm": 0.08906351774930954, "learning_rate": 9.870051531280064e-05, "loss": 1.4567, "step": 1025 }, { "epoch": 1.034274193548387, "grad_norm": 0.08993887901306152, "learning_rate": 9.853809065669733e-05, "loss": 1.5174, "step": 1026 }, { "epoch": 1.0352822580645162, "grad_norm": 0.0829705148935318, "learning_rate": 9.837566985810484e-05, "loss": 1.5275, "step": 1027 }, { "epoch": 1.0362903225806452, "grad_norm": 0.09338941425085068, "learning_rate": 9.821325334559967e-05, "loss": 1.5197, "step": 1028 }, { "epoch": 1.0372983870967742, "grad_norm": 0.0843081921339035, "learning_rate": 9.8050841547747e-05, "loss": 1.5121, "step": 1029 }, { "epoch": 1.0383064516129032, "grad_norm": 0.09108688682317734, "learning_rate": 9.78884348930996e-05, "loss": 1.5642, "step": 1030 }, { "epoch": 1.0393145161290323, "grad_norm": 0.08404973894357681, "learning_rate": 9.772603381019658e-05, "loss": 1.4552, "step": 1031 }, { "epoch": 1.0403225806451613, "grad_norm": 0.08852069824934006, "learning_rate": 9.756363872756249e-05, "loss": 1.5511, "step": 1032 }, { "epoch": 1.0413306451612903, "grad_norm": 0.08855357021093369, "learning_rate": 9.740125007370592e-05, "loss": 1.5341, "step": 1033 }, { "epoch": 1.0423387096774193, "grad_norm": 0.08306348323822021, "learning_rate": 9.723886827711857e-05, "loss": 1.4941, "step": 1034 }, { "epoch": 1.0433467741935485, "grad_norm": 0.11460579931735992, "learning_rate": 9.707649376627406e-05, "loss": 1.541, "step": 1035 }, { "epoch": 1.0443548387096775, "grad_norm": 0.0861547440290451, "learning_rate": 9.691412696962667e-05, "loss": 1.5364, "step": 1036 }, { "epoch": 1.0453629032258065, "grad_norm": 0.092412069439888, "learning_rate": 9.675176831561048e-05, "loss": 1.5179, "step": 1037 }, { "epoch": 1.0463709677419355, "grad_norm": 0.08788943290710449, "learning_rate": 9.658941823263797e-05, "loss": 1.4936, "step": 1038 }, { "epoch": 1.0473790322580645, "grad_norm": 0.08519960939884186, "learning_rate": 9.642707714909904e-05, "loss": 1.539, "step": 1039 }, { "epoch": 1.0483870967741935, "grad_norm": 0.08832072466611862, "learning_rate": 9.626474549335986e-05, "loss": 1.5077, "step": 1040 }, { "epoch": 1.0493951612903225, "grad_norm": 0.10505390167236328, "learning_rate": 9.61024236937617e-05, "loss": 1.5432, "step": 1041 }, { "epoch": 1.0504032258064515, "grad_norm": 0.09197022020816803, "learning_rate": 9.594011217861982e-05, "loss": 1.5595, "step": 1042 }, { "epoch": 1.0514112903225807, "grad_norm": 0.0843205377459526, "learning_rate": 9.577781137622238e-05, "loss": 1.4353, "step": 1043 }, { "epoch": 1.0524193548387097, "grad_norm": 0.10806506127119064, "learning_rate": 9.561552171482925e-05, "loss": 1.515, "step": 1044 }, { "epoch": 1.0534274193548387, "grad_norm": 0.08592282235622406, "learning_rate": 9.545324362267086e-05, "loss": 1.5279, "step": 1045 }, { "epoch": 1.0544354838709677, "grad_norm": 0.11082509160041809, "learning_rate": 9.52909775279472e-05, "loss": 1.5395, "step": 1046 }, { "epoch": 1.0554435483870968, "grad_norm": 0.08529554307460785, "learning_rate": 9.51287238588265e-05, "loss": 1.4849, "step": 1047 }, { "epoch": 1.0564516129032258, "grad_norm": 0.08765090256929398, "learning_rate": 9.496648304344433e-05, "loss": 1.4944, "step": 1048 }, { "epoch": 1.0574596774193548, "grad_norm": 0.08893377333879471, "learning_rate": 9.480425550990219e-05, "loss": 1.5, "step": 1049 }, { "epoch": 1.0584677419354838, "grad_norm": 0.09724058210849762, "learning_rate": 9.464204168626665e-05, "loss": 1.5281, "step": 1050 }, { "epoch": 1.059475806451613, "grad_norm": 0.0883408635854721, "learning_rate": 9.447984200056808e-05, "loss": 1.5211, "step": 1051 }, { "epoch": 1.060483870967742, "grad_norm": 0.08431454002857208, "learning_rate": 9.43176568807995e-05, "loss": 1.5175, "step": 1052 }, { "epoch": 1.061491935483871, "grad_norm": 0.09407296776771545, "learning_rate": 9.415548675491559e-05, "loss": 1.5722, "step": 1053 }, { "epoch": 1.0625, "grad_norm": 0.08895613998174667, "learning_rate": 9.399333205083131e-05, "loss": 1.5702, "step": 1054 }, { "epoch": 1.063508064516129, "grad_norm": 0.08799167722463608, "learning_rate": 9.38311931964211e-05, "loss": 1.5531, "step": 1055 }, { "epoch": 1.064516129032258, "grad_norm": 0.08785036206245422, "learning_rate": 9.366907061951745e-05, "loss": 1.5398, "step": 1056 }, { "epoch": 1.065524193548387, "grad_norm": 0.10027193278074265, "learning_rate": 9.350696474790999e-05, "loss": 1.5256, "step": 1057 }, { "epoch": 1.066532258064516, "grad_norm": 0.08771440386772156, "learning_rate": 9.334487600934416e-05, "loss": 1.5085, "step": 1058 }, { "epoch": 1.0675403225806452, "grad_norm": 0.08703982830047607, "learning_rate": 9.318280483152033e-05, "loss": 1.4827, "step": 1059 }, { "epoch": 1.0685483870967742, "grad_norm": 0.08651833981275558, "learning_rate": 9.302075164209241e-05, "loss": 1.5255, "step": 1060 }, { "epoch": 1.0695564516129032, "grad_norm": 0.0868133008480072, "learning_rate": 9.285871686866692e-05, "loss": 1.4953, "step": 1061 }, { "epoch": 1.0705645161290323, "grad_norm": 0.08549060672521591, "learning_rate": 9.269670093880177e-05, "loss": 1.5239, "step": 1062 }, { "epoch": 1.0715725806451613, "grad_norm": 0.08664209395647049, "learning_rate": 9.25347042800051e-05, "loss": 1.5328, "step": 1063 }, { "epoch": 1.0725806451612903, "grad_norm": 0.0853060856461525, "learning_rate": 9.237272731973428e-05, "loss": 1.4854, "step": 1064 }, { "epoch": 1.0735887096774193, "grad_norm": 0.10764405876398087, "learning_rate": 9.221077048539464e-05, "loss": 1.5174, "step": 1065 }, { "epoch": 1.0745967741935485, "grad_norm": 0.09327509254217148, "learning_rate": 9.204883420433844e-05, "loss": 1.5074, "step": 1066 }, { "epoch": 1.0756048387096775, "grad_norm": 0.08912849426269531, "learning_rate": 9.188691890386367e-05, "loss": 1.4915, "step": 1067 }, { "epoch": 1.0766129032258065, "grad_norm": 0.08654549717903137, "learning_rate": 9.172502501121297e-05, "loss": 1.4998, "step": 1068 }, { "epoch": 1.0776209677419355, "grad_norm": 0.09039713442325592, "learning_rate": 9.156315295357257e-05, "loss": 1.5139, "step": 1069 }, { "epoch": 1.0786290322580645, "grad_norm": 0.08438859134912491, "learning_rate": 9.140130315807091e-05, "loss": 1.4935, "step": 1070 }, { "epoch": 1.0796370967741935, "grad_norm": 0.08553072065114975, "learning_rate": 9.123947605177791e-05, "loss": 1.508, "step": 1071 }, { "epoch": 1.0806451612903225, "grad_norm": 0.08692750334739685, "learning_rate": 9.107767206170342e-05, "loss": 1.5114, "step": 1072 }, { "epoch": 1.0816532258064515, "grad_norm": 0.09480643272399902, "learning_rate": 9.09158916147964e-05, "loss": 1.5726, "step": 1073 }, { "epoch": 1.0826612903225807, "grad_norm": 0.0879359245300293, "learning_rate": 9.075413513794369e-05, "loss": 1.4962, "step": 1074 }, { "epoch": 1.0836693548387097, "grad_norm": 0.09322493523359299, "learning_rate": 9.059240305796884e-05, "loss": 1.5454, "step": 1075 }, { "epoch": 1.0846774193548387, "grad_norm": 0.09673374146223068, "learning_rate": 9.043069580163099e-05, "loss": 1.509, "step": 1076 }, { "epoch": 1.0856854838709677, "grad_norm": 0.08707006275653839, "learning_rate": 9.02690137956239e-05, "loss": 1.5632, "step": 1077 }, { "epoch": 1.0866935483870968, "grad_norm": 0.08686521649360657, "learning_rate": 9.010735746657462e-05, "loss": 1.4968, "step": 1078 }, { "epoch": 1.0877016129032258, "grad_norm": 0.08472903817892075, "learning_rate": 8.994572724104242e-05, "loss": 1.4908, "step": 1079 }, { "epoch": 1.0887096774193548, "grad_norm": 0.09030890464782715, "learning_rate": 8.978412354551779e-05, "loss": 1.5018, "step": 1080 }, { "epoch": 1.089717741935484, "grad_norm": 0.08417510986328125, "learning_rate": 8.962254680642107e-05, "loss": 1.4444, "step": 1081 }, { "epoch": 1.090725806451613, "grad_norm": 0.09092919528484344, "learning_rate": 8.946099745010164e-05, "loss": 1.5303, "step": 1082 }, { "epoch": 1.091733870967742, "grad_norm": 0.09100567549467087, "learning_rate": 8.929947590283647e-05, "loss": 1.5403, "step": 1083 }, { "epoch": 1.092741935483871, "grad_norm": 0.12923839688301086, "learning_rate": 8.913798259082928e-05, "loss": 1.4664, "step": 1084 }, { "epoch": 1.09375, "grad_norm": 0.09925505518913269, "learning_rate": 8.897651794020918e-05, "loss": 1.5229, "step": 1085 }, { "epoch": 1.094758064516129, "grad_norm": 0.08671566098928452, "learning_rate": 8.881508237702973e-05, "loss": 1.4995, "step": 1086 }, { "epoch": 1.095766129032258, "grad_norm": 0.08649452030658722, "learning_rate": 8.865367632726772e-05, "loss": 1.4993, "step": 1087 }, { "epoch": 1.096774193548387, "grad_norm": 0.0882314071059227, "learning_rate": 8.849230021682199e-05, "loss": 1.5442, "step": 1088 }, { "epoch": 1.097782258064516, "grad_norm": 0.088679738342762, "learning_rate": 8.833095447151252e-05, "loss": 1.5014, "step": 1089 }, { "epoch": 1.0987903225806452, "grad_norm": 0.09637542814016342, "learning_rate": 8.816963951707901e-05, "loss": 1.5503, "step": 1090 }, { "epoch": 1.0997983870967742, "grad_norm": 0.09071476012468338, "learning_rate": 8.800835577918006e-05, "loss": 1.5016, "step": 1091 }, { "epoch": 1.1008064516129032, "grad_norm": 0.09719227999448776, "learning_rate": 8.784710368339178e-05, "loss": 1.4767, "step": 1092 }, { "epoch": 1.1018145161290323, "grad_norm": 0.08729701489210129, "learning_rate": 8.768588365520685e-05, "loss": 1.5011, "step": 1093 }, { "epoch": 1.1028225806451613, "grad_norm": 0.08893397450447083, "learning_rate": 8.752469612003332e-05, "loss": 1.5368, "step": 1094 }, { "epoch": 1.1038306451612903, "grad_norm": 0.08354583382606506, "learning_rate": 8.736354150319349e-05, "loss": 1.5199, "step": 1095 }, { "epoch": 1.1048387096774193, "grad_norm": 0.08970467001199722, "learning_rate": 8.720242022992284e-05, "loss": 1.5328, "step": 1096 }, { "epoch": 1.1058467741935485, "grad_norm": 0.09049658477306366, "learning_rate": 8.704133272536879e-05, "loss": 1.5323, "step": 1097 }, { "epoch": 1.1068548387096775, "grad_norm": 0.08495205640792847, "learning_rate": 8.68802794145898e-05, "loss": 1.4833, "step": 1098 }, { "epoch": 1.1078629032258065, "grad_norm": 0.08763737976551056, "learning_rate": 8.671926072255389e-05, "loss": 1.5314, "step": 1099 }, { "epoch": 1.1088709677419355, "grad_norm": 0.0835312008857727, "learning_rate": 8.655827707413788e-05, "loss": 1.5162, "step": 1100 }, { "epoch": 1.1098790322580645, "grad_norm": 0.08878222852945328, "learning_rate": 8.63973288941261e-05, "loss": 1.4885, "step": 1101 }, { "epoch": 1.1108870967741935, "grad_norm": 0.09213855862617493, "learning_rate": 8.623641660720928e-05, "loss": 1.5398, "step": 1102 }, { "epoch": 1.1118951612903225, "grad_norm": 0.08432666957378387, "learning_rate": 8.607554063798346e-05, "loss": 1.4907, "step": 1103 }, { "epoch": 1.1129032258064515, "grad_norm": 0.10029254853725433, "learning_rate": 8.591470141094878e-05, "loss": 1.5904, "step": 1104 }, { "epoch": 1.1139112903225807, "grad_norm": 0.08696424961090088, "learning_rate": 8.57538993505085e-05, "loss": 1.5079, "step": 1105 }, { "epoch": 1.1149193548387097, "grad_norm": 0.08842870593070984, "learning_rate": 8.559313488096782e-05, "loss": 1.5223, "step": 1106 }, { "epoch": 1.1159274193548387, "grad_norm": 0.08505623787641525, "learning_rate": 8.543240842653266e-05, "loss": 1.4939, "step": 1107 }, { "epoch": 1.1169354838709677, "grad_norm": 0.09814995527267456, "learning_rate": 8.527172041130874e-05, "loss": 1.5732, "step": 1108 }, { "epoch": 1.1179435483870968, "grad_norm": 0.09438839554786682, "learning_rate": 8.511107125930022e-05, "loss": 1.5903, "step": 1109 }, { "epoch": 1.1189516129032258, "grad_norm": 0.08910852670669556, "learning_rate": 8.49504613944089e-05, "loss": 1.5203, "step": 1110 }, { "epoch": 1.1199596774193548, "grad_norm": 0.0924610123038292, "learning_rate": 8.47898912404327e-05, "loss": 1.5302, "step": 1111 }, { "epoch": 1.120967741935484, "grad_norm": 0.08957453072071075, "learning_rate": 8.462936122106489e-05, "loss": 1.5179, "step": 1112 }, { "epoch": 1.121975806451613, "grad_norm": 0.1187904104590416, "learning_rate": 8.446887175989286e-05, "loss": 1.5622, "step": 1113 }, { "epoch": 1.122983870967742, "grad_norm": 0.0907069593667984, "learning_rate": 8.430842328039686e-05, "loss": 1.502, "step": 1114 }, { "epoch": 1.123991935483871, "grad_norm": 0.09245329350233078, "learning_rate": 8.414801620594912e-05, "loss": 1.476, "step": 1115 }, { "epoch": 1.125, "grad_norm": 0.10100734978914261, "learning_rate": 8.398765095981251e-05, "loss": 1.5111, "step": 1116 }, { "epoch": 1.126008064516129, "grad_norm": 0.09156333655118942, "learning_rate": 8.382732796513966e-05, "loss": 1.4985, "step": 1117 }, { "epoch": 1.127016129032258, "grad_norm": 0.11173349618911743, "learning_rate": 8.366704764497154e-05, "loss": 1.4869, "step": 1118 }, { "epoch": 1.128024193548387, "grad_norm": 0.08984418958425522, "learning_rate": 8.35068104222367e-05, "loss": 1.52, "step": 1119 }, { "epoch": 1.129032258064516, "grad_norm": 0.11599362641572952, "learning_rate": 8.33466167197498e-05, "loss": 1.5154, "step": 1120 }, { "epoch": 1.1300403225806452, "grad_norm": 0.09752003848552704, "learning_rate": 8.318646696021077e-05, "loss": 1.4838, "step": 1121 }, { "epoch": 1.1310483870967742, "grad_norm": 0.09071122109889984, "learning_rate": 8.302636156620363e-05, "loss": 1.5761, "step": 1122 }, { "epoch": 1.1320564516129032, "grad_norm": 0.08928891271352768, "learning_rate": 8.286630096019518e-05, "loss": 1.5265, "step": 1123 }, { "epoch": 1.1330645161290323, "grad_norm": 0.08508775383234024, "learning_rate": 8.270628556453417e-05, "loss": 1.548, "step": 1124 }, { "epoch": 1.1340725806451613, "grad_norm": 0.08637328445911407, "learning_rate": 8.254631580144999e-05, "loss": 1.4786, "step": 1125 }, { "epoch": 1.1350806451612903, "grad_norm": 0.08538668602705002, "learning_rate": 8.238639209305166e-05, "loss": 1.4797, "step": 1126 }, { "epoch": 1.1360887096774193, "grad_norm": 0.08973052352666855, "learning_rate": 8.222651486132664e-05, "loss": 1.5066, "step": 1127 }, { "epoch": 1.1370967741935485, "grad_norm": 0.08729778975248337, "learning_rate": 8.206668452813978e-05, "loss": 1.4973, "step": 1128 }, { "epoch": 1.1381048387096775, "grad_norm": 0.08795138448476791, "learning_rate": 8.190690151523215e-05, "loss": 1.4892, "step": 1129 }, { "epoch": 1.1391129032258065, "grad_norm": 0.08695145696401596, "learning_rate": 8.174716624421997e-05, "loss": 1.5163, "step": 1130 }, { "epoch": 1.1401209677419355, "grad_norm": 0.08848337084054947, "learning_rate": 8.158747913659355e-05, "loss": 1.4907, "step": 1131 }, { "epoch": 1.1411290322580645, "grad_norm": 0.08827504515647888, "learning_rate": 8.142784061371598e-05, "loss": 1.5306, "step": 1132 }, { "epoch": 1.1421370967741935, "grad_norm": 0.09366059303283691, "learning_rate": 8.126825109682228e-05, "loss": 1.4598, "step": 1133 }, { "epoch": 1.1431451612903225, "grad_norm": 0.09082233905792236, "learning_rate": 8.110871100701807e-05, "loss": 1.5746, "step": 1134 }, { "epoch": 1.1441532258064515, "grad_norm": 0.10159925371408463, "learning_rate": 8.094922076527859e-05, "loss": 1.5689, "step": 1135 }, { "epoch": 1.1451612903225807, "grad_norm": 0.10202515870332718, "learning_rate": 8.078978079244752e-05, "loss": 1.5155, "step": 1136 }, { "epoch": 1.1461693548387097, "grad_norm": 0.0907059907913208, "learning_rate": 8.063039150923595e-05, "loss": 1.5552, "step": 1137 }, { "epoch": 1.1471774193548387, "grad_norm": 0.08588322252035141, "learning_rate": 8.047105333622112e-05, "loss": 1.5299, "step": 1138 }, { "epoch": 1.1481854838709677, "grad_norm": 0.08953887969255447, "learning_rate": 8.031176669384552e-05, "loss": 1.5528, "step": 1139 }, { "epoch": 1.1491935483870968, "grad_norm": 0.08963429927825928, "learning_rate": 8.01525320024156e-05, "loss": 1.4823, "step": 1140 }, { "epoch": 1.1502016129032258, "grad_norm": 0.09360229223966599, "learning_rate": 7.999334968210073e-05, "loss": 1.5288, "step": 1141 }, { "epoch": 1.1512096774193548, "grad_norm": 0.09653651714324951, "learning_rate": 7.983422015293212e-05, "loss": 1.502, "step": 1142 }, { "epoch": 1.152217741935484, "grad_norm": 0.0958021953701973, "learning_rate": 7.967514383480161e-05, "loss": 1.4772, "step": 1143 }, { "epoch": 1.153225806451613, "grad_norm": 0.0900203064084053, "learning_rate": 7.951612114746076e-05, "loss": 1.5536, "step": 1144 }, { "epoch": 1.154233870967742, "grad_norm": 0.1079091802239418, "learning_rate": 7.935715251051949e-05, "loss": 1.482, "step": 1145 }, { "epoch": 1.155241935483871, "grad_norm": 0.09951366484165192, "learning_rate": 7.919823834344516e-05, "loss": 1.4908, "step": 1146 }, { "epoch": 1.15625, "grad_norm": 0.08866190165281296, "learning_rate": 7.90393790655614e-05, "loss": 1.5027, "step": 1147 }, { "epoch": 1.157258064516129, "grad_norm": 0.09670446068048477, "learning_rate": 7.888057509604697e-05, "loss": 1.4905, "step": 1148 }, { "epoch": 1.158266129032258, "grad_norm": 0.0998421311378479, "learning_rate": 7.872182685393475e-05, "loss": 1.5349, "step": 1149 }, { "epoch": 1.159274193548387, "grad_norm": 0.09023125469684601, "learning_rate": 7.85631347581105e-05, "loss": 1.5502, "step": 1150 }, { "epoch": 1.160282258064516, "grad_norm": 0.09362298995256424, "learning_rate": 7.84044992273119e-05, "loss": 1.4587, "step": 1151 }, { "epoch": 1.1612903225806452, "grad_norm": 0.09614353626966476, "learning_rate": 7.82459206801273e-05, "loss": 1.5398, "step": 1152 }, { "epoch": 1.1622983870967742, "grad_norm": 0.08735020458698273, "learning_rate": 7.808739953499478e-05, "loss": 1.5106, "step": 1153 }, { "epoch": 1.1633064516129032, "grad_norm": 0.11043401807546616, "learning_rate": 7.792893621020082e-05, "loss": 1.533, "step": 1154 }, { "epoch": 1.1643145161290323, "grad_norm": 0.11868879944086075, "learning_rate": 7.777053112387949e-05, "loss": 1.5086, "step": 1155 }, { "epoch": 1.1653225806451613, "grad_norm": 0.08818439394235611, "learning_rate": 7.761218469401108e-05, "loss": 1.5127, "step": 1156 }, { "epoch": 1.1663306451612903, "grad_norm": 0.1308388113975525, "learning_rate": 7.745389733842112e-05, "loss": 1.4556, "step": 1157 }, { "epoch": 1.1673387096774193, "grad_norm": 0.09634990245103836, "learning_rate": 7.729566947477928e-05, "loss": 1.5527, "step": 1158 }, { "epoch": 1.1683467741935485, "grad_norm": 0.11291810870170593, "learning_rate": 7.713750152059826e-05, "loss": 1.5556, "step": 1159 }, { "epoch": 1.1693548387096775, "grad_norm": 0.10674012452363968, "learning_rate": 7.697939389323267e-05, "loss": 1.4921, "step": 1160 }, { "epoch": 1.1703629032258065, "grad_norm": 0.09948462247848511, "learning_rate": 7.682134700987789e-05, "loss": 1.5691, "step": 1161 }, { "epoch": 1.1713709677419355, "grad_norm": 0.09521344304084778, "learning_rate": 7.66633612875691e-05, "loss": 1.475, "step": 1162 }, { "epoch": 1.1723790322580645, "grad_norm": 0.11034592241048813, "learning_rate": 7.650543714318001e-05, "loss": 1.5353, "step": 1163 }, { "epoch": 1.1733870967741935, "grad_norm": 0.08763138949871063, "learning_rate": 7.634757499342191e-05, "loss": 1.4952, "step": 1164 }, { "epoch": 1.1743951612903225, "grad_norm": 0.09569991379976273, "learning_rate": 7.61897752548425e-05, "loss": 1.5287, "step": 1165 }, { "epoch": 1.1754032258064515, "grad_norm": 0.12841151654720306, "learning_rate": 7.603203834382476e-05, "loss": 1.6028, "step": 1166 }, { "epoch": 1.1764112903225807, "grad_norm": 0.08578557521104813, "learning_rate": 7.58743646765859e-05, "loss": 1.4683, "step": 1167 }, { "epoch": 1.1774193548387097, "grad_norm": 0.10593171417713165, "learning_rate": 7.571675466917626e-05, "loss": 1.5351, "step": 1168 }, { "epoch": 1.1784274193548387, "grad_norm": 0.10871924459934235, "learning_rate": 7.555920873747823e-05, "loss": 1.5334, "step": 1169 }, { "epoch": 1.1794354838709677, "grad_norm": 0.08840969204902649, "learning_rate": 7.540172729720504e-05, "loss": 1.5035, "step": 1170 }, { "epoch": 1.1804435483870968, "grad_norm": 0.08680961281061172, "learning_rate": 7.524431076389986e-05, "loss": 1.4756, "step": 1171 }, { "epoch": 1.1814516129032258, "grad_norm": 0.0890466570854187, "learning_rate": 7.50869595529345e-05, "loss": 1.5077, "step": 1172 }, { "epoch": 1.1824596774193548, "grad_norm": 0.10022439807653427, "learning_rate": 7.492967407950844e-05, "loss": 1.5001, "step": 1173 }, { "epoch": 1.183467741935484, "grad_norm": 0.12129071354866028, "learning_rate": 7.477245475864771e-05, "loss": 1.5234, "step": 1174 }, { "epoch": 1.184475806451613, "grad_norm": 0.09167549759149551, "learning_rate": 7.461530200520377e-05, "loss": 1.4971, "step": 1175 }, { "epoch": 1.185483870967742, "grad_norm": 0.08763924986124039, "learning_rate": 7.445821623385245e-05, "loss": 1.5229, "step": 1176 }, { "epoch": 1.186491935483871, "grad_norm": 0.13247455656528473, "learning_rate": 7.430119785909278e-05, "loss": 1.4973, "step": 1177 }, { "epoch": 1.1875, "grad_norm": 0.10564038902521133, "learning_rate": 7.414424729524602e-05, "loss": 1.4549, "step": 1178 }, { "epoch": 1.188508064516129, "grad_norm": 0.09784973412752151, "learning_rate": 7.398736495645447e-05, "loss": 1.514, "step": 1179 }, { "epoch": 1.189516129032258, "grad_norm": 0.0994093120098114, "learning_rate": 7.383055125668038e-05, "loss": 1.4899, "step": 1180 }, { "epoch": 1.190524193548387, "grad_norm": 0.09787564724683762, "learning_rate": 7.367380660970493e-05, "loss": 1.5306, "step": 1181 }, { "epoch": 1.191532258064516, "grad_norm": 0.09221166372299194, "learning_rate": 7.351713142912707e-05, "loss": 1.5314, "step": 1182 }, { "epoch": 1.1925403225806452, "grad_norm": 0.10157594084739685, "learning_rate": 7.336052612836246e-05, "loss": 1.5738, "step": 1183 }, { "epoch": 1.1935483870967742, "grad_norm": 0.09838045388460159, "learning_rate": 7.320399112064233e-05, "loss": 1.543, "step": 1184 }, { "epoch": 1.1945564516129032, "grad_norm": 0.0991106852889061, "learning_rate": 7.304752681901251e-05, "loss": 1.5159, "step": 1185 }, { "epoch": 1.1955645161290323, "grad_norm": 0.09091247618198395, "learning_rate": 7.289113363633215e-05, "loss": 1.5109, "step": 1186 }, { "epoch": 1.1965725806451613, "grad_norm": 0.10874085873365402, "learning_rate": 7.273481198527285e-05, "loss": 1.539, "step": 1187 }, { "epoch": 1.1975806451612903, "grad_norm": 0.26701486110687256, "learning_rate": 7.257856227831738e-05, "loss": 1.5231, "step": 1188 }, { "epoch": 1.1985887096774193, "grad_norm": 0.11611919850111008, "learning_rate": 7.242238492775869e-05, "loss": 1.5325, "step": 1189 }, { "epoch": 1.1995967741935485, "grad_norm": 0.09033048897981644, "learning_rate": 7.226628034569886e-05, "loss": 1.5223, "step": 1190 }, { "epoch": 1.2006048387096775, "grad_norm": 0.09677241742610931, "learning_rate": 7.211024894404788e-05, "loss": 1.5277, "step": 1191 }, { "epoch": 1.2016129032258065, "grad_norm": 0.09878189116716385, "learning_rate": 7.195429113452271e-05, "loss": 1.525, "step": 1192 }, { "epoch": 1.2026209677419355, "grad_norm": 0.09079443663358688, "learning_rate": 7.179840732864604e-05, "loss": 1.4836, "step": 1193 }, { "epoch": 1.2036290322580645, "grad_norm": 0.10527854412794113, "learning_rate": 7.16425979377454e-05, "loss": 1.5068, "step": 1194 }, { "epoch": 1.2046370967741935, "grad_norm": 0.1167258769273758, "learning_rate": 7.148686337295181e-05, "loss": 1.4535, "step": 1195 }, { "epoch": 1.2056451612903225, "grad_norm": 0.09939006716012955, "learning_rate": 7.133120404519903e-05, "loss": 1.4873, "step": 1196 }, { "epoch": 1.2066532258064515, "grad_norm": 0.09883987158536911, "learning_rate": 7.117562036522213e-05, "loss": 1.5022, "step": 1197 }, { "epoch": 1.2076612903225807, "grad_norm": 0.10209079831838608, "learning_rate": 7.102011274355667e-05, "loss": 1.5432, "step": 1198 }, { "epoch": 1.2086693548387097, "grad_norm": 0.10384919494390488, "learning_rate": 7.086468159053751e-05, "loss": 1.5568, "step": 1199 }, { "epoch": 1.2096774193548387, "grad_norm": 0.1108224168419838, "learning_rate": 7.070932731629769e-05, "loss": 1.4903, "step": 1200 } ], "logging_steps": 1, "max_steps": 1984, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.728889247690916e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }