{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.999915498738517,
"global_step": 51770,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.1583011583011583e-06,
"loss": 2.5038,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 0.00028957528957528956,
"loss": 1.5196,
"step": 250
},
{
"epoch": 0.1,
"learning_rate": 0.0005791505791505791,
"loss": 1.297,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 0.0005999696654176312,
"loss": 1.2658,
"step": 750
},
{
"epoch": 0.19,
"learning_rate": 0.0005998690722049872,
"loss": 1.2455,
"step": 1000
},
{
"epoch": 0.24,
"learning_rate": 0.0005996980613784548,
"loss": 1.2319,
"step": 1250
},
{
"epoch": 0.29,
"learning_rate": 0.0005994566730961414,
"loss": 1.2214,
"step": 1500
},
{
"epoch": 0.34,
"learning_rate": 0.0005991449640427416,
"loss": 1.213,
"step": 1750
},
{
"epoch": 0.39,
"learning_rate": 0.0005987630074162269,
"loss": 1.2059,
"step": 2000
},
{
"epoch": 0.43,
"learning_rate": 0.0005983108929106564,
"loss": 1.2019,
"step": 2250
},
{
"epoch": 0.48,
"learning_rate": 0.0005977887266951138,
"loss": 1.1952,
"step": 2500
},
{
"epoch": 0.53,
"learning_rate": 0.0005971966313887766,
"loss": 1.1878,
"step": 2750
},
{
"epoch": 0.58,
"learning_rate": 0.0005965347460321212,
"loss": 1.1876,
"step": 3000
},
{
"epoch": 0.63,
"learning_rate": 0.0005958032260542726,
"loss": 1.1812,
"step": 3250
},
{
"epoch": 0.68,
"learning_rate": 0.0005950022432365049,
"loss": 1.1786,
"step": 3500
},
{
"epoch": 0.72,
"learning_rate": 0.0005941319856719031,
"loss": 1.1741,
"step": 3750
},
{
"epoch": 0.77,
"learning_rate": 0.0005931926577211924,
"loss": 1.1714,
"step": 4000
},
{
"epoch": 0.82,
"learning_rate": 0.0005921844799647499,
"loss": 1.1685,
"step": 4250
},
{
"epoch": 0.87,
"learning_rate": 0.0005911076891508052,
"loss": 1.166,
"step": 4500
},
{
"epoch": 0.92,
"learning_rate": 0.0005899625381398457,
"loss": 1.1635,
"step": 4750
},
{
"epoch": 0.97,
"learning_rate": 0.0005887492958452381,
"loss": 1.1602,
"step": 5000
},
{
"epoch": 1.0,
"eval_alliteration_score": 0.33734156646083846,
"eval_harmonic_meter_score": 0.08910893104383713,
"eval_harmonic_rhyme_score": 0.18416322432066148,
"eval_meter_score": 0.27888669912946146,
"eval_rhyme_score": 0.6019525602542674,
"eval_runtime": 1534.0921,
"eval_samples_per_second": 1.76,
"eval_steps_per_second": 0.22,
"step": 5177
},
{
"epoch": 1.01,
"learning_rate": 0.0005874682471700796,
"loss": 1.1552,
"step": 5250
},
{
"epoch": 1.06,
"learning_rate": 0.0005861196929402952,
"loss": 1.1417,
"step": 5500
},
{
"epoch": 1.11,
"learning_rate": 0.0005847039498339947,
"loss": 1.1433,
"step": 5750
},
{
"epoch": 1.16,
"learning_rate": 0.0005832213503071088,
"loss": 1.1418,
"step": 6000
},
{
"epoch": 1.21,
"learning_rate": 0.0005816722425153186,
"loss": 1.1408,
"step": 6250
},
{
"epoch": 1.26,
"learning_rate": 0.0005800569902322985,
"loss": 1.1394,
"step": 6500
},
{
"epoch": 1.3,
"learning_rate": 0.0005783759727642932,
"loss": 1.1364,
"step": 6750
},
{
"epoch": 1.35,
"learning_rate": 0.0005766295848610451,
"loss": 1.1355,
"step": 7000
},
{
"epoch": 1.4,
"learning_rate": 0.0005748182366230962,
"loss": 1.1337,
"step": 7250
},
{
"epoch": 1.45,
"learning_rate": 0.0005729423534054853,
"loss": 1.1325,
"step": 7500
},
{
"epoch": 1.5,
"learning_rate": 0.0005710023757178627,
"loss": 1.1299,
"step": 7750
},
{
"epoch": 1.55,
"learning_rate": 0.000568998759121046,
"loss": 1.1285,
"step": 8000
},
{
"epoch": 1.59,
"learning_rate": 0.0005669319741200425,
"loss": 1.1278,
"step": 8250
},
{
"epoch": 1.64,
"learning_rate": 0.0005648025060535602,
"loss": 1.1261,
"step": 8500
},
{
"epoch": 1.69,
"learning_rate": 0.0005626108549800381,
"loss": 1.1245,
"step": 8750
},
{
"epoch": 1.74,
"learning_rate": 0.0005603575355602176,
"loss": 1.123,
"step": 9000
},
{
"epoch": 1.79,
"learning_rate": 0.0005580430769362867,
"loss": 1.123,
"step": 9250
},
{
"epoch": 1.83,
"learning_rate": 0.0005556680226076214,
"loss": 1.1201,
"step": 9500
},
{
"epoch": 1.88,
"learning_rate": 0.0005532329303031583,
"loss": 1.12,
"step": 9750
},
{
"epoch": 1.93,
"learning_rate": 0.0005507383718504232,
"loss": 1.1176,
"step": 10000
},
{
"epoch": 1.98,
"learning_rate": 0.0005481849330412508,
"loss": 1.1157,
"step": 10250
},
{
"epoch": 2.0,
"eval_alliteration_score": 0.38736706413148564,
"eval_harmonic_meter_score": 0.0895880700551104,
"eval_harmonic_rhyme_score": 0.41931891077613487,
"eval_meter_score": 0.3107654652536241,
"eval_rhyme_score": 0.7829217375053705,
"eval_runtime": 1335.1994,
"eval_samples_per_second": 2.022,
"eval_steps_per_second": 0.253,
"step": 10354
},
{
"epoch": 2.03,
"learning_rate": 0.000545573213494224,
"loss": 1.1078,
"step": 10500
},
{
"epoch": 2.08,
"learning_rate": 0.0005429038265138671,
"loss": 1.1014,
"step": 10750
},
{
"epoch": 2.12,
"learning_rate": 0.0005401773989466244,
"loss": 1.1022,
"step": 11000
},
{
"epoch": 2.17,
"learning_rate": 0.0005373945710336596,
"loss": 1.1014,
"step": 11250
},
{
"epoch": 2.22,
"learning_rate": 0.0005345559962605089,
"loss": 1.1008,
"step": 11500
},
{
"epoch": 2.27,
"learning_rate": 0.0005316623412036252,
"loss": 1.0992,
"step": 11750
},
{
"epoch": 2.32,
"learning_rate": 0.000528714285373846,
"loss": 1.0989,
"step": 12000
},
{
"epoch": 2.37,
"learning_rate": 0.0005257125210568268,
"loss": 1.0979,
"step": 12250
},
{
"epoch": 2.41,
"learning_rate": 0.0005226577531504722,
"loss": 1.0984,
"step": 12500
},
{
"epoch": 2.46,
"learning_rate": 0.0005195506989994064,
"loss": 1.0965,
"step": 12750
},
{
"epoch": 2.51,
"learning_rate": 0.0005163920882265211,
"loss": 1.0963,
"step": 13000
},
{
"epoch": 2.56,
"learning_rate": 0.0005131826625616392,
"loss": 1.0953,
"step": 13250
},
{
"epoch": 2.61,
"learning_rate": 0.0005099231756673361,
"loss": 1.0929,
"step": 13500
},
{
"epoch": 2.66,
"learning_rate": 0.0005066143929619589,
"loss": 1.0941,
"step": 13750
},
{
"epoch": 2.7,
"learning_rate": 0.000503257091439885,
"loss": 1.0925,
"step": 14000
},
{
"epoch": 2.75,
"learning_rate": 0.0004998520594890613,
"loss": 1.0895,
"step": 14250
},
{
"epoch": 2.8,
"learning_rate": 0.00049640009670587,
"loss": 1.09,
"step": 14500
},
{
"epoch": 2.85,
"learning_rate": 0.0004929020137073603,
"loss": 1.0894,
"step": 14750
},
{
"epoch": 2.9,
"learning_rate": 0.0004893586319408926,
"loss": 1.0892,
"step": 15000
},
{
"epoch": 2.95,
"learning_rate": 0.0004857707834912409,
"loss": 1.0881,
"step": 15250
},
{
"epoch": 2.99,
"learning_rate": 0.0004821393108851951,
"loss": 1.0867,
"step": 15500
},
{
"epoch": 3.0,
"eval_alliteration_score": 0.3956901936289819,
"eval_harmonic_meter_score": 0.10651791396667262,
"eval_harmonic_rhyme_score": 0.4171799492877077,
"eval_meter_score": 0.3202445754833885,
"eval_rhyme_score": 0.7817402156606966,
"eval_runtime": 1356.2806,
"eval_samples_per_second": 1.991,
"eval_steps_per_second": 0.249,
"step": 15531
},
{
"epoch": 3.04,
"learning_rate": 0.0004784650668937127,
"loss": 1.0717,
"step": 15750
},
{
"epoch": 3.09,
"learning_rate": 0.0004747489143316642,
"loss": 1.0692,
"step": 16000
},
{
"epoch": 3.14,
"learning_rate": 0.0004709917258552203,
"loss": 1.071,
"step": 16250
},
{
"epoch": 3.19,
"learning_rate": 0.00046719438375692797,
"loss": 1.0713,
"step": 16500
},
{
"epoch": 3.24,
"learning_rate": 0.0004633577797585233,
"loss": 1.0712,
"step": 16750
},
{
"epoch": 3.28,
"learning_rate": 0.0004594828148015305,
"loss": 1.0705,
"step": 17000
},
{
"epoch": 3.33,
"learning_rate": 0.00045557039883569595,
"loss": 1.0694,
"step": 17250
},
{
"epoch": 3.38,
"learning_rate": 0.0004516214506053063,
"loss": 1.0682,
"step": 17500
},
{
"epoch": 3.43,
"learning_rate": 0.000447636897433442,
"loss": 1.07,
"step": 17750
},
{
"epoch": 3.48,
"learning_rate": 0.000443617675004216,
"loss": 1.0672,
"step": 18000
},
{
"epoch": 3.53,
"learning_rate": 0.00043956472714304834,
"loss": 1.0667,
"step": 18250
},
{
"epoch": 3.57,
"learning_rate": 0.0004354790055950309,
"loss": 1.0662,
"step": 18500
},
{
"epoch": 3.62,
"learning_rate": 0.0004313614698014302,
"loss": 1.0656,
"step": 18750
},
{
"epoch": 3.67,
"learning_rate": 0.00042721308667438394,
"loss": 1.0649,
"step": 19000
},
{
"epoch": 3.72,
"learning_rate": 0.00042303483036984366,
"loss": 1.0636,
"step": 19250
},
{
"epoch": 3.77,
"learning_rate": 0.00041882768205881495,
"loss": 1.0629,
"step": 19500
},
{
"epoch": 3.81,
"learning_rate": 0.00041459262969695184,
"loss": 1.0627,
"step": 19750
},
{
"epoch": 3.86,
"learning_rate": 0.0004103306677925571,
"loss": 1.0611,
"step": 20000
},
{
"epoch": 3.91,
"learning_rate": 0.00040604279717304357,
"loss": 1.0595,
"step": 20250
},
{
"epoch": 3.96,
"learning_rate": 0.0004017300247499127,
"loss": 1.0609,
"step": 20500
},
{
"epoch": 4.0,
"eval_alliteration_score": 0.41442581461562794,
"eval_harmonic_meter_score": 0.0987295348952947,
"eval_harmonic_rhyme_score": 0.4578505943539395,
"eval_meter_score": 0.3220205905401726,
"eval_rhyme_score": 0.8007234223820661,
"eval_runtime": 1328.0652,
"eval_samples_per_second": 2.033,
"eval_steps_per_second": 0.255,
"step": 20708
},
{
"epoch": 4.01,
"learning_rate": 0.00039739336328230323,
"loss": 1.0365,
"step": 20750
},
{
"epoch": 4.06,
"learning_rate": 0.00039303383113916687,
"loss": 1.0371,
"step": 21000
},
{
"epoch": 4.1,
"learning_rate": 0.00038865245206012774,
"loss": 1.0398,
"step": 21250
},
{
"epoch": 4.15,
"learning_rate": 0.00038425025491507883,
"loss": 1.0413,
"step": 21500
},
{
"epoch": 4.2,
"learning_rate": 0.0003798282734625755,
"loss": 1.0416,
"step": 21750
},
{
"epoch": 4.25,
"learning_rate": 0.0003753875461070794,
"loss": 1.0409,
"step": 22000
},
{
"epoch": 4.3,
"learning_rate": 0.0003709291156551129,
"loss": 1.0413,
"step": 22250
},
{
"epoch": 4.35,
"learning_rate": 0.0003664540290703784,
"loss": 1.0401,
"step": 22500
},
{
"epoch": 4.39,
"learning_rate": 0.00036196333722790264,
"loss": 1.0395,
"step": 22750
},
{
"epoch": 4.44,
"learning_rate": 0.00035745809466726145,
"loss": 1.038,
"step": 23000
},
{
"epoch": 4.49,
"learning_rate": 0.0003529393593449451,
"loss": 1.0382,
"step": 23250
},
{
"epoch": 4.54,
"learning_rate": 0.00034840819238591994,
"loss": 1.0386,
"step": 23500
},
{
"epoch": 4.59,
"learning_rate": 0.0003438656578344473,
"loss": 1.0371,
"step": 23750
},
{
"epoch": 4.64,
"learning_rate": 0.0003393128224042155,
"loss": 1.0361,
"step": 24000
},
{
"epoch": 4.68,
"learning_rate": 0.0003347507552278469,
"loss": 1.0355,
"step": 24250
},
{
"epoch": 4.73,
"learning_rate": 0.00033018052760583447,
"loss": 1.0334,
"step": 24500
},
{
"epoch": 4.78,
"learning_rate": 0.0003256032127549717,
"loss": 1.0341,
"step": 24750
},
{
"epoch": 4.83,
"learning_rate": 0.0003210198855563304,
"loss": 1.0338,
"step": 25000
},
{
"epoch": 4.88,
"learning_rate": 0.00031643162230284954,
"loss": 1.0315,
"step": 25250
},
{
"epoch": 4.93,
"learning_rate": 0.00031183950044659135,
"loss": 1.0321,
"step": 25500
},
{
"epoch": 4.97,
"learning_rate": 0.0003072445983457252,
"loss": 1.0313,
"step": 25750
},
{
"epoch": 5.0,
"eval_alliteration_score": 0.41847826086956524,
"eval_harmonic_meter_score": 0.10429340770685562,
"eval_harmonic_rhyme_score": 0.44835226139123785,
"eval_meter_score": 0.31689277852422454,
"eval_rhyme_score": 0.7882407888398814,
"eval_runtime": 1355.1583,
"eval_samples_per_second": 1.992,
"eval_steps_per_second": 0.249,
"step": 25885
},
{
"epoch": 5.02,
"learning_rate": 0.0003026479950112996,
"loss": 1.0191,
"step": 26000
},
{
"epoch": 5.07,
"learning_rate": 0.00029805076985386,
"loss": 1.0078,
"step": 26250
},
{
"epoch": 5.12,
"learning_rate": 0.00029345400242997323,
"loss": 1.0091,
"step": 26500
},
{
"epoch": 5.17,
"learning_rate": 0.0002888587721887175,
"loss": 1.0091,
"step": 26750
},
{
"epoch": 5.22,
"learning_rate": 0.0002842661582181979,
"loss": 1.01,
"step": 27000
},
{
"epoch": 5.26,
"learning_rate": 0.000279677238992146,
"loss": 1.009,
"step": 27250
},
{
"epoch": 5.31,
"learning_rate": 0.00027509309211666463,
"loss": 1.0085,
"step": 27500
},
{
"epoch": 5.36,
"learning_rate": 0.0002705147940771754,
"loss": 1.0078,
"step": 27750
},
{
"epoch": 5.41,
"learning_rate": 0.0002659434199856307,
"loss": 1.0075,
"step": 28000
},
{
"epoch": 5.46,
"learning_rate": 0.0002613800433280466,
"loss": 1.0069,
"step": 28250
},
{
"epoch": 5.51,
"learning_rate": 0.0002568257357124192,
"loss": 1.0053,
"step": 28500
},
{
"epoch": 5.55,
"learning_rate": 0.0002522815666170804,
"loss": 1.0038,
"step": 28750
},
{
"epoch": 5.6,
"learning_rate": 0.00024774860313955555,
"loss": 1.0052,
"step": 29000
},
{
"epoch": 5.65,
"learning_rate": 0.00024322790974597822,
"loss": 1.003,
"step": 29250
},
{
"epoch": 5.7,
"learning_rate": 0.00023872054802112475,
"loss": 1.0028,
"step": 29500
},
{
"epoch": 5.75,
"learning_rate": 0.00023422757641912385,
"loss": 1.0033,
"step": 29750
},
{
"epoch": 5.79,
"learning_rate": 0.0002297500500149027,
"loss": 1.001,
"step": 30000
},
{
"epoch": 5.84,
"learning_rate": 0.00022528902025642543,
"loss": 1.0013,
"step": 30250
},
{
"epoch": 5.89,
"learning_rate": 0.00022084553471778432,
"loss": 0.9993,
"step": 30500
},
{
"epoch": 5.94,
"learning_rate": 0.00021642063685319983,
"loss": 0.9969,
"step": 30750
},
{
"epoch": 5.99,
"learning_rate": 0.00021201536575198834,
"loss": 0.9967,
"step": 31000
},
{
"epoch": 6.0,
"eval_alliteration_score": 0.4239095315024233,
"eval_harmonic_meter_score": 0.10471658916077409,
"eval_harmonic_rhyme_score": 0.5099131421399749,
"eval_meter_score": 0.3276790822599601,
"eval_rhyme_score": 0.8216395530060676,
"eval_runtime": 1328.5938,
"eval_samples_per_second": 2.032,
"eval_steps_per_second": 0.254,
"step": 31062
},
{
"epoch": 6.04,
"learning_rate": 0.00020763075589455592,
"loss": 0.98,
"step": 31250
},
{
"epoch": 6.08,
"learning_rate": 0.00020326783690947226,
"loss": 0.9726,
"step": 31500
},
{
"epoch": 6.13,
"learning_rate": 0.00019892763333168628,
"loss": 0.9741,
"step": 31750
},
{
"epoch": 6.18,
"learning_rate": 0.000194611164361936,
"loss": 0.9746,
"step": 32000
},
{
"epoch": 6.23,
"learning_rate": 0.0001903194436274124,
"loss": 0.9738,
"step": 32250
},
{
"epoch": 6.28,
"learning_rate": 0.0001860534789437309,
"loss": 0.9742,
"step": 32500
},
{
"epoch": 6.33,
"learning_rate": 0.00018181427207826875,
"loss": 0.9748,
"step": 32750
},
{
"epoch": 6.37,
"learning_rate": 0.0001776028185149218,
"loss": 0.9728,
"step": 33000
},
{
"epoch": 6.42,
"learning_rate": 0.00017342010722033724,
"loss": 0.9748,
"step": 33250
},
{
"epoch": 6.47,
"learning_rate": 0.00016926712041167666,
"loss": 0.9698,
"step": 33500
},
{
"epoch": 6.52,
"learning_rate": 0.00016514483332596397,
"loss": 0.9703,
"step": 33750
},
{
"epoch": 6.57,
"learning_rate": 0.000161054213991073,
"loss": 0.9726,
"step": 34000
},
{
"epoch": 6.62,
"learning_rate": 0.00015699622299840705,
"loss": 0.9695,
"step": 34250
},
{
"epoch": 6.66,
"learning_rate": 0.00015297181327732549,
"loss": 0.9697,
"step": 34500
},
{
"epoch": 6.71,
"learning_rate": 0.00014898192987136932,
"loss": 0.9681,
"step": 34750
},
{
"epoch": 6.76,
"learning_rate": 0.000145027509716339,
"loss": 0.9671,
"step": 35000
},
{
"epoch": 6.81,
"learning_rate": 0.0001411094814202753,
"loss": 0.9667,
"step": 35250
},
{
"epoch": 6.86,
"learning_rate": 0.00013722876504539635,
"loss": 0.9644,
"step": 35500
},
{
"epoch": 6.91,
"learning_rate": 0.00013338627189204153,
"loss": 0.9647,
"step": 35750
},
{
"epoch": 6.95,
"learning_rate": 0.0001295829042846731,
"loss": 0.9636,
"step": 36000
},
{
"epoch": 7.0,
"eval_alliteration_score": 0.439117929050815,
"eval_harmonic_meter_score": 0.1013845208427906,
"eval_harmonic_rhyme_score": 0.5635069892236091,
"eval_meter_score": 0.32354736135648304,
"eval_rhyme_score": 0.8347036594862961,
"eval_runtime": 1288.6194,
"eval_samples_per_second": 2.095,
"eval_steps_per_second": 0.262,
"step": 36239
},
{
"epoch": 7.0,
"learning_rate": 0.00012581955535998448,
"loss": 0.9639,
"step": 36250
},
{
"epoch": 7.05,
"learning_rate": 0.0001220971088571674,
"loss": 0.9378,
"step": 36500
},
{
"epoch": 7.1,
"learning_rate": 0.00011841643891038518,
"loss": 0.9388,
"step": 36750
},
{
"epoch": 7.15,
"learning_rate": 0.00011477840984350193,
"loss": 0.9395,
"step": 37000
},
{
"epoch": 7.2,
"learning_rate": 0.00011118387596711477,
"loss": 0.9403,
"step": 37250
},
{
"epoch": 7.24,
"learning_rate": 0.00010763368137793809,
"loss": 0.9399,
"step": 37500
},
{
"epoch": 7.29,
"learning_rate": 0.00010412865976058613,
"loss": 0.939,
"step": 37750
},
{
"epoch": 7.34,
"learning_rate": 0.00010066963419180093,
"loss": 0.9391,
"step": 38000
},
{
"epoch": 7.39,
"learning_rate": 9.725741694717035e-05,
"loss": 0.9372,
"step": 38250
},
{
"epoch": 7.44,
"learning_rate": 9.389280931038336e-05,
"loss": 0.9398,
"step": 38500
},
{
"epoch": 7.48,
"learning_rate": 9.057660138506682e-05,
"loss": 0.9387,
"step": 38750
},
{
"epoch": 7.53,
"learning_rate": 8.730957190924632e-05,
"loss": 0.9378,
"step": 39000
},
{
"epoch": 7.58,
"learning_rate": 8.409248807247727e-05,
"loss": 0.9378,
"step": 39250
},
{
"epoch": 7.63,
"learning_rate": 8.092610533568725e-05,
"loss": 0.9366,
"step": 39500
},
{
"epoch": 7.68,
"learning_rate": 7.781116725377309e-05,
"loss": 0.935,
"step": 39750
},
{
"epoch": 7.73,
"learning_rate": 7.474840530099277e-05,
"loss": 0.9356,
"step": 40000
},
{
"epoch": 7.77,
"learning_rate": 7.173853869919559e-05,
"loss": 0.9341,
"step": 40250
},
{
"epoch": 7.82,
"learning_rate": 6.878227424892822e-05,
"loss": 0.936,
"step": 40500
},
{
"epoch": 7.87,
"learning_rate": 6.588030616345898e-05,
"loss": 0.9338,
"step": 40750
},
{
"epoch": 7.92,
"learning_rate": 6.303331590575642e-05,
"loss": 0.9329,
"step": 41000
},
{
"epoch": 7.97,
"learning_rate": 6.0241972028463316e-05,
"loss": 0.9333,
"step": 41250
},
{
"epoch": 8.0,
"eval_alliteration_score": 0.42669584245076586,
"eval_harmonic_meter_score": 0.10211813060163026,
"eval_harmonic_rhyme_score": 0.5177186665744951,
"eval_meter_score": 0.323707550822104,
"eval_rhyme_score": 0.8264104348934438,
"eval_runtime": 1297.8514,
"eval_samples_per_second": 2.08,
"eval_steps_per_second": 0.26,
"step": 41416
},
{
"epoch": 8.02,
"learning_rate": 5.7506930016901755e-05,
"loss": 0.9268,
"step": 41500
},
{
"epoch": 8.06,
"learning_rate": 5.4828832135146994e-05,
"loss": 0.9115,
"step": 41750
},
{
"epoch": 8.11,
"learning_rate": 5.2208307275205774e-05,
"loss": 0.9122,
"step": 42000
},
{
"epoch": 8.16,
"learning_rate": 4.9645970809335146e-05,
"loss": 0.9119,
"step": 42250
},
{
"epoch": 8.21,
"learning_rate": 4.7142424445535695e-05,
"loss": 0.912,
"step": 42500
},
{
"epoch": 8.26,
"learning_rate": 4.4698256086254156e-05,
"loss": 0.9114,
"step": 42750
},
{
"epoch": 8.31,
"learning_rate": 4.231403969032698e-05,
"loss": 0.9132,
"step": 43000
},
{
"epoch": 8.35,
"learning_rate": 3.999033513819922e-05,
"loss": 0.9138,
"step": 43250
},
{
"epoch": 8.4,
"learning_rate": 3.772768810044874e-05,
"loss": 0.9122,
"step": 43500
},
{
"epoch": 8.45,
"learning_rate": 3.552662990964793e-05,
"loss": 0.9115,
"step": 43750
},
{
"epoch": 8.5,
"learning_rate": 3.338767743559162e-05,
"loss": 0.9135,
"step": 44000
},
{
"epoch": 8.55,
"learning_rate": 3.131133296392159e-05,
"loss": 0.9113,
"step": 44250
},
{
"epoch": 8.6,
"learning_rate": 2.929808407817651e-05,
"loss": 0.9122,
"step": 44500
},
{
"epoch": 8.64,
"learning_rate": 2.734840354529305e-05,
"loss": 0.9112,
"step": 44750
},
{
"epoch": 8.69,
"learning_rate": 2.5462749204587507e-05,
"loss": 0.9105,
"step": 45000
},
{
"epoch": 8.74,
"learning_rate": 2.3641563860241965e-05,
"loss": 0.9095,
"step": 45250
},
{
"epoch": 8.79,
"learning_rate": 2.1885275177322048e-05,
"loss": 0.9109,
"step": 45500
},
{
"epoch": 8.84,
"learning_rate": 2.019429558134873e-05,
"loss": 0.9095,
"step": 45750
},
{
"epoch": 8.89,
"learning_rate": 1.856902216144962e-05,
"loss": 0.9089,
"step": 46000
},
{
"epoch": 8.93,
"learning_rate": 1.7009836577111302e-05,
"loss": 0.9091,
"step": 46250
},
{
"epoch": 8.98,
"learning_rate": 1.551710496855515e-05,
"loss": 0.9084,
"step": 46500
},
{
"epoch": 9.0,
"eval_alliteration_score": 0.43871778755499685,
"eval_harmonic_meter_score": 0.10228823262732262,
"eval_harmonic_rhyme_score": 0.5493860049384648,
"eval_meter_score": 0.32683853787547473,
"eval_rhyme_score": 0.8411704041762098,
"eval_runtime": 1292.8693,
"eval_samples_per_second": 2.088,
"eval_steps_per_second": 0.261,
"step": 46593
},
{
"epoch": 9.03,
"learning_rate": 1.4091177870757209e-05,
"loss": 0.9044,
"step": 46750
},
{
"epoch": 9.08,
"learning_rate": 1.2732390131132907e-05,
"loss": 0.8983,
"step": 47000
},
{
"epoch": 9.13,
"learning_rate": 1.1441060830905591e-05,
"loss": 0.8971,
"step": 47250
},
{
"epoch": 9.18,
"learning_rate": 1.0217493210177418e-05,
"loss": 0.8978,
"step": 47500
},
{
"epoch": 9.22,
"learning_rate": 9.061974596719934e-06,
"loss": 0.8991,
"step": 47750
},
{
"epoch": 9.27,
"learning_rate": 7.974776338501631e-06,
"loss": 0.8969,
"step": 48000
},
{
"epoch": 9.32,
"learning_rate": 6.956153739967863e-06,
"loss": 0.9,
"step": 48250
},
{
"epoch": 9.37,
"learning_rate": 6.0063460020883915e-06,
"loss": 0.898,
"step": 48500
},
{
"epoch": 9.42,
"learning_rate": 5.125576166185996e-06,
"loss": 0.8977,
"step": 48750
},
{
"epoch": 9.46,
"learning_rate": 4.314051061560497e-06,
"loss": 0.8983,
"step": 49000
},
{
"epoch": 9.51,
"learning_rate": 3.571961256919276e-06,
"loss": 0.897,
"step": 49250
},
{
"epoch": 9.56,
"learning_rate": 2.8994810156265035e-06,
"loss": 0.8978,
"step": 49500
},
{
"epoch": 9.61,
"learning_rate": 2.2967682547812782e-06,
"loss": 0.8968,
"step": 49750
},
{
"epoch": 9.66,
"learning_rate": 1.7639645081341524e-06,
"loss": 0.8982,
"step": 50000
},
{
"epoch": 9.71,
"learning_rate": 1.3011948928511873e-06,
"loss": 0.8971,
"step": 50250
},
{
"epoch": 9.75,
"learning_rate": 9.085680801330208e-07,
"loss": 0.8975,
"step": 50500
},
{
"epoch": 9.8,
"learning_rate": 5.861762696956151e-07,
"loss": 0.897,
"step": 50750
},
{
"epoch": 9.85,
"learning_rate": 3.340951681194082e-07,
"loss": 0.8984,
"step": 51000
},
{
"epoch": 9.9,
"learning_rate": 1.523839710711683e-07,
"loss": 0.897,
"step": 51250
},
{
"epoch": 9.95,
"learning_rate": 4.108534940331365e-08,
"loss": 0.8969,
"step": 51500
},
{
"epoch": 10.0,
"learning_rate": 2.2543913346106945e-10,
"loss": 0.8982,
"step": 51750
},
{
"epoch": 10.0,
"eval_alliteration_score": 0.4247429105640386,
"eval_harmonic_meter_score": 0.11325822463626577,
"eval_harmonic_rhyme_score": 0.5430495530851162,
"eval_meter_score": 0.33602397160908126,
"eval_rhyme_score": 0.8321207205854919,
"eval_runtime": 1301.4663,
"eval_samples_per_second": 2.075,
"eval_steps_per_second": 0.26,
"step": 51770
},
{
"epoch": 10.0,
"step": 51770,
"total_flos": 5.976175248261775e+17,
"train_loss": 0.575801368235897,
"train_runtime": 18250.6753,
"train_samples_per_second": 363.114,
"train_steps_per_second": 2.837
}
],
"max_steps": 51770,
"num_train_epochs": 10,
"total_flos": 5.976175248261775e+17,
"trial_name": null,
"trial_params": null
}