{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.999915498738517,
  "global_step": 51770,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.1583011583011583e-06,
      "loss": 2.5038,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00028957528957528956,
      "loss": 1.5196,
      "step": 250
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0005791505791505791,
      "loss": 1.297,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0005999696654176312,
      "loss": 1.2658,
      "step": 750
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0005998690722049872,
      "loss": 1.2455,
      "step": 1000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0005996980613784548,
      "loss": 1.2319,
      "step": 1250
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0005994566730961414,
      "loss": 1.2214,
      "step": 1500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0005991449640427416,
      "loss": 1.213,
      "step": 1750
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0005987630074162269,
      "loss": 1.2059,
      "step": 2000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0005983108929106564,
      "loss": 1.2019,
      "step": 2250
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0005977887266951138,
      "loss": 1.1952,
      "step": 2500
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0005971966313887766,
      "loss": 1.1878,
      "step": 2750
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0005965347460321212,
      "loss": 1.1876,
      "step": 3000
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0005958032260542726,
      "loss": 1.1812,
      "step": 3250
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0005950022432365049,
      "loss": 1.1786,
      "step": 3500
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0005941319856719031,
      "loss": 1.1741,
      "step": 3750
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0005931926577211924,
      "loss": 1.1714,
      "step": 4000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0005921844799647499,
      "loss": 1.1685,
      "step": 4250
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0005911076891508052,
      "loss": 1.166,
      "step": 4500
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0005899625381398457,
      "loss": 1.1635,
      "step": 4750
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.0005887492958452381,
      "loss": 1.1602,
      "step": 5000
    },
    {
      "epoch": 1.0,
      "eval_alliteration_score": 0.33734156646083846,
      "eval_harmonic_meter_score": 0.08910893104383713,
      "eval_harmonic_rhyme_score": 0.18416322432066148,
      "eval_meter_score": 0.27888669912946146,
      "eval_rhyme_score": 0.6019525602542674,
      "eval_runtime": 1534.0921,
      "eval_samples_per_second": 1.76,
      "eval_steps_per_second": 0.22,
      "step": 5177
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.0005874682471700796,
      "loss": 1.1552,
      "step": 5250
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.0005861196929402952,
      "loss": 1.1417,
      "step": 5500
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.0005847039498339947,
      "loss": 1.1433,
      "step": 5750
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.0005832213503071088,
      "loss": 1.1418,
      "step": 6000
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.0005816722425153186,
      "loss": 1.1408,
      "step": 6250
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.0005800569902322985,
      "loss": 1.1394,
      "step": 6500
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0005783759727642932,
      "loss": 1.1364,
      "step": 6750
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.0005766295848610451,
      "loss": 1.1355,
      "step": 7000
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.0005748182366230962,
      "loss": 1.1337,
      "step": 7250
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0005729423534054853,
      "loss": 1.1325,
      "step": 7500
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0005710023757178627,
      "loss": 1.1299,
      "step": 7750
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.000568998759121046,
      "loss": 1.1285,
      "step": 8000
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.0005669319741200425,
      "loss": 1.1278,
      "step": 8250
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.0005648025060535602,
      "loss": 1.1261,
      "step": 8500
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.0005626108549800381,
      "loss": 1.1245,
      "step": 8750
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.0005603575355602176,
      "loss": 1.123,
      "step": 9000
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.0005580430769362867,
      "loss": 1.123,
      "step": 9250
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.0005556680226076214,
      "loss": 1.1201,
      "step": 9500
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.0005532329303031583,
      "loss": 1.12,
      "step": 9750
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0005507383718504232,
      "loss": 1.1176,
      "step": 10000
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0005481849330412508,
      "loss": 1.1157,
      "step": 10250
    },
    {
      "epoch": 2.0,
      "eval_alliteration_score": 0.38736706413148564,
      "eval_harmonic_meter_score": 0.0895880700551104,
      "eval_harmonic_rhyme_score": 0.41931891077613487,
      "eval_meter_score": 0.3107654652536241,
      "eval_rhyme_score": 0.7829217375053705,
      "eval_runtime": 1335.1994,
      "eval_samples_per_second": 2.022,
      "eval_steps_per_second": 0.253,
      "step": 10354
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.000545573213494224,
      "loss": 1.1078,
      "step": 10500
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.0005429038265138671,
      "loss": 1.1014,
      "step": 10750
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0005401773989466244,
      "loss": 1.1022,
      "step": 11000
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.0005373945710336596,
      "loss": 1.1014,
      "step": 11250
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.0005345559962605089,
      "loss": 1.1008,
      "step": 11500
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0005316623412036252,
      "loss": 1.0992,
      "step": 11750
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.000528714285373846,
      "loss": 1.0989,
      "step": 12000
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.0005257125210568268,
      "loss": 1.0979,
      "step": 12250
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.0005226577531504722,
      "loss": 1.0984,
      "step": 12500
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.0005195506989994064,
      "loss": 1.0965,
      "step": 12750
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0005163920882265211,
      "loss": 1.0963,
      "step": 13000
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.0005131826625616392,
      "loss": 1.0953,
      "step": 13250
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.0005099231756673361,
      "loss": 1.0929,
      "step": 13500
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0005066143929619589,
      "loss": 1.0941,
      "step": 13750
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.000503257091439885,
      "loss": 1.0925,
      "step": 14000
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.0004998520594890613,
      "loss": 1.0895,
      "step": 14250
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.00049640009670587,
      "loss": 1.09,
      "step": 14500
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.0004929020137073603,
      "loss": 1.0894,
      "step": 14750
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.0004893586319408926,
      "loss": 1.0892,
      "step": 15000
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0004857707834912409,
      "loss": 1.0881,
      "step": 15250
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.0004821393108851951,
      "loss": 1.0867,
      "step": 15500
    },
    {
      "epoch": 3.0,
      "eval_alliteration_score": 0.3956901936289819,
      "eval_harmonic_meter_score": 0.10651791396667262,
      "eval_harmonic_rhyme_score": 0.4171799492877077,
      "eval_meter_score": 0.3202445754833885,
      "eval_rhyme_score": 0.7817402156606966,
      "eval_runtime": 1356.2806,
      "eval_samples_per_second": 1.991,
      "eval_steps_per_second": 0.249,
      "step": 15531
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.0004784650668937127,
      "loss": 1.0717,
      "step": 15750
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.0004747489143316642,
      "loss": 1.0692,
      "step": 16000
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.0004709917258552203,
      "loss": 1.071,
      "step": 16250
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.00046719438375692797,
      "loss": 1.0713,
      "step": 16500
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.0004633577797585233,
      "loss": 1.0712,
      "step": 16750
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0004594828148015305,
      "loss": 1.0705,
      "step": 17000
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.00045557039883569595,
      "loss": 1.0694,
      "step": 17250
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.0004516214506053063,
      "loss": 1.0682,
      "step": 17500
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.000447636897433442,
      "loss": 1.07,
      "step": 17750
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.000443617675004216,
      "loss": 1.0672,
      "step": 18000
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.00043956472714304834,
      "loss": 1.0667,
      "step": 18250
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.0004354790055950309,
      "loss": 1.0662,
      "step": 18500
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.0004313614698014302,
      "loss": 1.0656,
      "step": 18750
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.00042721308667438394,
      "loss": 1.0649,
      "step": 19000
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.00042303483036984366,
      "loss": 1.0636,
      "step": 19250
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.00041882768205881495,
      "loss": 1.0629,
      "step": 19500
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.00041459262969695184,
      "loss": 1.0627,
      "step": 19750
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.0004103306677925571,
      "loss": 1.0611,
      "step": 20000
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.00040604279717304357,
      "loss": 1.0595,
      "step": 20250
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.0004017300247499127,
      "loss": 1.0609,
      "step": 20500
    },
    {
      "epoch": 4.0,
      "eval_alliteration_score": 0.41442581461562794,
      "eval_harmonic_meter_score": 0.0987295348952947,
      "eval_harmonic_rhyme_score": 0.4578505943539395,
      "eval_meter_score": 0.3220205905401726,
      "eval_rhyme_score": 0.8007234223820661,
      "eval_runtime": 1328.0652,
      "eval_samples_per_second": 2.033,
      "eval_steps_per_second": 0.255,
      "step": 20708
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.00039739336328230323,
      "loss": 1.0365,
      "step": 20750
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.00039303383113916687,
      "loss": 1.0371,
      "step": 21000
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.00038865245206012774,
      "loss": 1.0398,
      "step": 21250
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.00038425025491507883,
      "loss": 1.0413,
      "step": 21500
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.0003798282734625755,
      "loss": 1.0416,
      "step": 21750
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.0003753875461070794,
      "loss": 1.0409,
      "step": 22000
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.0003709291156551129,
      "loss": 1.0413,
      "step": 22250
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.0003664540290703784,
      "loss": 1.0401,
      "step": 22500
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.00036196333722790264,
      "loss": 1.0395,
      "step": 22750
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.00035745809466726145,
      "loss": 1.038,
      "step": 23000
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.0003529393593449451,
      "loss": 1.0382,
      "step": 23250
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.00034840819238591994,
      "loss": 1.0386,
      "step": 23500
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.0003438656578344473,
      "loss": 1.0371,
      "step": 23750
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.0003393128224042155,
      "loss": 1.0361,
      "step": 24000
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.0003347507552278469,
      "loss": 1.0355,
      "step": 24250
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.00033018052760583447,
      "loss": 1.0334,
      "step": 24500
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.0003256032127549717,
      "loss": 1.0341,
      "step": 24750
    },
    {
      "epoch": 4.83,
      "learning_rate": 0.0003210198855563304,
      "loss": 1.0338,
      "step": 25000
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.00031643162230284954,
      "loss": 1.0315,
      "step": 25250
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.00031183950044659135,
      "loss": 1.0321,
      "step": 25500
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.0003072445983457252,
      "loss": 1.0313,
      "step": 25750
    },
    {
      "epoch": 5.0,
      "eval_alliteration_score": 0.41847826086956524,
      "eval_harmonic_meter_score": 0.10429340770685562,
      "eval_harmonic_rhyme_score": 0.44835226139123785,
      "eval_meter_score": 0.31689277852422454,
      "eval_rhyme_score": 0.7882407888398814,
      "eval_runtime": 1355.1583,
      "eval_samples_per_second": 1.992,
      "eval_steps_per_second": 0.249,
      "step": 25885
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.0003026479950112996,
      "loss": 1.0191,
      "step": 26000
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.00029805076985386,
      "loss": 1.0078,
      "step": 26250
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.00029345400242997323,
      "loss": 1.0091,
      "step": 26500
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.0002888587721887175,
      "loss": 1.0091,
      "step": 26750
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.0002842661582181979,
      "loss": 1.01,
      "step": 27000
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.000279677238992146,
      "loss": 1.009,
      "step": 27250
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.00027509309211666463,
      "loss": 1.0085,
      "step": 27500
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.0002705147940771754,
      "loss": 1.0078,
      "step": 27750
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.0002659434199856307,
      "loss": 1.0075,
      "step": 28000
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.0002613800433280466,
      "loss": 1.0069,
      "step": 28250
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.0002568257357124192,
      "loss": 1.0053,
      "step": 28500
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.0002522815666170804,
      "loss": 1.0038,
      "step": 28750
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.00024774860313955555,
      "loss": 1.0052,
      "step": 29000
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.00024322790974597822,
      "loss": 1.003,
      "step": 29250
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.00023872054802112475,
      "loss": 1.0028,
      "step": 29500
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.00023422757641912385,
      "loss": 1.0033,
      "step": 29750
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.0002297500500149027,
      "loss": 1.001,
      "step": 30000
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.00022528902025642543,
      "loss": 1.0013,
      "step": 30250
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.00022084553471778432,
      "loss": 0.9993,
      "step": 30500
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.00021642063685319983,
      "loss": 0.9969,
      "step": 30750
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.00021201536575198834,
      "loss": 0.9967,
      "step": 31000
    },
    {
      "epoch": 6.0,
      "eval_alliteration_score": 0.4239095315024233,
      "eval_harmonic_meter_score": 0.10471658916077409,
      "eval_harmonic_rhyme_score": 0.5099131421399749,
      "eval_meter_score": 0.3276790822599601,
      "eval_rhyme_score": 0.8216395530060676,
      "eval_runtime": 1328.5938,
      "eval_samples_per_second": 2.032,
      "eval_steps_per_second": 0.254,
      "step": 31062
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.00020763075589455592,
      "loss": 0.98,
      "step": 31250
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.00020326783690947226,
      "loss": 0.9726,
      "step": 31500
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00019892763333168628,
      "loss": 0.9741,
      "step": 31750
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.000194611164361936,
      "loss": 0.9746,
      "step": 32000
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.0001903194436274124,
      "loss": 0.9738,
      "step": 32250
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.0001860534789437309,
      "loss": 0.9742,
      "step": 32500
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.00018181427207826875,
      "loss": 0.9748,
      "step": 32750
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.0001776028185149218,
      "loss": 0.9728,
      "step": 33000
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.00017342010722033724,
      "loss": 0.9748,
      "step": 33250
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.00016926712041167666,
      "loss": 0.9698,
      "step": 33500
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.00016514483332596397,
      "loss": 0.9703,
      "step": 33750
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.000161054213991073,
      "loss": 0.9726,
      "step": 34000
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.00015699622299840705,
      "loss": 0.9695,
      "step": 34250
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.00015297181327732549,
      "loss": 0.9697,
      "step": 34500
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.00014898192987136932,
      "loss": 0.9681,
      "step": 34750
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.000145027509716339,
      "loss": 0.9671,
      "step": 35000
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.0001411094814202753,
      "loss": 0.9667,
      "step": 35250
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.00013722876504539635,
      "loss": 0.9644,
      "step": 35500
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.00013338627189204153,
      "loss": 0.9647,
      "step": 35750
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.0001295829042846731,
      "loss": 0.9636,
      "step": 36000
    },
    {
      "epoch": 7.0,
      "eval_alliteration_score": 0.439117929050815,
      "eval_harmonic_meter_score": 0.1013845208427906,
      "eval_harmonic_rhyme_score": 0.5635069892236091,
      "eval_meter_score": 0.32354736135648304,
      "eval_rhyme_score": 0.8347036594862961,
      "eval_runtime": 1288.6194,
      "eval_samples_per_second": 2.095,
      "eval_steps_per_second": 0.262,
      "step": 36239
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.00012581955535998448,
      "loss": 0.9639,
      "step": 36250
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.0001220971088571674,
      "loss": 0.9378,
      "step": 36500
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.00011841643891038518,
      "loss": 0.9388,
      "step": 36750
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.00011477840984350193,
      "loss": 0.9395,
      "step": 37000
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.00011118387596711477,
      "loss": 0.9403,
      "step": 37250
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.00010763368137793809,
      "loss": 0.9399,
      "step": 37500
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.00010412865976058613,
      "loss": 0.939,
      "step": 37750
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.00010066963419180093,
      "loss": 0.9391,
      "step": 38000
    },
    {
      "epoch": 7.39,
      "learning_rate": 9.725741694717035e-05,
      "loss": 0.9372,
      "step": 38250
    },
    {
      "epoch": 7.44,
      "learning_rate": 9.389280931038336e-05,
      "loss": 0.9398,
      "step": 38500
    },
    {
      "epoch": 7.48,
      "learning_rate": 9.057660138506682e-05,
      "loss": 0.9387,
      "step": 38750
    },
    {
      "epoch": 7.53,
      "learning_rate": 8.730957190924632e-05,
      "loss": 0.9378,
      "step": 39000
    },
    {
      "epoch": 7.58,
      "learning_rate": 8.409248807247727e-05,
      "loss": 0.9378,
      "step": 39250
    },
    {
      "epoch": 7.63,
      "learning_rate": 8.092610533568725e-05,
      "loss": 0.9366,
      "step": 39500
    },
    {
      "epoch": 7.68,
      "learning_rate": 7.781116725377309e-05,
      "loss": 0.935,
      "step": 39750
    },
    {
      "epoch": 7.73,
      "learning_rate": 7.474840530099277e-05,
      "loss": 0.9356,
      "step": 40000
    },
    {
      "epoch": 7.77,
      "learning_rate": 7.173853869919559e-05,
      "loss": 0.9341,
      "step": 40250
    },
    {
      "epoch": 7.82,
      "learning_rate": 6.878227424892822e-05,
      "loss": 0.936,
      "step": 40500
    },
    {
      "epoch": 7.87,
      "learning_rate": 6.588030616345898e-05,
      "loss": 0.9338,
      "step": 40750
    },
    {
      "epoch": 7.92,
      "learning_rate": 6.303331590575642e-05,
      "loss": 0.9329,
      "step": 41000
    },
    {
      "epoch": 7.97,
      "learning_rate": 6.0241972028463316e-05,
      "loss": 0.9333,
      "step": 41250
    },
    {
      "epoch": 8.0,
      "eval_alliteration_score": 0.42669584245076586,
      "eval_harmonic_meter_score": 0.10211813060163026,
      "eval_harmonic_rhyme_score": 0.5177186665744951,
      "eval_meter_score": 0.323707550822104,
      "eval_rhyme_score": 0.8264104348934438,
      "eval_runtime": 1297.8514,
      "eval_samples_per_second": 2.08,
      "eval_steps_per_second": 0.26,
      "step": 41416
    },
    {
      "epoch": 8.02,
      "learning_rate": 5.7506930016901755e-05,
      "loss": 0.9268,
      "step": 41500
    },
    {
      "epoch": 8.06,
      "learning_rate": 5.4828832135146994e-05,
      "loss": 0.9115,
      "step": 41750
    },
    {
      "epoch": 8.11,
      "learning_rate": 5.2208307275205774e-05,
      "loss": 0.9122,
      "step": 42000
    },
    {
      "epoch": 8.16,
      "learning_rate": 4.9645970809335146e-05,
      "loss": 0.9119,
      "step": 42250
    },
    {
      "epoch": 8.21,
      "learning_rate": 4.7142424445535695e-05,
      "loss": 0.912,
      "step": 42500
    },
    {
      "epoch": 8.26,
      "learning_rate": 4.4698256086254156e-05,
      "loss": 0.9114,
      "step": 42750
    },
    {
      "epoch": 8.31,
      "learning_rate": 4.231403969032698e-05,
      "loss": 0.9132,
      "step": 43000
    },
    {
      "epoch": 8.35,
      "learning_rate": 3.999033513819922e-05,
      "loss": 0.9138,
      "step": 43250
    },
    {
      "epoch": 8.4,
      "learning_rate": 3.772768810044874e-05,
      "loss": 0.9122,
      "step": 43500
    },
    {
      "epoch": 8.45,
      "learning_rate": 3.552662990964793e-05,
      "loss": 0.9115,
      "step": 43750
    },
    {
      "epoch": 8.5,
      "learning_rate": 3.338767743559162e-05,
      "loss": 0.9135,
      "step": 44000
    },
    {
      "epoch": 8.55,
      "learning_rate": 3.131133296392159e-05,
      "loss": 0.9113,
      "step": 44250
    },
    {
      "epoch": 8.6,
      "learning_rate": 2.929808407817651e-05,
      "loss": 0.9122,
      "step": 44500
    },
    {
      "epoch": 8.64,
      "learning_rate": 2.734840354529305e-05,
      "loss": 0.9112,
      "step": 44750
    },
    {
      "epoch": 8.69,
      "learning_rate": 2.5462749204587507e-05,
      "loss": 0.9105,
      "step": 45000
    },
    {
      "epoch": 8.74,
      "learning_rate": 2.3641563860241965e-05,
      "loss": 0.9095,
      "step": 45250
    },
    {
      "epoch": 8.79,
      "learning_rate": 2.1885275177322048e-05,
      "loss": 0.9109,
      "step": 45500
    },
    {
      "epoch": 8.84,
      "learning_rate": 2.019429558134873e-05,
      "loss": 0.9095,
      "step": 45750
    },
    {
      "epoch": 8.89,
      "learning_rate": 1.856902216144962e-05,
      "loss": 0.9089,
      "step": 46000
    },
    {
      "epoch": 8.93,
      "learning_rate": 1.7009836577111302e-05,
      "loss": 0.9091,
      "step": 46250
    },
    {
      "epoch": 8.98,
      "learning_rate": 1.551710496855515e-05,
      "loss": 0.9084,
      "step": 46500
    },
    {
      "epoch": 9.0,
      "eval_alliteration_score": 0.43871778755499685,
      "eval_harmonic_meter_score": 0.10228823262732262,
      "eval_harmonic_rhyme_score": 0.5493860049384648,
      "eval_meter_score": 0.32683853787547473,
      "eval_rhyme_score": 0.8411704041762098,
      "eval_runtime": 1292.8693,
      "eval_samples_per_second": 2.088,
      "eval_steps_per_second": 0.261,
      "step": 46593
    },
    {
      "epoch": 9.03,
      "learning_rate": 1.4091177870757209e-05,
      "loss": 0.9044,
      "step": 46750
    },
    {
      "epoch": 9.08,
      "learning_rate": 1.2732390131132907e-05,
      "loss": 0.8983,
      "step": 47000
    },
    {
      "epoch": 9.13,
      "learning_rate": 1.1441060830905591e-05,
      "loss": 0.8971,
      "step": 47250
    },
    {
      "epoch": 9.18,
      "learning_rate": 1.0217493210177418e-05,
      "loss": 0.8978,
      "step": 47500
    },
    {
      "epoch": 9.22,
      "learning_rate": 9.061974596719934e-06,
      "loss": 0.8991,
      "step": 47750
    },
    {
      "epoch": 9.27,
      "learning_rate": 7.974776338501631e-06,
      "loss": 0.8969,
      "step": 48000
    },
    {
      "epoch": 9.32,
      "learning_rate": 6.956153739967863e-06,
      "loss": 0.9,
      "step": 48250
    },
    {
      "epoch": 9.37,
      "learning_rate": 6.0063460020883915e-06,
      "loss": 0.898,
      "step": 48500
    },
    {
      "epoch": 9.42,
      "learning_rate": 5.125576166185996e-06,
      "loss": 0.8977,
      "step": 48750
    },
    {
      "epoch": 9.46,
      "learning_rate": 4.314051061560497e-06,
      "loss": 0.8983,
      "step": 49000
    },
    {
      "epoch": 9.51,
      "learning_rate": 3.571961256919276e-06,
      "loss": 0.897,
      "step": 49250
    },
    {
      "epoch": 9.56,
      "learning_rate": 2.8994810156265035e-06,
      "loss": 0.8978,
      "step": 49500
    },
    {
      "epoch": 9.61,
      "learning_rate": 2.2967682547812782e-06,
      "loss": 0.8968,
      "step": 49750
    },
    {
      "epoch": 9.66,
      "learning_rate": 1.7639645081341524e-06,
      "loss": 0.8982,
      "step": 50000
    },
    {
      "epoch": 9.71,
      "learning_rate": 1.3011948928511873e-06,
      "loss": 0.8971,
      "step": 50250
    },
    {
      "epoch": 9.75,
      "learning_rate": 9.085680801330208e-07,
      "loss": 0.8975,
      "step": 50500
    },
    {
      "epoch": 9.8,
      "learning_rate": 5.861762696956151e-07,
      "loss": 0.897,
      "step": 50750
    },
    {
      "epoch": 9.85,
      "learning_rate": 3.340951681194082e-07,
      "loss": 0.8984,
      "step": 51000
    },
    {
      "epoch": 9.9,
      "learning_rate": 1.523839710711683e-07,
      "loss": 0.897,
      "step": 51250
    },
    {
      "epoch": 9.95,
      "learning_rate": 4.108534940331365e-08,
      "loss": 0.8969,
      "step": 51500
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.2543913346106945e-10,
      "loss": 0.8982,
      "step": 51750
    },
    {
      "epoch": 10.0,
      "eval_alliteration_score": 0.4247429105640386,
      "eval_harmonic_meter_score": 0.11325822463626577,
      "eval_harmonic_rhyme_score": 0.5430495530851162,
      "eval_meter_score": 0.33602397160908126,
      "eval_rhyme_score": 0.8321207205854919,
      "eval_runtime": 1301.4663,
      "eval_samples_per_second": 2.075,
      "eval_steps_per_second": 0.26,
      "step": 51770
    },
    {
      "epoch": 10.0,
      "step": 51770,
      "total_flos": 5.976175248261775e+17,
      "train_loss": 0.575801368235897,
      "train_runtime": 18250.6753,
      "train_samples_per_second": 363.114,
      "train_steps_per_second": 2.837
    }
  ],
  "max_steps": 51770,
  "num_train_epochs": 10,
  "total_flos": 5.976175248261775e+17,
  "trial_name": null,
  "trial_params": null
}