|
{ |
|
"best_metric": 50.781657113079724, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Vietnamese-VTB/checkpoint-15000", |
|
"epoch": 340.90909090909093, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.52e-05, |
|
"loss": 4.4887, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 7.949530201342283e-05, |
|
"loss": 1.7443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 7.895838926174497e-05, |
|
"loss": 0.8269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.842147651006712e-05, |
|
"loss": 0.4089, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 7.788456375838927e-05, |
|
"loss": 0.2343, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"eval_las": 48.28903943025882, |
|
"eval_loss": 3.7556207180023193, |
|
"eval_runtime": 5.1839, |
|
"eval_samples_per_second": 154.325, |
|
"eval_steps_per_second": 19.291, |
|
"eval_uas": 59.15407330206705, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 7.734765100671142e-05, |
|
"loss": 0.1547, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 7.681073825503357e-05, |
|
"loss": 0.1145, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 7.627382550335572e-05, |
|
"loss": 0.089, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 7.573691275167786e-05, |
|
"loss": 0.0777, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 7.52e-05, |
|
"loss": 0.0697, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"eval_las": 47.168664234844535, |
|
"eval_loss": 5.077505588531494, |
|
"eval_runtime": 5.1923, |
|
"eval_samples_per_second": 154.073, |
|
"eval_steps_per_second": 19.259, |
|
"eval_uas": 57.755775577557756, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 7.466308724832215e-05, |
|
"loss": 0.0611, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 7.41261744966443e-05, |
|
"loss": 0.0498, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 7.358926174496644e-05, |
|
"loss": 0.049, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 7.305234899328859e-05, |
|
"loss": 0.0443, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 7.251543624161074e-05, |
|
"loss": 0.04, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_las": 48.20218863991662, |
|
"eval_loss": 5.622523307800293, |
|
"eval_runtime": 5.1912, |
|
"eval_samples_per_second": 154.107, |
|
"eval_steps_per_second": 19.263, |
|
"eval_uas": 58.580858085808586, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 7.197852348993289e-05, |
|
"loss": 0.0428, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 7.144161073825504e-05, |
|
"loss": 0.0402, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 7.090469798657718e-05, |
|
"loss": 0.0353, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 43.18, |
|
"learning_rate": 7.036778523489933e-05, |
|
"loss": 0.0292, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 6.983087248322148e-05, |
|
"loss": 0.0363, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"eval_las": 48.53222164321696, |
|
"eval_loss": 6.002054214477539, |
|
"eval_runtime": 5.1905, |
|
"eval_samples_per_second": 154.128, |
|
"eval_steps_per_second": 19.266, |
|
"eval_uas": 59.37988535695675, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 6.929395973154363e-05, |
|
"loss": 0.0321, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 6.875704697986578e-05, |
|
"loss": 0.031, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 52.27, |
|
"learning_rate": 6.822013422818793e-05, |
|
"loss": 0.0282, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 6.768322147651007e-05, |
|
"loss": 0.0237, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"learning_rate": 6.714630872483222e-05, |
|
"loss": 0.0238, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"eval_las": 48.27166927219038, |
|
"eval_loss": 6.428050994873047, |
|
"eval_runtime": 5.1921, |
|
"eval_samples_per_second": 154.079, |
|
"eval_steps_per_second": 19.26, |
|
"eval_uas": 59.05853743269064, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 59.09, |
|
"learning_rate": 6.660939597315437e-05, |
|
"loss": 0.0246, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 61.36, |
|
"learning_rate": 6.607248322147652e-05, |
|
"loss": 0.0224, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 6.553557046979867e-05, |
|
"loss": 0.0231, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 65.91, |
|
"learning_rate": 6.499865771812081e-05, |
|
"loss": 0.0224, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 6.446174496644296e-05, |
|
"loss": 0.0184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"eval_las": 48.966475594927914, |
|
"eval_loss": 7.119157791137695, |
|
"eval_runtime": 5.1953, |
|
"eval_samples_per_second": 153.986, |
|
"eval_steps_per_second": 19.248, |
|
"eval_uas": 59.47542122633316, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 70.45, |
|
"learning_rate": 6.392483221476511e-05, |
|
"loss": 0.02, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 6.338791946308726e-05, |
|
"loss": 0.0198, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 6.28510067114094e-05, |
|
"loss": 0.0196, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 77.27, |
|
"learning_rate": 6.231409395973154e-05, |
|
"loss": 0.0177, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 79.55, |
|
"learning_rate": 6.177718120805369e-05, |
|
"loss": 0.0149, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 79.55, |
|
"eval_las": 48.75803369810665, |
|
"eval_loss": 7.0862298011779785, |
|
"eval_runtime": 5.1933, |
|
"eval_samples_per_second": 154.044, |
|
"eval_steps_per_second": 19.256, |
|
"eval_uas": 59.44936598923051, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 6.124026845637584e-05, |
|
"loss": 0.0138, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 84.09, |
|
"learning_rate": 6.070335570469799e-05, |
|
"loss": 0.0173, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 86.36, |
|
"learning_rate": 6.0166442953020136e-05, |
|
"loss": 0.0165, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 5.962953020134229e-05, |
|
"loss": 0.015, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 5.909261744966444e-05, |
|
"loss": 0.0132, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_las": 49.035956227201666, |
|
"eval_loss": 7.622977256774902, |
|
"eval_runtime": 5.1898, |
|
"eval_samples_per_second": 154.148, |
|
"eval_steps_per_second": 19.268, |
|
"eval_uas": 59.31908980371722, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 93.18, |
|
"learning_rate": 5.855570469798659e-05, |
|
"loss": 0.0154, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 5.8018791946308735e-05, |
|
"loss": 0.0131, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 97.73, |
|
"learning_rate": 5.7481879194630884e-05, |
|
"loss": 0.0124, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 5.694496644295303e-05, |
|
"loss": 0.0125, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 102.27, |
|
"learning_rate": 5.6408053691275166e-05, |
|
"loss": 0.0142, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 102.27, |
|
"eval_las": 49.27045336112559, |
|
"eval_loss": 7.618669509887695, |
|
"eval_runtime": 5.1961, |
|
"eval_samples_per_second": 153.962, |
|
"eval_steps_per_second": 19.245, |
|
"eval_uas": 59.47542122633316, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 104.55, |
|
"learning_rate": 5.5871140939597315e-05, |
|
"loss": 0.0131, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 106.82, |
|
"learning_rate": 5.533422818791946e-05, |
|
"loss": 0.0126, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"learning_rate": 5.479731543624161e-05, |
|
"loss": 0.0101, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 111.36, |
|
"learning_rate": 5.426040268456376e-05, |
|
"loss": 0.0094, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 113.64, |
|
"learning_rate": 5.372348993288591e-05, |
|
"loss": 0.0098, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 113.64, |
|
"eval_las": 49.678652075733886, |
|
"eval_loss": 7.6977925300598145, |
|
"eval_runtime": 5.1957, |
|
"eval_samples_per_second": 153.974, |
|
"eval_steps_per_second": 19.247, |
|
"eval_uas": 59.82282438770192, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 115.91, |
|
"learning_rate": 5.3186577181208056e-05, |
|
"loss": 0.0096, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"learning_rate": 5.2649664429530204e-05, |
|
"loss": 0.009, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 120.45, |
|
"learning_rate": 5.211275167785235e-05, |
|
"loss": 0.0104, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 122.73, |
|
"learning_rate": 5.15758389261745e-05, |
|
"loss": 0.0111, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 5.1038926174496656e-05, |
|
"loss": 0.0095, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_las": 49.17491749174918, |
|
"eval_loss": 7.922099590301514, |
|
"eval_runtime": 5.1917, |
|
"eval_samples_per_second": 154.091, |
|
"eval_steps_per_second": 19.261, |
|
"eval_uas": 59.80545422963349, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"learning_rate": 5.050201342281879e-05, |
|
"loss": 0.0104, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 129.55, |
|
"learning_rate": 4.996510067114094e-05, |
|
"loss": 0.0103, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 131.82, |
|
"learning_rate": 4.942818791946309e-05, |
|
"loss": 0.0104, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 134.09, |
|
"learning_rate": 4.8891275167785235e-05, |
|
"loss": 0.0073, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"learning_rate": 4.835436241610738e-05, |
|
"loss": 0.0085, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"eval_las": 49.504950495049506, |
|
"eval_loss": 8.327244758605957, |
|
"eval_runtime": 5.1925, |
|
"eval_samples_per_second": 154.068, |
|
"eval_steps_per_second": 19.258, |
|
"eval_uas": 59.73597359735974, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 138.64, |
|
"learning_rate": 4.781744966442953e-05, |
|
"loss": 0.0076, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 140.91, |
|
"learning_rate": 4.728053691275168e-05, |
|
"loss": 0.0078, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 143.18, |
|
"learning_rate": 4.674362416107383e-05, |
|
"loss": 0.007, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"learning_rate": 4.6206711409395976e-05, |
|
"loss": 0.005, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 147.73, |
|
"learning_rate": 4.5669798657718125e-05, |
|
"loss": 0.005, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 147.73, |
|
"eval_las": 49.71339239187076, |
|
"eval_loss": 8.44931697845459, |
|
"eval_runtime": 5.1924, |
|
"eval_samples_per_second": 154.072, |
|
"eval_steps_per_second": 19.259, |
|
"eval_uas": 59.96178565224943, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 4.513288590604027e-05, |
|
"loss": 0.0064, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 152.27, |
|
"learning_rate": 4.459597315436242e-05, |
|
"loss": 0.0049, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 154.55, |
|
"learning_rate": 4.405906040268456e-05, |
|
"loss": 0.0039, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 156.82, |
|
"learning_rate": 4.352214765100671e-05, |
|
"loss": 0.0044, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 159.09, |
|
"learning_rate": 4.298523489932886e-05, |
|
"loss": 0.0052, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 159.09, |
|
"eval_las": 49.70470731283655, |
|
"eval_loss": 8.672440528869629, |
|
"eval_runtime": 5.1898, |
|
"eval_samples_per_second": 154.15, |
|
"eval_steps_per_second": 19.269, |
|
"eval_uas": 60.36998436685774, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 161.36, |
|
"learning_rate": 4.244832214765101e-05, |
|
"loss": 0.0049, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 163.64, |
|
"learning_rate": 4.1911409395973156e-05, |
|
"loss": 0.0089, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 165.91, |
|
"learning_rate": 4.1374496644295304e-05, |
|
"loss": 0.0036, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 168.18, |
|
"learning_rate": 4.083758389261745e-05, |
|
"loss": 0.0056, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 170.45, |
|
"learning_rate": 4.03006711409396e-05, |
|
"loss": 0.0038, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 170.45, |
|
"eval_las": 49.58311620635748, |
|
"eval_loss": 8.693171501159668, |
|
"eval_runtime": 5.1886, |
|
"eval_samples_per_second": 154.183, |
|
"eval_steps_per_second": 19.273, |
|
"eval_uas": 60.700017370158065, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 172.73, |
|
"learning_rate": 3.976375838926175e-05, |
|
"loss": 0.0039, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 3.92268456375839e-05, |
|
"loss": 0.003, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 177.27, |
|
"learning_rate": 3.8689932885906045e-05, |
|
"loss": 0.0025, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 179.55, |
|
"learning_rate": 3.815302013422819e-05, |
|
"loss": 0.003, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"learning_rate": 3.761610738255034e-05, |
|
"loss": 0.0035, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"eval_las": 50.20844189682126, |
|
"eval_loss": 8.397028923034668, |
|
"eval_runtime": 5.1807, |
|
"eval_samples_per_second": 154.42, |
|
"eval_steps_per_second": 19.303, |
|
"eval_uas": 60.552371026576346, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 184.09, |
|
"learning_rate": 3.707919463087249e-05, |
|
"loss": 0.0034, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 186.36, |
|
"learning_rate": 3.654228187919463e-05, |
|
"loss": 0.0023, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 188.64, |
|
"learning_rate": 3.600536912751678e-05, |
|
"loss": 0.004, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 190.91, |
|
"learning_rate": 3.546845637583893e-05, |
|
"loss": 0.0031, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 193.18, |
|
"learning_rate": 3.4931543624161076e-05, |
|
"loss": 0.0037, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 193.18, |
|
"eval_las": 49.7741879451103, |
|
"eval_loss": 8.718303680419922, |
|
"eval_runtime": 5.1796, |
|
"eval_samples_per_second": 154.452, |
|
"eval_steps_per_second": 19.306, |
|
"eval_uas": 60.54368594754212, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 195.45, |
|
"learning_rate": 3.4394630872483224e-05, |
|
"loss": 0.003, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 197.73, |
|
"learning_rate": 3.385771812080537e-05, |
|
"loss": 0.004, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 3.332080536912752e-05, |
|
"loss": 0.004, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 202.27, |
|
"learning_rate": 3.278389261744967e-05, |
|
"loss": 0.0023, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 204.55, |
|
"learning_rate": 3.224697986577182e-05, |
|
"loss": 0.0029, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 204.55, |
|
"eval_las": 49.73944762897342, |
|
"eval_loss": 8.950847625732422, |
|
"eval_runtime": 5.1787, |
|
"eval_samples_per_second": 154.478, |
|
"eval_steps_per_second": 19.31, |
|
"eval_uas": 60.40472468299461, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 206.82, |
|
"learning_rate": 3.1710067114093965e-05, |
|
"loss": 0.0023, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 209.09, |
|
"learning_rate": 3.1173154362416114e-05, |
|
"loss": 0.004, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 211.36, |
|
"learning_rate": 3.0636241610738255e-05, |
|
"loss": 0.0023, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 213.64, |
|
"learning_rate": 3.0099328859060403e-05, |
|
"loss": 0.0056, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 215.91, |
|
"learning_rate": 2.956241610738255e-05, |
|
"loss": 0.0022, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 215.91, |
|
"eval_las": 49.75681778704186, |
|
"eval_loss": 8.873396873474121, |
|
"eval_runtime": 5.1795, |
|
"eval_samples_per_second": 154.455, |
|
"eval_steps_per_second": 19.307, |
|
"eval_uas": 60.23102310231023, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 218.18, |
|
"learning_rate": 2.90255033557047e-05, |
|
"loss": 0.0024, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 220.45, |
|
"learning_rate": 2.8488590604026848e-05, |
|
"loss": 0.0018, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 222.73, |
|
"learning_rate": 2.7951677852348996e-05, |
|
"loss": 0.0013, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 2.741476510067114e-05, |
|
"loss": 0.0016, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 227.27, |
|
"learning_rate": 2.687785234899329e-05, |
|
"loss": 0.0015, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 227.27, |
|
"eval_las": 50.225812054889694, |
|
"eval_loss": 9.140296936035156, |
|
"eval_runtime": 5.1821, |
|
"eval_samples_per_second": 154.379, |
|
"eval_steps_per_second": 19.297, |
|
"eval_uas": 60.70870244919229, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 229.55, |
|
"learning_rate": 2.6340939597315438e-05, |
|
"loss": 0.0013, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 231.82, |
|
"learning_rate": 2.5804026845637586e-05, |
|
"loss": 0.0013, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 234.09, |
|
"learning_rate": 2.5267114093959734e-05, |
|
"loss": 0.0031, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 236.36, |
|
"learning_rate": 2.4730201342281882e-05, |
|
"loss": 0.0018, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 238.64, |
|
"learning_rate": 2.4193288590604027e-05, |
|
"loss": 0.0015, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 238.64, |
|
"eval_las": 49.86972381448671, |
|
"eval_loss": 9.165424346923828, |
|
"eval_runtime": 5.1856, |
|
"eval_samples_per_second": 154.274, |
|
"eval_steps_per_second": 19.284, |
|
"eval_uas": 60.63922181691853, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 240.91, |
|
"learning_rate": 2.3656375838926175e-05, |
|
"loss": 0.0019, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 243.18, |
|
"learning_rate": 2.3119463087248324e-05, |
|
"loss": 0.0012, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 245.45, |
|
"learning_rate": 2.2582550335570472e-05, |
|
"loss": 0.0016, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 247.73, |
|
"learning_rate": 2.204563758389262e-05, |
|
"loss": 0.0005, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 2.150872483221477e-05, |
|
"loss": 0.0007, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_las": 50.48636442591628, |
|
"eval_loss": 8.970284461975098, |
|
"eval_runtime": 5.2059, |
|
"eval_samples_per_second": 153.671, |
|
"eval_steps_per_second": 19.209, |
|
"eval_uas": 61.09953100573215, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 252.27, |
|
"learning_rate": 2.0971812080536913e-05, |
|
"loss": 0.0005, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 254.55, |
|
"learning_rate": 2.043489932885906e-05, |
|
"loss": 0.0004, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 256.82, |
|
"learning_rate": 1.989798657718121e-05, |
|
"loss": 0.0004, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 259.09, |
|
"learning_rate": 1.9361073825503358e-05, |
|
"loss": 0.0009, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 261.36, |
|
"learning_rate": 1.8824161073825503e-05, |
|
"loss": 0.0009, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 261.36, |
|
"eval_las": 50.13027618551329, |
|
"eval_loss": 9.078216552734375, |
|
"eval_runtime": 5.2191, |
|
"eval_samples_per_second": 153.282, |
|
"eval_steps_per_second": 19.16, |
|
"eval_uas": 60.769498002431824, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 263.64, |
|
"learning_rate": 1.828724832214765e-05, |
|
"loss": 0.0005, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 265.91, |
|
"learning_rate": 1.77503355704698e-05, |
|
"loss": 0.0004, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 268.18, |
|
"learning_rate": 1.7213422818791948e-05, |
|
"loss": 0.0004, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 270.45, |
|
"learning_rate": 1.6676510067114096e-05, |
|
"loss": 0.0003, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 272.73, |
|
"learning_rate": 1.6139597315436244e-05, |
|
"loss": 0.0022, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 272.73, |
|
"eval_las": 50.13896126454751, |
|
"eval_loss": 8.995458602905273, |
|
"eval_runtime": 5.1822, |
|
"eval_samples_per_second": 154.375, |
|
"eval_steps_per_second": 19.297, |
|
"eval_uas": 60.61316657981588, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 1.560268456375839e-05, |
|
"loss": 0.0007, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 277.27, |
|
"learning_rate": 1.5065771812080539e-05, |
|
"loss": 0.0006, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 279.55, |
|
"learning_rate": 1.4528859060402685e-05, |
|
"loss": 0.0011, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 281.82, |
|
"learning_rate": 1.3991946308724834e-05, |
|
"loss": 0.0007, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 284.09, |
|
"learning_rate": 1.3455033557046982e-05, |
|
"loss": 0.0008, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 284.09, |
|
"eval_las": 50.23449713392392, |
|
"eval_loss": 9.353437423706055, |
|
"eval_runtime": 5.1815, |
|
"eval_samples_per_second": 154.397, |
|
"eval_steps_per_second": 19.3, |
|
"eval_uas": 60.969254820218865, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 286.36, |
|
"learning_rate": 1.2918120805369127e-05, |
|
"loss": 0.0004, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 288.64, |
|
"learning_rate": 1.2381208053691277e-05, |
|
"loss": 0.0005, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 290.91, |
|
"learning_rate": 1.1844295302013425e-05, |
|
"loss": 0.0004, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 293.18, |
|
"learning_rate": 1.130738255033557e-05, |
|
"loss": 0.0005, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 295.45, |
|
"learning_rate": 1.0770469798657718e-05, |
|
"loss": 0.0006, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 295.45, |
|
"eval_las": 50.62532569046378, |
|
"eval_loss": 9.346752166748047, |
|
"eval_runtime": 5.1837, |
|
"eval_samples_per_second": 154.331, |
|
"eval_steps_per_second": 19.291, |
|
"eval_uas": 61.54247003647734, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 297.73, |
|
"learning_rate": 1.0233557046979868e-05, |
|
"loss": 0.0004, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 9.696644295302015e-06, |
|
"loss": 0.0004, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 302.27, |
|
"learning_rate": 9.159731543624161e-06, |
|
"loss": 0.0003, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 304.55, |
|
"learning_rate": 8.62281879194631e-06, |
|
"loss": 0.0003, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 306.82, |
|
"learning_rate": 8.085906040268458e-06, |
|
"loss": 0.0003, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 306.82, |
|
"eval_las": 50.37345839847143, |
|
"eval_loss": 9.42686653137207, |
|
"eval_runtime": 5.1835, |
|
"eval_samples_per_second": 154.335, |
|
"eval_steps_per_second": 19.292, |
|
"eval_uas": 61.281917665450756, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 309.09, |
|
"learning_rate": 7.548993288590605e-06, |
|
"loss": 0.0003, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 311.36, |
|
"learning_rate": 7.012080536912752e-06, |
|
"loss": 0.0003, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 313.64, |
|
"learning_rate": 6.475167785234899e-06, |
|
"loss": 0.0003, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 315.91, |
|
"learning_rate": 5.938255033557048e-06, |
|
"loss": 0.0002, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 318.18, |
|
"learning_rate": 5.401342281879195e-06, |
|
"loss": 0.0002, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 318.18, |
|
"eval_las": 50.64269584853223, |
|
"eval_loss": 9.432953834533691, |
|
"eval_runtime": 5.1928, |
|
"eval_samples_per_second": 154.06, |
|
"eval_steps_per_second": 19.258, |
|
"eval_uas": 61.50772972034046, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 320.45, |
|
"learning_rate": 4.864429530201343e-06, |
|
"loss": 0.0002, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 322.73, |
|
"learning_rate": 4.32751677852349e-06, |
|
"loss": 0.0003, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 3.790604026845638e-06, |
|
"loss": 0.0002, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 327.27, |
|
"learning_rate": 3.253691275167786e-06, |
|
"loss": 0.0003, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 329.55, |
|
"learning_rate": 2.716778523489933e-06, |
|
"loss": 0.0002, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 329.55, |
|
"eval_las": 50.73823171790863, |
|
"eval_loss": 9.487878799438477, |
|
"eval_runtime": 5.2057, |
|
"eval_samples_per_second": 153.676, |
|
"eval_steps_per_second": 19.21, |
|
"eval_uas": 61.42087892999827, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 331.82, |
|
"learning_rate": 2.1798657718120807e-06, |
|
"loss": 0.0002, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 334.09, |
|
"learning_rate": 1.6429530201342283e-06, |
|
"loss": 0.0002, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 336.36, |
|
"learning_rate": 1.1060402684563759e-06, |
|
"loss": 0.0003, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 338.64, |
|
"learning_rate": 5.691275167785235e-07, |
|
"loss": 0.0002, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 340.91, |
|
"learning_rate": 3.2214765100671145e-08, |
|
"loss": 0.0002, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 340.91, |
|
"eval_las": 50.781657113079724, |
|
"eval_loss": 9.509201049804688, |
|
"eval_runtime": 5.1995, |
|
"eval_samples_per_second": 153.862, |
|
"eval_steps_per_second": 19.233, |
|
"eval_uas": 61.53378495744312, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 340.91, |
|
"step": 15000, |
|
"total_flos": 7.969472217071616e+16, |
|
"train_loss": 0.06309178725369274, |
|
"train_runtime": 7782.9093, |
|
"train_samples_per_second": 61.674, |
|
"train_steps_per_second": 1.927 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 341, |
|
"total_flos": 7.969472217071616e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|