################################### TRAIN_CONFIG ################################### dataset_dir: ./Audio_XenoCanto labels_list: ./xeno_labels.csv model_name: BirdAST_Baseline_GroupKFold backbone_name: MIT/ast-finetuned-audioset-10-10-0.4593 n_classes: 728 audio_sr: 16000 segment_length: 10 fft_window: 0.025 hop_window_length: 0.01 n_mels: 128 low_cut: 1000 high_cut: 8000 top_db: 100 batch_size: 16 num_workers: 0 n_splits: 5 log_dir: ./training_logs max_lr: 1e-05 epochs: 10 weight_decay: 0.01 lr_final_div: 1000 amp: True grad_accum_steps: 1 max_grad_norm: 10000000.0 print_epoch_freq: 1 print_freq: 500 random_seed: 2046 copy: )> ################################################################################ Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving. Epoch 1 [0/559] | Train Loss: 0.3797 Grad: 132458.4531 LR: 4.0008e-07 | Elapse: 5.22s Epoch 1 [500/559] | Train Loss: 0.1767 Grad: 17217.5918 LR: 9.7549e-06 | Elapse: 632.27s Epoch 1 [558/559] | Train Loss: 0.1659 Grad: 38565.3086 LR: 1.0000e-05 | Elapse: 704.92s Epoch 1 [0/140] | Valid Loss: 0.0956 | Elapse: 1.77s Epoch 1 [139/140] | Valid Loss: 0.1626 | Elapse: 179.13s Epoch 1 - Train Loss: 0.1659 - Valid Loss: 0.5170 - Elapsed Time: 902.38s - Epoch 1: Best model found with loss = 0.5170. Epoch 2 [0/559] | Train Loss: 0.3837 Grad: 82366.4531 LR: 1.0000e-05 | Elapse: 1.39s Epoch 2 [500/559] | Train Loss: 0.1670 Grad: 26346.2246 LR: 9.7564e-06 | Elapse: 647.59s Epoch 2 [558/559] | Train Loss: 0.1563 Grad: 53784.7227 LR: 9.6974e-06 | Elapse: 716.52s Epoch 2 [0/140] | Valid Loss: 0.0949 | Elapse: 1.36s Epoch 2 [139/140] | Valid Loss: 0.1759 | Elapse: 176.02s Epoch 2 - Train Loss: 0.1563 - Valid Loss: 0.5562 - Elapsed Time: 910.59s - Epoch 2: Best model found with loss = 0.5562. Epoch 3 [0/559] | Train Loss: 0.3296 Grad: 136677.4531 LR: 9.6963e-06 | Elapse: 1.60s Epoch 3 [500/559] | Train Loss: 0.1347 Grad: 29127.7148 LR: 8.9422e-06 | Elapse: 630.69s Epoch 3 [558/559] | Train Loss: 0.1259 Grad: 57361.0430 LR: 8.8283e-06 | Elapse: 700.52s Epoch 3 [0/140] | Valid Loss: 0.0909 | Elapse: 1.56s Epoch 3 [139/140] | Valid Loss: 0.1843 | Elapse: 176.22s Epoch 3 - Train Loss: 0.1259 - Valid Loss: 0.6019 - Elapsed Time: 894.87s - Epoch 3: Best model found with loss = 0.6019. Epoch 4 [0/559] | Train Loss: 0.2495 Grad: 174822.3438 LR: 8.8263e-06 | Elapse: 1.03s Epoch 4 [500/559] | Train Loss: 0.0971 Grad: 30384.9941 LR: 7.6526e-06 | Elapse: 616.92s Epoch 4 [558/559] | Train Loss: 0.0909 Grad: 54755.8555 LR: 7.4974e-06 | Elapse: 686.08s Epoch 4 [0/140] | Valid Loss: 0.0883 | Elapse: 0.96s Epoch 4 [139/140] | Valid Loss: 0.1906 | Elapse: 170.98s Epoch 4 - Train Loss: 0.0909 - Valid Loss: 0.6292 - Elapsed Time: 875.26s - Epoch 4: Best model found with loss = 0.6292. Epoch 5 [0/559] | Train Loss: 0.1445 Grad: 179717.0781 LR: 7.4947e-06 | Elapse: 1.67s Epoch 5 [500/559] | Train Loss: 0.0679 Grad: 31367.4883 LR: 6.0431e-06 | Elapse: 636.79s Epoch 5 [558/559] | Train Loss: 0.0638 Grad: 46204.8477 LR: 5.8653e-06 | Elapse: 710.08s Epoch 5 [0/140] | Valid Loss: 0.0862 | Elapse: 1.37s Epoch 5 [139/140] | Valid Loss: 0.1974 | Elapse: 172.42s Epoch 5 - Train Loss: 0.0638 - Valid Loss: 0.6417 - Elapsed Time: 900.70s - Epoch 5: Best model found with loss = 0.6417. Epoch 6 [0/559] | Train Loss: 0.0752 Grad: 150651.5312 LR: 5.8623e-06 | Elapse: 1.26s Epoch 6 [500/559] | Train Loss: 0.0498 Grad: 30212.4238 LR: 4.3078e-06 | Elapse: 625.35s Epoch 6 [558/559] | Train Loss: 0.0471 Grad: 45234.8984 LR: 4.1289e-06 | Elapse: 698.58s Epoch 6 [0/140] | Valid Loss: 0.0843 | Elapse: 1.56s Epoch 6 [139/140] | Valid Loss: 0.2014 | Elapse: 168.62s Epoch 6 - Train Loss: 0.0471 - Valid Loss: 0.6506 - Elapsed Time: 885.11s - Epoch 6: Best model found with loss = 0.6506. Epoch 7 [0/559] | Train Loss: 0.0401 Grad: 110378.2734 LR: 4.1258e-06 | Elapse: 1.55s Epoch 7 [500/559] | Train Loss: 0.0401 Grad: 29949.4160 LR: 2.6560e-06 | Elapse: 747.46s Epoch 7 [558/559] | Train Loss: 0.0381 Grad: 47635.7148 LR: 2.4976e-06 | Elapse: 850.90s Epoch 7 [0/140] | Valid Loss: 0.0835 | Elapse: 1.84s Epoch 7 [139/140] | Valid Loss: 0.2044 | Elapse: 247.83s Epoch 7 - Train Loss: 0.0381 - Valid Loss: 0.6516 - Elapsed Time: 1122.23s - Epoch 7: Best model found with loss = 0.6516. Epoch 8 [0/559] | Train Loss: 0.0310 Grad: 93998.0625 LR: 2.4949e-06 | Elapse: 2.01s Epoch 8 [500/559] | Train Loss: 0.0364 Grad: 34944.8828 LR: 1.2869e-06 | Elapse: 898.62s Epoch 8 [558/559] | Train Loss: 0.0347 Grad: 48920.4258 LR: 1.1681e-06 | Elapse: 1001.16s Epoch 8 [0/140] | Valid Loss: 0.0855 | Elapse: 1.69s Epoch 8 [139/140] | Valid Loss: 0.2072 | Elapse: 250.89s Epoch 8 - Train Loss: 0.0347 - Valid Loss: 0.6495 - Elapsed Time: 1275.92s Epoch 9 [0/559] | Train Loss: 0.0334 Grad: 111821.3047 LR: 1.1661e-06 | Elapse: 1.79s Epoch 9 [500/559] | Train Loss: 0.0380 Grad: 48075.5664 LR: 3.6575e-07 | Elapse: 896.79s Epoch 9 [558/559] | Train Loss: 0.0362 Grad: 48004.2852 LR: 3.0086e-07 | Elapse: 999.50s Epoch 9 [0/140] | Valid Loss: 0.0802 | Elapse: 1.68s Epoch 9 [139/140] | Valid Loss: 0.2040 | Elapse: 247.83s Epoch 9 - Train Loss: 0.0362 - Valid Loss: 0.6773 - Elapsed Time: 1272.24s - Epoch 9: Best model found with loss = 0.6773. Epoch 10 [0/559] | Train Loss: 0.0419 Grad: 138725.0625 LR: 2.9979e-07 | Elapse: 1.85s Epoch 10 [500/559] | Train Loss: 0.0442 Grad: 51908.7266 LR: 3.5668e-09 | Elapse: 851.64s Epoch 10 [558/559] | Train Loss: 0.0418 Grad: 36428.0664 LR: 4.0097e-10 | Elapse: 950.39s Epoch 10 [0/140] | Valid Loss: 0.0763 | Elapse: 1.74s Epoch 10 [139/140] | Valid Loss: 0.2015 | Elapse: 253.39s Epoch 10 - Train Loss: 0.0418 - Valid Loss: 0.6896 - Elapsed Time: 1228.92s - Epoch 10: Best model found with loss = 0.6896. Fold 0 | Time: 171.93min | Overall Evaluation Loss: 0.6896 Epoch 1 [0/559] | Train Loss: 0.4015 Grad: 130138.6250 LR: 4.0008e-07 | Elapse: 1.81s Epoch 1 [500/559] | Train Loss: 0.1759 Grad: 863.0330 LR: 9.7549e-06 | Elapse: 869.10s Epoch 1 [558/559] | Train Loss: 0.1663 Grad: 33445.6641 LR: 1.0000e-05 | Elapse: 956.09s Epoch 1 [0/140] | Valid Loss: 0.2185 | Elapse: 1.43s Epoch 1 [139/140] | Valid Loss: 0.1571 | Elapse: 206.29s Epoch 1 - Train Loss: 0.1663 - Valid Loss: 0.5072 - Elapsed Time: 1181.07s - Epoch 1: Best model found with loss = 0.5072. Epoch 2 [0/559] | Train Loss: 0.3793 Grad: 81459.7891 LR: 1.0000e-05 | Elapse: 1.45s Epoch 2 [500/559] | Train Loss: 0.1659 Grad: 1246.6095 LR: 9.7564e-06 | Elapse: 724.53s Epoch 2 [558/559] | Train Loss: 0.1560 Grad: 45349.8438 LR: 9.6974e-06 | Elapse: 796.86s Epoch 2 [0/140] | Valid Loss: 0.2406 | Elapse: 1.39s Epoch 2 [139/140] | Valid Loss: 0.1642 | Elapse: 172.62s Epoch 2 - Train Loss: 0.1560 - Valid Loss: 0.5597 - Elapsed Time: 988.42s - Epoch 2: Best model found with loss = 0.5597. Epoch 3 [0/559] | Train Loss: 0.3372 Grad: 126511.1250 LR: 9.6963e-06 | Elapse: 1.61s Epoch 3 [500/559] | Train Loss: 0.1332 Grad: 1709.5671 LR: 8.9422e-06 | Elapse: 626.50s Epoch 3 [558/559] | Train Loss: 0.1245 Grad: 48516.1641 LR: 8.8283e-06 | Elapse: 698.54s Epoch 3 [0/140] | Valid Loss: 0.2499 | Elapse: 1.15s Epoch 3 [139/140] | Valid Loss: 0.1690 | Elapse: 175.40s Epoch 3 - Train Loss: 0.1245 - Valid Loss: 0.5997 - Elapsed Time: 892.90s - Epoch 3: Best model found with loss = 0.5997. Epoch 4 [0/559] | Train Loss: 0.2329 Grad: 165485.4688 LR: 8.8263e-06 | Elapse: 1.42s Epoch 4 [500/559] | Train Loss: 0.0928 Grad: 2085.9751 LR: 7.6526e-06 | Elapse: 617.80s Epoch 4 [558/559] | Train Loss: 0.0867 Grad: 45565.9609 LR: 7.4974e-06 | Elapse: 690.54s Epoch 4 [0/140] | Valid Loss: 0.2734 | Elapse: 1.55s Epoch 4 [139/140] | Valid Loss: 0.1746 | Elapse: 167.59s Epoch 4 - Train Loss: 0.0867 - Valid Loss: 0.6215 - Elapsed Time: 877.07s - Epoch 4: Best model found with loss = 0.6215. Epoch 5 [0/559] | Train Loss: 0.1356 Grad: 175726.2500 LR: 7.4947e-06 | Elapse: 1.12s Epoch 5 [500/559] | Train Loss: 0.0635 Grad: 2302.8323 LR: 6.0431e-06 | Elapse: 619.62s Epoch 5 [558/559] | Train Loss: 0.0595 Grad: 41125.3477 LR: 5.8653e-06 | Elapse: 690.76s Epoch 5 [0/140] | Valid Loss: 0.3010 | Elapse: 1.17s Epoch 5 [139/140] | Valid Loss: 0.1791 | Elapse: 169.10s Epoch 5 - Train Loss: 0.0595 - Valid Loss: 0.6472 - Elapsed Time: 878.90s - Epoch 5: Best model found with loss = 0.6472. Epoch 6 [0/559] | Train Loss: 0.0700 Grad: 136908.1094 LR: 5.8623e-06 | Elapse: 1.13s Epoch 6 [500/559] | Train Loss: 0.0446 Grad: 2514.1721 LR: 4.3078e-06 | Elapse: 625.46s Epoch 6 [558/559] | Train Loss: 0.0420 Grad: 37248.8633 LR: 4.1289e-06 | Elapse: 697.66s Epoch 6 [0/140] | Valid Loss: 0.3092 | Elapse: 1.25s Epoch 6 [139/140] | Valid Loss: 0.1812 | Elapse: 171.69s Epoch 6 - Train Loss: 0.0420 - Valid Loss: 0.6583 - Elapsed Time: 888.53s - Epoch 6: Best model found with loss = 0.6583. Epoch 7 [0/559] | Train Loss: 0.0358 Grad: 92237.4297 LR: 4.1258e-06 | Elapse: 1.29s Epoch 7 [500/559] | Train Loss: 0.0349 Grad: 2724.6714 LR: 2.6560e-06 | Elapse: 625.38s Epoch 7 [558/559] | Train Loss: 0.0330 Grad: 36025.4375 LR: 2.4976e-06 | Elapse: 692.62s Epoch 7 [0/140] | Valid Loss: 0.3133 | Elapse: 1.35s Epoch 7 [139/140] | Valid Loss: 0.1820 | Elapse: 169.90s Epoch 7 - Train Loss: 0.0330 - Valid Loss: 0.6669 - Elapsed Time: 881.25s - Epoch 7: Best model found with loss = 0.6669. Epoch 8 [0/559] | Train Loss: 0.0239 Grad: 68634.6406 LR: 2.4949e-06 | Elapse: 0.94s Epoch 8 [500/559] | Train Loss: 0.0310 Grad: 2664.3289 LR: 1.2869e-06 | Elapse: 623.73s Epoch 8 [558/559] | Train Loss: 0.0293 Grad: 35448.2188 LR: 1.1681e-06 | Elapse: 693.87s Epoch 8 [0/140] | Valid Loss: 0.3229 | Elapse: 1.65s Epoch 8 [139/140] | Valid Loss: 0.1835 | Elapse: 170.20s Epoch 8 - Train Loss: 0.0293 - Valid Loss: 0.6775 - Elapsed Time: 883.04s - Epoch 8: Best model found with loss = 0.6775. Epoch 9 [0/559] | Train Loss: 0.0214 Grad: 67280.5625 LR: 1.1661e-06 | Elapse: 1.50s Epoch 9 [500/559] | Train Loss: 0.0320 Grad: 2366.7151 LR: 3.6575e-07 | Elapse: 627.09s Epoch 9 [558/559] | Train Loss: 0.0303 Grad: 33299.4180 LR: 3.0086e-07 | Elapse: 700.43s Epoch 9 [0/140] | Valid Loss: 0.3247 | Elapse: 1.65s Epoch 9 [139/140] | Valid Loss: 0.1822 | Elapse: 171.60s Epoch 9 - Train Loss: 0.0303 - Valid Loss: 0.6887 - Elapsed Time: 891.22s - Epoch 9: Best model found with loss = 0.6887. Epoch 10 [0/559] | Train Loss: 0.0396 Grad: 140012.0156 LR: 2.9979e-07 | Elapse: 1.47s Epoch 10 [500/559] | Train Loss: 0.0399 Grad: 2683.7830 LR: 3.5668e-09 | Elapse: 627.50s Epoch 10 [558/559] | Train Loss: 0.0374 Grad: 31579.2891 LR: 4.0097e-10 | Elapse: 699.21s Epoch 10 [0/140] | Valid Loss: 0.3429 | Elapse: 1.75s Epoch 10 [139/140] | Valid Loss: 0.1868 | Elapse: 170.59s Epoch 10 - Train Loss: 0.0374 - Valid Loss: 0.6865 - Elapsed Time: 888.77s Fold 1 | Time: 154.91min | Overall Evaluation Loss: 0.5993 Epoch 1 [0/559] | Train Loss: 0.4080 Grad: 124709.3203 LR: 4.0008e-07 | Elapse: 1.14s Epoch 1 [500/559] | Train Loss: 0.1747 Grad: 1045.7129 LR: 9.7549e-06 | Elapse: 628.84s Epoch 1 [558/559] | Train Loss: 0.1648 Grad: 1430.1704 LR: 1.0000e-05 | Elapse: 702.27s Epoch 1 [0/140] | Valid Loss: 0.0024 | Elapse: 1.05s Epoch 1 [139/140] | Valid Loss: 0.1646 | Elapse: 172.09s Epoch 1 - Train Loss: 0.1648 - Valid Loss: 0.5391 - Elapsed Time: 892.89s - Epoch 1: Best model found with loss = 0.5391. Epoch 2 [0/559] | Train Loss: 0.3756 Grad: 93382.1719 LR: 1.0000e-05 | Elapse: 1.35s Epoch 2 [500/559] | Train Loss: 0.1645 Grad: 1447.0669 LR: 9.7564e-06 | Elapse: 626.54s Epoch 2 [558/559] | Train Loss: 0.1548 Grad: 2336.7964 LR: 9.6974e-06 | Elapse: 695.28s Epoch 2 [0/140] | Valid Loss: 0.0028 | Elapse: 1.35s Epoch 2 [139/140] | Valid Loss: 0.1744 | Elapse: 168.90s Epoch 2 - Train Loss: 0.1548 - Valid Loss: 0.5480 - Elapsed Time: 882.83s - Epoch 2: Best model found with loss = 0.5480. Epoch 3 [0/559] | Train Loss: 0.3395 Grad: 155200.7188 LR: 9.6963e-06 | Elapse: 1.21s Epoch 3 [500/559] | Train Loss: 0.1350 Grad: 1883.7952 LR: 8.9422e-06 | Elapse: 616.61s Epoch 3 [558/559] | Train Loss: 0.1265 Grad: 3005.8718 LR: 8.8283e-06 | Elapse: 684.27s Epoch 3 [0/140] | Valid Loss: 0.0033 | Elapse: 1.33s Epoch 3 [139/140] | Valid Loss: 0.1848 | Elapse: 169.68s Epoch 3 - Train Loss: 0.1265 - Valid Loss: 0.5793 - Elapsed Time: 872.76s - Epoch 3: Best model found with loss = 0.5793. Epoch 4 [0/559] | Train Loss: 0.2507 Grad: 184021.4375 LR: 8.8263e-06 | Elapse: 1.55s Epoch 4 [500/559] | Train Loss: 0.0979 Grad: 2342.1018 LR: 7.6526e-06 | Elapse: 620.24s Epoch 4 [558/559] | Train Loss: 0.0919 Grad: 3157.7532 LR: 7.4974e-06 | Elapse: 694.89s Epoch 4 [0/140] | Valid Loss: 0.0033 | Elapse: 1.65s Epoch 4 [139/140] | Valid Loss: 0.1921 | Elapse: 171.19s Epoch 4 - Train Loss: 0.0919 - Valid Loss: 0.5966 - Elapsed Time: 884.77s - Epoch 4: Best model found with loss = 0.5966. Epoch 5 [0/559] | Train Loss: 0.1526 Grad: 191586.5938 LR: 7.4947e-06 | Elapse: 1.38s Epoch 5 [500/559] | Train Loss: 0.0690 Grad: 2454.4775 LR: 6.0431e-06 | Elapse: 619.48s Epoch 5 [558/559] | Train Loss: 0.0652 Grad: 3468.5071 LR: 5.8653e-06 | Elapse: 691.21s Epoch 5 [0/140] | Valid Loss: 0.0035 | Elapse: 1.45s Epoch 5 [139/140] | Valid Loss: 0.1998 | Elapse: 171.49s Epoch 5 - Train Loss: 0.0652 - Valid Loss: 0.6213 - Elapsed Time: 881.09s - Epoch 5: Best model found with loss = 0.6213. Epoch 6 [0/559] | Train Loss: 0.0984 Grad: 176191.0312 LR: 5.8623e-06 | Elapse: 1.48s Epoch 6 [500/559] | Train Loss: 0.0498 Grad: 2697.2048 LR: 4.3078e-06 | Elapse: 626.78s Epoch 6 [558/559] | Train Loss: 0.0471 Grad: 3713.1016 LR: 4.1289e-06 | Elapse: 698.05s Epoch 6 [0/140] | Valid Loss: 0.0033 | Elapse: 1.41s Epoch 6 [139/140] | Valid Loss: 0.2037 | Elapse: 168.17s Epoch 6 - Train Loss: 0.0471 - Valid Loss: 0.6357 - Elapsed Time: 885.22s - Epoch 6: Best model found with loss = 0.6357. Epoch 7 [0/559] | Train Loss: 0.0513 Grad: 126208.9375 LR: 4.1258e-06 | Elapse: 1.46s Epoch 7 [500/559] | Train Loss: 0.0387 Grad: 2829.2466 LR: 2.6560e-06 | Elapse: 634.97s Epoch 7 [558/559] | Train Loss: 0.0369 Grad: 3826.6626 LR: 2.4976e-06 | Elapse: 705.57s Epoch 7 [0/140] | Valid Loss: 0.0033 | Elapse: 0.98s Epoch 7 [139/140] | Valid Loss: 0.2071 | Elapse: 172.22s Epoch 7 - Train Loss: 0.0369 - Valid Loss: 0.6424 - Elapsed Time: 896.53s - Epoch 7: Best model found with loss = 0.6424. Epoch 8 [0/559] | Train Loss: 0.0380 Grad: 107768.7891 LR: 2.4949e-06 | Elapse: 1.63s Epoch 8 [500/559] | Train Loss: 0.0336 Grad: 2959.4180 LR: 1.2869e-06 | Elapse: 626.93s Epoch 8 [558/559] | Train Loss: 0.0322 Grad: 3683.2998 LR: 1.1681e-06 | Elapse: 702.47s Epoch 8 [0/140] | Valid Loss: 0.0034 | Elapse: 1.14s Epoch 8 [139/140] | Valid Loss: 0.2092 | Elapse: 171.83s Epoch 8 - Train Loss: 0.0322 - Valid Loss: 0.6436 - Elapsed Time: 892.56s - Epoch 8: Best model found with loss = 0.6436. Epoch 9 [0/559] | Train Loss: 0.0356 Grad: 110887.7266 LR: 1.1661e-06 | Elapse: 1.26s Epoch 9 [500/559] | Train Loss: 0.0349 Grad: 2969.2019 LR: 3.6575e-07 | Elapse: 618.59s Epoch 9 [558/559] | Train Loss: 0.0333 Grad: 3657.1890 LR: 3.0086e-07 | Elapse: 689.52s Epoch 9 [0/140] | Valid Loss: 0.0034 | Elapse: 0.85s Epoch 9 [139/140] | Valid Loss: 0.2080 | Elapse: 169.88s Epoch 9 - Train Loss: 0.0333 - Valid Loss: 0.6454 - Elapsed Time: 877.97s - Epoch 9: Best model found with loss = 0.6454. Epoch 10 [0/559] | Train Loss: 0.0413 Grad: 124596.9844 LR: 2.9979e-07 | Elapse: 1.29s Epoch 10 [500/559] | Train Loss: 0.0474 Grad: 3126.1436 LR: 3.5668e-09 | Elapse: 627.28s Epoch 10 [558/559] | Train Loss: 0.0448 Grad: 4568.4751 LR: 4.0097e-10 | Elapse: 698.02s Epoch 10 [0/140] | Valid Loss: 0.0033 | Elapse: 1.65s Epoch 10 [139/140] | Valid Loss: 0.2082 | Elapse: 171.99s Epoch 10 - Train Loss: 0.0448 - Valid Loss: 0.6580 - Elapsed Time: 888.30s - Epoch 10: Best model found with loss = 0.6580. Fold 2 | Time: 148.58min | Overall Evaluation Loss: 0.5356 Epoch 1 [0/559] | Train Loss: 0.3735 Grad: 136774.1406 LR: 4.0008e-07 | Elapse: 1.12s Epoch 1 [500/559] | Train Loss: 0.1727 Grad: 19389.6543 LR: 9.7549e-06 | Elapse: 623.82s Epoch 1 [558/559] | Train Loss: 0.1621 Grad: 33160.3281 LR: 1.0000e-05 | Elapse: 697.46s Epoch 1 [0/140] | Valid Loss: 0.0017 | Elapse: 1.14s Epoch 1 [139/140] | Valid Loss: 0.1746 | Elapse: 169.70s Epoch 1 - Train Loss: 0.1621 - Valid Loss: 0.5274 - Elapsed Time: 887.75s - Epoch 1: Best model found with loss = 0.5274. Epoch 2 [0/559] | Train Loss: 0.3857 Grad: 82156.1875 LR: 1.0000e-05 | Elapse: 1.27s Epoch 2 [500/559] | Train Loss: 0.1630 Grad: 29308.9199 LR: 9.7564e-06 | Elapse: 623.37s Epoch 2 [558/559] | Train Loss: 0.1524 Grad: 44503.8945 LR: 9.6974e-06 | Elapse: 693.21s Epoch 2 [0/140] | Valid Loss: 0.0018 | Elapse: 1.15s Epoch 2 [139/140] | Valid Loss: 0.1843 | Elapse: 176.11s Epoch 2 - Train Loss: 0.1524 - Valid Loss: 0.5781 - Elapsed Time: 889.88s - Epoch 2: Best model found with loss = 0.5781. Epoch 3 [0/559] | Train Loss: 0.3332 Grad: 135450.9531 LR: 9.6963e-06 | Elapse: 1.49s Epoch 3 [500/559] | Train Loss: 0.1318 Grad: 32993.6094 LR: 8.9422e-06 | Elapse: 622.89s Epoch 3 [558/559] | Train Loss: 0.1228 Grad: 51153.7461 LR: 8.8283e-06 | Elapse: 691.03s Epoch 3 [0/140] | Valid Loss: 0.0020 | Elapse: 1.04s Epoch 3 [139/140] | Valid Loss: 0.1926 | Elapse: 168.78s Epoch 3 - Train Loss: 0.1228 - Valid Loss: 0.6165 - Elapsed Time: 880.74s - Epoch 3: Best model found with loss = 0.6165. Epoch 4 [0/559] | Train Loss: 0.2050 Grad: 158852.4688 LR: 8.8263e-06 | Elapse: 1.24s Epoch 4 [500/559] | Train Loss: 0.0946 Grad: 32502.8730 LR: 7.6526e-06 | Elapse: 611.44s Epoch 4 [558/559] | Train Loss: 0.0882 Grad: 52789.3359 LR: 7.4974e-06 | Elapse: 684.08s Epoch 4 [0/140] | Valid Loss: 0.0021 | Elapse: 1.26s Epoch 4 [139/140] | Valid Loss: 0.2005 | Elapse: 173.50s Epoch 4 - Train Loss: 0.0882 - Valid Loss: 0.6403 - Elapsed Time: 878.81s - Epoch 4: Best model found with loss = 0.6403. Epoch 5 [0/559] | Train Loss: 0.1045 Grad: 160419.8594 LR: 7.4947e-06 | Elapse: 1.13s Epoch 5 [500/559] | Train Loss: 0.0674 Grad: 33515.8281 LR: 6.0431e-06 | Elapse: 622.62s Epoch 5 [558/559] | Train Loss: 0.0630 Grad: 48679.0625 LR: 5.8653e-06 | Elapse: 694.96s Epoch 5 [0/140] | Valid Loss: 0.0022 | Elapse: 1.26s Epoch 5 [139/140] | Valid Loss: 0.2054 | Elapse: 174.00s Epoch 5 - Train Loss: 0.0630 - Valid Loss: 0.6581 - Elapsed Time: 889.52s - Epoch 5: Best model found with loss = 0.6581. Epoch 6 [0/559] | Train Loss: 0.0513 Grad: 123881.2109 LR: 5.8623e-06 | Elapse: 1.20s Epoch 6 [500/559] | Train Loss: 0.0489 Grad: 34166.4883 LR: 4.3078e-06 | Elapse: 619.33s Epoch 6 [558/559] | Train Loss: 0.0459 Grad: 46318.1602 LR: 4.1289e-06 | Elapse: 692.04s Epoch 6 [0/140] | Valid Loss: 0.0022 | Elapse: 1.06s Epoch 6 [139/140] | Valid Loss: 0.2085 | Elapse: 175.60s Epoch 6 - Train Loss: 0.0459 - Valid Loss: 0.6727 - Elapsed Time: 888.27s - Epoch 6: Best model found with loss = 0.6727. Epoch 7 [0/559] | Train Loss: 0.0245 Grad: 69471.7734 LR: 4.1258e-06 | Elapse: 1.23s Epoch 7 [500/559] | Train Loss: 0.0379 Grad: 33260.8320 LR: 2.6560e-06 | Elapse: 633.33s Epoch 7 [558/559] | Train Loss: 0.0358 Grad: 43805.9805 LR: 2.4976e-06 | Elapse: 707.50s Epoch 7 [0/140] | Valid Loss: 0.0023 | Elapse: 1.22s Epoch 7 [139/140] | Valid Loss: 0.2125 | Elapse: 173.57s Epoch 7 - Train Loss: 0.0358 - Valid Loss: 0.6797 - Elapsed Time: 901.75s - Epoch 7: Best model found with loss = 0.6797. Epoch 8 [0/559] | Train Loss: 0.0170 Grad: 45662.7891 LR: 2.4949e-06 | Elapse: 1.28s Epoch 8 [500/559] | Train Loss: 0.0332 Grad: 33284.9766 LR: 1.2869e-06 | Elapse: 636.77s Epoch 8 [558/559] | Train Loss: 0.0315 Grad: 45330.4883 LR: 1.1681e-06 | Elapse: 709.71s Epoch 8 [0/140] | Valid Loss: 0.0023 | Elapse: 1.45s Epoch 8 [139/140] | Valid Loss: 0.2158 | Elapse: 172.70s Epoch 8 - Train Loss: 0.0315 - Valid Loss: 0.6806 - Elapsed Time: 903.01s - Epoch 8: Best model found with loss = 0.6806. Epoch 9 [0/559] | Train Loss: 0.0181 Grad: 55811.3711 LR: 1.1661e-06 | Elapse: 1.26s Epoch 9 [500/559] | Train Loss: 0.0337 Grad: 36090.6758 LR: 3.6575e-07 | Elapse: 622.66s Epoch 9 [558/559] | Train Loss: 0.0319 Grad: 40806.4766 LR: 3.0086e-07 | Elapse: 695.00s Epoch 9 [0/140] | Valid Loss: 0.0024 | Elapse: 1.55s Epoch 9 [139/140] | Valid Loss: 0.2160 | Elapse: 172.99s Epoch 9 - Train Loss: 0.0319 - Valid Loss: 0.6900 - Elapsed Time: 888.59s - Epoch 9: Best model found with loss = 0.6900. Epoch 10 [0/559] | Train Loss: 0.0291 Grad: 108929.7500 LR: 2.9979e-07 | Elapse: 1.67s Epoch 10 [500/559] | Train Loss: 0.0408 Grad: 33068.8359 LR: 3.5668e-09 | Elapse: 628.66s Epoch 10 [558/559] | Train Loss: 0.0381 Grad: 40680.0781 LR: 4.0097e-10 | Elapse: 701.00s Epoch 10 [0/140] | Valid Loss: 0.0026 | Elapse: 1.65s Epoch 10 [139/140] | Valid Loss: 0.2175 | Elapse: 172.10s Epoch 10 - Train Loss: 0.0381 - Valid Loss: 0.6948 - Elapsed Time: 893.76s - Epoch 10: Best model found with loss = 0.6948. Fold 3 | Time: 149.63min | Overall Evaluation Loss: 0.4956 Epoch 1 [0/559] | Train Loss: 0.0050 Grad: 2809.6936 LR: 4.0008e-07 | Elapse: 1.47s Epoch 1 [500/559] | Train Loss: 0.1740 Grad: 374.4365 LR: 9.7549e-06 | Elapse: 619.57s Epoch 1 [558/559] | Train Loss: 0.1637 Grad: 36396.9766 LR: 1.0000e-05 | Elapse: 689.00s Epoch 1 [0/140] | Valid Loss: 0.4124 | Elapse: 1.45s Epoch 1 [139/140] | Valid Loss: 0.1685 | Elapse: 171.89s Epoch 1 - Train Loss: 0.1637 - Valid Loss: 0.5389 - Elapsed Time: 881.37s - Epoch 1: Best model found with loss = 0.5389. Epoch 2 [0/559] | Train Loss: 0.0050 Grad: 1995.7759 LR: 1.0000e-05 | Elapse: 1.59s Epoch 2 [500/559] | Train Loss: 0.1633 Grad: 583.9670 LR: 9.7564e-06 | Elapse: 624.89s Epoch 2 [558/559] | Train Loss: 0.1530 Grad: 46425.1641 LR: 9.6974e-06 | Elapse: 694.86s Epoch 2 [0/140] | Valid Loss: 0.4686 | Elapse: 1.01s Epoch 2 [139/140] | Valid Loss: 0.1789 | Elapse: 167.87s Epoch 2 - Train Loss: 0.1530 - Valid Loss: 0.5844 - Elapsed Time: 882.92s - Epoch 2: Best model found with loss = 0.5844. Epoch 3 [0/559] | Train Loss: 0.0053 Grad: 3130.1858 LR: 9.6963e-06 | Elapse: 1.07s Epoch 3 [500/559] | Train Loss: 0.1322 Grad: 783.8658 LR: 8.9422e-06 | Elapse: 627.07s Epoch 3 [558/559] | Train Loss: 0.1232 Grad: 45816.0273 LR: 8.8283e-06 | Elapse: 699.61s Epoch 3 [0/140] | Valid Loss: 0.4931 | Elapse: 1.25s Epoch 3 [139/140] | Valid Loss: 0.1861 | Elapse: 167.99s Epoch 3 - Train Loss: 0.1232 - Valid Loss: 0.6180 - Elapsed Time: 887.79s - Epoch 3: Best model found with loss = 0.6180. Epoch 4 [0/559] | Train Loss: 0.0056 Grad: 4049.7507 LR: 8.8263e-06 | Elapse: 1.48s Epoch 4 [500/559] | Train Loss: 0.0952 Grad: 915.9907 LR: 7.6526e-06 | Elapse: 621.37s Epoch 4 [558/559] | Train Loss: 0.0887 Grad: 42097.1250 LR: 7.4974e-06 | Elapse: 692.63s Epoch 4 [0/140] | Valid Loss: 0.4977 | Elapse: 1.44s Epoch 4 [139/140] | Valid Loss: 0.1917 | Elapse: 166.80s Epoch 4 - Train Loss: 0.0887 - Valid Loss: 0.6386 - Elapsed Time: 879.67s - Epoch 4: Best model found with loss = 0.6386. Epoch 5 [0/559] | Train Loss: 0.0056 Grad: 4627.5327 LR: 7.4947e-06 | Elapse: 1.31s Epoch 5 [500/559] | Train Loss: 0.0673 Grad: 1042.5446 LR: 6.0431e-06 | Elapse: 623.91s Epoch 5 [558/559] | Train Loss: 0.0628 Grad: 39756.8047 LR: 5.8653e-06 | Elapse: 695.74s Epoch 5 [0/140] | Valid Loss: 0.4978 | Elapse: 1.65s Epoch 5 [139/140] | Valid Loss: 0.1959 | Elapse: 172.59s Epoch 5 - Train Loss: 0.0628 - Valid Loss: 0.6606 - Elapsed Time: 888.42s - Epoch 5: Best model found with loss = 0.6606. Epoch 6 [0/559] | Train Loss: 0.0055 Grad: 4887.3267 LR: 5.8623e-06 | Elapse: 1.38s Epoch 6 [500/559] | Train Loss: 0.0492 Grad: 1069.9318 LR: 4.3078e-06 | Elapse: 619.50s Epoch 6 [558/559] | Train Loss: 0.0460 Grad: 38461.5625 LR: 4.1289e-06 | Elapse: 692.72s Epoch 6 [0/140] | Valid Loss: 0.5020 | Elapse: 1.05s Epoch 6 [139/140] | Valid Loss: 0.1990 | Elapse: 174.79s Epoch 6 - Train Loss: 0.0460 - Valid Loss: 0.6746 - Elapsed Time: 887.61s - Epoch 6: Best model found with loss = 0.6746. Epoch 7 [0/559] | Train Loss: 0.0054 Grad: 5169.7212 LR: 4.1258e-06 | Elapse: 1.07s Epoch 7 [500/559] | Train Loss: 0.0381 Grad: 1063.5841 LR: 2.6560e-06 | Elapse: 621.07s Epoch 7 [558/559] | Train Loss: 0.0359 Grad: 35426.7031 LR: 2.4976e-06 | Elapse: 693.61s Epoch 7 [0/140] | Valid Loss: 0.5056 | Elapse: 1.28s Epoch 7 [139/140] | Valid Loss: 0.2010 | Elapse: 169.21s Epoch 7 - Train Loss: 0.0359 - Valid Loss: 0.6811 - Elapsed Time: 883.41s - Epoch 7: Best model found with loss = 0.6811. Epoch 8 [0/559] | Train Loss: 0.0054 Grad: 5201.8013 LR: 2.4949e-06 | Elapse: 1.16s Epoch 8 [500/559] | Train Loss: 0.0335 Grad: 1033.7025 LR: 1.2869e-06 | Elapse: 621.26s Epoch 8 [558/559] | Train Loss: 0.0316 Grad: 32125.7207 LR: 1.1681e-06 | Elapse: 691.80s Epoch 8 [0/140] | Valid Loss: 0.5071 | Elapse: 1.45s Epoch 8 [139/140] | Valid Loss: 0.2006 | Elapse: 174.00s Epoch 8 - Train Loss: 0.0316 - Valid Loss: 0.6861 - Elapsed Time: 885.99s - Epoch 8: Best model found with loss = 0.6861. Epoch 9 [0/559] | Train Loss: 0.0054 Grad: 5315.4302 LR: 1.1661e-06 | Elapse: 1.47s Epoch 9 [500/559] | Train Loss: 0.0337 Grad: 1095.0151 LR: 3.6575e-07 | Elapse: 622.67s Epoch 9 [558/559] | Train Loss: 0.0319 Grad: 27265.7305 LR: 3.0086e-07 | Elapse: 694.01s Epoch 9 [0/140] | Valid Loss: 0.4932 | Elapse: 1.35s Epoch 9 [139/140] | Valid Loss: 0.1994 | Elapse: 174.70s Epoch 9 - Train Loss: 0.0319 - Valid Loss: 0.6887 - Elapsed Time: 888.81s - Epoch 9: Best model found with loss = 0.6887. Epoch 10 [0/559] | Train Loss: 0.0052 Grad: 5499.5928 LR: 2.9979e-07 | Elapse: 1.36s Epoch 10 [500/559] | Train Loss: 0.0392 Grad: 1228.2296 LR: 3.5668e-09 | Elapse: 626.25s Epoch 10 [558/559] | Train Loss: 0.0367 Grad: 28973.5898 LR: 4.0097e-10 | Elapse: 696.89s Epoch 10 [0/140] | Valid Loss: 0.5141 | Elapse: 1.16s Epoch 10 [139/140] | Valid Loss: 0.2049 | Elapse: 174.49s Epoch 10 - Train Loss: 0.0367 - Valid Loss: 0.6837 - Elapsed Time: 891.96s Fold 4 | Time: 149.09min | Overall Evaluation Loss: 0.4522