################################### TRAIN_CONFIG ################################### dataset_dir: ./Audio_XenoCanto labels_list: ./xeno_labels.csv model_name: BirdAST_Baseline_5folds backbone_name: MIT/ast-finetuned-audioset-10-10-0.4593 n_classes: 728 audio_sr: 16000 segment_length: 10 fft_window: 0.025 hop_window_length: 0.01 n_mels: 128 low_cut: 1000 high_cut: 8000 top_db: 100 batch_size: 4 num_workers: 0 n_splits: 5 log_dir: ./training_logs max_lr: 1e-05 epochs: 15 weight_decay: 0.01 lr_final_div: 1000 amp: True grad_accum_steps: 1 max_grad_norm: 10000000.0 print_epoch_freq: 1 print_freq: 500 random_seed: 2046 copy: )> ################################################################################ Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving. Epoch 1 [0/2235] | Train Loss: 0.0084 Grad: 5827.5073 LR: 4.0000e-07 | Elapse: 4.53s ################################### TRAIN_CONFIG ################################### dataset_dir: ./Audio_XenoCanto labels_list: ./xeno_labels.csv model_name: BirdAST_Baseline_5folds backbone_name: MIT/ast-finetuned-audioset-10-10-0.4593 n_classes: 728 audio_sr: 16000 segment_length: 10 fft_window: 0.025 hop_window_length: 0.01 n_mels: 128 low_cut: 1000 high_cut: 8000 top_db: 100 batch_size: 16 num_workers: 0 n_splits: 5 log_dir: ./training_logs max_lr: 1e-05 epochs: 15 weight_decay: 0.01 lr_final_div: 1000 amp: True grad_accum_steps: 1 max_grad_norm: 10000000.0 print_epoch_freq: 1 print_freq: 500 random_seed: 2046 copy: )> ################################################################################ Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving. Epoch 1 [0/559] | Train Loss: 0.1937 Grad: 68037.4219 LR: 4.0003e-07 | Elapse: 5.38s Epoch 1 [500/559] | Train Loss: 0.1745 Grad: 33365.0820 LR: 6.6576e-06 | Elapse: 662.72s Epoch 1 [558/559] | Train Loss: 0.1640 Grad: 45012.7109 LR: 7.6104e-06 | Elapse: 736.30s Epoch 1 [0/140] | Valid Loss: 0.1974 | Elapse: 1.78s Epoch 1 [139/140] | Valid Loss: 0.1618 | Elapse: 177.47s Epoch 1 - Train Loss: 0.1640 - Valid Loss: 0.5766 - Elapsed Time: 973.32s - Epoch 1: Best model found with loss = 0.5766. Epoch 2 [0/559] | Train Loss: 0.1964 Grad: 55763.4883 LR: 7.6259e-06 | Elapse: 1.70s Epoch 2 [500/559] | Train Loss: 0.1703 Grad: 39483.6250 LR: 9.9786e-06 | Elapse: 663.00s Epoch 2 [558/559] | Train Loss: 0.1594 Grad: 47397.5625 LR: 9.9660e-06 | Elapse: 736.53s Epoch 2 [0/140] | Valid Loss: 0.1822 | Elapse: 1.45s Epoch 2 [139/140] | Valid Loss: 0.1503 | Elapse: 177.50s Epoch 2 - Train Loss: 0.1594 - Valid Loss: 0.7081 - Elapsed Time: 973.49s - Epoch 2: Best model found with loss = 0.7081. Epoch 3 [0/559] | Train Loss: 0.1799 Grad: 67393.6719 LR: 9.9657e-06 | Elapse: 1.30s Epoch 3 [500/559] | Train Loss: 0.1420 Grad: 46305.6367 LR: 9.7377e-06 | Elapse: 653.21s Epoch 3 [558/559] | Train Loss: 0.1324 Grad: 49667.2188 LR: 9.6978e-06 | Elapse: 727.64s Epoch 3 [0/140] | Valid Loss: 0.1788 | Elapse: 1.65s Epoch 3 [139/140] | Valid Loss: 0.1328 | Elapse: 180.90s Epoch 3 - Train Loss: 0.1324 - Valid Loss: 0.7903 - Elapsed Time: 967.32s - Epoch 3: Best model found with loss = 0.7903. Epoch 4 [0/559] | Train Loss: 0.1564 Grad: 90217.3984 LR: 9.6970e-06 | Elapse: 1.47s Epoch 4 [500/559] | Train Loss: 0.1024 Grad: 57958.9102 LR: 9.2415e-06 | Elapse: 645.67s Epoch 4 [558/559] | Train Loss: 0.0954 Grad: 50135.2852 LR: 9.1763e-06 | Elapse: 715.82s Epoch 4 [0/140] | Valid Loss: 0.1678 | Elapse: 1.34s Epoch 4 [139/140] | Valid Loss: 0.1166 | Elapse: 174.58s Epoch 4 - Train Loss: 0.0954 - Valid Loss: 0.8370 - Elapsed Time: 950.32s - Epoch 4: Best model found with loss = 0.8370. Epoch 5 [0/559] | Train Loss: 0.1133 Grad: 98545.1953 LR: 9.1752e-06 | Elapse: 1.75s Epoch 5 [500/559] | Train Loss: 0.0708 Grad: 55418.7812 LR: 8.5166e-06 | Elapse: 646.95s Epoch 5 [558/559] | Train Loss: 0.0660 Grad: 38844.8906 LR: 8.4298e-06 | Elapse: 723.69s Epoch 5 [0/140] | Valid Loss: 0.1484 | Elapse: 1.25s Epoch 5 [139/140] | Valid Loss: 0.1034 | Elapse: 174.60s Epoch 5 - Train Loss: 0.0660 - Valid Loss: 0.8642 - Elapsed Time: 958.10s - Epoch 5: Best model found with loss = 0.8642. Epoch 6 [0/559] | Train Loss: 0.0655 Grad: 92076.9453 LR: 8.4282e-06 | Elapse: 1.32s Epoch 6 [500/559] | Train Loss: 0.0480 Grad: 49009.5000 LR: 7.6021e-06 | Elapse: 646.01s Epoch 6 [558/559] | Train Loss: 0.0449 Grad: 35213.2227 LR: 7.4983e-06 | Elapse: 719.55s Epoch 6 [0/140] | Valid Loss: 0.1325 | Elapse: 1.06s Epoch 6 [139/140] | Valid Loss: 0.0923 | Elapse: 180.89s Epoch 6 - Train Loss: 0.0449 - Valid Loss: 0.8900 - Elapsed Time: 960.07s - Epoch 6: Best model found with loss = 0.8900. Epoch 7 [0/559] | Train Loss: 0.0334 Grad: 69795.4609 LR: 7.4965e-06 | Elapse: 1.24s Epoch 7 [500/559] | Train Loss: 0.0334 Grad: 43454.2812 LR: 6.5474e-06 | Elapse: 650.74s Epoch 7 [558/559] | Train Loss: 0.0313 Grad: 28944.0547 LR: 6.4322e-06 | Elapse: 724.98s Epoch 7 [0/140] | Valid Loss: 0.1219 | Elapse: 1.35s Epoch 7 [139/140] | Valid Loss: 0.0838 | Elapse: 176.90s Epoch 7 - Train Loss: 0.0313 - Valid Loss: 0.9075 - Elapsed Time: 961.58s - Epoch 7: Best model found with loss = 0.9075. Epoch 8 [0/559] | Train Loss: 0.0170 Grad: 43371.9336 LR: 6.4302e-06 | Elapse: 1.26s Epoch 8 [500/559] | Train Loss: 0.0248 Grad: 39485.9570 LR: 5.4093e-06 | Elapse: 653.76s Epoch 8 [558/559] | Train Loss: 0.0234 Grad: 24213.1797 LR: 5.2888e-06 | Elapse: 732.80s Epoch 8 [0/140] | Valid Loss: 0.1195 | Elapse: 1.55s Epoch 8 [139/140] | Valid Loss: 0.0784 | Elapse: 182.49s Epoch 8 - Train Loss: 0.0234 - Valid Loss: 0.9198 - Elapsed Time: 975.01s - Epoch 8: Best model found with loss = 0.9198. Epoch 9 [0/559] | Train Loss: 0.0113 Grad: 29259.3535 LR: 5.2868e-06 | Elapse: 1.64s Epoch 9 [500/559] | Train Loss: 0.0198 Grad: 35717.7930 LR: 4.2491e-06 | Elapse: 656.84s Epoch 9 [558/559] | Train Loss: 0.0187 Grad: 20421.3691 LR: 4.1299e-06 | Elapse: 732.78s Epoch 9 [0/140] | Valid Loss: 0.1195 | Elapse: 1.65s Epoch 9 [139/140] | Valid Loss: 0.0751 | Elapse: 177.80s Epoch 9 - Train Loss: 0.0187 - Valid Loss: 0.9285 - Elapsed Time: 970.49s - Epoch 9: Best model found with loss = 0.9285. Epoch 10 [0/559] | Train Loss: 0.0088 Grad: 22859.0410 LR: 4.1279e-06 | Elapse: 1.36s Epoch 10 [500/559] | Train Loss: 0.0165 Grad: 29824.0293 LR: 3.1294e-06 | Elapse: 651.96s Epoch 10 [558/559] | Train Loss: 0.0157 Grad: 17685.0957 LR: 3.0180e-06 | Elapse: 725.69s Epoch 10 [0/140] | Valid Loss: 0.1202 | Elapse: 1.45s Epoch 10 [139/140] | Valid Loss: 0.0731 | Elapse: 178.00s Epoch 10 - Train Loss: 0.0157 - Valid Loss: 0.9346 - Elapsed Time: 964.26s - Epoch 10: Best model found with loss = 0.9346. Epoch 11 [0/559] | Train Loss: 0.0078 Grad: 20275.6602 LR: 3.0161e-06 | Elapse: 1.09s Epoch 11 [500/559] | Train Loss: 0.0146 Grad: 26114.4258 LR: 2.1105e-06 | Elapse: 643.29s Epoch 11 [558/559] | Train Loss: 0.0138 Grad: 16013.0762 LR: 2.0129e-06 | Elapse: 713.93s Epoch 11 [0/140] | Valid Loss: 0.1210 | Elapse: 1.35s Epoch 11 [139/140] | Valid Loss: 0.0714 | Elapse: 178.40s Epoch 11 - Train Loss: 0.0138 - Valid Loss: 0.9384 - Elapsed Time: 952.42s - Epoch 11: Best model found with loss = 0.9384. Epoch 12 [0/559] | Train Loss: 0.0074 Grad: 20586.8594 LR: 2.0112e-06 | Elapse: 1.37s Epoch 12 [500/559] | Train Loss: 0.0135 Grad: 28101.5156 LR: 1.2475e-06 | Elapse: 647.48s Epoch 12 [558/559] | Train Loss: 0.0128 Grad: 15472.9678 LR: 1.1688e-06 | Elapse: 721.03s Epoch 12 [0/140] | Valid Loss: 0.1222 | Elapse: 0.93s Epoch 12 [139/140] | Valid Loss: 0.0692 | Elapse: 180.78s Epoch 12 - Train Loss: 0.0128 - Valid Loss: 0.9413 - Elapsed Time: 960.70s - Epoch 12: Best model found with loss = 0.9413. Epoch 13 [0/559] | Train Loss: 0.0072 Grad: 20721.5391 LR: 1.1675e-06 | Elapse: 1.37s Epoch 13 [500/559] | Train Loss: 0.0134 Grad: 32372.1992 LR: 5.8671e-07 | Elapse: 650.37s Epoch 13 [558/559] | Train Loss: 0.0128 Grad: 16761.3359 LR: 5.3128e-07 | Elapse: 724.01s Epoch 13 [0/140] | Valid Loss: 0.1244 | Elapse: 1.34s Epoch 13 [139/140] | Valid Loss: 0.0688 | Elapse: 172.69s Epoch 13 - Train Loss: 0.0128 - Valid Loss: 0.9419 - Elapsed Time: 956.02s - Epoch 13: Best model found with loss = 0.9419. Epoch 14 [0/559] | Train Loss: 0.0076 Grad: 26602.7090 LR: 5.3035e-07 | Elapse: 1.34s Epoch 14 [500/559] | Train Loss: 0.0153 Grad: 28308.5898 LR: 1.6390e-07 | Elapse: 643.94s Epoch 14 [558/559] | Train Loss: 0.0145 Grad: 14808.6914 LR: 1.3469e-07 | Elapse: 714.98s Epoch 14 [0/140] | Valid Loss: 0.1268 | Elapse: 1.44s Epoch 14 [139/140] | Valid Loss: 0.0656 | Elapse: 176.59s Epoch 14 - Train Loss: 0.0145 - Valid Loss: 0.9434 - Elapsed Time: 951.58s - Epoch 14: Best model found with loss = 0.9434. Epoch 15 [0/559] | Train Loss: 0.0078 Grad: 28353.9336 LR: 1.3421e-07 | Elapse: 1.18s Epoch 15 [500/559] | Train Loss: 0.0188 Grad: 24622.5938 LR: 1.8075e-09 | Elapse: 638.46s Epoch 15 [558/559] | Train Loss: 0.0176 Grad: 12045.1396 LR: 4.0043e-10 | Elapse: 712.99s Epoch 15 [0/140] | Valid Loss: 0.1179 | Elapse: 1.32s Epoch 15 [139/140] | Valid Loss: 0.0590 | Elapse: 177.87s Epoch 15 - Train Loss: 0.0176 - Valid Loss: 0.9461 - Elapsed Time: 950.37s - Epoch 15: Best model found with loss = 0.9461. Fold 1 | Time: 241.61min | Overall Evaluation Loss: 0.9461 Epoch 1 [0/559] | Train Loss: 0.4216 Grad: 124713.3594 LR: 4.0003e-07 | Elapse: 1.29s Epoch 1 [500/559] | Train Loss: 0.1733 Grad: 3465.7744 LR: 6.6576e-06 | Elapse: 638.69s Epoch 1 [558/559] | Train Loss: 0.1627 Grad: 33522.4336 LR: 7.6104e-06 | Elapse: 713.22s Epoch 1 [0/140] | Valid Loss: 0.0022 | Elapse: 1.25s Epoch 1 [139/140] | Valid Loss: 0.1657 | Elapse: 177.49s Epoch 1 - Train Loss: 0.1627 - Valid Loss: 0.5743 - Elapsed Time: 951.73s - Epoch 1: Best model found with loss = 0.5743. Epoch 2 [0/559] | Train Loss: 0.3910 Grad: 93335.5234 LR: 7.6259e-06 | Elapse: 1.65s Epoch 2 [500/559] | Train Loss: 0.1689 Grad: 3991.0815 LR: 9.9786e-06 | Elapse: 645.65s Epoch 2 [558/559] | Train Loss: 0.1581 Grad: 36387.4648 LR: 9.9660e-06 | Elapse: 720.68s Epoch 2 [0/140] | Valid Loss: 0.0025 | Elapse: 1.35s Epoch 2 [139/140] | Valid Loss: 0.1551 | Elapse: 174.90s Epoch 2 - Train Loss: 0.1581 - Valid Loss: 0.7066 - Elapsed Time: 954.89s - Epoch 2: Best model found with loss = 0.7066. Epoch 3 [0/559] | Train Loss: 0.3425 Grad: 120896.1484 LR: 9.9657e-06 | Elapse: 1.76s Epoch 3 [500/559] | Train Loss: 0.1436 Grad: 5205.2388 LR: 9.7377e-06 | Elapse: 651.85s Epoch 3 [558/559] | Train Loss: 0.1338 Grad: 41782.4336 LR: 9.6978e-06 | Elapse: 726.09s Epoch 3 [0/140] | Valid Loss: 0.0028 | Elapse: 1.28s Epoch 3 [139/140] | Valid Loss: 0.1380 | Elapse: 176.91s Epoch 3 - Train Loss: 0.1338 - Valid Loss: 0.7882 - Elapsed Time: 962.77s - Epoch 3: Best model found with loss = 0.7882. Epoch 4 [0/559] | Train Loss: 0.2430 Grad: 165577.8125 LR: 9.6970e-06 | Elapse: 1.38s Epoch 4 [500/559] | Train Loss: 0.1065 Grad: 5715.7354 LR: 9.2415e-06 | Elapse: 647.98s Epoch 4 [558/559] | Train Loss: 0.0990 Grad: 40249.1367 LR: 9.1763e-06 | Elapse: 719.71s Epoch 4 [0/140] | Valid Loss: 0.0029 | Elapse: 1.45s Epoch 4 [139/140] | Valid Loss: 0.1204 | Elapse: 180.50s Epoch 4 - Train Loss: 0.0990 - Valid Loss: 0.8343 - Elapsed Time: 959.92s - Epoch 4: Best model found with loss = 0.8343. Epoch 5 [0/559] | Train Loss: 0.1413 Grad: 195477.3906 LR: 9.1752e-06 | Elapse: 1.36s Epoch 5 [500/559] | Train Loss: 0.0759 Grad: 6313.8467 LR: 8.5166e-06 | Elapse: 649.84s Epoch 5 [558/559] | Train Loss: 0.0705 Grad: 37943.1094 LR: 8.4298e-06 | Elapse: 721.81s Epoch 5 [0/140] | Valid Loss: 0.0029 | Elapse: 1.13s Epoch 5 [139/140] | Valid Loss: 0.1068 | Elapse: 179.07s Epoch 5 - Train Loss: 0.0705 - Valid Loss: 0.8644 - Elapsed Time: 960.74s - Epoch 5: Best model found with loss = 0.8644. Epoch 6 [0/559] | Train Loss: 0.0582 Grad: 128484.4844 LR: 8.4282e-06 | Elapse: 1.10s Epoch 6 [500/559] | Train Loss: 0.0512 Grad: 6602.7700 LR: 7.6021e-06 | Elapse: 650.60s Epoch 6 [558/559] | Train Loss: 0.0478 Grad: 33963.3555 LR: 7.4983e-06 | Elapse: 724.14s Epoch 6 [0/140] | Valid Loss: 0.0029 | Elapse: 1.25s Epoch 6 [139/140] | Valid Loss: 0.0955 | Elapse: 180.59s Epoch 6 - Train Loss: 0.0478 - Valid Loss: 0.8855 - Elapsed Time: 963.84s - Epoch 6: Best model found with loss = 0.8855. Epoch 7 [0/559] | Train Loss: 0.0247 Grad: 70728.7188 LR: 7.4965e-06 | Elapse: 1.66s Epoch 7 [500/559] | Train Loss: 0.0355 Grad: 6962.7485 LR: 6.5474e-06 | Elapse: 652.36s Epoch 7 [558/559] | Train Loss: 0.0333 Grad: 28417.3770 LR: 6.4322e-06 | Elapse: 724.99s Epoch 7 [0/140] | Valid Loss: 0.0029 | Elapse: 1.25s Epoch 7 [139/140] | Valid Loss: 0.0878 | Elapse: 177.79s Epoch 7 - Train Loss: 0.0333 - Valid Loss: 0.9002 - Elapsed Time: 962.18s - Epoch 7: Best model found with loss = 0.9002. Epoch 8 [0/559] | Train Loss: 0.0140 Grad: 36290.3242 LR: 6.4302e-06 | Elapse: 1.17s Epoch 8 [500/559] | Train Loss: 0.0260 Grad: 6885.5967 LR: 5.4093e-06 | Elapse: 653.16s Epoch 8 [558/559] | Train Loss: 0.0245 Grad: 23809.4492 LR: 5.2888e-06 | Elapse: 726.92s Epoch 8 [0/140] | Valid Loss: 0.0028 | Elapse: 1.36s Epoch 8 [139/140] | Valid Loss: 0.0823 | Elapse: 176.58s Epoch 8 - Train Loss: 0.0245 - Valid Loss: 0.9112 - Elapsed Time: 963.31s - Epoch 8: Best model found with loss = 0.9112. Epoch 9 [0/559] | Train Loss: 0.0105 Grad: 24844.7188 LR: 5.2868e-06 | Elapse: 1.06s Epoch 9 [500/559] | Train Loss: 0.0205 Grad: 6883.3525 LR: 4.2491e-06 | Elapse: 642.26s Epoch 9 [558/559] | Train Loss: 0.0194 Grad: 20682.1309 LR: 4.1299e-06 | Elapse: 716.99s Epoch 9 [0/140] | Valid Loss: 0.0028 | Elapse: 1.45s Epoch 9 [139/140] | Valid Loss: 0.0783 | Elapse: 176.79s Epoch 9 - Train Loss: 0.0194 - Valid Loss: 0.9188 - Elapsed Time: 954.44s - Epoch 9: Best model found with loss = 0.9188. Epoch 10 [0/559] | Train Loss: 0.0091 Grad: 20860.1367 LR: 4.1279e-06 | Elapse: 1.11s Epoch 10 [500/559] | Train Loss: 0.0170 Grad: 6804.7153 LR: 3.1294e-06 | Elapse: 642.90s Epoch 10 [558/559] | Train Loss: 0.0162 Grad: 18611.3281 LR: 3.0180e-06 | Elapse: 714.56s Epoch 10 [0/140] | Valid Loss: 0.0028 | Elapse: 1.23s Epoch 10 [139/140] | Valid Loss: 0.0753 | Elapse: 178.87s Epoch 10 - Train Loss: 0.0162 - Valid Loss: 0.9246 - Elapsed Time: 953.05s - Epoch 10: Best model found with loss = 0.9246. Epoch 11 [0/559] | Train Loss: 0.0085 Grad: 19455.4121 LR: 3.0161e-06 | Elapse: 1.35s Epoch 11 [500/559] | Train Loss: 0.0149 Grad: 6900.8940 LR: 2.1105e-06 | Elapse: 646.55s Epoch 11 [558/559] | Train Loss: 0.0142 Grad: 17686.6504 LR: 2.0129e-06 | Elapse: 719.29s Epoch 11 [0/140] | Valid Loss: 0.0028 | Elapse: 1.56s Epoch 11 [139/140] | Valid Loss: 0.0728 | Elapse: 176.09s Epoch 11 - Train Loss: 0.0142 - Valid Loss: 0.9285 - Elapsed Time: 954.97s - Epoch 11: Best model found with loss = 0.9285. Epoch 12 [0/559] | Train Loss: 0.0082 Grad: 19114.8496 LR: 2.0112e-06 | Elapse: 0.99s Epoch 12 [500/559] | Train Loss: 0.0139 Grad: 6952.1978 LR: 1.2475e-06 | Elapse: 645.08s Epoch 12 [558/559] | Train Loss: 0.0133 Grad: 15867.8369 LR: 1.1688e-06 | Elapse: 719.55s Epoch 12 [0/140] | Valid Loss: 0.0029 | Elapse: 1.42s Epoch 12 [139/140] | Valid Loss: 0.0696 | Elapse: 179.57s Epoch 12 - Train Loss: 0.0133 - Valid Loss: 0.9321 - Elapsed Time: 959.11s - Epoch 12: Best model found with loss = 0.9321. Epoch 13 [0/559] | Train Loss: 0.0081 Grad: 19348.4922 LR: 1.1675e-06 | Elapse: 1.08s Epoch 13 [500/559] | Train Loss: 0.0140 Grad: 7006.5898 LR: 5.8671e-07 | Elapse: 652.07s Epoch 13 [558/559] | Train Loss: 0.0134 Grad: 14142.9746 LR: 5.3128e-07 | Elapse: 728.81s Epoch 13 [0/140] | Valid Loss: 0.0029 | Elapse: 1.15s Epoch 13 [139/140] | Valid Loss: 0.0682 | Elapse: 177.89s Epoch 13 - Train Loss: 0.0134 - Valid Loss: 0.9330 - Elapsed Time: 965.98s - Epoch 13: Best model found with loss = 0.9330. Epoch 14 [0/559] | Train Loss: 0.0109 Grad: 42109.3281 LR: 5.3035e-07 | Elapse: 1.28s Epoch 14 [500/559] | Train Loss: 0.0160 Grad: 7358.2085 LR: 1.6390e-07 | Elapse: 641.98s Epoch 14 [558/559] | Train Loss: 0.0152 Grad: 12558.0586 LR: 1.3469e-07 | Elapse: 713.12s Epoch 14 [0/140] | Valid Loss: 0.0029 | Elapse: 1.05s Epoch 14 [139/140] | Valid Loss: 0.0636 | Elapse: 173.62s Epoch 14 - Train Loss: 0.0152 - Valid Loss: 0.9368 - Elapsed Time: 946.93s - Epoch 14: Best model found with loss = 0.9368. Epoch 15 [0/559] | Train Loss: 0.0121 Grad: 59014.1406 LR: 1.3421e-07 | Elapse: 1.55s Epoch 15 [500/559] | Train Loss: 0.0176 Grad: 7590.2749 LR: 1.8075e-09 | Elapse: 643.34s Epoch 15 [558/559] | Train Loss: 0.0166 Grad: 13347.2998 LR: 4.0043e-10 | Elapse: 717.88s Epoch 15 [0/140] | Valid Loss: 0.0030 | Elapse: 1.55s Epoch 15 [139/140] | Valid Loss: 0.0606 | Elapse: 175.80s Epoch 15 - Train Loss: 0.0166 - Valid Loss: 0.9384 - Elapsed Time: 953.08s - Epoch 15: Best model found with loss = 0.9384. Fold 2 | Time: 240.75min | Overall Evaluation Loss: 0.9405 Epoch 1 [0/559] | Train Loss: 0.3982 Grad: 125106.8906 LR: 4.0003e-07 | Elapse: 1.41s Epoch 1 [500/559] | Train Loss: 0.1755 Grad: 57749.5234 LR: 6.6576e-06 | Elapse: 643.70s Epoch 1 [558/559] | Train Loss: 0.1648 Grad: 31245.7852 LR: 7.6104e-06 | Elapse: 719.98s Epoch 1 [0/140] | Valid Loss: 0.3405 | Elapse: 1.41s Epoch 1 [139/140] | Valid Loss: 0.1585 | Elapse: 177.45s Epoch 1 - Train Loss: 0.1648 - Valid Loss: 0.5808 - Elapsed Time: 955.69s - Epoch 1: Best model found with loss = 0.5808. Epoch 2 [0/559] | Train Loss: 0.3776 Grad: 105642.1875 LR: 7.6259e-06 | Elapse: 1.21s Epoch 2 [500/559] | Train Loss: 0.1717 Grad: 66910.3984 LR: 9.9786e-06 | Elapse: 637.51s Epoch 2 [558/559] | Train Loss: 0.1607 Grad: 31724.7090 LR: 9.9660e-06 | Elapse: 709.85s Epoch 2 [0/140] | Valid Loss: 0.3099 | Elapse: 1.45s Epoch 2 [139/140] | Valid Loss: 0.1498 | Elapse: 175.69s Epoch 2 - Train Loss: 0.1607 - Valid Loss: 0.6966 - Elapsed Time: 943.63s - Epoch 2: Best model found with loss = 0.6966. Epoch 3 [0/559] | Train Loss: 0.3213 Grad: 121472.9141 LR: 9.9657e-06 | Elapse: 1.18s Epoch 3 [500/559] | Train Loss: 0.1440 Grad: 103090.0625 LR: 9.7377e-06 | Elapse: 644.68s Epoch 3 [558/559] | Train Loss: 0.1341 Grad: 36844.5195 LR: 9.6978e-06 | Elapse: 716.82s Epoch 3 [0/140] | Valid Loss: 0.2482 | Elapse: 0.88s Epoch 3 [139/140] | Valid Loss: 0.1333 | Elapse: 176.00s Epoch 3 - Train Loss: 0.1341 - Valid Loss: 0.7746 - Elapsed Time: 951.51s - Epoch 3: Best model found with loss = 0.7746. Epoch 4 [0/559] | Train Loss: 0.2317 Grad: 167371.5625 LR: 9.6970e-06 | Elapse: 1.19s Epoch 4 [500/559] | Train Loss: 0.1037 Grad: 111101.2422 LR: 9.2415e-06 | Elapse: 640.47s Epoch 4 [558/559] | Train Loss: 0.0964 Grad: 36731.4844 LR: 9.1763e-06 | Elapse: 712.91s Epoch 4 [0/140] | Valid Loss: 0.1839 | Elapse: 1.05s Epoch 4 [139/140] | Valid Loss: 0.1155 | Elapse: 180.00s Epoch 4 - Train Loss: 0.0964 - Valid Loss: 0.8242 - Elapsed Time: 952.45s - Epoch 4: Best model found with loss = 0.8242. Epoch 5 [0/559] | Train Loss: 0.1119 Grad: 162986.0000 LR: 9.1752e-06 | Elapse: 1.52s Epoch 5 [500/559] | Train Loss: 0.0713 Grad: 100894.1797 LR: 8.5166e-06 | Elapse: 656.92s Epoch 5 [558/559] | Train Loss: 0.0663 Grad: 34186.8906 LR: 8.4298e-06 | Elapse: 732.87s Epoch 5 [0/140] | Valid Loss: 0.1360 | Elapse: 1.45s Epoch 5 [139/140] | Valid Loss: 0.1025 | Elapse: 176.98s Epoch 5 - Train Loss: 0.0663 - Valid Loss: 0.8523 - Elapsed Time: 967.91s - Epoch 5: Best model found with loss = 0.8523. Epoch 6 [0/559] | Train Loss: 0.0425 Grad: 102910.8594 LR: 8.4282e-06 | Elapse: 1.61s Epoch 6 [500/559] | Train Loss: 0.0477 Grad: 59404.9648 LR: 7.6021e-06 | Elapse: 657.10s Epoch 6 [558/559] | Train Loss: 0.0446 Grad: 33187.6953 LR: 7.4983e-06 | Elapse: 731.94s Epoch 6 [0/140] | Valid Loss: 0.1006 | Elapse: 1.05s Epoch 6 [139/140] | Valid Loss: 0.0935 | Elapse: 177.79s Epoch 6 - Train Loss: 0.0446 - Valid Loss: 0.8722 - Elapsed Time: 968.10s - Epoch 6: Best model found with loss = 0.8722. Epoch 7 [0/559] | Train Loss: 0.0218 Grad: 57458.2891 LR: 7.4965e-06 | Elapse: 1.40s Epoch 7 [500/559] | Train Loss: 0.0329 Grad: 38623.2969 LR: 6.5474e-06 | Elapse: 646.70s Epoch 7 [558/559] | Train Loss: 0.0310 Grad: 30045.2578 LR: 6.4322e-06 | Elapse: 721.54s Epoch 7 [0/140] | Valid Loss: 0.0848 | Elapse: 1.25s Epoch 7 [139/140] | Valid Loss: 0.0860 | Elapse: 175.20s Epoch 7 - Train Loss: 0.0310 - Valid Loss: 0.8886 - Elapsed Time: 955.19s - Epoch 7: Best model found with loss = 0.8886. Epoch 8 [0/559] | Train Loss: 0.0142 Grad: 36089.4648 LR: 6.4302e-06 | Elapse: 1.21s Epoch 8 [500/559] | Train Loss: 0.0244 Grad: 29544.0957 LR: 5.4093e-06 | Elapse: 656.81s Epoch 8 [558/559] | Train Loss: 0.0231 Grad: 26089.1152 LR: 5.2888e-06 | Elapse: 731.55s Epoch 8 [0/140] | Valid Loss: 0.0702 | Elapse: 1.25s Epoch 8 [139/140] | Valid Loss: 0.0802 | Elapse: 175.81s Epoch 8 - Train Loss: 0.0231 - Valid Loss: 0.9009 - Elapsed Time: 965.36s - Epoch 8: Best model found with loss = 0.9009. Epoch 9 [0/559] | Train Loss: 0.0116 Grad: 26916.4824 LR: 5.2868e-06 | Elapse: 1.45s Epoch 9 [500/559] | Train Loss: 0.0194 Grad: 27676.7578 LR: 4.2491e-06 | Elapse: 650.75s Epoch 9 [558/559] | Train Loss: 0.0185 Grad: 22893.2988 LR: 4.1299e-06 | Elapse: 724.12s Epoch 9 [0/140] | Valid Loss: 0.0627 | Elapse: 1.13s Epoch 9 [139/140] | Valid Loss: 0.0771 | Elapse: 178.01s Epoch 9 - Train Loss: 0.0185 - Valid Loss: 0.9090 - Elapsed Time: 960.16s - Epoch 9: Best model found with loss = 0.9090. Epoch 10 [0/559] | Train Loss: 0.0107 Grad: 24621.1934 LR: 4.1279e-06 | Elapse: 1.29s Epoch 10 [500/559] | Train Loss: 0.0162 Grad: 25335.7344 LR: 3.1294e-06 | Elapse: 645.49s Epoch 10 [558/559] | Train Loss: 0.0155 Grad: 21706.3672 LR: 3.0180e-06 | Elapse: 716.23s Epoch 10 [0/140] | Valid Loss: 0.0593 | Elapse: 1.35s Epoch 10 [139/140] | Valid Loss: 0.0754 | Elapse: 179.70s Epoch 10 - Train Loss: 0.0155 - Valid Loss: 0.9146 - Elapsed Time: 954.55s - Epoch 10: Best model found with loss = 0.9146. Epoch 11 [0/559] | Train Loss: 0.0096 Grad: 22430.6914 LR: 3.0161e-06 | Elapse: 1.13s Epoch 11 [500/559] | Train Loss: 0.0143 Grad: 24016.1289 LR: 2.1105e-06 | Elapse: 644.93s Epoch 11 [558/559] | Train Loss: 0.0138 Grad: 21748.4395 LR: 2.0129e-06 | Elapse: 720.27s Epoch 11 [0/140] | Valid Loss: 0.0590 | Elapse: 1.15s Epoch 11 [139/140] | Valid Loss: 0.0751 | Elapse: 171.39s Epoch 11 - Train Loss: 0.0138 - Valid Loss: 0.9171 - Elapsed Time: 950.43s - Epoch 11: Best model found with loss = 0.9171. Epoch 12 [0/559] | Train Loss: 0.0088 Grad: 20412.5254 LR: 2.0112e-06 | Elapse: 1.40s Epoch 12 [500/559] | Train Loss: 0.0136 Grad: 32134.5352 LR: 1.2475e-06 | Elapse: 647.60s Epoch 12 [558/559] | Train Loss: 0.0131 Grad: 23307.2793 LR: 1.1688e-06 | Elapse: 719.54s Epoch 12 [0/140] | Valid Loss: 0.0584 | Elapse: 1.45s Epoch 12 [139/140] | Valid Loss: 0.0751 | Elapse: 180.09s Epoch 12 - Train Loss: 0.0131 - Valid Loss: 0.9179 - Elapsed Time: 957.95s - Epoch 12: Best model found with loss = 0.9179. Epoch 13 [0/559] | Train Loss: 0.0087 Grad: 23177.0508 LR: 1.1675e-06 | Elapse: 1.25s Epoch 13 [500/559] | Train Loss: 0.0142 Grad: 23201.0508 LR: 5.8671e-07 | Elapse: 653.55s Epoch 13 [558/559] | Train Loss: 0.0137 Grad: 30058.3652 LR: 5.3128e-07 | Elapse: 729.39s Epoch 13 [0/140] | Valid Loss: 0.0632 | Elapse: 1.25s Epoch 13 [139/140] | Valid Loss: 0.0799 | Elapse: 179.09s Epoch 13 - Train Loss: 0.0137 - Valid Loss: 0.9144 - Elapsed Time: 966.71s Epoch 14 [0/559] | Train Loss: 0.0112 Grad: 51606.1914 LR: 5.3035e-07 | Elapse: 1.54s Epoch 14 [500/559] | Train Loss: 0.0171 Grad: 24198.2812 LR: 1.6390e-07 | Elapse: 651.34s Epoch 14 [558/559] | Train Loss: 0.0161 Grad: 17722.1055 LR: 1.3469e-07 | Elapse: 727.78s Epoch 14 [0/140] | Valid Loss: 0.0579 | Elapse: 1.05s Epoch 14 [139/140] | Valid Loss: 0.0655 | Elapse: 182.60s Epoch 14 - Train Loss: 0.0161 - Valid Loss: 0.9277 - Elapsed Time: 969.78s - Epoch 14: Best model found with loss = 0.9277. Epoch 15 [0/559] | Train Loss: 0.0095 Grad: 25194.0781 LR: 1.3421e-07 | Elapse: 1.46s Epoch 15 [500/559] | Train Loss: 0.0181 Grad: 19076.9629 LR: 1.8075e-09 | Elapse: 647.35s Epoch 15 [558/559] | Train Loss: 0.0171 Grad: 14837.4453 LR: 4.0043e-10 | Elapse: 721.40s Epoch 15 [0/140] | Valid Loss: 0.0564 | Elapse: 1.45s Epoch 15 [139/140] | Valid Loss: 0.0629 | Elapse: 179.29s Epoch 15 - Train Loss: 0.0171 - Valid Loss: 0.9295 - Elapsed Time: 959.98s - Epoch 15: Best model found with loss = 0.9295. Fold 3 | Time: 241.13min | Overall Evaluation Loss: 0.9382 Epoch 1 [0/559] | Train Loss: 0.3633 Grad: 124250.9141 LR: 4.0003e-07 | Elapse: 1.09s Epoch 1 [500/559] | Train Loss: 0.1642 Grad: 550.4676 LR: 6.6576e-06 | Elapse: 652.68s Epoch 1 [558/559] | Train Loss: 0.1646 Grad: 18192.2656 LR: 7.6104e-06 | Elapse: 729.42s Epoch 1 [0/140] | Valid Loss: 0.2690 | Elapse: 1.55s Epoch 1 [139/140] | Valid Loss: 0.1589 | Elapse: 178.19s Epoch 1 - Train Loss: 0.1646 - Valid Loss: 0.5713 - Elapsed Time: 967.22s - Epoch 1: Best model found with loss = 0.5713. Epoch 2 [0/559] | Train Loss: 0.3655 Grad: 99907.1562 LR: 7.6259e-06 | Elapse: 1.37s Epoch 2 [500/559] | Train Loss: 0.1607 Grad: 672.9160 LR: 9.9786e-06 | Elapse: 654.16s Epoch 2 [558/559] | Train Loss: 0.1598 Grad: 21048.8535 LR: 9.9660e-06 | Elapse: 728.21s Epoch 2 [0/140] | Valid Loss: 0.2438 | Elapse: 1.54s Epoch 2 [139/140] | Valid Loss: 0.1508 | Elapse: 185.69s Epoch 2 - Train Loss: 0.1598 - Valid Loss: 0.6758 - Elapsed Time: 973.26s - Epoch 2: Best model found with loss = 0.6758. Epoch 3 [0/559] | Train Loss: 0.3119 Grad: 131969.9375 LR: 9.9657e-06 | Elapse: 1.40s Epoch 3 [500/559] | Train Loss: 0.1382 Grad: 983.9833 LR: 9.7377e-06 | Elapse: 645.90s Epoch 3 [558/559] | Train Loss: 0.1360 Grad: 22967.9785 LR: 9.6978e-06 | Elapse: 720.34s Epoch 3 [0/140] | Valid Loss: 0.2111 | Elapse: 1.35s Epoch 3 [139/140] | Valid Loss: 0.1370 | Elapse: 180.99s Epoch 3 - Train Loss: 0.1360 - Valid Loss: 0.7520 - Elapsed Time: 960.87s - Epoch 3: Best model found with loss = 0.7520. Epoch 4 [0/559] | Train Loss: 0.1790 Grad: 156240.8906 LR: 9.6970e-06 | Elapse: 1.53s Epoch 4 [500/559] | Train Loss: 0.1014 Grad: 1266.3868 LR: 9.2415e-06 | Elapse: 642.03s Epoch 4 [558/559] | Train Loss: 0.0994 Grad: 23239.8164 LR: 9.1763e-06 | Elapse: 716.17s Epoch 4 [0/140] | Valid Loss: 0.1738 | Elapse: 1.24s Epoch 4 [139/140] | Valid Loss: 0.1241 | Elapse: 176.79s Epoch 4 - Train Loss: 0.0994 - Valid Loss: 0.7934 - Elapsed Time: 951.72s - Epoch 4: Best model found with loss = 0.7934. Epoch 5 [0/559] | Train Loss: 0.0794 Grad: 141872.0156 LR: 9.1752e-06 | Elapse: 1.50s Epoch 5 [500/559] | Train Loss: 0.0701 Grad: 1435.0442 LR: 8.5166e-06 | Elapse: 655.00s Epoch 5 [558/559] | Train Loss: 0.0688 Grad: 22167.5000 LR: 8.4298e-06 | Elapse: 729.74s Epoch 5 [0/140] | Valid Loss: 0.1549 | Elapse: 1.45s Epoch 5 [139/140] | Valid Loss: 0.1157 | Elapse: 177.20s Epoch 5 - Train Loss: 0.0688 - Valid Loss: 0.8173 - Elapsed Time: 965.22s - Epoch 5: Best model found with loss = 0.8173. Epoch 6 [0/559] | Train Loss: 0.0344 Grad: 87340.9609 LR: 8.4282e-06 | Elapse: 1.48s Epoch 6 [500/559] | Train Loss: 0.0480 Grad: 1550.3583 LR: 7.6021e-06 | Elapse: 650.57s Epoch 6 [558/559] | Train Loss: 0.0474 Grad: 20225.2676 LR: 7.4983e-06 | Elapse: 724.31s Epoch 6 [0/140] | Valid Loss: 0.1368 | Elapse: 1.45s Epoch 6 [139/140] | Valid Loss: 0.1052 | Elapse: 180.79s Epoch 6 - Train Loss: 0.0474 - Valid Loss: 0.8380 - Elapsed Time: 963.61s - Epoch 6: Best model found with loss = 0.8380. Epoch 7 [0/559] | Train Loss: 0.0166 Grad: 44716.8789 LR: 7.4965e-06 | Elapse: 1.76s Epoch 7 [500/559] | Train Loss: 0.0329 Grad: 1702.5387 LR: 6.5474e-06 | Elapse: 646.56s Epoch 7 [558/559] | Train Loss: 0.0328 Grad: 16368.6797 LR: 6.4322e-06 | Elapse: 720.99s Epoch 7 [0/140] | Valid Loss: 0.1277 | Elapse: 1.55s Epoch 7 [139/140] | Valid Loss: 0.0995 | Elapse: 179.20s Epoch 7 - Train Loss: 0.0328 - Valid Loss: 0.8543 - Elapsed Time: 959.19s - Epoch 7: Best model found with loss = 0.8543. Epoch 8 [0/559] | Train Loss: 0.0134 Grad: 37146.0703 LR: 6.4302e-06 | Elapse: 1.26s Epoch 8 [500/559] | Train Loss: 0.0241 Grad: 1791.6235 LR: 5.4093e-06 | Elapse: 647.76s Epoch 8 [558/559] | Train Loss: 0.0242 Grad: 13751.4150 LR: 5.2888e-06 | Elapse: 725.20s Epoch 8 [0/140] | Valid Loss: 0.1255 | Elapse: 0.96s Epoch 8 [139/140] | Valid Loss: 0.0937 | Elapse: 184.89s Epoch 8 - Train Loss: 0.0242 - Valid Loss: 0.8670 - Elapsed Time: 970.38s - Epoch 8: Best model found with loss = 0.8670. Epoch 9 [0/559] | Train Loss: 0.0105 Grad: 25991.3223 LR: 5.2868e-06 | Elapse: 1.18s Epoch 9 [500/559] | Train Loss: 0.0191 Grad: 1816.1155 LR: 4.2491e-06 | Elapse: 647.57s Epoch 9 [558/559] | Train Loss: 0.0193 Grad: 11664.7656 LR: 4.1299e-06 | Elapse: 723.21s Epoch 9 [0/140] | Valid Loss: 0.1225 | Elapse: 1.45s Epoch 9 [139/140] | Valid Loss: 0.0896 | Elapse: 176.91s Epoch 9 - Train Loss: 0.0193 - Valid Loss: 0.8769 - Elapsed Time: 959.24s - Epoch 9: Best model found with loss = 0.8769. Epoch 10 [0/559] | Train Loss: 0.0091 Grad: 20763.9258 LR: 4.1279e-06 | Elapse: 1.63s Epoch 10 [500/559] | Train Loss: 0.0160 Grad: 1870.6489 LR: 3.1294e-06 | Elapse: 650.43s Epoch 10 [558/559] | Train Loss: 0.0163 Grad: 9963.1377 LR: 3.0180e-06 | Elapse: 725.57s Epoch 10 [0/140] | Valid Loss: 0.1228 | Elapse: 1.55s Epoch 10 [139/140] | Valid Loss: 0.0863 | Elapse: 179.89s Epoch 10 - Train Loss: 0.0163 - Valid Loss: 0.8838 - Elapsed Time: 964.06s - Epoch 10: Best model found with loss = 0.8838. Epoch 11 [0/559] | Train Loss: 0.0087 Grad: 19777.8633 LR: 3.0161e-06 | Elapse: 1.57s Epoch 11 [500/559] | Train Loss: 0.0142 Grad: 1895.5302 LR: 2.1105e-06 | Elapse: 646.77s Epoch 11 [558/559] | Train Loss: 0.0145 Grad: 9178.5586 LR: 2.0129e-06 | Elapse: 720.21s Epoch 11 [0/140] | Valid Loss: 0.1234 | Elapse: 1.05s Epoch 11 [139/140] | Valid Loss: 0.0840 | Elapse: 177.70s Epoch 11 - Train Loss: 0.0145 - Valid Loss: 0.8882 - Elapsed Time: 956.95s - Epoch 11: Best model found with loss = 0.8882. Epoch 12 [0/559] | Train Loss: 0.0085 Grad: 21362.3984 LR: 2.0112e-06 | Elapse: 1.13s Epoch 12 [500/559] | Train Loss: 0.0133 Grad: 1930.3713 LR: 1.2475e-06 | Elapse: 643.53s Epoch 12 [558/559] | Train Loss: 0.0136 Grad: 10055.4883 LR: 1.1688e-06 | Elapse: 719.66s Epoch 12 [0/140] | Valid Loss: 0.1230 | Elapse: 1.16s Epoch 12 [139/140] | Valid Loss: 0.0822 | Elapse: 177.09s Epoch 12 - Train Loss: 0.0136 - Valid Loss: 0.8905 - Elapsed Time: 956.91s - Epoch 12: Best model found with loss = 0.8905. Epoch 13 [0/559] | Train Loss: 0.0089 Grad: 28545.0586 LR: 1.1675e-06 | Elapse: 1.31s Epoch 13 [500/559] | Train Loss: 0.0134 Grad: 2208.5732 LR: 5.8671e-07 | Elapse: 651.11s Epoch 13 [558/559] | Train Loss: 0.0137 Grad: 11693.8750 LR: 5.3128e-07 | Elapse: 723.84s Epoch 13 [0/140] | Valid Loss: 0.1226 | Elapse: 1.35s Epoch 13 [139/140] | Valid Loss: 0.0797 | Elapse: 178.20s Epoch 13 - Train Loss: 0.0137 - Valid Loss: 0.8916 - Elapsed Time: 961.21s - Epoch 13: Best model found with loss = 0.8916. Epoch 14 [0/559] | Train Loss: 0.0106 Grad: 44991.1055 LR: 5.3035e-07 | Elapse: 1.23s Epoch 14 [500/559] | Train Loss: 0.0151 Grad: 2720.3081 LR: 1.6390e-07 | Elapse: 647.89s Epoch 14 [558/559] | Train Loss: 0.0160 Grad: 8372.2275 LR: 1.3469e-07 | Elapse: 720.33s Epoch 14 [0/140] | Valid Loss: 0.1300 | Elapse: 1.35s Epoch 14 [139/140] | Valid Loss: 0.0720 | Elapse: 175.69s Epoch 14 - Train Loss: 0.0160 - Valid Loss: 0.9002 - Elapsed Time: 954.98s - Epoch 14: Best model found with loss = 0.9002. Epoch 15 [0/559] | Train Loss: 0.0121 Grad: 53557.4883 LR: 1.3421e-07 | Elapse: 1.72s Epoch 15 [500/559] | Train Loss: 0.0168 Grad: 2852.8547 LR: 1.8075e-09 | Elapse: 649.21s Epoch 15 [558/559] | Train Loss: 0.0171 Grad: 9246.8477 LR: 4.0043e-10 | Elapse: 725.75s Epoch 15 [0/140] | Valid Loss: 0.1239 | Elapse: 1.35s Epoch 15 [139/140] | Valid Loss: 0.0694 | Elapse: 177.70s Epoch 15 - Train Loss: 0.0171 - Valid Loss: 0.9015 - Elapsed Time: 962.32s - Epoch 15: Best model found with loss = 0.9015. Fold 4 | Time: 242.02min | Overall Evaluation Loss: 0.9291 Epoch 1 [0/559] | Train Loss: 0.1902 Grad: 66413.2578 LR: 4.0003e-07 | Elapse: 1.13s Epoch 1 [500/559] | Train Loss: 0.1762 Grad: 2669.0898 LR: 6.6576e-06 | Elapse: 645.72s Epoch 1 [558/559] | Train Loss: 0.1660 Grad: 50164.2422 LR: 7.6104e-06 | Elapse: 721.57s Epoch 1 [0/140] | Valid Loss: 0.1869 | Elapse: 1.34s Epoch 1 [139/140] | Valid Loss: 0.1561 | Elapse: 181.44s Epoch 1 - Train Loss: 0.1660 - Valid Loss: 0.5658 - Elapsed Time: 961.36s - Epoch 1: Best model found with loss = 0.5658. Epoch 2 [0/559] | Train Loss: 0.1867 Grad: 57774.2891 LR: 7.6259e-06 | Elapse: 1.37s Epoch 2 [500/559] | Train Loss: 0.1714 Grad: 2945.2744 LR: 9.9786e-06 | Elapse: 657.97s Epoch 2 [558/559] | Train Loss: 0.1607 Grad: 50242.5000 LR: 9.9660e-06 | Elapse: 736.99s Epoch 2 [0/140] | Valid Loss: 0.1820 | Elapse: 1.36s Epoch 2 [139/140] | Valid Loss: 0.1447 | Elapse: 180.02s Epoch 2 - Train Loss: 0.1607 - Valid Loss: 0.7065 - Elapsed Time: 976.65s - Epoch 2: Best model found with loss = 0.7065. Epoch 3 [0/559] | Train Loss: 0.1736 Grad: 71577.5156 LR: 9.9657e-06 | Elapse: 1.61s Epoch 3 [500/559] | Train Loss: 0.1443 Grad: 4092.1748 LR: 9.7377e-06 | Elapse: 652.20s Epoch 3 [558/559] | Train Loss: 0.1345 Grad: 50957.5117 LR: 9.6978e-06 | Elapse: 727.83s Epoch 3 [0/140] | Valid Loss: 0.1715 | Elapse: 1.26s Epoch 3 [139/140] | Valid Loss: 0.1274 | Elapse: 179.61s Epoch 3 - Train Loss: 0.1345 - Valid Loss: 0.7868 - Elapsed Time: 965.51s - Epoch 3: Best model found with loss = 0.7868. Epoch 4 [0/559] | Train Loss: 0.1299 Grad: 89553.9766 LR: 9.6970e-06 | Elapse: 1.30s Epoch 4 [500/559] | Train Loss: 0.1024 Grad: 4908.1235 LR: 9.2415e-06 | Elapse: 646.79s Epoch 4 [558/559] | Train Loss: 0.0953 Grad: 51755.4883 LR: 9.1763e-06 | Elapse: 723.82s Epoch 4 [0/140] | Valid Loss: 0.1466 | Elapse: 1.17s Epoch 4 [139/140] | Valid Loss: 0.1127 | Elapse: 177.61s Epoch 4 - Train Loss: 0.0953 - Valid Loss: 0.8238 - Elapsed Time: 959.55s - Epoch 4: Best model found with loss = 0.8238. Epoch 5 [0/559] | Train Loss: 0.0821 Grad: 102318.5078 LR: 9.1752e-06 | Elapse: 1.54s Epoch 5 [500/559] | Train Loss: 0.0698 Grad: 5586.0479 LR: 8.5166e-06 | Elapse: 647.14s Epoch 5 [558/559] | Train Loss: 0.0650 Grad: 44889.3242 LR: 8.4298e-06 | Elapse: 721.04s Epoch 5 [0/140] | Valid Loss: 0.1277 | Elapse: 1.28s Epoch 5 [139/140] | Valid Loss: 0.1013 | Elapse: 178.04s Epoch 5 - Train Loss: 0.0650 - Valid Loss: 0.8493 - Elapsed Time: 958.05s - Epoch 5: Best model found with loss = 0.8493. Epoch 6 [0/559] | Train Loss: 0.0456 Grad: 94000.5000 LR: 8.4282e-06 | Elapse: 1.29s Epoch 6 [500/559] | Train Loss: 0.0469 Grad: 5821.9785 LR: 7.6021e-06 | Elapse: 650.70s Epoch 6 [558/559] | Train Loss: 0.0439 Grad: 36521.1172 LR: 7.4983e-06 | Elapse: 726.91s Epoch 6 [0/140] | Valid Loss: 0.1070 | Elapse: 0.91s Epoch 6 [139/140] | Valid Loss: 0.0911 | Elapse: 176.52s Epoch 6 - Train Loss: 0.0439 - Valid Loss: 0.8720 - Elapsed Time: 962.33s - Epoch 6: Best model found with loss = 0.8720. Epoch 7 [0/559] | Train Loss: 0.0244 Grad: 67486.2812 LR: 7.4965e-06 | Elapse: 1.45s Epoch 7 [500/559] | Train Loss: 0.0322 Grad: 6044.8472 LR: 6.5474e-06 | Elapse: 649.35s Epoch 7 [558/559] | Train Loss: 0.0303 Grad: 31346.2227 LR: 6.4322e-06 | Elapse: 723.66s Epoch 7 [0/140] | Valid Loss: 0.0936 | Elapse: 0.88s Epoch 7 [139/140] | Valid Loss: 0.0839 | Elapse: 176.47s Epoch 7 - Train Loss: 0.0303 - Valid Loss: 0.8888 - Elapsed Time: 958.02s - Epoch 7: Best model found with loss = 0.8888. Epoch 8 [0/559] | Train Loss: 0.0108 Grad: 29399.3164 LR: 6.4302e-06 | Elapse: 1.63s Epoch 8 [500/559] | Train Loss: 0.0237 Grad: 6178.5142 LR: 5.4093e-06 | Elapse: 660.03s Epoch 8 [558/559] | Train Loss: 0.0224 Grad: 27034.6348 LR: 5.2888e-06 | Elapse: 732.26s Epoch 8 [0/140] | Valid Loss: 0.0897 | Elapse: 0.90s Epoch 8 [139/140] | Valid Loss: 0.0791 | Elapse: 178.92s Epoch 8 - Train Loss: 0.0224 - Valid Loss: 0.9013 - Elapsed Time: 968.97s - Epoch 8: Best model found with loss = 0.9013. Epoch 9 [0/559] | Train Loss: 0.0083 Grad: 21065.5977 LR: 5.2868e-06 | Elapse: 1.36s Epoch 9 [500/559] | Train Loss: 0.0187 Grad: 6257.6196 LR: 4.2491e-06 | Elapse: 662.97s Epoch 9 [558/559] | Train Loss: 0.0177 Grad: 22987.4766 LR: 4.1299e-06 | Elapse: 734.99s Epoch 9 [0/140] | Valid Loss: 0.0882 | Elapse: 1.26s Epoch 9 [139/140] | Valid Loss: 0.0756 | Elapse: 172.32s Epoch 9 - Train Loss: 0.0177 - Valid Loss: 0.9107 - Elapsed Time: 965.28s - Epoch 9: Best model found with loss = 0.9107. Epoch 10 [0/559] | Train Loss: 0.0070 Grad: 16650.1582 LR: 4.1279e-06 | Elapse: 1.39s Epoch 10 [500/559] | Train Loss: 0.0156 Grad: 6256.2896 LR: 3.1294e-06 | Elapse: 654.08s Epoch 10 [558/559] | Train Loss: 0.0149 Grad: 19737.6699 LR: 3.0180e-06 | Elapse: 727.41s Epoch 10 [0/140] | Valid Loss: 0.0873 | Elapse: 1.47s Epoch 10 [139/140] | Valid Loss: 0.0728 | Elapse: 180.91s Epoch 10 - Train Loss: 0.0149 - Valid Loss: 0.9173 - Elapsed Time: 966.90s - Epoch 10: Best model found with loss = 0.9173. Epoch 11 [0/559] | Train Loss: 0.0062 Grad: 13547.4805 LR: 3.0161e-06 | Elapse: 1.28s Epoch 11 [500/559] | Train Loss: 0.0137 Grad: 6225.1914 LR: 2.1105e-06 | Elapse: 648.78s Epoch 11 [558/559] | Train Loss: 0.0131 Grad: 18167.6406 LR: 2.0129e-06 | Elapse: 722.64s Epoch 11 [0/140] | Valid Loss: 0.0885 | Elapse: 1.53s Epoch 11 [139/140] | Valid Loss: 0.0705 | Elapse: 177.18s Epoch 11 - Train Loss: 0.0131 - Valid Loss: 0.9215 - Elapsed Time: 957.63s - Epoch 11: Best model found with loss = 0.9215. Epoch 12 [0/559] | Train Loss: 0.0057 Grad: 11864.8066 LR: 2.0112e-06 | Elapse: 1.57s Epoch 12 [500/559] | Train Loss: 0.0127 Grad: 6288.3857 LR: 1.2475e-06 | Elapse: 651.31s Epoch 12 [558/559] | Train Loss: 0.0121 Grad: 18782.9395 LR: 1.1688e-06 | Elapse: 727.78s Epoch 12 [0/140] | Valid Loss: 0.0907 | Elapse: 1.06s Epoch 12 [139/140] | Valid Loss: 0.0689 | Elapse: 178.82s Epoch 12 - Train Loss: 0.0121 - Valid Loss: 0.9232 - Elapsed Time: 964.75s - Epoch 12: Best model found with loss = 0.9232. Epoch 13 [0/559] | Train Loss: 0.0055 Grad: 11599.3584 LR: 1.1675e-06 | Elapse: 1.30s Epoch 13 [500/559] | Train Loss: 0.0123 Grad: 6445.4780 LR: 5.8671e-07 | Elapse: 649.70s Epoch 13 [558/559] | Train Loss: 0.0118 Grad: 19700.5469 LR: 5.3128e-07 | Elapse: 723.02s Epoch 13 [0/140] | Valid Loss: 0.0884 | Elapse: 1.26s Epoch 13 [139/140] | Valid Loss: 0.0675 | Elapse: 176.02s Epoch 13 - Train Loss: 0.0118 - Valid Loss: 0.9250 - Elapsed Time: 956.68s - Epoch 13: Best model found with loss = 0.9250. Epoch 14 [0/559] | Train Loss: 0.0054 Grad: 12537.0361 LR: 5.3035e-07 | Elapse: 1.41s Epoch 14 [500/559] | Train Loss: 0.0135 Grad: 6409.6084 LR: 1.6390e-07 | Elapse: 649.01s Epoch 14 [558/559] | Train Loss: 0.0128 Grad: 13459.7871 LR: 1.3469e-07 | Elapse: 723.84s Epoch 14 [0/140] | Valid Loss: 0.0842 | Elapse: 1.16s Epoch 14 [139/140] | Valid Loss: 0.0630 | Elapse: 178.11s Epoch 14 - Train Loss: 0.0128 - Valid Loss: 0.9302 - Elapsed Time: 960.51s - Epoch 14: Best model found with loss = 0.9302. Epoch 15 [0/559] | Train Loss: 0.0060 Grad: 18633.8516 LR: 1.3421e-07 | Elapse: 1.50s Epoch 15 [500/559] | Train Loss: 0.0144 Grad: 6711.6953 LR: 1.8075e-09 | Elapse: 645.79s Epoch 15 [558/559] | Train Loss: 0.0136 Grad: 15909.6465 LR: 4.0043e-10 | Elapse: 722.12s Epoch 15 [0/140] | Valid Loss: 0.0799 | Elapse: 1.26s Epoch 15 [139/140] | Valid Loss: 0.0600 | Elapse: 178.51s Epoch 15 - Train Loss: 0.0136 - Valid Loss: 0.9312 - Elapsed Time: 958.84s - Epoch 15: Best model found with loss = 0.9312. Fold 0 | Time: 242.28min | Overall Evaluation Loss: 0.9283