End of training
Browse files
all_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_loss": 0.8710034489631653,
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
"eval_wer": 60.05599273607748,
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
-
"train_samples_per_second":
|
11 |
-
"train_steps_per_second": 0.
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 30.0,
|
3 |
"eval_loss": 0.8710034489631653,
|
4 |
+
"eval_runtime": 337.4912,
|
5 |
+
"eval_samples_per_second": 1.517,
|
6 |
+
"eval_steps_per_second": 0.047,
|
7 |
"eval_wer": 60.05599273607748,
|
8 |
+
"train_loss": 0.04157245059808095,
|
9 |
+
"train_runtime": 1332.7289,
|
10 |
+
"train_samples_per_second": 57.626,
|
11 |
+
"train_steps_per_second": 0.9
|
12 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_loss": 0.8710034489631653,
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
"eval_wer": 60.05599273607748
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 30.0,
|
3 |
"eval_loss": 0.8710034489631653,
|
4 |
+
"eval_runtime": 337.4912,
|
5 |
+
"eval_samples_per_second": 1.517,
|
6 |
+
"eval_steps_per_second": 0.047,
|
7 |
"eval_wer": 60.05599273607748
|
8 |
}
|
runs/Dec20_19-12-39_129-146-32-172/events.out.tfevents.1671565316.129-146-32-172.141824.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec56a28eb4e411c0adfb0babd31b28a184b5ee3604d6eb7ab0a277729f38d64b
|
3 |
+
size 358
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 30.0,
|
3 |
+
"train_loss": 0.04157245059808095,
|
4 |
+
"train_runtime": 1332.7289,
|
5 |
+
"train_samples_per_second": 57.626,
|
6 |
+
"train_steps_per_second": 0.9
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": 0.8710034489631653,
|
3 |
"best_model_checkpoint": "./checkpoint-400",
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -698,18 +698,156 @@
|
|
698 |
"step": 1000
|
699 |
},
|
700 |
{
|
701 |
-
"epoch": 25.
|
702 |
-
"
|
703 |
-
"
|
704 |
-
"
|
705 |
-
|
706 |
-
|
707 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
708 |
}
|
709 |
],
|
710 |
-
"max_steps":
|
711 |
-
"num_train_epochs":
|
712 |
-
"total_flos": 4.
|
713 |
"trial_name": null,
|
714 |
"trial_params": null
|
715 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 0.8710034489631653,
|
3 |
"best_model_checkpoint": "./checkpoint-400",
|
4 |
+
"epoch": 30.0,
|
5 |
+
"global_step": 1200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
698 |
"step": 1000
|
699 |
},
|
700 |
{
|
701 |
+
"epoch": 25.25,
|
702 |
+
"learning_rate": 1.6581196581196582e-06,
|
703 |
+
"loss": 0.2704,
|
704 |
+
"step": 1010
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 25.5,
|
708 |
+
"learning_rate": 1.5726495726495727e-06,
|
709 |
+
"loss": 0.2582,
|
710 |
+
"step": 1020
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 25.75,
|
714 |
+
"learning_rate": 1.4871794871794873e-06,
|
715 |
+
"loss": 0.2634,
|
716 |
+
"step": 1030
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 26.0,
|
720 |
+
"learning_rate": 1.401709401709402e-06,
|
721 |
+
"loss": 0.2451,
|
722 |
+
"step": 1040
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 26.25,
|
726 |
+
"learning_rate": 1.3162393162393163e-06,
|
727 |
+
"loss": 0.2546,
|
728 |
+
"step": 1050
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 26.5,
|
732 |
+
"learning_rate": 1.230769230769231e-06,
|
733 |
+
"loss": 0.2604,
|
734 |
+
"step": 1060
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 26.75,
|
738 |
+
"learning_rate": 1.1452991452991454e-06,
|
739 |
+
"loss": 0.2459,
|
740 |
+
"step": 1070
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 27.0,
|
744 |
+
"learning_rate": 1.0598290598290598e-06,
|
745 |
+
"loss": 0.255,
|
746 |
+
"step": 1080
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"epoch": 27.25,
|
750 |
+
"learning_rate": 9.743589743589745e-07,
|
751 |
+
"loss": 0.2505,
|
752 |
+
"step": 1090
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 27.5,
|
756 |
+
"learning_rate": 8.88888888888889e-07,
|
757 |
+
"loss": 0.2454,
|
758 |
+
"step": 1100
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 27.5,
|
762 |
+
"eval_loss": 0.9438697099685669,
|
763 |
+
"eval_runtime": 297.6469,
|
764 |
+
"eval_samples_per_second": 1.72,
|
765 |
+
"eval_steps_per_second": 0.054,
|
766 |
+
"eval_wer": 59.193401937046005,
|
767 |
+
"step": 1100
|
768 |
+
},
|
769 |
+
{
|
770 |
+
"epoch": 27.75,
|
771 |
+
"learning_rate": 8.034188034188035e-07,
|
772 |
+
"loss": 0.2506,
|
773 |
+
"step": 1110
|
774 |
+
},
|
775 |
+
{
|
776 |
+
"epoch": 28.0,
|
777 |
+
"learning_rate": 7.179487179487179e-07,
|
778 |
+
"loss": 0.2497,
|
779 |
+
"step": 1120
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 28.25,
|
783 |
+
"learning_rate": 6.324786324786325e-07,
|
784 |
+
"loss": 0.2477,
|
785 |
+
"step": 1130
|
786 |
+
},
|
787 |
+
{
|
788 |
+
"epoch": 28.5,
|
789 |
+
"learning_rate": 5.470085470085471e-07,
|
790 |
+
"loss": 0.2489,
|
791 |
+
"step": 1140
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"epoch": 28.75,
|
795 |
+
"learning_rate": 4.615384615384616e-07,
|
796 |
+
"loss": 0.2428,
|
797 |
+
"step": 1150
|
798 |
+
},
|
799 |
+
{
|
800 |
+
"epoch": 29.0,
|
801 |
+
"learning_rate": 3.760683760683761e-07,
|
802 |
+
"loss": 0.2395,
|
803 |
+
"step": 1160
|
804 |
+
},
|
805 |
+
{
|
806 |
+
"epoch": 29.25,
|
807 |
+
"learning_rate": 2.905982905982906e-07,
|
808 |
+
"loss": 0.2438,
|
809 |
+
"step": 1170
|
810 |
+
},
|
811 |
+
{
|
812 |
+
"epoch": 29.5,
|
813 |
+
"learning_rate": 2.0512820512820514e-07,
|
814 |
+
"loss": 0.2403,
|
815 |
+
"step": 1180
|
816 |
+
},
|
817 |
+
{
|
818 |
+
"epoch": 29.75,
|
819 |
+
"learning_rate": 1.1965811965811967e-07,
|
820 |
+
"loss": 0.2469,
|
821 |
+
"step": 1190
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"epoch": 30.0,
|
825 |
+
"learning_rate": 3.418803418803419e-08,
|
826 |
+
"loss": 0.2297,
|
827 |
+
"step": 1200
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"epoch": 30.0,
|
831 |
+
"eval_loss": 0.9485259652137756,
|
832 |
+
"eval_runtime": 355.4454,
|
833 |
+
"eval_samples_per_second": 1.44,
|
834 |
+
"eval_steps_per_second": 0.045,
|
835 |
+
"eval_wer": 59.042070217917676,
|
836 |
+
"step": 1200
|
837 |
+
},
|
838 |
+
{
|
839 |
+
"epoch": 30.0,
|
840 |
+
"step": 1200,
|
841 |
+
"total_flos": 4.8586623123456e+18,
|
842 |
+
"train_loss": 0.04157245059808095,
|
843 |
+
"train_runtime": 1332.7289,
|
844 |
+
"train_samples_per_second": 57.626,
|
845 |
+
"train_steps_per_second": 0.9
|
846 |
}
|
847 |
],
|
848 |
+
"max_steps": 1200,
|
849 |
+
"num_train_epochs": 30,
|
850 |
+
"total_flos": 4.8586623123456e+18,
|
851 |
"trial_name": null,
|
852 |
"trial_params": null
|
853 |
}
|