razhan committed on
Commit
08cd6c8
1 Parent(s): e761088

End of training

Browse files
Files changed (5) hide show
  1. README.md +14 -2
  2. all_results.json +10 -10
  3. eval_results.json +6 -6
  4. train_results.json +5 -5
  5. trainer_state.json +911 -14
README.md CHANGED
@@ -3,11 +3,23 @@ license: apache-2.0
3
  base_model: openai/whisper-base
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - wer
8
  model-index:
9
  - name: whisper-base-ckb
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +27,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # whisper-base-ckb
17
 
18
- This model is a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0641
21
  - Wer: 0.1262
 
3
  base_model: openai/whisper-base
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - razhan/common_voice_ckb_16
8
  metrics:
9
  - wer
10
  model-index:
11
  - name: whisper-base-ckb
12
+ results:
13
+ - task:
14
+ name: Automatic Speech Recognition
15
+ type: automatic-speech-recognition
16
+ dataset:
17
+ name: razhan/common_voice_ckb_16
18
+ type: razhan/common_voice_ckb_16
19
+ metrics:
20
+ - name: Wer
21
+ type: wer
22
+ value: 0.12623194275685162
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # whisper-base-ckb
29
 
30
+ This model is a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) on the razhan/common_voice_ckb_16 dataset.
31
  It achieves the following results on the evaluation set:
32
  - Loss: 0.0641
33
  - Wer: 0.1262
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 10.87,
3
- "eval_loss": 0.1419677734375,
4
- "eval_runtime": 117.7598,
5
  "eval_samples": 4940,
6
- "eval_samples_per_second": 41.95,
7
- "eval_steps_per_second": 0.059,
8
- "eval_wer": 0.2917510463075469,
9
- "train_loss": 0.25973586964607237,
10
- "train_runtime": 25779.7961,
11
  "train_samples": 105929,
12
- "train_samples_per_second": 44.686,
13
- "train_steps_per_second": 0.039
14
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "eval_loss": 0.0640869140625,
4
+ "eval_runtime": 115.2612,
5
  "eval_samples": 4940,
6
+ "eval_samples_per_second": 42.859,
7
+ "eval_steps_per_second": 0.061,
8
+ "eval_wer": 0.12623194275685162,
9
+ "train_loss": 0.0027616678631823995,
10
+ "train_runtime": 6144.0313,
11
  "train_samples": 105929,
12
+ "train_samples_per_second": 431.248,
13
+ "train_steps_per_second": 0.374
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.87,
3
- "eval_loss": 0.1419677734375,
4
- "eval_runtime": 117.7598,
5
  "eval_samples": 4940,
6
- "eval_samples_per_second": 41.95,
7
- "eval_steps_per_second": 0.059,
8
- "eval_wer": 0.2917510463075469
9
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "eval_loss": 0.0640869140625,
4
+ "eval_runtime": 115.2612,
5
  "eval_samples": 4940,
6
+ "eval_samples_per_second": 42.859,
7
+ "eval_steps_per_second": 0.061,
8
+ "eval_wer": 0.12623194275685162
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.87,
3
- "train_loss": 0.25973586964607237,
4
- "train_runtime": 25779.7961,
5
  "train_samples": 105929,
6
- "train_samples_per_second": 44.686,
7
- "train_steps_per_second": 0.039
8
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "train_loss": 0.0027616678631823995,
4
+ "train_runtime": 6144.0313,
5
  "train_samples": 105929,
6
+ "train_samples_per_second": 431.248,
7
+ "train_steps_per_second": 0.374
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2917510463075469,
3
- "best_model_checkpoint": "./whisper-base-ckb/checkpoint-1000",
4
- "epoch": 10.869565217391305,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -699,21 +699,918 @@
699
  "step": 1000
700
  },
701
  {
702
- "epoch": 10.87,
703
- "step": 1000,
704
- "total_flos": 7.47187145428435e+19,
705
- "train_loss": 0.25973586964607237,
706
- "train_runtime": 25779.7961,
707
- "train_samples_per_second": 44.686,
708
- "train_steps_per_second": 0.039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  }
710
  ],
711
  "logging_steps": 10,
712
- "max_steps": 1000,
713
  "num_input_tokens_seen": 0,
714
- "num_train_epochs": 11,
715
  "save_steps": 100,
716
- "total_flos": 7.47187145428435e+19,
717
  "train_batch_size": 192,
718
  "trial_name": null,
719
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.12623194275685162,
3
+ "best_model_checkpoint": "./whisper-base-ckb/checkpoint-2300",
4
+ "epoch": 25.0,
5
  "eval_steps": 100,
6
+ "global_step": 2300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
699
  "step": 1000
700
  },
701
  {
702
+ "epoch": 10.98,
703
+ "learning_rate": 7.889473684210527e-06,
704
+ "loss": 0.0921,
705
+ "step": 1010
706
+ },
707
+ {
708
+ "epoch": 11.09,
709
+ "learning_rate": 7.863157894736842e-06,
710
+ "loss": 0.0904,
711
+ "step": 1020
712
+ },
713
+ {
714
+ "epoch": 11.2,
715
+ "learning_rate": 7.836842105263159e-06,
716
+ "loss": 0.0983,
717
+ "step": 1030
718
+ },
719
+ {
720
+ "epoch": 11.3,
721
+ "learning_rate": 7.810526315789474e-06,
722
+ "loss": 0.0976,
723
+ "step": 1040
724
+ },
725
+ {
726
+ "epoch": 11.41,
727
+ "learning_rate": 7.78421052631579e-06,
728
+ "loss": 0.094,
729
+ "step": 1050
730
+ },
731
+ {
732
+ "epoch": 11.52,
733
+ "learning_rate": 7.757894736842105e-06,
734
+ "loss": 0.0908,
735
+ "step": 1060
736
+ },
737
+ {
738
+ "epoch": 11.63,
739
+ "learning_rate": 7.731578947368422e-06,
740
+ "loss": 0.0882,
741
+ "step": 1070
742
+ },
743
+ {
744
+ "epoch": 11.74,
745
+ "learning_rate": 7.705263157894738e-06,
746
+ "loss": 0.096,
747
+ "step": 1080
748
+ },
749
+ {
750
+ "epoch": 11.85,
751
+ "learning_rate": 7.678947368421053e-06,
752
+ "loss": 0.0949,
753
+ "step": 1090
754
+ },
755
+ {
756
+ "epoch": 11.96,
757
+ "learning_rate": 7.65263157894737e-06,
758
+ "loss": 0.0919,
759
+ "step": 1100
760
+ },
761
+ {
762
+ "epoch": 11.96,
763
+ "eval_loss": 0.1314697265625,
764
+ "eval_runtime": 111.7237,
765
+ "eval_samples_per_second": 44.216,
766
+ "eval_steps_per_second": 0.063,
767
+ "eval_wer": 0.27420008100445525,
768
+ "step": 1100
769
+ },
770
+ {
771
+ "epoch": 12.07,
772
+ "learning_rate": 7.626315789473685e-06,
773
+ "loss": 0.0866,
774
+ "step": 1110
775
+ },
776
+ {
777
+ "epoch": 12.17,
778
+ "learning_rate": 7.600000000000001e-06,
779
+ "loss": 0.0797,
780
+ "step": 1120
781
+ },
782
+ {
783
+ "epoch": 12.28,
784
+ "learning_rate": 7.573684210526317e-06,
785
+ "loss": 0.0859,
786
+ "step": 1130
787
+ },
788
+ {
789
+ "epoch": 12.39,
790
+ "learning_rate": 7.547368421052632e-06,
791
+ "loss": 0.0869,
792
+ "step": 1140
793
+ },
794
+ {
795
+ "epoch": 12.5,
796
+ "learning_rate": 7.5210526315789475e-06,
797
+ "loss": 0.0843,
798
+ "step": 1150
799
+ },
800
+ {
801
+ "epoch": 12.61,
802
+ "learning_rate": 7.494736842105263e-06,
803
+ "loss": 0.083,
804
+ "step": 1160
805
+ },
806
+ {
807
+ "epoch": 12.72,
808
+ "learning_rate": 7.468421052631579e-06,
809
+ "loss": 0.0784,
810
+ "step": 1170
811
+ },
812
+ {
813
+ "epoch": 12.83,
814
+ "learning_rate": 7.442105263157895e-06,
815
+ "loss": 0.0853,
816
+ "step": 1180
817
+ },
818
+ {
819
+ "epoch": 12.93,
820
+ "learning_rate": 7.415789473684211e-06,
821
+ "loss": 0.0861,
822
+ "step": 1190
823
+ },
824
+ {
825
+ "epoch": 13.04,
826
+ "learning_rate": 7.3894736842105275e-06,
827
+ "loss": 0.0839,
828
+ "step": 1200
829
+ },
830
+ {
831
+ "epoch": 13.04,
832
+ "eval_loss": 0.1217041015625,
833
+ "eval_runtime": 110.1755,
834
+ "eval_samples_per_second": 44.838,
835
+ "eval_steps_per_second": 0.064,
836
+ "eval_wer": 0.2596867827730525,
837
+ "step": 1200
838
+ },
839
+ {
840
+ "epoch": 13.15,
841
+ "learning_rate": 7.363157894736843e-06,
842
+ "loss": 0.0759,
843
+ "step": 1210
844
+ },
845
+ {
846
+ "epoch": 13.26,
847
+ "learning_rate": 7.336842105263159e-06,
848
+ "loss": 0.0733,
849
+ "step": 1220
850
+ },
851
+ {
852
+ "epoch": 13.37,
853
+ "learning_rate": 7.310526315789475e-06,
854
+ "loss": 0.0768,
855
+ "step": 1230
856
+ },
857
+ {
858
+ "epoch": 13.48,
859
+ "learning_rate": 7.28421052631579e-06,
860
+ "loss": 0.0834,
861
+ "step": 1240
862
+ },
863
+ {
864
+ "epoch": 13.59,
865
+ "learning_rate": 7.257894736842106e-06,
866
+ "loss": 0.077,
867
+ "step": 1250
868
+ },
869
+ {
870
+ "epoch": 13.7,
871
+ "learning_rate": 7.2315789473684215e-06,
872
+ "loss": 0.0738,
873
+ "step": 1260
874
+ },
875
+ {
876
+ "epoch": 13.8,
877
+ "learning_rate": 7.205263157894737e-06,
878
+ "loss": 0.0723,
879
+ "step": 1270
880
+ },
881
+ {
882
+ "epoch": 13.91,
883
+ "learning_rate": 7.178947368421053e-06,
884
+ "loss": 0.0752,
885
+ "step": 1280
886
+ },
887
+ {
888
+ "epoch": 14.02,
889
+ "learning_rate": 7.152631578947369e-06,
890
+ "loss": 0.0794,
891
+ "step": 1290
892
+ },
893
+ {
894
+ "epoch": 14.13,
895
+ "learning_rate": 7.126315789473685e-06,
896
+ "loss": 0.0713,
897
+ "step": 1300
898
+ },
899
+ {
900
+ "epoch": 14.13,
901
+ "eval_loss": 0.11322021484375,
902
+ "eval_runtime": 110.9942,
903
+ "eval_samples_per_second": 44.507,
904
+ "eval_steps_per_second": 0.063,
905
+ "eval_wer": 0.23710679087349804,
906
+ "step": 1300
907
+ },
908
+ {
909
+ "epoch": 14.24,
910
+ "learning_rate": 7.100000000000001e-06,
911
+ "loss": 0.0703,
912
+ "step": 1310
913
+ },
914
+ {
915
+ "epoch": 14.35,
916
+ "learning_rate": 7.073684210526316e-06,
917
+ "loss": 0.0662,
918
+ "step": 1320
919
+ },
920
+ {
921
+ "epoch": 14.46,
922
+ "learning_rate": 7.047368421052631e-06,
923
+ "loss": 0.0686,
924
+ "step": 1330
925
+ },
926
+ {
927
+ "epoch": 14.57,
928
+ "learning_rate": 7.021052631578948e-06,
929
+ "loss": 0.0771,
930
+ "step": 1340
931
+ },
932
+ {
933
+ "epoch": 14.67,
934
+ "learning_rate": 6.994736842105264e-06,
935
+ "loss": 0.0717,
936
+ "step": 1350
937
+ },
938
+ {
939
+ "epoch": 14.78,
940
+ "learning_rate": 6.96842105263158e-06,
941
+ "loss": 0.071,
942
+ "step": 1360
943
+ },
944
+ {
945
+ "epoch": 14.89,
946
+ "learning_rate": 6.9421052631578955e-06,
947
+ "loss": 0.0674,
948
+ "step": 1370
949
+ },
950
+ {
951
+ "epoch": 15.0,
952
+ "learning_rate": 6.915789473684211e-06,
953
+ "loss": 0.0694,
954
+ "step": 1380
955
+ },
956
+ {
957
+ "epoch": 15.11,
958
+ "learning_rate": 6.889473684210527e-06,
959
+ "loss": 0.0732,
960
+ "step": 1390
961
+ },
962
+ {
963
+ "epoch": 15.22,
964
+ "learning_rate": 6.863157894736843e-06,
965
+ "loss": 0.0687,
966
+ "step": 1400
967
+ },
968
+ {
969
+ "epoch": 15.22,
970
+ "eval_loss": 0.109130859375,
971
+ "eval_runtime": 110.2423,
972
+ "eval_samples_per_second": 44.81,
973
+ "eval_steps_per_second": 0.063,
974
+ "eval_wer": 0.23717429458620223,
975
+ "step": 1400
976
+ },
977
+ {
978
+ "epoch": 15.33,
979
+ "learning_rate": 6.836842105263158e-06,
980
+ "loss": 0.0635,
981
+ "step": 1410
982
+ },
983
+ {
984
+ "epoch": 15.43,
985
+ "learning_rate": 6.810526315789474e-06,
986
+ "loss": 0.0644,
987
+ "step": 1420
988
+ },
989
+ {
990
+ "epoch": 15.54,
991
+ "learning_rate": 6.78421052631579e-06,
992
+ "loss": 0.0583,
993
+ "step": 1430
994
+ },
995
+ {
996
+ "epoch": 15.65,
997
+ "learning_rate": 6.7578947368421054e-06,
998
+ "loss": 0.0743,
999
+ "step": 1440
1000
+ },
1001
+ {
1002
+ "epoch": 15.76,
1003
+ "learning_rate": 6.731578947368421e-06,
1004
+ "loss": 0.069,
1005
+ "step": 1450
1006
+ },
1007
+ {
1008
+ "epoch": 15.87,
1009
+ "learning_rate": 6.705263157894737e-06,
1010
+ "loss": 0.0659,
1011
+ "step": 1460
1012
+ },
1013
+ {
1014
+ "epoch": 15.98,
1015
+ "learning_rate": 6.678947368421053e-06,
1016
+ "loss": 0.0637,
1017
+ "step": 1470
1018
+ },
1019
+ {
1020
+ "epoch": 16.09,
1021
+ "learning_rate": 6.6526315789473695e-06,
1022
+ "loss": 0.0586,
1023
+ "step": 1480
1024
+ },
1025
+ {
1026
+ "epoch": 16.2,
1027
+ "learning_rate": 6.626315789473685e-06,
1028
+ "loss": 0.0672,
1029
+ "step": 1490
1030
+ },
1031
+ {
1032
+ "epoch": 16.3,
1033
+ "learning_rate": 6.600000000000001e-06,
1034
+ "loss": 0.0647,
1035
+ "step": 1500
1036
+ },
1037
+ {
1038
+ "epoch": 16.3,
1039
+ "eval_loss": 0.1021728515625,
1040
+ "eval_runtime": 111.1773,
1041
+ "eval_samples_per_second": 44.434,
1042
+ "eval_steps_per_second": 0.063,
1043
+ "eval_wer": 0.21726069933846362,
1044
+ "step": 1500
1045
+ },
1046
+ {
1047
+ "epoch": 16.41,
1048
+ "learning_rate": 6.573684210526316e-06,
1049
+ "loss": 0.0602,
1050
+ "step": 1510
1051
+ },
1052
+ {
1053
+ "epoch": 16.52,
1054
+ "learning_rate": 6.547368421052632e-06,
1055
+ "loss": 0.0586,
1056
+ "step": 1520
1057
+ },
1058
+ {
1059
+ "epoch": 16.63,
1060
+ "learning_rate": 6.521052631578948e-06,
1061
+ "loss": 0.0556,
1062
+ "step": 1530
1063
+ },
1064
+ {
1065
+ "epoch": 16.74,
1066
+ "learning_rate": 6.494736842105264e-06,
1067
+ "loss": 0.0647,
1068
+ "step": 1540
1069
+ },
1070
+ {
1071
+ "epoch": 16.85,
1072
+ "learning_rate": 6.4684210526315794e-06,
1073
+ "loss": 0.064,
1074
+ "step": 1550
1075
+ },
1076
+ {
1077
+ "epoch": 16.96,
1078
+ "learning_rate": 6.442105263157895e-06,
1079
+ "loss": 0.0623,
1080
+ "step": 1560
1081
+ },
1082
+ {
1083
+ "epoch": 17.07,
1084
+ "learning_rate": 6.415789473684211e-06,
1085
+ "loss": 0.0551,
1086
+ "step": 1570
1087
+ },
1088
+ {
1089
+ "epoch": 17.17,
1090
+ "learning_rate": 6.389473684210527e-06,
1091
+ "loss": 0.053,
1092
+ "step": 1580
1093
+ },
1094
+ {
1095
+ "epoch": 17.28,
1096
+ "learning_rate": 6.363157894736842e-06,
1097
+ "loss": 0.0598,
1098
+ "step": 1590
1099
+ },
1100
+ {
1101
+ "epoch": 17.39,
1102
+ "learning_rate": 6.336842105263158e-06,
1103
+ "loss": 0.059,
1104
+ "step": 1600
1105
+ },
1106
+ {
1107
+ "epoch": 17.39,
1108
+ "eval_loss": 0.09674072265625,
1109
+ "eval_runtime": 110.518,
1110
+ "eval_samples_per_second": 44.699,
1111
+ "eval_steps_per_second": 0.063,
1112
+ "eval_wer": 0.20433373835560956,
1113
+ "step": 1600
1114
+ },
1115
+ {
1116
+ "epoch": 17.5,
1117
+ "learning_rate": 6.310526315789474e-06,
1118
+ "loss": 0.0564,
1119
+ "step": 1610
1120
+ },
1121
+ {
1122
+ "epoch": 17.61,
1123
+ "learning_rate": 6.28421052631579e-06,
1124
+ "loss": 0.0536,
1125
+ "step": 1620
1126
+ },
1127
+ {
1128
+ "epoch": 17.72,
1129
+ "learning_rate": 6.257894736842106e-06,
1130
+ "loss": 0.0523,
1131
+ "step": 1630
1132
+ },
1133
+ {
1134
+ "epoch": 17.83,
1135
+ "learning_rate": 6.231578947368422e-06,
1136
+ "loss": 0.0583,
1137
+ "step": 1640
1138
+ },
1139
+ {
1140
+ "epoch": 17.93,
1141
+ "learning_rate": 6.205263157894738e-06,
1142
+ "loss": 0.058,
1143
+ "step": 1650
1144
+ },
1145
+ {
1146
+ "epoch": 18.04,
1147
+ "learning_rate": 6.1789473684210534e-06,
1148
+ "loss": 0.0549,
1149
+ "step": 1660
1150
+ },
1151
+ {
1152
+ "epoch": 18.15,
1153
+ "learning_rate": 6.152631578947369e-06,
1154
+ "loss": 0.0504,
1155
+ "step": 1670
1156
+ },
1157
+ {
1158
+ "epoch": 18.26,
1159
+ "learning_rate": 6.126315789473685e-06,
1160
+ "loss": 0.0479,
1161
+ "step": 1680
1162
+ },
1163
+ {
1164
+ "epoch": 18.37,
1165
+ "learning_rate": 6.1e-06,
1166
+ "loss": 0.0516,
1167
+ "step": 1690
1168
+ },
1169
+ {
1170
+ "epoch": 18.48,
1171
+ "learning_rate": 6.073684210526316e-06,
1172
+ "loss": 0.0539,
1173
+ "step": 1700
1174
+ },
1175
+ {
1176
+ "epoch": 18.48,
1177
+ "eval_loss": 0.0897216796875,
1178
+ "eval_runtime": 110.8559,
1179
+ "eval_samples_per_second": 44.562,
1180
+ "eval_steps_per_second": 0.063,
1181
+ "eval_wer": 0.19289185905224787,
1182
+ "step": 1700
1183
+ },
1184
+ {
1185
+ "epoch": 18.59,
1186
+ "learning_rate": 6.047368421052632e-06,
1187
+ "loss": 0.0525,
1188
+ "step": 1710
1189
+ },
1190
+ {
1191
+ "epoch": 18.7,
1192
+ "learning_rate": 6.0210526315789475e-06,
1193
+ "loss": 0.0519,
1194
+ "step": 1720
1195
+ },
1196
+ {
1197
+ "epoch": 18.8,
1198
+ "learning_rate": 5.994736842105263e-06,
1199
+ "loss": 0.0463,
1200
+ "step": 1730
1201
+ },
1202
+ {
1203
+ "epoch": 18.91,
1204
+ "learning_rate": 5.968421052631579e-06,
1205
+ "loss": 0.054,
1206
+ "step": 1740
1207
+ },
1208
+ {
1209
+ "epoch": 19.02,
1210
+ "learning_rate": 5.942105263157896e-06,
1211
+ "loss": 0.0552,
1212
+ "step": 1750
1213
+ },
1214
+ {
1215
+ "epoch": 19.13,
1216
+ "learning_rate": 5.915789473684212e-06,
1217
+ "loss": 0.0482,
1218
+ "step": 1760
1219
+ },
1220
+ {
1221
+ "epoch": 19.24,
1222
+ "learning_rate": 5.8894736842105274e-06,
1223
+ "loss": 0.0464,
1224
+ "step": 1770
1225
+ },
1226
+ {
1227
+ "epoch": 19.35,
1228
+ "learning_rate": 5.863157894736842e-06,
1229
+ "loss": 0.0433,
1230
+ "step": 1780
1231
+ },
1232
+ {
1233
+ "epoch": 19.46,
1234
+ "learning_rate": 5.836842105263158e-06,
1235
+ "loss": 0.0456,
1236
+ "step": 1790
1237
+ },
1238
+ {
1239
+ "epoch": 19.57,
1240
+ "learning_rate": 5.810526315789474e-06,
1241
+ "loss": 0.0518,
1242
+ "step": 1800
1243
+ },
1244
+ {
1245
+ "epoch": 19.57,
1246
+ "eval_loss": 0.08270263671875,
1247
+ "eval_runtime": 114.8201,
1248
+ "eval_samples_per_second": 43.024,
1249
+ "eval_steps_per_second": 0.061,
1250
+ "eval_wer": 0.17183070068853787,
1251
+ "step": 1800
1252
+ },
1253
+ {
1254
+ "epoch": 19.67,
1255
+ "learning_rate": 5.78421052631579e-06,
1256
+ "loss": 0.048,
1257
+ "step": 1810
1258
+ },
1259
+ {
1260
+ "epoch": 19.78,
1261
+ "learning_rate": 5.757894736842106e-06,
1262
+ "loss": 0.0453,
1263
+ "step": 1820
1264
+ },
1265
+ {
1266
+ "epoch": 19.89,
1267
+ "learning_rate": 5.7315789473684215e-06,
1268
+ "loss": 0.0438,
1269
+ "step": 1830
1270
+ },
1271
+ {
1272
+ "epoch": 20.0,
1273
+ "learning_rate": 5.705263157894737e-06,
1274
+ "loss": 0.0445,
1275
+ "step": 1840
1276
+ },
1277
+ {
1278
+ "epoch": 20.11,
1279
+ "learning_rate": 5.678947368421053e-06,
1280
+ "loss": 0.048,
1281
+ "step": 1850
1282
+ },
1283
+ {
1284
+ "epoch": 20.22,
1285
+ "learning_rate": 5.652631578947368e-06,
1286
+ "loss": 0.0443,
1287
+ "step": 1860
1288
+ },
1289
+ {
1290
+ "epoch": 20.33,
1291
+ "learning_rate": 5.626315789473684e-06,
1292
+ "loss": 0.0435,
1293
+ "step": 1870
1294
+ },
1295
+ {
1296
+ "epoch": 20.43,
1297
+ "learning_rate": 5.600000000000001e-06,
1298
+ "loss": 0.0408,
1299
+ "step": 1880
1300
+ },
1301
+ {
1302
+ "epoch": 20.54,
1303
+ "learning_rate": 5.573684210526316e-06,
1304
+ "loss": 0.0382,
1305
+ "step": 1890
1306
+ },
1307
+ {
1308
+ "epoch": 20.65,
1309
+ "learning_rate": 5.547368421052632e-06,
1310
+ "loss": 0.0495,
1311
+ "step": 1900
1312
+ },
1313
+ {
1314
+ "epoch": 20.65,
1315
+ "eval_loss": 0.07867431640625,
1316
+ "eval_runtime": 110.7928,
1317
+ "eval_samples_per_second": 44.588,
1318
+ "eval_steps_per_second": 0.063,
1319
+ "eval_wer": 0.16673417037937086,
1320
+ "step": 1900
1321
+ },
1322
+ {
1323
+ "epoch": 20.76,
1324
+ "learning_rate": 5.521052631578948e-06,
1325
+ "loss": 0.0451,
1326
+ "step": 1910
1327
+ },
1328
+ {
1329
+ "epoch": 20.87,
1330
+ "learning_rate": 5.494736842105264e-06,
1331
+ "loss": 0.042,
1332
+ "step": 1920
1333
+ },
1334
+ {
1335
+ "epoch": 20.98,
1336
+ "learning_rate": 5.46842105263158e-06,
1337
+ "loss": 0.0413,
1338
+ "step": 1930
1339
+ },
1340
+ {
1341
+ "epoch": 21.09,
1342
+ "learning_rate": 5.4421052631578955e-06,
1343
+ "loss": 0.0374,
1344
+ "step": 1940
1345
+ },
1346
+ {
1347
+ "epoch": 21.2,
1348
+ "learning_rate": 5.415789473684211e-06,
1349
+ "loss": 0.0445,
1350
+ "step": 1950
1351
+ },
1352
+ {
1353
+ "epoch": 21.3,
1354
+ "learning_rate": 5.389473684210526e-06,
1355
+ "loss": 0.0406,
1356
+ "step": 1960
1357
+ },
1358
+ {
1359
+ "epoch": 21.41,
1360
+ "learning_rate": 5.363157894736842e-06,
1361
+ "loss": 0.0389,
1362
+ "step": 1970
1363
+ },
1364
+ {
1365
+ "epoch": 21.52,
1366
+ "learning_rate": 5.336842105263158e-06,
1367
+ "loss": 0.0373,
1368
+ "step": 1980
1369
+ },
1370
+ {
1371
+ "epoch": 21.63,
1372
+ "learning_rate": 5.310526315789474e-06,
1373
+ "loss": 0.035,
1374
+ "step": 1990
1375
+ },
1376
+ {
1377
+ "epoch": 21.74,
1378
+ "learning_rate": 5.2842105263157896e-06,
1379
+ "loss": 0.0444,
1380
+ "step": 2000
1381
+ },
1382
+ {
1383
+ "epoch": 21.74,
1384
+ "eval_loss": 0.07183837890625,
1385
+ "eval_runtime": 112.6262,
1386
+ "eval_samples_per_second": 43.862,
1387
+ "eval_steps_per_second": 0.062,
1388
+ "eval_wer": 0.14692183070068854,
1389
+ "step": 2000
1390
+ },
1391
+ {
1392
+ "epoch": 21.85,
1393
+ "learning_rate": 5.257894736842105e-06,
1394
+ "loss": 0.04,
1395
+ "step": 2010
1396
+ },
1397
+ {
1398
+ "epoch": 21.96,
1399
+ "learning_rate": 5.231578947368422e-06,
1400
+ "loss": 0.0391,
1401
+ "step": 2020
1402
+ },
1403
+ {
1404
+ "epoch": 22.07,
1405
+ "learning_rate": 5.205263157894738e-06,
1406
+ "loss": 0.037,
1407
+ "step": 2030
1408
+ },
1409
+ {
1410
+ "epoch": 22.17,
1411
+ "learning_rate": 5.178947368421054e-06,
1412
+ "loss": 0.0332,
1413
+ "step": 2040
1414
+ },
1415
+ {
1416
+ "epoch": 22.28,
1417
+ "learning_rate": 5.1526315789473695e-06,
1418
+ "loss": 0.0385,
1419
+ "step": 2050
1420
+ },
1421
+ {
1422
+ "epoch": 22.39,
1423
+ "learning_rate": 5.1263157894736845e-06,
1424
+ "loss": 0.0377,
1425
+ "step": 2060
1426
+ },
1427
+ {
1428
+ "epoch": 22.5,
1429
+ "learning_rate": 5.1e-06,
1430
+ "loss": 0.0353,
1431
+ "step": 2070
1432
+ },
1433
+ {
1434
+ "epoch": 22.61,
1435
+ "learning_rate": 5.073684210526316e-06,
1436
+ "loss": 0.0338,
1437
+ "step": 2080
1438
+ },
1439
+ {
1440
+ "epoch": 22.72,
1441
+ "learning_rate": 5.047368421052632e-06,
1442
+ "loss": 0.0327,
1443
+ "step": 2090
1444
+ },
1445
+ {
1446
+ "epoch": 22.83,
1447
+ "learning_rate": 5.021052631578948e-06,
1448
+ "loss": 0.0392,
1449
+ "step": 2100
1450
+ },
1451
+ {
1452
+ "epoch": 22.83,
1453
+ "eval_loss": 0.067138671875,
1454
+ "eval_runtime": 111.3072,
1455
+ "eval_samples_per_second": 44.382,
1456
+ "eval_steps_per_second": 0.063,
1457
+ "eval_wer": 0.13683002565141084,
1458
+ "step": 2100
1459
+ },
1460
+ {
1461
+ "epoch": 22.93,
1462
+ "learning_rate": 9.42857142857143e-07,
1463
+ "loss": 0.0362,
1464
+ "step": 2110
1465
+ },
1466
+ {
1467
+ "epoch": 23.04,
1468
+ "learning_rate": 8.952380952380953e-07,
1469
+ "loss": 0.032,
1470
+ "step": 2120
1471
+ },
1472
+ {
1473
+ "epoch": 23.15,
1474
+ "learning_rate": 8.476190476190477e-07,
1475
+ "loss": 0.0309,
1476
+ "step": 2130
1477
+ },
1478
+ {
1479
+ "epoch": 23.26,
1480
+ "learning_rate": 8.000000000000001e-07,
1481
+ "loss": 0.0296,
1482
+ "step": 2140
1483
+ },
1484
+ {
1485
+ "epoch": 23.37,
1486
+ "learning_rate": 7.523809523809525e-07,
1487
+ "loss": 0.0338,
1488
+ "step": 2150
1489
+ },
1490
+ {
1491
+ "epoch": 23.48,
1492
+ "learning_rate": 7.047619047619048e-07,
1493
+ "loss": 0.035,
1494
+ "step": 2160
1495
+ },
1496
+ {
1497
+ "epoch": 23.59,
1498
+ "learning_rate": 6.571428571428571e-07,
1499
+ "loss": 0.0312,
1500
+ "step": 2170
1501
+ },
1502
+ {
1503
+ "epoch": 23.7,
1504
+ "learning_rate": 6.095238095238095e-07,
1505
+ "loss": 0.0309,
1506
+ "step": 2180
1507
+ },
1508
+ {
1509
+ "epoch": 23.8,
1510
+ "learning_rate": 5.619047619047619e-07,
1511
+ "loss": 0.0292,
1512
+ "step": 2190
1513
+ },
1514
+ {
1515
+ "epoch": 23.91,
1516
+ "learning_rate": 5.142857142857143e-07,
1517
+ "loss": 0.0335,
1518
+ "step": 2200
1519
+ },
1520
+ {
1521
+ "epoch": 23.91,
1522
+ "eval_loss": 0.064453125,
1523
+ "eval_runtime": 113.7883,
1524
+ "eval_samples_per_second": 43.414,
1525
+ "eval_steps_per_second": 0.062,
1526
+ "eval_wer": 0.12626569461320372,
1527
+ "step": 2200
1528
+ },
1529
+ {
1530
+ "epoch": 24.02,
1531
+ "learning_rate": 4.666666666666667e-07,
1532
+ "loss": 0.0349,
1533
+ "step": 2210
1534
+ },
1535
+ {
1536
+ "epoch": 24.13,
1537
+ "learning_rate": 4.1904761904761906e-07,
1538
+ "loss": 0.0327,
1539
+ "step": 2220
1540
+ },
1541
+ {
1542
+ "epoch": 24.24,
1543
+ "learning_rate": 3.7142857142857145e-07,
1544
+ "loss": 0.0317,
1545
+ "step": 2230
1546
+ },
1547
+ {
1548
+ "epoch": 24.35,
1549
+ "learning_rate": 3.238095238095238e-07,
1550
+ "loss": 0.0298,
1551
+ "step": 2240
1552
+ },
1553
+ {
1554
+ "epoch": 24.46,
1555
+ "learning_rate": 2.7619047619047624e-07,
1556
+ "loss": 0.0302,
1557
+ "step": 2250
1558
+ },
1559
+ {
1560
+ "epoch": 24.57,
1561
+ "learning_rate": 2.285714285714286e-07,
1562
+ "loss": 0.0355,
1563
+ "step": 2260
1564
+ },
1565
+ {
1566
+ "epoch": 24.67,
1567
+ "learning_rate": 1.8095238095238097e-07,
1568
+ "loss": 0.0309,
1569
+ "step": 2270
1570
+ },
1571
+ {
1572
+ "epoch": 24.78,
1573
+ "learning_rate": 1.3333333333333336e-07,
1574
+ "loss": 0.0299,
1575
+ "step": 2280
1576
+ },
1577
+ {
1578
+ "epoch": 24.89,
1579
+ "learning_rate": 8.571428571428573e-08,
1580
+ "loss": 0.028,
1581
+ "step": 2290
1582
+ },
1583
+ {
1584
+ "epoch": 25.0,
1585
+ "learning_rate": 3.8095238095238096e-08,
1586
+ "loss": 0.0292,
1587
+ "step": 2300
1588
+ },
1589
+ {
1590
+ "epoch": 25.0,
1591
+ "eval_loss": 0.0640869140625,
1592
+ "eval_runtime": 116.9844,
1593
+ "eval_samples_per_second": 42.228,
1594
+ "eval_steps_per_second": 0.06,
1595
+ "eval_wer": 0.12623194275685162,
1596
+ "step": 2300
1597
+ },
1598
+ {
1599
+ "epoch": 25.0,
1600
+ "step": 2300,
1601
+ "total_flos": 1.7185304344854004e+20,
1602
+ "train_loss": 0.0027616678631823995,
1603
+ "train_runtime": 6144.0313,
1604
+ "train_samples_per_second": 431.248,
1605
+ "train_steps_per_second": 0.374
1606
  }
1607
  ],
1608
  "logging_steps": 10,
1609
+ "max_steps": 2300,
1610
  "num_input_tokens_seen": 0,
1611
+ "num_train_epochs": 25,
1612
  "save_steps": 100,
1613
+ "total_flos": 1.7185304344854004e+20,
1614
  "train_batch_size": 192,
1615
  "trial_name": null,
1616
  "trial_params": null