MohamedAhmedAE commited on
Commit
81e6bf0
1 Parent(s): 8316459

Training in progress, step 97800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aec0c1d18defd241431f12919f56311e5a5a2b33ee98aff0675e2d1054db73a3
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff2dae35f81010c72af01f07977963be57010375af5f3bff3c7967ef1499de0
3
  size 1715561468
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1ec0449a02acf3e70b6f1fd6fbb2ea2224f1607e2a52b78112be63d1a623ec3
3
  size 3431474364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3263ed01043192613ac98e7715d788d77b459bd25223a60198e37a6edcc20c
3
  size 3431474364
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f22dde64bed4a150bba9fae975b5ff2db4d79f2cc7c992c6da344d97f930b61
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be13e37a24a07e10fc68f5c2cd4ef1d31eaee86b299d03fbc1b4e1c4f98603ae
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72114cb302bd561c7ea851b1b8082add5139795f45804ba466465e1f30e93de9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0030adf7ec1a6b0e5ba568a360b75e530967917df22f6bc95fa46387ae323b64
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.178272980501393,
5
  "eval_steps": 1000,
6
- "global_step": 93000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4006,6 +4006,206 @@
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4009
  }
4010
  ],
4011
  "logging_steps": 200,
@@ -4025,7 +4225,7 @@
4025
  "attributes": {}
4026
  }
4027
  },
4028
- "total_flos": 1.333252196683776e+18,
4029
  "train_batch_size": 6,
4030
  "trial_name": null,
4031
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.393925779495013,
5
  "eval_steps": 1000,
6
+ "global_step": 97800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
4009
+ },
4010
+ {
4011
+ "epoch": 4.187258513792794,
4012
+ "grad_norm": 18.494911193847656,
4013
+ "learning_rate": 4.913983787235996e-05,
4014
+ "loss": 0.5905,
4015
+ "step": 93200
4016
+ },
4017
+ {
4018
+ "epoch": 4.196244047084194,
4019
+ "grad_norm": 4.566243648529053,
4020
+ "learning_rate": 4.913616346215057e-05,
4021
+ "loss": 0.5712,
4022
+ "step": 93400
4023
+ },
4024
+ {
4025
+ "epoch": 4.205229580375596,
4026
+ "grad_norm": 5.748531818389893,
4027
+ "learning_rate": 4.9132481358518735e-05,
4028
+ "loss": 0.558,
4029
+ "step": 93600
4030
+ },
4031
+ {
4032
+ "epoch": 4.214215113666996,
4033
+ "grad_norm": 3.77885365486145,
4034
+ "learning_rate": 4.9128791562638096e-05,
4035
+ "loss": 0.5927,
4036
+ "step": 93800
4037
+ },
4038
+ {
4039
+ "epoch": 4.223200646958397,
4040
+ "grad_norm": 2.6284022331237793,
4041
+ "learning_rate": 4.9125094075684805e-05,
4042
+ "loss": 0.5953,
4043
+ "step": 94000
4044
+ },
4045
+ {
4046
+ "epoch": 4.223200646958397,
4047
+ "eval_loss": 2.712245225906372,
4048
+ "eval_runtime": 1088.8302,
4049
+ "eval_samples_per_second": 9.096,
4050
+ "eval_steps_per_second": 0.142,
4051
+ "step": 94000
4052
+ },
4053
+ {
4054
+ "epoch": 4.232186180249798,
4055
+ "grad_norm": 5.8867645263671875,
4056
+ "learning_rate": 4.9121388898837415e-05,
4057
+ "loss": 0.5895,
4058
+ "step": 94200
4059
+ },
4060
+ {
4061
+ "epoch": 4.241171713541199,
4062
+ "grad_norm": 6.118598937988281,
4063
+ "learning_rate": 4.911767603327698e-05,
4064
+ "loss": 0.6138,
4065
+ "step": 94400
4066
+ },
4067
+ {
4068
+ "epoch": 4.250157246832599,
4069
+ "grad_norm": 7.058086395263672,
4070
+ "learning_rate": 4.911395548018696e-05,
4071
+ "loss": 0.5921,
4072
+ "step": 94600
4073
+ },
4074
+ {
4075
+ "epoch": 4.259142780124001,
4076
+ "grad_norm": 6.587648391723633,
4077
+ "learning_rate": 4.911022724075329e-05,
4078
+ "loss": 0.5778,
4079
+ "step": 94800
4080
+ },
4081
+ {
4082
+ "epoch": 4.268128313415401,
4083
+ "grad_norm": 1.6069397926330566,
4084
+ "learning_rate": 4.910649131616435e-05,
4085
+ "loss": 0.6262,
4086
+ "step": 95000
4087
+ },
4088
+ {
4089
+ "epoch": 4.268128313415401,
4090
+ "eval_loss": 2.6547911167144775,
4091
+ "eval_runtime": 1085.8261,
4092
+ "eval_samples_per_second": 9.121,
4093
+ "eval_steps_per_second": 0.143,
4094
+ "step": 95000
4095
+ },
4096
+ {
4097
+ "epoch": 4.277113846706802,
4098
+ "grad_norm": 6.686661243438721,
4099
+ "learning_rate": 4.910274770761096e-05,
4100
+ "loss": 0.5864,
4101
+ "step": 95200
4102
+ },
4103
+ {
4104
+ "epoch": 4.286099379998203,
4105
+ "grad_norm": 7.897719860076904,
4106
+ "learning_rate": 4.909899641628641e-05,
4107
+ "loss": 0.5884,
4108
+ "step": 95400
4109
+ },
4110
+ {
4111
+ "epoch": 4.295084913289604,
4112
+ "grad_norm": 7.400073528289795,
4113
+ "learning_rate": 4.9095237443386435e-05,
4114
+ "loss": 0.6021,
4115
+ "step": 95600
4116
+ },
4117
+ {
4118
+ "epoch": 4.3040704465810045,
4119
+ "grad_norm": 4.220474720001221,
4120
+ "learning_rate": 4.9091470790109196e-05,
4121
+ "loss": 0.5518,
4122
+ "step": 95800
4123
+ },
4124
+ {
4125
+ "epoch": 4.313055979872406,
4126
+ "grad_norm": 1.6574774980545044,
4127
+ "learning_rate": 4.908769645765532e-05,
4128
+ "loss": 0.5867,
4129
+ "step": 96000
4130
+ },
4131
+ {
4132
+ "epoch": 4.313055979872406,
4133
+ "eval_loss": 2.691925525665283,
4134
+ "eval_runtime": 1089.0317,
4135
+ "eval_samples_per_second": 9.094,
4136
+ "eval_steps_per_second": 0.142,
4137
+ "step": 96000
4138
+ },
4139
+ {
4140
+ "epoch": 4.322041513163806,
4141
+ "grad_norm": 3.5609164237976074,
4142
+ "learning_rate": 4.908391444722787e-05,
4143
+ "loss": 0.5803,
4144
+ "step": 96200
4145
+ },
4146
+ {
4147
+ "epoch": 4.331027046455207,
4148
+ "grad_norm": 3.427290201187134,
4149
+ "learning_rate": 4.908012476003239e-05,
4150
+ "loss": 0.554,
4151
+ "step": 96400
4152
+ },
4153
+ {
4154
+ "epoch": 4.340012579746608,
4155
+ "grad_norm": 52.728878021240234,
4156
+ "learning_rate": 4.907632739727682e-05,
4157
+ "loss": 0.5962,
4158
+ "step": 96600
4159
+ },
4160
+ {
4161
+ "epoch": 4.348998113038009,
4162
+ "grad_norm": 12.754006385803223,
4163
+ "learning_rate": 4.907252236017159e-05,
4164
+ "loss": 0.5742,
4165
+ "step": 96800
4166
+ },
4167
+ {
4168
+ "epoch": 4.3579836463294095,
4169
+ "grad_norm": 8.12136173248291,
4170
+ "learning_rate": 4.9068709649929544e-05,
4171
+ "loss": 0.6085,
4172
+ "step": 97000
4173
+ },
4174
+ {
4175
+ "epoch": 4.3579836463294095,
4176
+ "eval_loss": 2.6768929958343506,
4177
+ "eval_runtime": 1090.8411,
4178
+ "eval_samples_per_second": 9.079,
4179
+ "eval_steps_per_second": 0.142,
4180
+ "step": 97000
4181
+ },
4182
+ {
4183
+ "epoch": 4.366969179620811,
4184
+ "grad_norm": 5.45872688293457,
4185
+ "learning_rate": 4.9064889267766e-05,
4186
+ "loss": 0.5137,
4187
+ "step": 97200
4188
+ },
4189
+ {
4190
+ "epoch": 4.375954712912211,
4191
+ "grad_norm": 3.9804370403289795,
4192
+ "learning_rate": 4.9061061214898707e-05,
4193
+ "loss": 0.5567,
4194
+ "step": 97400
4195
+ },
4196
+ {
4197
+ "epoch": 4.3849402462036124,
4198
+ "grad_norm": 29.226791381835938,
4199
+ "learning_rate": 4.9057225492547846e-05,
4200
+ "loss": 0.5694,
4201
+ "step": 97600
4202
+ },
4203
+ {
4204
+ "epoch": 4.393925779495013,
4205
+ "grad_norm": 6.9307169914245605,
4206
+ "learning_rate": 4.9053382101936076e-05,
4207
+ "loss": 0.5909,
4208
+ "step": 97800
4209
  }
4210
  ],
4211
  "logging_steps": 200,
 
4225
  "attributes": {}
4226
  }
4227
  },
4228
+ "total_flos": 1.4020652132868096e+18,
4229
  "train_batch_size": 6,
4230
  "trial_name": null,
4231
  "trial_params": null