MohamedAhmedAE commited on
Commit
5aecaec
1 Parent(s): a796027

Training in progress, step 100000

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98007a9e4d32ca734c0938db281ef1d38a942b440ffffede684c69f10d0c0ab0
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec0c1d18defd241431f12919f56311e5a5a2b33ee98aff0675e2d1054db73a3
3
  size 1715561468
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31cfa35ff9229ac8a845a53b47fd4abeaacf3050350e2982ec0b10a385c91f6d
3
  size 3431474364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1ec0449a02acf3e70b6f1fd6fbb2ea2224f1607e2a52b78112be63d1a623ec3
3
  size 3431474364
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e3b727d695301e31ac214f1b0bb89d1bb023257396f0e4584c73785a8cc7b5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f22dde64bed4a150bba9fae975b5ff2db4d79f2cc7c992c6da344d97f930b61
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:915cd65f50f22a27b7a2b8eb5210ce1b997270418a9a41f4ee4664dbebbf4673
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72114cb302bd561c7ea851b1b8082add5139795f45804ba466465e1f30e93de9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.474795579117621,
5
  "eval_steps": 1000,
6
- "global_step": 99600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4006,285 +4006,6 @@
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
4009
- },
4010
- {
4011
- "epoch": 4.187258513792794,
4012
- "grad_norm": 18.494911193847656,
4013
- "learning_rate": 4.913983787235996e-05,
4014
- "loss": 0.5905,
4015
- "step": 93200
4016
- },
4017
- {
4018
- "epoch": 4.196244047084194,
4019
- "grad_norm": 4.566243648529053,
4020
- "learning_rate": 4.913616346215057e-05,
4021
- "loss": 0.5712,
4022
- "step": 93400
4023
- },
4024
- {
4025
- "epoch": 4.205229580375596,
4026
- "grad_norm": 5.748531818389893,
4027
- "learning_rate": 4.9132481358518735e-05,
4028
- "loss": 0.558,
4029
- "step": 93600
4030
- },
4031
- {
4032
- "epoch": 4.214215113666996,
4033
- "grad_norm": 3.77885365486145,
4034
- "learning_rate": 4.9128791562638096e-05,
4035
- "loss": 0.5927,
4036
- "step": 93800
4037
- },
4038
- {
4039
- "epoch": 4.223200646958397,
4040
- "grad_norm": 2.6284022331237793,
4041
- "learning_rate": 4.9125094075684805e-05,
4042
- "loss": 0.5953,
4043
- "step": 94000
4044
- },
4045
- {
4046
- "epoch": 4.223200646958397,
4047
- "eval_loss": 2.712245225906372,
4048
- "eval_runtime": 1088.8302,
4049
- "eval_samples_per_second": 9.096,
4050
- "eval_steps_per_second": 0.142,
4051
- "step": 94000
4052
- },
4053
- {
4054
- "epoch": 4.232186180249798,
4055
- "grad_norm": 5.8867645263671875,
4056
- "learning_rate": 4.9121388898837415e-05,
4057
- "loss": 0.5895,
4058
- "step": 94200
4059
- },
4060
- {
4061
- "epoch": 4.241171713541199,
4062
- "grad_norm": 6.118598937988281,
4063
- "learning_rate": 4.911767603327698e-05,
4064
- "loss": 0.6138,
4065
- "step": 94400
4066
- },
4067
- {
4068
- "epoch": 4.250157246832599,
4069
- "grad_norm": 7.058086395263672,
4070
- "learning_rate": 4.911395548018696e-05,
4071
- "loss": 0.5921,
4072
- "step": 94600
4073
- },
4074
- {
4075
- "epoch": 4.259142780124001,
4076
- "grad_norm": 6.587648391723633,
4077
- "learning_rate": 4.911022724075329e-05,
4078
- "loss": 0.5778,
4079
- "step": 94800
4080
- },
4081
- {
4082
- "epoch": 4.268128313415401,
4083
- "grad_norm": 1.6069397926330566,
4084
- "learning_rate": 4.910649131616435e-05,
4085
- "loss": 0.6262,
4086
- "step": 95000
4087
- },
4088
- {
4089
- "epoch": 4.268128313415401,
4090
- "eval_loss": 2.6547911167144775,
4091
- "eval_runtime": 1085.8261,
4092
- "eval_samples_per_second": 9.121,
4093
- "eval_steps_per_second": 0.143,
4094
- "step": 95000
4095
- },
4096
- {
4097
- "epoch": 4.277113846706802,
4098
- "grad_norm": 6.686661243438721,
4099
- "learning_rate": 4.910274770761096e-05,
4100
- "loss": 0.5864,
4101
- "step": 95200
4102
- },
4103
- {
4104
- "epoch": 4.286099379998203,
4105
- "grad_norm": 7.897719860076904,
4106
- "learning_rate": 4.909899641628641e-05,
4107
- "loss": 0.5884,
4108
- "step": 95400
4109
- },
4110
- {
4111
- "epoch": 4.295084913289604,
4112
- "grad_norm": 7.400073528289795,
4113
- "learning_rate": 4.9095237443386435e-05,
4114
- "loss": 0.6021,
4115
- "step": 95600
4116
- },
4117
- {
4118
- "epoch": 4.3040704465810045,
4119
- "grad_norm": 4.220474720001221,
4120
- "learning_rate": 4.9091470790109196e-05,
4121
- "loss": 0.5518,
4122
- "step": 95800
4123
- },
4124
- {
4125
- "epoch": 4.313055979872406,
4126
- "grad_norm": 1.6574774980545044,
4127
- "learning_rate": 4.908769645765532e-05,
4128
- "loss": 0.5867,
4129
- "step": 96000
4130
- },
4131
- {
4132
- "epoch": 4.313055979872406,
4133
- "eval_loss": 2.691925525665283,
4134
- "eval_runtime": 1089.0317,
4135
- "eval_samples_per_second": 9.094,
4136
- "eval_steps_per_second": 0.142,
4137
- "step": 96000
4138
- },
4139
- {
4140
- "epoch": 4.322041513163806,
4141
- "grad_norm": 3.5609164237976074,
4142
- "learning_rate": 4.908391444722787e-05,
4143
- "loss": 0.5803,
4144
- "step": 96200
4145
- },
4146
- {
4147
- "epoch": 4.331027046455207,
4148
- "grad_norm": 3.427290201187134,
4149
- "learning_rate": 4.908012476003239e-05,
4150
- "loss": 0.554,
4151
- "step": 96400
4152
- },
4153
- {
4154
- "epoch": 4.340012579746608,
4155
- "grad_norm": 52.728878021240234,
4156
- "learning_rate": 4.907632739727682e-05,
4157
- "loss": 0.5962,
4158
- "step": 96600
4159
- },
4160
- {
4161
- "epoch": 4.348998113038009,
4162
- "grad_norm": 12.754006385803223,
4163
- "learning_rate": 4.907252236017159e-05,
4164
- "loss": 0.5742,
4165
- "step": 96800
4166
- },
4167
- {
4168
- "epoch": 4.3579836463294095,
4169
- "grad_norm": 8.12136173248291,
4170
- "learning_rate": 4.9068709649929544e-05,
4171
- "loss": 0.6085,
4172
- "step": 97000
4173
- },
4174
- {
4175
- "epoch": 4.3579836463294095,
4176
- "eval_loss": 2.6768929958343506,
4177
- "eval_runtime": 1090.8411,
4178
- "eval_samples_per_second": 9.079,
4179
- "eval_steps_per_second": 0.142,
4180
- "step": 97000
4181
- },
4182
- {
4183
- "epoch": 4.366969179620811,
4184
- "grad_norm": 5.45872688293457,
4185
- "learning_rate": 4.9064889267766e-05,
4186
- "loss": 0.5137,
4187
- "step": 97200
4188
- },
4189
- {
4190
- "epoch": 4.375954712912211,
4191
- "grad_norm": 3.9804370403289795,
4192
- "learning_rate": 4.9061061214898707e-05,
4193
- "loss": 0.5567,
4194
- "step": 97400
4195
- },
4196
- {
4197
- "epoch": 4.3849402462036124,
4198
- "grad_norm": 29.226791381835938,
4199
- "learning_rate": 4.9057225492547846e-05,
4200
- "loss": 0.5694,
4201
- "step": 97600
4202
- },
4203
- {
4204
- "epoch": 4.393925779495013,
4205
- "grad_norm": 6.9307169914245605,
4206
- "learning_rate": 4.9053382101936076e-05,
4207
- "loss": 0.5909,
4208
- "step": 97800
4209
- },
4210
- {
4211
- "epoch": 4.402911312786414,
4212
- "grad_norm": 5.833766937255859,
4213
- "learning_rate": 4.904953104428846e-05,
4214
- "loss": 0.5692,
4215
- "step": 98000
4216
- },
4217
- {
4218
- "epoch": 4.402911312786414,
4219
- "eval_loss": 2.714953660964966,
4220
- "eval_runtime": 1094.2189,
4221
- "eval_samples_per_second": 9.051,
4222
- "eval_steps_per_second": 0.142,
4223
- "step": 98000
4224
- },
4225
- {
4226
- "epoch": 4.4118968460778145,
4227
- "grad_norm": 9.674918174743652,
4228
- "learning_rate": 4.904567232083255e-05,
4229
- "loss": 0.5795,
4230
- "step": 98200
4231
- },
4232
- {
4233
- "epoch": 4.420882379369216,
4234
- "grad_norm": 17.37355613708496,
4235
- "learning_rate": 4.9041805932798295e-05,
4236
- "loss": 0.581,
4237
- "step": 98400
4238
- },
4239
- {
4240
- "epoch": 4.429867912660616,
4241
- "grad_norm": 2.3987767696380615,
4242
- "learning_rate": 4.9037931881418126e-05,
4243
- "loss": 0.5911,
4244
- "step": 98600
4245
- },
4246
- {
4247
- "epoch": 4.4388534459520175,
4248
- "grad_norm": 6.0703558921813965,
4249
- "learning_rate": 4.903405016792689e-05,
4250
- "loss": 0.6068,
4251
- "step": 98800
4252
- },
4253
- {
4254
- "epoch": 4.447838979243418,
4255
- "grad_norm": 3.4397573471069336,
4256
- "learning_rate": 4.9030160793561886e-05,
4257
- "loss": 0.5542,
4258
- "step": 99000
4259
- },
4260
- {
4261
- "epoch": 4.447838979243418,
4262
- "eval_loss": 2.6832633018493652,
4263
- "eval_runtime": 1085.7638,
4264
- "eval_samples_per_second": 9.122,
4265
- "eval_steps_per_second": 0.143,
4266
- "step": 99000
4267
- },
4268
- {
4269
- "epoch": 4.456824512534819,
4270
- "grad_norm": 1.5094788074493408,
4271
- "learning_rate": 4.902626375956287e-05,
4272
- "loss": 0.575,
4273
- "step": 99200
4274
- },
4275
- {
4276
- "epoch": 4.4658100458262195,
4277
- "grad_norm": 1.8952089548110962,
4278
- "learning_rate": 4.902235906717201e-05,
4279
- "loss": 0.5773,
4280
- "step": 99400
4281
- },
4282
- {
4283
- "epoch": 4.474795579117621,
4284
- "grad_norm": 6.439733505249023,
4285
- "learning_rate": 4.9018446717633923e-05,
4286
- "loss": 0.5653,
4287
- "step": 99600
4288
  }
4289
  ],
4290
  "logging_steps": 200,
@@ -4304,7 +4025,7 @@
4304
  "attributes": {}
4305
  }
4306
  },
4307
- "total_flos": 1.4278700945129472e+18,
4308
  "train_batch_size": 6,
4309
  "trial_name": null,
4310
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.178272980501393,
5
  "eval_steps": 1000,
6
+ "global_step": 93000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4009
  }
4010
  ],
4011
  "logging_steps": 200,
 
4025
  "attributes": {}
4026
  }
4027
  },
4028
+ "total_flos": 1.333252196683776e+18,
4029
  "train_batch_size": 6,
4030
  "trial_name": null,
4031
  "trial_params": null
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98007a9e4d32ca734c0938db281ef1d38a942b440ffffede684c69f10d0c0ab0
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b500a70d606f699a4441b167e887b3cfdac70a199b4314772ce7e9255f0589e
3
  size 1715561468