MohamedAhmedAE commited on
Commit
8316459
1 Parent(s): 095e0d2

Training in progress, step 97800

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2b71a1693c4833d0a4fa56ef72c0a86bde99cc4cd84faa870ab826523bd961
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec0c1d18defd241431f12919f56311e5a5a2b33ee98aff0675e2d1054db73a3
3
  size 1715561468
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51c0349e11e2f94819ee46dbe2e04d04cc011a97f9f2124c8be293defb0a206a
3
  size 3431474364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1ec0449a02acf3e70b6f1fd6fbb2ea2224f1607e2a52b78112be63d1a623ec3
3
  size 3431474364
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be13e37a24a07e10fc68f5c2cd4ef1d31eaee86b299d03fbc1b4e1c4f98603ae
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f22dde64bed4a150bba9fae975b5ff2db4d79f2cc7c992c6da344d97f930b61
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d8a59bea0974691465530372450279c1418d6621c1543f23f4ad569da87f652
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72114cb302bd561c7ea851b1b8082add5139795f45804ba466465e1f30e93de9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.3849402462036124,
5
  "eval_steps": 1000,
6
- "global_step": 97600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4006,199 +4006,6 @@
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
4009
- },
4010
- {
4011
- "epoch": 4.187258513792794,
4012
- "grad_norm": 18.494911193847656,
4013
- "learning_rate": 4.913983787235996e-05,
4014
- "loss": 0.5905,
4015
- "step": 93200
4016
- },
4017
- {
4018
- "epoch": 4.196244047084194,
4019
- "grad_norm": 4.566243648529053,
4020
- "learning_rate": 4.913616346215057e-05,
4021
- "loss": 0.5712,
4022
- "step": 93400
4023
- },
4024
- {
4025
- "epoch": 4.205229580375596,
4026
- "grad_norm": 5.748531818389893,
4027
- "learning_rate": 4.9132481358518735e-05,
4028
- "loss": 0.558,
4029
- "step": 93600
4030
- },
4031
- {
4032
- "epoch": 4.214215113666996,
4033
- "grad_norm": 3.77885365486145,
4034
- "learning_rate": 4.9128791562638096e-05,
4035
- "loss": 0.5927,
4036
- "step": 93800
4037
- },
4038
- {
4039
- "epoch": 4.223200646958397,
4040
- "grad_norm": 2.6284022331237793,
4041
- "learning_rate": 4.9125094075684805e-05,
4042
- "loss": 0.5953,
4043
- "step": 94000
4044
- },
4045
- {
4046
- "epoch": 4.223200646958397,
4047
- "eval_loss": 2.712245225906372,
4048
- "eval_runtime": 1088.8302,
4049
- "eval_samples_per_second": 9.096,
4050
- "eval_steps_per_second": 0.142,
4051
- "step": 94000
4052
- },
4053
- {
4054
- "epoch": 4.232186180249798,
4055
- "grad_norm": 5.8867645263671875,
4056
- "learning_rate": 4.9121388898837415e-05,
4057
- "loss": 0.5895,
4058
- "step": 94200
4059
- },
4060
- {
4061
- "epoch": 4.241171713541199,
4062
- "grad_norm": 6.118598937988281,
4063
- "learning_rate": 4.911767603327698e-05,
4064
- "loss": 0.6138,
4065
- "step": 94400
4066
- },
4067
- {
4068
- "epoch": 4.250157246832599,
4069
- "grad_norm": 7.058086395263672,
4070
- "learning_rate": 4.911395548018696e-05,
4071
- "loss": 0.5921,
4072
- "step": 94600
4073
- },
4074
- {
4075
- "epoch": 4.259142780124001,
4076
- "grad_norm": 6.587648391723633,
4077
- "learning_rate": 4.911022724075329e-05,
4078
- "loss": 0.5778,
4079
- "step": 94800
4080
- },
4081
- {
4082
- "epoch": 4.268128313415401,
4083
- "grad_norm": 1.6069397926330566,
4084
- "learning_rate": 4.910649131616435e-05,
4085
- "loss": 0.6262,
4086
- "step": 95000
4087
- },
4088
- {
4089
- "epoch": 4.268128313415401,
4090
- "eval_loss": 2.6547911167144775,
4091
- "eval_runtime": 1085.8261,
4092
- "eval_samples_per_second": 9.121,
4093
- "eval_steps_per_second": 0.143,
4094
- "step": 95000
4095
- },
4096
- {
4097
- "epoch": 4.277113846706802,
4098
- "grad_norm": 6.686661243438721,
4099
- "learning_rate": 4.910274770761096e-05,
4100
- "loss": 0.5864,
4101
- "step": 95200
4102
- },
4103
- {
4104
- "epoch": 4.286099379998203,
4105
- "grad_norm": 7.897719860076904,
4106
- "learning_rate": 4.909899641628641e-05,
4107
- "loss": 0.5884,
4108
- "step": 95400
4109
- },
4110
- {
4111
- "epoch": 4.295084913289604,
4112
- "grad_norm": 7.400073528289795,
4113
- "learning_rate": 4.9095237443386435e-05,
4114
- "loss": 0.6021,
4115
- "step": 95600
4116
- },
4117
- {
4118
- "epoch": 4.3040704465810045,
4119
- "grad_norm": 4.220474720001221,
4120
- "learning_rate": 4.9091470790109196e-05,
4121
- "loss": 0.5518,
4122
- "step": 95800
4123
- },
4124
- {
4125
- "epoch": 4.313055979872406,
4126
- "grad_norm": 1.6574774980545044,
4127
- "learning_rate": 4.908769645765532e-05,
4128
- "loss": 0.5867,
4129
- "step": 96000
4130
- },
4131
- {
4132
- "epoch": 4.313055979872406,
4133
- "eval_loss": 2.691925525665283,
4134
- "eval_runtime": 1089.0317,
4135
- "eval_samples_per_second": 9.094,
4136
- "eval_steps_per_second": 0.142,
4137
- "step": 96000
4138
- },
4139
- {
4140
- "epoch": 4.322041513163806,
4141
- "grad_norm": 3.5609164237976074,
4142
- "learning_rate": 4.908391444722787e-05,
4143
- "loss": 0.5803,
4144
- "step": 96200
4145
- },
4146
- {
4147
- "epoch": 4.331027046455207,
4148
- "grad_norm": 3.427290201187134,
4149
- "learning_rate": 4.908012476003239e-05,
4150
- "loss": 0.554,
4151
- "step": 96400
4152
- },
4153
- {
4154
- "epoch": 4.340012579746608,
4155
- "grad_norm": 52.728878021240234,
4156
- "learning_rate": 4.907632739727682e-05,
4157
- "loss": 0.5962,
4158
- "step": 96600
4159
- },
4160
- {
4161
- "epoch": 4.348998113038009,
4162
- "grad_norm": 12.754006385803223,
4163
- "learning_rate": 4.907252236017159e-05,
4164
- "loss": 0.5742,
4165
- "step": 96800
4166
- },
4167
- {
4168
- "epoch": 4.3579836463294095,
4169
- "grad_norm": 8.12136173248291,
4170
- "learning_rate": 4.9068709649929544e-05,
4171
- "loss": 0.6085,
4172
- "step": 97000
4173
- },
4174
- {
4175
- "epoch": 4.3579836463294095,
4176
- "eval_loss": 2.6768929958343506,
4177
- "eval_runtime": 1090.8411,
4178
- "eval_samples_per_second": 9.079,
4179
- "eval_steps_per_second": 0.142,
4180
- "step": 97000
4181
- },
4182
- {
4183
- "epoch": 4.366969179620811,
4184
- "grad_norm": 5.45872688293457,
4185
- "learning_rate": 4.9064889267766e-05,
4186
- "loss": 0.5137,
4187
- "step": 97200
4188
- },
4189
- {
4190
- "epoch": 4.375954712912211,
4191
- "grad_norm": 3.9804370403289795,
4192
- "learning_rate": 4.9061061214898707e-05,
4193
- "loss": 0.5567,
4194
- "step": 97400
4195
- },
4196
- {
4197
- "epoch": 4.3849402462036124,
4198
- "grad_norm": 29.226791381835938,
4199
- "learning_rate": 4.9057225492547846e-05,
4200
- "loss": 0.5694,
4201
- "step": 97600
4202
  }
4203
  ],
4204
  "logging_steps": 200,
@@ -4218,7 +4025,7 @@
4218
  "attributes": {}
4219
  }
4220
  },
4221
- "total_flos": 1.3991980042616832e+18,
4222
  "train_batch_size": 6,
4223
  "trial_name": null,
4224
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.178272980501393,
5
  "eval_steps": 1000,
6
+ "global_step": 93000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4006
  "eval_samples_per_second": 9.146,
4007
  "eval_steps_per_second": 0.143,
4008
  "step": 93000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4009
  }
4010
  ],
4011
  "logging_steps": 200,
 
4025
  "attributes": {}
4026
  }
4027
  },
4028
+ "total_flos": 1.333252196683776e+18,
4029
  "train_batch_size": 6,
4030
  "trial_name": null,
4031
  "trial_params": null
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2b71a1693c4833d0a4fa56ef72c0a86bde99cc4cd84faa870ab826523bd961
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff2dae35f81010c72af01f07977963be57010375af5f3bff3c7967ef1499de0
3
  size 1715561468