Federic committed on
Commit
f1f20fd
1 Parent(s): 8bf492d

Training in progress, step 225, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6052060fee70b746dfbacd8af991cb069c6e2d6f47894a3b7a85456b4f5a015
3
  size 2931614832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b225360958a382ec287517387a6abacab0f1d2394a362e9767f84473949b3e7a
3
  size 2931614832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1299814310ef8143db69c7e0f57933728708c9587aea349f43769e11720550c5
3
  size 628648428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25d9ef7e070c77b5aeefa9d8a3f85c0f430cf64b32a2830c48ac368bfb96a923
3
  size 628648428
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f872f30db80f4bb445041adea1f9bfc526c9ed631be5b44711cea34fecc28076
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376a71289f7848b2a2e275814c62ab7090b1b8276e7e80bba0894efbd9bf880b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859ff0676471245c9481ca25d6d6778d1c7963c39b7877af46bb8ca30a9ead21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,13 +1207,163 @@
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4644,
1209
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 1,
1213
  "max_steps": 250,
1214
  "num_train_epochs": 1,
1215
  "save_steps": 25,
1216
- "total_flos": 1.1759299247996928e+16,
1217
  "trial_name": null,
1218
  "trial_params": null
1219
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9,
5
  "eval_steps": 500,
6
+ "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4644,
1209
  "step": 200
1210
+ },
1211
+ {
1212
+ "epoch": 0.8,
1213
+ "learning_rate": 0.0002,
1214
+ "loss": 0.6799,
1215
+ "step": 201
1216
+ },
1217
+ {
1218
+ "epoch": 0.81,
1219
+ "learning_rate": 0.0002,
1220
+ "loss": 0.7253,
1221
+ "step": 202
1222
+ },
1223
+ {
1224
+ "epoch": 0.81,
1225
+ "learning_rate": 0.0002,
1226
+ "loss": 0.8481,
1227
+ "step": 203
1228
+ },
1229
+ {
1230
+ "epoch": 0.82,
1231
+ "learning_rate": 0.0002,
1232
+ "loss": 0.662,
1233
+ "step": 204
1234
+ },
1235
+ {
1236
+ "epoch": 0.82,
1237
+ "learning_rate": 0.0002,
1238
+ "loss": 0.6507,
1239
+ "step": 205
1240
+ },
1241
+ {
1242
+ "epoch": 0.82,
1243
+ "learning_rate": 0.0002,
1244
+ "loss": 0.5734,
1245
+ "step": 206
1246
+ },
1247
+ {
1248
+ "epoch": 0.83,
1249
+ "learning_rate": 0.0002,
1250
+ "loss": 0.6026,
1251
+ "step": 207
1252
+ },
1253
+ {
1254
+ "epoch": 0.83,
1255
+ "learning_rate": 0.0002,
1256
+ "loss": 0.7017,
1257
+ "step": 208
1258
+ },
1259
+ {
1260
+ "epoch": 0.84,
1261
+ "learning_rate": 0.0002,
1262
+ "loss": 0.6172,
1263
+ "step": 209
1264
+ },
1265
+ {
1266
+ "epoch": 0.84,
1267
+ "learning_rate": 0.0002,
1268
+ "loss": 0.5782,
1269
+ "step": 210
1270
+ },
1271
+ {
1272
+ "epoch": 0.84,
1273
+ "learning_rate": 0.0002,
1274
+ "loss": 0.5805,
1275
+ "step": 211
1276
+ },
1277
+ {
1278
+ "epoch": 0.85,
1279
+ "learning_rate": 0.0002,
1280
+ "loss": 0.6031,
1281
+ "step": 212
1282
+ },
1283
+ {
1284
+ "epoch": 0.85,
1285
+ "learning_rate": 0.0002,
1286
+ "loss": 0.5795,
1287
+ "step": 213
1288
+ },
1289
+ {
1290
+ "epoch": 0.86,
1291
+ "learning_rate": 0.0002,
1292
+ "loss": 0.6324,
1293
+ "step": 214
1294
+ },
1295
+ {
1296
+ "epoch": 0.86,
1297
+ "learning_rate": 0.0002,
1298
+ "loss": 0.5426,
1299
+ "step": 215
1300
+ },
1301
+ {
1302
+ "epoch": 0.86,
1303
+ "learning_rate": 0.0002,
1304
+ "loss": 0.5527,
1305
+ "step": 216
1306
+ },
1307
+ {
1308
+ "epoch": 0.87,
1309
+ "learning_rate": 0.0002,
1310
+ "loss": 0.6277,
1311
+ "step": 217
1312
+ },
1313
+ {
1314
+ "epoch": 0.87,
1315
+ "learning_rate": 0.0002,
1316
+ "loss": 0.5427,
1317
+ "step": 218
1318
+ },
1319
+ {
1320
+ "epoch": 0.88,
1321
+ "learning_rate": 0.0002,
1322
+ "loss": 0.5526,
1323
+ "step": 219
1324
+ },
1325
+ {
1326
+ "epoch": 0.88,
1327
+ "learning_rate": 0.0002,
1328
+ "loss": 0.5193,
1329
+ "step": 220
1330
+ },
1331
+ {
1332
+ "epoch": 0.88,
1333
+ "learning_rate": 0.0002,
1334
+ "loss": 0.5112,
1335
+ "step": 221
1336
+ },
1337
+ {
1338
+ "epoch": 0.89,
1339
+ "learning_rate": 0.0002,
1340
+ "loss": 0.5111,
1341
+ "step": 222
1342
+ },
1343
+ {
1344
+ "epoch": 0.89,
1345
+ "learning_rate": 0.0002,
1346
+ "loss": 0.5119,
1347
+ "step": 223
1348
+ },
1349
+ {
1350
+ "epoch": 0.9,
1351
+ "learning_rate": 0.0002,
1352
+ "loss": 0.5078,
1353
+ "step": 224
1354
+ },
1355
+ {
1356
+ "epoch": 0.9,
1357
+ "learning_rate": 0.0002,
1358
+ "loss": 0.4422,
1359
+ "step": 225
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 1,
1365
  "save_steps": 25,
1366
+ "total_flos": 1.3393660247801856e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }