Federic committed on
Commit
501e5bb
1 Parent(s): dde74fb

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b225360958a382ec287517387a6abacab0f1d2394a362e9767f84473949b3e7a
3
  size 2931614832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94919c47ebaeadb6c69c16dcd15eca4096d2abff60da1c0afcb24468fee0170
3
  size 2931614832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25d9ef7e070c77b5aeefa9d8a3f85c0f430cf64b32a2830c48ac368bfb96a923
3
  size 628648428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22950a76deed4e24012e84caba3a000b5362c18fb8ecb0429d861bb63952b74f
3
  size 628648428
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376a71289f7848b2a2e275814c62ab7090b1b8276e7e80bba0894efbd9bf880b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12e2bb6bd1f3ad57589a75f2df06bbf4676ff5120e4773d51e19acf33cd8dce5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9,
5
  "eval_steps": 500,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1357,13 +1357,163 @@
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4422,
1359
  "step": 225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 1,
1365
  "save_steps": 25,
1366
- "total_flos": 1.3393660247801856e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4422,
1359
  "step": 225
1360
+ },
1361
+ {
1362
+ "epoch": 0.9,
1363
+ "learning_rate": 0.0002,
1364
+ "loss": 0.5137,
1365
+ "step": 226
1366
+ },
1367
+ {
1368
+ "epoch": 0.91,
1369
+ "learning_rate": 0.0002,
1370
+ "loss": 0.4991,
1371
+ "step": 227
1372
+ },
1373
+ {
1374
+ "epoch": 0.91,
1375
+ "learning_rate": 0.0002,
1376
+ "loss": 0.4743,
1377
+ "step": 228
1378
+ },
1379
+ {
1380
+ "epoch": 0.92,
1381
+ "learning_rate": 0.0002,
1382
+ "loss": 0.5094,
1383
+ "step": 229
1384
+ },
1385
+ {
1386
+ "epoch": 0.92,
1387
+ "learning_rate": 0.0002,
1388
+ "loss": 0.4255,
1389
+ "step": 230
1390
+ },
1391
+ {
1392
+ "epoch": 0.92,
1393
+ "learning_rate": 0.0002,
1394
+ "loss": 0.5178,
1395
+ "step": 231
1396
+ },
1397
+ {
1398
+ "epoch": 0.93,
1399
+ "learning_rate": 0.0002,
1400
+ "loss": 0.45,
1401
+ "step": 232
1402
+ },
1403
+ {
1404
+ "epoch": 0.93,
1405
+ "learning_rate": 0.0002,
1406
+ "loss": 0.4507,
1407
+ "step": 233
1408
+ },
1409
+ {
1410
+ "epoch": 0.94,
1411
+ "learning_rate": 0.0002,
1412
+ "loss": 0.4985,
1413
+ "step": 234
1414
+ },
1415
+ {
1416
+ "epoch": 0.94,
1417
+ "learning_rate": 0.0002,
1418
+ "loss": 0.4585,
1419
+ "step": 235
1420
+ },
1421
+ {
1422
+ "epoch": 0.94,
1423
+ "learning_rate": 0.0002,
1424
+ "loss": 0.4703,
1425
+ "step": 236
1426
+ },
1427
+ {
1428
+ "epoch": 0.95,
1429
+ "learning_rate": 0.0002,
1430
+ "loss": 0.4126,
1431
+ "step": 237
1432
+ },
1433
+ {
1434
+ "epoch": 0.95,
1435
+ "learning_rate": 0.0002,
1436
+ "loss": 0.4401,
1437
+ "step": 238
1438
+ },
1439
+ {
1440
+ "epoch": 0.96,
1441
+ "learning_rate": 0.0002,
1442
+ "loss": 0.3744,
1443
+ "step": 239
1444
+ },
1445
+ {
1446
+ "epoch": 0.96,
1447
+ "learning_rate": 0.0002,
1448
+ "loss": 0.4532,
1449
+ "step": 240
1450
+ },
1451
+ {
1452
+ "epoch": 0.96,
1453
+ "learning_rate": 0.0002,
1454
+ "loss": 0.4043,
1455
+ "step": 241
1456
+ },
1457
+ {
1458
+ "epoch": 0.97,
1459
+ "learning_rate": 0.0002,
1460
+ "loss": 0.3634,
1461
+ "step": 242
1462
+ },
1463
+ {
1464
+ "epoch": 0.97,
1465
+ "learning_rate": 0.0002,
1466
+ "loss": 0.4066,
1467
+ "step": 243
1468
+ },
1469
+ {
1470
+ "epoch": 0.98,
1471
+ "learning_rate": 0.0002,
1472
+ "loss": 0.4081,
1473
+ "step": 244
1474
+ },
1475
+ {
1476
+ "epoch": 0.98,
1477
+ "learning_rate": 0.0002,
1478
+ "loss": 0.4268,
1479
+ "step": 245
1480
+ },
1481
+ {
1482
+ "epoch": 0.98,
1483
+ "learning_rate": 0.0002,
1484
+ "loss": 0.3535,
1485
+ "step": 246
1486
+ },
1487
+ {
1488
+ "epoch": 0.99,
1489
+ "learning_rate": 0.0002,
1490
+ "loss": 0.34,
1491
+ "step": 247
1492
+ },
1493
+ {
1494
+ "epoch": 0.99,
1495
+ "learning_rate": 0.0002,
1496
+ "loss": 0.3733,
1497
+ "step": 248
1498
+ },
1499
+ {
1500
+ "epoch": 1.0,
1501
+ "learning_rate": 0.0002,
1502
+ "loss": 0.41,
1503
+ "step": 249
1504
+ },
1505
+ {
1506
+ "epoch": 1.0,
1507
+ "learning_rate": 0.0002,
1508
+ "loss": 0.3494,
1509
+ "step": 250
1510
  }
1511
  ],
1512
  "logging_steps": 1,
1513
  "max_steps": 250,
1514
  "num_train_epochs": 1,
1515
  "save_steps": 25,
1516
+ "total_flos": 1.4689489038114816e+16,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }