versae committed on
Commit
f75455f
1 Parent(s): 19fc122

Step... (42001/50000 | Loss: 1.5368554592132568, Acc: 0.6858981847763062): 86%|████████████████████████ | 42934/50000 [7:12:02<3:21:33, 1.71s/it]

Browse files
Files changed (50) hide show
  1. flax_model.msgpack +1 -1
  2. outputs/checkpoints/checkpoint-20000/training_state.json +0 -1
  3. outputs/checkpoints/checkpoint-21000/training_state.json +0 -1
  4. outputs/checkpoints/checkpoint-22000/training_state.json +0 -1
  5. outputs/checkpoints/checkpoint-23000/training_state.json +0 -1
  6. outputs/checkpoints/checkpoint-24000/data_collator.joblib +0 -3
  7. outputs/checkpoints/checkpoint-24000/flax_model.msgpack +0 -3
  8. outputs/checkpoints/checkpoint-24000/optimizer_state.msgpack +0 -3
  9. outputs/checkpoints/checkpoint-24000/training_args.joblib +0 -3
  10. outputs/checkpoints/checkpoint-24000/training_state.json +0 -1
  11. outputs/checkpoints/{checkpoint-20000 → checkpoint-38000}/config.json +0 -0
  12. outputs/checkpoints/{checkpoint-20000 → checkpoint-38000}/data_collator.joblib +2 -2
  13. outputs/checkpoints/{checkpoint-21000 → checkpoint-38000}/flax_model.msgpack +1 -1
  14. outputs/checkpoints/{checkpoint-21000 → checkpoint-38000}/optimizer_state.msgpack +1 -1
  15. outputs/checkpoints/{checkpoint-22000 → checkpoint-38000}/training_args.joblib +1 -1
  16. outputs/checkpoints/checkpoint-38000/training_state.json +1 -0
  17. outputs/checkpoints/{checkpoint-21000 → checkpoint-39000}/config.json +0 -0
  18. outputs/checkpoints/{checkpoint-21000 → checkpoint-39000}/data_collator.joblib +2 -2
  19. outputs/checkpoints/{checkpoint-20000 → checkpoint-39000}/flax_model.msgpack +1 -1
  20. outputs/checkpoints/{checkpoint-22000 → checkpoint-39000}/optimizer_state.msgpack +1 -1
  21. outputs/checkpoints/{checkpoint-23000 → checkpoint-39000}/training_args.joblib +1 -1
  22. outputs/checkpoints/checkpoint-39000/training_state.json +1 -0
  23. outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/config.json +0 -0
  24. outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/data_collator.joblib +2 -2
  25. outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/flax_model.msgpack +1 -1
  26. outputs/checkpoints/{checkpoint-20000 → checkpoint-40000}/optimizer_state.msgpack +1 -1
  27. outputs/checkpoints/{checkpoint-21000 → checkpoint-40000}/training_args.joblib +1 -1
  28. outputs/checkpoints/checkpoint-40000/training_state.json +1 -0
  29. outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/config.json +0 -0
  30. outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/data_collator.joblib +2 -2
  31. outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/flax_model.msgpack +1 -1
  32. outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/optimizer_state.msgpack +1 -1
  33. outputs/checkpoints/{checkpoint-20000 → checkpoint-41000}/training_args.joblib +1 -1
  34. outputs/checkpoints/checkpoint-41000/training_state.json +1 -0
  35. outputs/checkpoints/{checkpoint-24000 → checkpoint-42000}/config.json +0 -0
  36. outputs/checkpoints/checkpoint-42000/data_collator.joblib +3 -0
  37. outputs/checkpoints/checkpoint-42000/flax_model.msgpack +3 -0
  38. outputs/checkpoints/checkpoint-42000/optimizer_state.msgpack +3 -0
  39. outputs/checkpoints/checkpoint-42000/training_args.joblib +3 -0
  40. outputs/checkpoints/checkpoint-42000/training_state.json +1 -0
  41. outputs/data_collator.joblib +2 -2
  42. outputs/events.out.tfevents.1626649897.tablespoon.3816803.3.v2 +3 -0
  43. outputs/events.out.tfevents.1626650133.tablespoon.3825201.3.v2 +3 -0
  44. outputs/flax_model.msgpack +1 -1
  45. outputs/optimizer_state.msgpack +1 -1
  46. outputs/training_args.joblib +1 -1
  47. outputs/training_state.json +1 -1
  48. pytorch_model.bin +1 -1
  49. run_stream.512.sh +3 -3
  50. run_stream_checkpoint.log +3 -0
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac36d419b795cdd68ff42020fca0fc4178e228de7917c12ff9a5bc4b9f3a3525
3
  size 249750019
outputs/checkpoints/checkpoint-20000/training_state.json DELETED
@@ -1 +0,0 @@
1
- {"step": 20001}
 
 
outputs/checkpoints/checkpoint-21000/training_state.json DELETED
@@ -1 +0,0 @@
1
- {"step": 21001}
 
 
outputs/checkpoints/checkpoint-22000/training_state.json DELETED
@@ -1 +0,0 @@
1
- {"step": 22001}
 
 
outputs/checkpoints/checkpoint-23000/training_state.json DELETED
@@ -1 +0,0 @@
1
- {"step": 23001}
 
 
outputs/checkpoints/checkpoint-24000/data_collator.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
 
 
 
outputs/checkpoints/checkpoint-24000/flax_model.msgpack DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
3
- size 249750019
 
 
 
 
outputs/checkpoints/checkpoint-24000/optimizer_state.msgpack DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e28559bb3ef8565f8edae3fabb0dd7f897d6c713613daa2253997225190e79c4
3
- size 499500278
 
 
 
 
outputs/checkpoints/checkpoint-24000/training_args.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
- size 1871
 
 
 
 
outputs/checkpoints/checkpoint-24000/training_state.json DELETED
@@ -1 +0,0 @@
1
- {"step": 24001}
 
 
outputs/checkpoints/{checkpoint-20000 → checkpoint-38000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-20000 → checkpoint-38000}/data_collator.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/checkpoints/{checkpoint-21000 → checkpoint-38000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09bef80851e5190807b15c1dbe9d0270c9384ab44c3dcb4db178b85e80a71379
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b57cfed0bd1af308d57c60afcbc41332c7594791006961099fa2589551bb8b
3
  size 249750019
outputs/checkpoints/{checkpoint-21000 → checkpoint-38000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a99a82909987b205609070cca9c4c5799ca4a0b78511f099fc3a9e538161a195
3
  size 499500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa564492e9abc3ae93bccb3376efa2894724957eebf32209d00faa14b32a07a
3
  size 499500278
outputs/checkpoints/{checkpoint-22000 → checkpoint-38000}/training_args.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
  size 1871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
  size 1871
outputs/checkpoints/checkpoint-38000/training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 38001}
outputs/checkpoints/{checkpoint-21000 → checkpoint-39000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-21000 → checkpoint-39000}/data_collator.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/checkpoints/{checkpoint-20000 → checkpoint-39000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc062cff226dee3ccdfbbc5682d4b2882c4f57c0a40a3f54bd16d8567298d5c0
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c51961044c08a6cc62c686aff8438567c1595b59a508657a6482722d778e60
3
  size 249750019
outputs/checkpoints/{checkpoint-22000 → checkpoint-39000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999d37700b86141a104d23f6d71b9c6d986487679ae464c42f3cd32807cfc1fe
3
  size 499500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309552248b5455cb24a1d5fcdd655f4ea19b92b2b96453b3779448518e2c019d
3
  size 499500278
outputs/checkpoints/{checkpoint-23000 → checkpoint-39000}/training_args.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
  size 1871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
  size 1871
outputs/checkpoints/checkpoint-39000/training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 39001}
outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/data_collator.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/checkpoints/{checkpoint-22000 → checkpoint-40000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456166d50c18f7f3142206184d4feef24a636439fdcef000abad0390a3d059b5
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a46d681ebfe45e93c4cc28012a1feaef7abaa51286f963504b939f9a851860
3
  size 249750019
outputs/checkpoints/{checkpoint-20000 → checkpoint-40000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d1e8ec7e4b6752640d08a898bdf803649b14ffa733d5e17300d4eb26761b32c
3
  size 499500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6662a4e279bfe7a2e99b3cc15b3fed96405ca0231e3605e8f556a9187af72b
3
  size 499500278
outputs/checkpoints/{checkpoint-21000 → checkpoint-40000}/training_args.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
  size 1871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
  size 1871
outputs/checkpoints/checkpoint-40000/training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 40001}
outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/data_collator.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998dbfb165df6528c9aba1a3b7f780529f9f4b1802aa9747e677cb3c0df13367
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0516a9ff720d94a52b4fab1f1693d4eaf5cca4d8d28dcc071f34fef91f79f4a6
3
  size 249750019
outputs/checkpoints/{checkpoint-23000 → checkpoint-41000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f600914b8fd791890a084de4c55f62c1bfdb83ea63fd68009fad4b9e326a7993
3
  size 499500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9493e290beeb6e31bf944af01243d995fd53abc3d1d72b5dba6ad7e68c037369
3
  size 499500278
outputs/checkpoints/{checkpoint-20000 → checkpoint-41000}/training_args.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
  size 1871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
  size 1871
outputs/checkpoints/checkpoint-41000/training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 41001}
outputs/checkpoints/{checkpoint-24000 → checkpoint-42000}/config.json RENAMED
File without changes
outputs/checkpoints/checkpoint-42000/data_collator.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/checkpoints/checkpoint-42000/flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac36d419b795cdd68ff42020fca0fc4178e228de7917c12ff9a5bc4b9f3a3525
3
+ size 249750019
outputs/checkpoints/checkpoint-42000/optimizer_state.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfe574d2780b2d3f3a4a3a4ebf4363ec087eecfe83b009683fe3a4230d77d0c
3
+ size 499500278
outputs/checkpoints/checkpoint-42000/training_args.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
+ size 1871
outputs/checkpoints/checkpoint-42000/training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 42001}
outputs/data_collator.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
3
- size 1471394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0321b1a9629e1be122045cd72470365a63c8496fec109fdeec34827f01ffbb9e
3
+ size 1471424
outputs/events.out.tfevents.1626649897.tablespoon.3816803.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e68788bc941966d47e46ed6bb370982bbca2905b890a16cc90a886c753ff87e
3
+ size 40
outputs/events.out.tfevents.1626650133.tablespoon.3825201.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9a74e7c890bc9c3d14d4dd6666844942a518b53c9342cd090c5f0294c0a534
3
+ size 2761686
outputs/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
3
  size 249750019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac36d419b795cdd68ff42020fca0fc4178e228de7917c12ff9a5bc4b9f3a3525
3
  size 249750019
outputs/optimizer_state.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e28559bb3ef8565f8edae3fabb0dd7f897d6c713613daa2253997225190e79c4
3
  size 499500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfe574d2780b2d3f3a4a3a4ebf4363ec087eecfe83b009683fe3a4230d77d0c
3
  size 499500278
outputs/training_args.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
3
  size 1871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc5556c9f3bac3feeb66af493f47061448701f9ba2c8745a8a858eaeb81f3b
3
  size 1871
outputs/training_state.json CHANGED
@@ -1 +1 @@
1
- {"step": 24001}
 
1
+ {"step": 42001}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5245e6d027e2a607f8dd81c64eeb4619c68790451c1c7486cd4a78aff54a84da
3
  size 498858859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf15e143cbdfa808d30b9b10b76bdc58f12f26fc1fe64077ee5a11e6ac9ca60a
3
  size 498858859
run_stream.512.sh CHANGED
@@ -1,12 +1,12 @@
1
  # From https://arxiv.org/pdf/1907.11692.pdf for base model
2
  python -c "import jax; print('TPUs', jax.device_count())"
3
  python ./run_mlm_flax_stream.py \
4
- --model_name_or_path="bertin-project/bertin-base-gaussian" \
5
  --output_dir="./outputs" \
6
  --model_type="roberta" \
7
  --config_name="./configs/base" \
8
  --tokenizer_name="./configs/base" \
9
- --dataset_name="versae/mc4-es-sampled" \
10
  --dataset_config_name="gaussian" \
11
  --max_seq_length="512" \
12
  --pad_to_max_length \
@@ -24,4 +24,4 @@ python ./run_mlm_flax_stream.py \
24
  --num_train_steps="50000" \
25
  --eval_steps="1000" \
26
  --dtype="bfloat16" \
27
- --logging_steps="500" 2>&1 | tee run_stream.log
 
1
  # From https://arxiv.org/pdf/1907.11692.pdf for base model
2
  python -c "import jax; print('TPUs', jax.device_count())"
3
  python ./run_mlm_flax_stream.py \
4
+ --model_name_or_path="./outputs/checkpoints/checkpoint-24000" \
5
  --output_dir="./outputs" \
6
  --model_type="roberta" \
7
  --config_name="./configs/base" \
8
  --tokenizer_name="./configs/base" \
9
+ --dataset_name="bertin-project/mc4-es-sampled" \
10
  --dataset_config_name="gaussian" \
11
  --max_seq_length="512" \
12
  --pad_to_max_length \
 
24
  --num_train_steps="50000" \
25
  --eval_steps="1000" \
26
  --dtype="bfloat16" \
27
+ --logging_steps="500" 2>&1 | tee run_stream_checkpoint.log
run_stream_checkpoint.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faae34d57263f4ac25e67dbfcd5bff6b3761c71f82a0cbf00bd0ef64982f3ec5
3
+ size 4191777