Training in progress, epoch 5

Browse files

Files changed (13) hide show

model.safetensors +1 -1
run-2/checkpoint-10690/config.json +25 -0
run-2/checkpoint-10690/model.safetensors +3 -0
run-2/checkpoint-10690/optimizer.pt +3 -0
run-2/checkpoint-10690/rng_state.pth +3 -0
run-2/checkpoint-10690/scheduler.pt +3 -0
run-2/checkpoint-10690/special_tokens_map.json +7 -0
run-2/checkpoint-10690/tokenizer.json +0 -0
run-2/checkpoint-10690/tokenizer_config.json +55 -0
run-2/checkpoint-10690/trainer_state.json +218 -0
run-2/checkpoint-10690/training_args.bin +3 -0
run-2/checkpoint-10690/vocab.txt +0 -0
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c607fa5c2cbe8d1e975d4269b9024790aaf45ba1c82c80025876d29d4c01c52
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:856d0bff7eb638267c505ff2b9f2b6bedf380c021825067b4b731de1758bf529
 size 267832560

run-2/checkpoint-10690/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-10690/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:856d0bff7eb638267c505ff2b9f2b6bedf380c021825067b4b731de1758bf529
+size 267832560

run-2/checkpoint-10690/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c670da37038a543cce7e76e00bffafb783d9de868ce1828d42b37ec2652d1bf5
+size 535727290

run-2/checkpoint-10690/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37b56eb48261dbd637b37a0cca8d4170576e5cfe95bd5515ed0073c1e84c2f93
+size 14244

run-2/checkpoint-10690/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ec5fcd10c827e1776307f548b12c853c3571b3b6047376e46ed808e44776328
+size 1064

run-2/checkpoint-10690/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-10690/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-10690/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-10690/trainer_state.json ADDED Viewed

	@@ -0,0 +1,218 @@

+{
+  "best_metric": 0.4779281382373973,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-8552",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 10690,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23,
+      "grad_norm": 4.28505277633667,
+      "learning_rate": 3.0702893894484785e-06,
+      "loss": 0.6069,
+      "step": 500
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 9.482794761657715,
+      "learning_rate": 2.9196373094951675e-06,
+      "loss": 0.5628,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 22.521339416503906,
+      "learning_rate": 2.7689852295418565e-06,
+      "loss": 0.5565,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 26.7753849029541,
+      "learning_rate": 2.6183331495885454e-06,
+      "loss": 0.5184,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5730993747711182,
+      "eval_matthews_correlation": 0.3853198145814999,
+      "eval_runtime": 0.7612,
+      "eval_samples_per_second": 1370.225,
+      "eval_steps_per_second": 86.706,
+      "step": 2138
+    },
+    {
+      "epoch": 1.17,
+      "grad_norm": 17.77669334411621,
+      "learning_rate": 2.4676810696352344e-06,
+      "loss": 0.4619,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 37.4239387512207,
+      "learning_rate": 2.3170289896819234e-06,
+      "loss": 0.5014,
+      "step": 3000
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 46.75569534301758,
+      "learning_rate": 2.1663769097286124e-06,
+      "loss": 0.492,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 66.9134750366211,
+      "learning_rate": 2.0157248297753013e-06,
+      "loss": 0.4809,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6646500825881958,
+      "eval_matthews_correlation": 0.4691032179514943,
+      "eval_runtime": 0.8224,
+      "eval_samples_per_second": 1268.193,
+      "eval_steps_per_second": 80.25,
+      "step": 4276
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 11.169896125793457,
+      "learning_rate": 1.8650727498219905e-06,
+      "loss": 0.4934,
+      "step": 4500
+    },
+    {
+      "epoch": 2.34,
+      "grad_norm": 64.01177215576172,
+      "learning_rate": 1.7144206698686793e-06,
+      "loss": 0.4653,
+      "step": 5000
+    },
+    {
+      "epoch": 2.57,
+      "grad_norm": 0.48781171441078186,
+      "learning_rate": 1.5637685899153684e-06,
+      "loss": 0.4639,
+      "step": 5500
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 0.21301893889904022,
+      "learning_rate": 1.4131165099620574e-06,
+      "loss": 0.514,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.8122562766075134,
+      "eval_matthews_correlation": 0.44860917123689154,
+      "eval_runtime": 0.7584,
+      "eval_samples_per_second": 1375.339,
+      "eval_steps_per_second": 87.03,
+      "step": 6414
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 76.10978698730469,
+      "learning_rate": 1.2624644300087464e-06,
+      "loss": 0.5175,
+      "step": 6500
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 13.475150108337402,
+      "learning_rate": 1.1118123500554353e-06,
+      "loss": 0.4448,
+      "step": 7000
+    },
+    {
+      "epoch": 3.51,
+      "grad_norm": 0.116688072681427,
+      "learning_rate": 9.611602701021243e-07,
+      "loss": 0.4159,
+      "step": 7500
+    },
+    {
+      "epoch": 3.74,
+      "grad_norm": 0.9807508587837219,
+      "learning_rate": 8.105081901488134e-07,
+      "loss": 0.4664,
+      "step": 8000
+    },
+    {
+      "epoch": 3.98,
+      "grad_norm": 0.25140833854675293,
+      "learning_rate": 6.598561101955022e-07,
+      "loss": 0.4243,
+      "step": 8500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.8493317365646362,
+      "eval_matthews_correlation": 0.4779281382373973,
+      "eval_runtime": 0.8106,
+      "eval_samples_per_second": 1286.694,
+      "eval_steps_per_second": 81.421,
+      "step": 8552
+    },
+    {
+      "epoch": 4.21,
+      "grad_norm": 149.03529357910156,
+      "learning_rate": 5.092040302421912e-07,
+      "loss": 0.4192,
+      "step": 9000
+    },
+    {
+      "epoch": 4.44,
+      "grad_norm": 0.7424508929252625,
+      "learning_rate": 3.585519502888802e-07,
+      "loss": 0.4218,
+      "step": 9500
+    },
+    {
+      "epoch": 4.68,
+      "grad_norm": 11.759597778320312,
+      "learning_rate": 2.078998703355692e-07,
+      "loss": 0.469,
+      "step": 10000
+    },
+    {
+      "epoch": 4.91,
+      "grad_norm": 7.273904800415039,
+      "learning_rate": 5.7247790382258184e-08,
+      "loss": 0.4144,
+      "step": 10500
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.8652405738830566,
+      "eval_matthews_correlation": 0.4755357129573294,
+      "eval_runtime": 0.7744,
+      "eval_samples_per_second": 1346.924,
+      "eval_steps_per_second": 85.232,
+      "step": 10690
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 10690,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 171279829011432.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.2209414694017896e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 16
+  }
+}

run-2/checkpoint-10690/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88d2c3d6804ca2d9d22cb74f328c5ae8ec320f8d12a0ef15ea5ae2037f02bd85
+size 4984

run-2/checkpoint-10690/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1d3d5d136c241945fd96f4c7140c4735311875e6e9a1f29a0e85d8f1ba39a7b
-size 9938

 version https://git-lfs.github.com/spec/v1
+oid sha256:16448ef64ad6e9d2b5a5c34b0343fa5846f28d2aba72f2b75407371dd2c9dc47
+size 11049