sibstrider committed on
Commit
2ca3c1d
1 Parent(s): d95c13c

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73e4d6a6e947f36beb3343b91199b7400ab1fad9f13965ee5f2f874b0f4d0ea0
3
  size 116795404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ced358dc3bc4605895c8027d988748fb8764af9b57b5edaad008d467ac9df5
3
  size 116795404
run-2/checkpoint-1100/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0363f67352cbd2526e198d0c20d3193cdea64d07435cfaa2ea803f2af4cc753
3
  size 116795404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ced358dc3bc4605895c8027d988748fb8764af9b57b5edaad008d467ac9df5
3
  size 116795404
run-2/checkpoint-1100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f615d3633d28e1402491ae73af70416cc428deec610ad3f6c9513f9e00f23ec
3
  size 233621882
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171427067aaa2194d8a81db089c70b669189c9c98fde6c739061a0a3a2068e40
3
  size 233621882
run-2/checkpoint-1100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3180da2883c56c8e0a6a790414652de70473fb9817815bef9e488338d650da67
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ada2bb04b88d0294fe44121ff921a07d99208db29ca33d0b0f391e55a9684bc
3
  size 13990
run-2/checkpoint-1100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85e5ab558f76098b7874b0df51d8d53a08350d7818f4d448db67ff258e0c2c24
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cee9d68d7ffd6e02e89ecadac88c09363aa50fa3ffbe10ea51c490d81f296210
3
  size 1064
run-2/checkpoint-1100/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8571428571428571,
3
  "best_model_checkpoint": "rubert-tiny2-finetuned-classification\\run-2\\checkpoint-1100",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,47 +10,47 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
- "learning_rate": 1.5039435145104783e-05,
14
- "loss": 1.8268,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_accuracy": 0.7224489795918367,
20
- "eval_loss": 1.1639864444732666,
21
- "eval_runtime": 1.3165,
22
- "eval_samples_per_second": 186.096,
23
- "eval_steps_per_second": 12.153,
24
  "step": 550
25
  },
26
  {
27
  "epoch": 1.82,
28
- "learning_rate": 1.0616071867132787e-05,
29
- "loss": 1.0445,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.8571428571428571,
35
- "eval_loss": 0.730604887008667,
36
- "eval_runtime": 1.103,
37
- "eval_samples_per_second": 222.131,
38
- "eval_steps_per_second": 14.507,
39
  "step": 1100
40
  }
41
  ],
42
  "logging_steps": 500,
43
- "max_steps": 2200,
44
  "num_input_tokens_seen": 0,
45
- "num_train_epochs": 4,
46
  "save_steps": 500,
47
- "total_flos": 1918563271800.0,
48
  "train_batch_size": 4,
49
  "trial_name": null,
50
  "trial_params": {
51
- "learning_rate": 1.946279842307678e-05,
52
- "num_train_epochs": 4,
53
  "per_device_train_batch_size": 4,
54
- "seed": 23
55
  }
56
  }
 
1
  {
2
+ "best_metric": 0.7755102040816326,
3
  "best_model_checkpoint": "rubert-tiny2-finetuned-classification\\run-2\\checkpoint-1100",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
+ "learning_rate": 7.123150895957912e-06,
14
+ "loss": 2.1151,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_accuracy": 0.6326530612244898,
20
+ "eval_loss": 1.7264677286148071,
21
+ "eval_runtime": 1.125,
22
+ "eval_samples_per_second": 217.778,
23
+ "eval_steps_per_second": 14.222,
24
  "step": 550
25
  },
26
  {
27
  "epoch": 1.82,
28
+ "learning_rate": 5.540228474633931e-06,
29
+ "loss": 1.5823,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7755102040816326,
35
+ "eval_loss": 1.2948896884918213,
36
+ "eval_runtime": 1.575,
37
+ "eval_samples_per_second": 155.555,
38
+ "eval_steps_per_second": 10.159,
39
  "step": 1100
40
  }
41
  ],
42
  "logging_steps": 500,
43
+ "max_steps": 2750,
44
  "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 5,
46
  "save_steps": 500,
47
+ "total_flos": 1945715472750.0,
48
  "train_batch_size": 4,
49
  "trial_name": null,
50
  "trial_params": {
51
+ "learning_rate": 8.706073317281892e-06,
52
+ "num_train_epochs": 5,
53
  "per_device_train_batch_size": 4,
54
+ "seed": 1
55
  }
56
  }
run-2/checkpoint-1100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24844b54c4e4c2ebe3c69bc8bd3a5158a5ee05c4aebe057006cf6ebf084d9bab
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf6249fcaffd3bf1a6e41efb7044edf4d8222cdd0729549fa8f87e1c5b20556
3
  size 4728
run-2/checkpoint-1650/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8734693877551021,
3
  "best_model_checkpoint": "rubert-tiny2-finetuned-classification\\run-2\\checkpoint-1650",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,62 +10,62 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
- "learning_rate": 1.5039435145104783e-05,
14
- "loss": 1.8268,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_accuracy": 0.7224489795918367,
20
- "eval_loss": 1.1639864444732666,
21
- "eval_runtime": 1.3165,
22
- "eval_samples_per_second": 186.096,
23
- "eval_steps_per_second": 12.153,
24
  "step": 550
25
  },
26
  {
27
  "epoch": 1.82,
28
- "learning_rate": 1.0616071867132787e-05,
29
- "loss": 1.0445,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.8571428571428571,
35
- "eval_loss": 0.730604887008667,
36
- "eval_runtime": 1.103,
37
- "eval_samples_per_second": 222.131,
38
- "eval_steps_per_second": 14.507,
39
  "step": 1100
40
  },
41
  {
42
  "epoch": 2.73,
43
- "learning_rate": 6.192708589160794e-06,
44
- "loss": 0.7493,
45
  "step": 1500
46
  },
47
  {
48
  "epoch": 3.0,
49
- "eval_accuracy": 0.8734693877551021,
50
- "eval_loss": 0.5868724584579468,
51
- "eval_runtime": 1.1252,
52
- "eval_samples_per_second": 217.733,
53
- "eval_steps_per_second": 14.219,
54
  "step": 1650
55
  }
56
  ],
57
  "logging_steps": 500,
58
- "max_steps": 2200,
59
  "num_input_tokens_seen": 0,
60
- "num_train_epochs": 4,
61
  "save_steps": 500,
62
- "total_flos": 2922545822700.0,
63
  "train_batch_size": 4,
64
  "trial_name": null,
65
  "trial_params": {
66
- "learning_rate": 1.946279842307678e-05,
67
- "num_train_epochs": 4,
68
  "per_device_train_batch_size": 4,
69
- "seed": 23
70
  }
71
  }
 
1
  {
2
+ "best_metric": 0.8,
3
  "best_model_checkpoint": "rubert-tiny2-finetuned-classification\\run-2\\checkpoint-1650",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.91,
13
+ "learning_rate": 7.123150895957912e-06,
14
+ "loss": 2.1151,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_accuracy": 0.6326530612244898,
20
+ "eval_loss": 1.7264677286148071,
21
+ "eval_runtime": 1.125,
22
+ "eval_samples_per_second": 217.778,
23
+ "eval_steps_per_second": 14.222,
24
  "step": 550
25
  },
26
  {
27
  "epoch": 1.82,
28
+ "learning_rate": 5.540228474633931e-06,
29
+ "loss": 1.5823,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7755102040816326,
35
+ "eval_loss": 1.2948896884918213,
36
+ "eval_runtime": 1.575,
37
+ "eval_samples_per_second": 155.555,
38
+ "eval_steps_per_second": 10.159,
39
  "step": 1100
40
  },
41
  {
42
  "epoch": 2.73,
43
+ "learning_rate": 3.9573060533099506e-06,
44
+ "loss": 1.2756,
45
  "step": 1500
46
  },
47
  {
48
  "epoch": 3.0,
49
+ "eval_accuracy": 0.8,
50
+ "eval_loss": 1.0647388696670532,
51
+ "eval_runtime": 2.6287,
52
+ "eval_samples_per_second": 93.202,
53
+ "eval_steps_per_second": 6.087,
54
  "step": 1650
55
  }
56
  ],
57
  "logging_steps": 500,
58
+ "max_steps": 2750,
59
  "num_input_tokens_seen": 0,
60
+ "num_train_epochs": 5,
61
  "save_steps": 500,
62
+ "total_flos": 2905718091150.0,
63
  "train_batch_size": 4,
64
  "trial_name": null,
65
  "trial_params": {
66
+ "learning_rate": 8.706073317281892e-06,
67
+ "num_train_epochs": 5,
68
  "per_device_train_batch_size": 4,
69
+ "seed": 1
70
  }
71
  }
runs/Jan15_00-51-52_DESKTOP-RFEED41/events.out.tfevents.1705269786.DESKTOP-RFEED41.20976.18 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4578bfa83675a3cce33be98262b633d695f981f70048612e5dda786f2becb1cc
3
- size 5430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335d341f4f46113a5629ed21e06e5ea529e9c101dd7cacbc2d2cd91116247770
3
+ size 6390