File size: 2,105 Bytes
9af20ca
0829b32
 
9af20ca
 
 
 
 
 
 
 
 
0829b32
 
9af20ca
 
 
 
0829b32
 
 
 
 
9af20ca
 
 
 
0829b32
 
9af20ca
 
 
 
0829b32
 
 
 
 
9af20ca
 
 
 
0829b32
 
9af20ca
 
 
 
0829b32
 
 
 
 
9af20ca
 
 
 
0829b32
 
9af20ca
 
 
 
0829b32
 
 
 
 
9af20ca
 
 
 
0829b32
9af20ca
0829b32
9af20ca
0829b32
9af20ca
 
 
0829b32
 
9af20ca
0829b32
9af20ca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
{
  "best_metric": 0.8163265306122449,
  "best_model_checkpoint": "rubert-tiny2-finetuned-classification\\run-2\\checkpoint-2200",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2200,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.91,
      "learning_rate": 7.123150895957912e-06,
      "loss": 2.1151,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6326530612244898,
      "eval_loss": 1.7264677286148071,
      "eval_runtime": 1.125,
      "eval_samples_per_second": 217.778,
      "eval_steps_per_second": 14.222,
      "step": 550
    },
    {
      "epoch": 1.82,
      "learning_rate": 5.540228474633931e-06,
      "loss": 1.5823,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7755102040816326,
      "eval_loss": 1.2948896884918213,
      "eval_runtime": 1.575,
      "eval_samples_per_second": 155.555,
      "eval_steps_per_second": 10.159,
      "step": 1100
    },
    {
      "epoch": 2.73,
      "learning_rate": 3.9573060533099506e-06,
      "loss": 1.2756,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8,
      "eval_loss": 1.0647388696670532,
      "eval_runtime": 2.6287,
      "eval_samples_per_second": 93.202,
      "eval_steps_per_second": 6.087,
      "step": 1650
    },
    {
      "epoch": 3.64,
      "learning_rate": 2.37438363198597e-06,
      "loss": 1.0916,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8163265306122449,
      "eval_loss": 0.9502941966056824,
      "eval_runtime": 1.215,
      "eval_samples_per_second": 201.646,
      "eval_steps_per_second": 13.169,
      "step": 2200
    }
  ],
  "logging_steps": 500,
  "max_steps": 2750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 3864466200000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 8.706073317281892e-06,
    "num_train_epochs": 5,
    "per_device_train_batch_size": 4,
    "seed": 1
  }
}