{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0023797217956488276,
  "eval_steps": 2000,
  "global_step": 3200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 8.451894760131836,
      "learning_rate": 1.9999999959757473e-05,
      "loss": 1.835,
      "step": 200
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7373883724212646,
      "learning_rate": 1.9999999832252933e-05,
      "loss": 1.6278,
      "step": 400
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.7490854263305664,
      "learning_rate": 1.9999999617416517e-05,
      "loss": 1.6314,
      "step": 600
    },
    {
      "epoch": 0.0,
      "grad_norm": 10.143038749694824,
      "learning_rate": 1.999999931524823e-05,
      "loss": 1.5416,
      "step": 800
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.783194065093994,
      "learning_rate": 1.999999892574807e-05,
      "loss": 1.5775,
      "step": 1000
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1446919441223145,
      "learning_rate": 1.9999998448916044e-05,
      "loss": 1.6922,
      "step": 1200
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.6168997287750244,
      "learning_rate": 1.9999997884752155e-05,
      "loss": 1.6211,
      "step": 1400
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.068266868591309,
      "learning_rate": 1.9999997233256404e-05,
      "loss": 1.6001,
      "step": 1600
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.046320676803589,
      "learning_rate": 1.9999996494428805e-05,
      "loss": 1.5682,
      "step": 1800
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.574249267578125,
      "learning_rate": 1.9999995668269356e-05,
      "loss": 1.5658,
      "step": 2000
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.401742935180664,
      "learning_rate": 1.999999475956276e-05,
      "loss": 1.6152,
      "step": 2200
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.141517162322998,
      "learning_rate": 1.9999993759176304e-05,
      "loss": 1.564,
      "step": 2400
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8213422298431396,
      "learning_rate": 1.9999992671458023e-05,
      "loss": 1.5586,
      "step": 2600
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3063032627105713,
      "learning_rate": 1.999999149640793e-05,
      "loss": 1.6118,
      "step": 2800
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.5887880325317383,
      "learning_rate": 1.9999990234026036e-05,
      "loss": 1.586,
      "step": 3000
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.8140385150909424,
      "learning_rate": 1.9999988884312347e-05,
      "loss": 1.6221,
      "step": 3200
    }
  ],
  "logging_steps": 200,
  "max_steps": 6723475,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 200,
  "total_flos": 4.180089275793408e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}