File size: 2,072 Bytes
8f19dbd
 
0387d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f19dbd
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
{
    "epoch": 1.9986120749479528,
    "eval_alpha_0_uf": 0.5116097927093506,
    "eval_alpha_1_rlced_conifer": 0.4883902370929718,
    "eval_excess_loss": 0.03742467654742917,
    "eval_logits/chosen": 1.9375910758972168,
    "eval_logits/chosen_0_uf": 3.0999701023101807,
    "eval_logits/chosen_1_rlced_conifer": 1.572981357574463,
    "eval_logits/rejected": 3.877798080444336,
    "eval_logits/rejected_0_uf": 5.175272464752197,
    "eval_logits/rejected_1_rlced_conifer": 3.567551374435425,
    "eval_logps/chosen": -677.5836791992188,
    "eval_logps/chosen_0_uf": -497.45623779296875,
    "eval_logps/chosen_1_rlced_conifer": -728.9337158203125,
    "eval_logps/rejected": -1262.6171875,
    "eval_logps/rejected_0_uf": -584.9677124023438,
    "eval_logps/rejected_1_rlced_conifer": -1451.01318359375,
    "eval_loss": 0.23950526118278503,
    "eval_rewards/accuracies": 0.8778195381164551,
    "eval_rewards/accuracies_0_uf": 0.7632744312286377,
    "eval_rewards/accuracies_1_rlced_conifer": 0.9097297191619873,
    "eval_rewards/chosen": -2.8511428833007812,
    "eval_rewards/chosen_0_uf": -2.081967353820801,
    "eval_rewards/chosen_1_rlced_conifer": -3.0535335540771484,
    "eval_rewards/margins": 5.737672805786133,
    "eval_rewards/margins_0_uf": 1.3516454696655273,
    "eval_rewards/margins_1_rlced_conifer": 6.98124361038208,
    "eval_rewards/rejected": -8.588815689086914,
    "eval_rewards/rejected_0_uf": -3.433612823486328,
    "eval_rewards/rejected_1_rlced_conifer": -10.03477668762207,
    "eval_runtime": 389.2096,
    "eval_samples": 8491,
    "eval_samples_per_second": 21.816,
    "eval_steps_per_second": 0.342,
    "eval_task_excess_loss_0_uf": 0.0723980620206721,
    "eval_task_excess_loss_1_rlced_conifer": 0.04272522438225138,
    "eval_task_loss_0_uf": 0.5184707641601562,
    "eval_task_loss_1_rlced_conifer": 0.17867246270179749,
    "total_flos": 0.0,
    "train_loss": 0.14047848768532276,
    "train_runtime": 41218.966,
    "train_samples": 184443,
    "train_samples_per_second": 8.949,
    "train_steps_per_second": 0.035
}