diyali95916 committed on
Commit
edf51ed
1 Parent(s): 2567823

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6874
19
- - Rewards/chosen: 0.0803
20
- - Rewards/rejected: 0.0298
21
- - Rewards/accuracies: 1.0
22
- - Rewards/margins: 0.0505
23
- - Logps/rejected: -101.0604
24
- - Logps/chosen: -102.9630
25
- - Logits/rejected: -2.2160
26
- - Logits/chosen: -2.1724
27
 
28
  ## Model description
29
 
@@ -60,10 +60,9 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6931 | 0.8 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -101.3584 | -103.7664 | -2.2157 | -2.1717 |
64
- | 0.6931 | 1.6 | 2 | 0.6948 | 0.0296 | 0.0079 | 0.5 | 0.0217 | -101.2790 | -103.4700 | -2.2147 | -2.1715 |
65
- | 0.6931 | 2.4 | 3 | 0.6913 | 0.0277 | 0.0090 | 0.75 | 0.0188 | -101.2689 | -103.4891 | -2.2153 | -2.1709 |
66
- | 0.6931 | 4.0 | 5 | 0.6874 | 0.0803 | 0.0298 | 1.0 | 0.0505 | -101.0604 | -102.9630 | -2.2160 | -2.1724 |
67
 
68
 
69
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.6843
19
+ - Rewards/chosen: 0.0440
20
+ - Rewards/rejected: 0.0071
21
+ - Rewards/accuracies: 0.5
22
+ - Rewards/margins: 0.0369
23
+ - Logps/rejected: -132.8740
24
+ - Logps/chosen: -190.5722
25
+ - Logits/rejected: -2.2999
26
+ - Logits/chosen: -2.2747
27
 
28
  ## Model description
29
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6931 | 0.55 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -132.9451 | -191.0126 | -2.3015 | -2.2762 |
64
+ | 0.6931 | 1.66 | 3 | 0.6928 | 0.0185 | -0.0111 | 0.5 | 0.0296 | -133.0566 | -190.8279 | -2.3016 | -2.2755 |
65
+ | 0.6931 | 2.76 | 5 | 0.6843 | 0.0440 | 0.0071 | 0.5 | 0.0369 | -132.8740 | -190.5722 | -2.2999 | -2.2747 |
 
66
 
67
 
68
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "k_proj",
20
  "q_proj",
21
- "o_proj",
22
- "v_proj"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "q_proj",
20
+ "k_proj",
21
+ "v_proj",
22
+ "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c85a9785f9eb434dc9c53abbc8850cebbf70813cbc5cdaaee5b484364b861dc6
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb25de8d283b4d3824b2d6f79e594ba446eb8d6e26b9965b1a4a2302946c695
3
  size 109086672
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -2.1724095344543457,
4
- "eval_logits/rejected": -2.2160496711730957,
5
- "eval_logps/chosen": -102.96302795410156,
6
- "eval_logps/rejected": -101.06044006347656,
7
- "eval_loss": 0.6873850226402283,
8
- "eval_rewards/accuracies": 1.0,
9
- "eval_rewards/chosen": 0.08033924549818039,
10
- "eval_rewards/margins": 0.05054035410284996,
11
- "eval_rewards/rejected": 0.02979888767004013,
12
- "eval_runtime": 6.0716,
13
  "eval_samples": 30,
14
- "eval_samples_per_second": 4.941,
15
- "eval_steps_per_second": 0.165,
16
- "train_loss": 0.6922631859779358,
17
- "train_runtime": 883.8621,
18
- "train_samples": 626,
19
- "train_samples_per_second": 3.541,
20
- "train_steps_per_second": 0.006
21
  }
 
1
  {
2
+ "epoch": 2.76,
3
+ "eval_logits/chosen": -2.274653911590576,
4
+ "eval_logits/rejected": -2.2999308109283447,
5
+ "eval_logps/chosen": -190.57220458984375,
6
+ "eval_logps/rejected": -132.8740234375,
7
+ "eval_loss": 0.68434077501297,
8
+ "eval_rewards/accuracies": 0.5,
9
+ "eval_rewards/chosen": 0.04404526203870773,
10
+ "eval_rewards/margins": 0.03693275526165962,
11
+ "eval_rewards/rejected": 0.0071125030517578125,
12
+ "eval_runtime": 6.6846,
13
  "eval_samples": 30,
14
+ "eval_samples_per_second": 4.488,
15
+ "eval_steps_per_second": 0.15,
16
+ "train_loss": 0.6927925229072571,
17
+ "train_runtime": 943.7212,
18
+ "train_samples": 922,
19
+ "train_samples_per_second": 4.885,
20
+ "train_steps_per_second": 0.005
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -2.1724095344543457,
4
- "eval_logits/rejected": -2.2160496711730957,
5
- "eval_logps/chosen": -102.96302795410156,
6
- "eval_logps/rejected": -101.06044006347656,
7
- "eval_loss": 0.6873850226402283,
8
- "eval_rewards/accuracies": 1.0,
9
- "eval_rewards/chosen": 0.08033924549818039,
10
- "eval_rewards/margins": 0.05054035410284996,
11
- "eval_rewards/rejected": 0.02979888767004013,
12
- "eval_runtime": 6.0716,
13
  "eval_samples": 30,
14
- "eval_samples_per_second": 4.941,
15
- "eval_steps_per_second": 0.165
16
  }
 
1
  {
2
+ "epoch": 2.76,
3
+ "eval_logits/chosen": -2.274653911590576,
4
+ "eval_logits/rejected": -2.2999308109283447,
5
+ "eval_logps/chosen": -190.57220458984375,
6
+ "eval_logps/rejected": -132.8740234375,
7
+ "eval_loss": 0.68434077501297,
8
+ "eval_rewards/accuracies": 0.5,
9
+ "eval_rewards/chosen": 0.04404526203870773,
10
+ "eval_rewards/margins": 0.03693275526165962,
11
+ "eval_rewards/rejected": 0.0071125030517578125,
12
+ "eval_runtime": 6.6846,
13
  "eval_samples": 30,
14
+ "eval_samples_per_second": 4.488,
15
+ "eval_steps_per_second": 0.15
16
  }
runs/Jan28_23-26-02_jupyter-dli/events.out.tfevents.1706484442.jupyter-dli.8865.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e38ec1850ba6948e7834bd1ecf020e66cabf371e08273f7569b49604edd9d8cb
3
+ size 7537
runs/Jan28_23-26-02_jupyter-dli/events.out.tfevents.1706485392.jupyter-dli.8865.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95cc5d6cbc6f4a1c8879f422c1889917e6aebef9a6138a3db09df604f6d89c49
3
+ size 815
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "train_loss": 0.6922631859779358,
4
- "train_runtime": 883.8621,
5
- "train_samples": 626,
6
- "train_samples_per_second": 3.541,
7
- "train_steps_per_second": 0.006
8
  }
 
1
  {
2
+ "epoch": 2.76,
3
+ "train_loss": 0.6927925229072571,
4
+ "train_runtime": 943.7212,
5
+ "train_samples": 922,
6
+ "train_samples_per_second": 4.885,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
  "eval_steps": 100,
6
  "global_step": 5,
7
  "is_hyper_param_search": false,
@@ -9,12 +9,12 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8,
13
  "learning_rate": 5e-07,
14
- "logits/chosen": -2.3972699642181396,
15
- "logits/rejected": -2.39332914352417,
16
- "logps/chosen": -153.26783752441406,
17
- "logps/rejected": -146.77935791015625,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
@@ -23,77 +23,61 @@
23
  "step": 1
24
  },
25
  {
26
- "epoch": 0.8,
27
- "eval_logits/chosen": -2.1716835498809814,
28
- "eval_logits/rejected": -2.2157046794891357,
29
- "eval_logps/chosen": -103.76641845703125,
30
- "eval_logps/rejected": -101.35842895507812,
31
  "eval_loss": 0.6931473016738892,
32
  "eval_rewards/accuracies": 0.0,
33
  "eval_rewards/chosen": 0.0,
34
  "eval_rewards/margins": 0.0,
35
  "eval_rewards/rejected": 0.0,
36
- "eval_runtime": 6.5465,
37
- "eval_samples_per_second": 4.583,
38
- "eval_steps_per_second": 0.153,
39
  "step": 1
40
  },
41
  {
42
- "epoch": 1.6,
43
- "eval_logits/chosen": -2.1715452671051025,
44
- "eval_logits/rejected": -2.2146592140197754,
45
- "eval_logps/chosen": -103.4699935913086,
46
- "eval_logps/rejected": -101.27898406982422,
47
- "eval_loss": 0.694814920425415,
48
  "eval_rewards/accuracies": 0.5,
49
- "eval_rewards/chosen": 0.02964324876666069,
50
- "eval_rewards/margins": 0.021698763594031334,
51
- "eval_rewards/rejected": 0.007944487035274506,
52
- "eval_runtime": 6.0506,
53
- "eval_samples_per_second": 4.958,
54
- "eval_steps_per_second": 0.165,
55
- "step": 2
56
- },
57
- {
58
- "epoch": 2.4,
59
- "eval_logits/chosen": -2.170850992202759,
60
- "eval_logits/rejected": -2.215348482131958,
61
- "eval_logps/chosen": -103.48912811279297,
62
- "eval_logps/rejected": -101.26887512207031,
63
- "eval_loss": 0.6913403868675232,
64
- "eval_rewards/accuracies": 0.75,
65
- "eval_rewards/chosen": 0.027730178087949753,
66
- "eval_rewards/margins": 0.018774602562189102,
67
- "eval_rewards/rejected": 0.00895557552576065,
68
- "eval_runtime": 6.0481,
69
- "eval_samples_per_second": 4.96,
70
- "eval_steps_per_second": 0.165,
71
  "step": 3
72
  },
73
  {
74
- "epoch": 4.0,
75
- "eval_logits/chosen": -2.1724095344543457,
76
- "eval_logits/rejected": -2.2160496711730957,
77
- "eval_logps/chosen": -102.96302795410156,
78
- "eval_logps/rejected": -101.06044006347656,
79
- "eval_loss": 0.6873850226402283,
80
- "eval_rewards/accuracies": 1.0,
81
- "eval_rewards/chosen": 0.08033924549818039,
82
- "eval_rewards/margins": 0.05054035410284996,
83
- "eval_rewards/rejected": 0.02979888767004013,
84
- "eval_runtime": 6.0751,
85
- "eval_samples_per_second": 4.938,
86
- "eval_steps_per_second": 0.165,
87
  "step": 5
88
  },
89
  {
90
- "epoch": 4.0,
91
  "step": 5,
92
  "total_flos": 0.0,
93
- "train_loss": 0.6922631859779358,
94
- "train_runtime": 883.8621,
95
- "train_samples_per_second": 3.541,
96
- "train_steps_per_second": 0.006
97
  }
98
  ],
99
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7586206896551726,
5
  "eval_steps": 100,
6
  "global_step": 5,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.55,
13
  "learning_rate": 5e-07,
14
+ "logits/chosen": -2.442253589630127,
15
+ "logits/rejected": -2.4321861267089844,
16
+ "logps/chosen": -136.51828002929688,
17
+ "logps/rejected": -126.99198913574219,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
 
23
  "step": 1
24
  },
25
  {
26
+ "epoch": 0.55,
27
+ "eval_logits/chosen": -2.2761800289154053,
28
+ "eval_logits/rejected": -2.3014957904815674,
29
+ "eval_logps/chosen": -191.0126495361328,
30
+ "eval_logps/rejected": -132.9451446533203,
31
  "eval_loss": 0.6931473016738892,
32
  "eval_rewards/accuracies": 0.0,
33
  "eval_rewards/chosen": 0.0,
34
  "eval_rewards/margins": 0.0,
35
  "eval_rewards/rejected": 0.0,
36
+ "eval_runtime": 7.3415,
37
+ "eval_samples_per_second": 4.086,
38
+ "eval_steps_per_second": 0.136,
39
  "step": 1
40
  },
41
  {
42
+ "epoch": 1.66,
43
+ "eval_logits/chosen": -2.2754929065704346,
44
+ "eval_logits/rejected": -2.3016197681427,
45
+ "eval_logps/chosen": -190.82786560058594,
46
+ "eval_logps/rejected": -133.05657958984375,
47
+ "eval_loss": 0.6928330659866333,
48
  "eval_rewards/accuracies": 0.5,
49
+ "eval_rewards/chosen": 0.018478775396943092,
50
+ "eval_rewards/margins": 0.029622457921504974,
51
+ "eval_rewards/rejected": -0.011143684387207031,
52
+ "eval_runtime": 6.7101,
53
+ "eval_samples_per_second": 4.471,
54
+ "eval_steps_per_second": 0.149,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "step": 3
56
  },
57
  {
58
+ "epoch": 2.76,
59
+ "eval_logits/chosen": -2.274653911590576,
60
+ "eval_logits/rejected": -2.2999308109283447,
61
+ "eval_logps/chosen": -190.57220458984375,
62
+ "eval_logps/rejected": -132.8740234375,
63
+ "eval_loss": 0.68434077501297,
64
+ "eval_rewards/accuracies": 0.5,
65
+ "eval_rewards/chosen": 0.04404526203870773,
66
+ "eval_rewards/margins": 0.03693275526165962,
67
+ "eval_rewards/rejected": 0.0071125030517578125,
68
+ "eval_runtime": 6.6968,
69
+ "eval_samples_per_second": 4.48,
70
+ "eval_steps_per_second": 0.149,
71
  "step": 5
72
  },
73
  {
74
+ "epoch": 2.76,
75
  "step": 5,
76
  "total_flos": 0.0,
77
+ "train_loss": 0.6927925229072571,
78
+ "train_runtime": 943.7212,
79
+ "train_samples_per_second": 4.885,
80
+ "train_steps_per_second": 0.005
81
  }
82
  ],
83
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1679b26c1e5cc6b5a159fba09aee53f44a4515086460ae0d7e21b31df550a03
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937607f6ae2ef92e6f4666a16242306f8a950f78c01ae53da59393fcb7964c6d
3
  size 5752