scottsuk0306 committed on
Commit
772110b
1 Parent(s): 473d768

Model save

Browse files
Files changed (4) hide show
  1. README.md +13 -17
  2. all_results.json +5 -10
  3. train_results.json +5 -5
  4. trainer_state.json +50 -50
README.md CHANGED
@@ -3,15 +3,11 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - sft
9
- - generated_from_trainer
10
  - trl
11
  - sft
12
  - generated_from_trainer
13
  datasets:
14
- - scottsuk0306/DepthQA
15
  model-index:
16
  - name: zephyr-7b-sft-full
17
  results: []
@@ -22,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # zephyr-7b-sft-full
24
 
25
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the scottsuk0306/DepthQA dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 0.4854
28
 
29
  ## Model description
30
 
@@ -60,16 +56,16 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss |
62
  |:-------------:|:-----:|:----:|:---------------:|
63
- | 1.1139 | 1.0 | 1 | 1.1133 |
64
- | 1.1139 | 2.0 | 2 | 1.2947 |
65
- | 1.1139 | 3.0 | 3 | 1.0839 |
66
- | 1.1139 | 4.0 | 4 | 0.8506 |
67
- | 1.0867 | 5.0 | 5 | 0.7521 |
68
- | 1.0867 | 6.0 | 6 | 0.6283 |
69
- | 1.0867 | 7.0 | 7 | 0.5677 |
70
- | 1.0867 | 8.0 | 8 | 0.5233 |
71
- | 1.0867 | 9.0 | 9 | 0.4948 |
72
- | 0.5927 | 10.0 | 10 | 0.4854 |
73
 
74
 
75
  ### Framework versions
 
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
 
 
 
 
6
  - trl
7
  - sft
8
  - generated_from_trainer
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: zephyr-7b-sft-full
13
  results: []
 
18
 
19
  # zephyr-7b-sft-full
20
 
21
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4115
24
 
25
  ## Model description
26
 
 
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:-----:|:----:|:---------------:|
59
+ | 1.1097 | 1.0 | 1 | 1.1095 |
60
+ | 1.1097 | 2.0 | 2 | 1.2840 |
61
+ | 1.1097 | 3.0 | 3 | 1.0815 |
62
+ | 1.1097 | 4.0 | 4 | 0.8740 |
63
+ | 1.0878 | 5.0 | 5 | 0.7459 |
64
+ | 1.0878 | 6.0 | 6 | 0.6034 |
65
+ | 1.0878 | 7.0 | 7 | 0.5202 |
66
+ | 1.0878 | 8.0 | 8 | 0.4572 |
67
+ | 1.0878 | 9.0 | 9 | 0.4221 |
68
+ | 0.5493 | 10.0 | 10 | 0.4115 |
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 0.48538458347320557,
4
- "eval_runtime": 2.6719,
5
- "eval_samples": 851,
6
- "eval_samples_per_second": 17.965,
7
- "eval_steps_per_second": 0.374,
8
  "total_flos": 8375186227200.0,
9
- "train_loss": 0.8424163699150086,
10
- "train_runtime": 304.1051,
11
- "train_samples": 851,
12
- "train_samples_per_second": 1.578,
13
- "train_steps_per_second": 0.033
14
  }
 
1
  {
2
  "epoch": 10.0,
 
 
 
 
 
3
  "total_flos": 8375186227200.0,
4
+ "train_loss": 0.8207268595695496,
5
+ "train_runtime": 309.7037,
6
+ "train_samples": 848,
7
+ "train_samples_per_second": 1.614,
8
+ "train_steps_per_second": 0.032
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 8375186227200.0,
4
- "train_loss": 0.8424163699150086,
5
- "train_runtime": 304.1051,
6
- "train_samples": 851,
7
- "train_samples_per_second": 1.578,
8
- "train_steps_per_second": 0.033
9
  }
 
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 8375186227200.0,
4
+ "train_loss": 0.8207268595695496,
5
+ "train_runtime": 309.7037,
6
+ "train_samples": 848,
7
+ "train_samples_per_second": 1.614,
8
+ "train_steps_per_second": 0.032
9
  }
trainer_state.json CHANGED
@@ -10,113 +10,113 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 18.364192038860114,
14
  "learning_rate": 1e-05,
15
- "loss": 1.1139,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.1133455038070679,
21
- "eval_runtime": 2.8092,
22
- "eval_samples_per_second": 17.087,
23
- "eval_steps_per_second": 0.356,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_loss": 1.2946728467941284,
29
- "eval_runtime": 2.6987,
30
- "eval_samples_per_second": 17.786,
31
- "eval_steps_per_second": 0.371,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
- "eval_loss": 1.0838984251022339,
37
- "eval_runtime": 2.6695,
38
- "eval_samples_per_second": 17.981,
39
- "eval_steps_per_second": 0.375,
40
  "step": 3
41
  },
42
  {
43
  "epoch": 4.0,
44
- "eval_loss": 0.8506386280059814,
45
- "eval_runtime": 2.6924,
46
- "eval_samples_per_second": 17.828,
47
- "eval_steps_per_second": 0.371,
48
  "step": 4
49
  },
50
  {
51
  "epoch": 5.0,
52
- "grad_norm": 19.328671929154766,
53
  "learning_rate": 5.8682408883346535e-06,
54
- "loss": 1.0867,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 5.0,
59
- "eval_loss": 0.7521028518676758,
60
- "eval_runtime": 2.6708,
61
- "eval_samples_per_second": 17.972,
62
- "eval_steps_per_second": 0.374,
63
  "step": 5
64
  },
65
  {
66
  "epoch": 6.0,
67
- "eval_loss": 0.628281831741333,
68
- "eval_runtime": 2.7091,
69
- "eval_samples_per_second": 17.718,
70
- "eval_steps_per_second": 0.369,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 7.0,
75
- "eval_loss": 0.5677043795585632,
76
- "eval_runtime": 2.6968,
77
- "eval_samples_per_second": 17.799,
78
- "eval_steps_per_second": 0.371,
79
  "step": 7
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_loss": 0.5233017802238464,
84
- "eval_runtime": 2.6981,
85
- "eval_samples_per_second": 17.79,
86
- "eval_steps_per_second": 0.371,
87
  "step": 8
88
  },
89
  {
90
  "epoch": 9.0,
91
- "eval_loss": 0.4948171079158783,
92
- "eval_runtime": 2.6682,
93
- "eval_samples_per_second": 17.99,
94
- "eval_steps_per_second": 0.375,
95
  "step": 9
96
  },
97
  {
98
  "epoch": 10.0,
99
- "grad_norm": 4.900067571807948,
100
  "learning_rate": 0.0,
101
- "loss": 0.5927,
102
  "step": 10
103
  },
104
  {
105
  "epoch": 10.0,
106
- "eval_loss": 0.48538458347320557,
107
- "eval_runtime": 2.6954,
108
- "eval_samples_per_second": 17.808,
109
- "eval_steps_per_second": 0.371,
110
  "step": 10
111
  },
112
  {
113
  "epoch": 10.0,
114
  "step": 10,
115
  "total_flos": 8375186227200.0,
116
- "train_loss": 0.8424163699150086,
117
- "train_runtime": 304.1051,
118
- "train_samples_per_second": 1.578,
119
- "train_steps_per_second": 0.033
120
  }
121
  ],
122
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 17.810282459441467,
14
  "learning_rate": 1e-05,
15
+ "loss": 1.1097,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.1095243692398071,
21
+ "eval_runtime": 2.8023,
22
+ "eval_samples_per_second": 17.843,
23
+ "eval_steps_per_second": 0.357,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_loss": 1.2839607000350952,
29
+ "eval_runtime": 2.6692,
30
+ "eval_samples_per_second": 18.732,
31
+ "eval_steps_per_second": 0.375,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "eval_loss": 1.0814800262451172,
37
+ "eval_runtime": 2.6746,
38
+ "eval_samples_per_second": 18.695,
39
+ "eval_steps_per_second": 0.374,
40
  "step": 3
41
  },
42
  {
43
  "epoch": 4.0,
44
+ "eval_loss": 0.874021053314209,
45
+ "eval_runtime": 2.6724,
46
+ "eval_samples_per_second": 18.71,
47
+ "eval_steps_per_second": 0.374,
48
  "step": 4
49
  },
50
  {
51
  "epoch": 5.0,
52
+ "grad_norm": 18.20399999295951,
53
  "learning_rate": 5.8682408883346535e-06,
54
+ "loss": 1.0878,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 5.0,
59
+ "eval_loss": 0.7459021210670471,
60
+ "eval_runtime": 2.6898,
61
+ "eval_samples_per_second": 18.589,
62
+ "eval_steps_per_second": 0.372,
63
  "step": 5
64
  },
65
  {
66
  "epoch": 6.0,
67
+ "eval_loss": 0.6033510565757751,
68
+ "eval_runtime": 2.6879,
69
+ "eval_samples_per_second": 18.602,
70
+ "eval_steps_per_second": 0.372,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 7.0,
75
+ "eval_loss": 0.5201848745346069,
76
+ "eval_runtime": 2.6772,
77
+ "eval_samples_per_second": 18.676,
78
+ "eval_steps_per_second": 0.374,
79
  "step": 7
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_loss": 0.4572071433067322,
84
+ "eval_runtime": 2.6696,
85
+ "eval_samples_per_second": 18.73,
86
+ "eval_steps_per_second": 0.375,
87
  "step": 8
88
  },
89
  {
90
  "epoch": 9.0,
91
+ "eval_loss": 0.4221082031726837,
92
+ "eval_runtime": 2.677,
93
+ "eval_samples_per_second": 18.677,
94
+ "eval_steps_per_second": 0.374,
95
  "step": 9
96
  },
97
  {
98
  "epoch": 10.0,
99
+ "grad_norm": 3.0214434738231213,
100
  "learning_rate": 0.0,
101
+ "loss": 0.5493,
102
  "step": 10
103
  },
104
  {
105
  "epoch": 10.0,
106
+ "eval_loss": 0.41154685616493225,
107
+ "eval_runtime": 2.6558,
108
+ "eval_samples_per_second": 18.827,
109
+ "eval_steps_per_second": 0.377,
110
  "step": 10
111
  },
112
  {
113
  "epoch": 10.0,
114
  "step": 10,
115
  "total_flos": 8375186227200.0,
116
+ "train_loss": 0.8207268595695496,
117
+ "train_runtime": 309.7037,
118
+ "train_samples_per_second": 1.614,
119
+ "train_steps_per_second": 0.032
120
  }
121
  ],
122
  "logging_steps": 5,