Femboyuwu2000
committed on
Commit
•
e4f8bbb
1
Parent(s):
eb8bce9
Training in progress, step 20, checkpoint
Browse files
- last-checkpoint/README.md +1 -1
- last-checkpoint/adapter_config.json +10 -2
- last-checkpoint/adapter_model.safetensors +2 -2
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/tokenizer.json +2 -2
- last-checkpoint/tokenizer.model +3 -0
- last-checkpoint/tokenizer_config.json +7 -11
- last-checkpoint/trainer_state.json +8 -141
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
library_name: peft
|
3 |
-
base_model:
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
2 |
library_name: peft
|
3 |
+
base_model: JackFram/llama-160m
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
last-checkpoint/adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
-
"base_model_name_or_path": "
|
5 |
"bias": "none",
|
6 |
"fan_in_fan_out": false,
|
7 |
"inference_mode": true,
|
@@ -20,7 +20,15 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM",
|
26 |
"use_dora": false,
|
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "JackFram/llama-160m",
|
5 |
"bias": "none",
|
6 |
"fan_in_fan_out": false,
|
7 |
"inference_mode": true,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"up_proj",
|
24 |
+
"embed_tokens",
|
25 |
+
"v_proj",
|
26 |
+
"down_proj",
|
27 |
+
"k_proj",
|
28 |
+
"lm_head",
|
29 |
+
"q_proj",
|
30 |
+
"o_proj",
|
31 |
+
"gate_proj"
|
32 |
],
|
33 |
"task_type": "CAUSAL_LM",
|
34 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec6a12bd652862c2785d139bd15d9582616e16bfdf59174630b76670fc9a963e
|
3 |
+
size 205511192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4a9363a2a673c3fe1089907dfeb0e8180df9ee7402d1e8ff77d6dd7b4aa5650
|
3 |
+
size 4644026
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed0777ad4342578dcc46ca5f5a6d5fcf48dd8094a61fe1c87b41cb2f83ff1c6a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:064312e6375f494574ea3d2e75c4bb8dc97a4b36316db34a10f092589094ee40
|
3 |
size 1064
|
last-checkpoint/tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:054b87d156d39458c2c9bc37f19d8dc373128f5545d309d1e58c83187d68113a
|
3 |
+
size 1842934
|
last-checkpoint/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
last-checkpoint/tokenizer_config.json
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
3 |
"added_tokens_decoder": {
|
4 |
"0": {
|
5 |
"content": "<unk>",
|
@@ -24,22 +25,17 @@
|
|
24 |
"rstrip": false,
|
25 |
"single_word": false,
|
26 |
"special": true
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"content": "<pad>",
|
30 |
-
"lstrip": false,
|
31 |
-
"normalized": false,
|
32 |
-
"rstrip": false,
|
33 |
-
"single_word": false,
|
34 |
-
"special": true
|
35 |
}
|
36 |
},
|
37 |
"bos_token": "<s>",
|
38 |
"clean_up_tokenization_spaces": false,
|
39 |
"eos_token": "</s>",
|
|
|
40 |
"model_max_length": 1000000000000000019884624838656,
|
41 |
"pad_token": "</s>",
|
42 |
"padding_side": "right",
|
43 |
-
"
|
44 |
-
"
|
|
|
|
|
45 |
}
|
|
|
1 |
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
"added_tokens_decoder": {
|
5 |
"0": {
|
6 |
"content": "<unk>",
|
|
|
25 |
"rstrip": false,
|
26 |
"single_word": false,
|
27 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
}
|
29 |
},
|
30 |
"bos_token": "<s>",
|
31 |
"clean_up_tokenization_spaces": false,
|
32 |
"eos_token": "</s>",
|
33 |
+
"legacy": false,
|
34 |
"model_max_length": 1000000000000000019884624838656,
|
35 |
"pad_token": "</s>",
|
36 |
"padding_side": "right",
|
37 |
+
"sp_model_kwargs": {},
|
38 |
+
"tokenizer_class": "LlamaTokenizer",
|
39 |
+
"unk_token": "<unk>",
|
40 |
+
"use_default_system_prompt": false
|
41 |
}
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,161 +1,28 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss":
|
16 |
"step": 20
|
17 |
-
},
|
18 |
-
{
|
19 |
-
"epoch": 0.0,
|
20 |
-
"grad_norm": 2.218903064727783,
|
21 |
-
"learning_rate": 6.666666666666667e-06,
|
22 |
-
"loss": 3.6917,
|
23 |
-
"step": 40
|
24 |
-
},
|
25 |
-
{
|
26 |
-
"epoch": 0.0,
|
27 |
-
"grad_norm": 0.9262466430664062,
|
28 |
-
"learning_rate": 1e-05,
|
29 |
-
"loss": 3.5828,
|
30 |
-
"step": 60
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"epoch": 0.0,
|
34 |
-
"grad_norm": 2.782036542892456,
|
35 |
-
"learning_rate": 1.3166666666666665e-05,
|
36 |
-
"loss": 3.5865,
|
37 |
-
"step": 80
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"epoch": 0.0,
|
41 |
-
"grad_norm": 1.9482054710388184,
|
42 |
-
"learning_rate": 1.65e-05,
|
43 |
-
"loss": 3.3337,
|
44 |
-
"step": 100
|
45 |
-
},
|
46 |
-
{
|
47 |
-
"epoch": 0.0,
|
48 |
-
"grad_norm": 4.047863006591797,
|
49 |
-
"learning_rate": 1.9833333333333335e-05,
|
50 |
-
"loss": 3.1903,
|
51 |
-
"step": 120
|
52 |
-
},
|
53 |
-
{
|
54 |
-
"epoch": 0.0,
|
55 |
-
"grad_norm": 3.08722186088562,
|
56 |
-
"learning_rate": 2.3166666666666666e-05,
|
57 |
-
"loss": 3.5379,
|
58 |
-
"step": 140
|
59 |
-
},
|
60 |
-
{
|
61 |
-
"epoch": 0.0,
|
62 |
-
"grad_norm": 3.540940046310425,
|
63 |
-
"learning_rate": 2.6500000000000004e-05,
|
64 |
-
"loss": 3.16,
|
65 |
-
"step": 160
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"epoch": 0.0,
|
69 |
-
"grad_norm": 5.391817092895508,
|
70 |
-
"learning_rate": 2.9833333333333335e-05,
|
71 |
-
"loss": 3.2489,
|
72 |
-
"step": 180
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"epoch": 0.0,
|
76 |
-
"grad_norm": 5.890682220458984,
|
77 |
-
"learning_rate": 3.316666666666667e-05,
|
78 |
-
"loss": 3.0499,
|
79 |
-
"step": 200
|
80 |
-
},
|
81 |
-
{
|
82 |
-
"epoch": 0.0,
|
83 |
-
"grad_norm": 6.314597129821777,
|
84 |
-
"learning_rate": 3.65e-05,
|
85 |
-
"loss": 2.8568,
|
86 |
-
"step": 220
|
87 |
-
},
|
88 |
-
{
|
89 |
-
"epoch": 0.0,
|
90 |
-
"grad_norm": 1.0859078168869019,
|
91 |
-
"learning_rate": 3.983333333333333e-05,
|
92 |
-
"loss": 2.8566,
|
93 |
-
"step": 240
|
94 |
-
},
|
95 |
-
{
|
96 |
-
"epoch": 0.0,
|
97 |
-
"grad_norm": 4.688353538513184,
|
98 |
-
"learning_rate": 4.316666666666667e-05,
|
99 |
-
"loss": 3.0079,
|
100 |
-
"step": 260
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"epoch": 0.0,
|
104 |
-
"grad_norm": 4.502331256866455,
|
105 |
-
"learning_rate": 4.6500000000000005e-05,
|
106 |
-
"loss": 2.6839,
|
107 |
-
"step": 280
|
108 |
-
},
|
109 |
-
{
|
110 |
-
"epoch": 0.0,
|
111 |
-
"grad_norm": 8.951983451843262,
|
112 |
-
"learning_rate": 4.9833333333333336e-05,
|
113 |
-
"loss": 2.7932,
|
114 |
-
"step": 300
|
115 |
-
},
|
116 |
-
{
|
117 |
-
"epoch": 0.0,
|
118 |
-
"grad_norm": 4.788575172424316,
|
119 |
-
"learning_rate": 4.9999526661182696e-05,
|
120 |
-
"loss": 2.9341,
|
121 |
-
"step": 320
|
122 |
-
},
|
123 |
-
{
|
124 |
-
"epoch": 0.0,
|
125 |
-
"grad_norm": 7.716049671173096,
|
126 |
-
"learning_rate": 4.999800570348766e-05,
|
127 |
-
"loss": 2.5987,
|
128 |
-
"step": 340
|
129 |
-
},
|
130 |
-
{
|
131 |
-
"epoch": 0.0,
|
132 |
-
"grad_norm": 4.9223952293396,
|
133 |
-
"learning_rate": 4.9995435879539254e-05,
|
134 |
-
"loss": 2.7863,
|
135 |
-
"step": 360
|
136 |
-
},
|
137 |
-
{
|
138 |
-
"epoch": 0.0,
|
139 |
-
"grad_norm": 7.647037506103516,
|
140 |
-
"learning_rate": 4.999181729716214e-05,
|
141 |
-
"loss": 2.6197,
|
142 |
-
"step": 380
|
143 |
-
},
|
144 |
-
{
|
145 |
-
"epoch": 0.0,
|
146 |
-
"grad_norm": 1.073474407196045,
|
147 |
-
"learning_rate": 4.998715010818479e-05,
|
148 |
-
"loss": 2.6627,
|
149 |
-
"step": 400
|
150 |
}
|
151 |
],
|
152 |
"logging_steps": 20,
|
153 |
-
"max_steps":
|
154 |
"num_input_tokens_seen": 0,
|
155 |
"num_train_epochs": 1,
|
156 |
"save_steps": 20,
|
157 |
-
"total_flos":
|
158 |
-
"train_batch_size":
|
159 |
"trial_name": null,
|
160 |
"trial_params": null
|
161 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.0001245204018895971,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 20,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
+
"grad_norm": NaN,
|
14 |
+
"learning_rate": 9.999999999999999e-06,
|
15 |
+
"loss": 4.7178,
|
16 |
"step": 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
}
|
18 |
],
|
19 |
"logging_steps": 20,
|
20 |
+
"max_steps": 2000,
|
21 |
"num_input_tokens_seen": 0,
|
22 |
"num_train_epochs": 1,
|
23 |
"save_steps": 20,
|
24 |
+
"total_flos": 297330828518400.0,
|
25 |
+
"train_batch_size": 2,
|
26 |
"trial_name": null,
|
27 |
"trial_params": null
|
28 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70e3a7379ab98397e1c0492218b73c68c0db563ddcb245966b7fd947f7930183
|
3 |
size 4984
|