Around6827 committed on
Commit e4e0320
1 Parent(s): d27290d

Training in progress, epoch 0

README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  library_name: peft
- base_model: NousResearch/Llama-2-13b-chat-hf
+ base_model: NousResearch/Llama-2-13b-hf
  ---

  # Model Card for Model ID
@@ -216,4 +216,23 @@ The following `bitsandbytes` quantization config was used during training:
  ### Framework versions


+ - PEFT 0.6.0
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32
+
+ ### Framework versions
+
+
  - PEFT 0.6.0
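The quantization block appended to the README describes a plain 8-bit `bitsandbytes` load. As a minimal sketch, assuming the chat base model recorded in this commit's `adapter_config.json`/`config.json` and nothing about the actual training script, those values map onto `transformers`' `BitsAndBytesConfig` like this:

```python
# Sketch only: mirrors the README's quantization list; the model id is taken
# from adapter_config.json / config.json in this commit, everything else
# (device_map, variable names) is an illustrative assumption.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    load_in_4bit=False,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",              # carried along but unused while load_in_4bit is False
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)

base_model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-13b-chat-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
```

Because `load_in_8bit` is True and `load_in_4bit` is False, the `bnb_4bit_*` entries in the README are recorded but have no effect on this run.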
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
  {
  "alpha_pattern": {},
  "auto_mapping": null,
- "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-chat-hf",
  "bias": "none",
  "fan_in_fan_out": null,
  "inference_mode": true,
@@ -16,12 +16,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "k_proj",
- "gate_proj",
  "up_proj",
+ "k_proj",
  "down_proj",
- "q_proj",
  "v_proj",
+ "gate_proj",
+ "q_proj",
  "o_proj"
  ],
  "task_type": "CAUSAL_LM"
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:68233e958260d3384a1f3af1a0428e9de96c0187fc6ef41a771ba0809f865781
+ oid sha256:8f901d4b195002956b05d8bc4379698fbad417cbe96dba82402b90c6d99d8a57
  size 500897101
added_tokens.json CHANGED
@@ -1,5 +1,6 @@
  {
  "</s>": 2,
+ "<pad>": 32000,
  "<s>": 1,
  "<unk>": 0
  }
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "NousResearch/Llama-2-13b-hf",
+ "_name_or_path": "NousResearch/Llama-2-13b-chat-hf",
  "architectures": [
  "LlamaForCausalLM"
  ],
@@ -36,5 +36,5 @@
  "torch_dtype": "float16",
  "transformers_version": "4.35.0.dev0",
  "use_cache": false,
- "vocab_size": 32000
+ "vocab_size": 32001
  }
tokenizer_config.json CHANGED
@@ -25,6 +25,14 @@
  "rstrip": false,
  "single_word": false,
  "special": true
+ },
+ "32000": {
+ "content": "<pad>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": true,
+ "single_word": false,
+ "special": false
  }
  },
  "additional_special_tokens": [],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:18461b3ceb672af29bdf70f2b48ef86d90cd60e8af45447000342dce27f9a81c
+ oid sha256:44e1a7219538094be7c3aa3b9acee4e4ca955a354517e10f6445d9860f4e9041
  size 4475
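`training_args.bin` is the pickled `TrainingArguments` object that `transformers`' `Trainer` saves alongside checkpoints; only its LFS pointer changes in this commit. If you want to inspect the hyperparameters behind this epoch after downloading the file, a minimal sketch (local path and printed fields are assumptions, not shown in the diff):

```python
# Sketch only: Trainer writes TrainingArguments with torch.save, so the file
# can be loaded back for inspection; transformers must be installed for the
# unpickling to succeed, and recent torch versions need weights_only=False
# because this is an arbitrary pickled Python object.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate, args.per_device_train_batch_size)
```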