mlabonne committed
Commit c95e50d
1 Parent(s): df27122

Trained with Unsloth

README.md CHANGED
@@ -1,6 +1,9 @@
 ---
 library_name: transformers
-tags: []
+tags:
+- unsloth
+- trl
+- sft
 ---
 
 # Model Card for Model ID
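The added tags (`unsloth`, `trl`, `sft`) describe the usual Unsloth + TRL supervised fine-tuning path, consistent with the commit message. A minimal sketch of that kind of run, assuming a LoRA adapter and a placeholder local dataset; the actual training data and hyperparameters are not recorded in this commit:

```python
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Base checkpoint named in the config diff below; 4-bit load for QLoRA-style SFT.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach LoRA adapters (rank/alpha here are illustrative, not from the commit).
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

# Hypothetical local dataset with a "text" column.
dataset = load_dataset("json", data_files="train.jsonl", split="train")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        max_steps=60,
        output_dir="outputs",
    ),
)
trainer.train()
```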
config.json CHANGED
@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
+  "_name_or_path": "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 128256,
-  "eos_token_id": 128257,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -17,7 +17,7 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "pad_token_id": 128257,
+  "pad_token_id": 128004,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -30,7 +30,8 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.43.3",
+  "transformers_version": "4.44.1",
+  "unsloth_version": "2024.8",
   "use_cache": true,
-  "vocab_size": 128258
+  "vocab_size": 128256
 }
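The config changes undo the parent commit's two added tokens: `vocab_size` drops from 128258 back to 128256, and `bos`/`eos`/`pad` move from the custom IDs 128256/128257 to the stock Llama 3.1 special tokens. A quick check against the tokenizer, using a placeholder repo id:

```python
from transformers import AutoTokenizer

# Placeholder id; substitute this repo's actual model id.
tok = AutoTokenizer.from_pretrained("mlabonne/model-id")

# The new config points bos/eos/pad at the stock Llama 3.1 specials.
for token_id in (128000, 128001, 128004):
    print(token_id, tok.convert_ids_to_tokens(token_id))
# Expected for the stock Llama 3.1 tokenizer:
# 128000 <|begin_of_text|>
# 128001 <|end_of_text|>
# 128004 <|finetune_right_pad_id|>
```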
generation_config.json CHANGED
@@ -1,15 +1,11 @@
 {
   "_from_model_config": true,
-  "bos_token_id": [128256, 128000],
+  "bos_token_id": 128000,
   "do_sample": true,
-  "eos_token_id": [
-    128001,
-    128008,
-    128009,
-    128257
-  ],
-  "pad_token_id": 128257,
+  "eos_token_id": 128001,
+  "max_length": 131072,
+  "pad_token_id": 128004,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.43.3"
+  "transformers_version": "4.44.1"
 }
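generation_config.json keeps the Llama 3.1 sampling defaults (do_sample with temperature 0.6, top_p 0.9), collapses the eos list down to 128001, and adds the model's 131072-token max_length. With no overrides, `generate()` picks these defaults up automatically; a sketch, again with a placeholder repo id:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "mlabonne/model-id"  # placeholder; substitute this repo's actual id
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tok("The capital of France is", return_tensors="pt").to(model.device)
# No sampling arguments passed: generate() falls back to generation_config.json,
# i.e. do_sample=True, temperature=0.6, top_p=0.9, eos=128001, pad=128004.
out = model.generate(**inputs, max_new_tokens=64)
print(tok.decode(out[0], skip_special_tokens=True))
```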
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f77ded706c176f095b2041c9dcf42f09af15f8bb51b7523015d5ef3f711957ae
-size 4976715056
+oid sha256:b90c639eaff877e1377b57098c7c85ce4fa11c81de3df0b042aee32f458bb54b
+size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fb7efc0264dd2fe1d5f91468776691ed3f603bd2d612cde4b0863f1c3efa468
+oid sha256:b8ed1c62e1d2e38a55015daf5a0689931dfbf0c4006d082ac9dee8b127b0052b
 size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27b1158a44135040cd34009f12b209da6663bf2709f328fe44968c1809507b8c
+oid sha256:256b2e01a853299499c16af3136d98b962f2042fb0b8ec0b66f69404c56ab4c0
 size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a801af0967dd3fdc2ca06e48ddf0d2883eea12480b6cf8e5939495f8a089f7d3
-size 1168155192
+oid sha256:9aece88193eb62fa9c1826f8de7f0d57d689436f96f75dfec82a938448c5c39f
+size 1168138808
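These entries are Git LFS pointers, so only the sha256 digest and byte size change when the shards are rewritten. The size deltas fit the vocab change above: shard 1 (which holds embed_tokens) and shard 4 (which holds lm_head, per the index below) each shrink by 16384 bytes, i.e. 2 tokens × 4096 dims × 2 bytes in bf16. A small sketch for checking a downloaded shard against its pointer:

```python
import hashlib
from pathlib import Path

def lfs_fields(path: str) -> tuple[str, int]:
    """Compute the (sha256 hex digest, byte size) pair that should match
    the `oid sha256:` and `size` lines of the file's LFS pointer."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest(), Path(path).stat().st_size

# For the first shard after this commit, expect:
# ('b90c639eaff877e1377b57098c7c85ce4fa11c81de3df0b042aee32f458bb54b', 4976698672)
print(lfs_fields("model-00001-of-00004.safetensors"))
```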
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 16060555264
+    "total_size": 16060522496
   },
   "weight_map": {
     "lm_head.weight": "model-00004-of-00004.safetensors",