txiong23 committed
Commit 8a4728c
1 Parent(s): cb5c53d


added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "<image>": 151646,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644
+ }
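These four entries extend the Qwen2 vocabulary; <image> (ID 151646) is the placeholder token the model later replaces with vision features. A minimal sketch to verify the mapping, assuming the hub id taken from _name_or_path in config.json below:

# Minimal sketch: check that the added tokens resolve to the IDs listed above.
# The repo id is an assumption, taken from "_name_or_path" in config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("lmms-lab/llava-onevision-qwen2-72b-ov")
for t in ["<image>", "<|endoftext|>", "<|im_end|>", "<|im_start|>"]:
    print(t, "->", tok.convert_tokens_to_ids(t))
# expected: 151646, 151643, 151645, 151644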
config.json ADDED
@@ -0,0 +1,217 @@
+ {
+   "_name_or_path": "lmms-lab/llava-onevision-qwen2-72b-ov",
+   "architectures": [
+     "LlavaQwenForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "ignore_index": -100,
+   "image_aspect_ratio": "anyres_max_9",
+   "image_crop_resolution": null,
+   "image_grid_pinpoints": [
+     [
+       384,
+       384
+     ],
+     [
+       384,
+       768
+     ],
+     [
+       384,
+       1152
+     ],
+     [
+       384,
+       1536
+     ],
+     [
+       384,
+       1920
+     ],
+     [
+       384,
+       2304
+     ],
+     [
+       768,
+       384
+     ],
+     [
+       768,
+       768
+     ],
+     [
+       768,
+       1152
+     ],
+     [
+       768,
+       1536
+     ],
+     [
+       768,
+       1920
+     ],
+     [
+       768,
+       2304
+     ],
+     [
+       1152,
+       384
+     ],
+     [
+       1152,
+       768
+     ],
+     [
+       1152,
+       1152
+     ],
+     [
+       1152,
+       1536
+     ],
+     [
+       1152,
+       1920
+     ],
+     [
+       1152,
+       2304
+     ],
+     [
+       1536,
+       384
+     ],
+     [
+       1536,
+       768
+     ],
+     [
+       1536,
+       1152
+     ],
+     [
+       1536,
+       1536
+     ],
+     [
+       1536,
+       1920
+     ],
+     [
+       1536,
+       2304
+     ],
+     [
+       1920,
+       384
+     ],
+     [
+       1920,
+       768
+     ],
+     [
+       1920,
+       1152
+     ],
+     [
+       1920,
+       1536
+     ],
+     [
+       1920,
+       1920
+     ],
+     [
+       1920,
+       2304
+     ],
+     [
+       2304,
+       384
+     ],
+     [
+       2304,
+       768
+     ],
+     [
+       2304,
+       1152
+     ],
+     [
+       2304,
+       1536
+     ],
+     [
+       2304,
+       1920
+     ],
+     [
+       2304,
+       2304
+     ]
+   ],
+   "image_split_resolution": null,
+   "image_token_index": 151646,
+   "initializer_range": 0.02,
+   "intermediate_size": 29568,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 80,
+   "mm_hidden_size": 1152,
+   "mm_newline_position": "one_token",
+   "mm_patch_merge_type": "spatial_unpad",
+   "mm_projector_lr": null,
+   "mm_projector_type": "mlp2x_gelu",
+   "mm_resampler_type": null,
+   "mm_spatial_pool_mode": "bilinear",
+   "mm_tunable_parts": "mm_vision_tower,mm_mlp_adapter,mm_language_model",
+   "mm_use_im_patch_token": false,
+   "mm_use_im_start_end": false,
+   "mm_vision_select_feature": "patch",
+   "mm_vision_select_layer": -2,
+   "mm_vision_tower": "google/siglip-so400m-patch14-384",
+   "mm_vision_tower_lr": 1e-06,
+   "model_type": "llava",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 80,
+   "num_key_value_heads": 8,
+   "pos_skipping_range": 4096,
+   "projector_hidden_act": "gelu",
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": 131072,
+   "text_config": {
+     "model_type": "llama"
+   },
+   "tie_word_embeddings": false,
+   "tokenizer_model_max_length": 32768,
+   "tokenizer_padding_side": "right",
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.40.0.dev0",
+   "use_cache": true,
+   "use_mm_proj": true,
+   "use_pos_skipping": false,
+   "use_sliding_window": false,
+   "vision_config": {
+     "hidden_size": 1024,
+     "image_size": 336,
+     "intermediate_size": 4096,
+     "model_type": "clip_vision_model",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "patch_size": 14,
+     "projection_dim": 768,
+     "vocab_size": 32000
+   },
+   "vision_feature_layer": -2,
+   "vision_feature_select_strategy": "default",
+   "vision_tower_pretrained": null,
+   "vocab_size": 152064
+ }
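The "anyres_max_9" image_aspect_ratio together with image_grid_pinpoints drives high-resolution tiling: the preprocessor picks the candidate canvas (every dimension a multiple of the 384-px SigLIP input) that best fits the image, then splits it into 384x384 tiles; the _9 suffix caps how many tiles are kept at nine. A sketch of the best-fit selection, re-implemented for illustration after LLaVA's select_best_resolution and treating each pair as (width, height):

# Illustrative re-implementation of LLaVA-style best-fit resolution selection
# over image_grid_pinpoints; not the upstream code itself.
def select_best_resolution(original_size, possible_resolutions):
    ow, oh = original_size
    best_fit, max_effective, min_wasted = None, 0, float("inf")
    for w, h in possible_resolutions:
        scale = min(w / ow, h / oh)                # fit without cropping
        dw, dh = int(ow * scale), int(oh * scale)  # downscaled image size
        effective = min(dw * dh, ow * oh)          # usable image pixels
        wasted = w * h - effective                 # padding pixels on the canvas
        if effective > max_effective or (effective == max_effective and wasted < min_wasted):
            best_fit, max_effective, min_wasted = (w, h), effective, wasted
    return best_fit

# same 36 candidates as the config, in the same order
pinpoints = [(w, h) for w in range(384, 2305, 384) for h in range(384, 2305, 384)]
print(select_best_resolution((1280, 800), pinpoints))  # -> (1536, 1152)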
generation_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "pad_token_id": 151643,
+   "repetition_penalty": 1.05,
+   "temperature": 0.7,
+   "top_k": 20,
+   "top_p": 0.8,
+   "transformers_version": "4.40.0.dev0"
+ }
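These are the decoding defaults generate() loads from the checkpoint: sampling with temperature 0.7, nucleus p 0.8, top-k 20, a 1.05 repetition penalty, stopping on either <|im_end|> or <|endoftext|>. A sketch of expressing the same settings explicitly; model and inputs are assumed to exist:

# Sketch: the JSON above materializes as a GenerationConfig, and any field
# can be overridden per call. `model` and `inputs` are assumed to exist.
from transformers import GenerationConfig

gen_cfg = GenerationConfig(
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
    bos_token_id=151643,
    eos_token_id=[151645, 151643],  # stop on <|im_end|> or <|endoftext|>
    pad_token_id=151643,
)
# outputs = model.generate(**inputs, generation_config=gen_cfg, max_new_tokens=256)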
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
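The map registers <|im_start|>/<|im_end|> as special tokens, sets <|im_end|> as EOS, and reuses <|endoftext|> as the pad token; padding is applied on the right (see tokenizer_config.json below). A quick sketch, hub id assumed as above:

# Sketch: EOS/pad resolution and right-padding with <|endoftext|> (ID 151643).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("lmms-lab/llava-onevision-qwen2-72b-ov")
assert tok.eos_token == "<|im_end|>" and tok.pad_token == "<|endoftext|>"
batch = tok(["hi", "a noticeably longer prompt"], padding=True, return_tensors="pt")
# the shorter row is right-padded with tok.pad_token_id == 151643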
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<image>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "bos_token": null,
+   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "processor_class": "LlavaProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
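The chat_template is standard ChatML: it injects a default system prompt whenever the first message is not a system turn, wraps each turn in <|im_start|>...<|im_end|>, and appends an open assistant turn when add_generation_prompt is set. Rendering it through apply_chat_template (hub id assumed as above):

# Sketch: render the ChatML template above as text, without tokenizing.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("lmms-lab/llava-onevision-qwen2-72b-ov")
messages = [{"role": "user", "content": "<image>\nWhat is shown in this image?"}]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <image>
# What is shown in this image?<|im_end|>
# <|im_start|>assistant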
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb9693d87500d88307bee8bc4b6dde2691cb68f7da71c43aa13291d276fabc9a
+ size 7864
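The diff shows only the Git LFS pointer; the 7.9 kB blob it references is a pickled TrainingArguments object. A hedged sketch of inspecting it after download (unpickling may require the training code, e.g. the llava package, on the path):

# Sketch: training_args.bin is a pickled transformers TrainingArguments;
# weights_only=False is required on newer torch since this is not a tensor file.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # e.g. TrainingArguments (or a subclass)
print(args.learning_rate, args.per_device_train_batch_size)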
vocab.json ADDED
The diff for this file is too large to render. See raw diff