kentang1998 committed on
Commit 6f8e13a
1 Parent(s): aa4f5ed

Upload folder using huggingface_hub

Files changed (1)
  1. config.json +5 -5
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-pretrain_r4",
+  "_name_or_path": "./vlm",
   "architectures": [
     "LlavaLlamaModel"
   ],
@@ -8,7 +8,7 @@
   "image_aspect_ratio": "resize",
   "interpolate_mode": "linear",
   "llm_cfg": {
-    "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-pretrain_r4/llm",
+    "_name_or_path": "./llm",
     "add_cross_attention": false,
     "architectures": [
       "LlamaForCausalLM"
@@ -93,7 +93,7 @@
   },
   "mm_hidden_size": 1152,
   "mm_projector_cfg": {
-    "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-pretrain_r4/mm_projector",
+    "_name_or_path": "./mm_projector",
     "add_cross_attention": false,
     "architectures": [
       "MultimodalProjector"
@@ -166,7 +166,7 @@
   "model_dtype": "torch.bfloat16",
   "model_type": "llava_llama",
   "num_video_frames": 8,
-  "resume_path": "./checkpoints/vila-siglip-llama3-8b-pretrain_r4",
+  "resume_path": "./vlm",
   "s2": false,
   "s2_max_split_size": 336,
   "s2_scales": "336,672,1008",
@@ -176,7 +176,7 @@
   "tune_vision_tower": true,
   "vision_resolution": -1,
   "vision_tower_cfg": {
-    "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-pretrain_r4/vision_tower",
+    "_name_or_path": "./vision_tower",
     "add_cross_attention": false,
     "architectures": [
       "SiglipVisionModel"