---
# Training configuration with three top-level sections: model, datasets, run.
# NOTE(review): the nesting below was reconstructed from a source in which the
# whole file was collapsed onto one line; confirm the section boundaries
# against the config loader before relying on them.

model:
  arch: mini_gpt4_llama_v2
  model_type: pretrain_vicuna
  llama_model: "meta-llama/Llama-2-7b-chat-hf"
  max_txt_len: 160
  max_context_len: 512
  # NOTE(review): end_sym is an empty string here; confirm this is intentional
  # and not a token that was stripped when the file was copied.
  end_sym: ""
  prompt_path: "train_configs/alignment.txt"
  # The trailing space inside the quotes is part of the value; keep it.
  prompt_template: '[INST] {} [/INST] '
  # Placeholder: replace with the path to the pretrained checkpoint.
  ckpt: "put your pretrained ckpt here"

datasets:
  cc_sbu_align:
    batch_size: 12
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

run:
  task: image_text_pretrain

  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates carry an explicit decimal point so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve them as floats rather than strings.
  init_lr: 3.0e-5
  min_lr: 1.0e-5
  warmup_lr: 1.0e-6

  weight_decay: 0.05
  max_epoch: 5
  iters_per_epoch: 200
  num_workers: 4

  warmup_steps: 200

  seed: 42
  output_dir: "output/minigpt4_stage2_finetune"

  amp: true
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true
  wandb_log: true
  job_name: minigpt4_finetune