# Listing metadata from the file viewer: file size 1,880 bytes, identifier 09bf9a3.
# General run settings.
image_finetune: false
# Output location and base model identifier.
# NOTE(review): the model path looks like a hub-style "owner/name" id — confirm
# how the loader resolves it.
output_dir: 'outputs'
pretrained_model_path: 'runwayml/stable-diffusion-v1-5'
# Additional UNet settings, grouped under a single mapping.
# NOTE(review): `motion_module_type` and `motion_module_kwargs` are assumed to
# belong inside this mapping — confirm against the code that reads it.
unet_additional_kwargs:
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  motion_module_type: Vanilla
  # Settings for the motion module selected above.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div: 1
    zero_initialize: true
# Noise scheduler settings, grouped under a single mapping.
# NOTE(review): presumably forwarded as keyword arguments to the scheduler
# constructor — confirm against the training script.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: false
# Training dataset settings: a CSV file plus a folder of videos.
# NOTE(review): paths are relative — presumably resolved from the working
# directory of the training run; confirm.
train_data:
  csv_path: "data/output.csv"
  video_folder: "data/output"
  sample_size: 256
  sample_stride: 4
  sample_n_frames: 5
# Validation settings: the prompts to render and the sampling parameters.
validation_data:
  prompts:
    - "Snow rocky mountains peaks canyon. Snow blanketed rocky mountains surround and shadow deep canyons."
    - "A drone view of celebration with Christmas tree and fireworks, starry sky - background."
    - "Robot dancing in times square."
    - "Pacific coast, carmel by the sea ocean and waves."
  num_inference_steps: 20
  guidance_scale: 12.5
  # NOTE(review): the three keys below are assumed to belong to
  # `validation_data` — confirm against the code that reads them.
  temporal_context: 24
  use_inv_latent: true
  num_inv_steps: 50
# Modules selected for training.
# NOTE(review): entries are presumably matched against parameter names — confirm.
trainable_modules:
  - 'motion_modules.'
# Empty string: no UNet checkpoint path is configured here.
unet_checkpoint_path: ''
# Optimizer and batch settings.
learning_rate: 3.0e-5
train_batch_size: 1
# Training length and checkpoint cadence.
# NOTE(review): -1 presumably means "not used" for the epoch-based limits — confirm.
max_train_epoch: -1
max_train_steps: 300
checkpointing_epochs: -1
checkpointing_steps: 1000
# Validation cadence; the tuple variant below is left disabled.
validation_steps: 100
# validation_steps_tuple: [2, 50]
# Seed value for the run.
global_seed: 42
# Runtime switches (canonical lowercase booleans).
mixed_precision_training: true
enable_xformers_memory_efficient_attention: true
# NOTE(review): debug flag is enabled — confirm this is intended for real runs.
is_debug: true