bluestarburst committed on
Commit
48571f9
1 Parent(s): 1e533fd

Upload folder using huggingface_hub

Browse files
data/data.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
data/output.csv CHANGED
@@ -12,26 +12,25 @@ videoid,name,page_dir
12
  10,"camera panning right to left, a bird's - eye view of a row of buildings in a city with trees in the foreground",raw/3dDS1RL
13
  11,"camera panning left to right, a computer generated image of a blue diamond in the middle of a green and leafy area",raw/3dDS20
14
  12,"camera panning left to right, a computer generated image of a landscape with trees, rocks, and a path through a forest",raw/3dDS21
15
- 13,"camera panning left to right, a computer generated image of a building with a blue roof and a blue roof on top of it",raw/3dDS22
16
  14,"camera panning left to right, a map of a small town with a lot of trees and bushes in the middle of it",raw/3dDS23
17
- 15,"camera panning left to right, a computer generated image of a building with a blue roof and a green lawn in front of it",raw/3dDS24
18
  16,"camera panning left to right, an artist's rendering of a floating island in the middle of a large body of water",raw/3dDS25
19
  17,"camera panning left to right, a couple of white birds floating on top of a lake filled with waterlily green leaves",raw/3dDS26
20
- 18,"camera panning left to right, a bunch of poles that are standing in front of a building with a sign that says shop",raw/3dDS27
21
- 19,"camera panning left to right, a 3d rendering of a jail cell with bars and bars on each side of the cell wall",raw/3dDS28
22
- 20,"camera panning left to right, an artist's rendering of a bridge over a body of water with a building in the background",raw/3dDS29
23
- 21,"camera panning right to left, a bird's eye view of a building with a lot of trees in front of it",raw/3dDS2RL
24
- 22,"camera panning left to right, an artist's rendering of a highway with a bridge in the middle and trees on both sides",raw/3dDS30
25
- 23,"camera panning left to right, an aerial view of a freeway with multiple lanes and a bridge in the foreground and a cityscape in the background",raw/3dDS31
26
- 24,"camera panning left to right, an artist's rendering of a bridge over a body of water with a clock tower in the background",raw/3dDS32
27
- 25,"camera panning left to right, a set of stairs leading up to the top of a set of stairs in a dimly lit area",raw/3dDS33
28
- 26,"camera panning left to right, a 3d rendering of a subway station with red and yellow striped barriers and a red and white sign",raw/3dDS34
29
- 27,"camera panning left to right, a screenshot of a subway station with a man in a suit and a woman in a dress",raw/3dDS35
30
- 28,"camera panning left to right, a screenshot of a hallway in a building with a gate and a sign on the wall",raw/3dDS36
31
- 29,"camera panning left to right, a bird's - eye view of a row of houses in the suburbs of a city",raw/3dDS3LR
32
- 30,"camera panning left to right, a bird's eye view of a house with solar panels on the top of the roof",raw/3dDS4
33
- 31,"camera panning left to right, a bird's eye view of a row of houses with solar panels on top of them",raw/3dDS5
34
- 32,"camera panning left to right, a screenshot of a city street with a bench on one side and a building on the other",raw/3dDS6
35
- 33,"camera panning right to left, a computer generated image of a computer generated image of a construction site with a truck in the background",raw/3dDS7RL
36
- 34,"camera panning right to left, a screenshot of a city with a bunch of green objects in the middle of the street",raw/3dDS8RL
37
- 35,"camera panning left to right, a computer generated image of a city street with tall buildings and a clock tower in the distance",raw/3dDS9
 
12
  10,"camera panning right to left, a bird's - eye view of a row of buildings in a city with trees in the foreground",raw/3dDS1RL
13
  11,"camera panning left to right, a computer generated image of a blue diamond in the middle of a green and leafy area",raw/3dDS20
14
  12,"camera panning left to right, a computer generated image of a landscape with trees, rocks, and a path through a forest",raw/3dDS21
15
+ 13,"camera panning right to left, a computer generated image of a building with a blue roof and a blue roof on top of it",raw/3dDS22RL
16
  14,"camera panning left to right, a map of a small town with a lot of trees and bushes in the middle of it",raw/3dDS23
17
+ 15,"camera panning right to left, a computer generated image of a building with a blue roof and a green lawn in front of it",raw/3dDS24RL
18
  16,"camera panning left to right, an artist's rendering of a floating island in the middle of a large body of water",raw/3dDS25
19
  17,"camera panning left to right, a couple of white birds floating on top of a lake filled with waterlily green leaves",raw/3dDS26
20
+ 18,"camera panning left to right, a 3d rendering of a jail cell with bars and bars on each side of the cell wall",raw/3dDS28
21
+ 19,"camera panning right to left, an artist's rendering of a bridge over a body of water with a building in the background",raw/3dDS29RL
22
+ 20,"camera panning right to left, a bird's eye view of a building with a lot of trees in front of it",raw/3dDS2RL
23
+ 21,"camera panning left to right, an artist's rendering of a highway with a bridge in the middle and trees on both sides",raw/3dDS30
24
+ 22,"camera panning left to right, an aerial view of a freeway with multiple lanes and a bridge in the foreground and a cityscape in the background",raw/3dDS31
25
+ 23,"camera panning left to right, an artist's rendering of a bridge over a body of water with a clock tower in the background",raw/3dDS32
26
+ 24,"camera panning left to right, a set of stairs leading up to the top of a set of stairs in a dimly lit area",raw/3dDS33
27
+ 25,"camera panning right to left, a 3d rendering of a subway station with red and yellow striped barriers and a red and white sign",raw/3dDS34RL
28
+ 26,"camera panning right to left, a screenshot of a subway station with a man in a suit and a woman in a dress",raw/3dDS35RL
29
+ 27,"camera panning right to left, a screenshot of a hallway in a building with a gate and a sign on the wall",raw/3dDS36RL
30
+ 28,"camera panning left to right, a bird's - eye view of a row of houses in the suburbs of a city",raw/3dDS3LR
31
+ 29,"camera panning left to right, a bird's eye view of a house with solar panels on the top of the roof",raw/3dDS4
32
+ 30,"camera panning left to right, a bird's eye view of a row of houses with solar panels on top of them",raw/3dDS5
33
+ 31,"camera panning left to right, a screenshot of a city street with a bench on one side and a building on the other",raw/3dDS6
34
+ 32,"camera panning right to left, a computer generated image of a computer generated image of a construction site with a truck in the background",raw/3dDS7RL
35
+ 33,"camera panning right to left, a screenshot of a city with a bunch of green objects in the middle of the street",raw/3dDS8RL
36
+ 34,"camera panning left to right, a computer generated image of a city street with tall buildings and a clock tower in the distance",raw/3dDS9
 
data/output/18.mp4 CHANGED
Binary files a/data/output/18.mp4 and b/data/output/18.mp4 differ
 
data/output/19.mp4 CHANGED
Binary files a/data/output/19.mp4 and b/data/output/19.mp4 differ
 
data/output/20.mp4 CHANGED
Binary files a/data/output/20.mp4 and b/data/output/20.mp4 differ
 
data/output/21.mp4 CHANGED
Binary files a/data/output/21.mp4 and b/data/output/21.mp4 differ
 
data/output/22.mp4 CHANGED
Binary files a/data/output/22.mp4 and b/data/output/22.mp4 differ
 
data/output/23.mp4 CHANGED
Binary files a/data/output/23.mp4 and b/data/output/23.mp4 differ
 
data/output/24.mp4 CHANGED
Binary files a/data/output/24.mp4 and b/data/output/24.mp4 differ
 
data/output/25.mp4 CHANGED
Binary files a/data/output/25.mp4 and b/data/output/25.mp4 differ
 
data/output/26.mp4 CHANGED
Binary files a/data/output/26.mp4 and b/data/output/26.mp4 differ
 
data/output/27.mp4 CHANGED
Binary files a/data/output/27.mp4 and b/data/output/27.mp4 differ
 
data/output/28.mp4 CHANGED
Binary files a/data/output/28.mp4 and b/data/output/28.mp4 differ
 
data/output/29.mp4 CHANGED
Binary files a/data/output/29.mp4 and b/data/output/29.mp4 differ
 
data/output/30.mp4 CHANGED
Binary files a/data/output/30.mp4 and b/data/output/30.mp4 differ
 
data/output/31.mp4 CHANGED
Binary files a/data/output/31.mp4 and b/data/output/31.mp4 differ
 
data/output/32.mp4 CHANGED
Binary files a/data/output/32.mp4 and b/data/output/32.mp4 differ
 
data/output/33.mp4 CHANGED
Binary files a/data/output/33.mp4 and b/data/output/33.mp4 differ
 
data/output/34.mp4 CHANGED
Binary files a/data/output/34.mp4 and b/data/output/34.mp4 differ
 
handler.py CHANGED
@@ -9,6 +9,8 @@ import os
9
  import json
10
  import base64
11
 
 
 
12
  from diffusers.utils.import_utils import is_xformers_available
13
  from typing import Any
14
  import torch
@@ -21,7 +23,10 @@ from animatediff.models.unet import UNet3DConditionModel
21
  from animatediff.pipelines.pipeline_animation import AnimationPipeline
22
  from animatediff.utils.util import save_videos_grid
23
  from animatediff.utils.util import load_weights
 
 
24
 
 
25
 
26
  class EndpointHandler():
27
  def __init__(self, model_path: str = "bluestarburst/AnimateDiff-SceneFusion"):
@@ -46,6 +51,15 @@ class EndpointHandler():
46
 
47
  unet = UNet3DConditionModel.from_pretrained_2d(pretrained_model_path=unet_model_path, unet_additional_kwargs=OmegaConf.to_container(inference_config.unet_additional_kwargs), config_path=unet_config_path)
48
 
 
 
 
 
 
 
 
 
 
49
  if is_xformers_available(): unet.enable_xformers_memory_efficient_attention()
50
  else: assert False
51
 
@@ -56,18 +70,67 @@ class EndpointHandler():
56
 
57
  # huggingface download motion module from bluestarburst/AnimateDiff-SceneFusion/models/Motion_Module/mm_sd_v15.ckpt
58
 
59
- motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/Motion_Module/mm_sd_v15.ckpt")
60
-
61
- self.pipeline = load_weights(
62
- self.pipeline,
63
- # motion module
64
- motion_module_path = motion_module,
65
- motion_module_lora_configs = [],
66
- # image layers
67
- dreambooth_model_path = "",
68
- lora_model_path = "",
69
- lora_alpha = 0.8,
70
- ).to("cuda")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def __call__(self, data : Any):
73
  """
@@ -76,19 +139,33 @@ class EndpointHandler():
76
  """
77
 
78
  prompt = data.pop("prompt", "")
79
- negative_prompt = data.pop("negative_prompt", "easynegative,bad_construction,bad_structure,bad_wail,bad_windows,blurry,cloned_window,cropped,deformed,disfigured,error,extra_windows,extra_chimney,extra_door,extra_structure,extra_frame,fewer_digits,fused_structure,gross_proportions,jpeg_artifacts,long_roof,low_quality,structure_limbs,missing_windows,missing_doors,missing_roofs,mutated_structure,mutation,normal_quality,out_of_frame,owres,poorly_drawn_structure,poorly_drawn_house,signature,text,too_many_windows,ugly,username,uta,watermark,worst_quality")
 
80
  steps = data.pop("steps", 25)
81
  guidance_scale = data.pop("guidance_scale", 12.5)
82
 
 
 
83
  vids = self.pipeline(
84
- prompt=prompt,
85
- negative_prompt=negative_prompt,
86
- num_inference_steps=steps,
87
- guidance_scale=guidance_scale,
88
- width= 256,
89
- height= 256,
90
- video_length= 5,
91
- ).videos
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  videos = rearrange(vids, "b c t h w -> t b c h w")
94
  n_rows=6
 
9
  import json
10
  import base64
11
 
12
+ from safetensors import safe_open
13
+
14
  from diffusers.utils.import_utils import is_xformers_available
15
  from typing import Any
16
  import torch
 
23
  from animatediff.pipelines.pipeline_animation import AnimationPipeline
24
  from animatediff.utils.util import save_videos_grid
25
  from animatediff.utils.util import load_weights
26
+ from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
27
+ from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora
28
 
29
+ current_model = "backup"
30
 
31
  class EndpointHandler():
32
  def __init__(self, model_path: str = "bluestarburst/AnimateDiff-SceneFusion"):
 
51
 
52
  unet = UNet3DConditionModel.from_pretrained_2d(pretrained_model_path=unet_model_path, unet_additional_kwargs=OmegaConf.to_container(inference_config.unet_additional_kwargs), config_path=unet_config_path)
53
 
54
+ # inv_latent_path = f"{OUTPUT_DIR}/inv_latents/ddim_latent-1.pt"
55
+ inv_latent_path = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename=f"models/Motion_Module/{current_model}/inv_latents/ddim_latent-1.pt")
56
+ self.latents = torch.load(inv_latent_path).to(torch.float)
57
+ print(self.latents.shape, self.latents.dtype)
58
+
59
+ # torch.backends.cuda.enable_mem_efficient_sdp(True)
60
+ torch.backends.cuda.enable_flash_sdp(True)
61
+ torch.backends.cuda.enable_math_sdp(True)
62
+
63
  if is_xformers_available(): unet.enable_xformers_memory_efficient_attention()
64
  else: assert False
65
 
 
70
 
71
  # huggingface download motion module from bluestarburst/AnimateDiff-SceneFusion/models/Motion_Module/mm_sd_v15.ckpt
72
 
73
+ # motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/Motion_Module/mm_sd_v15.ckpt")
74
+ motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename=f"models/Motion_Module/{current_model}/mm.pth")
75
+ # LORA_DREAMBOOTH_PATH="models/DreamBooth_LoRA/toonyou_beta3.safetensors"
76
+
77
+ LORA_DREAMBOOTH_PATH = None
78
+ LORA_DREAMBOOTH_PATH = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/DreamBooth_LoRA/toonyou_beta3.safetensors")
79
+
80
+ # self.pipeline = load_weights(
81
+ # self.pipeline,
82
+ # # motion module
83
+ # motion_module_path = motion_module,
84
+ # motion_module_lora_configs = [],
85
+ # # image layers
86
+ # dreambooth_model_path = "",
87
+ # lora_model_path = "",
88
+ # lora_alpha = 0.8,
89
+ # ).to("cuda")
90
+
91
+ motion_module_state_dict = torch.load(motion_module, map_location="cpu")
92
+ missing, unexpected = self.pipeline.unet.load_state_dict(motion_module_state_dict, strict=False)
93
+ assert len(unexpected) == 0
94
+
95
+
96
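+ # FIXME: the LoRA-only branch below calls safe_open("") with an empty base-model path; supply a real base checkpoint path before loading LoRA-only files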
97
+ if LORA_DREAMBOOTH_PATH != "":
98
+ if LORA_DREAMBOOTH_PATH.endswith(".ckpt"):
99
+ state_dict = torch.load(LORA_DREAMBOOTH_PATH)
100
+ self.pipeline.unet.load_state_dict(state_dict)
101
+
102
+ elif LORA_DREAMBOOTH_PATH.endswith(".safetensors"):
103
+ state_dict = {}
104
+ with safe_open(LORA_DREAMBOOTH_PATH, framework="pt", device="cpu") as f:
105
+ for key in f.keys():
106
+ state_dict[key] = f.get_tensor(key)
107
+
108
+ is_lora = all("lora" in k for k in state_dict.keys())
109
+ if not is_lora:
110
+ base_state_dict = state_dict
111
+ else:
112
+ base_state_dict = {}
113
+ with safe_open("", framework="pt", device="cpu") as f:
114
+ for key in f.keys():
115
+ base_state_dict[key] = f.get_tensor(key)
116
+
117
+ # vae
118
+ converted_vae_checkpoint = convert_ldm_vae_checkpoint(base_state_dict, self.pipeline.vae.config)
119
+ self.pipeline.vae.load_state_dict(converted_vae_checkpoint)
120
+ # unet
121
+ converted_unet_checkpoint = convert_ldm_unet_checkpoint(base_state_dict, self.pipeline.unet.config)
122
+ self.pipeline.unet.load_state_dict(converted_unet_checkpoint, strict=False)
123
+ # text_model (TODO: problem here)
124
+ # converted_test_encoder_checkpoint = convert_ldm_clip_checkpoint(base_state_dict)
125
+ # pipeline.text_encoder = converted_test_encoder_checkpoint
126
+
127
+ # import pdb
128
+ # pdb.set_trace()
129
+ if is_lora:
130
+ self.pipeline = convert_lora(self.pipeline, state_dict)
131
+ # self.pipeline = convert_lora(self.pipeline, state_dict, alpha=model_config.lora_alpha)
132
+
133
+ self.pipeline.to("cuda")
134
 
135
  def __call__(self, data : Any):
136
  """
 
139
  """
140
 
141
  prompt = data.pop("prompt", "")
142
+ negative_prompt = data.pop("negative_prompt", "")
143
+ negative_prompt += ",easynegative,bad_construction,bad_structure,bad_wail,bad_windows,blurry,cloned_window,cropped,deformed,disfigured,error,extra_windows,extra_chimney,extra_door,extra_structure,extra_frame,fewer_digits,fused_structure,gross_proportions,jpeg_artifacts,long_roof,low_quality,structure_limbs,missing_windows,missing_doors,missing_roofs,mutated_structure,mutation,normal_quality,out_of_frame,owres,poorly_drawn_structure,poorly_drawn_house,signature,text,too_many_windows,ugly,username,uta,watermark,worst_quality"
144
  steps = data.pop("steps", 25)
145
  guidance_scale = data.pop("guidance_scale", 12.5)
146
 
147
+ print(f"current seed: {torch.initial_seed()}")
148
+ print(f"sampling {prompt} ...")
149
  vids = self.pipeline(
150
+ prompt,
151
+ negative_prompt = negative_prompt,
152
+ num_inference_steps = steps,
153
+ guidance_scale = guidance_scale,
154
+ width = 256,
155
+ height = 256,
156
+ video_length = 5,
157
+ latents = self.latents,
158
+ ).videos
159
+
160
+ # vids = self.pipeline(
161
+ # prompt=prompt,
162
+ # negative_prompt=negative_prompt,
163
+ # num_inference_steps=steps,
164
+ # guidance_scale=guidance_scale,
165
+ # width= 256,
166
+ # height= 256,
167
+ # video_length= 5,
168
+ # ).videos
169
 
170
  videos = rearrange(vids, "b c t h w -> t b c h w")
171
  n_rows=6
models/Motion_Module/test/config.yaml CHANGED
@@ -1,5 +1,5 @@
1
- pretrained_model_path: /content/AnimateDiff-SceneFusion/models/StableDiffusion/
2
- output_dir: /content/AnimateDiff-SceneFusion/models/Motion_Module/test
3
  train_data:
4
  video_folder: data/output
5
  csv_path: data/output.csv
@@ -23,7 +23,7 @@ train_whole_module: false
23
  trainable_modules:
24
  - to_q
25
  train_batch_size: 1
26
- max_train_steps: 36
27
  learning_rate: 0.0003
28
  scale_lr: false
29
  lr_scheduler: constant
@@ -42,7 +42,7 @@ mixed_precision: fp16
42
  use_8bit_adam: false
43
  enable_xformers_memory_efficient_attention: true
44
  seed: 33
45
- motion_module: /content/AnimateDiff-SceneFusion/models/Motion_Module/mm_sd_v15.ckpt
46
  inference_config_path: configs/inference/inference-v3.yaml
47
  motion_module_pe_multiplier: 1
48
  dataset_class: MultiTuneAVideoDataset
 
1
+ pretrained_model_path: models/StableDiffusion/
2
+ output_dir: models/Motion_Module/test
3
  train_data:
4
  video_folder: data/output
5
  csv_path: data/output.csv
 
23
  trainable_modules:
24
  - to_q
25
  train_batch_size: 1
26
+ max_train_steps: 1
27
  learning_rate: 0.0003
28
  scale_lr: false
29
  lr_scheduler: constant
 
42
  use_8bit_adam: false
43
  enable_xformers_memory_efficient_attention: true
44
  seed: 33
45
+ motion_module: models/Motion_Module/mm_sd_v15.ckpt
46
  inference_config_path: configs/inference/inference-v3.yaml
47
  motion_module_pe_multiplier: 1
48
  dataset_class: MultiTuneAVideoDataset