bluestarburst committed
Commit: 48571f9
Parent(s): 1e533fd

Upload folder using huggingface_hub
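For reference, a commit like this is normally produced by a short upload script rather than by hand; a minimal sketch of the assumed invocation (the actual call is not shown in the commit):

import huggingface_hub

# Push the local working tree to the Hub; the default commit message for
# upload_folder is exactly "Upload folder using huggingface_hub".
huggingface_hub.HfApi().upload_folder(
    folder_path=".",
    repo_id="bluestarburst/AnimateDiff-SceneFusion",
)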
Files changed:
- data/data.ipynb +0 -0
- data/output.csv +19 -20
- data/output/18.mp4 +0 -0
- data/output/19.mp4 +0 -0
- data/output/20.mp4 +0 -0
- data/output/21.mp4 +0 -0
- data/output/22.mp4 +0 -0
- data/output/23.mp4 +0 -0
- data/output/24.mp4 +0 -0
- data/output/25.mp4 +0 -0
- data/output/26.mp4 +0 -0
- data/output/27.mp4 +0 -0
- data/output/28.mp4 +0 -0
- data/output/29.mp4 +0 -0
- data/output/30.mp4 +0 -0
- data/output/31.mp4 +0 -0
- data/output/32.mp4 +0 -0
- data/output/33.mp4 +0 -0
- data/output/34.mp4 +0 -0
- handler.py +98 -21
- models/Motion_Module/test/config.yaml +4 -4
data/data.ipynb
CHANGED
The diff for this file is too large to render; see the raw diff.

data/output.csv
CHANGED
@@ -12,26 +12,25 @@ videoid,name,page_dir
 10,"camera panning right to left, a bird's - eye view of a row of buildings in a city with trees in the foreground",raw/3dDS1RL
 11,"camera panning left to right, a computer generated image of a blue diamond in the middle of a green and leafy area",raw/3dDS20
 12,"camera panning left to right, a computer generated image of a landscape with trees, rocks, and a path through a forest",raw/3dDS21
-13,"camera panning
+13,"camera panning right to left, a computer generated image of a building with a blue roof and a blue roof on top of it",raw/3dDS22RL
 14,"camera panning left to right, a map of a small town with a lot of trees and bushes in the middle of it",raw/3dDS23
-15,"camera panning
+15,"camera panning right to left, a computer generated image of a building with a blue roof and a green lawn in front of it",raw/3dDS24RL
 16,"camera panning left to right, an artist's rendering of a floating island in the middle of a large body of water",raw/3dDS25
 17,"camera panning left to right, a couple of white birds floating on top of a lake filled with waterlily green leaves",raw/3dDS26
-18,"camera panning left to right, a
-19,"camera panning
-20,"camera panning
-21,"camera panning
-22,"camera panning left to right, an
-23,"camera panning left to right, an
-24,"camera panning left to right,
-25,"camera panning
-26,"camera panning
-27,"camera panning
-28,"camera panning left to right, a
-29,"camera panning left to right, a bird's
-30,"camera panning left to right, a bird's eye view of a
-31,"camera panning left to right, a
-32,"camera panning
-33,"camera panning right to left, a
-34,"camera panning
-35,"camera panning left to right, a computer generated image of a city street with tall buildings and a clock tower in the distance",raw/3dDS9
+18,"camera panning left to right, a 3d rendering of a jail cell with bars and bars on each side of the cell wall",raw/3dDS28
+19,"camera panning right to left, an artist's rendering of a bridge over a body of water with a building in the background",raw/3dDS29RL
+20,"camera panning right to left, a bird's eye view of a building with a lot of trees in front of it",raw/3dDS2RL
+21,"camera panning left to right, an artist's rendering of a highway with a bridge in the middle and trees on both sides",raw/3dDS30
+22,"camera panning left to right, an aerial view of a freeway with multiple lanes and a bridge in the foreground and a cityscape in the background",raw/3dDS31
+23,"camera panning left to right, an artist's rendering of a bridge over a body of water with a clock tower in the background",raw/3dDS32
+24,"camera panning left to right, a set of stairs leading up to the top of a set of stairs in a dimly lit area",raw/3dDS33
+25,"camera panning right to left, a 3d rendering of a subway station with red and yellow striped barriers and a red and white sign",raw/3dDS34RL
+26,"camera panning right to left, a screenshot of a subway station with a man in a suit and a woman in a dress",raw/3dDS35RL
+27,"camera panning right to left, a screenshot of a hallway in a building with a gate and a sign on the wall",raw/3dDS36RL
+28,"camera panning left to right, a bird's - eye view of a row of houses in the suburbs of a city",raw/3dDS3LR
+29,"camera panning left to right, a bird's eye view of a house with solar panels on the top of the roof",raw/3dDS4
+30,"camera panning left to right, a bird's eye view of a row of houses with solar panels on top of them",raw/3dDS5
+31,"camera panning left to right, a screenshot of a city street with a bench on one side and a building on the other",raw/3dDS6
+32,"camera panning right to left, a computer generated image of a computer generated image of a construction site with a truck in the background",raw/3dDS7RL
+33,"camera panning right to left, a screenshot of a city with a bunch of green objects in the middle of the street",raw/3dDS8RL
+34,"camera panning left to right, a computer generated image of a city street with tall buildings and a clock tower in the distance",raw/3dDS9

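The rewritten rows restore the full captions and raw/3dDS* paths that were previously truncated mid-string. A quick sanity check of the index (hypothetical snippet; pandas is assumed and not part of this repo):

import pandas as pd

# Column names come from the hunk header: videoid,name,page_dir.
df = pd.read_csv("data/output.csv")
assert df["videoid"].is_unique
print(df[["videoid", "page_dir"]].tail())
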
data/output/18.mp4
CHANGED
Binary files a/data/output/18.mp4 and b/data/output/18.mp4 differ

data/output/19.mp4
CHANGED
Binary files a/data/output/19.mp4 and b/data/output/19.mp4 differ

data/output/20.mp4
CHANGED
Binary files a/data/output/20.mp4 and b/data/output/20.mp4 differ

data/output/21.mp4
CHANGED
Binary files a/data/output/21.mp4 and b/data/output/21.mp4 differ

data/output/22.mp4
CHANGED
Binary files a/data/output/22.mp4 and b/data/output/22.mp4 differ

data/output/23.mp4
CHANGED
Binary files a/data/output/23.mp4 and b/data/output/23.mp4 differ

data/output/24.mp4
CHANGED
Binary files a/data/output/24.mp4 and b/data/output/24.mp4 differ

data/output/25.mp4
CHANGED
Binary files a/data/output/25.mp4 and b/data/output/25.mp4 differ

data/output/26.mp4
CHANGED
Binary files a/data/output/26.mp4 and b/data/output/26.mp4 differ

data/output/27.mp4
CHANGED
Binary files a/data/output/27.mp4 and b/data/output/27.mp4 differ

data/output/28.mp4
CHANGED
Binary files a/data/output/28.mp4 and b/data/output/28.mp4 differ

data/output/29.mp4
CHANGED
Binary files a/data/output/29.mp4 and b/data/output/29.mp4 differ

data/output/30.mp4
CHANGED
Binary files a/data/output/30.mp4 and b/data/output/30.mp4 differ

data/output/31.mp4
CHANGED
Binary files a/data/output/31.mp4 and b/data/output/31.mp4 differ

data/output/32.mp4
CHANGED
Binary files a/data/output/32.mp4 and b/data/output/32.mp4 differ

data/output/33.mp4
CHANGED
Binary files a/data/output/33.mp4 and b/data/output/33.mp4 differ

data/output/34.mp4
CHANGED
Binary files a/data/output/34.mp4 and b/data/output/34.mp4 differ

handler.py
CHANGED
@@ -9,6 +9,8 @@ import os
 import json
 import base64

+from safetensors import safe_open
+
 from diffusers.utils.import_utils import is_xformers_available
 from typing import Any
 import torch
@@ -21,7 +23,10 @@ from animatediff.models.unet import UNet3DConditionModel
 from animatediff.pipelines.pipeline_animation import AnimationPipeline
 from animatediff.utils.util import save_videos_grid
 from animatediff.utils.util import load_weights
+from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
+from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora

+current_model = "backup"

 class EndpointHandler():
     def __init__(self, model_path: str = "bluestarburst/AnimateDiff-SceneFusion"):
@@ -46,6 +51,15 @@ class EndpointHandler():

         unet = UNet3DConditionModel.from_pretrained_2d(pretrained_model_path=unet_model_path, unet_additional_kwargs=OmegaConf.to_container(inference_config.unet_additional_kwargs), config_path=unet_config_path)

+        # inv_latent_path = f"{OUTPUT_DIR}/inv_latents/ddim_latent-1.pt"
+        inv_latent_path = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename=f"models/Motion_Module/{current_model}/inv_latents/ddim_latent-1.pt")
+        self.latents = torch.load(inv_latent_path).to(torch.float)
+        print(self.latents.shape, self.latents.dtype)
+
+        # torch.backends.cuda.enable_mem_efficient_sdp(True)
+        torch.backends.cuda.enable_flash_sdp(True)
+        torch.backends.cuda.enable_math_sdp(True)
+
         if is_xformers_available(): unet.enable_xformers_memory_efficient_attention()
         else: assert False

@@ -56,18 +70,67 @@ class EndpointHandler():

         # huggingface download motion module from bluestarburst/AnimateDiff-SceneFusion/models/Motion_Module/mm_sd_v15.ckpt

-        motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/Motion_Module/mm_sd_v15.ckpt")
-
-
-
-
-
-
-
-
-
-
-
+        # motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/Motion_Module/mm_sd_v15.ckpt")
+        motion_module = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename=f"models/Motion_Module/{current_model}/mm.pth")
+        # LORA_DREAMBOOTH_PATH="models/DreamBooth_LoRA/toonyou_beta3.safetensors"
+
+        LORA_DREAMBOOTH_PATH = None
+        LORA_DREAMBOOTH_PATH = hf_hub_download(repo_id="bluestarburst/AnimateDiff-SceneFusion", filename="models/DreamBooth_LoRA/toonyou_beta3.safetensors")
+
+        # self.pipeline = load_weights(
+        #     self.pipeline,
+        #     # motion module
+        #     motion_module_path = motion_module,
+        #     motion_module_lora_configs = [],
+        #     # image layers
+        #     dreambooth_model_path = "",
+        #     lora_model_path = "",
+        #     lora_alpha = 0.8,
+        # ).to("cuda")
+
+        motion_module_state_dict = torch.load(motion_module, map_location="cpu")
+        missing, unexpected = self.pipeline.unet.load_state_dict(motion_module_state_dict, strict=False)
+        assert len(unexpected) == 0
+
+
+        # FIX THIS
+        if LORA_DREAMBOOTH_PATH != "":
+            if LORA_DREAMBOOTH_PATH.endswith(".ckpt"):
+                state_dict = torch.load(LORA_DREAMBOOTH_PATH)
+                self.pipeline.unet.load_state_dict(state_dict)
+
+            elif LORA_DREAMBOOTH_PATH.endswith(".safetensors"):
+                state_dict = {}
+                with safe_open(LORA_DREAMBOOTH_PATH, framework="pt", device="cpu") as f:
+                    for key in f.keys():
+                        state_dict[key] = f.get_tensor(key)
+
+                is_lora = all("lora" in k for k in state_dict.keys())
+                if not is_lora:
+                    base_state_dict = state_dict
+                else:
+                    base_state_dict = {}
+                    with safe_open("", framework="pt", device="cpu") as f:
+                        for key in f.keys():
+                            base_state_dict[key] = f.get_tensor(key)
+
+                # vae
+                converted_vae_checkpoint = convert_ldm_vae_checkpoint(base_state_dict, self.pipeline.vae.config)
+                self.pipeline.vae.load_state_dict(converted_vae_checkpoint)
+                # unet
+                converted_unet_checkpoint = convert_ldm_unet_checkpoint(base_state_dict, self.pipeline.unet.config)
+                self.pipeline.unet.load_state_dict(converted_unet_checkpoint, strict=False)
+                # text_model (TODO: problem here)
+                # converted_test_encoder_checkpoint = convert_ldm_clip_checkpoint(base_state_dict)
+                # pipeline.text_encoder = converted_test_encoder_checkpoint
+
+                # import pdb
+                # pdb.set_trace()
+                if is_lora:
+                    self.pipeline = convert_lora(self.pipeline, state_dict)
+                    # self.pipeline = convert_lora(self.pipeline, state_dict, alpha=model_config.lora_alpha)
+
+        self.pipeline.to("cuda")

     def __call__(self, data : Any):
         """
@@ -76,19 +139,33 @@ class EndpointHandler():
         """

         prompt = data.pop("prompt", "")
-        negative_prompt = data.pop("negative_prompt", "
+        negative_prompt = data.pop("negative_prompt", "")
+        negative_prompt += ",easynegative,bad_construction,bad_structure,bad_wail,bad_windows,blurry,cloned_window,cropped,deformed,disfigured,error,extra_windows,extra_chimney,extra_door,extra_structure,extra_frame,fewer_digits,fused_structure,gross_proportions,jpeg_artifacts,long_roof,low_quality,structure_limbs,missing_windows,missing_doors,missing_roofs,mutated_structure,mutation,normal_quality,out_of_frame,owres,poorly_drawn_structure,poorly_drawn_house,signature,text,too_many_windows,ugly,username,uta,watermark,worst_quality"
         steps = data.pop("steps", 25)
         guidance_scale = data.pop("guidance_scale", 12.5)

+        print(f"current seed: {torch.initial_seed()}")
+        print(f"sampling {prompt} ...")
         vids = self.pipeline(
-            prompt
-            negative_prompt=negative_prompt,
-            num_inference_steps=steps,
-            guidance_scale=guidance_scale,
-            width= 256,
-            height= 256,
-            video_length= 5,
-
+            prompt,
+            negative_prompt = negative_prompt,
+            num_inference_steps = steps,
+            guidance_scale = guidance_scale,
+            width = 256,
+            height = 256,
+            video_length = 5,
+            latents = self.latents,
+        ).videos
+
+        # vids = self.pipeline(
+        #     prompt=prompt,
+        #     negative_prompt=negative_prompt,
+        #     num_inference_steps=steps,
+        #     guidance_scale=guidance_scale,
+        #     width= 256,
+        #     height= 256,
+        #     video_length= 5,
+        # ).videos

         videos = rearrange(vids, "b c t h w -> t b c h w")
         n_rows=6

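Taken together, the handler changes swap the stock mm_sd_v15.ckpt motion module for the fine-tuned models/Motion_Module/backup/mm.pth, start sampling from precomputed DDIM inversion latents, and append a fixed architecture-themed negative prompt. A rough local smoke test (hypothetical driver script; assumes the repo's dependencies and a CUDA device are available):

from handler import EndpointHandler

# Payload keys mirror the data.pop() calls in __call__;
# unspecified keys fall back to the defaults shown in the diff.
handler = EndpointHandler()
result = handler({
    "prompt": "camera panning left to right, a bird's eye view of a row of houses",
    "steps": 25,
    "guidance_scale": 12.5,
})
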
models/Motion_Module/test/config.yaml
CHANGED
@@ -1,5 +1,5 @@
-pretrained_model_path:
-output_dir:
+pretrained_model_path: models/StableDiffusion/
+output_dir: models/Motion_Module/test
 train_data:
   video_folder: data/output
   csv_path: data/output.csv
@@ -23,7 +23,7 @@ train_whole_module: false
 trainable_modules:
 - to_q
 train_batch_size: 1
-max_train_steps:
+max_train_steps: 1
 learning_rate: 0.0003
 scale_lr: false
 lr_scheduler: constant
@@ -42,7 +42,7 @@ mixed_precision: fp16
 use_8bit_adam: false
 enable_xformers_memory_efficient_attention: true
 seed: 33
-motion_module:
+motion_module: models/Motion_Module/mm_sd_v15.ckpt
 inference_config_path: configs/inference/inference-v3.yaml
 motion_module_pe_multiplier: 1
 dataset_class: MultiTuneAVideoDataset
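The config now pins pretrained_model_path, output_dir, max_train_steps, and the base motion_module instead of leaving them blank. Since handler.py already reads configs through OmegaConf, the filled-in values can presumably be verified the same way before launching a run (hypothetical sketch):

from omegaconf import OmegaConf

# Load the training config and print the newly pinned fields.
cfg = OmegaConf.load("models/Motion_Module/test/config.yaml")
print(cfg.pretrained_model_path, cfg.output_dir)
print(cfg.motion_module, cfg.max_train_steps)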