JasonGilholme committed
Commit 0feb70d
Parent(s): a372e74
temporalvideo usage changes
Hi,
This is an updated version of the temporalvideo.py file that does the following things:
* Add an argparser for easier basic usage (see the example invocation below)
* Add dynamic lookup of the ControlNet models, in case the names or commit ids of the available models differ
* Ensure all image inputs are scaled to the same resolution. The optical flow maps were hard-coded to 512x512 and would produce images that didn't line up with non-square input images.
* Write out the ControlNet images to help debug resolution-related errors. It's also interesting to see the output of the preprocessors for hed and openpose.
Hopefully you find this helpful. Thanks so much for your work on this topic, it's pretty awesome!
Jase
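
With the argparser in place, usage would look roughly like this. This is a hypothetical invocation: the flag names come from the argparser in the diff below, but the prompt text and sizes are placeholders, and it assumes the SD Web UI (with the controlnet extension) is already running on localhost:7860:

```
# Sketch only; prompt and dimensions are made-up examples.
python temporalvideo.py "a watercolor painting of a dancer" \
    --negative-prompt "blurry, low quality" \
    --init-image ./init.png \
    --input-dir ./Input_Images \
    --output-dir ./output \
    --width 512 --height 512
```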
- temporalvideo.py +98 -35
temporalvideo.py
CHANGED
@@ -4,6 +4,7 @@ import requests
 import json
 import cv2
 import numpy as np
+import re
 import sys
 import torch
 from PIL import Image
@@ -20,6 +21,27 @@ import cv2
 from torchvision.io import write_jpeg
 import pickle
 
+import argparse
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('prompt')
+    parser.add_argument('--negative-prompt', dest='negative_prompt', default="")
+
+    parser.add_argument('--init-image', dest='init_image', default="./init.png")
+    parser.add_argument('--input-dir', dest='input_dir', default="./Input_Images")
+    parser.add_argument('--output-dir', dest='output_dir', default="./output")
+
+    parser.add_argument('--width', default=512, type=int)
+    parser.add_argument('--height', default=512, type=int)
+
+    return parser.parse_args()
+
+
+args = get_args()
+
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -27,11 +49,9 @@ model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device
 model = model.eval()
 
 # Replace with the actual path to your image file and folder
-x_path = "./init.png"
-y_folder = "./Input_Images"
 
-
-
+os.makedirs(args.output_dir, exist_ok=True)
+
 
 def get_image_paths(folder):
     image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
@@ -40,7 +60,46 @@ def get_image_paths(folder):
         files.extend(glob.glob(os.path.join(folder, ext)))
     return sorted(files)
 
-
+
+y_paths = get_image_paths(args.input_dir)
+
+
+def get_controlnet_models():
+    url = "http://localhost:7860/controlnet/model_list"
+
+    temporalnet_model = None
+    temporalnet_re = re.compile("^temporalnetversion2 \[.{8}\]")
+
+    hed_model = None
+    hed_re = re.compile("^control_.*hed.* \[.{8}\]")
+
+    openpose_model = None
+    openpose_re = re.compile("^control_.*openpose.* \[.{8}\]")
+
+    response = requests.get(url)
+    if response.status_code == 200:
+        models = json.loads(response.content)
+    else:
+        raise Exception("Unable to list models from the SD Web API! "
+                        "Is it running and is the controlnet extension installed?")
+
+    for model in models['model_list']:
+        if temporalnet_model is None and temporalnet_re.match(model):
+            temporalnet_model = model
+        elif hed_model is None and hed_re.match(model):
+            hed_model = model
+        elif openpose_model is None and openpose_re.match(model):
+            openpose_model = model
+
+    assert temporalnet_model is not None, "Unable to find the temporalnet2 model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!"
+    assert hed_model is not None, "Unable to find the hed_model model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!"
+    assert openpose_model is not None, "Unable to find the openpose model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!"
+
+    return temporalnet_model, hed_model, openpose_model
+
+
+TEMPORALNET_MODEL, HED_MODEL, OPENPOSE_MODEL = get_controlnet_models()
+
 
 def send_request(last_image_path, optical_flow_path,current_image_path):
     url = "http://localhost:7860/sdapi/v1/img2img"
@@ -51,7 +110,6 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
     # Load and process the last image
     last_image = cv2.imread(last_image_path)
     last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
-    last_image = cv2.resize(last_image, (512, 512))
 
     # Load and process the optical flow image
    flow_image = cv2.imread(optical_flow_path)
@@ -79,31 +137,39 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
         "inpainting_mask_invert": 1,
         "resize_mode": 0,
         "denoising_strength": 0.4,
-        "prompt":
-        "negative_prompt":
+        "prompt": args.prompt,
+        "negative_prompt": args.negative_prompt,
         "alwayson_scripts": {
             "ControlNet":{
                 "args": [
                     {
                         "input_image": current_image,
                         "module": "hed",
-                        "model":
+                        "model": HED_MODEL,
                         "weight": 0.7,
                         "guidance": 1,
+                        "pixel_perfect": True,
+                        "resize_mode": 0,
                     },
                     {
                         "input_image": encoded_image,
-                        "model":
+                        "model": TEMPORALNET_MODEL,
                         "module": "none",
                         "weight": 0.6,
                         "guidance": 1,
+                        # "processor_res": 512,
+                        "threshold_a": 64,
+                        "threshold_b": 64,
+                        "resize_mode": 0,
                     },
                     {
                         "input_image": current_image,
-                        "model":
+                        "model": OPENPOSE_MODEL,
                         "module": "openpose_full",
                         "weight": 0.7,
-                        "guidance":1,
+                        "guidance": 1,
+                        "pixel_perfect": True,
+                        "resize_mode": 0,
                     }
 
 
@@ -118,8 +184,8 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
         "n_iter": 1,
         "steps": 20,
         "cfg_scale": 6,
-        "width":
-        "height":
+        "width": args.width,
+        "height": args.height,
         "restore_faces": True,
         "include_init_images": True,
         "override_settings": {},
@@ -164,25 +230,18 @@ def infer(frameA, frameB):
         img2_batch = F.resize(img2_batch, size=[512, 512])
         return transforms(img1_batch, img2_batch)
 
-
     img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
 
-
     list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
 
-
-
+    predicted_flow = list_of_flows[-1][0]
+    opitcal_flow_path = os.path.join(args.output_dir, f"flow_{i}.png")
 
-
+    flow_img = flow_to_image(predicted_flow).to("cpu")
+    flow_img = F.resize(flow_img, size=[args.height, args.width])
 
-
+    write_jpeg(flow_img, opitcal_flow_path)
 
-    predicted_flow = list_of_flows[-1][0]
-    opitcal_flow_path = os.path.join(output_folder, f"flow_{i}.png")
-    flow_img = flow_to_image(predicted_flow).to("cpu")
-    write_jpeg(flow_img,opitcal_flow_path)
-
-
     return opitcal_flow_path
 
 output_images = []
@@ -190,13 +249,13 @@ output_paths = []
 
 # Initialize with the first image path
 
-result =
-output_image_path = os.path.join(
+result = args.init_image
+output_image_path = os.path.join(args.output_dir, f"output_image_0.png")
 
 #with open(output_image_path, "wb") as f:
 #    f.write(result)
 
-last_image_path =
+last_image_path = args.init_image
 for i in range(1, len(y_paths)):
     # Use the last image path and optical flow map to generate the next input
     optical_flow = infer(y_paths[i - 1], y_paths[i])
@@ -204,10 +263,14 @@ for i in range(1, len(y_paths)):
     # Modify your send_request to use the last_image_path
     result = send_request(last_image_path, optical_flow, y_paths[i])
     data = json.loads(result)
-    encoded_image = data["images"][0]
-    output_image_path = os.path.join(output_folder, f"output_image_{i}.png")
-    last_image_path = output_image_path
-    with open(output_image_path, "wb") as f:
-        f.write(base64.b64decode(encoded_image))
-    print(f"Written data for frame {i}:")
 
+    for j, encoded_image in enumerate(data["images"]):
+        if j == 0:
+            output_image_path = os.path.join(args.output_dir, f"output_image_{i}.png")
+            last_image_path = output_image_path
+        else:
+            output_image_path = os.path.join(args.output_dir, f"controlnet_image_{j}_{i}.png")
+
+        with open(output_image_path, "wb") as f:
+            f.write(base64.b64decode(encoded_image))
+        print(f"Written data for frame {i}:")