Update animatediff/pipelines/pipeline_animation.py
Browse files
animatediff/pipelines/pipeline_animation.py
CHANGED
@@ -317,25 +317,28 @@ class AnimationPipeline(DiffusionPipeline):
|
|
317 |
rand_device = "cpu" if device.type == "mps" else device
|
318 |
|
319 |
if isinstance(generator, list):
|
320 |
-
|
321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
-
# If init_latents is not None, copy the values for each video frame
|
324 |
-
if init_latents is not None:
|
325 |
-
for i in range(video_length):
|
326 |
-
init_alpha = (video_length - float(i)) / video_length / 30
|
327 |
-
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
328 |
|
329 |
-
latents = latents.to(device)
|
330 |
else:
|
331 |
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
|
332 |
-
|
333 |
-
# If init_latents is not None, repeat it for the entire batch
|
334 |
if init_latents is not None:
|
335 |
-
|
336 |
for i in range(video_length):
|
337 |
-
|
338 |
-
|
|
|
|
|
|
|
|
|
|
|
339 |
|
340 |
else:
|
341 |
if latents.shape != shape:
|
|
|
317 |
rand_device = "cpu" if device.type == "mps" else device
|
318 |
|
319 |
if isinstance(generator, list):
|
320 |
+
shape = shape
|
321 |
+
# shape = (1,) + shape[1:]
|
322 |
+
# ignore init latents for batch model
|
323 |
+
latents = [
|
324 |
+
torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
|
325 |
+
for i in range(batch_size)
|
326 |
+
]
|
327 |
+
latents = torch.cat(latents, dim=0).to(device)
|
328 |
|
|
|
|
|
|
|
|
|
|
|
329 |
|
|
|
330 |
else:
|
331 |
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
|
|
|
|
|
332 |
if init_latents is not None:
|
333 |
+
|
334 |
for i in range(video_length):
|
335 |
+
# Dividing by 30 empirically yields stable results, but the reason is not understood
|
336 |
+
# gradually reduce init_alpha along the video frames (loosens the restriction)
|
337 |
+
init_alpha = (video_length - float(i)) / video_length / 30
|
338 |
+
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
339 |
+
|
340 |
+
|
341 |
+
|
342 |
|
343 |
else:
|
344 |
if latents.shape != shape:
|