CharlieAmalet committed on
Commit 321b7b2
1 Parent(s): 18365d1

Update app.py

Files changed (1):
  1. app.py +31 -20
app.py CHANGED
@@ -18,22 +18,39 @@ import random
 from huggingface_hub import login, hf_hub_download
 import spaces
 
-pipe = StableVideoDiffusionPipeline.from_pretrained(
-    # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
-    "vdo/stable-video-diffusion-img2vid-xt-1-1",
-    torch_dtype=torch.float16,
-    variant="fp16"
-)
-
-# pipe.save_pretrained("./model", variant="fp16")
+# pipe = StableVideoDiffusionPipeline.from_pretrained(
+#     # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
+#     "vdo/stable-video-diffusion-img2vid-xt-1-1",
+#     torch_dtype=torch.float16,
+#     variant="fp16"
+# )
+
+# pipe.save_pretrained("./checkpoints", variant="fp16")
+
+model_directory = './checkpoints'
+
+if not os.path.exists(model_directory):
+    pipe = StableVideoDiffusionPipeline.from_pretrained(
+        # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
+        "vdo/stable-video-diffusion-img2vid-xt-1-1",
+        torch_dtype=torch.float16,
+        variant="fp16"
+    )
+    pipe.save_pretrained("./checkpoints", variant="fp16")
+else:
+    pipe = StableVideoDiffusionPipeline.from_pretrained(
+        model_directory,
+        torch_dtype=torch.float16,
+        variant="fp16"
+    )
 
 pipe.to("cuda")
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 #pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
 max_64_bit_int = 2**63 - 1
 
-@spaces.GPU(enable_queue=True)
+@spaces.GPU
 def generate_video(
     image: Image,
     seed: int,
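The hunk above swaps the unconditional Hub download for a download-once, load-local pattern: the pipeline is fetched and written to ./checkpoints on the first start, and every later start loads the saved snapshot from disk. A minimal sketch of the same pattern in isolation (the helper name is hypothetical; it assumes the diffusers StableVideoDiffusionPipeline API used above):

import os

import torch
from diffusers import StableVideoDiffusionPipeline

def load_or_download(repo_id: str, local_dir: str = "./checkpoints"):
    # Later startups: reuse the on-disk snapshot and skip the download.
    if os.path.exists(local_dir):
        return StableVideoDiffusionPipeline.from_pretrained(
            local_dir, torch_dtype=torch.float16, variant="fp16"
        )
    # First startup: pull from the Hub, then persist for next time.
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        repo_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.save_pretrained(local_dir, variant="fp16")
    return pipe

pipe = load_or_download("vdo/stable-video-diffusion-img2vid-xt-1-1")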
@@ -45,6 +62,7 @@ def generate_video(
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
+    global pipe
     # note julian: normally we should resize input images, but normally they are already in 1024x576, so..
 
     # also, I would like to experiment with vertical videos, and 1024x512 videos
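The only change in this hunk is the global pipe declaration. For reference, Python requires global only when a function rebinds a module-level name; merely reading or calling pipe resolves without it. A short illustration with placeholder values:

pipe = "module-level pipeline"

def rebind():
    global pipe  # needed only because the function assigns to the name
    pipe = "replacement pipeline"

rebind()
print(pipe)  # -> "replacement pipeline"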
@@ -59,6 +77,7 @@ def generate_video(
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
 
+    # pipe.to("cuda")
     frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
     torch.manual_seed(seed)
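The generator passed to pipe(...) above is defined outside the lines this hunk shows; one common construction, given the Seed slider elsewhere in app.py, would be the following (an assumption, not the commit's own code):

import torch

seed = 42  # e.g. the value from the Seed slider
# torch.manual_seed() seeds and returns the default torch.Generator,
# so the result can be passed straight through as `generator=`.
generator = torch.manual_seed(seed)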
@@ -110,22 +129,14 @@ def resize_image(image, output_size=(1024, 576)):
 
 css = """
 
-img {
-    max-height: 400px;
-    object-fit: contain;
-}
-
-#img-display-input {
+img, video {
     max-height: 400px;
     object-fit: contain;
 }
-#vid-display-output {
-    max-height: 60vh;
-}
 """
 
 with gr.Blocks(css=css) as demo:
-    image = gr.Image(label="Upload your image", type="pil", elem_id="img-display-output")
+    image = gr.Image(label="Upload your image", type="pil")
     generate_btn = gr.Button("Generate")
     base64_out = gr.Textbox(label="Base64 Video")
     seed = gr.Slider(label="Seed", value=42, randomize=False, minimum=0, maximum=max_64_bit_int, step=1)
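The last hunk collapses three id-scoped CSS rules into one and drops the elem_id from gr.Image: since the selector now matches bare img/video tags, no element id is needed for the styling to apply. A minimal self-contained sketch (the launch call is an assumption; the real wiring sits outside this diff):

import gradio as gr

# One rule now constrains both the uploaded image and the output video.
css = "img, video { max-height: 400px; object-fit: contain; }"

with gr.Blocks(css=css) as demo:
    image = gr.Image(label="Upload your image", type="pil")

demo.launch()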
 
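A closing note on the decorator change in the first hunk: the commit drops enable_queue and switches to the bare @spaces.GPU form. A sketch assuming the Hugging Face spaces package on a ZeroGPU Space:

import spaces

# Bare decorator form, as adopted by this commit. A duration= argument
# also exists for longer jobs (assumption based on the spaces package).
@spaces.GPU
def generate_video(image, seed):
    ...  # GPU-backed generation runs inside the decorated function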