CharlieAmalet committed on
Commit 321b7b2
1 Parent(s): 18365d1

Update app.py

Files changed (1):
  1. app.py +31 -20
app.py CHANGED
@@ -18,22 +18,39 @@ import random
 from huggingface_hub import login, hf_hub_download
 import spaces
 
-pipe = StableVideoDiffusionPipeline.from_pretrained(
-    # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
-    "vdo/stable-video-diffusion-img2vid-xt-1-1",
-    torch_dtype=torch.float16,
-    variant="fp16"
-)
-
-# pipe.save_pretrained("./model", variant="fp16")
+# pipe = StableVideoDiffusionPipeline.from_pretrained(
+#     # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
+#     "vdo/stable-video-diffusion-img2vid-xt-1-1",
+#     torch_dtype=torch.float16,
+#     variant="fp16"
+# )
+
+# pipe.save_pretrained("./checkpoints", variant="fp16")
+
+model_directory = './checkpoints'
+
+if not os.path.exists(model_directory):
+    pipe = StableVideoDiffusionPipeline.from_pretrained(
+        # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
+        "vdo/stable-video-diffusion-img2vid-xt-1-1",
+        torch_dtype=torch.float16,
+        variant="fp16"
+    )
+    pipe.save_pretrained("./checkpoints", variant="fp16")
+else:
+    pipe = StableVideoDiffusionPipeline.from_pretrained(
+        model_directory,
+        torch_dtype=torch.float16,
+        variant="fp16"
+    )
 
 pipe.to("cuda")
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 #pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
 max_64_bit_int = 2**63 - 1
 
-@spaces.GPU(enable_queue=True)
+@spaces.GPU
 def generate_video(
     image: Image,
     seed: int,
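The hunk above swaps the unconditional Hub download for a download-once, load-local pattern: the pipeline is fetched and written to ./checkpoints on the first start, and every later start loads the saved snapshot from disk. A minimal sketch of the same pattern in isolation (the helper name is hypothetical; it assumes the diffusers StableVideoDiffusionPipeline API used above):

import os

import torch
from diffusers import StableVideoDiffusionPipeline

def load_or_download(repo_id: str, local_dir: str = "./checkpoints"):
    # Later startups: reuse the on-disk snapshot and skip the download.
    if os.path.exists(local_dir):
        return StableVideoDiffusionPipeline.from_pretrained(
            local_dir, torch_dtype=torch.float16, variant="fp16"
        )
    # First startup: pull from the Hub, then persist for next time.
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        repo_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.save_pretrained(local_dir, variant="fp16")
    return pipe

pipe = load_or_download("vdo/stable-video-diffusion-img2vid-xt-1-1")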
@@ -45,6 +62,7 @@ def generate_video(
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
+    global pipe
     # note julian: normally we should resize input images, but normally they are already in 1024x576, so..
 
     # also, I would like to experiment with vertical videos, and 1024x512 videos
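The only change in this hunk is the global pipe declaration. For reference, Python requires global only when a function rebinds a module-level name; merely reading or calling pipe resolves without it. A short illustration with placeholder values:

pipe = "module-level pipeline"

def rebind():
    global pipe  # needed only because the function assigns to the name
    pipe = "replacement pipeline"

rebind()
print(pipe)  # -> "replacement pipeline"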
@@ -59,6 +77,7 @@ def generate_video(
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
 
+    # pipe.to("cuda")
     frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
     torch.manual_seed(seed)
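The generator passed to pipe(...) above is defined outside the lines this hunk shows; one common construction, given the Seed slider elsewhere in app.py, would be the following (an assumption, not the commit's own code):

import torch

seed = 42  # e.g. the value from the Seed slider
# torch.manual_seed() seeds and returns the default torch.Generator,
# so the result can be passed straight through as `generator=`.
generator = torch.manual_seed(seed)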
@@ -110,22 +129,14 @@ def resize_image(image, output_size=(1024, 576)):
 
 css = """
 
-img {
-    max-height: 400px;
-    object-fit: contain;
-}
-
-#img-display-input {
+img, video {
     max-height: 400px;
     object-fit: contain;
 }
-#vid-display-output {
-    max-height: 60vh;
-}
 """
 
 with gr.Blocks(css=css) as demo:
-    image = gr.Image(label="Upload your image", type="pil", elem_id="img-display-output")
+    image = gr.Image(label="Upload your image", type="pil")
     generate_btn = gr.Button("Generate")
     base64_out = gr.Textbox(label="Base64 Video")
     seed = gr.Slider(label="Seed", value=42, randomize=False, minimum=0, maximum=max_64_bit_int, step=1)
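The last hunk collapses three id-scoped CSS rules into one and drops the elem_id from gr.Image: since the selector now matches bare img/video tags, no element id is needed for the styling to apply. A minimal self-contained sketch (the launch call is an assumption; the real wiring sits outside this diff):

import gradio as gr

# One rule now constrains both the uploaded image and the output video.
css = "img, video { max-height: 400px; object-fit: contain; }"

with gr.Blocks(css=css) as demo:
    image = gr.Image(label="Upload your image", type="pil")

demo.launch()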
 
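A closing note on the decorator change in the first hunk: the commit drops enable_queue and switches to the bare @spaces.GPU form. A sketch assuming the Hugging Face spaces package on a ZeroGPU Space:

import spaces

# Bare decorator form, as adopted by this commit. A duration= argument
# also exists for longer jobs (assumption based on the spaces package).
@spaces.GPU
def generate_video(image, seed):
    ...  # GPU-backed generation runs inside the decorated function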