allegro-text2video

Runtime error

App Files Files Community

AI-Anchorite committed 23 days ago

Commit

abe2421

•

1 Parent(s): c178cab

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +26 -151

gradio_app.py CHANGED Viewed

@@ -4,21 +4,27 @@ import os
 import gradio as gr
 import subprocess
 from subprocess import getoutput
 from diffusers.schedulers import EulerAncestralDiscreteScheduler
 from transformers import T5EncoderModel, T5Tokenizer
 from allegro.pipelines.pipeline_allegro import AllegroPipeline
 from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D
 from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel
 from huggingface_hub import snapshot_download
 weights_dir = './allegro_weights'
 os.makedirs(weights_dir, exist_ok=True)
-is_shared_ui = True if "fffiloni/allegro-text2video" in os.environ['SPACE_ID'] else False
 is_gpu_associated = torch.cuda.is_available()
-if not is_shared_ui:
     snapshot_download(
         repo_id='rhymes-ai/Allegro',
         allow_patterns=[
@@ -31,11 +37,8 @@ if not is_shared_ui:
         local_dir=weights_dir,
     )
-if is_gpu_associated:
-    gpu_info = getoutput('nvidia-smi')
 def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload):
-    dtype = torch.bfloat16
     # Load models
     vae = AllegroAutoencoderKL3D.from_pretrained(
@@ -80,6 +83,9 @@ def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps,
     if enable_cpu_offload:
         allegro_pipeline.enable_sequential_cpu_offload()
     out_video = allegro_pipeline(
         user_prompt,
         negative_prompt=negative_prompt,
@@ -105,152 +111,22 @@ def run_inference(user_prompt, guidance_scale, num_sampling_steps, seed, enable_
     result_path = single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload)
     return result_path
-css="""
-div#col-container{
-    margin: 0 auto;
-    max-width: 800px;
-}
-div#warning-ready {
-    background-color: #ecfdf5;
-    padding: 0 16px 16px;
-    margin: 20px 0;
-    color: #030303!important;
-}
-div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
-    color: #057857!important;
-}
-div#warning-duplicate {
-    background-color: #ebf5ff;
-    padding: 0 16px 16px;
-    margin: 20px 0;
-    color: #030303!important;
-}
-div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
-    color: #0f4592!important;
-}
-div#warning-duplicate strong {
-    color: #0f4592;
-}
-p.actions {
-    display: flex;
-    align-items: center;
-    margin: 20px 0;
-}
-div#warning-duplicate .actions a {
-    display: inline-block;
-    margin-right: 10px;
-}
-div#warning-setgpu {
-    background-color: #fff4eb;
-    padding: 0 16px 16px;
-    margin: 20px 0;
-    color: #030303!important;
-}
-div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p {
-    color: #92220f!important;
-}
-div#warning-setgpu a, div#warning-setgpu b {
-    color: #91230f;
-}
-div#warning-setgpu p.actions > a {
-    display: inline-block;
-    background: #1f1f23;
-    border-radius: 40px;
-    padding: 6px 24px;
-    color: antiquewhite;
-    text-decoration: none;
-    font-weight: 600;
-    font-size: 1.2em;
-}
-div#warning-setsleeptime {
-    background-color: #fff4eb;
-    padding: 10px 10px;
-    margin: 0!important;
-    color: #030303!important;
-}
-.custom-color {
-    color: #030303 !important;
-}
-"""
 # Create Gradio interface
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
         gr.Markdown("# Allegro Video Generation")
         gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.")
-        gr.HTML("""
-        <div style="display:flex;column-gap:4px;">
-            <a href='https://huggingface.co/rhymes-ai/Allegro'>
-                <img src='https://img.shields.io/badge/HuggingFace-Model-orange'>
-            </a>
-            <a href='https://github.com/rhymes-ai/Allegro/tree/main'>
-                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
-            </a>
-            <a href='https://arxiv.org/abs/2410.15458'>
-                <img src='https://img.shields.io/badge/ArXivPaper-red'>
-            </a>
-        </div>
-        """)
-        user_prompt=gr.Textbox(label="User Prompt")
         with gr.Row():
-            guidance_scale=gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5)
-            num_sampling_steps=gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20)
         with gr.Row():
-            seed=gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42)
-            enable_cpu_offload=gr.Checkbox(label="Enable CPU Offload", value=False, scale=1)
-        if is_shared_ui:
-            top_description = gr.HTML(f'''
-                <div class="gr-prose">
-                    <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
-                    Attention: this Space need to be duplicated to work</h2>
-                    <p class="main-message custom-color">
-                        To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
-                        You'll be able to offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
-                    </p>
-                    <p class="actions custom-color">
-                        <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
-                            <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
-                        </a>
-                    </p>
-                </div>
-            ''', elem_id="warning-duplicate")
-            submit_btn = gr.Button("Generate Video", visible=False)
-        else:
-            if(is_gpu_associated):
-                submit_btn = gr.Button("Generate Video", visible=True)
-                top_description = gr.HTML(f'''
-                    <div class="gr-prose">
-                        <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
-                        You have successfully associated a GPU to this Space 🎉</h2>
-                        <p class="custom-color">
-                            You can now generate a video! You will be billed by the minute from when you activated the GPU until when it is turned off.
-                            You can offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
-                        </p>
-                    </div>
-            ''', elem_id="warning-ready")
-            else:
-                top_description = gr.HTML(f'''
-                        <div class="gr-prose">
-                        <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
-                        You have successfully duplicated the Allegro Video Generation Space 🎉</h2>
-                        <p class="custom-color">There's only one step left before you can generate a video: we recommend to <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings" style="text-decoration: underline" target="_blank">attribute a L40S GPU</b> to it (via the Settings tab)</a>.
-                        You will be billed by the minute from when you activate the GPU until when it is turned off.</p>
-                        <p class="actions custom-color">
-                            <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings">🔥 &nbsp; Set recommended GPU</a>
-                        </p>
-                        </div>
-                ''', elem_id="warning-setgpu")
-                submit_btn = gr.Button("Generate Video", visible=False)
-        video_output=gr.Video(label="Generated Video")
-        def load_allegro_examples(prompt):
-            if prompt == "A Monkey is playing bass guitar.":
-                return "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/11.mp4"
-            elif prompt == "An astronaut riding a horse.":
-                return "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/15.mp4"
-            elif prompt == "A tiny finch on a branch with spring flowers on background.":
-                return "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/22.mp4"
         gr.Examples(
             examples=[
@@ -258,11 +134,10 @@ with gr.Blocks(css=css) as demo:
                 ["An astronaut riding a horse."],
                 ["A tiny finch on a branch with spring flowers on background."]
             ],
-            fn=load_allegro_examples,
             inputs=[user_prompt],
             outputs=video_output,
-            run_on_click=True,
         )
     submit_btn.click(
@@ -272,4 +147,4 @@ with gr.Blocks(css=css) as demo:
     )
 # Launch the interface
-demo.launch(show_error=True, show_api=False)

 import gradio as gr
 import subprocess
 from subprocess import getoutput
 from diffusers.schedulers import EulerAncestralDiscreteScheduler
 from transformers import T5EncoderModel, T5Tokenizer
 from allegro.pipelines.pipeline_allegro import AllegroPipeline
 from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D
 from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel
+# from allegro.models.transformers.block import AttnProcessor2_0
 from huggingface_hub import snapshot_download
+# # Override attention processor initialization
+# AttnProcessor2_0.__init__ = lambda self, *args, **kwargs: super(AttnProcessor2_0, self).__init__()
 weights_dir = './allegro_weights'
 os.makedirs(weights_dir, exist_ok=True)
+is_shared_ui = False
 is_gpu_associated = torch.cuda.is_available()
+# Download weights if not present
+if not os.path.exists(weights_dir):
     snapshot_download(
         repo_id='rhymes-ai/Allegro',
         allow_patterns=[
         local_dir=weights_dir,
     )
 def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload):
+    dtype = torch.float16  # Changed from torch.bfloat16
     # Load models
     vae = AllegroAutoencoderKL3D.from_pretrained(
     if enable_cpu_offload:
         allegro_pipeline.enable_sequential_cpu_offload()
+    # Clear memory before generation
+    # torch.cuda.empty_cache()
     out_video = allegro_pipeline(
         user_prompt,
         negative_prompt=negative_prompt,
     result_path = single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload)
     return result_path
 # Create Gradio interface
+with gr.Blocks() as demo:
+    with gr.Column():
         gr.Markdown("# Allegro Video Generation")
         gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.")
+        user_prompt = gr.Textbox(label="User Prompt")
         with gr.Row():
+            guidance_scale = gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5)
+            num_sampling_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20)
         with gr.Row():
+            seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42)
+            enable_cpu_offload = gr.Checkbox(label="Enable CPU Offload", value=True, scale=1)
+        submit_btn = gr.Button("Generate Video")
+        video_output = gr.Video(label="Generated Video")
         gr.Examples(
             examples=[
                 ["An astronaut riding a horse."],
                 ["A tiny finch on a branch with spring flowers on background."]
             ],
             inputs=[user_prompt],
             outputs=video_output,
+            fn=lambda x: None,
+            cache_examples=False
         )
     submit_btn.click(
     )
 # Launch the interface
+demo.launch(show_error=True)