multimodalart HF staff committed on
Commit
a6075c0
1 Parent(s): 1ebe3e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -9
app.py CHANGED
@@ -8,8 +8,12 @@ import argparse
8
  import uuid
9
 
10
  hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
 
11
 
12
  def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
 
 
 
13
  unique_id = uuid.uuid4()
14
 
15
  args = argparse.Namespace(
@@ -27,13 +31,71 @@ def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=T
27
  inference_process(args)
28
  return f'output-{unique_id}.mp4'
29
 
30
- iface = gr.Interface(
31
- title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation",
32
- description="Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab",
33
- fn=run_inference,
34
- inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
35
- cache_examples=False,
36
- outputs="video"
37
- )
38
 
39
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import uuid
9
 
10
  hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
11
+ is_shared_ui = True if "multimodalart/hallo" in os.environ['SPACE_ID'] else False
12
 
13
  def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
14
+ if is_shared_ui:
15
+ raise gr.Error("This Space only works in duplicated instances")
16
+
17
  unique_id = uuid.uuid4()
18
 
19
  args = argparse.Namespace(
 
31
  inference_process(args)
32
  return f'output-{unique_id}.mp4'
33
 
 
 
 
 
 
 
 
 
34
 
35
+ css = '''
36
+ div#warning-ready {
37
+ background-color: #ecfdf5;
38
+ padding: 0 16px 16px;
39
+ margin: 20px 0;
40
+ color: #030303!important;
41
+ }
42
+ div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
43
+ color: #057857!important;
44
+ }
45
+ div#warning-duplicate {
46
+ background-color: #ebf5ff;
47
+ padding: 0 16px 16px;
48
+ margin: 20px 0;
49
+ color: #030303!important;
50
+ }
51
+ div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
52
+ color: #0f4592!important;
53
+ }
54
+ div#warning-duplicate strong {
55
+ color: #0f4592;
56
+ }
57
+ p.actions {
58
+ display: flex;
59
+ align-items: center;
60
+ margin: 20px 0;
61
+ }
62
+ div#warning-duplicate .actions a {
63
+ display: inline-block;
64
+ margin-right: 10px;
65
+ }
66
+ '''
67
+ with gr.Blocks(css=css) as demo:
68
+ if is_shared_ui:
69
+ top_description = gr.HTML(f'''
70
+ <div class="gr-prose">
71
+ <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
72
+ Attention: this Space need to be duplicated to work</h2>
73
+ <p class="main-message custom-color">
74
+ To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
75
+ An L4 costs <strong>US$0.80/h</strong>, so it should cost ~US$0.08 to generate 10s of talking head avatar.
76
+ </p>
77
+ <p class="actions custom-color">
78
+ <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
79
+ <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
80
+ </a>
81
+ to start generate your talking head
82
+ </p>
83
+ </div>
84
+ ''', elem_id="warning-duplicate")
85
+ gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
86
+ gr.Markdown("Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab")
87
+ with gr.Row():
88
+ with gr.Column():
89
+ avatar_face = gr.Image(type="filepath", label="Face")
90
+ driving_audio = gr.Audio(type="filepath", label="Driving audio")
91
+ generate = gr.Button("Generate")
92
+ with gr.Column():
93
+ output_video = gr.Video(label="Your talking head")
94
+
95
+ generate.click(
96
+ fn=run_inference,
97
+ intputs=[avatar_face, driving_audio],
98
+ outputs=output_video
99
+ )
100
+
101
+ demo.launch()