H-Liu1997 committed on
Commit
603c46e
1 Parent(s): 83565d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -53
app.py CHANGED
@@ -580,42 +580,36 @@ combined_examples = [
580
 
581
  def make_demo():
582
  with gr.Blocks(analytics_enabled=False) as Interface:
583
- # First row: Audio upload and Audio examples with adjusted ratio
584
  gr.Markdown(
585
- """
586
- <div align='center'> <h1> TANGO: Co-Speech Gesture Video Reenactment with Hierarchical Audio Motion Embedding and Diffusion Interpolation </span> </h1> \
587
- <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
588
- <a href='https://h-liu1997.github.io/'>Haiyang Liu</a>, \
589
- <a href='https://yangxingchao.github.io/'>Xingchao Yang</a>, \
590
- <a href=''>Tomoya Akiyama</a>, \
591
- <a href='https://sky24h.github.io/'> Yuantian Huang</a>, \
592
- <a href=''>Qiaoge Li</a>, \
593
- <a href='https://www.tut.ac.jp/english/university/faculty/cs/164.html'>Shigeru Kuriyama</a>, \
594
- <a href='https://taketomitakafumi.sakura.ne.jp/web/en/'>Takafumi Taketomi</a>\
595
- </h2> \
596
- <a style='font-size:18px;color: #000000'>This is a preprint version, more details will be available at </a>\
597
- <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
598
- <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
599
- <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
600
- </h2> \
601
- <a style='font-size:18px;color: #000000'>This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some high-quality mode results are shown below. </a> </div>
602
- """
 
 
 
 
 
 
 
 
 
 
603
  )
604
-
605
- # gr.Markdown("""
606
- # <h4 style="text-align: left;">
607
- # This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
608
-
609
- # Details of the low-quality mode:
610
- # 1. Lower resolution.
611
- # 2. More discontinuous frames (causing noticeable "frame jumps").
612
- # 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
613
- # 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
614
- # 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
615
-
616
- # Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
617
- # </h4>
618
- # """)
619
 
620
  # Create a gallery with 5 videos
621
  with gr.Row():
@@ -630,7 +624,15 @@ def make_demo():
630
  video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
631
  video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
632
  video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
633
-
 
 
 
 
 
 
 
 
634
 
635
  with gr.Row():
636
  with gr.Column(scale=4):
@@ -650,20 +652,20 @@ def make_demo():
650
  file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
651
  gr.Markdown("""
652
  <h4 style="text-align: left;">
653
- <a style='font-size:18px;color: #000000'> Details of the low-quality mode: </a>
654
  <br>
655
- <a style='font-size:18px;color: #000000'> 1. Lower resolution.</a>
656
  <br>
657
- <a style='font-size:18px;color: #000000'> 2. More discontinuous graph nodes (causing noticeable "frame jumps"). </a>
658
  <br>
659
- <a style='font-size:18px;color: #000000'> 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing. </a>
660
  <br>
661
- <a style='font-size:18px;color: #000000'> 4. only use first 8 seconds of your input audio.</a>
662
  <br>
663
- <a style='font-size:18px;color: #000000'> 5. custom character for a video up to 10 seconds. </a>
664
  <br>
665
  <br>
666
- <a style='font-size:18px;color: #000000'> Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.</a>
667
  </h4>
668
  """)
669
 
@@ -701,17 +703,17 @@ def make_demo():
701
  outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
702
  )
703
 
704
- with gr.Row():
705
- with gr.Column(scale=4):
706
- print(combined_examples)
707
- gr.Examples(
708
- examples=combined_examples,
709
- inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
710
- outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
711
- fn=tango, # Function that processes both audio and video inputs
712
- label="Select Combined Audio and Video Examples (Cached)",
713
- cache_examples=True
714
- )
715
 
716
  return Interface
717
 
 
580
 
581
  def make_demo():
582
  with gr.Blocks(analytics_enabled=False) as Interface:
 
583
  gr.Markdown(
584
+ """
585
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
586
+ <div>
587
+ <h1>TANGO</h1>
588
+ <span>Generating full-body talking videos from audio and reference video</span>
589
+ <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
590
+ <a href='https://h-liu1997.github.io/'>Haiyang Liu</a>, \
591
+ <a href='https://yangxingchao.github.io/'>Xingchao Yang</a>, \
592
+ <a href=''>Tomoya Akiyama</a>, \
593
+ <a href='https://sky24h.github.io/'> Yuantian Huang</a>, \
594
+ <a href=''>Qiaoge Li</a>, \
595
+ <a href='https://www.tut.ac.jp/english/university/faculty/cs/164.html'>Shigeru Kuriyama</a>, \
596
+ <a href='https://taketomitakafumi.sakura.ne.jp/web/en/'>Takafumi Taketomi</a>\
597
+ </h2>
598
+ <br>
599
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
600
+ <a href="https://arxiv.org/abs/2410.04221"><img src="https://img.shields.io/badge/arXiv-2410.04221-blue"></a>
601
+ &nbsp;
602
+ <a href="https://pantomatrix.github.io/TANGO/"><img src="https://img.shields.io/badge/Project_Page-TANGO-orange" alt="Project Page"></a>
603
+ &nbsp;
604
+ <a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/badge/Github-Code-green"></a>
605
+ &nbsp;
606
+ <a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/github/stars/CyberAgentAILab/TANGO
607
+ "></a>
608
+ </div>
609
+ </div>
610
+ </div>
611
+ """
612
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
  # Create a gallery with 5 videos
615
  with gr.Row():
 
624
  video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
625
  video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
626
  video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
627
+
628
+ with gr.Row():
629
+ gr.Markdown(
630
+ """
631
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
632
+ This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
633
+ </div>
634
+ """
635
+ )
636
 
637
  with gr.Row():
638
  with gr.Column(scale=4):
 
652
  file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
653
  gr.Markdown("""
654
  <h4 style="text-align: left;">
655
+ Details of the low-quality mode:
656
  <br>
657
+ 1. Lower resolution.
658
  <br>
659
+ 2. More discontinuous graph nodes (causing noticeable "frame jumps").
660
  <br>
661
+ 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
662
  <br>
663
+ 4. only use first 8 seconds of your input audio.
664
  <br>
665
+ 5. custom character for a video up to 10 seconds.
666
  <br>
667
  <br>
668
+ Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
669
  </h4>
670
  """)
671
 
 
703
  outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
704
  )
705
 
706
+ # with gr.Row():
707
+ # with gr.Column(scale=4):
708
+ # print(combined_examples)
709
+ # gr.Examples(
710
+ # examples=combined_examples,
711
+ # inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
712
+ # outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
713
+ # fn=tango, # Function that processes both audio and video inputs
714
+ # label="Select Combined Audio and Video Examples (Cached)",
715
+ # cache_examples=True
716
+ # )
717
 
718
  return Interface
719