shisheng7 committed on
Commit
f7e8357
•
1 Parent(s): 2a393cc

initial update

Browse files
Files changed (3) hide show
  1. README.md +3 -4
  2. app.py +88 -0
  3. requirements.txt +30 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: JoyHallo
3
- emoji: πŸ†
4
- colorFrom: indigo
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
@@ -10,4 +10,3 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: JoyHallo
3
+ emoji: πŸƒ
4
+ colorFrom: gray
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
 
10
  license: mit
11
  ---
12
 
 
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from huggingface_hub import snapshot_download
4
+ import gradio as gr
5
# Work from the directory that contains this file so the relative
# "pretrained_models" paths used below resolve next to the app.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Fetch the model snapshots from the Hugging Face Hub into ./pretrained_models.
hallo_dir = snapshot_download(
    repo_id="fudan-generative-ai/hallo",
    local_dir="pretrained_models",
)
joyhallo_dir = snapshot_download(
    repo_id="jdh-algo/JoyHallo-v1",
    local_dir="pretrained_models/joyhallo",
)
wav_dir = snapshot_download(
    repo_id="TencentGameMate/chinese-wav2vec2-base",
    local_dir="pretrained_models/chinese-wav2vec2-base",
)

# Startup diagnostics: print two of the snapshot locations and list the
# contents of the base snapshot directory.
print(hallo_dir, joyhallo_dir)
print(os.listdir(hallo_dir))

# Imported only after the downloads above have run.
# NOTE(review): presumably scripts.inference expects the checkpoints to be on
# disk already - confirm before moving this import to the top of the file.
from scripts.inference import predict
14
+
15
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Generate a talking-head video from a face image and a driving audio clip.

    Args:
        source_image: Filesystem path of the face image, or ``None`` when the
            user has not provided one.
        driving_audio: Filesystem path of the audio clip, or ``None`` when the
            user has not provided one.
        progress: Gradio progress tracker. It is not used directly in the
            body; declaring it with ``track_tqdm=True`` asks Gradio to show
            tqdm progress bars created while this function runs.

    Returns:
        Whatever ``predict`` returns; the UI feeds it to the video output.

    Raises:
        gr.Error: If either input is missing, so the user sees a readable
            message instead of a failure from inside ``predict``.
    """
    if not source_image:
        raise gr.Error("Please upload a face image before generating.")
    if not driving_audio:
        raise gr.Error("Please upload a driving audio clip before generating.")

    # The four trailing numbers are fixed settings forwarded to predict().
    # NOTE(review): presumably pose/face/lip weights and a face expand ratio -
    # confirm against scripts.inference.predict.
    return predict(source_image, driving_audio, 1.0, 1.0, 1.0, 1.2)
18
+
19
+
20
# Custom stylesheet handed to gr.Blocks(css=...).
# It styles two notice boxes (#warning-ready, #warning-duplicate), their action
# links, and a dark-mode variant. None of the components visible in this file
# set these element ids, so the rules only apply if such elements are added.
css = '''
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
.dark #warning-duplicate {
    background-color: #0c0c0c !important;
    border: 1px solid white !important;
}
'''
56
+
57
# Build the single-page UI: header and usage notes on top, then inputs and the
# "Generate" button on the left with the resulting video on the right.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# JoyHallo: Digital human model for Mandarin")
    gr.Markdown("Generate talking head avatars driven with Mandarin speech.")
    # The notice text is kept at column 0 so it renders as plain Markdown
    # paragraphs/lists regardless of how leading whitespace is treated.
    gr.Markdown("""

Data requirements:

Image:
1. Cropped to square shape.
2. Face should be facing forward and occupy 50%-70% of the image area.

Audio:
1. Audio in wav format.
2. Mandarin or English or mixed, with clear audio and suitable background music.

! Important: Too long audio will cause a very long processing time, please keep the audio length within 5s.
""")
    with gr.Row():
        with gr.Column():
            # type="filepath": the handler receives paths on disk, not arrays.
            avatar_face = gr.Image(type="filepath", label="Face")
            driving_audio = gr.Audio(type="filepath", label="Driving audio")
            generate = gr.Button("Generate")
        with gr.Column():
            output_video = gr.Video(label="Your talking head")

    # Clicking "Generate" runs inference on the two inputs and shows the
    # returned value in the video player.
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio],
        outputs=output_video
    )

# Start the Gradio server for this app.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.28.0
2
+ audio-separator==0.17.2
3
+ av==12.1.0
4
+ bitsandbytes==0.43.1
5
+ decord==0.6.0
6
+ diffusers==0.27.2
7
+ einops==0.8.0
8
+ insightface==0.7.3
9
+ librosa==0.10.2.post1
10
+ mediapipe[vision]==0.10.14
11
+ mlflow==2.13.1
12
+ moviepy==1.0.3
13
+ numpy==1.26.4
14
+ omegaconf==2.3.0
15
+ onnx2torch==1.5.14
16
+ onnx==1.16.1
17
+ onnxruntime==1.18.0
18
+ opencv-contrib-python==4.9.0.80
19
+ opencv-python-headless==4.9.0.80
20
+ opencv-python==4.9.0.80
21
+ pillow==10.3.0
22
+ setuptools==70.0.0
23
+ torch==2.2.2
24
+ torchvision==0.17.2
25
+ tqdm==4.66.4
26
+ transformers==4.39.2
27
+ xformers==0.0.25.post1
28
+ isort==5.13.2
29
+ pylint==3.2.2
30
+ pre-commit==3.7.1