Spaces:
Sleeping
Sleeping
v0.0.2
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +5 -1
- app.py +53 -6
- checkpoints/BFM_Fitting.zip +0 -3
- checkpoints/BFM_Fitting/01_MorphableModel.mat +0 -3
- checkpoints/BFM_Fitting/01_MorphableModel.mat +1 -0
- checkpoints/BFM_Fitting/BFM09_model_info.mat +0 -3
- checkpoints/BFM_Fitting/BFM09_model_info.mat +1 -0
- checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -0
- checkpoints/BFM_Fitting/BFM_exp_idx.mat +1 -0
- checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -0
- checkpoints/BFM_Fitting/BFM_front_idx.mat +1 -0
- checkpoints/BFM_Fitting/Exp_Pca.bin +0 -3
- checkpoints/BFM_Fitting/Exp_Pca.bin +1 -0
- checkpoints/BFM_Fitting/facemodel_info.mat +0 -0
- checkpoints/BFM_Fitting/facemodel_info.mat +1 -0
- checkpoints/BFM_Fitting/select_vertex_id.mat +0 -0
- checkpoints/BFM_Fitting/select_vertex_id.mat +1 -0
- checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -0
- checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +1 -0
- checkpoints/BFM_Fitting/std_exp.txt +0 -1
- checkpoints/BFM_Fitting/std_exp.txt +1 -0
- checkpoints/auido2exp_00300-model.pth +0 -3
- checkpoints/auido2exp_00300-model.pth +1 -0
- checkpoints/auido2pose_00140-model.pth +0 -3
- checkpoints/auido2pose_00140-model.pth +1 -0
- checkpoints/epoch_20.pth +0 -3
- checkpoints/epoch_20.pth +1 -0
- checkpoints/facevid2vid_00189-model.pth.tar +0 -3
- checkpoints/facevid2vid_00189-model.pth.tar +1 -0
- checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +0 -3
- checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +1 -0
- checkpoints/hub/checkpoints/s3fd-619a316812.pth +0 -3
- checkpoints/hub/checkpoints/s3fd-619a316812.pth +1 -0
- checkpoints/mapping_00229-model.pth.tar +0 -3
- checkpoints/mapping_00229-model.pth.tar +1 -0
- checkpoints/shape_predictor_68_face_landmarks.dat +0 -3
- checkpoints/shape_predictor_68_face_landmarks.dat +1 -0
- checkpoints/wav2lip.pth +0 -3
- checkpoints/wav2lip.pth +1 -0
- docs/sadtalker_logo.png +0 -0
- examples/source_image/full3.png +0 -0
- examples/source_image/full4.jpeg +0 -0
- inference.py +0 -134
- modules/__pycache__/gfpgan_inference.cpython-38.pyc +0 -0
- modules/__pycache__/gfpgan_inference.cpython-39.pyc +0 -0
- modules/__pycache__/sadtalker_test.cpython-38.pyc +0 -0
- modules/__pycache__/sadtalker_test.cpython-39.pyc +0 -0
- modules/__pycache__/text2speech.cpython-38.pyc +0 -0
- modules/__pycache__/text2speech.cpython-39.pyc +0 -0
- modules/gfpgan_inference.py +0 -36
.gitignore
CHANGED
@@ -152,4 +152,8 @@ dmypy.json
|
|
152 |
# Cython debug symbols
|
153 |
cython_debug/
|
154 |
|
155 |
-
results/
|
|
|
|
|
|
|
|
|
|
152 |
# Cython debug symbols
|
153 |
cython_debug/
|
154 |
|
155 |
+
results/
|
156 |
+
checkpoints/
|
157 |
+
gradio_cached_examples/
|
158 |
+
gfpgan/
|
159 |
+
start.sh
|
app.py
CHANGED
@@ -3,15 +3,20 @@ import tempfile
|
|
3 |
import gradio as gr
|
4 |
from src.gradio_demo import SadTalker
|
5 |
from src.utils.text2speech import TTSTalker
|
|
|
6 |
|
7 |
def get_source_image(image):
|
8 |
return image
|
9 |
|
10 |
-
|
|
|
|
|
11 |
|
12 |
def sadtalker_demo():
|
13 |
|
14 |
-
|
|
|
|
|
15 |
tts_talker = TTSTalker()
|
16 |
|
17 |
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
@@ -42,7 +47,8 @@ def sadtalker_demo():
|
|
42 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
43 |
with gr.TabItem('Settings'):
|
44 |
with gr.Column(variant='panel'):
|
45 |
-
|
|
|
46 |
enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
|
47 |
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
48 |
|
@@ -54,42 +60,83 @@ def sadtalker_demo():
|
|
54 |
[
|
55 |
'examples/source_image/full_body_1.png',
|
56 |
'examples/driven_audio/bus_chinese.wav',
|
|
|
57 |
True,
|
58 |
False
|
59 |
],
|
60 |
[
|
61 |
'examples/source_image/full_body_2.png',
|
62 |
-
'examples/driven_audio/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
True,
|
64 |
False
|
65 |
],
|
66 |
[
|
67 |
'examples/source_image/art_13.png',
|
68 |
'examples/driven_audio/fayu.wav',
|
|
|
69 |
True,
|
70 |
False
|
71 |
],
|
72 |
[
|
73 |
'examples/source_image/art_5.png',
|
74 |
'examples/driven_audio/chinese_news.wav',
|
75 |
-
|
|
|
76 |
False
|
77 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
]
|
79 |
gr.Examples(examples=examples,
|
80 |
inputs=[
|
81 |
source_image,
|
82 |
driven_audio,
|
|
|
83 |
is_still_mode,
|
84 |
enhancer],
|
85 |
outputs=[gen_video],
|
86 |
fn=sad_talker.test,
|
87 |
-
cache_examples=os.getenv('SYSTEM') == 'spaces')
|
88 |
|
89 |
submit.click(
|
90 |
fn=sad_talker.test,
|
91 |
inputs=[source_image,
|
92 |
driven_audio,
|
|
|
93 |
is_still_mode,
|
94 |
enhancer],
|
95 |
outputs=[gen_video]
|
|
|
3 |
import gradio as gr
|
4 |
from src.gradio_demo import SadTalker
|
5 |
from src.utils.text2speech import TTSTalker
|
6 |
+
from huggingface_hub import snapshot_download
|
7 |
|
8 |
def get_source_image(image):
|
9 |
return image
|
10 |
|
11 |
+
def download_model():
|
12 |
+
REPO_ID = 'vinthony/SadTalker'
|
13 |
+
snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
|
14 |
|
15 |
def sadtalker_demo():
|
16 |
|
17 |
+
download_model()
|
18 |
+
|
19 |
+
sad_talker = SadTalker(lazy_load=True)
|
20 |
tts_talker = TTSTalker()
|
21 |
|
22 |
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
|
|
47 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
48 |
with gr.TabItem('Settings'):
|
49 |
with gr.Column(variant='panel'):
|
50 |
+
preprocess_type = gr.Radio(['crop','resize','full'], value='crop', label='preprocess', info="How to handle input image?")
|
51 |
+
is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works with preprocess `full`)")
|
52 |
enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
|
53 |
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
54 |
|
|
|
60 |
[
|
61 |
'examples/source_image/full_body_1.png',
|
62 |
'examples/driven_audio/bus_chinese.wav',
|
63 |
+
'crop',
|
64 |
True,
|
65 |
False
|
66 |
],
|
67 |
[
|
68 |
'examples/source_image/full_body_2.png',
|
69 |
+
'examples/driven_audio/japanese.wav',
|
70 |
+
'crop',
|
71 |
+
False,
|
72 |
+
False
|
73 |
+
],
|
74 |
+
[
|
75 |
+
'examples/source_image/full3.png',
|
76 |
+
'examples/driven_audio/deyu.wav',
|
77 |
+
'crop',
|
78 |
+
False,
|
79 |
+
True
|
80 |
+
],
|
81 |
+
[
|
82 |
+
'examples/source_image/full4.jpeg',
|
83 |
+
'examples/driven_audio/eluosi.wav',
|
84 |
+
'full',
|
85 |
+
False,
|
86 |
+
True
|
87 |
+
],
|
88 |
+
[
|
89 |
+
'examples/source_image/full4.jpeg',
|
90 |
+
'examples/driven_audio/imagine.wav',
|
91 |
+
'full',
|
92 |
+
True,
|
93 |
+
True
|
94 |
+
],
|
95 |
+
[
|
96 |
+
'examples/source_image/full_body_1.png',
|
97 |
+
'examples/driven_audio/bus_chinese.wav',
|
98 |
+
'full',
|
99 |
True,
|
100 |
False
|
101 |
],
|
102 |
[
|
103 |
'examples/source_image/art_13.png',
|
104 |
'examples/driven_audio/fayu.wav',
|
105 |
+
'resize',
|
106 |
True,
|
107 |
False
|
108 |
],
|
109 |
[
|
110 |
'examples/source_image/art_5.png',
|
111 |
'examples/driven_audio/chinese_news.wav',
|
112 |
+
'resize',
|
113 |
+
False,
|
114 |
False
|
115 |
],
|
116 |
+
[
|
117 |
+
'examples/source_image/art_5.png',
|
118 |
+
'examples/driven_audio/RD_Radio31_000.wav',
|
119 |
+
'resize',
|
120 |
+
True,
|
121 |
+
True
|
122 |
+
],
|
123 |
]
|
124 |
gr.Examples(examples=examples,
|
125 |
inputs=[
|
126 |
source_image,
|
127 |
driven_audio,
|
128 |
+
preprocess_type,
|
129 |
is_still_mode,
|
130 |
enhancer],
|
131 |
outputs=[gen_video],
|
132 |
fn=sad_talker.test,
|
133 |
+
cache_examples=True) # os.getenv('SYSTEM') == 'spaces')
|
134 |
|
135 |
submit.click(
|
136 |
fn=sad_talker.test,
|
137 |
inputs=[source_image,
|
138 |
driven_audio,
|
139 |
+
preprocess_type,
|
140 |
is_still_mode,
|
141 |
enhancer],
|
142 |
outputs=[gen_video]
|
checkpoints/BFM_Fitting.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:785f77f3de288568e76666cd419dcf40646d3f74eae6d4fa3b766c933087a9d8
|
3 |
-
size 404051745
|
|
|
|
|
|
|
|
checkpoints/BFM_Fitting/01_MorphableModel.mat
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
|
3 |
-
size 240875364
|
|
|
|
|
|
|
|
checkpoints/BFM_Fitting/01_MorphableModel.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
|
checkpoints/BFM_Fitting/BFM09_model_info.mat
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
|
3 |
-
size 127170280
|
|
|
|
|
|
|
|
checkpoints/BFM_Fitting/BFM09_model_info.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
|
checkpoints/BFM_Fitting/BFM_exp_idx.mat
DELETED
Binary file (91.9 kB)
|
|
checkpoints/BFM_Fitting/BFM_exp_idx.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7
|
checkpoints/BFM_Fitting/BFM_front_idx.mat
DELETED
Binary file (44.9 kB)
|
|
checkpoints/BFM_Fitting/BFM_front_idx.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b
|
checkpoints/BFM_Fitting/Exp_Pca.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
|
3 |
-
size 51086404
|
|
|
|
|
|
|
|
checkpoints/BFM_Fitting/Exp_Pca.bin
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
|
checkpoints/BFM_Fitting/facemodel_info.mat
DELETED
Binary file (739 kB)
|
|
checkpoints/BFM_Fitting/facemodel_info.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a
|
checkpoints/BFM_Fitting/select_vertex_id.mat
DELETED
Binary file (62.3 kB)
|
|
checkpoints/BFM_Fitting/select_vertex_id.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd
|
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat
DELETED
Binary file (994 Bytes)
|
|
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d
|
checkpoints/BFM_Fitting/std_exp.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
|
|
|
|
checkpoints/BFM_Fitting/std_exp.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb
|
checkpoints/auido2exp_00300-model.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
|
3 |
-
size 34278319
|
|
|
|
|
|
|
|
checkpoints/auido2exp_00300-model.pth
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
|
checkpoints/auido2pose_00140-model.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
|
3 |
-
size 95916155
|
|
|
|
|
|
|
|
checkpoints/auido2pose_00140-model.pth
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
|
checkpoints/epoch_20.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
|
3 |
-
size 288860037
|
|
|
|
|
|
|
|
checkpoints/epoch_20.pth
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
|
checkpoints/facevid2vid_00189-model.pth.tar
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
|
3 |
-
size 2112619148
|
|
|
|
|
|
|
|
checkpoints/facevid2vid_00189-model.pth.tar
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
|
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
|
3 |
-
size 96316515
|
|
|
|
|
|
|
|
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
|
checkpoints/hub/checkpoints/s3fd-619a316812.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
|
3 |
-
size 89843225
|
|
|
|
|
|
|
|
checkpoints/hub/checkpoints/s3fd-619a316812.pth
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
|
checkpoints/mapping_00229-model.pth.tar
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
|
3 |
-
size 155521183
|
|
|
|
|
|
|
|
checkpoints/mapping_00229-model.pth.tar
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
|
checkpoints/shape_predictor_68_face_landmarks.dat
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
3 |
-
size 99693937
|
|
|
|
|
|
|
|
checkpoints/shape_predictor_68_face_landmarks.dat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
checkpoints/wav2lip.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
|
3 |
-
size 435807851
|
|
|
|
|
|
|
|
checkpoints/wav2lip.pth
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
|
docs/sadtalker_logo.png
ADDED
examples/source_image/full3.png
ADDED
examples/source_image/full4.jpeg
ADDED
inference.py
DELETED
@@ -1,134 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
from time import strftime
|
3 |
-
import os, sys, time
|
4 |
-
from argparse import ArgumentParser
|
5 |
-
|
6 |
-
from src.utils.preprocess import CropAndExtract
|
7 |
-
from src.test_audio2coeff import Audio2Coeff
|
8 |
-
from src.facerender.animate import AnimateFromCoeff
|
9 |
-
from src.generate_batch import get_data
|
10 |
-
from src.generate_facerender_batch import get_facerender_data
|
11 |
-
|
12 |
-
def main(args):
|
13 |
-
#torch.backends.cudnn.enabled = False
|
14 |
-
|
15 |
-
pic_path = args.source_image
|
16 |
-
audio_path = args.driven_audio
|
17 |
-
save_dir = os.path.join(args.result_dir, strftime("%Y_%m_%d_%H.%M.%S"))
|
18 |
-
os.makedirs(save_dir, exist_ok=True)
|
19 |
-
pose_style = args.pose_style
|
20 |
-
device = args.device
|
21 |
-
batch_size = args.batch_size
|
22 |
-
camera_yaw_list = args.camera_yaw
|
23 |
-
camera_pitch_list = args.camera_pitch
|
24 |
-
camera_roll_list = args.camera_roll
|
25 |
-
|
26 |
-
current_code_path = sys.argv[0]
|
27 |
-
current_root_path = os.path.split(current_code_path)[0]
|
28 |
-
|
29 |
-
os.environ['TORCH_HOME']=os.path.join(current_root_path, args.checkpoint_dir)
|
30 |
-
|
31 |
-
path_of_lm_croper = os.path.join(current_root_path, args.checkpoint_dir, 'shape_predictor_68_face_landmarks.dat')
|
32 |
-
path_of_net_recon_model = os.path.join(current_root_path, args.checkpoint_dir, 'epoch_20.pth')
|
33 |
-
dir_of_BFM_fitting = os.path.join(current_root_path, args.checkpoint_dir, 'BFM_Fitting')
|
34 |
-
wav2lip_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'wav2lip.pth')
|
35 |
-
|
36 |
-
audio2pose_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2pose_00140-model.pth')
|
37 |
-
audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')
|
38 |
-
|
39 |
-
audio2exp_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2exp_00300-model.pth')
|
40 |
-
audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')
|
41 |
-
|
42 |
-
free_view_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'facevid2vid_00189-model.pth.tar')
|
43 |
-
mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00229-model.pth.tar')
|
44 |
-
facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender.yaml')
|
45 |
-
|
46 |
-
#init model
|
47 |
-
print(path_of_net_recon_model)
|
48 |
-
preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)
|
49 |
-
|
50 |
-
print(audio2pose_checkpoint)
|
51 |
-
print(audio2exp_checkpoint)
|
52 |
-
audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path,
|
53 |
-
audio2exp_checkpoint, audio2exp_yaml_path,
|
54 |
-
wav2lip_checkpoint, device)
|
55 |
-
|
56 |
-
print(free_view_checkpoint)
|
57 |
-
print(mapping_checkpoint)
|
58 |
-
animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint,
|
59 |
-
facerender_yaml_path, device)
|
60 |
-
|
61 |
-
#crop image and extract 3dmm from image
|
62 |
-
first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
|
63 |
-
os.makedirs(first_frame_dir, exist_ok=True)
|
64 |
-
first_coeff_path, crop_pic_path = preprocess_model.generate(pic_path, first_frame_dir)
|
65 |
-
if first_coeff_path is None:
|
66 |
-
print("Can't get the coeffs of the input")
|
67 |
-
return
|
68 |
-
|
69 |
-
#audio2ceoff
|
70 |
-
batch = get_data(first_coeff_path, audio_path, device)
|
71 |
-
coeff_path = audio_to_coeff.generate(batch, save_dir, pose_style)
|
72 |
-
|
73 |
-
# 3dface render
|
74 |
-
if args.face3dvis:
|
75 |
-
from src.face3d.visualize import gen_composed_video
|
76 |
-
gen_composed_video(args, device, first_coeff_path, coeff_path, audio_path, os.path.join(save_dir, '3dface.mp4'))
|
77 |
-
|
78 |
-
#coeff2video
|
79 |
-
data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
|
80 |
-
batch_size, camera_yaw_list, camera_pitch_list, camera_roll_list,
|
81 |
-
expression_scale=args.expression_scale, still_mode=args.still)
|
82 |
-
|
83 |
-
animate_from_coeff.generate(data, save_dir, enhancer=args.enhancer)
|
84 |
-
video_name = data['video_name']
|
85 |
-
|
86 |
-
if args.enhancer is not None:
|
87 |
-
print(f'The generated video is named {video_name}_enhanced in {save_dir}')
|
88 |
-
else:
|
89 |
-
print(f'The generated video is named {video_name} in {save_dir}')
|
90 |
-
|
91 |
-
return os.path.join(save_dir, video_name+'.mp4'), os.path.join(save_dir, video_name+'.mp4')
|
92 |
-
|
93 |
-
|
94 |
-
if __name__ == '__main__':
|
95 |
-
|
96 |
-
parser = ArgumentParser()
|
97 |
-
parser.add_argument("--driven_audio", default='./examples/driven_audio/japanese.wav', help="path to driven audio")
|
98 |
-
parser.add_argument("--source_image", default='./examples/source_image/art_0.png', help="path to source image")
|
99 |
-
parser.add_argument("--checkpoint_dir", default='./checkpoints', help="path to output")
|
100 |
-
parser.add_argument("--result_dir", default='./results', help="path to output")
|
101 |
-
parser.add_argument("--pose_style", type=int, default=0, help="input pose style from [0, 46)")
|
102 |
-
parser.add_argument("--batch_size", type=int, default=2, help="the batch size of facerender")
|
103 |
-
parser.add_argument("--expression_scale", type=float, default=1., help="the batch size of facerender")
|
104 |
-
parser.add_argument('--camera_yaw', nargs='+', type=int, default=[0], help="the camera yaw degree")
|
105 |
-
parser.add_argument('--camera_pitch', nargs='+', type=int, default=[0], help="the camera pitch degree")
|
106 |
-
parser.add_argument('--camera_roll', nargs='+', type=int, default=[0], help="the camera roll degree")
|
107 |
-
parser.add_argument('--enhancer', type=str, default=None, help="Face enhancer, [GFPGAN]")
|
108 |
-
parser.add_argument("--cpu", dest="cpu", action="store_true")
|
109 |
-
parser.add_argument("--face3dvis", action="store_true", help="generate 3d face and 3d landmarks")
|
110 |
-
parser.add_argument("--still", action="store_true")
|
111 |
-
|
112 |
-
# net structure and parameters
|
113 |
-
parser.add_argument('--net_recon', type=str, default='resnet50', choices=['resnet18', 'resnet34', 'resnet50'], help='not use')
|
114 |
-
parser.add_argument('--init_path', type=str, default=None, help='not Use')
|
115 |
-
parser.add_argument('--use_last_fc',default=False, help='zero initialize the last fc')
|
116 |
-
parser.add_argument('--bfm_folder', type=str, default='./checkpoints/BFM_Fitting/')
|
117 |
-
parser.add_argument('--bfm_model', type=str, default='BFM_model_front.mat', help='bfm model')
|
118 |
-
|
119 |
-
# default renderer parameters
|
120 |
-
parser.add_argument('--focal', type=float, default=1015.)
|
121 |
-
parser.add_argument('--center', type=float, default=112.)
|
122 |
-
parser.add_argument('--camera_d', type=float, default=10.)
|
123 |
-
parser.add_argument('--z_near', type=float, default=5.)
|
124 |
-
parser.add_argument('--z_far', type=float, default=15.)
|
125 |
-
|
126 |
-
args = parser.parse_args()
|
127 |
-
|
128 |
-
if torch.cuda.is_available() and not args.cpu:
|
129 |
-
args.device = "cuda"
|
130 |
-
else:
|
131 |
-
args.device = "cpu"
|
132 |
-
|
133 |
-
main(args)
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/__pycache__/gfpgan_inference.cpython-38.pyc
DELETED
Binary file (1.36 kB)
|
|
modules/__pycache__/gfpgan_inference.cpython-39.pyc
DELETED
Binary file (1.4 kB)
|
|
modules/__pycache__/sadtalker_test.cpython-38.pyc
DELETED
Binary file (3.63 kB)
|
|
modules/__pycache__/sadtalker_test.cpython-39.pyc
DELETED
Binary file (3.98 kB)
|
|
modules/__pycache__/text2speech.cpython-38.pyc
DELETED
Binary file (473 Bytes)
|
|
modules/__pycache__/text2speech.cpython-39.pyc
DELETED
Binary file (477 Bytes)
|
|
modules/gfpgan_inference.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
import os,sys
|
2 |
-
|
3 |
-
def gfpgan(scale, origin_mp4_path):
|
4 |
-
current_code_path = sys.argv[0]
|
5 |
-
current_root_path = os.path.split(current_code_path)[0]
|
6 |
-
print(current_root_path)
|
7 |
-
gfpgan_code_path = current_root_path+'/repositories/GFPGAN/inference_gfpgan.py'
|
8 |
-
print(gfpgan_code_path)
|
9 |
-
|
10 |
-
#video2pic
|
11 |
-
result_dir = os.path.split(origin_mp4_path)[0]
|
12 |
-
video_name = os.path.split(origin_mp4_path)[1]
|
13 |
-
video_name = video_name.split('.')[0]
|
14 |
-
print(video_name)
|
15 |
-
str_scale = str(scale).replace('.', '_')
|
16 |
-
output_mp4_path = os.path.join(result_dir, video_name+'##'+str_scale+'.mp4')
|
17 |
-
temp_output_mp4_path = os.path.join(result_dir, 'temp_'+video_name+'##'+str_scale+'.mp4')
|
18 |
-
|
19 |
-
audio_name = video_name.split('##')[-1]
|
20 |
-
audio_path = os.path.join(result_dir, audio_name+'.wav')
|
21 |
-
temp_pic_dir1 = os.path.join(result_dir, video_name)
|
22 |
-
temp_pic_dir2 = os.path.join(result_dir, video_name+'##'+str_scale)
|
23 |
-
os.makedirs(temp_pic_dir1, exist_ok=True)
|
24 |
-
os.makedirs(temp_pic_dir2, exist_ok=True)
|
25 |
-
cmd1 = 'ffmpeg -i \"{}\" -start_number 0 \"{}\"/%06d.png -loglevel error -y'.format(origin_mp4_path, temp_pic_dir1)
|
26 |
-
os.system(cmd1)
|
27 |
-
cmd2 = f'python {gfpgan_code_path} -i {temp_pic_dir1} -o {temp_pic_dir2} -s {scale}'
|
28 |
-
os.system(cmd2)
|
29 |
-
cmd3 = f'ffmpeg -r 25 -f image2 -i {temp_pic_dir2}/%06d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p {temp_output_mp4_path}'
|
30 |
-
os.system(cmd3)
|
31 |
-
cmd4 = f'ffmpeg -y -i {temp_output_mp4_path} -i {audio_path} -vcodec copy {output_mp4_path}'
|
32 |
-
os.system(cmd4)
|
33 |
-
#shutil.rmtree(temp_pic_dir1)
|
34 |
-
#shutil.rmtree(temp_pic_dir2)
|
35 |
-
|
36 |
-
return output_mp4_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|