{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "U1xFNFU58_2j" }, "source": [ "## Goal: Make anyone speak anything (LipSync)\n", "\n", "* GitHub: https://github.com/Rudrabha/Wav2Lip\n", "* Paper: https://arxiv.org/abs/2008.10010\n", "* Original notebook: https://colab.research.google.com/drive/1tZpDWXz49W6wDcTprANRGLo2D_EbD5J8?usp=sharing\n", "\n", "\n", "\n", "\n", "**Modded by: [justinjohn-03](https://github.com/justinjohn0306)**\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "Qgo-oaI3JU2u" }, "outputs": [], "source": [ "#@title

Step1: Setup Wav2Lip\n",
    "#@markdown * Install dependency\n",
    "#@markdown * Download pretrained model\n",
    "from IPython.display import HTML, clear_output\n",
    "\n",
    "# Start every setup run from an empty scratch directory for the input video/audio.\n",
    "!rm -rf /content/sample_data\n",
    "!mkdir /content/sample_data\n",
    "\n",
    "# Clone only when the checkout is missing, so re-running this cell does not hit git's\n",
    "# \"destination path already exists\" error.\n",
    "![ -d /content/Wav2Lip ] || git clone https://github.com/justinjohn0306/Wav2Lip /content/Wav2Lip\n",
    "\n",
    "%cd /content/Wav2Lip\n",
    "\n",
    "#download the pretrained model\n",
    "!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth' -O 'checkpoints/wav2lip.pth'\n",
    "!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth' -O 'checkpoints/wav2lip_gan.pth'\n",
    "!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth' -O 'checkpoints/resnet50.pth'\n",
    "!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth' -O 'checkpoints/mobilenet.pth'\n",
    "\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# NOTE(review): the ghc wheel is fetched from a personal GitHub repository and is not\n",
    "# hash-checked - audit it before relying on this notebook.\n",
    "%pip install -q https://raw.githubusercontent.com/AwaleSajil/ghc/master/ghc-1.0-py3-none-any.whl\n",
    "%pip install -q git+https://github.com/elliottzheng/batch-face.git@master\n",
    "%pip install -q ffmpeg-python mediapipe==0.8.11\n",
    "\n",
    "#this code for recording audio\n",
    "\"\"\"\n",
    "To write this piece of code I took inspiration/code from a lot of places.\n",
    "It was late night, so I'm not sure how much I created or just copied o.O\n",
    "Here are some of the possible references:\n",
    "https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/\n",
    "https://stackoverflow.com/a/18650249\n",
    "https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/\n",
    "https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/\n",
    "https://stackoverflow.com/a/49019356\n",
    "\"\"\"\n",
    "# These imports come after the installs above because ffmpeg-python is only available then.\n",
    "from IPython.display import HTML, Audio\n",
    "from google.colab.output import eval_js\n",
    "from base64 import b64decode\n",
    "import numpy as np\n",
    "from scipy.io.wavfile import read as wav_read\n",
    "import io\n",
    "import ffmpeg\n",
    "\n",
    "# NOTE(review): AUDIO_HTML is empty in this copy of the notebook. get_audio() reads a\n",
    "# JavaScript value named \"data\" after displaying it, so the recorder markup/script\n",
    "# presumably belongs here - confirm against the upstream notebook.\n",
    "AUDIO_HTML = \"\"\"\n",
    "\n",
    "\"\"\"\n",
    "\n",
    "%cd /\n",
    "from 
ghc.l_ghc_cf import l_ghc_cf\n",
    "%cd content\n",
    "\n",
    "def get_audio():\n",
    "    \"\"\"Record audio in the browser and return it as ``(samples, sample_rate)``.\n",
    "\n",
    "    Displays ``AUDIO_HTML``, reads the JavaScript value ``data`` through ``eval_js``,\n",
    "    base64-decodes the part after the first comma, converts it to WAV with ffmpeg over\n",
    "    pipes and parses the result with ``scipy.io.wavfile.read``.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if ffmpeg returns fewer bytes than a RIFF header needs.\n",
    "    \"\"\"\n",
    "    display(HTML(AUDIO_HTML))\n",
    "    data = eval_js(\"data\")\n",
    "    binary = b64decode(data.split(',')[1])\n",
    "\n",
    "    process = (\n",
    "        ffmpeg\n",
    "        .input('pipe:0')\n",
    "        .output('pipe:1', format='wav')\n",
    "        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)\n",
    "    )\n",
    "    output, err = process.communicate(input=binary)\n",
    "    if len(output) < 8:\n",
    "        # Surface ffmpeg's own message instead of failing later inside the WAV parser.\n",
    "        raise RuntimeError('ffmpeg produced no WAV data: ' + (err or b'').decode(errors='replace'))\n",
    "\n",
    "    # Overwrite bytes 4:8 (the RIFF chunk size field) with the actual size, stored\n",
    "    # least-significant byte first.\n",
    "    riff_chunk_size = len(output) - 8\n",
    "    riff = output[:4] + riff_chunk_size.to_bytes(4, 'little') + output[8:]\n",
    "\n",
    "    sr, audio = wav_read(io.BytesIO(riff))\n",
    "\n",
    "    return audio, sr\n",
    "\n",
    "\n",
    "from IPython.display import HTML, clear_output\n",
    "from base64 import b64encode\n",
    "\n",
    "def showVideo(path):\n",
    "    \"\"\"Return an ``HTML`` object embedding the MP4 at ``path`` as a base64 data URL.\"\"\"\n",
    "    with open(str(path), 'rb') as video_file:\n",
    "        mp4 = video_file.read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    # The template must contain a %s placeholder; without one the % operator raises\n",
    "    # \"TypeError: not all arguments converted during string formatting\".\n",
    "    return HTML(\"\"\"\n",
    "    <video width=700 controls>\n",
    "        <source src=\"%s\" type=\"video/mp4\">\n",
    "    </video>\n",
    "    \"\"\" % data_url)\n",
    "\n",
    "clear_output()\n",
    "print(\"All set and ready!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": { "id": "SEdy6PWDXMRL" },
   "source": [ "# LipSync Youtube Video" ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "QI4kcm8QEeGZ" },
   "outputs": [],
   "source": [
    "#@title STEP2: Select a Youtube Video\n",
    "import os\n",
    "\n",
    "# Install yt-dlp into the running kernel's environment\n",
    "%pip install -q yt-dlp\n",
    "\n",
    "#@markdown ## Find YouTube video ID from URL\n",
    "\n",
    "#@markdown ___\n",
    "\n",
    "#@markdown Link format:\n",
    "\n",
    "#@markdown ``https://youtu.be/vAnWYLTdvfY`` ❌\n",
    "\n",
    "#@markdown ``https://www.youtube.com/watch?v=vAnWYLTdvfY`` ✔️\n",
    "\n",
    "# Remove the download left over from a previous run.\n",
    "!rm -df youtube.mp4\n",
    "\n",
    "#@markdown ___\n",
    "from urllib import parse as urlparse\n",
    "YOUTUBE_URL = 
'https://www.youtube.com/watch?v=vAnWYLTdvfY' #@param {type:\"string\"}\n",
    "url_data = urlparse.urlparse(YOUTUBE_URL)\n",
    "query = urlparse.parse_qs(url_data.query)\n",
    "# Fail with an actionable message instead of a bare KeyError when the link has no ?v= part.\n",
    "if not query.get(\"v\"):\n",
    "    raise ValueError(f\"No video ID found in {YOUTUBE_URL!r}; use the https://www.youtube.com/watch?v=VIDEO_ID link format.\")\n",
    "YOUTUBE_ID = query[\"v\"][0]\n",
    "\n",
    "\n",
    "# remove previous input video\n",
    "!rm -f /content/sample_data/input_vid.mp4\n",
    "\n",
    "\n",
    "#@markdown ___\n",
    "\n",
    "#@markdown ### Trim the video (start, end) seconds\n",
    "start = 35 #@param {type:\"integer\"}\n",
    "end = 62 #@param {type:\"integer\"}\n",
    "interval = end - start\n",
    "# The interval is passed to ffmpeg as the clip duration, so it has to be positive.\n",
    "if interval <= 0:\n",
    "    raise ValueError(f\"end ({end}) must be greater than start ({start})\")\n",
    "\n",
    "#@markdown Note: ``the trimmed video must have face on all frames``\n",
    "\n",
    "# Download the YouTube video using yt-dlp\n",
    "!yt-dlp -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n",
    "\n",
    "# Cut the video using FFmpeg\n",
    "!ffmpeg -y -i youtube.mp4 -ss {start} -t {interval} -async 1 /content/sample_data/input_vid.mp4\n",
    "\n",
    "# Preview the trimmed video\n",
    "from IPython.display import HTML\n",
    "from base64 import b64encode\n",
    "with open('/content/sample_data/input_vid.mp4', 'rb') as video_file:\n",
    "    mp4 = video_file.read()\n",
    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "# The markup has to reference data_url; an empty template renders nothing.\n",
    "HTML(f\"\"\"<video width=700 controls><source src=\"{data_url}\" type=\"video/mp4\"></video>\"\"\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "zS_RAeh-IfZy" },
   "outputs": [],
   "source": [
    "#@title STEP3: Select Audio (Record, Upload from local drive or Gdrive)\n",
    "import os\n",
    "# display is imported from IPython.display (the IPython.core.display location is deprecated);\n",
    "# clear_output is imported here so this cell does not rely on the setup cell's import.\n",
    "from IPython.display import Audio, clear_output, display\n",
    "\n",
    "upload_method = 'Upload' #@param ['Record', 'Upload', 'Custom Path']\n",
    "\n",
    "#remove previous input audio\n",
    "if os.path.isfile('/content/sample_data/input_audio.wav'):\n",
    "    os.remove('/content/sample_data/input_audio.wav')\n",
    "\n",
    "def displayAudio():\n",
    "    \"\"\"Show an audio player for the converted input audio file.\"\"\"\n",
    "    display(Audio('/content/sample_data/input_audio.wav'))\n",
    "\n",
    "if upload_method == 'Record':\n",
    "    audio, sr = get_audio()\n",
    "    # Import the submodule explicitly; a bare `import scipy` does not guarantee scipy.io.wavfile is loaded.\n",
    "    from scipy.io import wavfile\n",
    "    wavfile.write('/content/sample_data/input_audio.wav', sr, audio)\n",
    "\n",
    "elif 
upload_method == 'Upload':\n",
    "    from google.colab import files\n",
    "    uploaded = files.upload()\n",
    "    # A cancelled upload yields an empty dict; stop with a clear message instead of an IndexError.\n",
    "    if not uploaded:\n",
    "        raise ValueError('No audio file was uploaded.')\n",
    "    for fn in uploaded.keys():\n",
    "        print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
    "            name=fn, length=len(uploaded[fn])))\n",
    "\n",
    "    # Consider only the first file\n",
    "    PATH_TO_YOUR_AUDIO = str(list(uploaded.keys())[0])\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    # Save as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "    clear_output()\n",
    "    displayAudio()\n",
    "\n",
    "elif upload_method == 'Custom Path':\n",
    "    from google.colab import drive\n",
    "    drive.mount('/content/drive')\n",
    "    #@markdown ``Add the full path to your audio on your Gdrive`` 👇\n",
    "    PATH_TO_YOUR_AUDIO = '/content/drive/MyDrive/test.wav' #@param {type:\"string\"}\n",
    "    if not os.path.isfile(PATH_TO_YOUR_AUDIO):\n",
    "        raise FileNotFoundError(f'Audio file not found: {PATH_TO_YOUR_AUDIO}')\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    # Save as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "    clear_output()\n",
    "    displayAudio()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "BQPLXJ8L0gms" },
   "outputs": [],
   "source": [
    "#@title STEP4: Start Crunching and Preview Output\n",
    "#@markdown Note: Only change these, if you have to\n",
    "\n",
    "# Imported here so the cell does not depend on an earlier cell having imported os.\n",
    "import os\n",
    "\n",
    "%cd /content/Wav2Lip\n",
    "\n",
    "# Set up paths and variables for the output file\n",
    "output_file_path = '/content/Wav2Lip/results/result_voice.mp4'\n",
    "\n",
    "# Delete existing output file before processing, if any\n",
    "if os.path.exists(output_file_path):\n",
    "    os.remove(output_file_path)\n",
    "\n",
    "pad_top = 0#@param {type:\"integer\"}\n",
    "pad_bottom = 10#@param {type:\"integer\"}\n",
    "pad_left = 0#@param 
{type:\"integer\"}\n",
    "pad_right = 0#@param {type:\"integer\"}\n",
    "rescaleFactor = 1#@param {type:\"integer\"}\n",
    "nosmooth = True #@param {type:\"boolean\"}\n",
    "#@markdown ___\n",
    "#@markdown Model selection:\n",
    "use_hd_model = False #@param {type:\"boolean\"}\n",
    "checkpoint_path = 'checkpoints/wav2lip_gan.pth' if use_hd_model else 'checkpoints/wav2lip.pth'\n",
    "\n",
    "\n",
    "# Both settings share one command line; only the optional --nosmooth flag differs.\n",
    "nosmooth_flag = '--nosmooth' if nosmooth else ''\n",
    "!python inference.py --checkpoint_path $checkpoint_path --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor $nosmooth_flag\n",
    "\n",
    "#Preview output video\n",
    "if os.path.exists(output_file_path):\n",
    "    from IPython.display import clear_output, display\n",
    "    clear_output()\n",
    "    print(\"Final Video Preview\")\n",
    "    print(\"Download this video from\", output_file_path)\n",
    "    # showVideo is defined twice in this notebook: one version returns an HTML object and\n",
    "    # the other displays it and returns None. A value returned inside this if-block is\n",
    "    # not rendered automatically, so display it explicitly when there is one.\n",
    "    preview = showVideo(output_file_path)\n",
    "    if preview is not None:\n",
    "        display(preview)\n",
    "else:\n",
    "    print(\"Processing failed. 
Output video not found.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": { "id": "vYxpPeie1CYL" },
   "source": [ "# LipSync on Your Video File" ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "nDuM7tfZ1F0t" },
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "from google.colab import drive\n",
    "from google.colab import files\n",
    "from IPython.display import HTML, clear_output, display\n",
    "from base64 import b64encode\n",
    "import moviepy.editor as mp\n",
    "\n",
    "\n",
    "def showVideo(file_path):\n",
    "    \"\"\"Display the MP4 at ``file_path`` inline as a base64-embedded HTML video.\"\"\"\n",
    "    with open(file_path, 'rb') as video_file:\n",
    "        mp4 = video_file.read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    # The template must contain a %s placeholder; without one the % operator raises\n",
    "    # \"TypeError: not all arguments converted during string formatting\".\n",
    "    display(HTML(\"\"\"\n",
    "    <video width=700 controls>\n",
    "        <source src=\"%s\" type=\"video/mp4\">\n",
    "    </video>\n",
    "    \"\"\" % data_url))\n",
    "\n",
    "def get_video_resolution(video_path):\n",
    "    \"\"\"Return the ``(width, height)`` in pixels that OpenCV reports for ``video_path``.\"\"\"\n",
    "    import cv2\n",
    "    video = cv2.VideoCapture(video_path)\n",
    "    try:\n",
    "        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "    finally:\n",
    "        # Release the capture handle even if reading a property fails.\n",
    "        video.release()\n",
    "    return (width, height)\n",
    "\n",
    "def resize_video(video_path, new_resolution):\n",
    "    \"\"\"Re-encode ``video_path`` frame by frame at ``new_resolution`` (width, height).\n",
    "\n",
    "    The result is written next to the input with a ``_720p.mp4`` suffix (the suffix is\n",
    "    fixed, whatever ``new_resolution`` is), reusing the FOURCC code and frame rate that\n",
    "    OpenCV reports for the source.\n",
    "\n",
    "    Returns:\n",
    "        The path of the file that was written.\n",
    "    \"\"\"\n",
    "    import cv2\n",
    "    video = cv2.VideoCapture(video_path)\n",
    "    fourcc = int(video.get(cv2.CAP_PROP_FOURCC))\n",
    "    fps = video.get(cv2.CAP_PROP_FPS)\n",
    "    width, height = new_resolution\n",
    "    output_path = os.path.splitext(video_path)[0] + '_720p.mp4'\n",
    "    writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))\n",
    "    try:\n",
    "        while True:\n",
    "            success, frame = video.read()\n",
    "            if not success:\n",
    "                break\n",
    "            writer.write(cv2.resize(frame, (width, height)))\n",
    "    finally:\n",
    "        # Always close both handles so the output file is finalized.\n",
    "        video.release()\n",
    "        writer.release()\n",
    "    return output_path\n",
    "\n",
    "# Mount Google Drive if it's not already mounted\n",
    "if not os.path.isdir(\"/content/drive/MyDrive\"):\n",
    "    drive.mount('/content/drive', force_remount=True)\n",
    "\n",
    "#@markdown ### Select an uploading method\n",
    "upload_method = \"Upload\" #@param 
[\"Upload\", \"Custom Path\"]\n",
    "\n",
    "# Location the inference step reads the input video from (../sample_data/input_vid.mp4).\n",
    "INPUT_VIDEO_PATH = '/content/sample_data/input_vid.mp4'\n",
    "\n",
    "# remove previous input video\n",
    "if os.path.isfile(INPUT_VIDEO_PATH):\n",
    "    os.remove(INPUT_VIDEO_PATH)\n",
    "\n",
    "if upload_method == \"Upload\":\n",
    "    uploaded = files.upload()\n",
    "    # A cancelled upload yields an empty dict; stop here instead of failing later on a missing file.\n",
    "    if not uploaded:\n",
    "        print(\"ERROR: No video was uploaded!\")\n",
    "        raise SystemExit(1)\n",
    "    for filename in uploaded.keys():\n",
    "        os.rename(filename, INPUT_VIDEO_PATH)\n",
    "    PATH_TO_YOUR_VIDEO = INPUT_VIDEO_PATH\n",
    "\n",
    "elif upload_method == 'Custom Path':\n",
    "    #@markdown ``Add the full path to your video on your Gdrive `` 👇\n",
    "    PATH_TO_YOUR_VIDEO = '/content/drive/MyDrive/test.mp4' #@param {type:\"string\"}\n",
    "    if not os.path.isfile(PATH_TO_YOUR_VIDEO):\n",
    "        print(\"ERROR: File not found!\")\n",
    "        raise SystemExit(0)\n",
    "\n",
    "#@markdown Notes:\n",
    "\n",
    "#@markdown . ``If your uploaded video is 1080p or higher resolution, this cell will resize it to 720p.``\n",
    "\n",
    "#@markdown . ``Do not upload videos longer than 60 seconds.``\n",
    "\n",
    "#@markdown ___\n",
    "\n",
    "clip = mp.VideoFileClip(PATH_TO_YOUR_VIDEO)\n",
    "try:\n",
    "    video_duration = clip.duration\n",
    "finally:\n",
    "    # Close the clip so its reader does not stay open on the input file.\n",
    "    clip.close()\n",
    "if video_duration > 60:\n",
    "    print(\"WARNING: Video duration exceeds 60 seconds. Please upload a shorter video.\")\n",
    "    raise SystemExit(0)\n",
    "\n",
    "video_resolution = get_video_resolution(PATH_TO_YOUR_VIDEO)\n",
    "print(f\"Video resolution: {video_resolution}\")\n",
    "if video_resolution[0] >= 1920 or video_resolution[1] >= 1080:\n",
    "    print(\"Resizing video to 720p...\")\n",
    "    import subprocess\n",
    "    # ffmpeg cannot read and write the same file, and for uploads the source already is\n",
    "    # INPUT_VIDEO_PATH, so encode to a temporary file and move it into place afterwards.\n",
    "    resized_path = '/content/sample_data/input_vid_resized.mp4'\n",
    "    # An argument list (no shell) keeps paths that contain spaces intact.\n",
    "    # scale=-2:720 sets the height to 720 and derives an even width that keeps the aspect ratio.\n",
    "    result = subprocess.run(\n",
    "        [\"ffmpeg\", \"-y\", \"-i\", PATH_TO_YOUR_VIDEO, \"-vf\", \"scale=-2:720\", resized_path],\n",
    "        capture_output=True, text=True, errors=\"replace\",\n",
    "    )\n",
    "    if result.returncode != 0 or not os.path.isfile(resized_path):\n",
    "        print(\"ERROR: ffmpeg failed to resize the video.\")\n",
    "        print(result.stderr[-2000:])\n",
    "        raise SystemExit(1)\n",
    "    os.replace(resized_path, INPUT_VIDEO_PATH)\n",
    "    PATH_TO_YOUR_VIDEO = INPUT_VIDEO_PATH\n",
    "    print(\"Video resized to 720p\")\n",
    "else:\n",
    "    print(\"No resizing needed\")\n",
    "\n",
    "if upload_method == \"Upload\":\n",
    "    clear_output()\n",
    "elif PATH_TO_YOUR_VIDEO != INPUT_VIDEO_PATH:\n",
    "    # A Drive video that needed no resizing still has to be copied to the input location.\n",
    "    shutil.copyfile(PATH_TO_YOUR_VIDEO, INPUT_VIDEO_PATH)\n",
    "    print(\"Video copied to destination.\")\n",
    "\n",
    "print(\"Input Video\")\n",
    "# Display the video from the destination path\n",
    "showVideo(INPUT_VIDEO_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "XgF4794r7sWK" },
   "outputs": [],
   "source": [
    "#@title STEP3: Select Audio (Record, Upload from local drive or Gdrive)\n",
    "import os\n",
    "# display is imported from IPython.display (the IPython.core.display location is deprecated);\n",
    "# clear_output is imported here so this cell does not rely on another cell's import.\n",
    "from IPython.display import Audio, clear_output, display\n",
    "\n",
    "upload_method = 'Upload' #@param ['Record', 'Upload', 'Custom Path']\n",
    "\n",
    "#remove previous input audio\n",
    "if os.path.isfile('/content/sample_data/input_audio.wav'):\n",
    "    os.remove('/content/sample_data/input_audio.wav')\n",
    "\n",
    "def displayAudio():\n",
    "    \"\"\"Show an audio player for the converted input audio file.\"\"\"\n",
    "    display(Audio('/content/sample_data/input_audio.wav'))\n",
    "\n",
    "if upload_method == 'Record':\n",
    "    audio, sr = get_audio()\n",
    "    # Import the submodule explicitly; a bare `import scipy` does not guarantee scipy.io.wavfile is loaded.\n",
    "    from scipy.io import wavfile\n",
    "    wavfile.write('/content/sample_data/input_audio.wav', sr, audio)\n",
    "\n",
    "elif upload_method == 
'Upload':\n",
    "    from google.colab import files\n",
    "    uploaded = files.upload()\n",
    "    # A cancelled upload yields an empty dict; stop with a clear message instead of an IndexError.\n",
    "    if not uploaded:\n",
    "        raise ValueError('No audio file was uploaded.')\n",
    "    for fn in uploaded.keys():\n",
    "        print('User uploaded file \"{name}\" with length {length} bytes.'.format(\n",
    "            name=fn, length=len(uploaded[fn])))\n",
    "\n",
    "    # Consider only the first file\n",
    "    PATH_TO_YOUR_AUDIO = str(list(uploaded.keys())[0])\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    # Save as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "    clear_output()\n",
    "    displayAudio()\n",
    "\n",
    "else: # Custom Path\n",
    "    from google.colab import drive\n",
    "    drive.mount('/content/drive')\n",
    "    #@markdown ``Add the full path to your audio on your Gdrive`` 👇\n",
    "    PATH_TO_YOUR_AUDIO = '/content/drive/MyDrive/test.wav' #@param {type:\"string\"}\n",
    "    if not os.path.isfile(PATH_TO_YOUR_AUDIO):\n",
    "        raise FileNotFoundError(f'Audio file not found: {PATH_TO_YOUR_AUDIO}')\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    # Save as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "    clear_output()\n",
    "    displayAudio()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "cellView": "form", "id": "ZgtO08V28ANf" },
   "outputs": [],
   "source": [
    "#@title STEP4: Start Crunching and Preview Output\n",
    "#@markdown Note: Only change these, if you have to\n",
    "\n",
    "# Imported here so the cell does not depend on an earlier cell having imported os.\n",
    "import os\n",
    "\n",
    "%cd /content/Wav2Lip\n",
    "\n",
    "# Set up paths and variables for the output file\n",
    "output_file_path = '/content/Wav2Lip/results/result_voice.mp4'\n",
    "\n",
    "# Delete existing output file before processing, if any\n",
    "if os.path.exists(output_file_path):\n",
    "    os.remove(output_file_path)\n",
    "\n",
    "pad_top = 0#@param {type:\"integer\"}\n",
    "pad_bottom = 10#@param {type:\"integer\"}\n",
    "pad_left = 0#@param {type:\"integer\"}\n",
    "pad_right = 
0#@param {type:\"integer\"}\n",
    "rescaleFactor = 1#@param {type:\"integer\"}\n",
    "nosmooth = True #@param {type:\"boolean\"}\n",
    "#@markdown ___\n",
    "#@markdown Model selection:\n",
    "use_hd_model = False #@param {type:\"boolean\"}\n",
    "checkpoint_path = 'checkpoints/wav2lip_gan.pth' if use_hd_model else 'checkpoints/wav2lip.pth'\n",
    "\n",
    "\n",
    "# Both settings share one command line; only the optional --nosmooth flag differs.\n",
    "nosmooth_flag = '--nosmooth' if nosmooth else ''\n",
    "!python inference.py --checkpoint_path $checkpoint_path --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor $nosmooth_flag\n",
    "\n",
    "#Preview output video\n",
    "if os.path.exists(output_file_path):\n",
    "    from IPython.display import clear_output, display\n",
    "    clear_output()\n",
    "    print(\"Final Video Preview\")\n",
    "    print(\"Download this video from\", output_file_path)\n",
    "    # showVideo is defined twice in this notebook: one version returns an HTML object and\n",
    "    # the other displays it and returns None. A value returned inside this if-block is\n",
    "    # not rendered automatically, so display it explicitly when there is one.\n",
    "    preview = showVideo(output_file_path)\n",
    "    if preview is not None:\n",
    "        display(preview)\n",
    "else:\n",
    "    print(\"Processing failed. Output video not found.\")"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": { "private_outputs": true, "provenance": [] },
  "kernelspec": { "display_name": "Python 3", "name": "python3" }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}