{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "**Fixes by: [justinjohn-03](https://github.com/justinjohn0306)**"
   ],
   "metadata": {
    "id": "9Uyk6DCBGHuW"
   }
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "U1xFNFU58_2j"
   },
   "source": [
    "## Goal: Make anyone speak anything (LipSync)\n",
    "\n",
    "* Github: https://github.com/Rudrabha/Wav2Lip\n",
    "* Paper: https://arxiv.org/abs/2008.10010\n",
    "* Original notebook: https://colab.research.google.com/drive/1tZpDWXz49W6wDcTprANRGLo2D_EbD5J8?usp=sharing\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "Qgo-oaI3JU2u",
    "cellView": "form"
   },
   "source": [
    "#@title Step1: Setup Wav2Lip\n",
    "#@markdown * Install dependency\n",
    "#@markdown * Download pretrained model\n",
    "!rm -rf /content/sample_data\n",
    "!mkdir /content/sample_data\n",
    "\n",
    "!git clone https://github.com/zabique/Wav2Lip\n",
    "\n",
    "#download the pretrained model\n",
    "!wget 'https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA' -O '/content/Wav2Lip/checkpoints/wav2lip_gan.pth'\n",
    "# NOTE(review): unpinned wheel from a personal repository; l_ghc_cf (imported below) is not\n",
    "# referenced anywhere else in this notebook -- confirm it is still required.\n",
    "a = !pip install https://raw.githubusercontent.com/AwaleSajil/ghc/master/ghc-1.0-py3-none-any.whl\n",
    "\n",
    "# !pip uninstall tensorflow tensorflow-gpu\n",
    "!cd Wav2Lip && pip install -r requirements.txt\n",
    "\n",
    "#download pretrained model for face detection\n",
    "!wget \"https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\" -O \"/content/Wav2Lip/face_detection/detection/sfd/s3fd.pth\"\n",
    "\n",
    "# yt-dlp is the downloader STEP2 actually invokes\n",
    "!pip install -q yt-dlp\n",
    "!pip install ffmpeg-python\n",
    "!pip install librosa==0.9.1\n",
    "\n",
    "#this code for recording audio\n",
    "\"\"\"\n",
    "To write this piece of code I took inspiration/code from a lot of places.\n",
    "It was late night, so I'm not sure how much I created or just copied o.O\n",
    "Here are some of the possible references:\n",
    "https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/\n",
    "https://stackoverflow.com/a/18650249\n",
    "https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/\n",
    "https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/\n",
    "https://stackoverflow.com/a/49019356\n",
    "\"\"\"\n",
    "from base64 import b64decode, b64encode\n",
    "import io\n",
    "import os\n",
    "\n",
    "import ffmpeg\n",
    "import numpy as np\n",
    "from google.colab.output import eval_js\n",
    "from IPython.display import HTML, Audio, clear_output, display\n",
    "from scipy.io.wavfile import read as wav_read\n",
    "\n",
    "# NOTE(review): the recorder markup/script is empty here, yet get_audio() evaluates a\n",
    "# JavaScript value named `data` right after displaying it -- restore the script before\n",
    "# relying on the 'Record' option.\n",
    "AUDIO_HTML = \"\"\"\n",
    "\n",
    "\"\"\"\n",
    "\n",
    "%cd /\n",
    "from ghc.l_ghc_cf import l_ghc_cf\n",
    "%cd content\n",
    "\n",
    "def get_audio():\n",
    "    \"\"\"Record audio in the browser and return it as ``(audio, sr)``.\n",
    "\n",
    "    Displays AUDIO_HTML, reads the JavaScript value ``data`` (a base64 data URL),\n",
    "    converts the payload to WAV through an ffmpeg pipe and parses it with\n",
    "    scipy's ``wav_read``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(audio, sr)`` -- the sample array and sample rate from ``wav_read``.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if ffmpeg returns too little data to contain a RIFF header.\n",
    "    \"\"\"\n",
    "    display(HTML(AUDIO_HTML))\n",
    "    data = eval_js(\"data\")\n",
    "    # A data URL looks like '<header>,<base64 payload>'; keep only the payload\n",
    "    binary = b64decode(data.split(',')[1])\n",
    "\n",
    "    process = (ffmpeg\n",
    "        .input('pipe:0')\n",
    "        .output('pipe:1', format='wav')\n",
    "        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)\n",
    "    )\n",
    "    output, err = process.communicate(input=binary)\n",
    "\n",
    "    if len(output) < 8:\n",
    "        raise RuntimeError(\"ffmpeg produced no WAV data: \" + err.decode(errors=\"replace\"))\n",
    "\n",
    "    # Replace bytes 4:8 of the ffmpeg output with the actual size of the RIFF chunk\n",
    "    # (little-endian, everything after the 8-byte RIFF header); presumably the size\n",
    "    # field is not filled in correctly when ffmpeg writes to a pipe.\n",
    "    riff_chunk_size = len(output) - 8\n",
    "    riff = output[:4] + riff_chunk_size.to_bytes(4, 'little') + output[8:]\n",
    "\n",
    "    sr, audio = wav_read(io.BytesIO(riff))\n",
    "\n",
    "    return audio, sr\n",
    "\n",
    "\n",
    "def showVideo(path):\n",
    "    \"\"\"Return an inline HTML5 player for the MP4 file at ``path``.\n",
    "\n",
    "    The file is embedded as a base64 data URL. The result is an\n",
    "    ``IPython.display.HTML`` object: leave it as the last expression of a cell\n",
    "    or pass it to ``display()`` to render it.\n",
    "    \"\"\"\n",
    "    with open(str(path), 'rb') as video_file:\n",
    "        mp4 = video_file.read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    return HTML(\"\"\"\n",
    "    <video width=700 controls>\n",
    "        <source src=\"%s\" type=\"video/mp4\">\n",
    "    </video>\n",
    "    \"\"\" % data_url)\n",
    "\n",
    "\n",
    "def displayAudio():\n",
    "    \"\"\"Show an audio player for the prepared input audio file.\"\"\"\n",
    "    display(Audio('/content/sample_data/input_audio.wav'))"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SEdy6PWDXMRL"
   },
   "source": [
    "# LipSync Youtube Video"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "QI4kcm8QEeGZ",
    "cellView": "form"
   },
   "source": [
    "#@title STEP2: Select a Youtube Video\n",
    "#@markdown ### Find YouTube video ID from URL\n",
    "from urllib import parse as urlparse\n",
    "YOUTUBE_URL = 'https://www.youtube.com/watch?v=vAnWYLTdvfY' #@param {type:\"string\"}\n",
    "url_data = urlparse.urlparse(YOUTUBE_URL)\n",
    "query = urlparse.parse_qs(url_data.query)\n",
    "if \"v\" in query:\n",
    "    YOUTUBE_ID = query[\"v\"][0]\n",
    "else:\n",
    "    # youtu.be/<id>, /shorts/<id> and /embed/<id> carry the ID as the last path segment\n",
    "    YOUTUBE_ID = url_data.path.rstrip(\"/\").split(\"/\")[-1]\n",
    "if not YOUTUBE_ID:\n",
    "    raise ValueError(\"Could not find a video ID in YOUTUBE_URL: \" + YOUTUBE_URL)\n",
    "\n",
    "#@markdown ### Trim the video (start, end) seconds\n",
    "start = 35 #@param {type:\"integer\"}\n",
    "end = 62 #@param {type:\"integer\"}\n",
    "if end <= start:\n",
    "    raise ValueError(\"end must be greater than start\")\n",
    "interval = end - start\n",
    "\n",
    "# Download the YouTube video using yt-dlp (installed in Step1).\n",
    "# --force-overwrites: without it a youtube.mp4 left by an earlier run is silently reused.\n",
    "!yt-dlp -f 'bestvideo[ext=mp4]' --force-overwrites --output \"youtube.%(ext)s\" \"https://www.youtube.com/watch?v=$YOUTUBE_ID\"\n",
    "\n",
    "# Cut the video using FFmpeg\n",
    "!ffmpeg -y -i youtube.mp4 -ss {start} -t {interval} -async 1 /content/sample_data/input_vid.mp4\n",
    "\n",
    "# Preview the trimmed video\n",
    "showVideo('/content/sample_data/input_vid.mp4')\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "zS_RAeh-IfZy",
    "cellView": "form"
   },
   "source": [
    "#@title STEP3: Select Audio (Record or Upload)\n",
    "record_or_upload = 'Upload' #@param ['Record', 'Upload']\n",
    "\n",
    "if record_or_upload == 'Record':\n",
    "    audio, sr = get_audio()\n",
    "    from scipy.io.wavfile import write as wav_write\n",
    "    wav_write('/content/sample_data/input_audio.wav', sr, audio)\n",
    "elif record_or_upload == 'Upload':\n",
    "    from google.colab import files\n",
    "    uploaded = files.upload()\n",
    "    if not uploaded:\n",
    "        print(\"ERROR: No file uploaded!\")\n",
    "        raise SystemExit(0)\n",
    "    for fn in uploaded.keys():\n",
    "        print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
    "            name=fn, length=len(uploaded[fn])))\n",
    "\n",
    "    #consider only the first file\n",
    "    audio_file = str(list(uploaded.keys())[0])\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(audio_file, sr=None)\n",
    "\n",
    "    # Save audio as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "clear_output()\n",
    "displayAudio()"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "BQPLXJ8L0gms",
    "cellView": "form"
   },
   "source": [
    "#@title STEP4: Start Crunching and Preview Output\n",
    "#@markdown Note: Only change these, if you have to\n",
    "pad_top = 0#@param {type:\"integer\"}\n",
    "pad_bottom = 10#@param {type:\"integer\"}\n",
    "pad_left = 0#@param {type:\"integer\"}\n",
    "pad_right = 0#@param {type:\"integer\"}\n",
    "rescaleFactor = 1#@param {type:\"integer\"}\n",
    "nosmooth = False #@param {type:\"boolean\"}\n",
    "\n",
    "RESULT_VIDEO = '/content/Wav2Lip/results/result_voice.mp4'\n",
    "# Drop any previous result so a failed run cannot be mistaken for a fresh one\n",
    "if os.path.isfile(RESULT_VIDEO):\n",
    "    os.remove(RESULT_VIDEO)\n",
    "\n",
    "smooth_flag = '--nosmooth' if nosmooth else ''\n",
    "!cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor $smooth_flag\n",
    "\n",
    "#Preview output video\n",
    "if os.path.isfile(RESULT_VIDEO):\n",
    "    clear_output()\n",
    "    print(\"Final Video Preview\")\n",
    "    print(\"Download this video from\", RESULT_VIDEO)\n",
    "    display(showVideo(RESULT_VIDEO))\n",
    "else:\n",
    "    # Leave the inference log on screen so the failure can be diagnosed\n",
    "    print(\"ERROR: inference did not produce\", RESULT_VIDEO)\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "vYxpPeie1CYL"
   },
   "source": [
    "# LipSync on Your Video File"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "nDuM7tfZ1F0t",
    "cellView": "form"
   },
   "source": [
    "import os\n",
    "import shutil\n",
    "import subprocess\n",
    "from google.colab import files\n",
    "\n",
    "# STEP4 always feeds this exact file to inference.py, so every branch below must end\n",
    "# with the chosen (and optionally trimmed) video stored here.\n",
    "INPUT_VIDEO = '/content/sample_data/input_vid.mp4'\n",
    "TRIMMED_VIDEO = '/content/sample_data/trimmed_vid.mp4'\n",
    "\n",
    "#@markdown ### Select an uploading method\n",
    "upload_or_path = \"Upload\" #@param [\"Upload\", \"Custom Path\"]\n",
    "\n",
    "if upload_or_path == \"Upload\":\n",
    "    uploaded = files.upload()\n",
    "    if not uploaded:\n",
    "        print(\"ERROR: No file uploaded!\")\n",
    "        raise SystemExit(0)\n",
    "    # Only the first uploaded file is used as the input video\n",
    "    shutil.move(next(iter(uploaded)), INPUT_VIDEO)\n",
    "    PATH_TO_YOUR_VIDEO = INPUT_VIDEO\n",
    "else:\n",
    "    PATH_TO_YOUR_VIDEO = '/content/test.mp4' #@param {type:\"string\"}\n",
    "    if not os.path.isfile(PATH_TO_YOUR_VIDEO):\n",
    "        print(\"ERROR: File not found!\")\n",
    "        raise SystemExit(0)\n",
    "\n",
    "#@markdown ### Trim the video (start, end) seconds\n",
    "start_time = 0 #@param {type:\"integer\"}\n",
    "end_time = 0 #@param {type:\"integer\"}\n",
    "\n",
    "if start_time == 0 and end_time == 0:\n",
    "    print(\"No trimming applied\")\n",
    "    # A custom-path video still has to be staged where STEP4 expects it\n",
    "    if os.path.abspath(PATH_TO_YOUR_VIDEO) != INPUT_VIDEO:\n",
    "        shutil.copyfile(PATH_TO_YOUR_VIDEO, INPUT_VIDEO)\n",
    "elif end_time <= start_time:\n",
    "    print(\"ERROR: end_time must be greater than start_time!\")\n",
    "    raise SystemExit(0)\n",
    "else:\n",
    "    duration = end_time - start_time\n",
    "    # ffmpeg cannot write onto the file it is reading, so trim into a scratch file first.\n",
    "    # An argument list (no shell) keeps paths containing spaces intact; -y overwrites old output.\n",
    "    trim_cmd = [\"ffmpeg\", \"-y\", \"-i\", PATH_TO_YOUR_VIDEO, \"-ss\", str(start_time),\n",
    "                \"-t\", str(duration), \"-async\", \"1\", TRIMMED_VIDEO]\n",
    "    trim_result = subprocess.run(trim_cmd, capture_output=True, text=True, errors=\"replace\")\n",
    "    if trim_result.returncode != 0:\n",
    "        print(trim_result.stderr)\n",
    "        print(\"ERROR: ffmpeg could not trim the video!\")\n",
    "        raise SystemExit(0)\n",
    "    shutil.move(TRIMMED_VIDEO, INPUT_VIDEO)\n",
    "    print(f\"Video trimmed from {start_time} to {end_time} seconds\")\n",
    "\n",
    "PATH_TO_YOUR_VIDEO = INPUT_VIDEO\n",
    "print(f\"PATH_TO_YOUR_VIDEO: {PATH_TO_YOUR_VIDEO}\")\n",
    "\n",
    "if upload_or_path == \"Upload\":\n",
    "    # Hide the upload widget output before showing the preview\n",
    "    clear_output()\n",
    "print(\"Input Video\")\n",
    "display(showVideo(PATH_TO_YOUR_VIDEO))\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "XgF4794r7sWK",
    "cellView": "form"
   },
   "source": [
    "#@title STEP3: Select Audio (Record or Upload)\n",
    "record_or_upload = 'Upload' #@param ['Record', 'Upload']\n",
    "\n",
    "if record_or_upload == 'Record':\n",
    "    audio, sr = get_audio()\n",
    "    from scipy.io.wavfile import write as wav_write\n",
    "    wav_write('/content/sample_data/input_audio.wav', sr, audio)\n",
    "elif record_or_upload == 'Upload':\n",
    "    from google.colab import files\n",
    "    uploaded = files.upload()\n",
    "    if not uploaded:\n",
    "        print(\"ERROR: No file uploaded!\")\n",
    "        raise SystemExit(0)\n",
    "    for fn in uploaded.keys():\n",
    "        print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
    "            name=fn, length=len(uploaded[fn])))\n",
    "\n",
    "    #consider only the first file\n",
    "    audio_file = str(list(uploaded.keys())[0])\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    import librosa\n",
    "    audio, sr = librosa.load(audio_file, sr=None)\n",
    "\n",
    "    # Save audio as WAV with that same sampling rate\n",
    "    import soundfile as sf\n",
    "    sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "clear_output()\n",
    "displayAudio()"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "ZgtO08V28ANf",
    "cellView": "form"
   },
   "source": [
    "#@title STEP4: Start Crunching and Preview Output\n",
    "#@markdown Note: Only change these, if you have to\n",
    "pad_top = 0#@param {type:\"integer\"}\n",
    "pad_bottom = 10#@param {type:\"integer\"}\n",
    "pad_left = 0#@param {type:\"integer\"}\n",
    "pad_right = 0#@param {type:\"integer\"}\n",
    "rescaleFactor = 1#@param {type:\"integer\"}\n",
    "nosmooth = False #@param {type:\"boolean\"}\n",
    "\n",
    "RESULT_VIDEO = '/content/Wav2Lip/results/result_voice.mp4'\n",
    "# Drop any previous result so a failed run cannot be mistaken for a fresh one\n",
    "if os.path.isfile(RESULT_VIDEO):\n",
    "    os.remove(RESULT_VIDEO)\n",
    "\n",
    "smooth_flag = '--nosmooth' if nosmooth else ''\n",
    "!cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor $smooth_flag\n",
    "\n",
    "#Preview output video\n",
    "if os.path.isfile(RESULT_VIDEO):\n",
    "    clear_output()\n",
    "    print(\"Final Video Preview\")\n",
    "    print(\"Download this video from\", RESULT_VIDEO)\n",
    "    display(showVideo(RESULT_VIDEO))\n",
    "else:\n",
    "    # Leave the inference log on screen so the failure can be diagnosed\n",
    "    print(\"ERROR: inference did not produce\", RESULT_VIDEO)\n"
   ],
   "execution_count": null,
   "outputs": []
  }
 ]
}