{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1e90f25",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import requests\n",
    "\n",
    "\n",
    "def download_file(url, dest_path, chunk_size=1 << 20):\n",
    "    \"\"\"Stream `url` into `dest_path`, failing loudly on HTTP errors.\n",
    "\n",
    "    The response is written to a temporary `.part` file and renamed only after\n",
    "    the transfer completes, so an interrupted or rejected download never leaves\n",
    "    a truncated file (or an HTML error page) where a checkpoint is expected.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the file to fetch.\n",
    "        dest_path: Where to store the file; missing parent folders are created.\n",
    "        chunk_size: Number of bytes requested per streamed chunk.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the server answers with a 4xx/5xx status.\n",
    "    \"\"\"\n",
    "    os.makedirs(os.path.dirname(dest_path) or \".\", exist_ok=True)\n",
    "    partial_path = dest_path + \".part\"\n",
    "    with requests.get(url, stream=True, timeout=60) as response:\n",
    "        response.raise_for_status()\n",
    "        with open(partial_path, \"wb\") as f:\n",
    "            for chunk in response.iter_content(chunk_size=chunk_size):\n",
    "                f.write(chunk)\n",
    "    os.replace(partial_path, dest_path)\n",
    "\n",
    "\n",
    "# Step 1: Install dependency (%pip targets the environment of the running kernel)\n",
    "%pip install ffmpeg-python\n",
    "\n",
    "# Step 2: Clone the Wav2Lip repository (skipped when already present so the cell can be re-run)\n",
    "if not os.path.isdir(\"Wav2Lip\"):\n",
    "    !git clone https://github.com/justinjohn0306/Wav2Lip\n",
    "\n",
    "# Step 3: Download pretrained model\n",
    "download_file(\n",
    "    \"https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA\",\n",
    "    \"Wav2Lip/checkpoints/wav2lip_gan.pth\",\n",
    ")\n",
    "\n",
    "# Step 4: Install the required dependencies for Wav2Lip\n",
    "%pip install -r Wav2Lip/requirements.txt\n",
    "%pip install pyaudio\n",
    "\n",
    "# Step 5: Download pretrained model for face detection\n",
    "download_file(\n",
    "    \"https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\",\n",
    "    \"Wav2Lip/face_detection/detection/sfd/s3fd.pth\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e86c988",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "import sys\n",
    "from base64 import b64encode\n",
    "from urllib import parse as urlparse\n",
    "\n",
    "from IPython.display import HTML\n",
    "\n",
    "# Step 1: Install yt-dlp with the kernel's own interpreter so the module is importable below\n",
    "subprocess.run([sys.executable, '-m', 'pip', 'install', 'yt-dlp'], check=True)\n",
    "\n",
    "# Step 2: Define YouTube URL and Video ID\n",
    "YOUTUBE_URL = 'https://www.youtube.com/watch?v=vAnWYLTdvfY'\n",
    "url_data = urlparse.urlparse(YOUTUBE_URL)\n",
    "query = urlparse.parse_qs(url_data.query)\n",
    "YOUTUBE_ID = query[\"v\"][0]\n",
    "\n",
    "# Remove the previous download and the previous trimmed clip.\n",
    "# yt-dlp does not overwrite an existing video file, so a stale youtube.mp4\n",
    "# would otherwise be reused after YOUTUBE_URL is changed.\n",
    "for stale_path in ('youtube.mp4', 'input_vid.mp4'):\n",
    "    if os.path.isfile(stale_path):\n",
    "        os.remove(stale_path)\n",
    "\n",
    "# Trim video (start, end) seconds\n",
    "start = 35\n",
    "end = 62\n",
    "interval = end - start\n",
    "\n",
    "# Step 3: Download and trim the YouTube video (check=True surfaces failures immediately)\n",
    "subprocess.run(\n",
    "    [sys.executable, '-m', 'yt_dlp', '-f', 'bestvideo[ext=mp4]', '--output', \"youtube.%(ext)s\",\n",
    "     f'https://www.youtube.com/watch?v={YOUTUBE_ID}'],\n",
    "    check=True,\n",
    ")\n",
    "\n",
    "# Cut the video using FFmpeg\n",
    "subprocess.run(\n",
    "    ['ffmpeg', '-y', '-i', 'youtube.mp4', '-ss', str(start), '-t', str(interval), '-async', '1', 'input_vid.mp4'],\n",
    "    check=True,\n",
    ")\n",
    "\n",
    "\n",
    "def show_video(path):\n",
    "    \"\"\"Return an inline HTML5 player for the MP4 file at `path`.\n",
    "\n",
    "    The file is embedded as a base64 data URL, so the whole video ends up in\n",
    "    the cell output; keep clips short.\n",
    "    \"\"\"\n",
    "    with open(path, 'rb') as video_file:\n",
    "        mp4 = video_file.read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    return HTML(f\"\"\"<video width=400 controls><source src=\"{data_url}\"></video>\"\"\")\n",
    "\n",
    "\n",
    "# Preview the trimmed video\n",
    "show_video('input_vid.mp4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7da8e818",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from IPython.display import Audio, display\n",
    "\n",
    "upload_method = 'Path'  # Change this to 'Record' or 'Path'\n",
    "\n",
    "# Remove previous input audio\n",
    "if os.path.isfile('input_audio.wav'):\n",
    "    os.remove('input_audio.wav')\n",
    "\n",
    "\n",
    "def display_audio():\n",
    "    \"\"\"Show an inline player for the prepared `input_audio.wav`.\"\"\"\n",
    "    display(Audio('input_audio.wav'))\n",
    "\n",
    "\n",
    "if upload_method == 'Record':\n",
    "    # Optional dependencies: only needed when recording from the microphone.\n",
    "    import pyaudio\n",
    "    import wave\n",
    "\n",
    "    CHUNK = 1024\n",
    "    FORMAT = pyaudio.paInt16\n",
    "    CHANNELS = 1\n",
    "    RATE = 16000\n",
    "    RECORD_SECONDS = 5\n",
    "    WAVE_OUTPUT_FILENAME = \"input_audio.wav\"\n",
    "\n",
    "    p = pyaudio.PyAudio()\n",
    "    frames = []\n",
    "    try:\n",
    "        # Query the sample width while the PyAudio instance is still alive.\n",
    "        sample_width = p.get_sample_size(FORMAT)\n",
    "        stream = p.open(format=FORMAT,\n",
    "                        channels=CHANNELS,\n",
    "                        rate=RATE,\n",
    "                        input=True,\n",
    "                        frames_per_buffer=CHUNK)\n",
    "        try:\n",
    "            print(\"Recording...\")\n",
    "            for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):\n",
    "                frames.append(stream.read(CHUNK))\n",
    "            print(\"Finished recording.\")\n",
    "        finally:\n",
    "            # Release the audio device even if reading fails part-way.\n",
    "            stream.stop_stream()\n",
    "            stream.close()\n",
    "    finally:\n",
    "        p.terminate()\n",
    "\n",
    "    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:\n",
    "        wf.setnchannels(CHANNELS)\n",
    "        wf.setsampwidth(sample_width)\n",
    "        wf.setframerate(RATE)\n",
    "        wf.writeframes(b''.join(frames))\n",
    "\n",
    "    display_audio()\n",
    "\n",
    "elif upload_method == 'Path':\n",
    "    # Optional dependencies: only needed when loading an existing audio file.\n",
    "    import librosa\n",
    "    import soundfile as sf\n",
    "\n",
    "    # Add the full path to your audio\n",
    "    PATH_TO_YOUR_AUDIO = 'C:/Users/justi/OneDrive/Desktop/wav2lip/Wav2Lip/input_audio.wav'\n",
    "\n",
    "    # Load audio at its native sampling rate (sr=None disables resampling)\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    # Save a WAV copy next to the notebook using that same sampling rate\n",
    "    sf.write('input_audio.wav', audio, sr, format='wav')\n",
    "\n",
    "    display_audio()\n",
    "\n",
    "else:\n",
    "    # Fail here rather than later, when the inference step cannot find input_audio.wav.\n",
    "    raise ValueError(f\"Unknown upload_method {upload_method!r}; expected 'Record' or 'Path'\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63289945",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Define the parameters for the Wav2Lip model\n",
    "pad_top = 0\n",
    "pad_bottom = 10\n",
    "pad_left = 0\n",
    "pad_right = 0\n",
    "rescaleFactor = 1\n",
    "nosmooth = False\n",
    "\n",
    "# Set the path to the Wav2Lip model and input files.\n",
    "# inference.py is launched from inside Wav2Lip/, whereas the earlier cells wrote\n",
    "# input_vid.mp4 and input_audio.wav to the notebook's working directory, so the\n",
    "# inputs are addressed relative to the parent folder.\n",
    "checkpoint_path = \"checkpoints/wav2lip_gan.pth\"\n",
    "input_face = \"../input_vid.mp4\"\n",
    "input_audio = \"../input_audio.wav\"\n",
    "nosmooth_flag = \"--nosmooth\" if nosmooth else \"\"\n",
    "\n",
    "# Run the Wav2Lip model\n",
    "!cd Wav2Lip && python inference.py --checkpoint_path \"{checkpoint_path}\" --face \"{input_face}\" --audio \"{input_audio}\" --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} {nosmooth_flag}\n",
    "\n",
    "# Preview the output video (show_video is defined in the video-download cell above)\n",
    "print(\"Final Video Preview\")\n",
    "print(\"Find the output video at\", 'Wav2Lip/results/result_voice.mp4')\n",
    "show_video('Wav2Lip/results/result_voice.mp4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fbafa56",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}