Spaces:

hhyangcs
/

depth-any-video

Running on Zero

File size: 3,372 Bytes

from PIL import Image
import cv2
import numpy as np
import os
import tempfile


def resize(img, size):
    """Resample a uint8 image array to ``size`` with a Lanczos filter.

    Args:
        img: NumPy array whose dtype must be ``np.uint8``; it is converted
            with ``Image.fromarray``.
        size: Target size forwarded unchanged to ``Image.resize`` (the
            caller in this file passes ``(width, height)``).

    Returns:
        The resized image converted back to a NumPy array.
    """
    assert img.dtype == np.uint8
    return np.array(Image.fromarray(img).resize(size, Image.LANCZOS))


def crop(img, start_h, start_w, crop_h, crop_w):
    """Cut a ``crop_h`` x ``crop_w`` window out of ``img``, zero-padding
    wherever the window extends beyond the image.

    Args:
        img: Array indexed as ``[row, column, ...]``; any trailing
            dimensions (e.g. channels) are carried over unchanged.
        start_h: Row of the window's top edge; may be negative.
        start_w: Column of the window's left edge; may be negative.
        crop_h: Window height.
        crop_w: Window width.

    Returns:
        A new array of shape ``(crop_h, crop_w, *img.shape[2:])`` with the
        dtype of ``img``. Pixels outside the image are zero. A window that
        does not overlap the image at all yields an all-zero array.
    """
    out = np.zeros((crop_h, crop_w, *img.shape[2:]), dtype=img.dtype)

    # Intersection of the requested window with the image, in image coords.
    src_top = max(start_h, 0)
    src_left = max(start_w, 0)
    src_bottom = min(start_h + crop_h, img.shape[0])
    src_right = min(start_w + crop_w, img.shape[1])

    # No overlap: leaving this unguarded would produce negative extents and
    # mismatched slice shapes in the assignment below.
    if src_bottom <= src_top or src_right <= src_left:
        return out

    # Where the overlapping region lands inside the output window.
    dst_top = src_top - start_h
    dst_left = src_left - start_w
    dst_bottom = dst_top + (src_bottom - src_top)
    dst_right = dst_left + (src_right - src_left)

    out[dst_top:dst_bottom, dst_left:dst_right] = img[
        src_top:src_bottom, src_left:src_right
    ]
    return out


def imresize_max(img, size, min_side=False):
    """Shrink each image so that its reference side is at most ``size``.

    Args:
        img: Iterable of image arrays; each is passed to ``resize``.
        size: Upper bound for the reference side, in pixels.
        min_side: When true the shorter side is the reference side,
            otherwise the longer side is.

    Returns:
        A list with one resized image per input. The scale factor is capped
        at 1.0, so images are never enlarged.
    """
    pick_side = min if min_side else max

    def _target_size(frame):
        # (width, height) after applying the capped scale factor.
        height, width = frame.shape[:2]
        scale = min(size / pick_side(height, width), 1.0)
        return (int(width * scale), int(height * scale))

    return [resize(frame, _target_size(frame)) for frame in img]


def imcrop_multi(img, multiple=32):
    """Center-crop each image so both spatial sides are multiples of
    ``multiple``.

    Args:
        img: Iterable of image arrays indexed as ``[row, column, ...]``.
        multiple: Each side is rounded down to a multiple of this value.

    Returns:
        A list with one cropped image per input, produced by ``crop``.
    """
    cropped = []
    for frame in img:
        height = frame.shape[0]
        width = frame.shape[1]
        # Largest multiples of `multiple` that fit inside the frame.
        target_h = height - height % multiple
        target_w = width - width % multiple
        # Split the leftover margin evenly so the crop is centered.
        top = int(0.5 * max(0, height - target_h))
        left = int(0.5 * max(0, width - target_w))
        cropped.append(crop(frame, top, left, target_h, target_w))
    return cropped


def read_video(video_path, max_frames=None):
    """Decode a video into a list of RGB frames.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        max_frames: Optional upper bound on the number of frames returned;
            ``None`` reads until the capture stops delivering frames. A
            value of zero or less returns no frames.

    Returns:
        A ``(frames, fps)`` tuple. ``frames`` is a list of arrays converted
        from BGR to RGB; ``fps`` is the capture's ``CAP_PROP_FPS`` value.
        No explicit check is made that the capture opened, so a source that
        delivers no frames yields an empty list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Test the limit before reading so the cap holds for every value,
        # including zero.
        while max_frames is None or len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if decoding or conversion raises.
        cap.release()
    # (N, H, W, 3)
    return frames, fps


def read_image(image_path):
    """Load one image file as a single-element list of RGB frames.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        A one-element list holding the image converted from BGR to RGB, so
        the result has the same list-of-frames form as ``read_video``.

    Raises:
        OSError: If ``cv2.imread`` could not load the file.
    """
    frame = cv2.imread(image_path)
    # imread signals failure by returning None rather than raising; surface
    # that here instead of as an opaque cvtColor error.
    if frame is None:
        raise OSError(f"Could not read image: {image_path!r}")
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # (1, H, W, 3)
    return [frame]


def write_video(video_path, frames, fps):
    """Encode RGB frames into a video file using OpenCV's ``mp4v`` codec.

    Args:
        video_path: Destination path handed to ``cv2.VideoWriter``.
        frames: Non-empty sequence of RGB image arrays. The output size is
            taken from the first frame; later frames are not checked
            against it.
        fps: Frame rate handed to ``cv2.VideoWriter``.

    Raises:
        IndexError: If ``frames`` is an empty list.
        OSError: If the video writer could not be opened.
    """
    # NOTE: an earlier ffmpeg-based path (dump PNG frames, then encode with
    # `ffmpeg -b:v 5626k`) used to live here as commented-out code; its
    # comment mentioned visual compression artifacts. See version control
    # if it needs to be revived.
    h, w = frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(video_path, fourcc, fps, (w, h))
    try:
        # A writer that failed to open drops every frame silently.
        if not out.isOpened():
            raise OSError(f"Could not open video writer for: {video_path!r}")
        for frame in frames:
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer even if a frame conversion raises.
        out.release()


def write_image(image_path, frame):
    """Save one RGB frame to ``image_path``.

    Args:
        image_path: Destination path handed to ``cv2.imwrite``.
        frame: RGB image array; it is converted to BGR before writing.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    # imwrite reports failure through its boolean return value.
    if not cv2.imwrite(image_path, bgr):
        raise OSError(f"Could not write image: {image_path!r}")