Spaces:

hhyangcs
/

depth-any-video

Running on Zero

App Files Files Community

depthanyvideo committed 18 days ago

Commit

47ac829

•

1 Parent(s): 4be2365

update

Browse files

Files changed (1) hide show

app.py +89 -88

app.py CHANGED Viewed

@@ -70,98 +70,99 @@ def depth_any_video(
     """
     Perform depth estimation on the uploaded video/image.
     """
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        # Save the uploaded file
-        input_path = os.path.join(tmp_dir, file.name)
-        with open(input_path, "wb") as f:
-            f.write(file.read())
-        # Set up output directory
-        output_dir = os.path.join(tmp_dir, "output")
-        os.makedirs(output_dir, exist_ok=True)
-        # Prepare configuration
-        cfg = EasyDict(
-            {
-                "model_base": MODEL_BASE,
-                "data_path": input_path,
-                "output_dir": output_dir,
-                "denoise_steps": denoise_steps,
-                "num_frames": num_frames,
-                "decode_chunk_size": decode_chunk_size,
-                "num_interp_frames": num_interp_frames,
-                "num_overlap_frames": num_overlap_frames,
-                "max_resolution": max_resolution,
-                "seed": 666,
-            }
-        )
-        seed_all(cfg.seed)
-        file_name = os.path.splitext(os.path.basename(cfg.data_path))[0]
-        is_video = cfg.data_path.lower().endswith((".mp4", ".avi", ".mov", ".mkv"))
-        if is_video:
-            num_interp_frames = cfg.num_interp_frames
-            num_overlap_frames = cfg.num_overlap_frames
-            num_frames = cfg.num_frames
-            assert num_frames % 2 == 0, "num_frames should be even."
-            assert (
-                2 <= num_overlap_frames <= (num_interp_frames + 2 + 1) // 2
-            ), "Invalid frame overlap."
-            max_frames = (num_interp_frames + 2 - num_overlap_frames) * (
-                num_frames // 2
-            )
-            image, fps = img_utils.read_video(cfg.data_path, max_frames=max_frames)
-        else:
-            image = img_utils.read_image(cfg.data_path)
-        image = img_utils.imresize_max(image, cfg.max_resolution)
-        image = img_utils.imcrop_multi(image)
-        image_tensor = np.ascontiguousarray(
-            [_img.transpose(2, 0, 1) / 255.0 for _img in image]
-        )
-        image_tensor = torch.from_numpy(image_tensor).to(DEVICE)
-        with torch.no_grad(), torch.autocast(
-            device_type=DEVICE_TYPE, dtype=torch.float16
-        ):
-            pipe_out = pipe(
-                image_tensor,
-                num_frames=cfg.num_frames,
-                num_overlap_frames=cfg.num_overlap_frames,
-                num_interp_frames=cfg.num_interp_frames,
-                decode_chunk_size=cfg.decode_chunk_size,
-                num_inference_steps=cfg.denoise_steps,
             )
-        disparity = pipe_out.disparity
-        disparity_colored = pipe_out.disparity_colored
-        image = pipe_out.image
-        # (N, H, 2 * W, 3)
-        merged = np.concatenate(
-            [
-                image,
-                disparity_colored,
-            ],
-            axis=2,
-        )
-        if is_video:
-            output_path = os.path.join(cfg.output_dir, f"{file_name}_depth.mp4")
-            img_utils.write_video(
-                output_path,
-                merged,
-                fps,
             )
-            return output_path
-        else:
-            output_path = os.path.join(cfg.output_dir, f"{file_name}_depth.png")
-            img_utils.write_image(
-                output_path,
-                merged[0],
             )
-            return output_path
 # Define Gradio interface

     """
     Perform depth estimation on the uploaded video/image.
     """
+    with open(file, "rb") as _file:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Save the uploaded file
+            input_path = os.path.join(tmp_dir, file.name)
+            with open(input_path, "wb") as f:
+                f.write(_file.read())
+            # Set up output directory
+            output_dir = os.path.join(tmp_dir, "output")
+            os.makedirs(output_dir, exist_ok=True)
+            # Prepare configuration
+            cfg = EasyDict(
+                {
+                    "model_base": MODEL_BASE,
+                    "data_path": input_path,
+                    "output_dir": output_dir,
+                    "denoise_steps": denoise_steps,
+                    "num_frames": num_frames,
+                    "decode_chunk_size": decode_chunk_size,
+                    "num_interp_frames": num_interp_frames,
+                    "num_overlap_frames": num_overlap_frames,
+                    "max_resolution": max_resolution,
+                    "seed": 666,
+                }
             )
+            seed_all(cfg.seed)
+            file_name = os.path.splitext(os.path.basename(cfg.data_path))[0]
+            is_video = cfg.data_path.lower().endswith((".mp4", ".avi", ".mov", ".mkv"))
+            if is_video:
+                num_interp_frames = cfg.num_interp_frames
+                num_overlap_frames = cfg.num_overlap_frames
+                num_frames = cfg.num_frames
+                assert num_frames % 2 == 0, "num_frames should be even."
+                assert (
+                    2 <= num_overlap_frames <= (num_interp_frames + 2 + 1) // 2
+                ), "Invalid frame overlap."
+                max_frames = (num_interp_frames + 2 - num_overlap_frames) * (
+                    num_frames // 2
+                )
+                image, fps = img_utils.read_video(cfg.data_path, max_frames=max_frames)
+            else:
+                image = img_utils.read_image(cfg.data_path)
+            image = img_utils.imresize_max(image, cfg.max_resolution)
+            image = img_utils.imcrop_multi(image)
+            image_tensor = np.ascontiguousarray(
+                [_img.transpose(2, 0, 1) / 255.0 for _img in image]
             )
+            image_tensor = torch.from_numpy(image_tensor).to(DEVICE)
+            with torch.no_grad(), torch.autocast(
+                device_type=DEVICE_TYPE, dtype=torch.float16
+            ):
+                pipe_out = pipe(
+                    image_tensor,
+                    num_frames=cfg.num_frames,
+                    num_overlap_frames=cfg.num_overlap_frames,
+                    num_interp_frames=cfg.num_interp_frames,
+                    decode_chunk_size=cfg.decode_chunk_size,
+                    num_inference_steps=cfg.denoise_steps,
+                )
+            disparity = pipe_out.disparity
+            disparity_colored = pipe_out.disparity_colored
+            image = pipe_out.image
+            # (N, H, 2 * W, 3)
+            merged = np.concatenate(
+                [
+                    image,
+                    disparity_colored,
+                ],
+                axis=2,
             )
+            if is_video:
+                output_path = os.path.join(cfg.output_dir, f"{file_name}_depth.mp4")
+                img_utils.write_video(
+                    output_path,
+                    merged,
+                    fps,
+                )
+                return output_path
+            else:
+                output_path = os.path.join(cfg.output_dir, f"{file_name}_depth.png")
+                img_utils.write_image(
+                    output_path,
+                    merged[0],
+                )
+                return output_path
 # Define Gradio interface