# Inpaint/src/dataset/depthanything_dataset.py
# Last modified: 2024-02-08
#
# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
# If you find this code useful, we kindly ask you to cite our paper in your work.
# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold.
# More information about the method can be found at https://marigoldmonodepth.github.io
# --------------------------------------------------------------------------

import torch
import torchvision.transforms as transforms
from torchvision.transforms import CenterCrop, InterpolationMode, Resize

from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode


class DepthAnythingDataset(BaseDepthDataset):
    def __init__(
        self,
        **kwargs,
    ) -> None:
        super().__init__(
            # Depth range parameters (this class is adapted from the ScanNet dataset loader)
            min_depth=-1,
            max_depth=256,
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )

    def _read_depth_file(self, rel_path):
        depth_in = self._read_image(rel_path)
        # ScanNet-style decoding (depth_in / 1000.0) is intentionally skipped;
        # the stored values are returned as-is.
        # depth_decoded = depth_in / 1000.0
        return depth_in
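
    # Worked example (comment only): with the ScanNet decode above disabled, a
    # stored pixel value of 128 is returned as 128.0 here, whereas the ScanNet
    # variant would return 128 / 1000.0 = 0.128 (millimetres to metres).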

    def _training_preprocess(self, rasters):
        # Augmentation
        if self.augm_args is not None:
            rasters = self._augment_data(rasters)

        # Normalization: map stored values in [0, 255] to [-1, 1]
        rasters["depth_raw_norm"] = rasters["depth_raw_linear"] / 255.0 * 2.0 - 1.0
        rasters["depth_filled_norm"] = rasters["depth_filled_linear"] / 255.0 * 2.0 - 1.0

        # Set invalid pixels to the far plane
        if self.move_invalid_to_far_plane:
            if self.depth_transform.far_plane_at_max:
                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
                    self.depth_transform.norm_max
                )
            else:
                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
                    self.depth_transform.norm_min
                )

        # Resize the shorter edge to the target height, then center-crop to the target size
        if self.resize_to_hw is not None:
            T = transforms.Compose([
                Resize(self.resize_to_hw[0]),
                CenterCrop(self.resize_to_hw),
            ])
            rasters = {k: T(v) for k, v in rasters.items()}
        return rasters
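
    # Normalization sketch (comment only, not executed): the mapping above sends
    # a stored value v in [0, 255] to v / 255.0 * 2.0 - 1.0 in [-1, 1], e.g.
    #   v = 0     -> -1.0
    #   v = 127.5 ->  0.0
    #   v = 255   -> +1.0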

    # def _load_depth_data(self, depth_rel_path, filled_rel_path):
    #     # Read depth data
    #     outputs = {}
    #     depth_raw = self._read_depth_file(depth_rel_path).squeeze()
    #     depth_raw_linear = torch.from_numpy(depth_raw).float().unsqueeze(0)  # [1, H, W] [0, 255]
    #     outputs["depth_raw_linear"] = depth_raw_linear.clone()
    #
    #     if self.has_filled_depth:
    #         depth_filled = self._read_depth_file(filled_rel_path).squeeze()
    #         depth_filled_linear = torch.from_numpy(depth_filled).float().unsqueeze(0)
    #         outputs["depth_filled_linear"] = depth_filled_linear
    #     else:
    #         outputs["depth_filled_linear"] = depth_raw_linear.clone()
    #
    #     return outputs
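

# Usage sketch (commented out; assumptions flagged): BaseDepthDataset is defined
# elsewhere, so the constructor arguments below are hypothetical illustrations of
# a typical dataset config, not a confirmed signature. Only `resize_to_hw`,
# `depth_transform`, and `augmentation_args`-style settings are implied by the
# attributes used in this file.
#
# dataset = DepthAnythingDataset(
#     mode="train",                                      # hypothetical mode flag
#     filename_ls_path="data/filename_list_train.txt",   # hypothetical split file
#     dataset_dir="data/depth_anything",                 # hypothetical data root
#     resize_to_hw=[480, 640],
#     depth_transform=None,                              # placeholder; a real transform is expected
#     augmentation_args=None,
# )
# sample = dataset[0]  # in training mode, rasters should include "depth_raw_norm" in [-1, 1]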