from typing import List, Tuple import numpy as np def connected_component(r: np.ndarray, c: np.ndarray) -> List[List[int]]: """Find connected components in the given row and column indices. Args: ---- r (np.ndarray): Row indices. c (np.ndarray): Column indices. Yields: ------ List[int]: Indices of connected components. """ indices = [0] for i in range(1, r.size): if r[i] == r[indices[-1]] and c[i] == c[indices[-1]] + 1: indices.append(i) else: yield indices indices = [i] yield indices def nms_horizontal(ratio: np.ndarray, threshold: float) -> np.ndarray: """Apply Non-Maximum Suppression (NMS) horizontally on the given ratio matrix. Args: ---- ratio (np.ndarray): Input ratio matrix. threshold (float): Threshold for NMS. Returns: ------- np.ndarray: Binary mask after applying NMS. """ mask = np.zeros_like(ratio, dtype=bool) r, c = np.nonzero(ratio > threshold) if len(r) == 0: return mask for ids in connected_component(r, c): values = [ratio[r[i], c[i]] for i in ids] mi = np.argmax(values) mask[r[ids[mi]], c[ids[mi]]] = True return mask def nms_vertical(ratio: np.ndarray, threshold: float) -> np.ndarray: """Apply Non-Maximum Suppression (NMS) vertically on the given ratio matrix. Args: ---- ratio (np.ndarray): Input ratio matrix. threshold (float): Threshold for NMS. Returns: ------- np.ndarray: Binary mask after applying NMS. """ return np.transpose(nms_horizontal(np.transpose(ratio), threshold)) def fgbg_depth( d: np.ndarray, t: float ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Find foreground-background relations between neighboring pixels. Args: ---- d (np.ndarray): Depth matrix. t (float): Threshold for comparison. Returns: ------- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Four matrices indicating left, top, right, and bottom foreground-background relations. """ right_is_big_enough = (d[..., :, 1:] / d[..., :, :-1]) > t left_is_big_enough = (d[..., :, :-1] / d[..., :, 1:]) > t bottom_is_big_enough = (d[..., 1:, :] / d[..., :-1, :]) > t top_is_big_enough = (d[..., :-1, :] / d[..., 1:, :]) > t return ( left_is_big_enough, top_is_big_enough, right_is_big_enough, bottom_is_big_enough, ) def fgbg_depth_thinned( d: np.ndarray, t: float ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Find foreground-background relations between neighboring pixels with Non-Maximum Suppression. Args: ---- d (np.ndarray): Depth matrix. t (float): Threshold for NMS. Returns: ------- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Four matrices indicating left, top, right, and bottom foreground-background relations with NMS applied. """ right_is_big_enough = nms_horizontal(d[..., :, 1:] / d[..., :, :-1], t) left_is_big_enough = nms_horizontal(d[..., :, :-1] / d[..., :, 1:], t) bottom_is_big_enough = nms_vertical(d[..., 1:, :] / d[..., :-1, :], t) top_is_big_enough = nms_vertical(d[..., :-1, :] / d[..., 1:, :], t) return ( left_is_big_enough, top_is_big_enough, right_is_big_enough, bottom_is_big_enough, ) def fgbg_binary_mask( d: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Find foreground-background relations between neighboring pixels in binary masks. Args: ---- d (np.ndarray): Binary depth matrix. Returns: ------- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Four matrices indicating left, top, right, and bottom foreground-background relations in binary masks. """ assert d.dtype == bool right_is_big_enough = d[..., :, 1:] & ~d[..., :, :-1] left_is_big_enough = d[..., :, :-1] & ~d[..., :, 1:] bottom_is_big_enough = d[..., 1:, :] & ~d[..., :-1, :] top_is_big_enough = d[..., :-1, :] & ~d[..., 1:, :] return ( left_is_big_enough, top_is_big_enough, right_is_big_enough, bottom_is_big_enough, ) def edge_recall_matting(pr: np.ndarray, gt: np.ndarray, t: float) -> float: """Calculate edge recall for image matting. Args: ---- pr (np.ndarray): Predicted depth matrix. gt (np.ndarray): Ground truth binary mask. t (float): Threshold for NMS. Returns: ------- float: Edge recall value. """ assert gt.dtype == bool ap, bp, cp, dp = fgbg_depth_thinned(pr, t) ag, bg, cg, dg = fgbg_binary_mask(gt) return 0.25 * ( np.count_nonzero(ap & ag) / max(np.count_nonzero(ag), 1) + np.count_nonzero(bp & bg) / max(np.count_nonzero(bg), 1) + np.count_nonzero(cp & cg) / max(np.count_nonzero(cg), 1) + np.count_nonzero(dp & dg) / max(np.count_nonzero(dg), 1) ) def boundary_f1( pr: np.ndarray, gt: np.ndarray, t: float, return_p: bool = False, return_r: bool = False, ) -> float: """Calculate Boundary F1 score. Args: ---- pr (np.ndarray): Predicted depth matrix. gt (np.ndarray): Ground truth depth matrix. t (float): Threshold for comparison. return_p (bool, optional): If True, return precision. Defaults to False. return_r (bool, optional): If True, return recall. Defaults to False. Returns: ------- float: Boundary F1 score, or precision, or recall depending on the flags. """ ap, bp, cp, dp = fgbg_depth(pr, t) ag, bg, cg, dg = fgbg_depth(gt, t) r = 0.25 * ( np.count_nonzero(ap & ag) / max(np.count_nonzero(ag), 1) + np.count_nonzero(bp & bg) / max(np.count_nonzero(bg), 1) + np.count_nonzero(cp & cg) / max(np.count_nonzero(cg), 1) + np.count_nonzero(dp & dg) / max(np.count_nonzero(dg), 1) ) p = 0.25 * ( np.count_nonzero(ap & ag) / max(np.count_nonzero(ap), 1) + np.count_nonzero(bp & bg) / max(np.count_nonzero(bp), 1) + np.count_nonzero(cp & cg) / max(np.count_nonzero(cp), 1) + np.count_nonzero(dp & dg) / max(np.count_nonzero(dp), 1) ) if r + p == 0: return 0.0 if return_p: return p if return_r: return r return 2 * (r * p) / (r + p) def get_thresholds_and_weights( t_min: float, t_max: float, N: int ) -> Tuple[np.ndarray, np.ndarray]: """Generate thresholds and weights for the given range. Args: ---- t_min (float): Minimum threshold. t_max (float): Maximum threshold. N (int): Number of thresholds. Returns: ------- Tuple[np.ndarray, np.ndarray]: Array of thresholds and corresponding weights. """ thresholds = np.linspace(t_min, t_max, N) weights = thresholds / thresholds.sum() return thresholds, weights def invert_depth(depth: np.ndarray, eps: float = 1e-6) -> np.ndarray: """Inverts a depth map with numerical stability. Args: ---- depth (np.ndarray): Depth map to be inverted. eps (float): Minimum value to avoid division by zero (default is 1e-6). Returns: ------- np.ndarray: Inverted depth map. """ inverse_depth = 1.0 / depth.clip(min=eps) return inverse_depth def SI_boundary_F1( predicted_depth: np.ndarray, target_depth: np.ndarray, t_min: float = 1.05, t_max: float = 1.25, N: int = 10, ) -> float: """Calculate Scale-Invariant Boundary F1 Score for depth-based ground-truth. Args: ---- predicted_depth (np.ndarray): Predicted depth matrix. target_depth (np.ndarray): Ground truth depth matrix. t_min (float, optional): Minimum threshold. Defaults to 1.05. t_max (float, optional): Maximum threshold. Defaults to 1.25. N (int, optional): Number of thresholds. Defaults to 10. Returns: ------- float: Scale-Invariant Boundary F1 Score. """ assert predicted_depth.ndim == target_depth.ndim == 2 thresholds, weights = get_thresholds_and_weights(t_min, t_max, N) f1_scores = np.array( [ boundary_f1(invert_depth(predicted_depth), invert_depth(target_depth), t) for t in thresholds ] ) return np.sum(f1_scores * weights) def SI_boundary_Recall( predicted_depth: np.ndarray, target_mask: np.ndarray, t_min: float = 1.05, t_max: float = 1.25, N: int = 10, alpha_threshold: float = 0.1, ) -> float: """Calculate Scale-Invariant Boundary Recall Score for mask-based ground-truth. Args: ---- predicted_depth (np.ndarray): Predicted depth matrix. target_mask (np.ndarray): Ground truth binary mask. t_min (float, optional): Minimum threshold. Defaults to 1.05. t_max (float, optional): Maximum threshold. Defaults to 1.25. N (int, optional): Number of thresholds. Defaults to 10. alpha_threshold (float, optional): Threshold for alpha masking. Defaults to 0.1. Returns: ------- float: Scale-Invariant Boundary Recall Score. """ assert predicted_depth.ndim == target_mask.ndim == 2 thresholds, weights = get_thresholds_and_weights(t_min, t_max, N) thresholded_target = target_mask > alpha_threshold recall_scores = np.array( [ edge_recall_matting( invert_depth(predicted_depth), thresholded_target, t=float(t) ) for t in thresholds ] ) weighted_recall = np.sum(recall_scores * weights) return weighted_recall