A19grey committed on
Commit b3b839e
1 Parent(s): 29a026e

Moved resizing out of depth generation into the 3D model code to clean up the architecture

Files changed (1): app.py (+17 -32)
app.py CHANGED
@@ -60,7 +60,7 @@ def generate_3d_model(depth, image_path, focallength_px):

    Args:
        depth (np.ndarray): 2D array representing depth in meters.
-        image_path (str): Path to the resized RGB image.
+        image_path (str): Path to the RGB image.
        focallength_px (float): Focal length in pixels.

    Returns:
@@ -68,8 +68,16 @@ def generate_3d_model(depth, image_path, focallength_px):
    """
    # Load the RGB image and convert to a NumPy array
    image = np.array(Image.open(image_path))
+
+    # Resize depth to match image dimensions if necessary
+    if depth.shape != image.shape[:2]:
+        depth = cv2.resize(depth, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_LINEAR)
+
    height, width = depth.shape

+    print(f"3D model generation - Depth shape: {depth.shape}")
+    print(f"3D model generation - Image shape: {image.shape}")
+
    # Compute camera intrinsic parameters
    fx = fy = focallength_px  # Assuming square pixels and fx = fy
    cx, cy = width / 2, height / 2  # Principal point at the image center
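
For context, fx, fy, cx, and cy feed the standard pinhole back-projection that turns the metric depth map into 3D points. A minimal sketch of that step, using only NumPy (the backproject name and the flat (H*W, 3) output layout are illustrative, not taken from app.py):

    import numpy as np

    def backproject(depth, fx, fy, cx, cy):
        # Pixel grid: u runs along the image width, v along the height.
        h, w = depth.shape
        u, v = np.meshgrid(np.arange(w), np.arange(h))
        # Pinhole model: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy, Z = depth.
        z = depth
        x = (u - cx) * z / fx
        y = (v - cy) * z / fy
        return np.stack([x, y, z], axis=-1).reshape(-1, 3)
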
@@ -126,17 +134,13 @@ def predict_depth(input_image):
    # Preprocess the image for depth prediction
    result = depth_pro.load_rgb(temp_file)

-    # Add error checking for the result tuple
    if len(result) < 2:
        raise ValueError(f"Unexpected result from load_rgb: {result}")

-    image = result[0]  # Unpack the result tuple correctly
-    f_px = result[-1]  # Extract focal length
-
+    image, _, _, _, f_px = result
    print(f"Extracted focal length: {f_px}")

-    image = transform(image)  # Apply preprocessing transforms
-    image = image.to(device)  # Move the image tensor to the selected device
+    image = transform(image).to(device)

    # Run the depth prediction model
    prediction = model.infer(image, f_px=f_px)
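
The new fixed five-element unpack assumes depth_pro.load_rgb always returns exactly five values, whereas the old result[0] / result[-1] indexing did not care about the length. Purely as a sketch (not what this commit uses), Python's star-unpacking expresses the same length-agnostic idea in one line:

    # Binds the first element to image and the last to f_px, whatever sits
    # in between; it only needs len(result) >= 2, which the preceding
    # check already guarantees.
    image, *_, f_px = result
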
@@ -151,33 +155,13 @@ def predict_depth(input_image):
    if depth.ndim != 2:
        depth = depth.squeeze()

-    # Print debug information
-    print(f"Original depth shape: {depth.shape}")
-    print(f"Original image shape: {image.shape}")
-
-    # Resize depth to match image dimensions
-    image_height, image_width = image.shape[2], image.shape[3]
-    depth = cv2.resize(depth, (image_width, image_height), interpolation=cv2.INTER_LINEAR)
-
-    print(f"Resized depth shape: {depth.shape}")
-    print(f"Final image shape: {image.shape}")
-
-    # No downsampling
-    downscale_factor = 1
-
-    # Convert image tensor to CPU and NumPy
-    image_np = image.cpu().detach().numpy()[0].transpose(1, 2, 0)
-
-    # No normalization of depth map as it is already in meters
-    depth_min = np.min(depth)
-    depth_max = np.max(depth)
-    depth_normalized = depth  # Depth remains in meters
+    print(f"Depth map shape: {depth.shape}")

    # Create a color map for visualization using matplotlib
    plt.figure(figsize=(10, 10))
-    plt.imshow(depth_normalized, cmap='gist_rainbow')
+    plt.imshow(depth, cmap='gist_rainbow')
    plt.colorbar(label='Depth [m]')
-    plt.title(f'Predicted Depth Map - Min: {depth_min:.1f}m, Max: {depth_max:.1f}m')
+    plt.title(f'Predicted Depth Map - Min: {np.min(depth):.1f}m, Max: {np.max(depth):.1f}m')
    plt.axis('off')  # Hide axis for a cleaner image

    # Save the depth map visualization to a file
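
Since the depth map stays in meters rather than being normalized, the colorbar and the min/max values in the title read directly as metric distances. A self-contained sketch of the same visualization settings applied to a synthetic depth ramp (the depth_vis.png filename is illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    # Synthetic depth map: values ramp from 1 m to 10 m across a 480x640 image.
    depth = np.tile(np.linspace(1.0, 10.0, 640), (480, 1))

    plt.figure(figsize=(10, 10))
    plt.imshow(depth, cmap='gist_rainbow')
    plt.colorbar(label='Depth [m]')
    plt.title(f'Predicted Depth Map - Min: {np.min(depth):.1f}m, Max: {np.max(depth):.1f}m')
    plt.axis('off')
    plt.savefig('depth_vis.png', bbox_inches='tight')
    plt.close()
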
@@ -208,8 +192,9 @@ def get_last_commit_timestamp():
    try:
        timestamp = subprocess.check_output(['git', 'log', '-1', '--format=%cd', '--date=iso']).decode('utf-8').strip()
        return datetime.fromisoformat(timestamp).strftime("%Y-%m-%d %H:%M:%S")
-    except Exception:
-        return "Unknown"
+    except Exception as e:
+        print(f"{str(e)}")
+        return str(e)

# Create the Gradio interface with appropriate input and output components.
last_updated = get_last_commit_timestamp()
 