Commit 94f372a
yunusserhat committed
1 Parent(s): abd15df
Upload 40 files
Browse files
- metrics/__init__.py +0 -0
- metrics/distance_based.py +129 -0
- metrics/elo.py +21 -0
- metrics/utils.py +85 -0
- models/__init__.py +0 -0
- models/classification/utils_global.py +177 -0
- models/eval_best_model.py +62 -0
- models/huggingface.py +24 -0
- models/losses.py +614 -0
- models/misc.py +9 -0
- models/module.py +157 -0
- models/networks/backbones.py +162 -0
- models/networks/heads/__init__.py +0 -0
- models/networks/heads/auxilliary.py +33 -0
- models/networks/heads/classification.py +17 -0
- models/networks/heads/hybrid.py +194 -0
- models/networks/heads/id_to_gps.py +33 -0
- models/networks/heads/random.py +53 -0
- models/networks/heads/regression.py +44 -0
- models/networks/mlp.py +258 -0
- models/networks/network.py +335 -0
- models/networks/utils.py +22 -0
- models/utils.py +54 -0
- scripts/download-dataset.py +27 -0
- scripts/preprocessing/enrich-metadata-adaptive-quadtrees.py +225 -0
- scripts/preprocessing/enrich-metadata-quadtree.py +208 -0
- scripts/preprocessing/enrich-metadata.py +123 -0
- scripts/preprocessing/fix_namimbia.py +64 -0
- scripts/preprocessing/nearest-neighbors.py +140 -0
- scripts/preprocessing/preprocess.py +400 -0
- scripts/preprocessing/train-val-split.py +15 -0
- scripts/retrieval/backbone.py +150 -0
- scripts/retrieval/retrieval.py +143 -0
- scripts/retrieval/street-clip-zero-shot.py +299 -0
- scripts/retrieval/utils.py +113 -0
- utils/__init__.py +0 -0
- utils/image_processing.py +58 -0
- utils/lr_scheduler.py +96 -0
- utils/model_utils.py +14 -0
- utils/quadtree_10_1000.csv +0 -0
metrics/__init__.py
ADDED
File without changes
metrics/distance_based.py
ADDED
@@ -0,0 +1,129 @@
import torch

from metrics.utils import haversine, reverse

from torchmetrics import Metric


class HaversineMetrics(Metric):
    """
    Computes the average haversine distance between the predicted and ground truth points.
    Computes the accuracy given some radiuses.
    Computes the Geoguessr score given some radiuses.

    Args:
        acc_radiuses (list): list of radiuses to compute the accuracy from
        acc_area (list): list of areas to compute the accuracy from.
        aux_data (list): list of auxiliary data to compute the accuracy from.
    """

    def __init__(
        self,
        acc_radiuses=[],
        acc_area=["country", "region", "sub-region", "city"],
        aux_data=[],
    ):
        super().__init__()
        self.add_state("haversine_sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("geoguessr_sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
        for acc in acc_radiuses:
            self.add_state(
                f"close_enough_points_{acc}",
                default=torch.tensor(0.0),
                dist_reduce_fx="sum",
            )
        for acc in acc_area:
            self.add_state(
                f"close_enough_points_{acc}",
                default=torch.tensor(0.0),
                dist_reduce_fx="sum",
            )
            self.add_state(
                f"count_{acc}", default=torch.tensor(0), dist_reduce_fx="sum"
            )
        self.acc_radius = acc_radiuses
        self.acc_area = acc_area
        self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum")
        self.aux = len(aux_data) > 0
        self.aux_list = aux_data
        if self.aux:
            self.aux_count = {}
            for col in self.aux_list:
                self.add_state(
                    f"aux_{col}",
                    default=torch.tensor(0.0),
                    dist_reduce_fx="sum",
                )

    def update(self, pred, gt):
        haversine_distance = haversine(pred["gps"], gt["gps"])
        for acc in self.acc_radius:
            self.__dict__[f"close_enough_points_{acc}"] += (
                haversine_distance < acc
            ).sum()
        if len(self.acc_area) > 0:
            area_pred, area_gt = reverse(pred["gps"], gt, self.acc_area)
            for acc in self.acc_area:
                self.__dict__[f"close_enough_points_{acc}"] += (
                    area_pred[acc] == area_gt["_".join(["unique", acc])]
                ).sum()
                self.__dict__[f"count_{acc}"] += len(area_gt["_".join(["unique", acc])])
        self.haversine_sum += haversine_distance.sum()
        self.geoguessr_sum += 5000 * torch.exp(-haversine_distance / 1492.7).sum()

        if self.aux:
            if "land_cover" in self.aux_list:
                col = "land_cover"
                self.__dict__[f"aux_{col}"] += (
                    pred[col].argmax(dim=1) == gt[col].argmax(dim=1)
                ).sum()
            if "road_index" in self.aux_list:
                col = "road_index"
                self.__dict__[f"aux_{col}"] += (
                    pred[col].argmax(dim=1) == gt[col].argmax(dim=1)
                ).sum()
            if "drive_side" in self.aux_list:
                col = "drive_side"
                self.__dict__[f"aux_{col}"] += (
                    (pred[col] > 0.5).float() == gt[col]
                ).sum()
            if "climate" in self.aux_list:
                col = "climate"
                self.__dict__[f"aux_{col}"] += (
                    pred[col].argmax(dim=1) == gt[col].argmax(dim=1)
                ).sum()
            if "soil" in self.aux_list:
                col = "soil"
                self.__dict__[f"aux_{col}"] += (
                    pred[col].argmax(dim=1) == gt[col].argmax(dim=1)
                ).sum()
            if "dist_sea" in self.aux_list:
                col = "dist_sea"
                self.__dict__[f"aux_{col}"] += (
                    (pred[col] - gt[col]).pow(2).sum(dim=1).sum()
                )

        self.count += pred["gps"].shape[0]

    def compute(self):
        output = {
            "Haversine": self.haversine_sum / self.count,
            "Geoguessr": self.geoguessr_sum / self.count,
        }
        for acc in self.acc_radius:
            output[f"Accuracy_{acc}_km_radius"] = (
                self.__dict__[f"close_enough_points_{acc}"] / self.count
            )
        for acc in self.acc_area:
            output[f"Accuracy_{acc}"] = (
                self.__dict__[f"close_enough_points_{acc}"]
                / self.__dict__[f"count_{acc}"]
            )

        if self.aux:
            for col in self.aux_list:
                output["_".join(["Accuracy", col])] = (
                    self.__dict__[f"aux_{col}"] / self.count
                )

        return output
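For reference, a minimal usage sketch of the metric above, assuming coordinates are given as (lat, lon) in radians (as the haversine() helper expects) and that no area or auxiliary accuracies are requested; the tensors and radii below are hypothetical, not part of the repository:

import torch
from metrics.distance_based import HaversineMetrics

# accuracy at 1, 25 and 200 km; skip reverse-geocoded area accuracies
metric = HaversineMetrics(acc_radiuses=[1, 25, 200], acc_area=[])
pred = {"gps": torch.deg2rad(torch.tensor([[48.85, 2.35], [40.71, -74.01]]))}
gt = {"gps": torch.deg2rad(torch.tensor([[48.86, 2.34], [34.05, -118.24]]))}
metric.update(pred, gt)
print(metric.compute())  # {'Haversine': ..., 'Geoguessr': ..., 'Accuracy_1_km_radius': ..., ...}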
metrics/elo.py
ADDED
@@ -0,0 +1,21 @@
import os
import torch
from metrics.utils import haversine

from torchmetrics import Metric


class HaversineELOMetric(Metric):
    """
    Computes the ELO score of the current network given previous players

    Args:
        previous_players_scores (str): path to the csv containing the scores of the previous players
        previous_players_predictions (str): path to the folder containing the predictions of the previous players
        tag (str): tag of the current experiment

    """

    def __init__(self, cache_folder, tag):
        ### TODO
        pass
metrics/utils.py
ADDED
@@ -0,0 +1,85 @@
import torch
import reverse_geocoder
import numpy as np


def haversine(pred, gt):
    # expects inputs to be torch tensors in (lat, lon) format as radians
    # N x 2

    # calculate the difference in latitude and longitude between the predicted and ground truth points
    lat_diff = pred[:, 0] - gt[:, 0]
    lon_diff = pred[:, 1] - gt[:, 1]

    # calculate the haversine formula components
    lhs = torch.sin(lat_diff / 2) ** 2
    rhs = torch.cos(pred[:, 0]) * torch.cos(gt[:, 0]) * torch.sin(lon_diff / 2) ** 2
    a = lhs + rhs

    # calculate the final distance using the haversine formula
    c = 2 * torch.arctan2(torch.sqrt(a), torch.sqrt(1 - a))
    distance = 6371 * c

    return distance


def reverse(pred, gt, area):
    df = {}
    gt_area = {}
    nan_mask = {}
    areas = ["_".join(["unique", ar]) for ar in area]
    if "unique_continent" in areas:
        areas.remove("unique_continent")
    for ar in areas:
        inter = np.array(gt[ar])
        nan_mask[ar] = inter != "nan"
        gt_area[ar] = inter[nan_mask[ar]]
    location = reverse_geocoder.search(
        [
            (lat, lon)
            for lat, lon in zip(
                np.degrees(pred[:, 0].cpu()), np.degrees(pred[:, 1].cpu())
            )
        ]
    )
    if "continent" in area:
        continent = torch.load("continent.pt")
        inter = np.array([l.get("cc", "") for l in location])[
            nan_mask["unique_country"]
        ]
        df["continent"] = np.array([continent[i] for i in inter])
        gt_area["unique_continent"] = np.array(
            [continent[i] for i in gt_area["unique_country"]]
        )

    if "country" in area:
        df["country"] = np.array([l.get("cc", "") for l in location])[
            nan_mask["unique_country"]
        ]
    if "region" in area:
        df["region"] = np.array(
            ["_".join([l.get("admin1", ""), l.get("cc", "")]) for l in location]
        )[nan_mask["unique_region"]]
    if "sub-region" in area:
        df["sub-region"] = np.array(
            [
                "_".join([l.get("admin2", ""), l.get("admin1", ""), l.get("cc", "")])
                for l in location
            ]
        )[nan_mask["unique_sub-region"]]
    if "city" in area:
        df["city"] = np.array(
            [
                "_".join(
                    [
                        l.get("name", ""),
                        l.get("admin2", ""),
                        l.get("admin1", ""),
                        l.get("cc", ""),
                    ]
                )
                for l in location
            ]
        )[nan_mask["unique_city"]]

    return df, gt_area
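For clarity, haversine() above implements the standard haversine great-circle distance, with predicted coordinates $(\varphi_p, \lambda_p)$ and ground-truth coordinates $(\varphi_g, \lambda_g)$ in radians and Earth radius $R = 6371$ km:

$$
a = \sin^2\!\left(\frac{\varphi_p-\varphi_g}{2}\right) + \cos\varphi_p\,\cos\varphi_g\,\sin^2\!\left(\frac{\lambda_p-\lambda_g}{2}\right),
\qquad
d = 2R\,\operatorname{atan2}\!\left(\sqrt{a},\,\sqrt{1-a}\right).
$$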
models/__init__.py
ADDED
File without changes
models/classification/utils_global.py
ADDED
@@ -0,0 +1,177 @@
import logging
from collections import OrderedDict
from pathlib import Path
from typing import Union, List

import torch
import torchvision


def check_is_valid_torchvision_architecture(architecture: str):
    """Raises a ValueError if architecture is not part of available torchvision models."""
    available = sorted(
        name
        for name in torchvision.models.__dict__
        if name.islower()
        and not name.startswith("__")
        and callable(torchvision.models.__dict__[name])
    )
    if architecture not in available:
        raise ValueError(f"{architecture} not in {available}")


def build_base_model(arch: str):

    model = torchvision.models.__dict__[arch](pretrained=True)

    # get input dimension before classification layer
    if arch in ["mobilenet_v2"]:
        nfeatures = model.classifier[-1].in_features
        model = torch.nn.Sequential(*list(model.children())[:-1])
    elif arch in ["densenet121", "densenet161", "densenet169"]:
        nfeatures = model.classifier.in_features
        model = torch.nn.Sequential(*list(model.children())[:-1])
    elif "resne" in arch:
        # usually all ResNet variants
        nfeatures = model.fc.in_features
        model = torch.nn.Sequential(*list(model.children())[:-2])
    else:
        raise NotImplementedError

    model.avgpool = torch.nn.AdaptiveAvgPool2d(1)
    model.flatten = torch.nn.Flatten(start_dim=1)
    return model, nfeatures


def load_weights_if_available(
    model: torch.nn.Module, classifier: torch.nn.Module, weights_path: Union[str, Path]
):

    checkpoint = torch.load(weights_path, map_location=lambda storage, loc: storage)

    state_dict_features = OrderedDict()
    state_dict_classifier = OrderedDict()
    for k, w in checkpoint["state_dict"].items():
        if k.startswith("model"):
            state_dict_features[k.replace("model.", "")] = w
        elif k.startswith("classifier"):
            state_dict_classifier[k.replace("classifier.", "")] = w
        else:
            logging.warning(f"Unexpected prefix in state_dict: {k}")
    model.load_state_dict(state_dict_features, strict=True)
    return model, classifier


def vectorized_gc_distance(latitudes, longitudes, latitudes_gt, longitudes_gt):
    R = 6371
    factor_rad = 0.01745329252
    longitudes = factor_rad * longitudes
    longitudes_gt = factor_rad * longitudes_gt
    latitudes = factor_rad * latitudes
    latitudes_gt = factor_rad * latitudes_gt
    delta_long = longitudes_gt - longitudes
    delta_lat = latitudes_gt - latitudes
    subterm0 = torch.sin(delta_lat / 2) ** 2
    subterm1 = torch.cos(latitudes) * torch.cos(latitudes_gt)
    subterm2 = torch.sin(delta_long / 2) ** 2
    subterm1 = subterm1 * subterm2
    a = subterm0 + subterm1
    c = 2 * torch.asin(torch.sqrt(a))
    gcd = R * c
    return gcd


def gcd_threshold_eval(gc_dists, thresholds=[1, 25, 200, 750, 2500]):
    # calculate accuracy for given gcd thresholds
    results = {}
    for thres in thresholds:
        results[thres] = torch.true_divide(
            torch.sum(gc_dists <= thres), len(gc_dists)
        ).item()
    return results


def accuracy(output, target, partitioning_shortnames: list, topk=(1, 5, 10)):
    def _accuracy(output, target, topk=(1,)):
        """Computes the accuracy over the k top predictions for the specified values of k"""
        with torch.no_grad():
            maxk = max(topk)
            batch_size = target.size(0)

            _, pred = output.topk(maxk, 1, True, True)
            pred = pred.t()
            correct = pred.eq(target.view(1, -1).expand_as(pred))

            res = {}
            for k in topk:
                correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
                res[k] = correct_k / batch_size
            return res

    with torch.no_grad():
        out_dict = {}
        for i, pname in enumerate(partitioning_shortnames):
            res_dict = _accuracy(output[i], target[i], topk=topk)
            for k, v in res_dict.items():
                out_dict[f"acc{k}_val/{pname}"] = v

        return out_dict


def summarize_gcd_stats(pnames: List[str], outputs, hierarchy=None):
    gcd_dict = {}
    metric_names = [f"gcd_{p}_val" for p in pnames]
    if hierarchy is not None:
        metric_names.append("gcd_hierarchy_val")
    for metric_name in metric_names:
        distances_flat = [output[metric_name] for output in outputs]
        distances_flat = torch.cat(distances_flat, dim=0)
        gcd_results = gcd_threshold_eval(distances_flat)
        for gcd_thres, acc in gcd_results.items():
            gcd_dict[f"{metric_name}/{gcd_thres}"] = acc
    return gcd_dict


def summarize_test_gcd(pnames, outputs, hierarchy=None):
    def _eval(output):
        # calculate acc@km for a list of given thresholds
        accuracy_outputs = {}
        if hierarchy is not None:
            pnames.append("hierarchy")
        for pname in pnames:
            # concat batches of distances
            distances_flat = torch.cat([x[pname] for x in output], dim=0)
            # acc for all distances
            acc_dict = gcd_threshold_eval(distances_flat)
            accuracy_outputs[f"acc_test/{pname}"] = acc_dict
        return accuracy_outputs

    result = {}

    if isinstance(outputs[0], dict):  # only one testset
        result = _eval(outputs)
    elif isinstance(outputs[0], list):  # multiple testsets
        for testset_index, output in enumerate(outputs):
            result[testset_index] = _eval(output)
    else:
        raise TypeError

    return result


def summarize_loss_acc_stats(pnames: List[str], outputs, topk=[1, 5, 10]):

    loss_acc_dict = {}
    metric_names = []
    for k in topk:
        accuracy_names = [f"acc{k}_val/{p}" for p in pnames]
        metric_names.extend(accuracy_names)
    metric_names.extend([f"loss_val/{p}" for p in pnames])
    for metric_name in ["loss_val/total", *metric_names]:
        metric_total = 0
        for output in outputs:
            metric_value = output[metric_name]
            metric_total += metric_value
        loss_acc_dict[metric_name] = metric_total / len(outputs)
    return loss_acc_dict
models/eval_best_model.py
ADDED
@@ -0,0 +1,62 @@
import os
from typing import Any
import pytorch_lightning as L
import torch
from hydra.utils import instantiate
from models.huggingface import Geolocalizer


class EvalModule(L.LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        os.chdir(cfg.network.root_dir)
        self.model = Geolocalizer.from_pretrained('osv5m/baseline')
        self.test_metrics = instantiate(cfg.test_metrics)

    def training_step(self, batch, batch_idx):
        pred = self.model(batch)
        pass

    @torch.no_grad()
    def validation_step(self, batch, batch_idx):
        pred = self.model(batch)
        pass

    def on_validation_epoch_end(self):
        pass

    @torch.no_grad()
    def test_step(self, batch, batch_idx):
        pred = self.model.forward_tensor(batch)
        self.test_metrics.update({"gps": pred}, batch)

    def on_test_epoch_end(self):
        metrics = self.test_metrics.compute()
        for metric_name, metric_value in metrics.items():
            self.log(
                f"test/{metric_name}",
                metric_value,
                sync_dist=True,
                on_step=False,
                on_epoch=True,
            )

    def lr_scheduler_step(self, scheduler, metric):
        scheduler.step(self.global_step)


def get_parameter_names(model, forbidden_layer_types):
    """
    Returns the names of the model parameters that are not inside a forbidden layer.
    Taken from HuggingFace transformers.
    """
    result = []
    for name, child in model.named_children():
        result += [
            f"{name}.{n}"
            for n in get_parameter_names(child, forbidden_layer_types)
            if not isinstance(child, tuple(forbidden_layer_types))
        ]
    # Add model specific parameters (defined with nn.Parameter) since they are not in any child.
    result += list(model._parameters.keys())
    return result
models/huggingface.py
ADDED
@@ -0,0 +1,24 @@
import torch
from torch import nn
from hydra.utils import instantiate
from omegaconf import OmegaConf
from huggingface_hub import PyTorchModelHubMixin


class Geolocalizer(nn.Module, PyTorchModelHubMixin):
    def __init__(self, config):
        super().__init__()
        self.config = OmegaConf.create(config)
        self.transform = instantiate(self.config.transform)
        self.model = instantiate(self.config.model)
        self.head = self.model.head
        self.mid = self.model.mid
        self.backbone = self.model.backbone

    def forward(self, img: torch.Tensor):
        output = self.head(self.mid(self.backbone({"img": img})), None)
        return output["gps"]

    def forward_tensor(self, img: torch.Tensor):
        output = self.head(self.mid(self.backbone(img)), None)
        return output["gps"]
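A rough inference sketch for the hub wrapper above. The from_pretrained call comes from PyTorchModelHubMixin, and 'osv5m/baseline' is the checkpoint name used in models/eval_best_model.py; the image path is hypothetical, and the sketch assumes the instantiated self.transform maps a PIL image to a normalized CHW tensor, which is defined by the config rather than by this file:

import torch
from PIL import Image
from models.huggingface import Geolocalizer

model = Geolocalizer.from_pretrained("osv5m/baseline")  # provided by PyTorchModelHubMixin
model.eval()
img = Image.open("street_view.jpg")  # hypothetical local image
x = model.transform(img).unsqueeze(0)  # assumption: transform returns a CHW tensor
with torch.no_grad():
    gps = model(x)  # (1, 2) predicted coordinates
print(gps)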
models/losses.py
ADDED
@@ -0,0 +1,614 @@
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
from os.path import join
from models.networks.utils import NormGPS


class L1(nn.Module):
    def __init__(self):
        super(L1, self).__init__()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "gps": torch.Tensor Bx2
            y: dict that contains "gps": torch.Tensor Bx2
        Returns:
            torch.Tensor: L1 loss between x and y: torch.Tensor([B])
        """
        return {"L1_loss": torch.abs(x["gps"] - y["gps"]).mean(dim=-1)}


class L2(nn.Module):
    def __init__(self):
        super(L2, self).__init__()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "gps": torch.Tensor Bx2
            y: dict that contains "gps": torch.Tensor Bx2
        Returns:
            torch.Tensor: L2 loss between x and y: torch.Tensor([B])
        """
        return {"L2_loss": ((x["gps"] - y["gps"]) ** 2).mean(dim=-1)}


class L2Hybrid(nn.Module):
    def __init__(self):
        super(L2Hybrid, self).__init__()
        self.norm = NormGPS()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "gps": torch.Tensor Bx2
            y: dict that contains "gps": torch.Tensor Bx2
        Returns:
            torch.Tensor: L2 loss between x and y: torch.Tensor([B])
        """
        return {
            "L2_loss": (
                (x["reg"] - (self.norm(y["gps"]) - x["center"]) * x["size"]) ** 2
            ).mean(dim=-1)
        }


class CrossEntropy(nn.Module):
    def __init__(self):
        super(CrossEntropy, self).__init__()
        self.loss = nn.CrossEntropyLoss(reduction="none")

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "label": torch.Tensor BxN
            y: dict that contains "label": torch.Tensor BxN
        Returns:
            torch.Tensor: CrossEntropy loss between x and y: torch.Tensor([B])
        """
        return {"cross_entropy_loss": self.loss(x["label"], y["label"])}


class HierarchicalCrossEntropyQuad(nn.Module):
    def __init__(self, data_path=""):
        super(HierarchicalCrossEntropyQuad, self).__init__()
        self.dict_losses = {"classif_loss": nn.CrossEntropyLoss(reduction="none")}
        for i in range(1, 10):
            self.dict_losses[f"quadtree_{i}_loss"] = nn.NLLLoss()
        self.matrixes = torch.load(join(data_path, "quadtree_matrixes.pt"))
        self.dicts = torch.load(join(data_path, "quadtree_dicts.pt"))
        self.id_to_quad = torch.load(join(data_path, "id_to_quad_10_1000.pt"))

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "label": torch.Tensor BxN
            y: dict that contains "label": torch.Tensor BxN
        Returns:
            torch.Tensor: Hierarchical CrossEntropy for Quadtrees loss between x and y: torch.Tensor([B])
        """
        out = {"classif_loss": self.dict_losses["classif_loss"](x["label"], y["label"])}
        probas = nn.functional.softmax(x["label"], dim=1)
        device = x["label"].device
        gt = self.id_to_quad[y["label"].cpu()]
        for i in range(9):
            logits = torch.log(torch.mm(probas, self.matrixes[i].to(device)) + 1e-10)
            l = [s[: 9 - i] if len(s) >= 10 - i else s for s in gt]
            out[f"quadtree_{i+1}_loss"] = self.dict_losses[f"quadtree_{i+1}_loss"](
                logits, torch.tensor([self.dicts[i][item] for item in l]).to(device)
            )

        return out


class HierarchicalCrossEntropy(nn.Module):
    def __init__(self, path=""):
        super(HierarchicalCrossEntropy, self).__init__()
        self.city_loss = nn.CrossEntropyLoss(reduction="none")
        self.country_loss = nn.NLLLoss()
        self.area_loss = nn.NLLLoss()
        self.region_loss = nn.NLLLoss()
        self.city_to_country = torch.load(path + "city_to_country.pt")
        self.city_to_region = torch.load(path + "city_to_region.pt")
        self.city_to_area = torch.load(path + "city_to_area.pt")
        self.country_to_idx = torch.load(path + "country_to_idx.pt")
        self.region_to_idx = torch.load(path + "region_to_idx.pt")
        self.area_to_idx = torch.load(path + "area_to_idx.pt")

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "label": torch.Tensor BxN
            y: dict that contains "label": torch.Tensor BxN
        Returns:
            torch.Tensor: Hierarchical CrossEntropy loss between x and y: torch.Tensor([B])
        """
        country_mask = np.array(y["unique_country"]) != "NaN"
        self.city_to_country = self.city_to_country.to(x["label"].device)
        countries_probas = nn.functional.softmax(x["label"][country_mask], dim=1)
        countries_logits = torch.log(
            torch.mm(countries_probas, self.city_to_country) + 1e-10
        )
        country_gt = torch.tensor(
            [
                self.country_to_idx[item]
                for item in np.array(y["unique_country"])[country_mask]
            ]
        ).to(x["label"].device)

        region_mask = np.array(y["unique_region"]) != "NaN"
        self.city_to_region = self.city_to_region.to(x["label"].device)
        regions_probas = nn.functional.softmax(x["label"][region_mask], dim=1)
        regions_logits = torch.log(
            torch.mm(regions_probas, self.city_to_region) + 1e-10
        )
        region_gt = torch.tensor(
            [
                self.region_to_idx[item]
                for item in np.array(y["unique_region"])[region_mask]
            ]
        ).to(x["label"].device)

        area_mask = np.array(y["unique_sub-region"]) != "NaN"
        self.city_to_area = self.city_to_area.to(x["label"].device)
        areas_probas = nn.functional.softmax(x["label"][area_mask], dim=1)
        areas_logits = torch.log(torch.mm(areas_probas, self.city_to_area) + 1e-10)
        area_gt = torch.tensor(
            [
                self.area_to_idx[item]
                for item in np.array(y["unique_sub-region"])[area_mask]
            ]
        ).to(x["label"].device)

        return {
            "cross_entropy_country_loss": self.country_loss(
                countries_logits, country_gt
            ),
            "cross_entropy_city_loss": self.city_loss(x["label"], y["label"]),
            "cross_entropy_area_loss": self.area_loss(areas_logits, area_gt),
            "cross_entropy_region_loss": self.region_loss(regions_logits, region_gt),
        }


class LandCoverLoss(nn.Module):
    def __init__(self):
        super(LandCoverLoss, self).__init__()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "land_cover": torch.Tensor BxN
            y: dict that contains "land_cover": torch.Tensor BxN
        Returns:
            torch.Tensor: CrossEntropy loss between x and y: torch.Tensor([B])
        """
        return {
            "land_cover_cross_entropy_loss": self.loss(x["land_cover"], y["land_cover"])
        }


class RoadIndexLoss(nn.Module):
    def __init__(self):
        super(RoadIndexLoss, self).__init__()
        self.loss = nn.MSELoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "road_index": torch.Tensor BxN
            y: dict that contains "road_index": torch.Tensor BxN
        Returns:
            torch.Tensor: MSE loss between x and y: torch.Tensor([B])
        """
        return {"road_index_mse_loss": self.loss(x["road_index"], y["road_index"])}


class DriveSideLoss(nn.Module):
    def __init__(self):
        super(DriveSideLoss, self).__init__()
        self.loss = nn.BCELoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "drive_side": torch.Tensor BxN
            y: dict that contains "drive_side": torch.Tensor BxN
        Returns:
            torch.Tensor: BCE loss between x and y: torch.Tensor([B])
        """
        return {"drive_side_bce_loss": self.loss(x["drive_side"], y["drive_side"])}


class ClimateLoss(nn.Module):
    def __init__(self):
        super(ClimateLoss, self).__init__()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "climate": torch.Tensor BxN
            y: dict that contains "climate": torch.Tensor BxN
        Returns:
            torch.Tensor: CrossEntropy loss between x and y: torch.Tensor([B])
        """
        return {"climate_cross_entropy_loss": self.loss(x["climate"], y["climate"])}


class SoilLoss(nn.Module):
    def __init__(self):
        super(SoilLoss, self).__init__()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "soil": torch.Tensor BxN
            y: dict that contains "soil": torch.Tensor BxN
        Returns:
            torch.Tensor: CrossEntropy loss between x and y: torch.Tensor([B])
        """
        return {"soil_cross_entropy_loss": self.loss(x["soil"], y["soil"])}


class DistSeaLoss(nn.Module):
    def __init__(self):
        super(DistSeaLoss, self).__init__()
        self.loss = nn.MSELoss()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "dist_sea": torch.Tensor BxN
            y: dict that contains "dist_sea": torch.Tensor BxN
        Returns:
            torch.Tensor: MSE loss between x and y: torch.Tensor([B])
        """
        return {"dist_sea_mse_loss": self.loss(x["dist_sea"], y["dist_sea"])}


class Haversine(nn.Module):
    def __init__(self):
        super(Haversine, self).__init__()

    def forward(self, x, y):
        """
        Args:
            x: dict that contains "gps": torch.Tensor Bx2
            y: dict that contains "gps": torch.Tensor Bx2
        Returns:
            torch.Tensor: Haversine loss between x and y: torch.Tensor([B])
        Note:
            Haversine distance doesn't contain the 2 * 6371 constant.
        """
        x, y = x["gps"], y["gps"]
        lhs = torch.sin((x[:, 0] - y[:, 0]) / 2) ** 2
        rhs = (
            torch.cos(x[:, 0])
            * torch.cos(y[:, 0])
            * torch.sin((x[:, 1] - y[:, 1]) / 2) ** 2
        )
        a = lhs + rhs
        return {
            "haversine_loss": torch.arctan2(torch.sqrt(a), torch.sqrt(1 - a))
        }  # omitting the 2 * 6371 factor as it is a constant


class GeoguessrLoss(Haversine):
    def __init__(self):
        super(GeoguessrLoss, self).__init__()

    def forward(self, x, y):
        distance = super().forward(x, y)["haversine_loss"]
        loss = torch.exp(-distance / 1852)
        return {"geoguessr_loss": loss}


class InfoNCE(nn.Module):
    def __init__(self, tau=0.1):
        super(InfoNCE, self).__init__()
        self.tau = tau

    def cosine_similarity(self, a, b, normalize=True):
        if normalize:
            w1 = a.norm(p=2, dim=1, keepdim=True)
            w2 = b.norm(p=2, dim=1, keepdim=True)
            sim_matrix = torch.mm(a, b.t()) / (w1 * w2.t()).clamp(min=1e-8)
        else:
            sim_matrix = torch.mm(a, b.t())
        return sim_matrix

    def forward(self, x, y=None):
        """
        neg_sim: BxB
        pos_sim: Bx1
        """
        features = x["features"]
        positive_features = x["pos_features"]
        pos_sim = F.cosine_similarity(
            features, positive_features, dim=1, eps=1e-8
        ).unsqueeze(1)
        neg_sim = self.cosine_similarity(features, features, normalize=True)

        b = neg_sim.shape[0]
        logits = (1 - torch.eye(b)).type_as(neg_sim) * neg_sim + torch.eye(b).type_as(
            pos_sim
        ) * pos_sim
        logits = logits / self.tau
        labels = torch.arange(b, dtype=torch.long).cuda()
        loss = F.cross_entropy(logits, labels)
        return {
            "contrastive_loss": loss,
        }


class TextNCE(nn.Module):
    def __init__(self, tau=0.1, num_devices=1):
        super(TextNCE, self).__init__()
        self.distributed = num_devices > 1
        self.tau = tau

    def cosine_similarity(self, a, b, normalize=True):
        if normalize:
            w1 = a.norm(p=2, dim=1, keepdim=True)
            w2 = b.norm(p=2, dim=1, keepdim=True)
            sim_matrix = torch.mm(a, b.t()) / (w1 * w2.t()).clamp(min=1e-8)
        else:
            sim_matrix = torch.mm(a, b.t())
        return sim_matrix

    def forward(self, x, y=None):
        """
        neg_sim: BxB
        pos_sim: Bx1
        """
        if self.distributed:
            all_image_features = torch.cat(
                torch.distributed.nn.all_gather(x["features"]), dim=0
            )
            all_text_features = torch.cat(
                torch.distributed.nn.all_gather(x["text_features"]), dim=0
            )
            all_labels = torch.cat(torch.distributed.nn.all_gather(y["label"]), dim=0)
        else:
            all_image_features = x["features"]
            all_text_features = x["text_features"]
            all_labels = y["label"]
        labels_u = torch.unique(all_labels)
        logits = self.cosine_similarity(
            all_image_features, all_text_features, normalize=True
        )
        rows, cols = logits.size()
        indices = torch.arange(0, rows, device=all_image_features.device)
        loss = torch.sum(
            torch.logsumexp(
                logits[indices != indices.view(-1, 1)].view(rows, cols - 1) / self.tau,
                dim=1,
            )
        )
        for label in labels_u:
            if not (label == "NaN"):
                # Get the positive and negative examples
                idx = all_labels == label
                pos_logits = logits[idx][:, idx]
                # Compute the MIL-NCE loss
                loss += torch.sum(-torch.logsumexp(pos_logits / self.tau, dim=1))
        return {
            "contrastive_loss": loss,
        }


class MILNCE(nn.Module):
    def __init__(self, tau=0.1, num_devices=1):
        super(MILNCE, self).__init__()
        self.distributed = num_devices > 1
        self.tau = tau

    def cosine_similarity(self, a, b, normalize=True):
        if normalize:
            w1 = a.norm(p=2, dim=1, keepdim=True)
            w2 = b.norm(p=2, dim=1, keepdim=True)
            sim_matrix = torch.mm(a, b.t()) / (w1 * w2.t()).clamp(min=1e-8)
        else:
            sim_matrix = torch.mm(a, b.t())
        return sim_matrix

    def forward(self, x, y=None):
        """
        Compute the MIL-NCE loss.
        """
        if self.distributed:
            all_image_features = torch.cat(
                torch.distributed.nn.all_gather(x["features"]), dim=0
            )
            all_pos_features = torch.cat(
                torch.distributed.nn.all_gather(x["pos_features"]), dim=0
            )
            all_labels = torch.cat(torch.distributed.nn.all_gather(y["label"]), dim=0)
        else:
            all_image_features = x["features"]
            all_pos_features = x["pos_features"]
            all_labels = y["label"]
        labels_u = torch.unique(all_labels)
        features = torch.cat([all_image_features, all_pos_features])
        labels = torch.cat([all_labels, all_labels])
        logits = self.cosine_similarity(features, features, normalize=True)
        rows, cols = logits.size()
        indices = torch.arange(0, rows, device=features.device)
        loss = torch.sum(
            torch.logsumexp(
                logits[indices != indices.view(-1, 1)].view(rows, cols - 1) / self.tau,
                dim=1,
            )
        )
        for label in labels_u:
            if not (label == "NaN"):
                # Get the positive and negative examples
                idx = labels == label
                pos_logits = logits[idx][:, idx]

                rows, cols = pos_logits.size()
                indices = torch.arange(0, rows, device=features.device)
                pos_logits = pos_logits[indices != indices.view(-1, 1)].view(
                    rows, cols - 1
                )

                # Compute the MIL-NCE loss
                loss += torch.sum(-torch.logsumexp(pos_logits / self.tau, dim=1))
        return {
            "contrastive_loss": loss,
        }


class RegionMILNCE(nn.Module):
    def __init__(self, tau=0.1, num_devices=1):
        super(RegionMILNCE, self).__init__()
        self.distributed = num_devices > 1
        self.tau = tau

    def cosine_similarity(self, a, b, normalize=True):
        if normalize:
            w1 = a.norm(p=2, dim=1, keepdim=True)
            w2 = b.norm(p=2, dim=1, keepdim=True)
            sim_matrix = torch.mm(a, b.t()) / (w1 * w2.t()).clamp(min=1e-8)
        else:
            sim_matrix = torch.mm(a, b.t())
        return sim_matrix

    def forward(self, x, y=None):
        """
        neg_sim: BxB
        pos_sim: Bx1
        """
        if self.distributed:
            all_image_features = torch.cat(
                torch.distributed.nn.all_gather(x["features"]), dim=0
            )
            all_pos_features = torch.cat(
                torch.distributed.nn.all_gather(x["pos_features"]), dim=0
            )
            all_labels = torch.cat(torch.distributed.nn.all_gather(y["label"]), dim=0)
        else:
            all_image_features = x["features"]
            all_pos_features = x["pos_features"]
            all_labels = y["label"]
        labels_u = torch.unique(all_labels)
        features = torch.cat([all_image_features, all_pos_features])
        labels = torch.cat([all_labels, all_labels])
        logits = self.cosine_similarity(features, features, normalize=True)
        rows, cols = logits.size()
        indices = torch.arange(0, rows, device=features.device)
        loss = torch.sum(
            torch.logsumexp(
                logits[indices != indices.view(-1, 1)].view(rows, cols - 1) / self.tau,
                dim=1,
            )
        )
        for label in labels_u:
            if not (label == "NaN"):
                # Get the positive and negative examples
                idx = labels == label
                pos_logits = logits[idx][:, idx]

                rows, cols = pos_logits.size()
                indices = torch.arange(0, rows, device=features.device)
                pos_logits = pos_logits[indices != indices.view(-1, 1)].view(
                    rows, cols - 1
                )

                # Compute the MIL-NCE loss
                loss += torch.sum(-torch.logsumexp(pos_logits / self.tau, dim=1))
        return {
            "contrastive_loss": loss / len(all_labels),
        }


LOSSES = {
    "l1": L1,
    "l2": L2,
    "l2_hybrid": L2Hybrid,
    "haversine": Haversine,
    "geoguessr": GeoguessrLoss,
    "crossentropy": CrossEntropy,
    "infonce": InfoNCE,
    "mil-nce": MILNCE,
    "text-nce": TextNCE,
    "land_cover": LandCoverLoss,
    "road_index": RoadIndexLoss,
    "drive_side": DriveSideLoss,
    "climate": ClimateLoss,
    "soil": SoilLoss,
    "dist_sea": DistSeaLoss,
    "hierarchical": HierarchicalCrossEntropy,
    "hier_quad": HierarchicalCrossEntropyQuad,
    "region_mil": RegionMILNCE,
}
AVERAGE = {False: lambda x: x, True: lambda x: x.mean(dim=-1)}


class Losses(nn.Module):
    """The Losses meta-object that can take a mix of losses."""

    def __init__(self, mix={}, aux_data=[], path="", num_devices=1):
        """Initializes the Losses object.
        Args:
            mix (dict): dictionary with keys "loss_name" and values weight
        """
        super(Losses, self).__init__()
        assert len(mix)
        self.aux = len(aux_data) > 0
        if self.aux:
            self.aux_list = aux_data
            total = ["land_cover", "drive_side", "climate", "soil", "dist_sea"]
            for col in self.aux_list:
                total.remove(col)
            for col in total:
                del mix[col]
        self.init_losses(mix, path, num_devices)

    def init_losses(self, mix, path="", num_devices=1):
        """Initializes the losses.
        Args:
            mix (dict): dictionary with keys "loss_name" and values weight
        """
        self.loss = {}
        for m, v in mix.items():
            m = m.lower()
            if m in ["hierarchical", "hier_quad"]:
                try:
                    self.loss[m] = (LOSSES[m](path), v)
                except KeyError:
                    raise KeyError(f"Loss {m} not found in {LOSSES.keys()}")
            elif m in ["region_mil", "mil-nce", "text-nce"]:
                try:
                    self.loss[m] = (LOSSES[m](num_devices=num_devices), v)
                except KeyError:
                    raise KeyError(f"Loss {m} not found in {LOSSES.keys()}")
            else:
                try:
                    self.loss[m] = (LOSSES[m](), v)
                except KeyError:
                    raise KeyError(f"Loss {m} not found in {LOSSES.keys()}")

    def forward(self, x, y, average=True):
        """Computes the losses.
        Args:
            x: dict that contains "gps": torch.Tensor Bx2 or "label": torch.Tensor BxN
            y: dict that contains "gps": torch.Tensor Bx2 or "label": torch.Tensor BxN
            average (bool): whether to average the losses or not
        Returns:
            dict: dictionary with losses
        """
        output = {"loss": 0}
        for loss_name, (loss, weight) in self.loss.items():
            loss_output = loss(x, y)
            for k, v in loss_output.items():
                v = AVERAGE[average](v)
                if k.endswith("_loss"):
                    output["loss"] += weight * v
                output[k] = v
        return output
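A small usage sketch of the Losses wrapper above; the mix weights are hypothetical, and the sketch assumes predictions and targets both expose a "gps" tensor of shape Bx2 in radians:

import torch
from models.losses import Losses

# weight 1.0 on the L2 term and 0.5 on the haversine term (hypothetical mix)
criterion = Losses(mix={"l2": 1.0, "haversine": 0.5})
x = {"gps": torch.rand(4, 2)}  # dummy predictions
y = {"gps": torch.rand(4, 2)}  # dummy ground truth
out = criterion(x, y, average=True)
print(out["loss"], out["L2_loss"], out["haversine_loss"])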
models/misc.py
ADDED
@@ -0,0 +1,9 @@
from torch import nn


class DoNothingOptimizer(nn.Module):
    def __init__(self, *args, **kwargs):
        pass

    def step(self, *args, **kwargs):
        pass

    def zero_grad(self, *args, **kwargs):
        pass
models/module.py
ADDED
@@ -0,0 +1,157 @@
import os
from typing import Any
import pytorch_lightning as L
import torch
import torch.nn as nn
from hydra.utils import instantiate
import copy
import pandas as pd
import numpy as np


class Geolocalizer(L.LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.model = instantiate(cfg.network.instance)
        if cfg.text_tuning:
            self.text_model = instantiate(cfg.text_network.instance)
        self.loss = instantiate(cfg.loss)
        self.val_metrics = instantiate(cfg.val_metrics)
        self.test_metrics = instantiate(cfg.test_metrics)
        self.text_tuning = cfg.text_tuning

    def training_step(self, batch, batch_idx):
        pred = self.model(batch)
        if self.text_tuning:
            pred["text_features"] = self.text_model(batch)
        loss = self.loss(pred, batch, average=True)
        for metric_name, metric_value in loss.items():
            self.log(
                f"train/{metric_name}",
                metric_value,
                sync_dist=True,
                on_step=True,
                on_epoch=True,
            )
        return loss

    @torch.no_grad()
    def validation_step(self, batch, batch_idx):
        pred = self.model(batch)
        if self.text_tuning:
            pred["text_features"] = self.text_model(batch)
        loss = self.loss(pred, batch, average=True)["loss"]
        self.val_metrics.update(pred, batch)
        self.log("val/loss", loss, sync_dist=True, on_step=False, on_epoch=True)

    def on_validation_epoch_end(self):
        metrics = self.val_metrics.compute()
        for metric_name, metric_value in metrics.items():
            self.log(
                f"val/{metric_name}",
                metric_value,
                sync_dist=True,
                on_step=False,
                on_epoch=True,
            )

    @torch.no_grad()
    def test_step(self, batch, batch_idx):
        pred = self.model(batch)
        self.test_metrics.update(pred, batch)

    def on_test_epoch_end(self):
        metrics = self.test_metrics.compute()
        for metric_name, metric_value in metrics.items():
            self.log(
                f"test/{metric_name}",
                metric_value,
                sync_dist=True,
                on_step=False,
                on_epoch=True,
            )

    def configure_optimizers(self):
        lora_params = []
        backbone_params = []
        other_params = []
        last_block_params = []
        for name, param in self.model.named_parameters():
            if "lora" in name:
                lora_params.append(param)
            elif "backbone" in name:
                if self.cfg.optimizer.diff_backbone_last and ".11." in name:
                    last_block_params.append(param)
                else:
                    backbone_params.append(param)
            else:
                other_params.append(param)

        params_to_optimize = [{"params": other_params}]
        if self.cfg.optimizer.unfreeze_lr:
            params_to_optimize += [
                {"params": backbone_params, "lr": self.cfg.optimizer.backbone_lr}
            ]
            if self.cfg.optimizer.diff_backbone_last:
                params_to_optimize += [
                    {
                        "params": last_block_params,
                        "lr": self.cfg.optimizer.last_block_lr,
                    }
                ]
        if len(lora_params) > 0:
            # LoRA params sometimes train better with a different lr (~1e-4 for CLIP)
            params_to_optimize += [
                {"params": lora_params, "lr": self.cfg.optimizer.lora_lr}
            ]
        if self.cfg.optimizer.exclude_ln_and_biases_from_weight_decay:
            parameters_names_wd = get_parameter_names(self.model, [nn.LayerNorm])
            parameters_names_wd = [
                name for name in parameters_names_wd if "bias" not in name
            ]
            optimizer_grouped_parameters = [
                {
                    "params": [
                        p
                        for n, p in self.model.named_parameters()
                        if n in parameters_names_wd
                    ],
                    "weight_decay": self.cfg.optimizer.optim.weight_decay,
                },
                {
                    "params": [
                        p
                        for n, p in self.model.named_parameters()
                        if n not in parameters_names_wd
                    ],
                    "weight_decay": 0.0,
                },
            ]
            optimizer = instantiate(
                self.cfg.optimizer.optim, optimizer_grouped_parameters
            )
        else:
            optimizer = instantiate(self.cfg.optimizer.optim, params_to_optimize)
        scheduler = instantiate(self.cfg.lr_scheduler)(optimizer)
        return [optimizer], [{"scheduler": scheduler, "interval": "step"}]

    def lr_scheduler_step(self, scheduler, metric):
        scheduler.step(self.global_step)


def get_parameter_names(model, forbidden_layer_types):
    """
    Returns the names of the model parameters that are not inside a forbidden layer.
    Taken from HuggingFace transformers.
    """
    result = []
    for name, child in model.named_children():
        result += [
            f"{name}.{n}"
            for n in get_parameter_names(child, forbidden_layer_types)
            if not isinstance(child, tuple(forbidden_layer_types))
        ]
    # Add model specific parameters (defined with nn.Parameter) since they are not in any child.
    result += list(model._parameters.keys())
    return result
models/networks/backbones.py
ADDED
@@ -0,0 +1,162 @@
import torch.hub

from transformers import (
    CLIPVisionModel,
    CLIPVisionConfig,
    CLIPModel,
    CLIPProcessor,
    AutoTokenizer,
    CLIPTextModelWithProjection,
    CLIPTextConfig,
    CLIPVisionModelWithProjection,
    ResNetModel,
    ResNetConfig
)
from torch import nn

from PIL import Image
import requests


class CLIP(nn.Module):
    def __init__(self, path):
        """Initializes the CLIP model."""
        super().__init__()
        if path == "":
            config_vision = CLIPVisionConfig()
            self.clip = CLIPVisionModel(config_vision)
        else:
            self.clip = CLIPVisionModel.from_pretrained(path)

    def forward(self, x):
        """Predicts CLIP features from an image.
        Args:
            x (dict that contains "img": torch.Tensor): Input batch
        """
        features = self.clip(pixel_values=x["img"])["last_hidden_state"]
        return features


class CLIPJZ(nn.Module):
    def __init__(self, path):
        """Initializes the CLIP model."""
        super().__init__()
        if path == "":
            config_vision = CLIPVisionConfig()
            self.clip = CLIPVisionModel(config_vision)
        else:
            self.clip = CLIPVisionModel.from_pretrained(path)

    def forward(self, x):
        """Predicts CLIP features from an image.
        Args:
            x (dict that contains "img": torch.Tensor): Input batch
        """
        features = self.clip(pixel_values=x["img"])["last_hidden_state"]
        return features


class StreetCLIP(nn.Module):
    def __init__(self, path):
        """Initializes the CLIP model."""
        super().__init__()
        self.clip = CLIPModel.from_pretrained(path)
        self.transform = CLIPProcessor.from_pretrained(path)

    def forward(self, x):
        """Predicts CLIP features from an image.
        Args:
            x (dict that contains "img": torch.Tensor): Input batch
        """
        features = self.clip.get_image_features(
            **self.transform(images=x["img"], return_tensors="pt").to(x["gps"].device)
        ).unsqueeze(1)
        return features


class CLIPText(nn.Module):
    def __init__(self, path):
        """Initializes the CLIP model."""
        super().__init__()
        if path == "":
            config_vision = CLIPVisionConfig()
            self.clip = CLIPVisionModel(config_vision)
        else:
            self.clip = CLIPVisionModelWithProjection.from_pretrained(path)

    def forward(self, x):
        """Predicts CLIP features from an image.
        Args:
            x (dict that contains "img": torch.Tensor): Input batch
        """
        features = self.clip(pixel_values=x["img"])
        return features.image_embeds, features.last_hidden_state


class TextEncoder(nn.Module):
    def __init__(self, path):
        """Initializes the CLIP text model."""
        super().__init__()
        if path == "":
            config_vision = CLIPTextConfig()
            self.clip = CLIPTextModelWithProjection(config_vision)
            self.transform = AutoTokenizer()
        else:
            self.clip = CLIPTextModelWithProjection.from_pretrained(path)
            self.transform = AutoTokenizer.from_pretrained(path)
        for p in self.clip.parameters():
            p.requires_grad = False
        self.clip.eval()

    def forward(self, x):
        """Predicts CLIP features from text.
        Args:
            x (dict that contains "text": list): Input batch
        """
        features = self.clip(
            **self.transform(x["text"], padding=True, return_tensors="pt").to(
                x["gps"].device
            )
        ).text_embeds
        return features


class DINOv2(nn.Module):
    def __init__(self, tag) -> None:
        """Initializes the DINO model."""
        super().__init__()
        self.dino = torch.hub.load("facebookresearch/dinov2", tag)
        self.stride = 14  # ugly but dinov2 stride = 14

    def forward(self, x):
        """Predicts DINO features from an image."""
        x = x["img"]

        # crop for stride
        _, _, H, W = x.shape
        H_new = H - H % self.stride
        W_new = W - W % self.stride
        x = x[:, :, :H_new, :W_new]

        # forward features
        x = self.dino.forward_features(x)
        x = x["x_prenorm"]
        return x

class ResNet(nn.Module):
    def __init__(self, path):
        """Initializes the ResNet model."""
        super().__init__()
        if path == "":
            config_vision = ResNetConfig()
            self.resnet = ResNetModel(config_vision)
        else:
            self.resnet = ResNetModel.from_pretrained(path)

    def forward(self, x):
        """Predicts ResNet50 features from an image.
        Args:
            x (dict that contains "img": torch.Tensor): Input batch
        """
        features = self.resnet(x["img"])["pooler_output"]
        return features.squeeze()
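A minimal sketch (not part of the commit) of how one of these backbones is driven: each wrapper takes a batch dictionary and returns token features. The checkpoint id below is only an example of a public CLIP vision model, not necessarily the one used in this repository.

# Illustrative only: forward a dummy batch through the CLIP backbone wrapper.
import torch

backbone = CLIP("openai/clip-vit-base-patch16")   # example checkpoint id (assumption)
batch = {"img": torch.randn(2, 3, 224, 224)}      # pixel values expected under the "img" key
with torch.no_grad():
    tokens = backbone(batch)                      # (batch, num_patches + 1, hidden_dim)
print(tokens.shape)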
models/networks/heads/__init__.py
ADDED
File without changes
models/networks/heads/auxilliary.py
ADDED
@@ -0,0 +1,33 @@
import torch.nn as nn
from models.networks.utils import UnormGPS
from torch.nn.functional import tanh, sigmoid, softmax


class AuxHead(nn.Module):
    def __init__(self, aux_data=[], use_tanh=False):
        super().__init__()
        self.aux_data = aux_data
        self.unorm = UnormGPS()
        self.use_tanh = use_tanh

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        if self.use_tanh:
            gps = tanh(x["gps"])
        else:
            gps = x["gps"]  # pass raw coordinates through when tanh squashing is disabled
        gps = self.unorm(gps)
        output = {"gps": gps}
        if "land_cover" in self.aux_data:
            output["land_cover"] = softmax(x["land_cover"])
        if "road_index" in self.aux_data:
            output["road_index"] = x["road_index"]
        if "drive_side" in self.aux_data:
            output["drive_side"] = sigmoid(x["drive_side"])
        if "climate" in self.aux_data:
            output["climate"] = softmax(x["climate"])
        if "soil" in self.aux_data:
            output["soil"] = softmax(x["soil"])
        if "dist_sea" in self.aux_data:
            output["dist_sea"] = x["dist_sea"]
        return output
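A minimal sketch (illustrative only) of what AuxHead expects and returns: raw per-task outputs keyed by task name go in, squashed/normalized predictions come out. The task list and tensor shapes below are assumptions for the example.

# Illustrative only: post-process raw head outputs for two auxiliary tasks.
import torch

head = AuxHead(aux_data=["drive_side", "climate"], use_tanh=True)
raw = {
    "gps": torch.randn(4, 2),          # unbounded regression output, tanh-squashed then unnormalized
    "drive_side": torch.randn(4, 1),   # logit, mapped through sigmoid
    "climate": torch.randn(4, 30),     # logits, mapped through softmax
}
out = head(raw)                        # out["gps"] is in radians after UnormGPS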
models/networks/heads/classification.py
ADDED
@@ -0,0 +1,17 @@
import torch
import torch.nn as nn


class ClassificationHead(nn.Module):
    """Classification head for the network."""

    def __init__(self, id_to_gps):
        super().__init__()
        self.id_to_gps = id_to_gps

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        gps = self.id_to_gps(x.argmax(dim=-1))
        return {"label": x, **gps}
models/networks/heads/hybrid.py
ADDED
@@ -0,0 +1,194 @@
import torch
import torch.nn as nn
import pandas as pd

from models.networks.utils import UnormGPS


class HybridHead(nn.Module):
    """Classification head followed by regression head for the network."""

    def __init__(self, final_dim, quadtree_path, use_tanh, scale_tanh):
        super().__init__()
        self.final_dim = final_dim
        self.use_tanh = use_tanh
        self.scale_tanh = scale_tanh

        self.unorm = UnormGPS()

        if quadtree_path is not None:
            quadtree = pd.read_csv(quadtree_path)
            self.init_quadtree(quadtree)

    def init_quadtree(self, quadtree):
        quadtree[["min_lat", "max_lat"]] /= 90.0
        quadtree[["min_lon", "max_lon"]] /= 180.0
        self.register_buffer(
            "cell_center",
            0.5 * torch.tensor(quadtree[["max_lat", "max_lon"]].values)
            + 0.5 * torch.tensor(quadtree[["min_lat", "min_lon"]].values),
        )
        self.register_buffer(
            "cell_size",
            torch.tensor(quadtree[["max_lat", "max_lon"]].values)
            - torch.tensor(quadtree[["min_lat", "min_lon"]].values),
        )

    def forward(self, x, gt_label):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """

        classification_logits = x[..., : self.final_dim]
        classification = classification_logits.argmax(dim=-1)

        regression = x[..., self.final_dim :]

        if self.use_tanh:
            regression = self.scale_tanh * torch.tanh(regression)

        regression = regression.view(regression.shape[0], -1, 2)

        if self.training:
            regression = torch.gather(
                regression,
                1,
                gt_label.unsqueeze(-1).unsqueeze(-1).expand(regression.shape[0], 1, 2),
            )[:, 0, :]
            size = 2.0 / self.cell_size[gt_label]
            center = self.cell_center[gt_label]
            gps = (
                self.cell_center[gt_label] + regression * self.cell_size[gt_label] / 2.0
            )
        else:
            regression = torch.gather(
                regression,
                1,
                classification.unsqueeze(-1)
                .unsqueeze(-1)
                .expand(regression.shape[0], 1, 2),
            )[:, 0, :]
            size = 2.0 / self.cell_size[classification]
            center = self.cell_center[classification]
            gps = (
                self.cell_center[classification]
                + regression * self.cell_size[classification] / 2.0
            )

        gps = self.unorm(gps)

        return {
            "label": classification_logits,
            "gps": gps,
            "size": size,
            "center": center,
            "reg": regression,
        }

class HybridHeadCentroid(nn.Module):
    """Classification head followed by regression head for the network."""

    def __init__(self, final_dim, quadtree_path, use_tanh, scale_tanh):
        super().__init__()
        self.final_dim = final_dim
        self.use_tanh = use_tanh
        self.scale_tanh = scale_tanh

        self.unorm = UnormGPS()
        if quadtree_path is not None:
            quadtree = pd.read_csv(quadtree_path)
            self.init_quadtree(quadtree)

    def init_quadtree(self, quadtree):
        quadtree[["min_lat", "max_lat", "mean_lat"]] /= 90.0
        quadtree[["min_lon", "max_lon", "mean_lon"]] /= 180.0
        self.cell_center = torch.tensor(quadtree[["mean_lat", "mean_lon"]].values)
        self.cell_size_up = torch.tensor(quadtree[["max_lat", "max_lon"]].values) - torch.tensor(quadtree[["mean_lat", "mean_lon"]].values)
        self.cell_size_down = torch.tensor(quadtree[["mean_lat", "mean_lon"]].values) - torch.tensor(quadtree[["min_lat", "min_lon"]].values)

    def forward(self, x, gt_label):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        classification_logits = x[..., : self.final_dim]
        classification = classification_logits.argmax(dim=-1)
        self.cell_size_up = self.cell_size_up.to(classification.device)
        self.cell_center = self.cell_center.to(classification.device)
        self.cell_size_down = self.cell_size_down.to(classification.device)

        regression = x[..., self.final_dim :]

        if self.use_tanh:
            regression = self.scale_tanh * torch.tanh(regression)

        regression = regression.view(regression.shape[0], -1, 2)

        if self.training:
            regression = torch.gather(
                regression,
                1,
                gt_label.unsqueeze(-1).unsqueeze(-1).expand(regression.shape[0], 1, 2),
            )[:, 0, :]
            size = torch.where(
                regression > 0,
                self.cell_size_up[gt_label],
                self.cell_size_down[gt_label],
            )
            center = self.cell_center[gt_label]
            gps = self.cell_center[gt_label] + regression * size
        else:
            regression = torch.gather(
                regression,
                1,
                classification.unsqueeze(-1)
                .unsqueeze(-1)
                .expand(regression.shape[0], 1, 2),
            )[:, 0, :]
            size = torch.where(
                regression > 0,
                self.cell_size_up[classification],
                self.cell_size_down[classification],
            )
            center = self.cell_center[classification]
            gps = self.cell_center[classification] + regression * size

        gps = self.unorm(gps)

        return {
            "label": classification_logits,
            "gps": gps,
            "size": 1.0 / size,
            "center": center,
            "reg": regression,
        }


class SharedHybridHead(HybridHead):
    """Classification head followed by SHARED regression head for the network."""

    def forward(self, x, gt_label):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """

        classification_logits = x[..., : self.final_dim]
        classification = classification_logits.argmax(dim=-1)

        regression = x[..., self.final_dim :]

        if self.use_tanh:
            regression = self.scale_tanh * torch.tanh(regression)

        if self.training:
            gps = (
                self.cell_center[gt_label] + regression * self.cell_size[gt_label] / 2.0
            )
        else:
            gps = (
                self.cell_center[classification]
                + regression * self.cell_size[classification] / 2.0
            )

        gps = self.unorm(gps)

        return {"label": classification_logits, "gps": gps}
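A worked sketch (made-up numbers, illustrative only) of the coordinate convention used by HybridHead: the predicted cell supplies a normalized center and size, and the tanh-bounded offset moves the prediction within that cell before UnormGPS maps it to radians.

# Illustrative only: cell id + [-1, 1] offset -> normalized coordinates.
import torch

cell_center = torch.tensor([[0.25, -0.50]])   # normalized (lat/90, lon/180) of one cell
cell_size = torch.tensor([[0.10, 0.20]])      # normalized height/width of that cell
offset = torch.tensor([[0.5, -1.0]])          # tanh-bounded regression output
gps_normalized = cell_center + offset * cell_size / 2.0   # -> [[0.275, -0.600]]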
models/networks/heads/id_to_gps.py
ADDED
@@ -0,0 +1,33 @@
import torch
from models.networks.utils import UnormGPS
import torch.nn as nn
import numpy as np


class IdToGPS(nn.Module):
    def __init__(self, id_to_gps: str):
        """Map index to gps coordinates (indices can be country or city ids)"""
        super().__init__()
        if "quadtree" in id_to_gps:
            self.id_to_gps = torch.load(
                "_".join(id_to_gps.split("_")[:-4] + id_to_gps.split("_")[-3:])
            )
        else:
            self.id_to_gps = torch.load(id_to_gps)
        #self.unorm = UnormGPS()

    def forward(self, x):
        """Mapping from country id to gps coordinates
        Args:
            x: torch.Tensor with features
        """

        if isinstance(x, dict):
            # for oracle
            labels, x = x["label"], x["img"]
        else:
            # predicted labels
            labels = x
        self.id_to_gps = self.id_to_gps.to(labels.device)
        #return {"gps": self.unorm(self.id_to_gps[labels])}
        return {"gps": self.id_to_gps[labels]}
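A minimal sketch (illustrative only) of the lookup this module performs: the file name and the tiny two-row table below are hypothetical, but the indexing behavior matches the forward pass above.

# Illustrative only: build a toy index-to-GPS table on disk and query it by label.
import torch

torch.save(torch.tensor([[0.1, 0.2], [-0.3, 0.4]]), "toy_index_to_gps.pt")  # hypothetical file
mapper = IdToGPS("toy_index_to_gps.pt")
print(mapper(torch.tensor([1, 0, 1]))["gps"])   # rows of the table, gathered by label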
models/networks/heads/random.py
ADDED
@@ -0,0 +1,53 @@
import pandas as pd
import torch
from torch import nn
from models.networks.utils import UnormGPS


class Random(nn.Module):
    def __init__(self, num_output):
        """Random"""
        super().__init__()
        self.num_output = num_output
        self.unorm = UnormGPS()

    def forward(self, x):
        """Predicts GPS coordinates from an image.
        Args:
            x: torch.Tensor with features
        """
        #x = x["img"]
        gps = torch.rand((x.shape[0], self.num_output), device=x.device) * 2 - 1
        return {"gps": self.unorm(gps)}


class RandomCoords(nn.Module):
    def __init__(self, coords_path: str):
        """Randomly sample from a list of coordinates
        Args:
            coords_path: str with path to csv file with coordinates
        """
        super().__init__()
        coordinates = pd.read_csv(coords_path)
        longitudes = coordinates["longitude"].values / 180
        latitudes = coordinates["latitude"].values / 90
        self.unorm = UnormGPS()
        del coordinates

        self.N = len(longitudes)
        assert len(longitudes) == len(latitudes)
        self.coordinates = torch.stack(
            [torch.tensor(latitudes), torch.tensor(longitudes)],
            dim=-1,
        )
        del longitudes, latitudes

    def forward(self, x):
        """Predicts GPS coordinates from an image.
        Args:
            x: torch.Tensor with features
        """
        x = x["img"]
        # randomly select a coordinate in the list
        n = torch.randint(0, self.N, (x.shape[0],))
        return {"gps": self.unorm(self.coordinates[n].to(x.device))}
models/networks/heads/regression.py
ADDED
@@ -0,0 +1,44 @@
from models.networks.utils import UnormGPS
import torch.nn as nn
from torch.nn.functional import tanh
import torch


class RegressionHead(nn.Module):
    def __init__(self, use_tanh=False):
        super().__init__()
        self.unorm = UnormGPS()
        self.use_tanh = use_tanh

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        if self.use_tanh:
            x = tanh(x)
        gps = self.unorm(x)
        return {"gps": gps}


class RegressionHeadAngle(nn.Module):
    def __init__(self):
        super().__init__()
        self.unorm = UnormGPS()

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        x1 = x[:, 0].pow(2)
        x2 = x[:, 1].pow(2)
        x3 = x[:, 2].pow(2)
        x4 = x[:, 3].pow(2)
        cos_lambda = x1 / (x1 + x2)
        sin_lambda = x2 / (x1 + x2)
        cos_phi = x3 / (x3 + x4)
        sin_phi = x4 / (x3 + x4)
        lbd = torch.atan2(sin_lambda, cos_lambda)
        phi = torch.atan2(sin_phi, cos_phi)
        gps = torch.cat((lbd.unsqueeze(1), phi.unsqueeze(1)), dim=1)
        # gps = self.unorm(x)
        return {"gps": gps}
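A minimal sketch (illustrative only) of the plain regression head: with tanh enabled, the raw two-dimensional output is squashed into [-1, 1] and then rescaled by UnormGPS, so the returned coordinates are bounded by (pi/2, pi).

# Illustrative only: bounded regression output in radians.
import torch

head = RegressionHead(use_tanh=True)
print(head(torch.randn(4, 2))["gps"])   # latitude in (-pi/2, pi/2), longitude in (-pi, pi)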
models/networks/mlp.py
ADDED
@@ -0,0 +1,258 @@
import torch
from torch import nn


class MLP(nn.Module):
    def __init__(
        self,
        initial_dim=512,
        hidden_dim=[128, 32, 2],
        final_dim=2,
        norm=nn.InstanceNorm1d,
        activation=nn.ReLU,
        aux_data=[],
    ):
        """
        Initializes an MLP Classification Head
        Args:
            hidden_dim (list): list of hidden dimensions for the MLP
            norm (nn.Module): normalization layer
            activation (nn.Module): activation layer
        """
        super().__init__()
        self.aux_data = aux_data
        self.aux = len(self.aux_data) > 0
        if self.aux:
            hidden_dim_aux = hidden_dim
            hidden_dim_aux[-1] = 128
            final_dim_aux_dict = {
                "land_cover": 12,
                "climate": 30,
                "soil": 14,
                "road_index": 1,
                "drive_side": 1,
                "dist_sea": 1,
            }
            self.idx = {}
            final_dim_aux = 0
            for col in self.aux_data:
                self.idx[col] = [
                    final_dim_aux + i for i in range(final_dim_aux_dict[col])
                ]
                final_dim_aux += final_dim_aux_dict[col]
            dim = [initial_dim] + hidden_dim_aux + [final_dim_aux]
            args = self.init_layers(dim, norm, activation)
            self.mlp_aux = nn.Sequential(*args)
        dim = [initial_dim] + hidden_dim + [final_dim]
        args = self.init_layers(dim, norm, activation)
        self.mlp = nn.Sequential(*args)

    def init_layers(self, dim, norm, activation):
        """Initializes the MLP layers."""
        args = [nn.LayerNorm(dim[0])]
        for i in range(len(dim) - 1):
            args.append(nn.Linear(dim[i], dim[i + 1]))
            if i < len(dim) - 2:
                # args.append(norm(dim[i + 1]))
                args.append(norm(4, dim[i + 1]))
                args.append(activation())
        return args

    def forward(self, x):
        """Predicts GPS coordinates from an image.
        Args:
            x: torch.Tensor with features
        """
        if self.aux:
            out = {"gps": self.mlp(x[:, 0, :])}
            x = self.mlp_aux(x[:, 0, :])
            for col in list(self.idx.keys()):
                out[col] = x[:, self.idx[col]]
            return out
        return self.mlp(x[:, 0, :])

class MLPResNet(nn.Module):
    def __init__(
        self,
        initial_dim=512,
        hidden_dim=[128, 32, 2],
        final_dim=2,
        norm=nn.InstanceNorm1d,
        activation=nn.ReLU,
        aux_data=[],
    ):
        """
        Initializes an MLP Classification Head
        Args:
            hidden_dim (list): list of hidden dimensions for the MLP
            norm (nn.Module): normalization layer
            activation (nn.Module): activation layer
        """
        super().__init__()
        self.aux_data = aux_data
        self.aux = len(self.aux_data) > 0
        if self.aux:
            hidden_dim_aux = hidden_dim
            hidden_dim_aux[-1] = 128
            final_dim_aux_dict = {
                "land_cover": 12,
                "climate": 30,
                "soil": 14,
                "road_index": 1,
                "drive_side": 1,
                "dist_sea": 1,
            }
            self.idx = {}
            final_dim_aux = 0
            for col in self.aux_data:
                self.idx[col] = [
                    final_dim_aux + i for i in range(final_dim_aux_dict[col])
                ]
                final_dim_aux += final_dim_aux_dict[col]
            dim = [initial_dim] + hidden_dim_aux + [final_dim_aux]
            args = self.init_layers(dim, norm, activation)
            self.mlp_aux = nn.Sequential(*args)
        dim = [initial_dim] + hidden_dim + [final_dim]
        args = self.init_layers(dim, norm, activation)
        self.mlp = nn.Sequential(*args)

    def init_layers(self, dim, norm, activation):
        """Initializes the MLP layers."""
        args = [nn.LayerNorm(dim[0])]
        for i in range(len(dim) - 1):
            args.append(nn.Linear(dim[i], dim[i + 1]))
            if i < len(dim) - 2:
                # args.append(norm(dim[i + 1]))
                args.append(norm(4, dim[i + 1]))
                args.append(activation())
        return args

    def forward(self, x):
        """Predicts GPS coordinates from an image.
        Args:
            x: torch.Tensor with features
        """
        if self.aux:
            out = {"gps": self.mlp(x[:, 0, :])}
            x = self.mlp_aux(x[:, 0, :])
            for col in list(self.idx.keys()):
                out[col] = x[:, self.idx[col]]
            return out
        return self.mlp(x)


class MLPCentroid(nn.Module):
    def __init__(
        self,
        initial_dim=512,
        hidden_dim=[128, 32, 2],
        final_dim=2,
        norm=nn.InstanceNorm1d,
        activation=nn.ReLU,
        aux_data=[],
    ):
        """
        Initializes an MLP Classification Head
        Args:
            hidden_dim (list): list of hidden dimensions for the MLP
            norm (nn.Module): normalization layer
            activation (nn.Module): activation layer
        """
        super().__init__()
        self.aux_data = aux_data
        self.aux = len(self.aux_data) > 0
        dim = [initial_dim] + hidden_dim + [final_dim // 3]
        args = self.init_layers(dim, norm, activation)
        self.classif = nn.Sequential(*args)
        dim = [initial_dim] + hidden_dim + [2 * final_dim // 3]
        args = self.init_layers(dim, norm, activation)
        self.reg = nn.Sequential(*args)
        # torch.nn.init.normal_(self.reg.weight, mean=0.0, std=0.01)
        if self.aux:
            self.dim = [initial_dim] + hidden_dim
            self.predictors = {"gps": self.mlp}
            self.init_aux(dim, norm, activation)

    def init_layers(self, dim, norm, activation):
        """Initializes the MLP layers."""
        args = [nn.LayerNorm(dim[0])]
        for i in range(len(dim) - 1):
            args.append(nn.Linear(dim[i], dim[i + 1]))
            if i < len(dim) - 2:
                # args.append(norm(dim[i + 1]))
                args.append(norm(4, dim[i + 1]))
                args.append(activation())
        return args

    def init_aux(self, dim, norm, activation):
        final_dim_aux = {
            "land_cover": 12,
            "climate": 30,
            "soil": 14,
            "road_index": 1,
            "drive_side": 1,
            "dist_sea": 1,
        }
        if "land_cover" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["land_cover"]], norm, activation
            )
            self.land_cover = nn.Sequential(*args)
            self.predictors["land_cover"] = self.land_cover
        if "road_index" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["road_index"]], norm, activation
            )
            self.road_index = nn.Sequential(*args)
            self.predictors["road_index"] = self.road_index
        if "drive_side" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["drive_side"]], norm, activation
            )
            self.drive_side = nn.Sequential(*args)
            self.predictors["drive_side"] = self.drive_side
        if "climate" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["climate"]], norm, activation
            )
            self.climate = nn.Sequential(*args)
            self.predictors["climate"] = self.climate
        if "soil" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["soil"]], norm, activation
            )
            self.soil = nn.Sequential(*args)
            self.predictors["soil"] = self.soil
        if "dist_sea" in self.aux_data:
            args = self.init_layers(
                self.dim + [final_dim_aux["dist_sea"]], norm, activation
            )
            self.dist_sea = nn.Sequential(*args)
            self.predictors["dist_sea"] = self.dist_sea

    def forward(self, x):
        """Predicts GPS coordinates from an image.
        Args:
            x: torch.Tensor with features
        """
        if self.aux:
            return {
                col: self.predictors[col](x[:, 0, :]) for col in self.predictors.keys()
            }
        return torch.cat([self.classif(x[:, 0, :]), self.reg(x[:, 0, :])], dim=1)


class Identity(nn.Module):
    def __init__(
        self
    ):
        """
        Initializes an Identity module
        """
        super().__init__()

    def forward(self, x):
        """
        Return same as input
        """
        return x
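A minimal sketch (illustrative only) of MLPCentroid's output layout when final_dim = 3 * num_cells: the first num_cells values are classification logits and the remaining 2 * num_cells are (lat, lon) offsets. Because init_layers calls norm(4, dim), the example passes nn.GroupNorm explicitly, which matches that (num_groups, num_channels) call signature; the dimensions below are assumptions, not the repository's configuration.

# Illustrative only: inspect the concatenated logits + offsets produced by MLPCentroid.
import torch
from torch import nn

num_cells = 16
head = MLPCentroid(initial_dim=768, hidden_dim=[512, 256],
                   final_dim=3 * num_cells, norm=nn.GroupNorm)
features = torch.randn(2, 5, 768)        # token features; only the first token is used
out = head(features)                     # shape (2, 48)
logits, offsets = out[:, :num_cells], out[:, num_cells:]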
models/networks/network.py
ADDED
@@ -0,0 +1,335 @@
import torch
import numpy as np
from abc import ABC, abstractmethod
from torch import nn
from hydra.utils import instantiate
import copy
from peft import LoraConfig, get_peft_model
from utils.model_utils import print_trainable_parameters


def freeze(model):
    """Freezes the parameters of a model."""
    for p in model.parameters():
        p.requires_grad = False
    model.eval()


def unfreeze(model):
    """Unfreezes all model parameters except CLIP's final post-layernorm."""
    model_parameters = model.named_parameters()
    for name, param in model_parameters:
        if name in [
            "clip.vision_model.post_layernorm.weight",
            "clip.vision_model.post_layernorm.bias",
        ]:
            param.requires_grad = False
        else:
            param.requires_grad = True
    model.train()


def unfreeze_last(model):
    """Unfreezes only the parameters of the last transformer block (index 11)."""
    model_parameters = model.named_parameters()
    for name, param in model_parameters:
        if len(name.split(".")) > 5:
            if name.split(".")[4] == "11":
                param.requires_grad = True
            else:
                param.requires_grad = False
        else:
            param.requires_grad = False
    model.train()


class FrozenBackbone(nn.Module):
    """Freezes the backbone of a network."""

    def __init__(self, backbone, mid, head):
        super().__init__()
        self.backbone = backbone.instance
        self.mid = mid.instance
        self.head = head.instance
        self.target_key = head.target_key
        freeze(self.backbone)

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        with torch.no_grad():
            x = self.backbone(x)
        x = self.mid(x)
        x = self.head(x)
        return x


class UnfrozenBackbone(nn.Module):
    """Unfreezes the backbone of a network."""

    def __init__(self, backbone, mid, head):
        super().__init__()
        self.backbone = backbone.instance
        self.mid = mid.instance
        self.head = head.instance
        self.target_key = head.target_key
        unfreeze(self.backbone)

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        x = self.backbone(x)
        x = self.mid(x)
        x = self.head(x)
        return x


class UnfrozenPartBackbone(nn.Module):
    """Unfreezes only the last block of the backbone of a network."""

    def __init__(self, backbone, mid, head):
        super().__init__()
        self.backbone = backbone.instance
        self.mid = mid.instance
        self.head = head.instance
        self.target_key = head.target_key
        unfreeze_last(self.backbone)

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        x = self.backbone(x)
        x = self.mid(x)
        x = self.head(x)
        return x


class NoFeatureBackbone(nn.Module):
    """Randomizes the backbone of a network."""

    def __init__(self, head):
        super().__init__()
        self.head = head.instance
        self.target_key = head.target_key

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        return self.head(x)


class ContrastiveFrozenBackbone(FrozenBackbone):
    """Freezes the backbone of a network (contrastive variant)."""

    def __init__(self, backbone, mid, head, mode):
        super().__init__(backbone, mid, head)
        self.mode = mode

    def forward(self, x):
        with torch.no_grad():
            features = self.backbone(x)
        if self.mode != "eval":
            x_pos = {
                k.strip("pos_"): v.clone()
                if isinstance(v, torch.Tensor)
                else copy.deepcopy(v)
                for k, v in x.items()
                if k.startswith("pos_")
            }
            pos_features = self.backbone(x_pos)
        x = self.mid(features)
        x = self.head(x)
        if self.mode != "eval":
            return {
                "features": features[:, 0, :],
                "pos_features": pos_features[:, 0, :],
                **x,
            }
        return {
            "features": features[:, 0, :],
            **x,
        }


class ContrastiveUnFrozenPartBackbone(UnfrozenPartBackbone):
    """Unfreezes only the last backbone block (contrastive variant)."""

    def __init__(self, backbone, mid, head, mode):
        super().__init__(backbone, mid, head)
        self.mode = mode

    def forward(self, x):
        features = self.backbone(x)
        if self.mode != "eval":
            x_pos = {
                k.strip("pos_"): v.clone()
                if isinstance(v, torch.Tensor)
                else copy.deepcopy(v)
                for k, v in x.items()
                if k.startswith("pos_")
            }
            pos_features = self.backbone(x_pos)
        x = self.mid(features)
        x = self.head(x)
        if self.mode != "eval":
            return {
                "features": features[:, 0, :],
                "pos_features": pos_features[:, 0, :],
                **x,
            }
        return {
            "features": features[:, 0, :],
            **x,
        }


class ContrastiveUnFrozenBackbone(UnfrozenBackbone):
    """Unfreezes the backbone of a network (contrastive variant)."""

    def __init__(self, backbone, mid, head, mode):
        super().__init__(backbone, mid, head)
        self.mode = mode

    def forward(self, x):
        features = self.backbone(x)
        if self.mode != "eval":
            x_pos = {
                k.strip("pos_"): v.clone()
                if isinstance(v, torch.Tensor)
                else copy.deepcopy(v)
                for k, v in x.items()
                if k.startswith("pos_")
            }
            pos_features = self.backbone(x_pos)
        x = self.mid(features)
        x = self.head(x)
        if self.mode != "eval":
            return {
                "features": features[:, 0, :],
                "pos_features": pos_features[:, 0, :],
                **x,
            }
        return {
            "features": features[:, 0, :],
            **x,
        }


class TextContrastiveUnFrozenBackbone(UnfrozenBackbone):
    """Unfreezes the backbone of a network (text-contrastive variant)."""

    def __init__(self, backbone, mid, head):
        super().__init__(backbone, mid, head)

    def forward(self, x):
        con, features = self.backbone(x)
        x = self.mid(features)
        x = self.head(x)
        return {
            "features": con,
            **x,
        }


class LoraBackbone(nn.Module):
    """Wraps the backbone in a PEFT model for LoRA tuning."""

    def __init__(self, backbone, mid, head, r, alpha, dropout, bias):
        super().__init__()
        self.backbone = backbone.instance
        self.mid = mid.instance
        self.head = head.instance
        self.target_key = head.target_key
        freeze(self.backbone)

        config = LoraConfig(
            r=r,
            lora_alpha=alpha,
            lora_dropout=dropout,
            bias=bias,
            target_modules=["q_proj", "k_proj", "v_proj"],
        )
        self.backbone = get_peft_model(self.backbone, config)
        print_trainable_parameters(self)

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """
        x = self.backbone(x)
        x = self.mid(x)
        return self.head(x)


class HybridFrozenBackbone(FrozenBackbone):
    """Freezes the backbone of a network (hybrid head variant)."""

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """

        gt_label = x["label"] if self.training else None

        with torch.no_grad():
            x = self.backbone(x)
        x = self.mid(x)
        x = self.head(x, gt_label)
        return x


class HybridUnfrozenBackbone(UnfrozenBackbone):
    """Unfreezes the backbone of a network (hybrid head variant)."""

    def forward(self, x):
        """Forward pass of the network.
        x : Union[torch.Tensor, dict] with the output of the backbone.
        """

        gt_label = x["label"] if self.training else None

        x = self.backbone(x)
        x = self.mid(x)
        x = self.head(x, gt_label)
        return x


class ContrastiveHybridUnFrozenBackbone(UnfrozenBackbone):
    """Unfreezes the backbone of a network (contrastive hybrid variant)."""

    def __init__(self, backbone, mid, head, mode):
        super().__init__(backbone, mid, head)
        self.mode = mode

    def forward(self, x):
        gt_label = x["label"] if self.training else None
        features = self.backbone(x)
        if self.mode != "eval":
            x_pos = {
                k.strip("pos_"): v.clone()
                if isinstance(v, torch.Tensor)
                else copy.deepcopy(v)
                for k, v in x.items()
                if k.startswith("pos_")
            }
            pos_features = self.backbone(x_pos)
        x = self.mid(features)
        x = self.head(x, gt_label)
        if self.mode != "eval":
            return {
                "features": features[:, 0, :],
                "pos_features": pos_features[:, 0, :],
                **x,
            }
        return {
            "features": features[:, 0, :],
            **x,
        }
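A minimal sketch (illustrative only) of the composition contract these wrappers rely on: each of backbone, mid, and head is a hydra-style node that already carries an ".instance" attribute (and ".target_key" for the head). SimpleNamespace and nn.Identity stand in for real configured modules here; they are not part of the repository.

# Illustrative only: construct a FrozenBackbone from stand-in nodes.
from types import SimpleNamespace
from torch import nn

backbone = SimpleNamespace(instance=nn.Identity())
mid = SimpleNamespace(instance=nn.Identity())
head = SimpleNamespace(instance=nn.Identity(), target_key="gps")
net = FrozenBackbone(backbone, mid, head)   # backbone parameters are frozen and set to eval()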
models/networks/utils.py
ADDED
@@ -0,0 +1,22 @@
import torch
import numpy as np
from torch import nn


class NormGPS(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Normalize latitude/longitude in radians to [-1, 1]."""  # not used currently
        return x / torch.Tensor([np.pi * 0.5, np.pi]).unsqueeze(0).to(x.device)


class UnormGPS(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Map normalized latitude/longitude in [-1, 1] back to radians."""
        x = torch.clamp(x, -1, 1)
        return x * torch.Tensor([np.pi * 0.5, np.pi]).unsqueeze(0).to(x.device)
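A quick numerical check (illustrative only) of the scaling convention: UnormGPS multiplies a clamped (lat, lon) pair by (pi/2, pi), so (1.0, -0.5) maps to roughly (1.5708, -1.5708) radians.

# Illustrative only: round normalized coordinates up to radians.
import torch

unorm = UnormGPS()
print(unorm(torch.tensor([[1.0, -0.5]])))   # tensor([[ 1.5708, -1.5708]])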
models/utils.py
ADDED
@@ -0,0 +1,54 @@
import os
from os.path import abspath as abp
import torch
import hydra
from hydra import initialize, compose
from models.module import Geolocalizer
from omegaconf import OmegaConf, open_dict
from os.path import join
from hydra.utils import instantiate

def load_model_config(path):
    # given the directory of os.cwd()
    # compute the relative path to path
    path = abp(path)
    rel_path = os.path.relpath(path, start=os.path.split(__file__)[0])

    with initialize(version_base=None, config_path=rel_path):
        cfg = compose(config_name="config", overrides=[])

    checkpoint = torch.load(join(path, "last.ckpt"))
    del checkpoint["state_dict"][
        "model.backbone.clip.vision_model.embeddings.position_ids"
    ]
    torch.save(checkpoint, join(path, "last2.ckpt"))

    with open_dict(cfg):
        cfg.checkpoint = join(path, "last2.ckpt")

        cfg.num_classes = 11399
        cfg.model.network.mid.instance.final_dim = cfg.num_classes * 3
        cfg.model.network.head.final_dim = cfg.num_classes * 3
        cfg.model.network.head.instance.quadtree_path = join(path, "quadtree_10_1000.csv")

        cfg.dataset.train_dataset.path = ""
        cfg.dataset.val_dataset.path = ""
        cfg.dataset.test_dataset.path = ""
        cfg.logger.save_dir = ""
        cfg.data_dir = ""
        cfg.root_dir = ""
        cfg.mode = "test"
        cfg.model.network.backbone.instance.path = (
            "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K"
        )
    return cfg.dataset.test_transform, cfg.model, join(path, "last2.ckpt"), True

def load_model(path):
    transform_config, model_config, checkpoint_path, delete = load_model_config(path)

    transform = instantiate(transform_config)
    model = Geolocalizer.load_from_checkpoint(checkpoint_path, cfg=model_config)
    if delete:
        os.remove(checkpoint_path)

    return model, transform
scripts/download-dataset.py
ADDED
@@ -0,0 +1,27 @@
import os, zipfile
from huggingface_hub import snapshot_download

# Define the base directory
base_dir = os.path.join(os.getcwd(), 'datasets')

# Ensure the base directory exists
if not os.path.exists(base_dir):
    os.mkdir(base_dir)

# Define the specific dataset directory
dataset_dir = os.path.join(base_dir, "osv5m")

# Ensure the specific dataset directory exists
if not os.path.exists(dataset_dir):
    os.mkdir(dataset_dir)

# Download the dataset
snapshot_download(repo_id="osv5m/osv5m", local_dir=dataset_dir, repo_type='dataset')

# Extract zip files and remove them after extraction
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        if file.endswith(".zip"):
            with zipfile.ZipFile(os.path.join(root, file), 'r') as zip_ref:
                zip_ref.extractall(root)
            os.remove(os.path.join(root, file))
scripts/preprocessing/enrich-metadata-adaptive-quadtrees.py
ADDED
@@ -0,0 +1,225 @@
import hydra
import torch
import numpy as np
import pandas as pd
import statistics
from os.path import join, dirname
import matplotlib.pyplot as plt


class QuadTree(object):
    def __init__(self, data, id="", depth=3, do_split=5000):
        self.id = id
        self.data = data

        coord = data[["latitude", "longitude"]].to_numpy()

        # if mins is None:
        mins = coord.min(0)
        # if maxs is None:
        maxs = coord.max(0)

        self.mins = np.asarray(mins)
        self.maxs = np.asarray(maxs)
        self.sizes = self.maxs - self.mins

        self.children = []

        # sort by latitude
        sorted_data_lat = sorted(coord, key=lambda point: point[0])

        # get the median lat
        median_lat = statistics.median(point[0] for point in sorted_data_lat)

        # Divide the cell into two half-cells based on the median lat
        data_left = [point for point in sorted_data_lat if point[0] <= median_lat]
        data_right = [point for point in sorted_data_lat if point[0] > median_lat]

        # Sort the data points by longitude in each half-cell
        sorted_data_left_lon = sorted(data_left, key=lambda point: point[1])
        sorted_data_right_lon = sorted(data_right, key=lambda point: point[1])

        # Calculate the median longitude coordinate in each half-cell
        median_lon_left = statistics.median(point[1] for point in sorted_data_left_lon)
        median_lon_right = statistics.median(
            point[1] for point in sorted_data_right_lon
        )

        if (depth > 0) and (len(self.data) >= do_split):
            # split the data into four quadrants
            data_q1 = data[
                (data["latitude"] < median_lat) & (data["longitude"] < median_lon_left)
            ]
            data_q2 = data[
                (data["latitude"] < median_lat) & (data["longitude"] >= median_lon_left)
            ]
            data_q3 = data[
                (data["latitude"] >= median_lat)
                & (data["longitude"] < median_lon_right)
            ]
            data_q4 = data[
                (data["latitude"] >= median_lat)
                & (data["longitude"] >= median_lon_right)
            ]

            # recursively build a quad tree on each quadrant which has data
            if data_q1.shape[0] > 0:
                self.children.append(
                    QuadTree(
                        data_q1,
                        id + "0",
                        depth - 1,
                        do_split=do_split,
                    )
                )
            if data_q2.shape[0] > 0:
                self.children.append(
                    QuadTree(
                        data_q2,
                        id + "1",
                        depth - 1,
                        do_split=do_split,
                    )
                )
            if data_q3.shape[0] > 0:
                self.children.append(
                    QuadTree(
                        data_q3,
                        id + "2",
                        depth - 1,
                        do_split=do_split,
                    )
                )
            if data_q4.shape[0] > 0:
                self.children.append(
                    QuadTree(
                        data_q4,
                        id + "3",
                        depth - 1,
                        do_split=do_split,
                    )
                )

    def unwrap(self):
        if len(self.children) == 0:
            return {self.id: [self.mins, self.maxs, self.data.copy()]}
        else:
            d = dict()
            for child in self.children:
                d.update(child.unwrap())
            return d


def extract(qt, name_new_column):
    cluster = qt.unwrap()
    boundaries, data = {}, []
    for i, (id, vs) in zip(np.arange(len(cluster)), cluster.items()):
        (min_lat, min_lon), (max_lat, max_lon), points = vs
        points[name_new_column] = int(i)
        data.append(points)
        boundaries[i] = (
            float(min_lat),
            float(min_lon),
            float(max_lat),
            float(max_lon),
            points["latitude"].mean(),
            points["longitude"].mean(),
        )

    data = pd.concat(data)
    return boundaries, data


def vizu(name_new_column, df_train, boundaries, do_split):
    plt.hist(df_train[name_new_column], bins=len(boundaries))
    plt.xlabel("Cluster ID")
    plt.ylabel("Number of images")
    plt.title("Cluster distribution")
    plt.yscale("log")
    plt.ylim(10, do_split)
    plt.savefig(f"{name_new_column}_distrib.png")
    plt.clf()

    plt.scatter(
        df_train["longitude"].to_numpy(),
        df_train["latitude"].to_numpy(),
        c=np.random.permutation(len(boundaries))[df_train[name_new_column].to_numpy()],
        cmap="tab20",
        s=0.1,
        alpha=0.5,
    )
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.title("Quadtree map")
    plt.savefig(f"{name_new_column}_map.png")


@hydra.main(
    config_path="../configs/scripts",
    config_name="enrich-metadata-quadtree",
    version_base=None,
)
def main(cfg):

    data_path = join(cfg.data_dir, "osv5m")
    name_new_column = f"adaptive_quadtree_{cfg.depth}_{cfg.do_split}"

    # Create clusters from train images
    train_fp = join(data_path, f"train.csv")
    df_train = pd.read_csv(train_fp)

    qt = QuadTree(df_train, depth=cfg.depth, do_split=cfg.do_split)
    boundaries, df_train = extract(qt, name_new_column)

    vizu(name_new_column, df_train, boundaries, cfg.do_split)

    # Save clusters
    boundaries = pd.DataFrame.from_dict(
        boundaries,
        orient="index",
        columns=["min_lat", "min_lon", "max_lat", "max_lon", "mean_lat", "mean_lon"],
    )
    boundaries.to_csv(f"{name_new_column}.csv", index_label="cluster_id")

    # Assign test images to clusters
    test_fp = join(data_path, f"test.csv")
    df_test = pd.read_csv(test_fp)

    above_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) > np.expand_dims(
        boundaries["min_lat"].to_numpy(), 0
    )
    below_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) < np.expand_dims(
        boundaries["max_lat"].to_numpy(), 0
    )
    above_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) > np.expand_dims(
        boundaries["min_lon"].to_numpy(), 0
    )
    below_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) < np.expand_dims(
        boundaries["max_lon"].to_numpy(), 0
    )

    mask = np.logical_and(
        np.logical_and(above_lat, below_lat), np.logical_and(above_lon, below_lon)
    )

    df_test[name_new_column] = np.argmax(mask, axis=1)

    # save index_to_gps_quadtree file
    lat = torch.tensor(boundaries["mean_lat"])
    lon = torch.tensor(boundaries["mean_lon"])
    coord = torch.stack([lat / 90, lon / 180], dim=-1)
    torch.save(
        coord,
        join(
            data_path, f"index_to_gps_adaptive_quadtree_{cfg.depth}_{cfg.do_split}.pt"
        ),
    )

    # Overwrite test.csv and train.csv
    if cfg.overwrite_csv:
        df_train.to_csv(train_fp, index=False)
        df_test.to_csv(test_fp, index=False)


if __name__ == "__main__":
    main()
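A minimal sketch (illustrative only) of running the adaptive quadtree on a toy DataFrame: the median-based split keeps leaf populations roughly balanced instead of splitting at the geometric midpoint. The random data, depth, and do_split threshold below are made up for the example.

# Illustrative only: build an adaptive quadtree over random points and inspect the leaves.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({"latitude": rng.uniform(-60, 60, 2000),
                    "longitude": rng.uniform(-150, 150, 2000)})
qt = QuadTree(toy, depth=2, do_split=500)
boundaries, labeled = extract(qt, "adaptive_quadtree_2_500")
print(len(boundaries), labeled["adaptive_quadtree_2_500"].value_counts().head())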
scripts/preprocessing/enrich-metadata-quadtree.py
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hydra
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from os.path import join, dirname
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import torch
|
7 |
+
|
8 |
+
|
9 |
+
class QuadTree(object):
|
10 |
+
def __init__(self, data, mins=None, maxs=None, id="", depth=3, do_split=1000):
|
11 |
+
self.id = id
|
12 |
+
self.data = data
|
13 |
+
|
14 |
+
if mins is None:
|
15 |
+
mins = data[["latitude", "longitude"]].to_numpy().min(0)
|
16 |
+
if maxs is None:
|
17 |
+
maxs = data[["latitude", "longitude"]].to_numpy().max(0)
|
18 |
+
|
19 |
+
self.mins = np.asarray(mins)
|
20 |
+
self.maxs = np.asarray(maxs)
|
21 |
+
self.sizes = self.maxs - self.mins
|
22 |
+
|
23 |
+
self.children = []
|
24 |
+
|
25 |
+
mids = 0.5 * (self.mins + self.maxs)
|
26 |
+
xmin, ymin = self.mins
|
27 |
+
xmax, ymax = self.maxs
|
28 |
+
xmid, ymid = mids
|
29 |
+
|
30 |
+
if (depth > 0) and (len(self.data) >= do_split):
|
31 |
+
# split the data into four quadrants
|
32 |
+
data_q1 = data[(data["latitude"] < mids[0]) & (data["longitude"] < mids[1])]
|
33 |
+
data_q2 = data[
|
34 |
+
(data["latitude"] < mids[0]) & (data["longitude"] >= mids[1])
|
35 |
+
]
|
36 |
+
data_q3 = data[
|
37 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] < mids[1])
|
38 |
+
]
|
39 |
+
data_q4 = data[
|
40 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] >= mids[1])
|
41 |
+
]
|
42 |
+
|
43 |
+
# recursively build a quad tree on each quadrant which has data
|
44 |
+
if data_q1.shape[0] > 0:
|
45 |
+
self.children.append(
|
46 |
+
QuadTree(
|
47 |
+
data_q1,
|
48 |
+
[xmin, ymin],
|
49 |
+
[xmid, ymid],
|
50 |
+
id + "0",
|
51 |
+
depth - 1,
|
52 |
+
do_split=do_split,
|
53 |
+
)
|
54 |
+
)
|
55 |
+
if data_q2.shape[0] > 0:
|
56 |
+
self.children.append(
|
57 |
+
QuadTree(
|
58 |
+
data_q2,
|
59 |
+
[xmin, ymid],
|
60 |
+
[xmid, ymax],
|
61 |
+
id + "1",
|
62 |
+
depth - 1,
|
63 |
+
do_split=do_split,
|
64 |
+
)
|
65 |
+
)
|
66 |
+
if data_q3.shape[0] > 0:
|
67 |
+
self.children.append(
|
68 |
+
QuadTree(
|
69 |
+
data_q3,
|
70 |
+
[xmid, ymin],
|
71 |
+
[xmax, ymid],
|
72 |
+
id + "2",
|
73 |
+
depth - 1,
|
74 |
+
do_split=do_split,
|
75 |
+
)
|
76 |
+
)
|
77 |
+
if data_q4.shape[0] > 0:
|
78 |
+
self.children.append(
|
79 |
+
QuadTree(
|
80 |
+
data_q4,
|
81 |
+
[xmid, ymid],
|
82 |
+
[xmax, ymax],
|
83 |
+
id + "3",
|
84 |
+
depth - 1,
|
85 |
+
do_split=do_split,
|
86 |
+
)
|
87 |
+
)
|
88 |
+
|
89 |
+
def unwrap(self):
|
90 |
+
if len(self.children) == 0:
|
91 |
+
return {self.id: [self.mins, self.maxs, self.data.copy()]}
|
92 |
+
else:
|
93 |
+
d = dict()
|
94 |
+
for child in self.children:
|
95 |
+
d.update(child.unwrap())
|
96 |
+
return d
|
97 |
+
|
98 |
+
|
99 |
+
def extract(qt, name_new_column):
|
100 |
+
cluster = qt.unwrap()
|
101 |
+
boundaries, data = {}, []
|
102 |
+
id_to_quad = np.array(list(cluster.keys()))
|
103 |
+
for i, (id, vs) in zip(np.arange(len(cluster)), cluster.items()):
|
104 |
+
(min_lat, min_lon), (max_lat, max_lon), points = vs
|
105 |
+
points[name_new_column] = int(i)
|
106 |
+
data.append(points)
|
107 |
+
boundaries[i] = (
|
108 |
+
float(min_lat),
|
109 |
+
float(min_lon),
|
110 |
+
float(max_lat),
|
111 |
+
float(max_lon),
|
112 |
+
points["latitude"].mean(),
|
113 |
+
points["longitude"].mean(),
|
114 |
+
)
|
115 |
+
|
116 |
+
data = pd.concat(data)
|
117 |
+
return boundaries, data, id_to_quad
|
118 |
+
|
119 |
+
|
120 |
+
def vizu(name_new_column, df_train, boundaries):
|
121 |
+
plt.hist(df_train[name_new_column], bins=len(boundaries))
|
122 |
+
plt.xlabel("Cluster ID")
|
123 |
+
plt.ylabel("Number of images")
|
124 |
+
plt.title("Cluster distribution")
|
125 |
+
plt.yscale("log")
|
126 |
+
plt.savefig(f"{name_new_column}_distrib.png")
|
127 |
+
plt.clf()
|
128 |
+
|
129 |
+
plt.scatter(
|
130 |
+
df_train["longitude"].to_numpy(),
|
131 |
+
df_train["latitude"].to_numpy(),
|
132 |
+
c=np.random.permutation(len(boundaries))[df_train[name_new_column].to_numpy()],
|
133 |
+
cmap="tab20",
|
134 |
+
s=0.1,
|
135 |
+
alpha=0.5,
|
136 |
+
)
|
137 |
+
plt.xlabel("Longitude")
|
138 |
+
plt.ylabel("Latitude")
|
139 |
+
plt.title("Quadtree map")
|
140 |
+
plt.savefig(f"{name_new_column}_map.png")
|
141 |
+
|
142 |
+
|
143 |
+
@hydra.main(
|
144 |
+
config_path="../configs/scripts",
|
145 |
+
config_name="enrich-metadata-quadtree",
|
146 |
+
version_base=None,
|
147 |
+
)
|
148 |
+
def main(cfg):
|
149 |
+
data_path = join(cfg.data_dir, "osv5m")
|
150 |
+
name_new_column = f"quadtree_{cfg.depth}_{cfg.do_split}"
|
151 |
+
|
152 |
+
# Create clusters from train images
|
153 |
+
train_fp = join(data_path, f"train.csv")
|
154 |
+
df_train = pd.read_csv(train_fp)
|
155 |
+
|
156 |
+
qt = QuadTree(df_train, depth=cfg.depth, do_split=cfg.do_split)
|
157 |
+
boundaries, df_train, id_to_quad = extract(qt, name_new_column)
|
158 |
+
|
159 |
+
vizu(name_new_column, df_train, boundaries)
|
160 |
+
|
161 |
+
# Save clusters
|
162 |
+
boundaries = pd.DataFrame.from_dict(
|
163 |
+
boundaries,
|
164 |
+
orient="index",
|
165 |
+
columns=["min_lat", "min_lon", "max_lat", "max_lon", "mean_lat", "mean_lon"],
|
166 |
+
)
|
167 |
+
boundaries.to_csv(f"{name_new_column}.csv", index_label="cluster_id")
|
168 |
+
|
169 |
+
# Assign test images to clusters
|
170 |
+
test_fp = join(data_path, f"test.csv")
|
171 |
+
df_test = pd.read_csv(test_fp)
|
172 |
+
|
173 |
+
above_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) > np.expand_dims(
|
174 |
+
boundaries["min_lat"].to_numpy(), 0
|
175 |
+
)
|
176 |
+
below_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) < np.expand_dims(
|
177 |
+
boundaries["max_lat"].to_numpy(), 0
|
178 |
+
)
|
179 |
+
above_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) > np.expand_dims(
|
180 |
+
boundaries["min_lon"].to_numpy(), 0
|
181 |
+
)
|
182 |
+
below_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) < np.expand_dims(
|
183 |
+
boundaries["max_lon"].to_numpy(), 0
|
184 |
+
)
|
185 |
+
|
186 |
+
mask = np.logical_and(
|
187 |
+
np.logical_and(above_lat, below_lat), np.logical_and(above_lon, below_lon)
|
188 |
+
)
|
189 |
+
|
190 |
+
df_test[name_new_column] = np.argmax(mask, axis=1)
|
191 |
+
|
192 |
+
# save index_to_gps_quadtree file
|
193 |
+
lat = torch.tensor(boundaries["mean_lat"])
|
194 |
+
lon = torch.tensor(boundaries["mean_lon"])
|
195 |
+
coord = torch.stack([lat / 90, lon / 180], dim=-1)
|
196 |
+
torch.save(
|
197 |
+
coord, join(data_path, f"index_to_gps_quadtree_{cfg.depth}_{cfg.do_split}.pt")
|
198 |
+
)
|
199 |
+
|
200 |
+
torch.save(id_to_quad, join(data_path, f"id_to_quad_{cfg.depth}_{cfg.do_split}.pt"))
|
201 |
+
# Overwrite test.csv and train.csv
|
202 |
+
if cfg.overwrite_csv:
|
203 |
+
df_train.to_csv(train_fp, index=False)
|
204 |
+
df_test.to_csv(test_fp, index=False)
|
205 |
+
|
206 |
+
|
207 |
+
if __name__ == "__main__":
|
208 |
+
main()
|
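A minimal sketch of reading back what this script saves (the "datasets" data_dir and the depth=10, do_split=1000 values are assumed examples; the 1/90 and 1/180 rescaling mirrors the coord computation above):

import torch
import pandas as pd

coords = torch.load("datasets/osv5m/index_to_gps_quadtree_10_1000.pt")  # (num_clusters, 2), scaled to [-1, 1]
bounds = pd.read_csv("quadtree_10_1000.csv", index_col="cluster_id")    # per-cluster bounding box and mean
cluster_id = 42                                                         # hypothetical predicted cluster
lat = coords[cluster_id, 0].item() * 90.0                               # undo the latitude scaling
lon = coords[cluster_id, 1].item() * 180.0                              # undo the longitude scaling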
scripts/preprocessing/enrich-metadata.py (ADDED)
@@ -0,0 +1,123 @@
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import joblib
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
import reverse_geocoder
|
7 |
+
from os.path import join, dirname
|
8 |
+
|
9 |
+
|
10 |
+
class QuadTree(object):
|
11 |
+
def __init__(
|
12 |
+
self, data, mins=None, maxs=None, id="", depth=3, min_split=0, do_split=1000
|
13 |
+
):
|
14 |
+
self.id = id
|
15 |
+
self.data = data
|
16 |
+
|
17 |
+
if mins is None:
|
18 |
+
mins = data[["latitude", "longitude"]].to_numpy().min(0)
|
19 |
+
if maxs is None:
|
20 |
+
maxs = data[["latitude", "longitude"]].to_numpy().max(0)
|
21 |
+
|
22 |
+
self.mins = np.asarray(mins)
|
23 |
+
self.maxs = np.asarray(maxs)
|
24 |
+
self.sizes = self.maxs - self.mins
|
25 |
+
|
26 |
+
self.children = []
|
27 |
+
|
28 |
+
mids = 0.5 * (self.mins + self.maxs)
|
29 |
+
xmin, ymin = self.mins
|
30 |
+
xmax, ymax = self.maxs
|
31 |
+
xmid, ymid = mids
|
32 |
+
|
33 |
+
if depth > 0 and len(self.data) >= do_split:
|
34 |
+
# split the data into four quadrants
|
35 |
+
data_q1 = data[(data["latitude"] < mids[0]) & (data["longitude"] < mids[1])]
|
36 |
+
data_q2 = data[
|
37 |
+
(data["latitude"] < mids[0]) & (data["longitude"] >= mids[1])
|
38 |
+
]
|
39 |
+
data_q3 = data[
|
40 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] < mids[1])
|
41 |
+
]
|
42 |
+
data_q4 = data[
|
43 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] >= mids[1])
|
44 |
+
]
|
45 |
+
|
46 |
+
# recursively build a quad tree on each quadrant which has data
|
47 |
+
if data_q1.shape[0] > min_split:
|
48 |
+
self.children.append(
|
49 |
+
QuadTree(data_q1, [xmin, ymin], [xmid, ymid], id + "0", depth - 1)
|
50 |
+
)
|
51 |
+
if data_q2.shape[0] > min_split:
|
52 |
+
self.children.append(
|
53 |
+
QuadTree(data_q2, [xmin, ymid], [xmid, ymax], id + "1", depth - 1)
|
54 |
+
)
|
55 |
+
if data_q3.shape[0] > min_split:
|
56 |
+
self.children.append(
|
57 |
+
QuadTree(data_q3, [xmid, ymin], [xmax, ymid], id + "2", depth - 1)
|
58 |
+
)
|
59 |
+
if data_q4.shape[0] > min_split:
|
60 |
+
self.children.append(
|
61 |
+
QuadTree(data_q4, [xmid, ymid], [xmax, ymax], id + "3", depth - 1)
|
62 |
+
)
|
63 |
+
|
64 |
+
def unwrap(self):
|
65 |
+
if len(self.children) == 0:
|
66 |
+
return {self.id: [self.mins, self.maxs, self.data.copy()]}
|
67 |
+
else:
|
68 |
+
d = dict()
|
69 |
+
for child in self.children:
|
70 |
+
d.update(child.unwrap())
|
71 |
+
return d
|
72 |
+
|
73 |
+
|
74 |
+
def extract(qt):
|
75 |
+
cluster = qt.unwrap()
|
76 |
+
boundaries, data = {}, []
|
77 |
+
for id, vs in cluster.items():
|
78 |
+
(min_lat, min_lon), (max_lat, max_lon), points = vs
|
79 |
+
points["category"] = id
|
80 |
+
data.append(points)
|
81 |
+
boundaries[id] = (
|
82 |
+
float(min_lat),
|
83 |
+
float(min_lon),
|
84 |
+
float(max_lat),
|
85 |
+
float(max_lon),
|
86 |
+
)
|
87 |
+
|
88 |
+
data = pd.concat(data)
|
89 |
+
return boundaries, data
|
90 |
+
|
91 |
+
|
92 |
+
if __name__ == "__main__":
|
93 |
+
# merge into one DataFrame
|
94 |
+
data_path = join(dirname(dirname(__file__)), "datasets", "osv5m")
|
95 |
+
train_fp = join(data_path, f"train.csv")
|
96 |
+
test_fp = join(data_path, f"test.csv")
|
97 |
+
|
98 |
+
df_train = pd.read_csv(train_fp)
|
99 |
+
df_train["split"] = "train"
|
100 |
+
|
101 |
+
df_test = pd.read_csv(test_fp)
|
102 |
+
df_test["split"] = "test"
|
103 |
+
|
104 |
+
df = pd.concat([df_train, df_test])
|
105 |
+
size_before = df.shape[0]
|
106 |
+
qt = QuadTree(df, depth=15)
|
107 |
+
boundaries, df = extract(qt)
|
108 |
+
assert df.shape[0] == size_before
|
109 |
+
|
110 |
+
location = reverse_geocoder.search(
|
111 |
+
[(lat, lon) for lat, lon in zip(df["latitude"], df["longitude"])]
|
112 |
+
)
|
113 |
+
df["city"] = [l.get("name", "") for l in location]
|
114 |
+
df["country"] = [l.get("cc", "") for l in location]
|
115 |
+
del location
|
116 |
+
|
117 |
+
df_train = df[df["split"] == "train"].drop(["split"], axis=1)
|
118 |
+
df_test = df[df["split"] == "test"].drop(["split"], axis=1)
|
119 |
+
assert (df_train.shape[0] + df_test.shape[0]) == size_before
|
120 |
+
|
121 |
+
json.dump(boundaries, open(join(data_path, "borders.json"), "w"))
|
122 |
+
df_train.to_csv(train_fp, index=False)
|
123 |
+
df_test.to_csv(test_fp, index=False)
|
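For reference, reverse_geocoder.search takes a list of (lat, lon) tuples and returns one record per query whose fields include 'name', 'admin1', 'admin2' and 'cc', which is what the "city" and "country" columns above are built from; a small illustrative call with arbitrary coordinates:

import reverse_geocoder

records = reverse_geocoder.search([(48.8566, 2.3522), (40.7128, -74.0060)])
for r in records:
    print(r["name"], r["admin1"], r["cc"])  # nearest city, admin-1 region, ISO-2 country code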
scripts/preprocessing/fix_namimbia.py (ADDED)
@@ -0,0 +1,64 @@
1 |
+
from os.path import join, dirname
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
if __name__ == "__main__":
|
6 |
+
# Define the list of cities
|
7 |
+
cities = [
|
8 |
+
"Walvis Bay",
|
9 |
+
"Keetmanshoop",
|
10 |
+
"Warmbad",
|
11 |
+
"Rundu",
|
12 |
+
"Outapi",
|
13 |
+
"Karibib",
|
14 |
+
"Otjimbingwe",
|
15 |
+
"Ondangwa",
|
16 |
+
"Oranjemund",
|
17 |
+
"Maltahohe",
|
18 |
+
"Otavi",
|
19 |
+
"Outjo",
|
20 |
+
"Swakopmund",
|
21 |
+
"Gobabis",
|
22 |
+
"Karasburg",
|
23 |
+
"Opuwo",
|
24 |
+
"Hentiesbaai",
|
25 |
+
"Katima Mulilo",
|
26 |
+
"Oshikango",
|
27 |
+
"Bethanie",
|
28 |
+
"Ongandjera",
|
29 |
+
"Mariental",
|
30 |
+
"Bagani",
|
31 |
+
"Nkurenkuru",
|
32 |
+
"Usakos",
|
33 |
+
"Rehoboth",
|
34 |
+
"Aranos",
|
35 |
+
"Omaruru",
|
36 |
+
"Arandis",
|
37 |
+
"Windhoek",
|
38 |
+
"Khorixas",
|
39 |
+
"Okahandja",
|
40 |
+
"Grootfontein",
|
41 |
+
"Tsumeb",
|
42 |
+
]
|
43 |
+
|
44 |
+
csv_dtype = {"category": str, "country": str, "city": str}
|
45 |
+
for split in ["train", "test"]:
|
46 |
+
fp = join(
|
47 |
+
dirname(dirname(__file__)), "datasets", "osv5m", f"{split}.csv"
|
48 |
+
)
|
49 |
+
|
50 |
+
# Read the CSV file into a pandas DataFrame
|
51 |
+
df = pd.read_csv(fp, dtype=csv_dtype)
|
52 |
+
|
53 |
+
# Check if the "country" column contains any of the cities in the list
|
54 |
+
mask = df["city"].isin(cities)
|
55 |
+
|
56 |
+
# If a city is found, set the corresponding rows in the "country" column to 'NMB'
|
57 |
+
df.loc[mask, "country"] = "NMB"
|
58 |
+
assert all(map(lambda x: isinstance(x, str), df["country"].unique().tolist()))
|
59 |
+
|
60 |
+
# Drop the columns that are all NaN
|
61 |
+
df.dropna(subset=["id", "latitude", "longitude"], inplace=True)
|
62 |
+
|
63 |
+
# Save the modified DataFrame back to the CSV file
|
64 |
+
df.to_csv(fp, index=False)
|
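A likely reason this fix is needed (an assumption, not stated in the code): Namibia's ISO alpha-2 code "NA" is one of pandas' default NaN markers, so the country value is silently read back as missing; a minimal reproduction:

import pandas as pd
from io import StringIO

df = pd.read_csv(StringIO("id,country\n1,NA\n2,FR\n"))  # "NA" is parsed as NaN by default
print(df["country"].isna().tolist())                    # [True, False]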
scripts/preprocessing/nearest-neighbors.py (ADDED)
@@ -0,0 +1,140 @@
1 |
+
import sys, os
|
2 |
+
import json
|
3 |
+
from PIL import Image
|
4 |
+
from tqdm import tqdm
|
5 |
+
from os.path import dirname, join
|
6 |
+
|
7 |
+
sys.path.append(dirname(dirname(__file__)))
|
8 |
+
|
9 |
+
import torch
|
10 |
+
from transformers import AutoImageProcessor, AutoModel
|
11 |
+
from transformers import CLIPProcessor, CLIPModel
|
12 |
+
from transformers import pipeline
|
13 |
+
|
14 |
+
from data.data import osv5m
|
15 |
+
from json_stream import streamable_list
|
16 |
+
|
17 |
+
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
18 |
+
|
19 |
+
|
20 |
+
def load_model_clip():
|
21 |
+
model = CLIPModel.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")
|
22 |
+
processor = CLIPProcessor.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")
|
23 |
+
return processor, model.to(DEVICE)
|
24 |
+
|
25 |
+
|
26 |
+
def load_model_dino():
|
27 |
+
model = AutoModel.from_pretrained("facebook/dinov2-base")
|
28 |
+
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
|
29 |
+
return processor, model.to(DEVICE)
|
30 |
+
|
31 |
+
|
32 |
+
def compute_dino(processor, model, x):
|
33 |
+
inputs = processor(images=x[0], return_tensors="pt", device=DEVICE).to(DEVICE)
|
34 |
+
outputs = model(**inputs)
|
35 |
+
last_hidden_states = outputs.last_hidden_state.cpu().numpy()
|
36 |
+
for i in range(len(x[0])):
|
37 |
+
yield [last_hidden_states[i].tolist(), x[1][i], x[2][i], x[3][i]]
|
38 |
+
|
39 |
+
|
40 |
+
def compute_clip(processor, model, x):
|
41 |
+
inputs = processor(images=x[0], return_tensors="pt", device=DEVICE).to(DEVICE)
|
42 |
+
features = model.get_image_features(**inputs)
|
43 |
+
features /= features.norm(dim=-1, keepdim=True)
|
44 |
+
features = features.cpu().numpy()
|
45 |
+
for i in range(len(x[0])):
|
46 |
+
yield [features[i].tolist(), x[1][i], x[2][i], x[3][i]]
|
47 |
+
|
48 |
+
|
49 |
+
def get_batch(dataset, batch_size):
|
50 |
+
data, lats, lons, ids = [], [], [], []
|
51 |
+
for i in range(len(dataset)):
|
52 |
+
id, lat, lon = dataset.df.iloc[i]
|
53 |
+
data.append(Image.open(join(dataset.image_folder, f"{int(id)}.jpg")))
|
54 |
+
lats.append(lat)
|
55 |
+
lons.append(lon)
|
56 |
+
ids.append(id)
|
57 |
+
if len(data) == batch_size:
|
58 |
+
yield data, lats, lons, ids
|
59 |
+
data, lats, lons, ids = [], [], [], []
|
60 |
+
|
61 |
+
if len(data) > 0:
|
62 |
+
yield data, lats, lons, ids
|
63 |
+
data, lats, lons, ids = [], [], [], []
|
64 |
+
|
65 |
+
|
66 |
+
if __name__ == "__main__":
|
67 |
+
import argparse
|
68 |
+
|
69 |
+
parser = argparse.ArgumentParser()
|
70 |
+
parser.add_argument("--batch_size", type=int, default=256)
|
71 |
+
parser.add_argument("--compute_features", action="store_true")
|
72 |
+
parser.add_argument("--compute_nearest", action="store_true")
|
73 |
+
parser.add_argument("--json_path", default="features")
|
74 |
+
parser.add_argument("--which", type=str, default="clip", choices=["clip", "dino"])
|
75 |
+
args = parser.parse_args()
|
76 |
+
json_path = join(args.json_path, args.which)
|
77 |
+
|
78 |
+
os.makedirs(json_path, exist_ok=True)
|
79 |
+
if args.compute_features:
|
80 |
+
processor, model = (
|
81 |
+
load_model_clip() if args.which == "clip" else load_model_dino()
|
82 |
+
)
|
83 |
+
compute_fn = compute_clip if args.which == "clip" else compute_dino
|
84 |
+
|
85 |
+
for split in ["test"]: #'train',
|
86 |
+
# open existing json and read as dictionary
|
87 |
+
json_path_ = join(json_path, f"{split}.json")
|
88 |
+
|
89 |
+
dataset = osv5m(
|
90 |
+
"datasets/osv5m", transforms=None, split=split, dont_split=True
|
91 |
+
)
|
92 |
+
|
93 |
+
@torch.no_grad()
|
94 |
+
def compute(batch_size):
|
95 |
+
for data in tqdm(
|
96 |
+
get_batch(dataset, batch_size),
|
97 |
+
total=len(dataset) // batch_size,
|
98 |
+
desc=f"Computing {split} on {args.which}",
|
99 |
+
):
|
100 |
+
features = compute_fn(processor, model, data)
|
101 |
+
for feature, lat, lon, id in features:
|
102 |
+
yield feature, lat, lon, id
|
103 |
+
|
104 |
+
data = streamable_list(compute(args.batch_size))
|
105 |
+
json.dump(data, open(json_path_, "w"), indent=4)
|
106 |
+
|
107 |
+
if args.compute_nearest:
|
108 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
109 |
+
import numpy as np
|
110 |
+
|
111 |
+
train, test = [
|
112 |
+
json.load(open(join(json_path, f"{split}.json"), "r"))
|
113 |
+
for split in ["train", "test"]
|
114 |
+
]
|
115 |
+
|
116 |
+
def get_neighbors(k=10):
|
117 |
+
for i, test_data in enumerate(tqdm(test)):
|
118 |
+
feature, lat, lon, id = test_data
|
119 |
+
features_train = np.stack(
|
120 |
+
[np.array(train_data[0]) for train_data in train]
|
121 |
+
)
|
122 |
+
cs = np.squeeze(
|
123 |
+
cosine_similarity(np.expand_dims(feature, axis=0), features_train),
|
124 |
+
axis=0,
|
125 |
+
)
|
126 |
+
i = np.argsort(cs)[-k:][::-1].tolist()
|
127 |
+
yield [
|
128 |
+
{n: x}
|
129 |
+
for idx in i
|
130 |
+
for n, x in zip(
|
131 |
+
["feature", "lat", "lon", "id", "distance"],
|
132 |
+
train[idx]
|
133 |
+
+ [
|
134 |
+
cs[idx],
|
135 |
+
],
|
136 |
+
)
|
137 |
+
]
|
138 |
+
|
139 |
+
data = streamable_list(get_neighbors())
|
140 |
+
json.dump(data, open(join(json_path, "nearest.json"), "w"), indent=4)
|
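The --compute_nearest step boils down to cosine similarity between L2-normalized features; the same ranking in isolation, on toy arrays:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

train_feats = np.random.randn(100, 768)         # stand-ins for the stored train features
query = np.random.randn(1, 768)                 # one test feature
cs = cosine_similarity(query, train_feats)[0]   # shape (100,)
top10 = np.argsort(cs)[-10:][::-1]              # indices of the 10 most similar train images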
scripts/preprocessing/preprocess.py (ADDED)
@@ -0,0 +1,400 @@
1 |
+
import pandas as pd
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from os.path import join
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import hydra
|
7 |
+
|
8 |
+
|
9 |
+
class QuadTree(object):
|
10 |
+
def __init__(self, data, mins=None, maxs=None, id="", depth=3, do_split=1000):
|
11 |
+
self.id = id
|
12 |
+
self.data = data
|
13 |
+
|
14 |
+
if mins is None:
|
15 |
+
mins = data[["latitude", "longitude"]].to_numpy().min(0)
|
16 |
+
if maxs is None:
|
17 |
+
maxs = data[["latitude", "longitude"]].to_numpy().max(0)
|
18 |
+
|
19 |
+
self.mins = np.asarray(mins)
|
20 |
+
self.maxs = np.asarray(maxs)
|
21 |
+
self.sizes = self.maxs - self.mins
|
22 |
+
|
23 |
+
self.children = []
|
24 |
+
|
25 |
+
mids = 0.5 * (self.mins + self.maxs)
|
26 |
+
xmin, ymin = self.mins
|
27 |
+
xmax, ymax = self.maxs
|
28 |
+
xmid, ymid = mids
|
29 |
+
|
30 |
+
if (depth > 0) and (len(self.data) >= do_split):
|
31 |
+
# split the data into four quadrants
|
32 |
+
data_q1 = data[(data["latitude"] < mids[0]) & (data["longitude"] < mids[1])]
|
33 |
+
data_q2 = data[
|
34 |
+
(data["latitude"] < mids[0]) & (data["longitude"] >= mids[1])
|
35 |
+
]
|
36 |
+
data_q3 = data[
|
37 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] < mids[1])
|
38 |
+
]
|
39 |
+
data_q4 = data[
|
40 |
+
(data["latitude"] >= mids[0]) & (data["longitude"] >= mids[1])
|
41 |
+
]
|
42 |
+
|
43 |
+
# recursively build a quad tree on each quadrant which has data
|
44 |
+
if data_q1.shape[0] > 0:
|
45 |
+
self.children.append(
|
46 |
+
QuadTree(
|
47 |
+
data_q1,
|
48 |
+
[xmin, ymin],
|
49 |
+
[xmid, ymid],
|
50 |
+
id + "0",
|
51 |
+
depth - 1,
|
52 |
+
do_split=do_split,
|
53 |
+
)
|
54 |
+
)
|
55 |
+
if data_q2.shape[0] > 0:
|
56 |
+
self.children.append(
|
57 |
+
QuadTree(
|
58 |
+
data_q2,
|
59 |
+
[xmin, ymid],
|
60 |
+
[xmid, ymax],
|
61 |
+
id + "1",
|
62 |
+
depth - 1,
|
63 |
+
do_split=do_split,
|
64 |
+
)
|
65 |
+
)
|
66 |
+
if data_q3.shape[0] > 0:
|
67 |
+
self.children.append(
|
68 |
+
QuadTree(
|
69 |
+
data_q3,
|
70 |
+
[xmid, ymin],
|
71 |
+
[xmax, ymid],
|
72 |
+
id + "2",
|
73 |
+
depth - 1,
|
74 |
+
do_split=do_split,
|
75 |
+
)
|
76 |
+
)
|
77 |
+
if data_q4.shape[0] > 0:
|
78 |
+
self.children.append(
|
79 |
+
QuadTree(
|
80 |
+
data_q4,
|
81 |
+
[xmid, ymid],
|
82 |
+
[xmax, ymax],
|
83 |
+
id + "3",
|
84 |
+
depth - 1,
|
85 |
+
do_split=do_split,
|
86 |
+
)
|
87 |
+
)
|
88 |
+
|
89 |
+
def unwrap(self):
|
90 |
+
if len(self.children) == 0:
|
91 |
+
return {self.id: [self.mins, self.maxs, self.data.copy()]}
|
92 |
+
else:
|
93 |
+
d = dict()
|
94 |
+
for child in self.children:
|
95 |
+
d.update(child.unwrap())
|
96 |
+
return d
|
97 |
+
|
98 |
+
|
99 |
+
def extract(qt, name_new_column):
|
100 |
+
cluster = qt.unwrap()
|
101 |
+
boundaries, data = {}, []
|
102 |
+
id_to_quad = np.array(list(cluster.keys()))
|
103 |
+
for i, (id, vs) in zip(np.arange(len(cluster)), cluster.items()):
|
104 |
+
(min_lat, min_lon), (max_lat, max_lon), points = vs
|
105 |
+
points[name_new_column] = int(i)
|
106 |
+
data.append(points)
|
107 |
+
boundaries[i] = (
|
108 |
+
float(min_lat),
|
109 |
+
float(min_lon),
|
110 |
+
float(max_lat),
|
111 |
+
float(max_lon),
|
112 |
+
points["latitude"].mean(),
|
113 |
+
points["longitude"].mean(),
|
114 |
+
)
|
115 |
+
|
116 |
+
data = pd.concat(data)
|
117 |
+
return boundaries, data, id_to_quad
|
118 |
+
|
119 |
+
|
120 |
+
def vizu(name_new_column, df_train, boundaries, save_path):
|
121 |
+
plt.hist(df_train[name_new_column], bins=len(boundaries))
|
122 |
+
plt.xlabel("Cluster ID")
|
123 |
+
plt.ylabel("Number of images")
|
124 |
+
plt.title("Cluster distribution")
|
125 |
+
plt.yscale("log")
|
126 |
+
plt.savefig(join(save_path, f"{name_new_column}_distrib.png"))
|
127 |
+
plt.clf()
|
128 |
+
|
129 |
+
plt.scatter(
|
130 |
+
df_train["longitude"].to_numpy(),
|
131 |
+
df_train["latitude"].to_numpy(),
|
132 |
+
c=np.random.permutation(len(boundaries))[df_train[name_new_column].to_numpy()],
|
133 |
+
cmap="tab20",
|
134 |
+
s=0.1,
|
135 |
+
alpha=0.5,
|
136 |
+
)
|
137 |
+
plt.xlabel("Longitude")
|
138 |
+
plt.ylabel("Latitude")
|
139 |
+
plt.title("Quadtree map")
|
140 |
+
plt.savefig(join(save_path, f"{name_new_column}_map.png"))
|
141 |
+
|
142 |
+
|
143 |
+
@hydra.main(
|
144 |
+
config_path="../../configs/scripts",
|
145 |
+
config_name="preprocess",
|
146 |
+
version_base=None,
|
147 |
+
)
|
148 |
+
def main(cfg):
|
149 |
+
data_path = join(cfg.data_dir, "osv5m")
|
150 |
+
save_path = cfg.data_dir
|
151 |
+
name_new_column = f"quadtree_{cfg.depth}_{cfg.do_split}"
|
152 |
+
|
153 |
+
# Create clusters from train images
|
154 |
+
train_fp = join(data_path, f"train.csv")
|
155 |
+
df_train = pd.read_csv(train_fp, low_memory=False)
|
156 |
+
|
157 |
+
qt = QuadTree(df_train, depth=cfg.depth, do_split=cfg.do_split)
|
158 |
+
boundaries, df_train, id_to_quad = extract(qt, name_new_column)
|
159 |
+
|
160 |
+
vizu(name_new_column, df_train, boundaries, save_path)
|
161 |
+
|
162 |
+
# Save clusters
|
163 |
+
boundaries = pd.DataFrame.from_dict(
|
164 |
+
boundaries,
|
165 |
+
orient="index",
|
166 |
+
columns=["min_lat", "min_lon", "max_lat", "max_lon", "mean_lat", "mean_lon"],
|
167 |
+
)
|
168 |
+
boundaries.to_csv(
|
169 |
+
join(save_path, f"{name_new_column}.csv"), index_label="cluster_id"
|
170 |
+
)
|
171 |
+
|
172 |
+
# Assign test images to clusters
|
173 |
+
test_fp = join(data_path, f"test.csv")
|
174 |
+
df_test = pd.read_csv(test_fp)
|
175 |
+
|
176 |
+
above_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) > np.expand_dims(
|
177 |
+
boundaries["min_lat"].to_numpy(), 0
|
178 |
+
)
|
179 |
+
below_lat = np.expand_dims(df_test["latitude"].to_numpy(), -1) < np.expand_dims(
|
180 |
+
boundaries["max_lat"].to_numpy(), 0
|
181 |
+
)
|
182 |
+
above_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) > np.expand_dims(
|
183 |
+
boundaries["min_lon"].to_numpy(), 0
|
184 |
+
)
|
185 |
+
below_lon = np.expand_dims(df_test["longitude"].to_numpy(), -1) < np.expand_dims(
|
186 |
+
boundaries["max_lon"].to_numpy(), 0
|
187 |
+
)
|
188 |
+
|
189 |
+
mask = np.logical_and(
|
190 |
+
np.logical_and(above_lat, below_lat), np.logical_and(above_lon, below_lon)
|
191 |
+
)
|
192 |
+
|
193 |
+
df_test[name_new_column] = np.argmax(mask, axis=1)
|
194 |
+
|
195 |
+
# save index_to_gps_quadtree file
|
196 |
+
lat = torch.tensor(boundaries["mean_lat"])
|
197 |
+
lon = torch.tensor(boundaries["mean_lon"])
|
198 |
+
coord = torch.stack([lat, lon], dim=-1)
|
199 |
+
torch.save(
|
200 |
+
coord, join(save_path, f"index_to_gps_quadtree_{cfg.depth}_{cfg.do_split}.pt")
|
201 |
+
)
|
202 |
+
|
203 |
+
torch.save(id_to_quad, join(save_path, f"id_to_quad_{cfg.depth}_{cfg.do_split}.pt"))
|
204 |
+
# Overwrite test.csv and train.csv
|
205 |
+
if cfg.overwrite_csv:
|
206 |
+
df_train.to_csv(train_fp, index=False)
|
207 |
+
df_test.to_csv(test_fp, index=False)
|
208 |
+
|
209 |
+
df = pd.read_csv(join(data_path, "train.csv"), low_memory=False).fillna("NaN")
|
210 |
+
# Compute the average location for each unique country
|
211 |
+
country_avg = (
|
212 |
+
df.groupby("unique_country")[["latitude", "longitude"]].mean().reset_index()
|
213 |
+
)
|
214 |
+
country_avg.to_csv(
|
215 |
+
join(save_path, "country_center.csv"),
|
216 |
+
columns=["unique_country", "latitude", "longitude"],
|
217 |
+
index=False,
|
218 |
+
)
|
219 |
+
# Compute the average location for each unique admin1 (region)
|
220 |
+
region_avg = (
|
221 |
+
df.groupby(["unique_region"])[["latitude", "longitude"]].mean().reset_index()
|
222 |
+
)
|
223 |
+
region_avg.to_csv(
|
224 |
+
join(save_path, "region_center.csv"),
|
225 |
+
columns=["unique_region", "latitude", "longitude"],
|
226 |
+
index=False,
|
227 |
+
)
|
228 |
+
# Compute the average location for each unique admin2 (area)
|
229 |
+
area_avg = (
|
230 |
+
df.groupby(["unique_sub-region"])[["latitude", "longitude"]]
|
231 |
+
.mean()
|
232 |
+
.reset_index()
|
233 |
+
)
|
234 |
+
area_avg.to_csv(
|
235 |
+
join(save_path, "sub-region_center.csv"),
|
236 |
+
columns=["unique_sub-region", "latitude", "longitude"],
|
237 |
+
index=False,
|
238 |
+
)
|
239 |
+
# Compute the average location for each unique city
|
240 |
+
city_avg = (
|
241 |
+
df.groupby(["unique_city"])[["latitude", "longitude"]].mean().reset_index()
|
242 |
+
)
|
243 |
+
city_avg.to_csv(
|
244 |
+
join(save_path, "city_center.csv"),
|
245 |
+
columns=["unique_city", "latitude", "longitude"],
|
246 |
+
index=False,
|
247 |
+
)
|
248 |
+
|
249 |
+
for class_name in [
|
250 |
+
"unique_country",
|
251 |
+
"unique_sub-region",
|
252 |
+
"unique_region",
|
253 |
+
"unique_city",
|
254 |
+
]:
|
255 |
+
# Load CSV data into a Pandas DataFrame
|
256 |
+
csv_file = class_name.split("_")[-1] + "_center.csv"
|
257 |
+
df = pd.read_csv(join(save_path, csv_file), low_memory=False)
|
258 |
+
|
259 |
+
splits = ["train"]
|
260 |
+
categories = sorted(
|
261 |
+
pd.concat(
|
262 |
+
[
|
263 |
+
pd.read_csv(
|
264 |
+
join(data_path, f"{split}.csv"), low_memory=False
|
265 |
+
)[class_name]
|
266 |
+
for split in splits
|
267 |
+
]
|
268 |
+
)
|
269 |
+
.fillna("NaN")
|
270 |
+
.unique()
|
271 |
+
.tolist()
|
272 |
+
)
|
273 |
+
|
274 |
+
if "NaN" in categories:
|
275 |
+
categories.remove("NaN")
|
276 |
+
|
277 |
+
# compute the total number of categories - this name is fixed and will be used as a lookup during init
|
278 |
+
num_classes = len(categories)
|
279 |
+
|
280 |
+
# create a mapping from category to index
|
281 |
+
category_to_index = {category: i for i, category in enumerate(categories)}
|
282 |
+
|
283 |
+
dictionary = torch.zeros((num_classes, 2))
|
284 |
+
for index, row in df.iterrows():
|
285 |
+
key = row.iloc[0]
|
286 |
+
value = [row.iloc[1], row.iloc[2]]
|
287 |
+
if key in categories:
|
288 |
+
(
|
289 |
+
dictionary[category_to_index[key], 0],
|
290 |
+
dictionary[category_to_index[key], 1],
|
291 |
+
) = np.radians(row.iloc[1]), np.radians(row.iloc[2])
|
292 |
+
|
293 |
+
# Save the PyTorch tensor to a .pt file
|
294 |
+
output_file = join(save_path, "index_to_gps_" + class_name + ".pt")
|
295 |
+
torch.save(dictionary, output_file)
|
296 |
+
|
297 |
+
train = pd.read_csv(join(data_path, "train.csv"), low_memory=False).fillna(
|
298 |
+
"NaN"
|
299 |
+
)
|
300 |
+
|
301 |
+
u = train.groupby("unique_city").sample(n=1)
|
302 |
+
|
303 |
+
country_df = (
|
304 |
+
u.pivot(index="unique_city", columns="unique_country", values="unique_city")
|
305 |
+
.notna()
|
306 |
+
.astype(int)
|
307 |
+
.fillna(0)
|
308 |
+
)
|
309 |
+
country_to_idx = {
|
310 |
+
category: i for i, category in enumerate(list(country_df.columns))
|
311 |
+
}
|
312 |
+
city_country_matrix = torch.tensor(country_df.values) / 1.0
|
313 |
+
|
314 |
+
region_df = (
|
315 |
+
u.pivot(index="unique_city", columns="unique_region", values="unique_city")
|
316 |
+
.notna()
|
317 |
+
.astype(int)
|
318 |
+
.fillna(0)
|
319 |
+
)
|
320 |
+
region_to_idx = {category: i for i, category in enumerate(list(region_df.columns))}
|
321 |
+
city_region_matrix = torch.tensor(region_df.values) / 1.0
|
322 |
+
|
323 |
+
country_df = (
|
324 |
+
u.pivot(index="unique_city", columns="unique_country", values="unique_city")
|
325 |
+
.notna()
|
326 |
+
.astype(int)
|
327 |
+
.fillna(0)
|
328 |
+
)
|
329 |
+
country_to_idx = {
|
330 |
+
category: i for i, category in enumerate(list(country_df.columns))
|
331 |
+
}
|
332 |
+
city_country_matrix = torch.tensor(country_df.values) / 1.0
|
333 |
+
|
334 |
+
output_file = join(save_path, "city_to_country.pt")
|
335 |
+
torch.save(city_country_matrix, output_file)
|
336 |
+
|
337 |
+
output_file = join(save_path, "country_to_idx.pt")
|
338 |
+
torch.save(country_to_idx, output_file)
|
339 |
+
|
340 |
+
region_df = (
|
341 |
+
u.pivot(index="unique_city", columns="unique_region", values="unique_city")
|
342 |
+
.notna()
|
343 |
+
.astype(int)
|
344 |
+
.fillna(0)
|
345 |
+
)
|
346 |
+
region_to_idx = {category: i for i, category in enumerate(list(region_df.columns))}
|
347 |
+
city_region_matrix = torch.tensor(region_df.values) / 1.0
|
348 |
+
|
349 |
+
output_file = join(save_path, "city_to_region.pt")
|
350 |
+
torch.save(city_region_matrix, output_file)
|
351 |
+
|
352 |
+
output_file = join(save_path, "region_to_idx.pt")
|
353 |
+
torch.save(region_to_idx, output_file)
|
354 |
+
|
355 |
+
area_df = (
|
356 |
+
u.pivot(index="unique_city", columns="unique_sub-region", values="unique_city")
|
357 |
+
.notna()
|
358 |
+
.astype(int)
|
359 |
+
.fillna(0)
|
360 |
+
)
|
361 |
+
area_to_idx = {category: i for i, category in enumerate(list(area_df.columns))}
|
362 |
+
city_area_matrix = torch.tensor(area_df.values) / 1.0
|
363 |
+
|
364 |
+
output_file = join(save_path, "city_to_area.pt")
|
365 |
+
torch.save(city_area_matrix, output_file)
|
366 |
+
|
367 |
+
output_file = join(save_path, "area_to_idx.pt")
|
368 |
+
torch.save(area_to_idx, output_file)
|
369 |
+
gt = torch.load(join(save_path, f"id_to_quad_{cfg.depth}_{cfg.do_split}.pt"))
|
370 |
+
matrixes = []
|
371 |
+
dicts = []
|
372 |
+
for i in range(1, cfg.depth):
|
373 |
+
# Step 2: Truncate strings to size cfg.depth - 1
|
374 |
+
l = [s[: cfg.depth - i] if len(s) >= cfg.depth + 1 - i else s for s in gt]
|
375 |
+
|
376 |
+
# Step 3: Get unique values in the modified list l
|
377 |
+
h = list(set(l))
|
378 |
+
|
379 |
+
# Step 4: Create a dictionary to map unique values to their index
|
380 |
+
h_dict = {value: index for index, value in enumerate(h)}
|
381 |
+
dicts.append(h_dict)
|
382 |
+
|
383 |
+
# Step 5: Initialize a torch matrix with zeros
|
384 |
+
matrix = torch.zeros((len(gt), len(h)))
|
385 |
+
|
386 |
+
# Step 6: Fill in the matrix with 1s based on the mapping
|
387 |
+
for h in range(len(gt)):
|
388 |
+
j = h_dict[l[h]]
|
389 |
+
matrix[h, j] = 1
|
390 |
+
matrixes.append(matrix)
|
391 |
+
|
392 |
+
output_file = join(save_path, "quadtree_matrixes.pt")
|
393 |
+
torch.save(matrixes, output_file)
|
394 |
+
|
395 |
+
output_file = join(save_path, "quadtree_dicts.pt")
|
396 |
+
torch.save(dicts, output_file)
|
397 |
+
|
398 |
+
|
399 |
+
if __name__ == "__main__":
|
400 |
+
main()
|
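quadtree_matrixes.pt stores, for each coarser level, a 0/1 matrix that maps every leaf cluster to its truncated-prefix ancestor, so leaf probabilities can be pooled upward with a single matrix product; a minimal sketch (the file name is taken from the code above, the batch is hypothetical):

import torch

matrixes = torch.load("quadtree_matrixes.pt")                              # list of (num_leaves, num_parents) 0/1 tensors
leaf_probs = torch.softmax(torch.randn(4, matrixes[0].shape[0]), dim=-1)   # hypothetical batch of leaf probabilities
parent_probs = leaf_probs @ matrixes[0]                                    # pools leaves sharing the same ancestor cell
print(parent_probs.sum(-1))                                                # still sums to 1 per sample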
scripts/preprocessing/train-val-split.py (ADDED)
@@ -0,0 +1,15 @@
1 |
+
import os
|
2 |
+
from os.path import dirname, join
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
+
|
7 |
+
if __name__ == "__main__":
|
8 |
+
data_path = join(dirname(dirname(__file__)), "datasets", "osv5m")
|
9 |
+
train_fp = join(data_path, f"train.csv")
|
10 |
+
val_fp = join(data_path, f"val.csv")
|
11 |
+
os.makedirs(dirname(val_fp), exist_ok=True)
|
12 |
+
df = pd.read_csv(train_fp, dtype={"category": str, "country": str, "city": str})
|
13 |
+
df_train, df_val = train_test_split(df, stratify=df["category"], test_size=0.1)
|
14 |
+
df_train.to_csv(train_fp, index=False)
|
15 |
+
df_val.to_csv(val_fp, index=False)
|
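A quick sanity check that the stratified split preserves the per-category proportions (illustrative only; the dataset path is a placeholder):

from os.path import join
import pandas as pd

data_path = "datasets/osv5m"   # placeholder, matching the layout used in the script above
tr = pd.read_csv(join(data_path, "train.csv"), dtype={"category": str})
va = pd.read_csv(join(data_path, "val.csv"), dtype={"category": str})
print(tr["category"].value_counts(normalize=True).head())
print(va["category"].value_counts(normalize=True).head())   # proportions should roughly match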
scripts/retrieval/backbone.py (ADDED)
@@ -0,0 +1,150 @@
1 |
+
import os     # needed below for os.listdir, os.path.isdir and os.remove
import torch  # needed below for torch.load, torch.no_grad and torch.stack
from os.path import join
|
2 |
+
import PIL
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
import reverse_geocoder
|
6 |
+
from torch.utils.data import Dataset
|
7 |
+
|
8 |
+
|
9 |
+
class GeoDataset(Dataset):
|
10 |
+
def __init__(self, image_folder, annotation_file, transformation, tag="image_id"):
|
11 |
+
self.image_folder = image_folder
|
12 |
+
gt = pd.read_csv(annotation_file, dtype={tag: str})
|
13 |
+
files = set([f.replace(".jpg", "") for f in os.listdir(image_folder)])
|
14 |
+
gt = gt[gt[tag].isin(files)]
|
15 |
+
self.processor = transformation
|
16 |
+
self.gt = [
|
17 |
+
(g[1][tag], g[1]["latitude"], g[1]["longitude"]) for g in gt.iterrows()
|
18 |
+
]
|
19 |
+
self.tag = tag
|
20 |
+
|
21 |
+
def fid(self, i):
|
22 |
+
return self.gt[i][0]
|
23 |
+
|
24 |
+
def latlon(self, i):
|
25 |
+
return self.gt[i][1]
|
26 |
+
|
27 |
+
def __len__(self):
|
28 |
+
return len(self.gt)
|
29 |
+
|
30 |
+
def __getitem__(self, idx):
|
31 |
+
fp = join(self.image_folder, self.gt[idx][0] + ".jpg")
|
32 |
+
return self.processor(self, idx, fp)
|
33 |
+
|
34 |
+
|
35 |
+
def load_plonk(path):
|
36 |
+
import hydra
|
37 |
+
from hydra import initialize, compose
|
38 |
+
from models.module import Geolocalizer
|
39 |
+
from omegaconf import OmegaConf, open_dict
|
40 |
+
from os.path import join
|
41 |
+
from hydra.utils import instantiate
|
42 |
+
|
43 |
+
# load config from path
|
44 |
+
# make path relative to current_dir
|
45 |
+
with initialize(version_base=None, config_path="osv5m__best_model"):
|
46 |
+
cfg = compose(config_name="config", overrides=[])
|
47 |
+
|
48 |
+
checkpoint = torch.load(join(path, "last.ckpt"))
|
49 |
+
del checkpoint["state_dict"][
|
50 |
+
"model.backbone.clip.vision_model.embeddings.position_ids"
|
51 |
+
]
|
52 |
+
torch.save(checkpoint, join(path, "last2.ckpt"))
|
53 |
+
|
54 |
+
with open_dict(cfg):
|
55 |
+
cfg.checkpoint = join(path, "last2.ckpt")
|
56 |
+
|
57 |
+
cfg.num_classes = 11399
|
58 |
+
cfg.model.network.mid.instance.final_dim = cfg.num_classes * 3
|
59 |
+
cfg.model.network.head.final_dim = cfg.num_classes * 3
|
60 |
+
cfg.model.network.head.instance.quadtree_path = join(path, "quadtree_10_1000.csv")
|
61 |
+
|
62 |
+
cfg.dataset.train_dataset.path = ""
|
63 |
+
cfg.dataset.val_dataset.path = ""
|
64 |
+
cfg.dataset.test_dataset.path = ""
|
65 |
+
cfg.logger.save_dir = ""
|
66 |
+
cfg.data_dir = ""
|
67 |
+
cfg.root_dir = ""
|
68 |
+
cfg.mode = "test"
|
69 |
+
cfg.model.network.backbone.instance.path = (
|
70 |
+
"laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K"
|
71 |
+
)
|
72 |
+
transform = instantiate(cfg.dataset.test_transform)
|
73 |
+
model = Geolocalizer.load_from_checkpoint(join(path, "last2.ckpt"), cfg=cfg.model)
|
74 |
+
os.remove(join(path, "last2.ckpt"))
|
75 |
+
|
76 |
+
@torch.no_grad()
|
77 |
+
def inference(model, x):
|
78 |
+
return x[0], model.model.backbone({"img": x[1].to(model.device)})[:, 0, :].cpu()
|
79 |
+
|
80 |
+
def collate_fn(batch):
|
81 |
+
return [b[0] for b in batch], torch.stack([b[1] for b in batch], dim=0)
|
82 |
+
|
83 |
+
def operate(self, idx, fp):
|
84 |
+
proc = self.processor(PIL.Image.open(fp))
|
85 |
+
return self.gt[idx][0], proc
|
86 |
+
|
87 |
+
return model, operate, inference, collate_fn
|
88 |
+
|
89 |
+
|
90 |
+
def load_clip(which):
|
91 |
+
# We evaluate on:
|
92 |
+
# - "openai/clip-vit-base-patch32"
|
93 |
+
# - "openai/clip-vit-large-patch14-336"
|
94 |
+
# - "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
|
95 |
+
# - "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K"
|
96 |
+
# - "geolocal/StreetCLIP"
|
97 |
+
from transformers import CLIPProcessor, CLIPModel
|
98 |
+
|
99 |
+
@torch.no_grad()
|
100 |
+
def inference(model, img):
|
101 |
+
image_ids = img.data.pop("image_id")
|
102 |
+
image_input = img.to(model.device)
|
103 |
+
image_input["pixel_values"] = image_input["pixel_values"].squeeze(1)
|
104 |
+
features = model.get_image_features(**image_input)
|
105 |
+
features /= features.norm(dim=-1, keepdim=True)
|
106 |
+
return image_ids, features.cpu()
|
107 |
+
|
108 |
+
processor = CLIPProcessor.from_pretrained(which)
|
109 |
+
|
110 |
+
def operate(self, idx, fp):
|
111 |
+
pil = PIL.Image.open(fp)
|
112 |
+
proc = processor(images=pil, return_tensors="pt")
|
113 |
+
proc["image_id"] = self.gt[idx][0]
|
114 |
+
return proc
|
115 |
+
|
116 |
+
return CLIPModel.from_pretrained(which), operate, inference, None
|
117 |
+
|
118 |
+
|
119 |
+
def load_dino(which):
|
120 |
+
# We evaluate on:
|
121 |
+
# - 'facebook/dinov2-large'
|
122 |
+
from transformers import AutoImageProcessor, AutoModel
|
123 |
+
|
124 |
+
@torch.no_grad()
|
125 |
+
def inference(model, img):
|
126 |
+
image_ids = img.data.pop("image_id")
|
127 |
+
image_input = img.to(model.device)
|
128 |
+
image_input["pixel_values"] = image_input["pixel_values"].squeeze(1)
|
129 |
+
features = model(**image_input).last_hidden_state[:, 0]
|
130 |
+
features /= features.norm(dim=-1, keepdim=True)
|
131 |
+
return image_ids, features.cpu()
|
132 |
+
|
133 |
+
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-large")
|
134 |
+
|
135 |
+
def operate(self, idx, fp):
|
136 |
+
pil = PIL.Image.open(fp)
|
137 |
+
proc = processor(images=pil, return_tensors="pt")
|
138 |
+
proc["image_id"] = self.gt[idx][0]
|
139 |
+
return proc
|
140 |
+
|
141 |
+
return AutoModel.from_pretrained("facebook/dinov2-large"), operate, inference, None
|
142 |
+
|
143 |
+
|
144 |
+
def get_backbone(name):
|
145 |
+
if os.path.isdir(name):
|
146 |
+
return load_plonk(name)
|
147 |
+
elif "clip" in name.lower():
|
148 |
+
return load_clip(name)
|
149 |
+
elif "dino" in name.lower():
|
150 |
+
return load_dino(name)
|
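A hedged usage sketch of get_backbone() together with the GeoDataset defined above (paths are placeholders; `operate` is the `transformation` argument and is called as transformation(dataset, index, image_path) inside __getitem__):

import torch

model, operate, inference, collate_fn = get_backbone("openai/clip-vit-base-patch32")
dataset = GeoDataset("data/test", "data/test.csv", operate, tag="id")
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
sample = dataset[0]                            # a processed image, with its id carried along
image_ids, feats = inference(model, sample)    # batches from a DataLoader (with collate_fn) work the same way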
scripts/retrieval/retrieval.py (ADDED)
@@ -0,0 +1,143 @@
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import PIL
|
4 |
+
import json
|
5 |
+
import torch
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import operator
|
9 |
+
|
10 |
+
from PIL import Image
|
11 |
+
from itertools import cycle
|
12 |
+
from tqdm.auto import tqdm, trange
|
13 |
+
from os.path import join
|
14 |
+
from PIL import Image
|
15 |
+
|
16 |
+
from tqdm import tqdm
|
17 |
+
from torch.utils.data import Dataset, DataLoader
|
18 |
+
from torch.nn import functional as F
|
19 |
+
|
20 |
+
from backbone import GeoDataset, get_backbone
|
21 |
+
from utils import haversine, get_filenames, get_match_values, compute_print_accuracy
|
22 |
+
|
23 |
+
|
24 |
+
def compute_features(path, data_dir, csv_file, tag, args):
|
25 |
+
data = GeoDataset(data_dir, csv_file, tag=tag)
|
26 |
+
if not os.path.isdir(path) or len(
|
27 |
+
os.listdir(path)
|
28 |
+
) != len(data):
|
29 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
30 |
+
model, transform, inference, collate_fn = get_backbone(args.name)
|
31 |
+
dataloader = DataLoader(
|
32 |
+
data,
|
33 |
+
batch_size=args.batch_size,
|
34 |
+
shuffle=False,
|
35 |
+
num_workers=8,
|
36 |
+
collate_fn=collate_fn,
|
37 |
+
)
|
38 |
+
model = model.to(device)
|
39 |
+
os.makedirs(path, exist_ok=True)
|
40 |
+
|
41 |
+
for i, x in enumerate(tqdm(dataloader)):
|
42 |
+
image_ids, features = inference(model, x)
|
43 |
+
# save features as numpy array
|
44 |
+
for j, image_id in zip(range(features.shape[0]), image_ids):
|
45 |
+
np.save(join(path, f"{image_id}.npy"), features[j].unsqueeze(0).numpy())
|
46 |
+
|
47 |
+
|
48 |
+
def get_results(args, train_test):
|
49 |
+
import joblib
|
50 |
+
|
51 |
+
if not os.path.isfile(join(args.features_parent, ".cache", "1-nn.pkl")):
|
52 |
+
import faiss, glob, bisect
|
53 |
+
|
54 |
+
# import sys; sys.exit(0)
|
55 |
+
indexes = [
|
56 |
+
get_filenames(idx) for idx in tqdm(range(1, 6), desc="Loading indexes...")
|
57 |
+
]
|
58 |
+
|
59 |
+
train_gt = pd.read_csv(
|
60 |
+
join(args.data_parent, args.annotation_file), dtype={"image_id": str}
|
61 |
+
)[["image_id", "latitude", "longitude"]]
|
62 |
+
test_gt = pd.read_csv(test_path_csv, dtype={"id": str})[
|
63 |
+
["id", "latitude", "longitude"]
|
64 |
+
]
|
65 |
+
|
66 |
+
# make a map between image_id and lat/lon
|
67 |
+
train_gt = {
|
68 |
+
g[1]["image_id"]: np.array([g[1]["latitude"], g[1]["longitude"]])
|
69 |
+
for g in tqdm(
|
70 |
+
train_gt.iterrows(), total=len(train_gt), desc="Loading train_gt"
|
71 |
+
)
|
72 |
+
}
|
73 |
+
test_gt = {
|
74 |
+
g[1]["id"]: np.array([g[1]["latitude"], g[1]["longitude"]])
|
75 |
+
for g in tqdm(
|
76 |
+
test_gt.iterrows(), total=len(test_gt), desc="Loading test_gt"
|
77 |
+
)
|
78 |
+
}
|
79 |
+
|
80 |
+
train_test = []
|
81 |
+
os.makedirs(join(args.features_parent, ".cache"), exist_ok=True)
|
82 |
+
for f in tqdm(os.listdir(test_features_dir)):
|
83 |
+
query_vector = np.load(join(test_features_dir, f))
|
84 |
+
|
85 |
+
neighbors = []
|
86 |
+
for index, ids in indexes:
|
87 |
+
distances, indices = index.search(query_vector, 1)
|
88 |
+
distances, indices = np.squeeze(distances), np.squeeze(indices)
|
89 |
+
bisect.insort(
|
90 |
+
neighbors, (ids[indices], distances), key=operator.itemgetter(1)
|
91 |
+
)
|
92 |
+
|
93 |
+
neighbors = list(reversed(neighbors))
|
94 |
+
train_gps = train_gt[neighbors[0][0].replace(".npy", "")][None, :]
|
95 |
+
test_gps = test_gt[f.replace(".npy", "")][None, :]
|
96 |
+
train_test.append((train_gps, test_gps))
|
97 |
+
joblib.dump(train_test, join(args.features_parent, ".cache", "1-nn.pkl"))
|
98 |
+
else:
|
99 |
+
train_test = joblib.load(join(args.features_parent, ".cache", "1-nn.pkl"))
|
100 |
+
|
101 |
+
return train_test
|
102 |
+
|
103 |
+
|
104 |
+
if __name__ == "__main__":
|
105 |
+
# make a train/eval argparser
|
106 |
+
import argparse
|
107 |
+
|
108 |
+
parser = argparse.ArgumentParser()
|
109 |
+
parser.add_argument("--id", type=int, default=1) # maybe need to remove/refactor
|
110 |
+
parser.add_argument("--batch_size", type=int, default=512)
|
111 |
+
parser.add_argument(
|
112 |
+
"--annotation_file", type=str, required=False, default="train.csv"
|
113 |
+
)
|
114 |
+
parser.add_argument("--name", type=str, default="openai/clip-vit-base-patch32")
|
115 |
+
parser.add_argument("--features_parent", type=str, default="faiss/")
|
116 |
+
parser.add_argument("--data_parent", type=str, default="data/")
|
117 |
+
parser.add_argument("--test", action="store_true")
|
118 |
+
|
119 |
+
args = parser.parse_args()
|
120 |
+
args.features_parent = join(args.features_parent, args.name)
|
121 |
+
if args.test:
|
122 |
+
csv_file = join(args.data_parent, "test.csv")
|
123 |
+
data_dir = join(args.data_parent, "test")
|
124 |
+
path = test_features_dir = join(args.features_parent, "features-test")
test_path_csv = csv_file  # get_results() reads these module-level names
|
125 |
+
model = get_backbone(args.name)
|
126 |
+
compute_features(path, data_dir, csv_file, tag="id", args=args)
|
127 |
+
train_test = get_results(args, None)
|
128 |
+
|
129 |
+
from collections import Counter
|
130 |
+
|
131 |
+
N, pos = Counter(), Counter()
|
132 |
+
for train_gps, test_gps in tqdm(train_test, desc="Computing accuracy..."):
|
133 |
+
get_match_values(train_gps, test_gps, N, pos)
|
134 |
+
|
135 |
+
for train_gps, test_gps in tqdm(train_test, desc="Computing haversine..."):
|
136 |
+
haversine(train_gps, test_gps, N, pos)
|
137 |
+
|
138 |
+
compute_print_accuracy(N, pos)
|
139 |
+
else:
|
140 |
+
csv_file = join(args.data_parent, args.annotation_file)
|
141 |
+
path = join(args.features_parent, f"features-{args.id}")
|
142 |
+
data_dir = join(args.data_parent, f"images-{args.id}", "train")
|
143 |
+
compute_features(path, data_dir, csv_file, tag="image_id", args=args)
|
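The 1-NN lookup above relies on autofaiss/faiss; its core, in isolation on toy features (the build_index call mirrors utils.get_filenames):

import numpy as np
from autofaiss import build_index

feats = np.random.rand(1000, 512).astype("float32")           # stand-ins for the saved train features
index = build_index(embeddings=feats, save_on_disk=False)[0]  # returns (index, index_infos)
query = np.random.rand(1, 512).astype("float32")
distances, indices = index.search(query, 1)                   # nearest train feature per query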
scripts/retrieval/street-clip-zero-shot.py (ADDED)
@@ -0,0 +1,299 @@
1 |
+
import traceback
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import PIL
|
5 |
+
import json
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
+
import pandas as pd
|
9 |
+
import operator
|
10 |
+
import joblib
|
11 |
+
import reverse_geocoder
|
12 |
+
|
13 |
+
from PIL import Image
|
14 |
+
from itertools import cycle
|
15 |
+
from tqdm.auto import tqdm, trange
|
16 |
+
from os.path import join
|
17 |
+
from PIL import Image
|
18 |
+
|
19 |
+
from tqdm import tqdm
|
20 |
+
from collections import Counter
|
21 |
+
from transformers import CLIPProcessor, CLIPModel
|
22 |
+
from torch.utils.data import Dataset, DataLoader
|
23 |
+
from torch.nn import functional as F
|
24 |
+
from utils import haversine
|
25 |
+
|
26 |
+
|
27 |
+
class GeoDataset(Dataset):
|
28 |
+
def __init__(self, image_folder, annotation_file, tag="image_id"):
|
29 |
+
self.image_folder = image_folder
|
30 |
+
gt = pd.read_csv(annotation_file, dtype={tag: str})
|
31 |
+
files = set([f.replace(".jpg", "") for f in os.listdir(image_folder)])
|
32 |
+
gt = gt[gt[tag].isin(files)]
|
33 |
+
self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
34 |
+
self.gt = [
|
35 |
+
(g[1][tag], g[1]["latitude"], g[1]["longitude"]) for g in gt.iterrows()
|
36 |
+
]
|
37 |
+
self.tag = tag
|
38 |
+
|
39 |
+
def fid(self, i):
|
40 |
+
return self.gt[i][0]
|
41 |
+
|
42 |
+
def latlon(self, i):
|
43 |
+
return self.gt[i][1]
|
44 |
+
|
45 |
+
def __len__(self):
|
46 |
+
return len(self.gt)
|
47 |
+
|
48 |
+
def __getitem__(self, idx):
|
49 |
+
fp = join(self.image_folder, self.gt[idx][0] + ".jpg")
|
50 |
+
pil = PIL.Image.open(fp)
|
51 |
+
proc = self.processor(images=pil, return_tensors="pt")
|
52 |
+
proc["image_id"] = self.gt[idx][0]
|
53 |
+
return proc
|
54 |
+
|
55 |
+
|
56 |
+
@torch.no_grad()
|
57 |
+
def compute_features_clip(img, model):
|
58 |
+
image_ids = img.data.pop("image_id")
|
59 |
+
image_input = img.to(model.device)
|
60 |
+
image_input["pixel_values"] = image_input["pixel_values"].squeeze(1)
|
61 |
+
features = model.get_image_features(**image_input)
|
62 |
+
features /= features.norm(dim=-1, keepdim=True)
|
63 |
+
return image_ids, features.cpu()
|
64 |
+
|
65 |
+
|
66 |
+
def get_prompts(country, region, sub_region, city):
|
67 |
+
a = country if country != "" else None
|
68 |
+
b, c, d = None, None, None
|
69 |
+
if a is not None:
|
70 |
+
b = country + ", " + region if region != "" else None
|
71 |
+
if b is not None:
|
72 |
+
c = (
|
73 |
+
country + ", " + region + ", " + sub_region
|
74 |
+
if sub_region != ""
|
75 |
+
else None
|
76 |
+
)
|
77 |
+
d = (
|
78 |
+
country + ", " + region + ", " + sub_region + ", " + city
|
79 |
+
if city != ""
|
80 |
+
else None
|
81 |
+
)
|
82 |
+
return a, b, c, d
|
83 |
+
|
84 |
+
|
85 |
+
if __name__ == "__main__":
|
86 |
+
# make a train/eval argparser
|
87 |
+
import argparse
|
88 |
+
|
89 |
+
parser = argparse.ArgumentParser()
|
90 |
+
parser.add_argument(
|
91 |
+
"--annotation_file", type=str, required=False, default="train.csv"
|
92 |
+
)
|
93 |
+
parser.add_argument(
|
94 |
+
"--features_parent", type=str, default="/home/isig/gaia-v2/faiss/street-clip"
|
95 |
+
)
|
96 |
+
parser.add_argument(
|
97 |
+
"--data_parent", type=str, default="/home/isig/gaia-v2/loic-data/"
|
98 |
+
)
|
99 |
+
|
100 |
+
args = parser.parse_args()
|
101 |
+
test_path_csv = join(args.data_parent, "test.csv")
|
102 |
+
test_image_dir = join(args.data_parent, "test")
|
103 |
+
save_path = join(args.features_parent, "indexes/test.index")
|
104 |
+
test_features_dir = join(args.features_parent, "indexes/features-test")
|
105 |
+
|
106 |
+
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")
|
107 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
108 |
+
model = CLIPModel.from_pretrained("geolocal/StreetCLIP").to(device)
|
109 |
+
|
110 |
+
@torch.no_grad()
|
111 |
+
def compute_text_features_clip(text):
|
112 |
+
text_pt = processor(text=text, return_tensors="pt").to(device)
|
113 |
+
features = model.get_text_features(**text_pt)
|
114 |
+
features /= features.norm(dim=-1, keepdim=True)
|
115 |
+
return features.cpu().squeeze(0).numpy()
|
116 |
+
|
117 |
+
import country_converter as coco
|
118 |
+
|
119 |
+
if not os.path.isfile("text_street-clip-features.pkl"):
|
120 |
+
if not os.path.isfile("rg_cities1000.csv"):
|
121 |
+
os.system(
|
122 |
+
"wget https://raw.githubusercontent.com/thampiman/reverse-geocoder/master/reverse_geocoder/rg_cities1000.csv"
|
123 |
+
)
|
124 |
+
|
125 |
+
cities = pd.read_csv("rg_cities1000.csv")
|
126 |
+
cities = cities[["lat", "lon", "name", "admin1", "admin2", "cc"]]
|
127 |
+
reprs = {0: {}, 1: {}, 2: {}, 3: {}}
|
128 |
+
for line in tqdm(
|
129 |
+
cities.iterrows(), total=len(cities), desc="Creating hierarchy"
|
130 |
+
):
|
131 |
+
lat, lon, city, region, sub_region, cc = line[1]
|
132 |
+
try:
|
133 |
+
city, region, sub_region, cc = [
|
134 |
+
("" if pd.isna(x) else x)
|
135 |
+
for x in [
|
136 |
+
city,
|
137 |
+
region,
|
138 |
+
sub_region,
|
139 |
+
coco.convert(cc, to="name_short"),
|
140 |
+
]
|
141 |
+
]
|
142 |
+
a, b, c, d = get_prompts(cc, region, sub_region, city)
|
143 |
+
if a is not None:
|
144 |
+
if a not in reprs[0]:
|
145 |
+
reprs[0][a] = {
|
146 |
+
"gps": {(lat, lon)},
|
147 |
+
"embedding": compute_text_features_clip(a),
|
148 |
+
}
|
149 |
+
else:
|
150 |
+
reprs[0][a]["gps"].add((lat, lon))
|
151 |
+
|
152 |
+
if b is not None:
|
153 |
+
if b not in reprs[1]:
|
154 |
+
reprs[1][b] = {
|
155 |
+
"gps": {(lat, lon)},
|
156 |
+
"embedding": compute_text_features_clip(b),
|
157 |
+
}
|
158 |
+
else:
|
159 |
+
reprs[1][b]["gps"].add((lat, lon))
|
160 |
+
|
161 |
+
if c is not None:
|
162 |
+
if c not in reprs[2]:
|
163 |
+
reprs[2][c] = {
|
164 |
+
"gps": {(lat, lon)},
|
165 |
+
"embedding": compute_text_features_clip(c),
|
166 |
+
}
|
167 |
+
else:
|
168 |
+
reprs[2][c]["gps"].add((lat, lon))
|
169 |
+
|
170 |
+
if d is not None:
|
171 |
+
if d not in reprs[3]:
|
172 |
+
reprs[3][d] = {
|
173 |
+
"gps": {(lat, lon)},
|
174 |
+
"embedding": compute_text_features_clip(
|
175 |
+
d.replace(", , ", ", ")
|
176 |
+
),
|
177 |
+
}
|
178 |
+
else:
|
179 |
+
reprs[3][d]["gps"].add((lat, lon))
|
180 |
+
except Exception as e:
|
181 |
+
# print stack trace into file log.txt
|
182 |
+
with open("log.txt", "a") as f:
|
183 |
+
print(traceback.format_exc(), file=f)
|
184 |
+
|
185 |
+
reprs[-1] = {"": {"gps": (0, 0), "embedding": compute_text_features_clip("")}}
|
186 |
+
|
187 |
+
# compute mean for gps of all 'a' and 'b' and 'c' and 'd'
|
188 |
+
for i in range(4):
|
189 |
+
        for k in reprs[i].keys():
            reprs[i][k]["gps"] = tuple(
                np.array(list(reprs[i][k]["gps"])).mean(axis=0).tolist()
            )

    joblib.dump(reprs, "text_street-clip-features.pkl")
else:
    reprs = joblib.load("text_street-clip-features.pkl")


def get_loc(x):
    location = reverse_geocoder.search(x[0].tolist())[0]
    country = coco.convert(names=location["cc"], to="name_short")
    region = location.get("admin1", "")
    sub_region = location.get("admin2", "")
    city = location.get("name", "")
    a, b, c, d = get_prompts(country, region, sub_region, city)
    return a, b, c, d


def matches(embed, repr, control, gt, sw=None):
    first_max = max(
        (
            (k, embed.dot(v["embedding"]))
            for k, v in repr.items()
            if sw is None or k.startswith(sw)
        ),
        key=operator.itemgetter(1),
    )
    if first_max[1] > embed.dot(control["embedding"]):
        return repr[first_max[0]]["gps"], gt == first_max[0]
    else:
        return control["gps"], False


def get_match_values(gt, embed, N, pos):
    xa, xb, xc, xd = get_loc(gt)

    if xa is not None:
        N["country"] += 1
        gps, flag = matches(embed, reprs[0], reprs[-1][""], xa)
        if flag:
            pos["country"] += 1
            if xb is not None:
                N["region"] += 1
                gps, flag = matches(embed, reprs[1], reprs[0][xa], xb, sw=xa)
                if flag:
                    pos["region"] += 1
                    if xc is not None:
                        N["sub-region"] += 1
                        gps, flag = matches(
                            embed, reprs[2], reprs[1][xb], xc, sw=xb
                        )
                        if flag:
                            pos["sub-region"] += 1
                            if xd is not None:
                                N["city"] += 1
                                gps, flag = matches(
                                    embed, reprs[3], reprs[2][xc], xd, sw=xc
                                )
                                if flag:
                                    pos["city"] += 1
                    else:
                        if xd is not None:
                            N["city"] += 1
                            gps, flag = matches(
                                embed, reprs[3], reprs[1][xb], xd, sw=xb + ", "
                            )
                            if flag:
                                pos["city"] += 1

    haversine(np.array(gps)[None, :], np.array(gt), N, pos)


def compute_print_accuracy(N, pos):
    for k in N.keys():
        pos[k] /= N[k]

    # pretty-print accuracy in percentage with 2 floating points
    print(
        f'Accuracy: {pos["country"]*100.0:.2f} (country), {pos["region"]*100.0:.2f} (region), {pos["sub-region"]*100.0:.2f} (sub-region), {pos["city"]*100.0:.2f} (city)'
    )
    print(
        f'Haversine: {pos["haversine"]:.2f} (haversine), {pos["geoguessr"]:.2f} (geoguessr)'
    )


import joblib

data = GeoDataset(test_image_dir, test_path_csv, tag="id")
test_gt = pd.read_csv(test_path_csv, dtype={"id": str})[
    ["id", "latitude", "longitude"]
]
test_gt = {
    g[1]["id"]: np.array([g[1]["latitude"], g[1]["longitude"]])
    for g in tqdm(test_gt.iterrows(), total=len(test_gt), desc="Loading test_gt")
}

with open("/home/isig/gaia-v2/loic/plonk/test3_indices.txt", "r") as f:
    # read lines
    lines = f.readlines()
    # remove whitespace characters like `\n` at the end of each line
    lines = [l.strip() for l in lines]
    # and convert to set
    lines = set(lines)

train_test = []
N, pos = Counter(), Counter()
for f in tqdm(os.listdir(test_features_dir)):
    if f.replace(".npy", "") not in lines:
        continue
    query_vector = np.squeeze(np.load(join(test_features_dir, f)))
    test_gps = test_gt[f.replace(".npy", "")][None, :]
    get_match_values(test_gps, query_vector, N, pos)

compute_print_accuracy(N, pos)
|
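The hierarchical zero-shot evaluation above boils down to dot products between one image embedding and pre-computed text embeddings, where a candidate is only accepted if it scores higher than a "control" prompt. Below is a minimal, self-contained sketch of that decision rule, using toy 2-D embeddings and made-up country entries; only the matches function mirrors the script above, everything else is illustrative.

import operator

import numpy as np


def matches(embed, repr, control, gt, sw=None):
    # keep the best-scoring key (optionally restricted to prefix `sw`) and
    # accept it only if it beats the control embedding
    first_max = max(
        (
            (k, embed.dot(v["embedding"]))
            for k, v in repr.items()
            if sw is None or k.startswith(sw)
        ),
        key=operator.itemgetter(1),
    )
    if first_max[1] > embed.dot(control["embedding"]):
        return repr[first_max[0]]["gps"], gt == first_max[0]
    return control["gps"], False


# toy country-level representations (hypothetical embeddings and mean GPS)
countries = {
    "France": {"embedding": np.array([1.0, 0.0]), "gps": (46.0, 2.0)},
    "Japan": {"embedding": np.array([0.0, 1.0]), "gps": (36.0, 138.0)},
}
control = {"embedding": np.array([0.3, 0.3]), "gps": (0.0, 0.0)}

query = np.array([0.9, 0.1])  # closer to the "France" prompt
gps, correct = matches(query, countries, control, "France")
print(gps, correct)  # (46.0, 2.0) True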
scripts/retrieval/utils.py
ADDED
@@ -0,0 +1,113 @@
import os
from os.path import join

import numpy as np
import reverse_geocoder
from tqdm import tqdm


def get_loc(x):
    location = reverse_geocoder.search(x[0].tolist())[0]
    country = location.get("cc", "")
    region = location.get("admin1", "")
    sub_region = location.get("admin2", "")
    city = location.get("name", "")

    a = country if country != "" else None
    b, c, d = None, None, None
    if a is not None:
        b = country + "," + region if region != "" else None
        if b is not None:
            c = country + "," + region + "," + sub_region if sub_region != "" else None
            d = (
                country + "," + region + "," + sub_region + "," + city
                if city != ""
                else None
            )

    return a, b, c, d


def get_match_values(pred, gt, N, pos):
    xa, xb, xc, xd = get_loc(gt)
    ya, yb, yc, yd = get_loc(pred)

    if xa is not None:
        N["country"] += 1
        if xa == ya:
            pos["country"] += 1
            if xb is not None:
                N["region"] += 1
                if xb == yb:
                    pos["region"] += 1
                    if xc is not None:
                        N["sub-region"] += 1
                        if xc == yc:
                            pos["sub-region"] += 1
                            if xd is not None:
                                N["city"] += 1
                                if xd == yd:
                                    pos["city"] += 1


def compute_print_accuracy(N, pos):
    for k in N.keys():
        pos[k] /= N[k]

    # pretty-print accuracy in percentage with 2 floating points
    print(
        f'Accuracy: {pos["country"]*100.0:.2f} (country), {pos["region"]*100.0:.2f} (region), {pos["sub-region"]*100.0:.2f} (sub-region), {pos["city"]*100.0:.2f} (city)'
    )
    print(
        f'Haversine: {pos["haversine"]:.2f} (haversine), {pos["geoguessr"]:.2f} (geoguessr)'
    )


def get_filenames(idx):
    from autofaiss import build_index

    # `args.features_parent` is expected to be provided by the calling script
    path = join(args.features_parent, f"features-{idx}/")
    files = [f for f in os.listdir(path)]
    full_files = [join(path, f) for f in os.listdir(path)]
    index = build_index(
        embeddings=np.concatenate([np.load(f) for f in tqdm(full_files)], axis=0),
        nb_cores=12,
        save_on_disk=False,
    )[0]
    return index, files


def normalize(x):
    """Used to put all lat/lon inside ±90 and ±180."""
    lat, lon = x[:, 0], x[:, 1]
    lat = (lat + 90) % 360 - 90
    # points that wrapped past a pole are mirrored back and shifted to the opposite meridian
    over_pole = lat > 90
    lat = np.where(over_pole, 180 - lat, lat)
    lon = np.where(over_pole, lon + 180, lon)
    lon = (lon + 180) % 360 - 180
    return np.stack([lat, lon], axis=1)


def haversine(pred, gt, N, p):
    # pred and gt are (N, 2) arrays of (lat, lon) in degrees; they are
    # normalized and converted to radians before applying the haversine formula
    pred = np.radians(normalize(pred))
    gt = np.radians(normalize(gt))

    # difference in latitude and longitude between the predicted and ground truth points
    lat_diff = pred[:, 0] - gt[:, 0]
    lon_diff = pred[:, 1] - gt[:, 1]

    # haversine formula components
    lhs = np.sin(lat_diff / 2) ** 2
    rhs = np.cos(pred[:, 0]) * np.cos(gt[:, 0]) * np.sin(lon_diff / 2) ** 2
    a = lhs + rhs

    # final great-circle distance in km (Earth radius 6371 km)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    haversine_distance = 6371 * c[0]  # single-sample distance
    geoguessr_sum = 5000 * np.exp(-haversine_distance / 1492.7)

    N["geoguessr"] += 1
    p["geoguessr"] += geoguessr_sum

    N["haversine"] += 1
    p["haversine"] += haversine_distance
|
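The GeoGuessr-style score accumulated in haversine() is 5000 * exp(-d / 1492.7), with d the great-circle distance in km, so a perfect guess scores 5000 and the score halves roughly every 1035 km. A quick standalone check of that scoring rule (the helper name geoguessr_score is only for illustration):

import numpy as np


def geoguessr_score(distance_km):
    # same scoring rule as in haversine() above
    return 5000 * np.exp(-distance_km / 1492.7)


print(geoguessr_score(0.0))     # 5000.0 for a perfect guess
print(geoguessr_score(1492.7))  # ~1839.4, i.e. 5000 / e
print(geoguessr_score(1034.7))  # ~2500, the score halves about every 1035 km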
utils/__init__.py
ADDED
File without changes
|
utils/image_processing.py
ADDED
@@ -0,0 +1,58 @@
import torch
import torch.nn.functional as F
import torchvision


def remap_image_torch(image):
    image_torch = ((image + 1) / 2.0) * 255.0
    image_torch = torch.clip(image_torch, 0, 255).to(torch.uint8)
    return image_torch


class CenterCrop(torch.nn.Module):
    """Crops the given image at the center. Allows to crop to the maximum possible size.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        ratio (str): Desired output ratio of the crop that will do the maximum possible crop with the given ratio.
    """

    def __init__(self, size=None, ratio="1:1"):
        super().__init__()
        self.size = size
        self.ratio = ratio

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        if self.size is None:
            if isinstance(img, torch.Tensor):
                h, w = img.shape[-2:]
            else:
                w, h = img.size
            ratio = self.ratio.split(":")
            ratio = float(ratio[0]) / float(ratio[1])
            ratioed_w = int(h * ratio)
            ratioed_h = int(w / ratio)
            if w >= h:
                if ratioed_h <= h:
                    size = (ratioed_h, w)
                else:
                    size = (h, ratioed_w)
            else:
                if ratioed_w <= w:
                    size = (h, ratioed_w)
                else:
                    size = (ratioed_h, w)
        else:
            size = self.size
        return torchvision.transforms.functional.center_crop(img, size)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(size={self.size})"
|
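With no fixed size, CenterCrop takes the largest centered crop with the requested aspect ratio. A small sketch on a dummy tensor, assuming the class above is importable as utils.image_processing.CenterCrop:

import torch

from utils.image_processing import CenterCrop

img = torch.zeros(3, 300, 400)  # C x H x W landscape image

square = CenterCrop(ratio="1:1")  # largest centered square
print(square(img).shape)  # torch.Size([3, 300, 300])

wide = CenterCrop(ratio="2:1")  # largest centered 2:1 crop
print(wide(img).shape)  # torch.Size([3, 200, 400])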
utils/lr_scheduler.py
ADDED
@@ -0,0 +1,96 @@
import math


class WarmupLR:
    """
    Linear warmup learning rate scheduler. After warmup, the learning rate is
    constant.

    Args:
        optimizer (torch.optim.Optimizer): optimizer
        warmup_steps (int): number of warmup steps
    """

    def __init__(self, optimizer, warmup_steps):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.base_lr = None

    def get_lr(self, lr, step):
        return lr * min(step / max(self.warmup_steps, 1), 1.0)

    def step(self, step):
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)


class WarmupCosineDecayLR:
    """
    Linear warmup learning rate scheduler. After warmup, the learning rate
    follows a cosine decay.

    Args:
        optimizer (torch.optim.Optimizer): optimizer
        warmup_steps (int): number of warmup steps
        total_steps (int): total number of steps
        rate (float): cosine decay rate
    """

    def __init__(self, optimizer, warmup_steps, total_steps, rate=1.0):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.base_lr = None
        self.total_steps = total_steps
        self.rate = rate

    def get_lr(self, lr, step):
        if step < self.warmup_steps:
            return lr * min(step / max(self.warmup_steps, 1), 1.0)
        else:
            return (
                0.5
                * lr
                * (
                    1
                    + math.cos(
                        self.rate
                        * math.pi
                        * (step - self.warmup_steps)
                        / (self.total_steps - self.warmup_steps)
                    )
                )
            )

    def step(self, step):
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)
|
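Both schedulers are driven manually with an explicit global step rather than subclassing torch.optim.lr_scheduler. A minimal sketch of how a training loop would use WarmupCosineDecayLR (dummy parameter and step counts chosen for illustration):

import torch

from utils.lr_scheduler import WarmupCosineDecayLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = WarmupCosineDecayLR(optimizer, warmup_steps=10, total_steps=100)

for step in range(100):
    scheduler.step(step)  # sets the LR for this step from the stored base LR
    # ... forward / backward / optimizer.step() would go here ...
    if step in (0, 10, 55, 99):
        print(step, optimizer.param_groups[0]["lr"])
# 0 -> 0.0 (start of warmup), 10 -> 0.1 (peak), 55 -> 0.05, 99 -> ~0.0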
utils/model_utils.py
ADDED
@@ -0,0 +1,14 @@
def print_trainable_parameters(model):
    """
    Prints the number and percentage of trainable parameters in the model.
    Useful for tracking % parameters trained for LoRA.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
|
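For example, freezing one layer of a small model and calling the helper prints the trainable fraction directly (a sketch, assuming the repo root is on the Python path):

import torch.nn as nn

from utils.model_utils import print_trainable_parameters

model = nn.Sequential(nn.Linear(16, 32), nn.Linear(32, 4))
for p in model[0].parameters():  # freeze the first layer
    p.requires_grad = False

print_trainable_parameters(model)
# trainable params: 132 || all params: 676 || trainable%: ~19.5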
utils/quadtree_10_1000.csv
ADDED
The diff for this file is too large to render.
|
|