nvidia
/

RADIO

mranzinger committed on May 4

Commit

19f9016

•

1 Parent(s): aa1a477

Fix double conditioning

Files changed (3) hide show

config.json CHANGED Viewed

@@ -347,6 +347,7 @@
     "AutoConfig": "hf_model.RADIOConfig",
     "AutoModel": "hf_model.RADIOModel"
   },
   "max_resolution": 2048,
   "patch_size": 16,
   "preferred_resolution": [

     "AutoConfig": "hf_model.RADIOConfig",
     "AutoModel": "hf_model.RADIOModel"
   },
+  "external_conditioner": false,
   "max_resolution": 2048,
   "patch_size": 16,
   "preferred_resolution": [

hf_model.py CHANGED Viewed

@@ -45,6 +45,7 @@ class RADIOConfig(PretrainedConfig):
         preferred_resolution: Optional[Resolution] = None,
         adaptor_names: Union[str, List[str]] = None,
         vitdet_window_size: Optional[int] = None,
         **kwargs,
     ):
         self.args = args
@@ -63,6 +64,7 @@ class RADIOConfig(PretrainedConfig):
         )
         self.adaptor_names = adaptor_names
         self.vitdet_window_size = vitdet_window_size
         super().__init__(**kwargs)
@@ -75,7 +77,7 @@ class RADIOModel(PreTrainedModel):
     config_class = RADIOConfig
-    def __init__(self, config):
         super().__init__(config)
         RADIOArgs = namedtuple("RADIOArgs", config.args.keys())
@@ -115,6 +117,7 @@ class RADIOModel(PreTrainedModel):
             preferred_resolution=config.preferred_resolution,
             adaptors=adaptors,
         )
     @property
     def adaptors(self) -> nn.ModuleDict:

         preferred_resolution: Optional[Resolution] = None,
         adaptor_names: Union[str, List[str]] = None,
         vitdet_window_size: Optional[int] = None,
+        external_conditioner: Optional[bool] = False,
         **kwargs,
     ):
         self.args = args
         )
         self.adaptor_names = adaptor_names
         self.vitdet_window_size = vitdet_window_size
+        self.external_conditioner = external_conditioner
         super().__init__(**kwargs)
     config_class = RADIOConfig
+    def __init__(self, config: RADIOConfig):
         super().__init__(config)
         RADIOArgs = namedtuple("RADIOArgs", config.args.keys())
             preferred_resolution=config.preferred_resolution,
             adaptors=adaptors,
         )
+        self.radio_model._external_conditioner = config.external_conditioner
     @property
     def adaptors(self) -> nn.ModuleDict:

radio_model.py CHANGED Viewed

@@ -51,6 +51,12 @@ class RADIOModel(nn.Module):
         self._patch_size = patch_size
         self._max_resolution = max_resolution
         self._window_size = window_size
         adaptors = adaptors or dict()
         self.adaptors = nn.ModuleDict(adaptors)
@@ -113,7 +119,8 @@ class RADIOModel(nn.Module):
                              '`self.get_nearest_supported_resolution(<height>, <width>) is provided as a convenience API. '
                              f'Input: {x.shape[-2:]}, Nearest: {self.get_nearest_supported_resolution(*x.shape[-2:])}')
-        x = self.input_conditioner(x)
         y = self.model.forward_features(x)
         if isinstance(self.model, VisionTransformer):

         self._patch_size = patch_size
         self._max_resolution = max_resolution
         self._window_size = window_size
+        # Workaround for Hugging Face usage, where data preparation is
+        # complicated and may already condition the input. When set to
+        # True, `self.input_conditioner` is not applied to the input
+        # tensor, which avoids conditioning it twice. This is set in
+        # `hf_model.RADIOModel` from `config.external_conditioner`.
+        self._external_conditioner = False
         adaptors = adaptors or dict()
         self.adaptors = nn.ModuleDict(adaptors)
                              '`self.get_nearest_supported_resolution(<height>, <width>) is provided as a convenience API. '
                              f'Input: {x.shape[-2:]}, Nearest: {self.get_nearest_supported_resolution(*x.shape[-2:])}')
+        if not self._external_conditioner:
+            x = self.input_conditioner(x)
         y = self.model.forward_features(x)
         if isinstance(self.model, VisionTransformer):