jinlinyi committed on
Commit
5fa7303
1 Parent(s): d752c3b

new model for edina dataset

Browse files
.gitattributes CHANGED
@@ -41,3 +41,6 @@ assets/imgs/907px-Vincent_van_Gogh_-_De_slaapkamer_-_Google_Art_Project.jpg filt
41
  assets/imgs/AdobeStock_286429091.jpeg filter=lfs diff=lfs merge=lfs -text
42
  assets/imgs/AdobeStock_331358641.jpeg filter=lfs diff=lfs merge=lfs -text
43
  assets/imgs/ filter=lfs diff=lfs merge=lfs -text
 
 
 
 
41
  assets/imgs/AdobeStock_286429091.jpeg filter=lfs diff=lfs merge=lfs -text
42
  assets/imgs/AdobeStock_331358641.jpeg filter=lfs diff=lfs merge=lfs -text
43
  assets/imgs/ filter=lfs diff=lfs merge=lfs -text
44
+ models/paramnet_360cities_edina_rpfpp.pth filter=lfs diff=lfs merge=lfs -text
45
+ models/paramnet_360cities_edina_rpf.pth filter=lfs diff=lfs merge=lfs -text
46
+ assets/imgs/epic.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -34,6 +34,7 @@ description = """
34
  <p>Try our Gradio demo for Perspective Fields for single image camera calibration. You can click on one of the provided examples or upload your own image.</p>
35
  <h3>Available Models:</h3>
36
  <ol>
 
37
  <li><strong>PersNet-360Cities:</strong> PerspectiveNet trained on the 360Cities dataset. This model predicts perspective fields and is designed to be robust and generalize well to both indoor and outdoor images.</li>
38
  <li><strong>PersNet_Paramnet-GSV-uncentered:</strong> A combination of PerspectiveNet and ParamNet trained on the Google Street View (GSV) dataset. This model predicts camera Roll, Pitch, and Field of View (FoV), as well as the Principal Point location.</li>
39
  <li><strong>PersNet_Paramnet-GSV-centered:</strong> PerspectiveNet+ParamNet trained on the GSV dataset. This model assumes the principal point is at the center of the image and predicts camera Roll, Pitch, and FoV.</li>
@@ -153,6 +154,21 @@ print(examples)
153
 
154
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
155
  model_zoo = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  'PersNet-360Cities': {
157
  'weights': ['https://www.dropbox.com/s/czqrepqe7x70b7y/cvpr2023.pth'],
158
  'opts': ['MODEL.WEIGHTS', 'models/cvpr2023.pth', 'MODEL.DEVICE', device,],
 
34
  <p>Try our Gradio demo for Perspective Fields for single image camera calibration. You can click on one of the provided examples or upload your own image.</p>
35
  <h3>Available Models:</h3>
36
  <ol>
37
+ <li>[NEW!!!]<strong>Paramnet-360Cities-edina:</strong> PerspectiveNet+ParamNet trained on 360cities and edina dataset.</li>
38
  <li><strong>PersNet-360Cities:</strong> PerspectiveNet trained on the 360Cities dataset. This model predicts perspective fields and is designed to be robust and generalize well to both indoor and outdoor images.</li>
39
  <li><strong>PersNet_Paramnet-GSV-uncentered:</strong> A combination of PerspectiveNet and ParamNet trained on the Google Street View (GSV) dataset. This model predicts camera Roll, Pitch, and Field of View (FoV), as well as the Principal Point location.</li>
40
  <li><strong>PersNet_Paramnet-GSV-centered:</strong> PerspectiveNet+ParamNet trained on the GSV dataset. This model assumes the principal point is at the center of the image and predicts camera Roll, Pitch, and FoV.</li>
 
154
 
155
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
156
  model_zoo = {
157
+
158
+ 'Paramnet-360Cities-edina-centered': {
159
+ 'weights': ['https://www.dropbox.com/s/z2dja70bgy007su/paramnet_360cities_edina_rpf.pth'],
160
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_360cities_edina_rpf.pth', 'MODEL.DEVICE', device,],
161
+ 'config_file': 'models/paramnet_360cities_edina_rpf.yaml',
162
+ 'param': True,
163
+ },
164
+
165
+ 'Paramnet-360Cities-edina-uncentered': {
166
+ 'weights': ['https://www.dropbox.com/s/nt29e1pi83mm1va/paramnet_360cities_edina_rpfpp.pth'],
167
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_360cities_edina_rpfpp.pth', 'MODEL.DEVICE', device,],
168
+ 'config_file': 'models/paramnet_360cities_edina_rpfpp.yaml',
169
+ 'param': True,
170
+ },
171
+
172
  'PersNet-360Cities': {
173
  'weights': ['https://www.dropbox.com/s/czqrepqe7x70b7y/cvpr2023.pth'],
174
  'opts': ['MODEL.WEIGHTS', 'models/cvpr2023.pth', 'MODEL.DEVICE', device,],
assets/imgs/epic.png ADDED

Git LFS Details

  • SHA256: c2a42a05c6498aca8a92355bbe065f49865b0ab60a11e96b6e1458ac1e5d237a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
models/paramnet_360cities_edina_rpf.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fe0285fe5d4592aec77e9ef57ac94273deb79bcb99f27d08bae68a2d1efc4a
3
+ size 837147876
models/paramnet_360cities_edina_rpf.yaml ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: uniform_vfov_crop_resize
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NO_GEOMETRY_AUG: false
9
+ NUM_WORKERS: 8
10
+ REPEAT_THRESHOLD: 0.0
11
+ RESIZE:
12
+ - 320
13
+ - 320
14
+ SAMPLER_TRAIN: TrainingSampler
15
+ DATASETS:
16
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
17
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
18
+ PROPOSAL_FILES_TEST: []
19
+ PROPOSAL_FILES_TRAIN: []
20
+ TEST:
21
+ - edina_test_crop_vfov
22
+ TRAIN:
23
+ - edina_train
24
+ - cities360_train
25
+ DEBUG_ON: false
26
+ GLOBAL:
27
+ HACK: 1.0
28
+ INPUT:
29
+ CROP:
30
+ ENABLED: false
31
+ SIZE:
32
+ - 0.9
33
+ - 0.9
34
+ TYPE: relative_range
35
+ FORMAT: BGR
36
+ MASK_FORMAT: polygon
37
+ MAX_SIZE_TEST: 1333
38
+ MAX_SIZE_TRAIN: 1333
39
+ MIN_SIZE_TEST: 800
40
+ MIN_SIZE_TRAIN:
41
+ - 800
42
+ MIN_SIZE_TRAIN_SAMPLING: choice
43
+ ONLINE_CROP: false
44
+ RANDOM_FLIP: horizontal
45
+ MODEL:
46
+ ANCHOR_GENERATOR:
47
+ ANGLES:
48
+ - - -90
49
+ - 0
50
+ - 90
51
+ ASPECT_RATIOS:
52
+ - - 0.5
53
+ - 1.0
54
+ - 2.0
55
+ NAME: DefaultAnchorGenerator
56
+ OFFSET: 0.0
57
+ SIZES:
58
+ - - 32
59
+ - 64
60
+ - 128
61
+ - 256
62
+ - 512
63
+ BACKBONE:
64
+ FREEZE_AT: 2
65
+ NAME: build_mit_backbone
66
+ CENTER_ON: false
67
+ DEVICE: cuda
68
+ FPN:
69
+ FUSE_TYPE: sum
70
+ IN_FEATURES: []
71
+ NORM: ''
72
+ OUT_CHANNELS: 256
73
+ FPN_CENTER_HEAD:
74
+ COMMON_STRIDE: 4
75
+ CONVS_DIM: 128
76
+ IGNORE_VALUE: 360
77
+ IN_FEATURES:
78
+ - p2
79
+ - p3
80
+ - p4
81
+ - p5
82
+ LOSS_WEIGHT: 1.0
83
+ NAME: CenterFPNHead
84
+ NORM: GN
85
+ NUM_CLASSES: 30
86
+ FPN_GRAVITY_HEAD:
87
+ COMMON_STRIDE: 4
88
+ CONVS_DIM: 128
89
+ IGNORE_VALUE: 360
90
+ IN_FEATURES:
91
+ - p2
92
+ - p3
93
+ - p4
94
+ - p5
95
+ LOSS_WEIGHT: 1.0
96
+ NAME: GravityFPNHead
97
+ NORM: GN
98
+ NUM_CLASSES: 361
99
+ FPN_HEADS:
100
+ NAME: StandardFPNHeads
101
+ FPN_LATITUDE_HEAD:
102
+ COMMON_STRIDE: 4
103
+ CONVS_DIM: 128
104
+ IGNORE_VALUE: -1
105
+ IN_FEATURES:
106
+ - p2
107
+ - p3
108
+ - p4
109
+ - p5
110
+ LOSS_WEIGHT: 1.0
111
+ NAME: LatitudeFPNHead
112
+ NORM: GN
113
+ NUM_CLASSES: 9
114
+ FREEZE: []
115
+ GRAVITY_DECODER:
116
+ IGNORE_VALUE: 72
117
+ LOSS_TYPE: regression
118
+ LOSS_WEIGHT: 1.0
119
+ NAME: GravityDecoder
120
+ NUM_CLASSES: 73
121
+ GRAVITY_ON: true
122
+ HEIGHT_DECODER:
123
+ LOSS_WEIGHT: 1.0
124
+ NAME: HeightDecoder
125
+ HEIGHT_ON: false
126
+ KEYPOINT_ON: false
127
+ LATITUDE_DECODER:
128
+ IGNORE_VALUE: -1
129
+ LOSS_TYPE: regression
130
+ LOSS_WEIGHT: 1.0
131
+ NAME: LatitudeDecoder
132
+ NUM_CLASSES: 1
133
+ LATITUDE_ON: true
134
+ LOAD_PROPOSALS: false
135
+ MASK_ON: false
136
+ META_ARCHITECTURE: PersFormer
137
+ PANOPTIC_FPN:
138
+ COMBINE:
139
+ ENABLED: true
140
+ INSTANCES_CONFIDENCE_THRESH: 0.5
141
+ OVERLAP_THRESH: 0.5
142
+ STUFF_AREA_LIMIT: 4096
143
+ INSTANCE_LOSS_WEIGHT: 1.0
144
+ PARAM_DECODER:
145
+ DEBUG_LAT: false
146
+ DEBUG_UP: false
147
+ INPUT_SIZE: 64
148
+ LOSS_TYPE: regression
149
+ LOSS_WEIGHT: 1.0
150
+ NAME: ParamNet
151
+ PREDICT_PARAMS:
152
+ - roll
153
+ - pitch
154
+ - vfov
155
+ SYNTHETIC_PRETRAIN: false
156
+ PERSFORMER_HEADS:
157
+ NAME: StandardPersformerHeads
158
+ PIXEL_MEAN:
159
+ - 103.53
160
+ - 116.28
161
+ - 123.675
162
+ PIXEL_STD:
163
+ - 1.0
164
+ - 1.0
165
+ - 1.0
166
+ PROPOSAL_GENERATOR:
167
+ MIN_SIZE: 0
168
+ NAME: RPN
169
+ RECOVER_PP: false
170
+ RECOVER_RPF: true
171
+ RESNETS:
172
+ DEFORM_MODULATED: false
173
+ DEFORM_NUM_GROUPS: 1
174
+ DEFORM_ON_PER_STAGE:
175
+ - false
176
+ - false
177
+ - false
178
+ - false
179
+ DEPTH: 50
180
+ NORM: FrozenBN
181
+ NUM_GROUPS: 1
182
+ OUT_FEATURES:
183
+ - res4
184
+ RES2_OUT_CHANNELS: 256
185
+ RES5_DILATION: 1
186
+ STEM_OUT_CHANNELS: 64
187
+ STRIDE_IN_1X1: true
188
+ WIDTH_PER_GROUP: 64
189
+ RETINANET:
190
+ BBOX_REG_LOSS_TYPE: smooth_l1
191
+ BBOX_REG_WEIGHTS: &id002
192
+ - 1.0
193
+ - 1.0
194
+ - 1.0
195
+ - 1.0
196
+ FOCAL_LOSS_ALPHA: 0.25
197
+ FOCAL_LOSS_GAMMA: 2.0
198
+ IN_FEATURES:
199
+ - p3
200
+ - p4
201
+ - p5
202
+ - p6
203
+ - p7
204
+ IOU_LABELS:
205
+ - 0
206
+ - -1
207
+ - 1
208
+ IOU_THRESHOLDS:
209
+ - 0.4
210
+ - 0.5
211
+ NMS_THRESH_TEST: 0.5
212
+ NORM: ''
213
+ NUM_CLASSES: 80
214
+ NUM_CONVS: 4
215
+ PRIOR_PROB: 0.01
216
+ SCORE_THRESH_TEST: 0.05
217
+ SMOOTH_L1_LOSS_BETA: 0.1
218
+ TOPK_CANDIDATES_TEST: 1000
219
+ ROI_BOX_CASCADE_HEAD:
220
+ BBOX_REG_WEIGHTS:
221
+ - &id001
222
+ - 10.0
223
+ - 10.0
224
+ - 5.0
225
+ - 5.0
226
+ - - 20.0
227
+ - 20.0
228
+ - 10.0
229
+ - 10.0
230
+ - - 30.0
231
+ - 30.0
232
+ - 15.0
233
+ - 15.0
234
+ IOUS:
235
+ - 0.5
236
+ - 0.6
237
+ - 0.7
238
+ ROI_BOX_HEAD:
239
+ BBOX_REG_LOSS_TYPE: smooth_l1
240
+ BBOX_REG_LOSS_WEIGHT: 1.0
241
+ BBOX_REG_WEIGHTS: *id001
242
+ CLS_AGNOSTIC_BBOX_REG: false
243
+ CONV_DIM: 256
244
+ FC_DIM: 1024
245
+ FED_LOSS_FREQ_WEIGHT_POWER: 0.5
246
+ FED_LOSS_NUM_CLASSES: 50
247
+ NAME: ''
248
+ NORM: ''
249
+ NUM_CONV: 0
250
+ NUM_FC: 0
251
+ POOLER_RESOLUTION: 14
252
+ POOLER_SAMPLING_RATIO: 0
253
+ POOLER_TYPE: ROIAlignV2
254
+ SMOOTH_L1_BETA: 0.0
255
+ TRAIN_ON_PRED_BOXES: false
256
+ USE_FED_LOSS: false
257
+ USE_SIGMOID_CE: false
258
+ ROI_HEADS:
259
+ BATCH_SIZE_PER_IMAGE: 512
260
+ IN_FEATURES:
261
+ - res4
262
+ IOU_LABELS:
263
+ - 0
264
+ - 1
265
+ IOU_THRESHOLDS:
266
+ - 0.5
267
+ NAME: Res5ROIHeads
268
+ NMS_THRESH_TEST: 0.5
269
+ NUM_CLASSES: 80
270
+ POSITIVE_FRACTION: 0.25
271
+ PROPOSAL_APPEND_GT: true
272
+ SCORE_THRESH_TEST: 0.05
273
+ ROI_KEYPOINT_HEAD:
274
+ CONV_DIMS:
275
+ - 512
276
+ - 512
277
+ - 512
278
+ - 512
279
+ - 512
280
+ - 512
281
+ - 512
282
+ - 512
283
+ LOSS_WEIGHT: 1.0
284
+ MIN_KEYPOINTS_PER_IMAGE: 1
285
+ NAME: KRCNNConvDeconvUpsampleHead
286
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
287
+ NUM_KEYPOINTS: 17
288
+ POOLER_RESOLUTION: 14
289
+ POOLER_SAMPLING_RATIO: 0
290
+ POOLER_TYPE: ROIAlignV2
291
+ ROI_MASK_HEAD:
292
+ CLS_AGNOSTIC_MASK: false
293
+ CONV_DIM: 256
294
+ NAME: MaskRCNNConvUpsampleHead
295
+ NORM: ''
296
+ NUM_CONV: 0
297
+ POOLER_RESOLUTION: 14
298
+ POOLER_SAMPLING_RATIO: 0
299
+ POOLER_TYPE: ROIAlignV2
300
+ RPN:
301
+ BATCH_SIZE_PER_IMAGE: 256
302
+ BBOX_REG_LOSS_TYPE: smooth_l1
303
+ BBOX_REG_LOSS_WEIGHT: 1.0
304
+ BBOX_REG_WEIGHTS: *id002
305
+ BOUNDARY_THRESH: -1
306
+ CONV_DIMS:
307
+ - -1
308
+ HEAD_NAME: StandardRPNHead
309
+ IN_FEATURES:
310
+ - res4
311
+ IOU_LABELS:
312
+ - 0
313
+ - -1
314
+ - 1
315
+ IOU_THRESHOLDS:
316
+ - 0.3
317
+ - 0.7
318
+ LOSS_WEIGHT: 1.0
319
+ NMS_THRESH: 0.7
320
+ POSITIVE_FRACTION: 0.5
321
+ POST_NMS_TOPK_TEST: 1000
322
+ POST_NMS_TOPK_TRAIN: 2000
323
+ PRE_NMS_TOPK_TEST: 6000
324
+ PRE_NMS_TOPK_TRAIN: 12000
325
+ SMOOTH_L1_BETA: 0.0
326
+ SEM_SEG_HEAD:
327
+ COMMON_STRIDE: 4
328
+ CONVS_DIM: 128
329
+ IGNORE_VALUE: 255
330
+ IN_FEATURES:
331
+ - p2
332
+ - p3
333
+ - p4
334
+ - p5
335
+ LOSS_WEIGHT: 1.0
336
+ NAME: SemSegFPNHead
337
+ NORM: GN
338
+ NUM_CLASSES: 54
339
+ WEIGHTS: ./init_model_weights/cvpr2023.pth
340
+ OUTPUT_DIR: /home/msticha/exps/e01_edina
341
+ OVERFIT_ON: false
342
+ SEED: -1
343
+ SOLVER:
344
+ AMP:
345
+ ENABLED: false
346
+ BASE_LR: 0.01
347
+ BASE_LR_END: 0.0
348
+ BIAS_LR_FACTOR: 1.0
349
+ CHECKPOINT_PERIOD: 500
350
+ CLIP_GRADIENTS:
351
+ CLIP_TYPE: value
352
+ CLIP_VALUE: 1.0
353
+ ENABLED: false
354
+ NORM_TYPE: 2.0
355
+ GAMMA: 0.1
356
+ IMS_PER_BATCH: 32
357
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
358
+ MAX_ITER: 90000
359
+ MOMENTUM: 0.9
360
+ NESTEROV: false
361
+ NUM_DECAYS: 3
362
+ REFERENCE_WORLD_SIZE: 0
363
+ RESCALE_INTERVAL: false
364
+ STEPS:
365
+ - 40000
366
+ - 60000
367
+ WARMUP_FACTOR: 0.001
368
+ WARMUP_ITERS: 1000
369
+ WARMUP_METHOD: linear
370
+ WEIGHT_DECAY: 0.0001
371
+ WEIGHT_DECAY_BIAS: null
372
+ WEIGHT_DECAY_NORM: 0.0
373
+ TEST:
374
+ AUG:
375
+ ENABLED: false
376
+ FLIP: true
377
+ MAX_SIZE: 4000
378
+ MIN_SIZES:
379
+ - 400
380
+ - 500
381
+ - 600
382
+ - 700
383
+ - 800
384
+ - 900
385
+ - 1000
386
+ - 1100
387
+ - 1200
388
+ DETECTIONS_PER_IMAGE: 100
389
+ EVAL_PERIOD: 500
390
+ EXPECTED_RESULTS: []
391
+ KEYPOINT_OKS_SIGMAS: []
392
+ PRECISE_BN:
393
+ ENABLED: false
394
+ NUM_ITER: 200
395
+ VERSION: 2
396
+ VIS_PERIOD: 500
models/paramnet_360cities_edina_rpfpp.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5220df6d9d89380d490bb9a3e4a162a8a5ca8eeb8610510173410076719fca67
3
+ size 837147876
models/paramnet_360cities_edina_rpfpp.yaml ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: uniform_vfov_crop_resize
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NO_GEOMETRY_AUG: false
9
+ NUM_WORKERS: 8
10
+ REPEAT_THRESHOLD: 0.0
11
+ RESIZE:
12
+ - 320
13
+ - 320
14
+ SAMPLER_TRAIN: TrainingSampler
15
+ DATASETS:
16
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
17
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
18
+ PROPOSAL_FILES_TEST: []
19
+ PROPOSAL_FILES_TRAIN: []
20
+ TEST:
21
+ - edina_test_crop_uniform
22
+ TRAIN:
23
+ - edina_train
24
+ - cities360_train
25
+ DEBUG_ON: false
26
+ GLOBAL:
27
+ HACK: 1.0
28
+ INPUT:
29
+ CROP:
30
+ ENABLED: false
31
+ SIZE:
32
+ - 0.9
33
+ - 0.9
34
+ TYPE: relative_range
35
+ FORMAT: BGR
36
+ MASK_FORMAT: polygon
37
+ MAX_SIZE_TEST: 1333
38
+ MAX_SIZE_TRAIN: 1333
39
+ MIN_SIZE_TEST: 800
40
+ MIN_SIZE_TRAIN:
41
+ - 800
42
+ MIN_SIZE_TRAIN_SAMPLING: choice
43
+ ONLINE_CROP: false
44
+ RANDOM_FLIP: horizontal
45
+ MODEL:
46
+ ANCHOR_GENERATOR:
47
+ ANGLES:
48
+ - - -90
49
+ - 0
50
+ - 90
51
+ ASPECT_RATIOS:
52
+ - - 0.5
53
+ - 1.0
54
+ - 2.0
55
+ NAME: DefaultAnchorGenerator
56
+ OFFSET: 0.0
57
+ SIZES:
58
+ - - 32
59
+ - 64
60
+ - 128
61
+ - 256
62
+ - 512
63
+ BACKBONE:
64
+ FREEZE_AT: 2
65
+ NAME: build_mit_backbone
66
+ CENTER_ON: false
67
+ DEVICE: cuda
68
+ FPN:
69
+ FUSE_TYPE: sum
70
+ IN_FEATURES: []
71
+ NORM: ''
72
+ OUT_CHANNELS: 256
73
+ FPN_CENTER_HEAD:
74
+ COMMON_STRIDE: 4
75
+ CONVS_DIM: 128
76
+ IGNORE_VALUE: 360
77
+ IN_FEATURES:
78
+ - p2
79
+ - p3
80
+ - p4
81
+ - p5
82
+ LOSS_WEIGHT: 1.0
83
+ NAME: CenterFPNHead
84
+ NORM: GN
85
+ NUM_CLASSES: 30
86
+ FPN_GRAVITY_HEAD:
87
+ COMMON_STRIDE: 4
88
+ CONVS_DIM: 128
89
+ IGNORE_VALUE: 360
90
+ IN_FEATURES:
91
+ - p2
92
+ - p3
93
+ - p4
94
+ - p5
95
+ LOSS_WEIGHT: 1.0
96
+ NAME: GravityFPNHead
97
+ NORM: GN
98
+ NUM_CLASSES: 361
99
+ FPN_HEADS:
100
+ NAME: StandardFPNHeads
101
+ FPN_LATITUDE_HEAD:
102
+ COMMON_STRIDE: 4
103
+ CONVS_DIM: 128
104
+ IGNORE_VALUE: -1
105
+ IN_FEATURES:
106
+ - p2
107
+ - p3
108
+ - p4
109
+ - p5
110
+ LOSS_WEIGHT: 1.0
111
+ NAME: LatitudeFPNHead
112
+ NORM: GN
113
+ NUM_CLASSES: 9
114
+ FREEZE: []
115
+ GRAVITY_DECODER:
116
+ IGNORE_VALUE: 72
117
+ LOSS_TYPE: regression
118
+ LOSS_WEIGHT: 1.0
119
+ NAME: GravityDecoder
120
+ NUM_CLASSES: 73
121
+ GRAVITY_ON: true
122
+ HEIGHT_DECODER:
123
+ LOSS_WEIGHT: 1.0
124
+ NAME: HeightDecoder
125
+ HEIGHT_ON: false
126
+ KEYPOINT_ON: false
127
+ LATITUDE_DECODER:
128
+ IGNORE_VALUE: -1
129
+ LOSS_TYPE: regression
130
+ LOSS_WEIGHT: 1.0
131
+ NAME: LatitudeDecoder
132
+ NUM_CLASSES: 1
133
+ LATITUDE_ON: true
134
+ LOAD_PROPOSALS: false
135
+ MASK_ON: false
136
+ META_ARCHITECTURE: PersFormer
137
+ PANOPTIC_FPN:
138
+ COMBINE:
139
+ ENABLED: true
140
+ INSTANCES_CONFIDENCE_THRESH: 0.5
141
+ OVERLAP_THRESH: 0.5
142
+ STUFF_AREA_LIMIT: 4096
143
+ INSTANCE_LOSS_WEIGHT: 1.0
144
+ PARAM_DECODER:
145
+ DEBUG_LAT: false
146
+ DEBUG_UP: false
147
+ INPUT_SIZE: 64
148
+ LOSS_TYPE: regression
149
+ LOSS_WEIGHT: 1.0
150
+ NAME: ParamNetConvNextRegress
151
+ PREDICT_PARAMS:
152
+ - roll
153
+ - pitch
154
+ - general_vfov
155
+ - rel_cx
156
+ - rel_cy
157
+ SYNTHETIC_PRETRAIN: false
158
+ PERSFORMER_HEADS:
159
+ NAME: StandardPersformerHeads
160
+ PIXEL_MEAN:
161
+ - 103.53
162
+ - 116.28
163
+ - 123.675
164
+ PIXEL_STD:
165
+ - 1.0
166
+ - 1.0
167
+ - 1.0
168
+ PROPOSAL_GENERATOR:
169
+ MIN_SIZE: 0
170
+ NAME: RPN
171
+ RECOVER_PP: true
172
+ RECOVER_RPF: true
173
+ RESNETS:
174
+ DEFORM_MODULATED: false
175
+ DEFORM_NUM_GROUPS: 1
176
+ DEFORM_ON_PER_STAGE:
177
+ - false
178
+ - false
179
+ - false
180
+ - false
181
+ DEPTH: 50
182
+ NORM: FrozenBN
183
+ NUM_GROUPS: 1
184
+ OUT_FEATURES:
185
+ - res4
186
+ RES2_OUT_CHANNELS: 256
187
+ RES5_DILATION: 1
188
+ STEM_OUT_CHANNELS: 64
189
+ STRIDE_IN_1X1: true
190
+ WIDTH_PER_GROUP: 64
191
+ RETINANET:
192
+ BBOX_REG_LOSS_TYPE: smooth_l1
193
+ BBOX_REG_WEIGHTS: &id002
194
+ - 1.0
195
+ - 1.0
196
+ - 1.0
197
+ - 1.0
198
+ FOCAL_LOSS_ALPHA: 0.25
199
+ FOCAL_LOSS_GAMMA: 2.0
200
+ IN_FEATURES:
201
+ - p3
202
+ - p4
203
+ - p5
204
+ - p6
205
+ - p7
206
+ IOU_LABELS:
207
+ - 0
208
+ - -1
209
+ - 1
210
+ IOU_THRESHOLDS:
211
+ - 0.4
212
+ - 0.5
213
+ NMS_THRESH_TEST: 0.5
214
+ NORM: ''
215
+ NUM_CLASSES: 80
216
+ NUM_CONVS: 4
217
+ PRIOR_PROB: 0.01
218
+ SCORE_THRESH_TEST: 0.05
219
+ SMOOTH_L1_LOSS_BETA: 0.1
220
+ TOPK_CANDIDATES_TEST: 1000
221
+ ROI_BOX_CASCADE_HEAD:
222
+ BBOX_REG_WEIGHTS:
223
+ - &id001
224
+ - 10.0
225
+ - 10.0
226
+ - 5.0
227
+ - 5.0
228
+ - - 20.0
229
+ - 20.0
230
+ - 10.0
231
+ - 10.0
232
+ - - 30.0
233
+ - 30.0
234
+ - 15.0
235
+ - 15.0
236
+ IOUS:
237
+ - 0.5
238
+ - 0.6
239
+ - 0.7
240
+ ROI_BOX_HEAD:
241
+ BBOX_REG_LOSS_TYPE: smooth_l1
242
+ BBOX_REG_LOSS_WEIGHT: 1.0
243
+ BBOX_REG_WEIGHTS: *id001
244
+ CLS_AGNOSTIC_BBOX_REG: false
245
+ CONV_DIM: 256
246
+ FC_DIM: 1024
247
+ FED_LOSS_FREQ_WEIGHT_POWER: 0.5
248
+ FED_LOSS_NUM_CLASSES: 50
249
+ NAME: ''
250
+ NORM: ''
251
+ NUM_CONV: 0
252
+ NUM_FC: 0
253
+ POOLER_RESOLUTION: 14
254
+ POOLER_SAMPLING_RATIO: 0
255
+ POOLER_TYPE: ROIAlignV2
256
+ SMOOTH_L1_BETA: 0.0
257
+ TRAIN_ON_PRED_BOXES: false
258
+ USE_FED_LOSS: false
259
+ USE_SIGMOID_CE: false
260
+ ROI_HEADS:
261
+ BATCH_SIZE_PER_IMAGE: 512
262
+ IN_FEATURES:
263
+ - res4
264
+ IOU_LABELS:
265
+ - 0
266
+ - 1
267
+ IOU_THRESHOLDS:
268
+ - 0.5
269
+ NAME: Res5ROIHeads
270
+ NMS_THRESH_TEST: 0.5
271
+ NUM_CLASSES: 80
272
+ POSITIVE_FRACTION: 0.25
273
+ PROPOSAL_APPEND_GT: true
274
+ SCORE_THRESH_TEST: 0.05
275
+ ROI_KEYPOINT_HEAD:
276
+ CONV_DIMS:
277
+ - 512
278
+ - 512
279
+ - 512
280
+ - 512
281
+ - 512
282
+ - 512
283
+ - 512
284
+ - 512
285
+ LOSS_WEIGHT: 1.0
286
+ MIN_KEYPOINTS_PER_IMAGE: 1
287
+ NAME: KRCNNConvDeconvUpsampleHead
288
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
289
+ NUM_KEYPOINTS: 17
290
+ POOLER_RESOLUTION: 14
291
+ POOLER_SAMPLING_RATIO: 0
292
+ POOLER_TYPE: ROIAlignV2
293
+ ROI_MASK_HEAD:
294
+ CLS_AGNOSTIC_MASK: false
295
+ CONV_DIM: 256
296
+ NAME: MaskRCNNConvUpsampleHead
297
+ NORM: ''
298
+ NUM_CONV: 0
299
+ POOLER_RESOLUTION: 14
300
+ POOLER_SAMPLING_RATIO: 0
301
+ POOLER_TYPE: ROIAlignV2
302
+ RPN:
303
+ BATCH_SIZE_PER_IMAGE: 256
304
+ BBOX_REG_LOSS_TYPE: smooth_l1
305
+ BBOX_REG_LOSS_WEIGHT: 1.0
306
+ BBOX_REG_WEIGHTS: *id002
307
+ BOUNDARY_THRESH: -1
308
+ CONV_DIMS:
309
+ - -1
310
+ HEAD_NAME: StandardRPNHead
311
+ IN_FEATURES:
312
+ - res4
313
+ IOU_LABELS:
314
+ - 0
315
+ - -1
316
+ - 1
317
+ IOU_THRESHOLDS:
318
+ - 0.3
319
+ - 0.7
320
+ LOSS_WEIGHT: 1.0
321
+ NMS_THRESH: 0.7
322
+ POSITIVE_FRACTION: 0.5
323
+ POST_NMS_TOPK_TEST: 1000
324
+ POST_NMS_TOPK_TRAIN: 2000
325
+ PRE_NMS_TOPK_TEST: 6000
326
+ PRE_NMS_TOPK_TRAIN: 12000
327
+ SMOOTH_L1_BETA: 0.0
328
+ SEM_SEG_HEAD:
329
+ COMMON_STRIDE: 4
330
+ CONVS_DIM: 128
331
+ IGNORE_VALUE: 255
332
+ IN_FEATURES:
333
+ - p2
334
+ - p3
335
+ - p4
336
+ - p5
337
+ LOSS_WEIGHT: 1.0
338
+ NAME: SemSegFPNHead
339
+ NORM: GN
340
+ NUM_CLASSES: 54
341
+ WEIGHTS: ./init_model_weights/cvpr2023.pth
342
+ OUTPUT_DIR: /home/msticha/exps/e01_edina_pp
343
+ OVERFIT_ON: false
344
+ SEED: -1
345
+ SOLVER:
346
+ AMP:
347
+ ENABLED: false
348
+ BASE_LR: 0.005
349
+ BASE_LR_END: 0.0
350
+ BIAS_LR_FACTOR: 1.0
351
+ CHECKPOINT_PERIOD: 500
352
+ CLIP_GRADIENTS:
353
+ CLIP_TYPE: value
354
+ CLIP_VALUE: 1.0
355
+ ENABLED: false
356
+ NORM_TYPE: 2.0
357
+ GAMMA: 0.1
358
+ IMS_PER_BATCH: 32
359
+ LR_SCHEDULER_NAME: WarmupCosineLR
360
+ MAX_ITER: 90000
361
+ MOMENTUM: 0.9
362
+ NESTEROV: false
363
+ NUM_DECAYS: 3
364
+ REFERENCE_WORLD_SIZE: 0
365
+ RESCALE_INTERVAL: false
366
+ STEPS:
367
+ - 30000
368
+ WARMUP_FACTOR: 0.001
369
+ WARMUP_ITERS: 1000
370
+ WARMUP_METHOD: linear
371
+ WEIGHT_DECAY: 0.0001
372
+ WEIGHT_DECAY_BIAS: null
373
+ WEIGHT_DECAY_NORM: 0.0
374
+ TEST:
375
+ AUG:
376
+ ENABLED: false
377
+ FLIP: true
378
+ MAX_SIZE: 4000
379
+ MIN_SIZES:
380
+ - 400
381
+ - 500
382
+ - 600
383
+ - 700
384
+ - 800
385
+ - 900
386
+ - 1000
387
+ - 1100
388
+ - 1200
389
+ DETECTIONS_PER_IMAGE: 100
390
+ EVAL_PERIOD: 500
391
+ EXPECTED_RESULTS: []
392
+ KEYPOINT_OKS_SIGMAS: []
393
+ PRECISE_BN:
394
+ ENABLED: false
395
+ NUM_ITER: 200
396
+ VERSION: 2
397
+ VIS_PERIOD: 500