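# CLIPSeg PhraseCut experiment configuration: training setup, shared test
# settings, per-run test configurations, and per-model overrides.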
configuration:
  batch_size: 64
  optimizer: torch.optim.AdamW
  lr: 0.001
  trainer: experiment_setup.train_loop
  scorer: experiment_setup.score
  model: models.clipseg.CLIPDensePredT
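
  # cosine decay of the learning rate over the full run (T_max equals max_iterations)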
  lr_scheduler: cosine
  T_max: 20000
  eta_min: 0.0001

  max_iterations: 20000
  val_interval: null

  # dataset
  dataset: datasets.phrasecut.PhraseCut # <-----------------
  split_mode: pascal_test
  split: train
  mask: text_and_crop_blur_highlight352
  image_size: 352
  normalize: True
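  # assumption: 'sample' means the pre-crop scale is drawn at random from [1, 1.5]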
  pre_crop_image_size: [sample, 1, 1.5]
  aug: 1new

  # general
  mix: False # <-----------------
  prompt: shuffle+
  norm_cond: True
  mix_text_min: 0.0

  # model
  out: 1
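  # CLIP ViT layers whose activations are fed to the segmentation decoder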
  extract_layers: [3, 7, 9]
  reduce_dim: 64
  depth: 3
  fix_shift: False

  loss: torch.nn.functional.binary_cross_entropy_with_logits
  amp: True

test_configuration_common:
  normalize: True
  image_size: 352
  batch_size: 32
  # max_iterations: 5
  # max_iterations: 150

test_configuration:
  -
    name: pc # old: phrasecut
    metric: metrics.FixedIntervalMetrics
    test_dataset: phrasecut
    split: test
    mask: text
    label_support: True
    sigmoid: True
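
# columns of the results table: run index/name, mean IoU and foreground IoU at
# fixed thresholds (0.3 / 0.5), average precision, runtime, and date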
columns: [i, name, pc_miou_0.3, pc_fgiou_0.3, pc_fgiou_0.5, pc_ap, duration, date]

individual_configurations:
# important ones
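# rd64-uni: universal model, conditioned on both text and visual prompts
# (with_visual: True) and trained with mixed text/visual conditioning (mix: True)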
- {name: rd64-uni, version: 'ViT-B/16', reduce_dim: 64, with_visual: True, negative_prob: 0.2, mix: True, mix_text_max: 0.5}
# this was accidentally trained using the old mask
- {name: rd128-vit16-phrasecut, version: 'ViT-B/16', reduce_dim: 128, mask: text_and_blur3_highlight01}
- {name: rd64-uni-novis, version: 'ViT-B/16', reduce_dim: 64, with_visual: False, negative_prob: 0.2, mix: False}
# this was accidentally trained using the old mask
- {name: baseline3-vit16-phrasecut, model: models.clipseg.CLIPDenseBaseline, version: 'ViT-B/16', reduce_dim: 64, reduce2_dim: 64, mask: text_and_blur3_highlight01}
- {name: vit64-uni, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, reduce_dim: 64, with_visual: True, only_visual: True, negative_prob: 0.2, mask: crop_blur_highlight352, lr: 0.0003}
- {name: vit64-uni-novis, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, with_visual: False, reduce_dim: 64, lr: 0.0001}