# File: SkySensePlusPlus/configs/pretrain_skysensepp.yml
# Last commit: esenke 01adcfdf60 "init" (2025-12-08 22:16:31 +08:00)
# Repository viewer metadata: 248 lines, 6.1 KiB, YAML

# Dataset settings for the `segmentation` task's `pretraining_loader`.
# NOTE(review): the original indentation was lost; the nesting below was
# reconstructed from the key names — confirm against the consuming loader.
task_attributes:
  segmentation:
    dataset_attributes:
      pretraining_loader:
        data_root_dir: 'pretrain_datasets/'
        # One JSON index per dataset; presumably resolved relative to
        # `data_root_dir` — TODO confirm.
        train_json_path_list:
          - 'dynamic-mm/dynamic-mm_train.json'
          - 'pastis-mm/pastis-mm_train.json'
          - 'vaihingen/vaihingen_train.json'
          - 'deepglobe/deepglobe_train.json'
          - 'potsdam/potsdam_train.json'
          - 'fbp/fbp_train.json'
          - 'loveda/loveda_train.json'
          - 'isaid/isaid_train.json'
          - 'jl16-mm/jl16-mm_train.json'
          - 'flair-mm/flair-mm_train.json'
          - 's2naip-mm/s2naip-mm_train.json'
          - 'dfc20-mm/dfc20-mm_train.json'
          - 'c2segab-mm/c2segab-mm_train.json'
        # Same 13 datasets as the training list, `_val` split.
        val_json_path_list:
          - 'dynamic-mm/dynamic-mm_val.json'
          - 'pastis-mm/pastis-mm_val.json'
          - 'vaihingen/vaihingen_val.json'
          - 'deepglobe/deepglobe_val.json'
          - 'potsdam/potsdam_val.json'
          - 'fbp/fbp_val.json'
          - 'loveda/loveda_val.json'
          - 'isaid/isaid_val.json'
          - 'jl16-mm/jl16-mm_val.json'
          - 'flair-mm/flair-mm_val.json'
          - 's2naip-mm/s2naip-mm_val.json'
          - 'dfc20-mm/dfc20-mm_val.json'
          - 'c2segab-mm/c2segab-mm_val.json'
        use_multi_pairs: true
        seq_len: 1
        half_mask_ratio: 0.3
        min_random_scale: 0.3
        cls_repeat_cnt: 2000
        # Parenthesised values such as `(512, 512)` are plain YAML strings,
        # not sequences. They are kept verbatim because the consumer
        # presumably parses them itself — TODO confirm before changing to
        # `[512, 512]`.
        image_size:
          hr: (512, 512)
          s2: (16, 16)
          s1: (16, 16)
          anno: (512, 512)
        # NOTE(review): `mim` is placed as a sibling of `image_size`;
        # verify this level against the loader.
        mim:
          input_size: (1024, 512)
          patch_size: 128
          mask_ratio: 0.5
# Model definition for `SkySensePP`: three input sources (`hr`, `s2`, `s1`),
# one backbone per source, heads for `s2`/`s1`, a reconstruction head for
# `hr`, a fusion neck, a modality VAE, plus metrics and losses.
# NOTE(review): the original indentation was lost; the nesting below was
# reconstructed from the key names — confirm against the model builder.
# NOTE(review): parenthesised values (e.g. `(16, 16)`, `(0,1,2,3)`) and
# exponent-only numbers (`1e-6`) are loaded as strings by plain PyYAML.
# They are kept verbatim because the consumer presumably handles them —
# TODO confirm.
model_attributes:
  SkySensePP:
    sources: ['hr', 's2', 's1']
    use_modal_vae: true
    use_ctpe: false
    use_cls_token_uper_head: false
    # NOTE(review): key is spelled `upsacle_results` (sic). Kept as-is
    # because the consuming code presumably reads this exact spelling.
    upsacle_results: true
    calendar_time: 365
    vocabulary_size: 64

    # High-resolution branch.
    backbone_hr:
      type: 'SwinTransformerV2MSL'
      arch: 'huge'
      use_attn: true
      merge_stage: 2
      vocabulary_size: 64
      img_size: 224
      patch_size: 4
      in_channels: 3
      window_size: 8
      drop_rate: 0.0
      drop_path_rate: 0.2
      out_indices: (0,1,2,3)
      use_abs_pos_embed: false
      interpolate_mode: 'bicubic'
      with_cp: true
      frozen_stages: -1
      norm_eval: false
      pad_small_map: false
      pretrained_window_sizes: [0, 0, 0, 0]
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_backbone_hr.pth'

    # `s2` branch: same settings as `backbone_s1` except `in_channels`
    # (10 here, 2 there) and the checkpoint path.
    backbone_s2:
      type: 'VisionTransformerMSL'
      img_size: (16, 16)
      use_attn: false
      merge_stage: 4
      vocabulary_size: 64
      patch_size: 4
      in_channels: 10
      embed_dims: 1024
      num_layers: 24
      num_heads: 16
      mlp_ratio: 4
      out_indices: (5,11,17,23)
      qkv_bias: true
      drop_rate: 0.0
      attn_drop_rate: 0.0
      drop_path_rate: 0.3
      with_cls_token: false
      output_cls_token: false
      act_cfg:
        type: 'GELU'
      norm_cfg:
        type: 'LN'
        eps: 1e-6
      with_cp: true
      interpolate_mode: 'bicubic'
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_backbone_s2.pth'

    head_s2:
      type: 'UPHead'
      in_dim: 1024
      # Same value as `necks.input_dims`; presumably they must agree —
      # TODO confirm.
      out_dim: 2816
      up_scale: 4
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_head_s2.pth'

    # `s1` branch: mirrors `backbone_s2` with `in_channels: 2`.
    backbone_s1:
      type: 'VisionTransformerMSL'
      img_size: (16, 16)
      use_attn: false
      merge_stage: 4
      vocabulary_size: 64
      patch_size: 4
      in_channels: 2
      embed_dims: 1024
      num_layers: 24
      num_heads: 16
      mlp_ratio: 4
      out_indices: (5,11,17,23)
      qkv_bias: true
      drop_rate: 0.0
      attn_drop_rate: 0.0
      drop_path_rate: 0.3
      with_cls_token: false
      output_cls_token: false
      act_cfg:
        type: 'GELU'
      norm_cfg:
        type: 'LN'
        eps: 1e-6
      with_cp: true
      interpolate_mode: 'bicubic'
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_backbone_s1.pth'

    head_s1:
      type: 'UPHead'
      in_dim: 1024
      # Same value as `necks.input_dims`; presumably they must agree —
      # TODO confirm.
      out_dim: 2816
      up_scale: 4
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_head_s1.pth'

    # Reconstruction head: five input feature maps (`in_index` 0-4), one
    # channel count per map.
    rec_head_hr:
      type: 'UPerHead'
      in_channels: [704, 704, 1408, 2816, 1024]
      in_index: [0, 1, 2, 3, 4]
      pool_scales: (1, 2, 3, 6)
      channels: 512
      dropout_ratio: 0.1
      # One more than `vocabulary_size` (64); presumably an extra
      # background class (cf. `use_bg` in RecLoss) — TODO confirm.
      num_classes: 65
      norm_cfg:
        type: 'SyncBN'
        requires_grad: true
      align_corners: false

    # Fusion neck; checkpoint file is named `skysense_model_fusion.pth`.
    necks:
      type: 'TransformerEncoder'
      input_dims: 2816
      embed_dims: 1024
      num_layers: 24
      num_heads: 16
      mlp_ratio: 4
      qkv_bias: true
      drop_rate: 0.0
      attn_drop_rate: 0.0
      drop_path_rate: 0.3
      with_cls_token: true
      output_cls_token: true
      norm_cfg:
        type: 'LN'
      act_cfg:
        type: 'GELU'
      num_fcs: 2
      norm_eval: false
      with_cp: true
      init_cfg:
        type: Pretrained
        checkpoint: 'pretrain/skysense_model_fusion.pth'

    # All three modalities use the same input shape.
    modality_vae:
      type: 'ModalityCompletion'
      input_shape_hr: [2816, 32, 16]
      input_shape_s2: [2816, 32, 16]
      input_shape_s1: [2816, 32, 16]
      conv_dim: 256
      z_dim: 256
      n_codebook: 8192

    # NOTE(review): `metrics` and `losses` are placed under `SkySensePP`;
    # verify this level against the trainer.
    metrics:
      - type: 'sem_metric'

    losses:
      - type: 'RecLoss'
        params:
          weight: 1.0
          patch_size: 4
          balance: true
          use_all_patch: true
          vocabulary_size: 64
          feature_merged: true
          pred_key: 'logits_hr'
          mask_key: 'mask_hr'
          target_key: 'mapped_targets'
          use_bg: true
      - type: 'ModalityVAELoss'
        params:
          weight: 1.0
# Optimizer selection and its parameters.
# NOTE(review): the original indentation was lost; `lr`, `betas` and
# `weight_decay` are placed under `params` — confirm against the optimizer
# builder.
optimizer_attributes:
  type: AdamW
  params:
    # NOTE(review): `2e-04` (no decimal point) and `(0.9, 0.999)` are loaded
    # as strings by plain PyYAML. They are kept verbatim because the
    # consumer presumably converts them — TODO confirm before rewriting as
    # `2.0e-4` / `[0.9, 0.999]`.
    lr: 2e-04
    betas: (0.9, 0.999)
    weight_decay: 0.04
# Learning-rate related settings: a layer decay factor and two block-freeze
# settings.
# NOTE(review): the original indentation was lost; this section is emitted
# at top level with its three keys nested beneath it — confirm it is not
# meant to sit under another section.
lr_parameters:
  layer_decay: 0.7
  frozen_blocks: 12
  frozen_fusion_blocks_start: 3
# Trainer run settings: schedule, batch sizes, logging/snapshot cadence,
# gradient clipping, mixed precision and distributed options.
# NOTE(review): the original indentation was lost; the nesting below was
# reconstructed from the key names — confirm against the trainer.
training_parameters:
  trainer: 'seg_trainer'
  run_type: train
  seed: 24042301
  pin_memory: true
  batch_size: 256
  test_batch_size: 128
  num_workers: 16
  max_iterations: 30000
  num_warmup_steps: 1000
  log_interval: 50
  snapshot_interval: 3000
  cos_lr: false
  clip_norm_mode: all
  clip_gradients: true
  max_grad_l2_norm: 5
  enable_tf32: false
  enable_amp: true
  find_unused_parameters: true
  synchronized_loss: true
  static_graph: true
  replace_speedup_op: true
  ema: false
  # This `batch_size` (8) must stay nested: at the same level as the
  # `batch_size: 256` above it would be a duplicate key, and most parsers
  # silently keep the last value.
  # NOTE(review): presumably 256 is the global batch size and 8 belongs to
  # the sampler — TODO confirm.
  distributed_batch_sampler:
    batch_size: 8
# Mixed-precision (AMP) settings.
# NOTE(review): the original indentation was lost; this section is emitted
# at top level with its three keys nested beneath it — confirm it is not
# meant to sit under another section.
amp_attributes:
  amp_escapes: Conv2d
  # `O1` is the letter "O" followed by the digit 1, so it loads as a string.
  opt_level: O1
  init_scale: 1