This commit is contained in:
esenke
2025-12-08 22:16:31 +08:00
commit 01adcfdf60
305 changed files with 50879 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
from mmengine.utils import mkdir_or_exist
CHASE_DB1_LEN = 28 * 3
TRAINING_LEN = 60
def parse_args():
parser = argparse.ArgumentParser(
description='Convert CHASE_DB1 dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'CHASE_DB1')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
print('Extracting CHASEDB1.zip...')
zip_file = zipfile.ZipFile(dataset_path)
zip_file.extractall(tmp_dir)
print('Generating training dataset...')
assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}'
for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, img_name))
if osp.splitext(img_name)[1] == '.jpg':
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(img_name)[0] + '.png'))
else:
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
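# e.g. a pixel value of 200 maps to 200 // 128 = 1 (foreground) and a
# value of 30 maps to 30 // 128 = 0 (background)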
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(img_name)[0] + '.png'))
for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, img_name))
if osp.splitext(img_name)[1] == '.jpg':
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(img_name)[0] + '.png'))
else:
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,56 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from cityscapesscripts.preparation.json2labelImg import json2labelImg
from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress,
track_progress)
def convert_json_to_label(json_file):
label_file = json_file.replace('_polygons.json', '_labelTrainIds.png')
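# e.g. 'aachen_000000_000019_gtFine_polygons.json' is converted to
# 'aachen_000000_000019_gtFine_labelTrainIds.png' (illustrative file name)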
json2labelImg(json_file, label_file, 'trainIds')
def parse_args():
parser = argparse.ArgumentParser(
description='Convert Cityscapes annotations to TrainIds')
parser.add_argument('cityscapes_path', help='cityscapes data path')
parser.add_argument('--gt-dir', default='gtFine', type=str)
parser.add_argument('-o', '--out-dir', help='output path')
parser.add_argument(
'--nproc', default=1, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
cityscapes_path = args.cityscapes_path
out_dir = args.out_dir if args.out_dir else cityscapes_path
mkdir_or_exist(out_dir)
gt_dir = osp.join(cityscapes_path, args.gt_dir)
poly_files = []
for poly in scandir(gt_dir, '_polygons.json', recursive=True):
poly_file = osp.join(gt_dir, poly)
poly_files.append(poly_file)
if args.nproc > 1:
track_parallel_progress(convert_json_to_label, poly_files, args.nproc)
else:
track_progress(convert_json_to_label, poly_files)
split_names = ['train', 'val', 'test']
for split in split_names:
filenames = []
for poly in scandir(
osp.join(gt_dir, split), '_polygons.json', recursive=True):
filenames.append(poly.replace('_gtFine_polygons.json', ''))
with open(osp.join(out_dir, f'{split}.txt'), 'w') as f:
f.writelines(name + '\n' for name in filenames)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,308 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
from functools import partial
import numpy as np
from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
track_progress)
from PIL import Image
from scipy.io import loadmat
COCO_LEN = 10000
clsID_to_trID = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
13: 12,
14: 13,
15: 14,
16: 15,
17: 16,
18: 17,
19: 18,
20: 19,
21: 20,
22: 21,
23: 22,
24: 23,
25: 24,
27: 25,
28: 26,
31: 27,
32: 28,
33: 29,
34: 30,
35: 31,
36: 32,
37: 33,
38: 34,
39: 35,
40: 36,
41: 37,
42: 38,
43: 39,
44: 40,
46: 41,
47: 42,
48: 43,
49: 44,
50: 45,
51: 46,
52: 47,
53: 48,
54: 49,
55: 50,
56: 51,
57: 52,
58: 53,
59: 54,
60: 55,
61: 56,
62: 57,
63: 58,
64: 59,
65: 60,
67: 61,
70: 62,
72: 63,
73: 64,
74: 65,
75: 66,
76: 67,
77: 68,
78: 69,
79: 70,
80: 71,
81: 72,
82: 73,
84: 74,
85: 75,
86: 76,
87: 77,
88: 78,
89: 79,
90: 80,
92: 81,
93: 82,
94: 83,
95: 84,
96: 85,
97: 86,
98: 87,
99: 88,
100: 89,
101: 90,
102: 91,
103: 92,
104: 93,
105: 94,
106: 95,
107: 96,
108: 97,
109: 98,
110: 99,
111: 100,
112: 101,
113: 102,
114: 103,
115: 104,
116: 105,
117: 106,
118: 107,
119: 108,
120: 109,
121: 110,
122: 111,
123: 112,
124: 113,
125: 114,
126: 115,
127: 116,
128: 117,
129: 118,
130: 119,
131: 120,
132: 121,
133: 122,
134: 123,
135: 124,
136: 125,
137: 126,
138: 127,
139: 128,
140: 129,
141: 130,
142: 131,
143: 132,
144: 133,
145: 134,
146: 135,
147: 136,
148: 137,
149: 138,
150: 139,
151: 140,
152: 141,
153: 142,
154: 143,
155: 144,
156: 145,
157: 146,
158: 147,
159: 148,
160: 149,
161: 150,
162: 151,
163: 152,
164: 153,
165: 154,
166: 155,
167: 156,
168: 157,
169: 158,
170: 159,
171: 160,
172: 161,
173: 162,
174: 163,
175: 164,
176: 165,
177: 166,
178: 167,
179: 168,
180: 169,
181: 170,
182: 171
}
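# The mapping above collapses the sparse original class ids (ids such as 12,
# 26, 29 and 30 have no entry) into contiguous train ids 0-171.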
def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir,
out_mask_dir, is_train):
imgpath, maskpath = tuple_path
shutil.copyfile(
osp.join(in_img_dir, imgpath),
osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join(
out_img_dir, 'test2014', imgpath))
annotate = loadmat(osp.join(in_ann_dir, maskpath))
mask = annotate['S'].astype(np.uint8)
mask_copy = mask.copy()
for clsID, trID in clsID_to_trID.items():
mask_copy[mask == clsID] = trID
seg_filename = osp.join(out_mask_dir, 'train2014',
maskpath.split('.')[0] +
'_labelTrainIds.png') if is_train else osp.join(
out_mask_dir, 'test2014',
maskpath.split('.')[0] + '_labelTrainIds.png')
Image.fromarray(mask_copy).save(seg_filename, 'PNG')
def generate_coco_list(folder):
train_list = osp.join(folder, 'imageLists', 'train.txt')
test_list = osp.join(folder, 'imageLists', 'test.txt')
train_paths = []
test_paths = []
with open(train_list) as f:
for filename in f:
basename = filename.strip()
imgpath = basename + '.jpg'
maskpath = basename + '.mat'
train_paths.append((imgpath, maskpath))
with open(test_list) as f:
for filename in f:
basename = filename.strip()
imgpath = basename + '.jpg'
maskpath = basename + '.mat'
test_paths.append((imgpath, maskpath))
return train_paths, test_paths
def parse_args():
parser = argparse.ArgumentParser(
description=\
'Convert COCO Stuff 10k annotations to mmsegmentation format') # noqa
parser.add_argument('coco_path', help='coco stuff path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=16, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
coco_path = args.coco_path
nproc = args.nproc
out_dir = args.out_dir or coco_path
out_img_dir = osp.join(out_dir, 'images')
out_mask_dir = osp.join(out_dir, 'annotations')
mkdir_or_exist(osp.join(out_img_dir, 'train2014'))
mkdir_or_exist(osp.join(out_img_dir, 'test2014'))
mkdir_or_exist(osp.join(out_mask_dir, 'train2014'))
mkdir_or_exist(osp.join(out_mask_dir, 'test2014'))
train_list, test_list = generate_coco_list(coco_path)
assert (len(train_list) +
len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
len(train_list), len(test_list))
if args.nproc > 1:
track_parallel_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=True),
train_list,
nproc=nproc)
track_parallel_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=False),
test_list,
nproc=nproc)
else:
track_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=True), train_list)
track_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=False), test_list)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,265 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
from functools import partial
from glob import glob
import numpy as np
from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
track_progress)
from PIL import Image
COCO_LEN = 123287
clsID_to_trID = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
12: 11,
13: 12,
14: 13,
15: 14,
16: 15,
17: 16,
18: 17,
19: 18,
20: 19,
21: 20,
22: 21,
23: 22,
24: 23,
26: 24,
27: 25,
30: 26,
31: 27,
32: 28,
33: 29,
34: 30,
35: 31,
36: 32,
37: 33,
38: 34,
39: 35,
40: 36,
41: 37,
42: 38,
43: 39,
45: 40,
46: 41,
47: 42,
48: 43,
49: 44,
50: 45,
51: 46,
52: 47,
53: 48,
54: 49,
55: 50,
56: 51,
57: 52,
58: 53,
59: 54,
60: 55,
61: 56,
62: 57,
63: 58,
64: 59,
66: 60,
69: 61,
71: 62,
72: 63,
73: 64,
74: 65,
75: 66,
76: 67,
77: 68,
78: 69,
79: 70,
80: 71,
81: 72,
83: 73,
84: 74,
85: 75,
86: 76,
87: 77,
88: 78,
89: 79,
91: 80,
92: 81,
93: 82,
94: 83,
95: 84,
96: 85,
97: 86,
98: 87,
99: 88,
100: 89,
101: 90,
102: 91,
103: 92,
104: 93,
105: 94,
106: 95,
107: 96,
108: 97,
109: 98,
110: 99,
111: 100,
112: 101,
113: 102,
114: 103,
115: 104,
116: 105,
117: 106,
118: 107,
119: 108,
120: 109,
121: 110,
122: 111,
123: 112,
124: 113,
125: 114,
126: 115,
127: 116,
128: 117,
129: 118,
130: 119,
131: 120,
132: 121,
133: 122,
134: 123,
135: 124,
136: 125,
137: 126,
138: 127,
139: 128,
140: 129,
141: 130,
142: 131,
143: 132,
144: 133,
145: 134,
146: 135,
147: 136,
148: 137,
149: 138,
150: 139,
151: 140,
152: 141,
153: 142,
154: 143,
155: 144,
156: 145,
157: 146,
158: 147,
159: 148,
160: 149,
161: 150,
162: 151,
163: 152,
164: 153,
165: 154,
166: 155,
167: 156,
168: 157,
169: 158,
170: 159,
171: 160,
172: 161,
173: 162,
174: 163,
175: 164,
176: 165,
177: 166,
178: 167,
179: 168,
180: 169,
181: 170,
255: 255
}
def convert_to_trainID(maskpath, out_mask_dir, is_train):
mask = np.array(Image.open(maskpath))
mask_copy = mask.copy()
for clsID, trID in clsID_to_trID.items():
mask_copy[mask == clsID] = trID
seg_filename = osp.join(
out_mask_dir, 'train2017',
osp.basename(maskpath).split('.')[0] +
'_labelTrainIds.png') if is_train else osp.join(
out_mask_dir, 'val2017',
osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png')
Image.fromarray(mask_copy).save(seg_filename, 'PNG')
def parse_args():
parser = argparse.ArgumentParser(
description=\
'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa
parser.add_argument('coco_path', help='coco stuff path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=16, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
coco_path = args.coco_path
nproc = args.nproc
out_dir = args.out_dir or coco_path
out_img_dir = osp.join(out_dir, 'images')
out_mask_dir = osp.join(out_dir, 'annotations')
mkdir_or_exist(osp.join(out_mask_dir, 'train2017'))
mkdir_or_exist(osp.join(out_mask_dir, 'val2017'))
if out_dir != coco_path:
shutil.copytree(osp.join(coco_path, 'images'), out_img_dir)
train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png'))
train_list = [file for file in train_list if '_labelTrainIds' not in file]
test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png'))
test_list = [file for file in test_list if '_labelTrainIds' not in file]
assert (len(train_list) +
len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
len(train_list), len(test_list))
if args.nproc > 1:
track_parallel_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
train_list,
nproc=nproc)
track_parallel_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
test_list,
nproc=nproc)
else:
track_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
train_list)
track_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
test_list)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,114 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import cv2
import mmcv
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert DRIVE dataset to mmsegmentation format')
parser.add_argument(
'training_path', help='the training part of DRIVE dataset')
parser.add_argument(
'testing_path', help='the testing part of DRIVE dataset')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
training_path = args.training_path
testing_path = args.testing_path
if args.out_dir is None:
out_dir = osp.join('data', 'DRIVE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
print('Extracting training.zip...')
zip_file = zipfile.ZipFile(training_path)
zip_file.extractall(tmp_dir)
print('Generating training dataset...')
now_dir = osp.join(tmp_dir, 'training', 'images')
for img_name in os.listdir(now_dir):
img = mmcv.imread(osp.join(now_dir, img_name))
mmcv.imwrite(
img,
osp.join(
out_dir, 'images', 'training',
osp.splitext(img_name)[0].replace('_training', '') +
'.png'))
now_dir = osp.join(tmp_dir, 'training', '1st_manual')
for img_name in os.listdir(now_dir):
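# the 1st_manual annotations are GIF images, which mmcv.imread cannot
# decode directly, so read the first frame with cv2.VideoCapture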
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(img_name)[0] + '.png'))
print('Extracting test.zip...')
zip_file = zipfile.ZipFile(testing_path)
zip_file.extractall(tmp_dir)
print('Generating validation dataset...')
now_dir = osp.join(tmp_dir, 'test', 'images')
for img_name in os.listdir(now_dir):
img = mmcv.imread(osp.join(now_dir, img_name))
mmcv.imwrite(
img,
osp.join(
out_dir, 'images', 'validation',
osp.splitext(img_name)[0].replace('_test', '') + '.png'))
now_dir = osp.join(tmp_dir, 'test', '1st_manual')
if osp.exists(now_dir):
for img_name in os.listdir(now_dir):
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
now_dir = osp.join(tmp_dir, 'test', '2nd_manual')
if osp.exists(now_dir):
for img_name in os.listdir(now_dir):
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,112 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
from mmengine.utils import mkdir_or_exist
HRF_LEN = 15
TRAINING_LEN = 5
def parse_args():
parser = argparse.ArgumentParser(
description='Convert HRF dataset to mmsegmentation format')
parser.add_argument('healthy_path', help='the path of healthy.zip')
parser.add_argument(
'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip')
parser.add_argument('glaucoma_path', help='the path of glaucoma.zip')
parser.add_argument(
'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip')
parser.add_argument(
'diabetic_retinopathy_path',
help='the path of diabetic_retinopathy.zip')
parser.add_argument(
'diabetic_retinopathy_manualsegm_path',
help='the path of diabetic_retinopathy_manualsegm.zip')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
images_path = [
args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path
]
annotations_path = [
args.healthy_manualsegm_path, args.glaucoma_manualsegm_path,
args.diabetic_retinopathy_manualsegm_path
]
if args.out_dir is None:
out_dir = osp.join('data', 'HRF')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
print('Generating images...')
for now_path in images_path:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(now_path)
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Generating annotations...')
for now_path in annotations_path:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(now_path)
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,246 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import shutil
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
from PIL import Image
iSAID_palette = \
{
0: (0, 0, 0),
1: (0, 0, 63),
2: (0, 63, 63),
3: (0, 63, 0),
4: (0, 63, 127),
5: (0, 63, 191),
6: (0, 63, 255),
7: (0, 127, 63),
8: (0, 127, 127),
9: (0, 0, 127),
10: (0, 0, 191),
11: (0, 0, 255),
12: (0, 191, 127),
13: (0, 127, 191),
14: (0, 127, 255),
15: (0, 100, 155)
}
iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()}
def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette):
"""RGB-color encoding to grayscale labels."""
arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
for c, i in palette.items():
m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
arr_2d[m] = i
return arr_2d
def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap):
img = np.asarray(Image.open(src_path).convert('RGB'))
img_H, img_W, _ = img.shape
if img_H < patch_H and img_W > patch_W:
img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0)
img_H, img_W, _ = img.shape
elif img_H > patch_H and img_W < patch_W:
img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0)
img_H, img_W, _ = img.shape
elif img_H < patch_H and img_W < patch_W:
img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0)
img_H, img_W, _ = img.shape
for x in range(0, img_W, patch_W - overlap):
for y in range(0, img_H, patch_H - overlap):
x_str = x
x_end = x + patch_W
if x_end > img_W:
diff_x = x_end - img_W
x_str -= diff_x
x_end = img_W
y_str = y
y_end = y + patch_H
if y_end > img_H:
diff_y = y_end - img_H
y_str -= diff_y
y_end = img_H
img_patch = img[y_str:y_end, x_str:x_end, :]
img_patch = Image.fromarray(img_patch.astype(np.uint8))
image = osp.basename(src_path).split('.')[0] + '_' + str(
y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str(
x_end) + '.png'
save_path_image = osp.join(out_dir, 'img_dir', mode, str(image))
img_patch.save(save_path_image, format='BMP')
def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap):
label = mmcv.imread(src_path, channel_order='rgb')
label = iSAID_convert_from_color(label)
img_H, img_W = label.shape
if img_H < patch_H and img_W > patch_W:
label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255)
img_H = patch_H
elif img_H > patch_H and img_W < patch_W:
label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255)
img_W = patch_W
elif img_H < patch_H and img_W < patch_W:
label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255)
img_H = patch_H
img_W = patch_W
for x in range(0, img_W, patch_W - overlap):
for y in range(0, img_H, patch_H - overlap):
x_str = x
x_end = x + patch_W
if x_end > img_W:
diff_x = x_end - img_W
x_str -= diff_x
x_end = img_W
y_str = y
y_end = y + patch_H
if y_end > img_H:
diff_y = y_end - img_H
y_str -= diff_y
y_end = img_H
lab_patch = label[y_str:y_end, x_str:x_end]
lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P')
image = osp.basename(src_path).split('.')[0].split(
'_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str(
x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png'
lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image)))
def parse_args():
parser = argparse.ArgumentParser(
description='Convert iSAID dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='iSAID folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--patch_width',
default=896,
type=int,
help='Width of the cropped image patch')
parser.add_argument(
'--patch_height',
default=896,
type=int,
help='Height of the cropped image patch')
parser.add_argument(
'--overlap_area', default=384, type=int, help='Overlap area')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
# image patch width and height
patch_H, patch_W = args.patch_width, args.patch_height
overlap = args.overlap_area # overlap area
if args.out_dir is None:
out_dir = osp.join('data', 'iSAID')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test'))
assert os.path.exists(os.path.join(dataset_path, 'train')), \
f'train is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'val')), \
f'val is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'test')), \
f'test is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset_mode in ['train', 'val', 'test']:
print(f'Extracting {dataset_mode} data...')
img_zipp_list = glob.glob(
os.path.join(dataset_path, dataset_mode, 'images', '*.zip'))
print('Found the data', img_zipp_list)
for img_zipp in img_zipp_list:
zip_file = zipfile.ZipFile(img_zipp)
zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img'))
src_path_list = glob.glob(
os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png'))
src_prog_bar = ProgressBar(len(src_path_list))
for i, img_path in enumerate(src_path_list):
if dataset_mode != 'test':
slide_crop_image(img_path, out_dir, dataset_mode, patch_H,
patch_W, overlap)
else:
shutil.move(img_path,
os.path.join(out_dir, 'img_dir', dataset_mode))
src_prog_bar.update()
if dataset_mode != 'test':
label_zipp_list = glob.glob(
os.path.join(dataset_path, dataset_mode, 'Semantic_masks',
'*.zip'))
for label_zipp in label_zipp_list:
zip_file = zipfile.ZipFile(label_zipp)
zip_file.extractall(
os.path.join(tmp_dir, dataset_mode, 'lab'))
lab_path_list = glob.glob(
os.path.join(tmp_dir, dataset_mode, 'lab', 'images',
'*.png'))
lab_prog_bar = ProgressBar(len(lab_path_list))
for i, lab_path in enumerate(lab_path_list):
slide_crop_label(lab_path, out_dir, dataset_mode, patch_H,
patch_W, overlap)
lab_prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,99 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import mmcv
import numpy as np
from mmengine.utils import ProgressBar
def parse_args():
parser = argparse.ArgumentParser(
description='Convert levir-cd dataset to mmsegmentation format')
parser.add_argument('--dataset_path', help='LEVIR-CD folder path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=256)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def main():
args = parse_args()
input_folder = args.dataset_path
png_files = glob.glob(
os.path.join(input_folder, '**/*.png'), recursive=True)
output_folder = args.out_dir
prog_bar = ProgressBar(len(png_files))
for png_file in png_files:
new_path = os.path.join(
output_folder,
os.path.relpath(os.path.dirname(png_file), input_folder))
os.makedirs(os.path.dirname(new_path), exist_ok=True)
label = False
if 'label' in png_file:
label = True
clip_big_image(png_file, new_path, args, label)
prog_bar.update()
def clip_big_image(image_path, clip_save_dir, args, to_label=False):
image = mmcv.imread(image_path)
h, w, c = image.shape
clip_size = args.clip_size
stride_size = args.stride_size
num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
(h - clip_size) /
stride_size) * stride_size + clip_size >= h else math.ceil(
(h - clip_size) / stride_size) + 1
num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
(w - clip_size) /
stride_size) * stride_size + clip_size >= w else math.ceil(
(w - clip_size) / stride_size) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * clip_size
ymin = y * clip_size
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
np.zeros_like(xmin))
ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + clip_size, w),
np.minimum(ymin + clip_size, h)
],
axis=1)
if to_label:
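# the change-detection masks are assumed to be stored as 0/255; map 255
# to 1 so the saved labels use train ids {0, 1}, then keep one channel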
image[image == 255] = 1
image = image[:, :, 0]
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y, start_x:end_x] \
if to_label else image[start_y:end_y, start_x:end_x, :]
idx = osp.basename(image_path).split('.')[0]
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(clip_save_dir,
f'{idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,73 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import shutil
import tempfile
import zipfile
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert LoveDA dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='LoveDA folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'loveDA')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'img_dir'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
assert 'Train.zip' in os.listdir(dataset_path), \
f'Train.zip is not in {dataset_path}'
assert 'Val.zip' in os.listdir(dataset_path), \
f'Val.zip is not in {dataset_path}'
assert 'Test.zip' in os.listdir(dataset_path), \
f'Test.zip is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset in ['Train', 'Val', 'Test']:
zip_file = zipfile.ZipFile(
os.path.join(dataset_path, dataset + '.zip'))
zip_file.extractall(tmp_dir)
data_type = dataset.lower()
for location in ['Rural', 'Urban']:
for image_type in ['images_png', 'masks_png']:
if image_type == 'images_png':
dst = osp.join(out_dir, 'img_dir', data_type)
else:
dst = osp.join(out_dir, 'ann_dir', data_type)
if dataset == 'Test' and image_type == 'masks_png':
continue
else:
src_dir = osp.join(tmp_dir, dataset, location,
image_type)
src_lst = os.listdir(src_dir)
for file in src_lst:
shutil.move(osp.join(src_dir, file), dst)
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,89 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
import tempfile
import zipfile
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert NYU Depth dataset to mmsegmentation format')
parser.add_argument('raw_data', help='the path of raw data')
parser.add_argument(
'-o', '--out_dir', help='output path', default='./data/nyu')
args = parser.parse_args()
return args
def reorganize(raw_data_dir: str, out_dir: str):
"""Reorganize NYU Depth dataset files into the required directory
structure.
Args:
raw_data_dir (str): Path to the raw data directory.
out_dir (str): Output directory for the organized dataset.
"""
def move_data(data_list, dst_prefix, fname_func):
"""Move data files from source to destination directory.
Args:
data_list (list): List of data file paths.
dst_prefix (str): Prefix to be added to destination paths.
fname_func (callable): Function to process file names
"""
for data_item in data_list:
data_item = data_item.strip().strip('/')
new_item = fname_func(data_item)
shutil.move(
osp.join(raw_data_dir, data_item),
osp.join(out_dir, dst_prefix, new_item))
def process_phase(phase):
"""Process a dataset phase (e.g., 'train' or 'test')."""
with open(osp.join(raw_data_dir, f'nyu_{phase}.txt')) as f:
data = filter(lambda x: len(x.strip()) > 0, f.readlines())
data = map(lambda x: x.split()[:2], data)
images, annos = zip(*data)
move_data(images, f'images/{phase}',
lambda x: x.replace('/rgb', ''))
move_data(annos, f'annotations/{phase}',
lambda x: x.replace('/sync_depth', ''))
process_phase('train')
process_phase('test')
def main():
args = parse_args()
print('Making directories...')
mkdir_or_exist(args.out_dir)
for subdir in [
'images/train', 'images/test', 'annotations/train',
'annotations/test'
]:
mkdir_or_exist(osp.join(args.out_dir, subdir))
print('Generating images and annotations...')
if args.raw_data.endswith('.zip'):
with tempfile.TemporaryDirectory() as tmp_dir:
zip_file = zipfile.ZipFile(args.raw_data)
zip_file.extractall(tmp_dir)
reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir)
else:
assert osp.isdir(
args.raw_data
), 'the raw_data argument should be either a zip file or a directory.'
reorganize(args.raw_data, args.out_dir)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,87 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from functools import partial
import numpy as np
from detail import Detail
from mmengine.utils import mkdir_or_exist, track_progress
from PIL import Image
_mapping = np.sort(
np.array([
0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
]))
_key = np.array(range(len(_mapping))).astype('uint8')
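# _mapping holds the 60 sorted category ids kept from the original annotation
# (background plus 59 context classes); np.digitize below locates each pixel
# value in this sorted array and _key relabels it with the contiguous index
# 0-59.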
def generate_labels(img_id, detail, out_dir):
def _class_to_index(mask, _mapping, _key):
# assert the values
values = np.unique(mask)
for i in range(len(values)):
assert (values[i] in _mapping)
index = np.digitize(mask.ravel(), _mapping, right=True)
return _key[index].reshape(mask.shape)
mask = Image.fromarray(
_class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
filename = img_id['file_name']
mask.save(osp.join(out_dir, filename.replace('jpg', 'png')))
return osp.splitext(osp.basename(filename))[0]
def parse_args():
parser = argparse.ArgumentParser(
description='Convert PASCAL Context annotations to mmsegmentation format')
parser.add_argument('devkit_path', help='pascal voc devkit path')
parser.add_argument('json_path', help='annotation json filepath')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
devkit_path = args.devkit_path
if args.out_dir is None:
out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
else:
out_dir = args.out_dir
json_path = args.json_path
mkdir_or_exist(out_dir)
img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
train_detail = Detail(json_path, img_dir, 'train')
train_ids = train_detail.getImgs()
val_detail = Detail(json_path, img_dir, 'val')
val_ids = val_detail.getImgs()
mkdir_or_exist(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext'))
train_list = track_progress(
partial(generate_labels, detail=train_detail, out_dir=out_dir),
train_ids)
with open(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
'train.txt'), 'w') as f:
f.writelines(line + '\n' for line in sorted(train_list))
val_list = track_progress(
partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids)
with open(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
'val.txt'), 'w') as f:
f.writelines(line + '\n' for line in sorted(val_list))
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,158 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert potsdam dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='potsdam folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=512)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def clip_big_image(image_path, clip_save_dir, args, to_label=False):
# The original Potsdam images are very large, so they are pre-processed
# into smaller patches. Given a fixed clip size and stride size, the
# number of patches along the width and height is determined.
# For example, a 5120 x 5120 original image with a clip size of 512 and
# a stride size of 256 would generate about 20 x 20 = 400 patches, each
# of size 512 x 512.
image = mmcv.imread(image_path)
h, w, c = image.shape
clip_size = args.clip_size
stride_size = args.stride_size
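# e.g. for h = 6000, clip_size = 512 and stride_size = 256:
# ceil((6000 - 512) / 256) = 22 and 22 * 256 + 512 = 6144 >= 6000,
# so num_rows = 22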
num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
(h - clip_size) /
stride_size) * stride_size + clip_size >= h else math.ceil(
(h - clip_size) / stride_size) + 1
num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
(w - clip_size) /
stride_size) * stride_size + clip_size >= w else math.ceil(
(w - clip_size) / stride_size) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * clip_size
ymin = y * clip_size
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
np.zeros_like(xmin))
ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + clip_size, w),
np.minimum(ymin + clip_size, h)
],
axis=1)
if to_label:
color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
[255, 255, 0], [0, 255, 0], [0, 255, 255],
[0, 0, 255]])
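# reduce each pixel to a scalar via a dot product with (2, 3, 4); every color
# in the palette above maps to a distinct scalar, so class ids can be assigned
# by comparing scalars instead of full color triples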
flatten_v = np.matmul(
image.reshape(-1, c),
np.array([2, 3, 4]).reshape(3, 1))
out = np.zeros_like(flatten_v)
for idx, class_color in enumerate(color_map):
value_idx = np.matmul(class_color,
np.array([2, 3, 4]).reshape(3, 1))
out[flatten_v == value_idx] = idx
image = out.reshape(h, w)
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y,
start_x:end_x] if to_label else image[
start_y:end_y, start_x:end_x, :]
idx_i, idx_j = osp.basename(image_path).split('_')[2:4]
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(
clip_save_dir,
f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
def main():
args = parse_args()
splits = {
'train': [
'2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11',
'4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7',
'6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9'
],
'val': [
'5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13',
'4_15', '2_14', '5_13', '4_13', '3_14', '7_13'
]
}
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'potsdam')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
print('Found the data', zipp_list)
for zipp in zipp_list:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(zipp)
zip_file.extractall(tmp_dir)
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
if not len(src_path_list):
sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0])
src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif'))
prog_bar = ProgressBar(len(src_path_list))
for i, src_path in enumerate(src_path_list):
idx_i, idx_j = osp.basename(src_path).split('_')[2:4]
data_type = 'train' if f'{idx_i}_{idx_j}' in splits[
'train'] else 'val'
if 'label' in src_path:
dst_dir = osp.join(out_dir, 'ann_dir', data_type)
clip_big_image(src_path, dst_dir, args, to_label=True)
else:
dst_dir = osp.join(out_dir, 'img_dir', data_type)
clip_big_image(src_path, dst_dir, args, to_label=False)
prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,110 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert REFUGE dataset to mmsegmentation format')
parser.add_argument('--raw_data_root', help='the root path of raw data')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def extract_img(root: str,
cur_dir: str,
out_dir: str,
mode: str = 'train',
file_type: str = 'img') -> None:
"""_summary_
Args:
Args:
root (str): root where the extracted data is saved
cur_dir (cur_dir): dir where the zip_file exists
out_dir (str): root dir where the data is saved
mode (str, optional): Defaults to 'train'.
file_type (str, optional): Defaults to 'img',else to 'mask'.
"""
zip_file = zipfile.ZipFile(cur_dir)
zip_file.extractall(root)
for cur_dir, dirs, files in os.walk(root):
# only process leaf directories, skipping 'Illustration' and 'MACOSX' folders
if len(dirs) == 0 and \
osp.basename(cur_dir).find('Illustration') == -1 and \
cur_dir.find('MACOSX') == -1:
file_names = [
file for file in files
if file.endswith('.jpg') or file.endswith('.bmp')
]
for filename in sorted(file_names):
img = mmcv.imread(osp.join(cur_dir, filename))
if file_type == 'annotations':
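# remap the grayscale mask values to train ids: 0 -> 1, 128 -> 2 and
# 255 -> 0 (255 is assumed to be the background in the raw REFUGE masks)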
img = img[:, :, 0]
img[np.where(img == 0)] = 1
img[np.where(img == 128)] = 2
img[np.where(img == 255)] = 0
mmcv.imwrite(
img,
osp.join(out_dir, file_type, mode,
osp.splitext(filename)[0] + '.png'))
def main():
args = parse_args()
raw_data_root = args.raw_data_root
if args.out_dir is None:
out_dir = osp.join('./data', 'REFUGE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'images', 'test'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'test'))
print('Generating images and annotations...')
# process data from the child dir on the first rank
cur_dir, dirs, files = list(os.walk(raw_data_root))[0]
print('====================')
files = list(filter(lambda x: x.endswith('.zip'), files))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for file in files:
# search data folders for training, validation and test
mode = list(
filter(lambda x: file.lower().find(x) != -1,
['training', 'test', 'validation']))[0]
file_root = osp.join(tmp_dir, file[:-4])
file_type = 'images' if file.find('Anno') == -1 and file.find(
'GT') == -1 else 'annotations'
extract_img(file_root, osp.join(cur_dir, file), out_dir, mode,
file_type)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,167 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import gzip
import os
import os.path as osp
import tarfile
import tempfile
import mmcv
from mmengine.utils import mkdir_or_exist
STARE_LEN = 20
TRAINING_LEN = 10
def un_gz(src, dst):
g_file = gzip.GzipFile(src)
with open(dst, 'wb+') as f:
f.write(g_file.read())
g_file.close()
def parse_args():
parser = argparse.ArgumentParser(
description='Convert STARE dataset to mmsegmentation format')
parser.add_argument('image_path', help='the path of stare-images.tar')
parser.add_argument('labels_ah', help='the path of labels-ah.tar')
parser.add_argument('labels_vk', help='the path of labels-vk.tar')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
image_path = args.image_path
labels_ah = args.labels_ah
labels_vk = args.labels_vk
if args.out_dir is None:
out_dir = osp.join('data', 'STARE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting stare-images.tar...')
with tarfile.open(image_path) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting labels-ah.tar...')
with tarfile.open(labels_ah) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a threshold
# to convert the nonstandard annotation imgs. The value divided by
# 128 is equivalent to '1 if value >= 128 else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting labels-vk.tar...')
with tarfile.open(labels_vk) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,155 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import nibabel as nib
import numpy as np
from mmengine.utils import mkdir_or_exist
from PIL import Image
def read_files_from_txt(txt_path):
with open(txt_path) as f:
files = f.readlines()
files = [file.strip() for file in files]
return files
def read_nii_file(nii_path):
img = nib.load(nii_path).get_fdata()
return img
def split_3d_image(img):
c, _, _ = img.shape
res = []
for i in range(c):
res.append(img[i, :, :])
return res
def label_mapping(label):
"""Label mapping from TransUNet paper setting. It only has 9 classes, which
are 'background', 'aorta', 'gallbladder', 'left_kidney', 'right_kidney',
'liver', 'pancreas', 'spleen', 'stomach', respectively. Other foreground
classes in original dataset are all set to background.
More details could be found here: https://arxiv.org/abs/2102.04306
"""
mapped_label = np.zeros_like(label)
mapped_label[label == 8] = 1
mapped_label[label == 4] = 2
mapped_label[label == 3] = 3
mapped_label[label == 2] = 4
mapped_label[label == 6] = 5
mapped_label[label == 11] = 6
mapped_label[label == 1] = 7
mapped_label[label == 7] = 8
return mapped_label
def parse_args():
parser = argparse.ArgumentParser(
description='Convert synapse dataset to mmsegmentation format')
parser.add_argument(
'--dataset-path', type=str, help='synapse dataset path.')
parser.add_argument(
'--save-path',
default='data/synapse',
type=str,
help='save path of the dataset.')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
save_path = args.save_path
if not osp.exists(dataset_path):
raise ValueError('The dataset path does not exist. '
'Please enter a correct dataset path.')
if not osp.exists(osp.join(dataset_path, 'img')) \
or not osp.exists(osp.join(dataset_path, 'label')):
raise FileNotFoundError('The dataset structure is incorrect. '
'Please check your dataset.')
train_id = read_files_from_txt(osp.join(dataset_path, 'train.txt'))
train_id = [idx[3:7] for idx in train_id]
test_id = read_files_from_txt(osp.join(dataset_path, 'val.txt'))
test_id = [idx[3:7] for idx in test_id]
mkdir_or_exist(osp.join(save_path, 'img_dir/train'))
mkdir_or_exist(osp.join(save_path, 'img_dir/val'))
mkdir_or_exist(osp.join(save_path, 'ann_dir/train'))
mkdir_or_exist(osp.join(save_path, 'ann_dir/val'))
# It follows data preparation pipeline from here:
# https://github.com/Beckschen/TransUNet/tree/main/datasets
for i, idx in enumerate(train_id):
img_3d = read_nii_file(
osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
label_3d = read_nii_file(
osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
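# window the intensities to [-125, 275] and rescale linearly to [0, 255]:
# (x + 125) / 400 maps [-125, 275] to [0, 1], then multiply by 255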
img_3d = np.clip(img_3d, -125, 275)
img_3d = (img_3d + 125) / 400
img_3d *= 255
img_3d = np.transpose(img_3d, [2, 0, 1])
img_3d = np.flip(img_3d, 2)
label_3d = np.transpose(label_3d, [2, 0, 1])
label_3d = np.flip(label_3d, 2)
label_3d = label_mapping(label_3d)
for c in range(img_3d.shape[0]):
img = img_3d[c]
label = label_3d[c]
img = Image.fromarray(img).convert('RGB')
label = Image.fromarray(label).convert('L')
img.save(
osp.join(
save_path, 'img_dir/train', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.jpg'))
label.save(
osp.join(
save_path, 'ann_dir/train', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.png'))
for i, idx in enumerate(test_id):
img_3d = read_nii_file(
osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
label_3d = read_nii_file(
osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
img_3d = np.clip(img_3d, -125, 275)
img_3d = (img_3d + 125) / 400
img_3d *= 255
img_3d = np.transpose(img_3d, [2, 0, 1])
img_3d = np.flip(img_3d, 2)
label_3d = np.transpose(label_3d, [2, 0, 1])
label_3d = np.flip(label_3d, 2)
label_3d = label_mapping(label_3d)
for c in range(img_3d.shape[0]):
img = img_3d[c]
label = label_3d[c]
img = Image.fromarray(img).convert('RGB')
label = Image.fromarray(label).convert('L')
img.save(
osp.join(
save_path, 'img_dir/val', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.jpg'))
label.save(
osp.join(
save_path, 'ann_dir/val', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.png'))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,156 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert vaihingen dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='vaihingen folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=512)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def clip_big_image(image_path, clip_save_dir, to_label=False):
# The original Vaihingen images are very large, so they are pre-processed
# into smaller patches. Given a fixed clip size and stride size, the
# number of patches along the width and height is determined.
# For example, a 5120 x 5120 original image with a clip size of 512 and
# a stride size of 256 would generate about 20 x 20 = 400 patches, each
# of size 512 x 512.
image = mmcv.imread(image_path)
h, w, c = image.shape
cs = args.clip_size
ss = args.stride_size
num_rows = math.ceil((h - cs) / ss) if math.ceil(
(h - cs) / ss) * ss + cs >= h else math.ceil((h - cs) / ss) + 1
num_cols = math.ceil((w - cs) / ss) if math.ceil(
(w - cs) / ss) * ss + cs >= w else math.ceil((w - cs) / ss) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * cs
ymin = y * cs
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + cs > w, w - xmin - cs, np.zeros_like(xmin))
ymin_offset = np.where(ymin + cs > h, h - ymin - cs, np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + cs, w),
np.minimum(ymin + cs, h)
],
axis=1)
if to_label:
color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
[255, 255, 0], [0, 255, 0], [0, 255, 255],
[0, 0, 255]])
flatten_v = np.matmul(
image.reshape(-1, c),
np.array([2, 3, 4]).reshape(3, 1))
out = np.zeros_like(flatten_v)
for idx, class_color in enumerate(color_map):
value_idx = np.matmul(class_color,
np.array([2, 3, 4]).reshape(3, 1))
out[flatten_v == value_idx] = idx
image = out.reshape(h, w)
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y,
start_x:end_x] if to_label else image[
start_y:end_y, start_x:end_x, :]
area_idx = osp.basename(image_path).split('_')[3].strip('.tif')
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(clip_save_dir,
f'{area_idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
def main():
splits = {
'train': [
'area1', 'area11', 'area13', 'area15', 'area17', 'area21',
'area23', 'area26', 'area28', 'area3', 'area30', 'area32',
'area34', 'area37', 'area5', 'area7'
],
'val': [
'area6', 'area24', 'area35', 'area16', 'area14', 'area22',
'area10', 'area4', 'area2', 'area20', 'area8', 'area31', 'area33',
'area27', 'area38', 'area12', 'area29'
],
}
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'vaihingen')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
print('Found the data', zipp_list)
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for zipp in zipp_list:
zip_file = zipfile.ZipFile(zipp)
zip_file.extractall(tmp_dir)
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen' in zipp:
src_path_list = glob.glob(
os.path.join(os.path.join(tmp_dir, 'top'), '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE' in zipp: # noqa
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
# delete unused area9 ground truth
src_path_list = [
path for path in src_path_list if 'area9' not in path
]
prog_bar = ProgressBar(len(src_path_list))
for i, src_path in enumerate(src_path_list):
area_idx = osp.basename(src_path).split('_')[3].strip('.tif')
data_type = 'train' if area_idx in splits['train'] else 'val'
if 'noBoundary' in src_path:
dst_dir = osp.join(out_dir, 'ann_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=True)
else:
dst_dir = osp.join(out_dir, 'img_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=False)
prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
args = parse_args()
main()

View File

@@ -0,0 +1,92 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from functools import partial
import numpy as np
from mmengine.utils import mkdir_or_exist, scandir, track_parallel_progress
from PIL import Image
from scipy.io import loadmat
AUG_LEN = 10582
def convert_mat(mat_file, in_dir, out_dir):
data = loadmat(osp.join(in_dir, mat_file))
mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8)
seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png'))
Image.fromarray(mask).save(seg_filename, 'PNG')
def generate_aug_list(merged_list, excluded_list):
return list(set(merged_list) - set(excluded_list))
def parse_args():
parser = argparse.ArgumentParser(
description='Convert PASCAL VOC annotations to mmsegmentation format')
parser.add_argument('devkit_path', help='pascal voc devkit path')
parser.add_argument('aug_path', help='pascal voc aug path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=1, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
devkit_path = args.devkit_path
aug_path = args.aug_path
nproc = args.nproc
if args.out_dir is None:
out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug')
else:
out_dir = args.out_dir
mkdir_or_exist(out_dir)
in_dir = osp.join(aug_path, 'dataset', 'cls')
track_parallel_progress(
partial(convert_mat, in_dir=in_dir, out_dir=out_dir),
list(scandir(in_dir, suffix='.mat')),
nproc=nproc)
full_aug_list = []
with open(osp.join(aug_path, 'dataset', 'train.txt')) as f:
full_aug_list += [line.strip() for line in f]
with open(osp.join(aug_path, 'dataset', 'val.txt')) as f:
full_aug_list += [line.strip() for line in f]
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'train.txt')) as f:
ori_train_list = [line.strip() for line in f]
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'val.txt')) as f:
val_list = [line.strip() for line in f]
aug_train_list = generate_aug_list(ori_train_list + full_aug_list,
val_list)
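# trainaug is the union of the original VOC train split and the SBD
# train/val lists with the VOC val images removed; it should contain
# exactly AUG_LEN = 10582 images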
assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format(
AUG_LEN)
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'trainaug.txt'), 'w') as f:
f.writelines(line + '\n' for line in aug_train_list)
aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list)
assert len(aug_list) == AUG_LEN - len(
ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN -
len(ori_train_list))
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'),
'w') as f:
f.writelines(line + '\n' for line in aug_list)
print('Done!')
if __name__ == '__main__':
main()