This commit is contained in:
esenke
2025-12-08 22:16:31 +08:00
commit 01adcfdf60
305 changed files with 50879 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
from mmengine.utils import mkdir_or_exist
CHASE_DB1_LEN = 28 * 3
TRAINING_LEN = 60
def parse_args():
parser = argparse.ArgumentParser(
description='Convert CHASE_DB1 dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'CHASE_DB1')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
print('Extracting CHASEDB1.zip...')
zip_file = zipfile.ZipFile(dataset_path)
zip_file.extractall(tmp_dir)
print('Generating training dataset...')
assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}'
for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, img_name))
if osp.splitext(img_name)[1] == '.jpg':
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(img_name)[0] + '.png'))
else:
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
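# e.g. a pixel value of 200 maps to 200 // 128 = 1 (foreground) and a
# value of 30 maps to 30 // 128 = 0 (background)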
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(img_name)[0] + '.png'))
for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, img_name))
if osp.splitext(img_name)[1] == '.jpg':
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(img_name)[0] + '.png'))
else:
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,56 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from cityscapesscripts.preparation.json2labelImg import json2labelImg
from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress,
track_progress)
def convert_json_to_label(json_file):
label_file = json_file.replace('_polygons.json', '_labelTrainIds.png')
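# e.g. 'aachen_000000_000019_gtFine_polygons.json' is converted to
# 'aachen_000000_000019_gtFine_labelTrainIds.png' (illustrative file name)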
json2labelImg(json_file, label_file, 'trainIds')
def parse_args():
parser = argparse.ArgumentParser(
description='Convert Cityscapes annotations to TrainIds')
parser.add_argument('cityscapes_path', help='cityscapes data path')
parser.add_argument('--gt-dir', default='gtFine', type=str)
parser.add_argument('-o', '--out-dir', help='output path')
parser.add_argument(
'--nproc', default=1, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
cityscapes_path = args.cityscapes_path
out_dir = args.out_dir if args.out_dir else cityscapes_path
mkdir_or_exist(out_dir)
gt_dir = osp.join(cityscapes_path, args.gt_dir)
poly_files = []
for poly in scandir(gt_dir, '_polygons.json', recursive=True):
poly_file = osp.join(gt_dir, poly)
poly_files.append(poly_file)
if args.nproc > 1:
track_parallel_progress(convert_json_to_label, poly_files, args.nproc)
else:
track_progress(convert_json_to_label, poly_files)
split_names = ['train', 'val', 'test']
for split in split_names:
filenames = []
for poly in scandir(
osp.join(gt_dir, split), '_polygons.json', recursive=True):
filenames.append(poly.replace('_gtFine_polygons.json', ''))
with open(osp.join(out_dir, f'{split}.txt'), 'w') as f:
f.writelines(name + '\n' for name in filenames)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,308 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
from functools import partial
import numpy as np
from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
track_progress)
from PIL import Image
from scipy.io import loadmat
COCO_LEN = 10000
clsID_to_trID = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
13: 12,
14: 13,
15: 14,
16: 15,
17: 16,
18: 17,
19: 18,
20: 19,
21: 20,
22: 21,
23: 22,
24: 23,
25: 24,
27: 25,
28: 26,
31: 27,
32: 28,
33: 29,
34: 30,
35: 31,
36: 32,
37: 33,
38: 34,
39: 35,
40: 36,
41: 37,
42: 38,
43: 39,
44: 40,
46: 41,
47: 42,
48: 43,
49: 44,
50: 45,
51: 46,
52: 47,
53: 48,
54: 49,
55: 50,
56: 51,
57: 52,
58: 53,
59: 54,
60: 55,
61: 56,
62: 57,
63: 58,
64: 59,
65: 60,
67: 61,
70: 62,
72: 63,
73: 64,
74: 65,
75: 66,
76: 67,
77: 68,
78: 69,
79: 70,
80: 71,
81: 72,
82: 73,
84: 74,
85: 75,
86: 76,
87: 77,
88: 78,
89: 79,
90: 80,
92: 81,
93: 82,
94: 83,
95: 84,
96: 85,
97: 86,
98: 87,
99: 88,
100: 89,
101: 90,
102: 91,
103: 92,
104: 93,
105: 94,
106: 95,
107: 96,
108: 97,
109: 98,
110: 99,
111: 100,
112: 101,
113: 102,
114: 103,
115: 104,
116: 105,
117: 106,
118: 107,
119: 108,
120: 109,
121: 110,
122: 111,
123: 112,
124: 113,
125: 114,
126: 115,
127: 116,
128: 117,
129: 118,
130: 119,
131: 120,
132: 121,
133: 122,
134: 123,
135: 124,
136: 125,
137: 126,
138: 127,
139: 128,
140: 129,
141: 130,
142: 131,
143: 132,
144: 133,
145: 134,
146: 135,
147: 136,
148: 137,
149: 138,
150: 139,
151: 140,
152: 141,
153: 142,
154: 143,
155: 144,
156: 145,
157: 146,
158: 147,
159: 148,
160: 149,
161: 150,
162: 151,
163: 152,
164: 153,
165: 154,
166: 155,
167: 156,
168: 157,
169: 158,
170: 159,
171: 160,
172: 161,
173: 162,
174: 163,
175: 164,
176: 165,
177: 166,
178: 167,
179: 168,
180: 169,
181: 170,
182: 171
}
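# The mapping above collapses the sparse original class ids (ids such as 12,
# 26, 29 and 30 have no entry) into contiguous train ids 0-171.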
def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir,
out_mask_dir, is_train):
imgpath, maskpath = tuple_path
shutil.copyfile(
osp.join(in_img_dir, imgpath),
osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join(
out_img_dir, 'test2014', imgpath))
annotate = loadmat(osp.join(in_ann_dir, maskpath))
mask = annotate['S'].astype(np.uint8)
mask_copy = mask.copy()
for clsID, trID in clsID_to_trID.items():
mask_copy[mask == clsID] = trID
seg_filename = osp.join(out_mask_dir, 'train2014',
maskpath.split('.')[0] +
'_labelTrainIds.png') if is_train else osp.join(
out_mask_dir, 'test2014',
maskpath.split('.')[0] + '_labelTrainIds.png')
Image.fromarray(mask_copy).save(seg_filename, 'PNG')
def generate_coco_list(folder):
train_list = osp.join(folder, 'imageLists', 'train.txt')
test_list = osp.join(folder, 'imageLists', 'test.txt')
train_paths = []
test_paths = []
with open(train_list) as f:
for filename in f:
basename = filename.strip()
imgpath = basename + '.jpg'
maskpath = basename + '.mat'
train_paths.append((imgpath, maskpath))
with open(test_list) as f:
for filename in f:
basename = filename.strip()
imgpath = basename + '.jpg'
maskpath = basename + '.mat'
test_paths.append((imgpath, maskpath))
return train_paths, test_paths
def parse_args():
parser = argparse.ArgumentParser(
description=\
'Convert COCO Stuff 10k annotations to mmsegmentation format') # noqa
parser.add_argument('coco_path', help='coco stuff path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=16, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
coco_path = args.coco_path
nproc = args.nproc
out_dir = args.out_dir or coco_path
out_img_dir = osp.join(out_dir, 'images')
out_mask_dir = osp.join(out_dir, 'annotations')
mkdir_or_exist(osp.join(out_img_dir, 'train2014'))
mkdir_or_exist(osp.join(out_img_dir, 'test2014'))
mkdir_or_exist(osp.join(out_mask_dir, 'train2014'))
mkdir_or_exist(osp.join(out_mask_dir, 'test2014'))
train_list, test_list = generate_coco_list(coco_path)
assert (len(train_list) +
len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
len(train_list), len(test_list))
if args.nproc > 1:
track_parallel_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=True),
train_list,
nproc=nproc)
track_parallel_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=False),
test_list,
nproc=nproc)
else:
track_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=True), train_list)
track_progress(
partial(
convert_to_trainID,
in_img_dir=osp.join(coco_path, 'images'),
in_ann_dir=osp.join(coco_path, 'annotations'),
out_img_dir=out_img_dir,
out_mask_dir=out_mask_dir,
is_train=False), test_list)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,265 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
from functools import partial
from glob import glob
import numpy as np
from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
track_progress)
from PIL import Image
COCO_LEN = 123287
clsID_to_trID = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
12: 11,
13: 12,
14: 13,
15: 14,
16: 15,
17: 16,
18: 17,
19: 18,
20: 19,
21: 20,
22: 21,
23: 22,
24: 23,
26: 24,
27: 25,
30: 26,
31: 27,
32: 28,
33: 29,
34: 30,
35: 31,
36: 32,
37: 33,
38: 34,
39: 35,
40: 36,
41: 37,
42: 38,
43: 39,
45: 40,
46: 41,
47: 42,
48: 43,
49: 44,
50: 45,
51: 46,
52: 47,
53: 48,
54: 49,
55: 50,
56: 51,
57: 52,
58: 53,
59: 54,
60: 55,
61: 56,
62: 57,
63: 58,
64: 59,
66: 60,
69: 61,
71: 62,
72: 63,
73: 64,
74: 65,
75: 66,
76: 67,
77: 68,
78: 69,
79: 70,
80: 71,
81: 72,
83: 73,
84: 74,
85: 75,
86: 76,
87: 77,
88: 78,
89: 79,
91: 80,
92: 81,
93: 82,
94: 83,
95: 84,
96: 85,
97: 86,
98: 87,
99: 88,
100: 89,
101: 90,
102: 91,
103: 92,
104: 93,
105: 94,
106: 95,
107: 96,
108: 97,
109: 98,
110: 99,
111: 100,
112: 101,
113: 102,
114: 103,
115: 104,
116: 105,
117: 106,
118: 107,
119: 108,
120: 109,
121: 110,
122: 111,
123: 112,
124: 113,
125: 114,
126: 115,
127: 116,
128: 117,
129: 118,
130: 119,
131: 120,
132: 121,
133: 122,
134: 123,
135: 124,
136: 125,
137: 126,
138: 127,
139: 128,
140: 129,
141: 130,
142: 131,
143: 132,
144: 133,
145: 134,
146: 135,
147: 136,
148: 137,
149: 138,
150: 139,
151: 140,
152: 141,
153: 142,
154: 143,
155: 144,
156: 145,
157: 146,
158: 147,
159: 148,
160: 149,
161: 150,
162: 151,
163: 152,
164: 153,
165: 154,
166: 155,
167: 156,
168: 157,
169: 158,
170: 159,
171: 160,
172: 161,
173: 162,
174: 163,
175: 164,
176: 165,
177: 166,
178: 167,
179: 168,
180: 169,
181: 170,
255: 255
}
def convert_to_trainID(maskpath, out_mask_dir, is_train):
mask = np.array(Image.open(maskpath))
mask_copy = mask.copy()
for clsID, trID in clsID_to_trID.items():
mask_copy[mask == clsID] = trID
seg_filename = osp.join(
out_mask_dir, 'train2017',
osp.basename(maskpath).split('.')[0] +
'_labelTrainIds.png') if is_train else osp.join(
out_mask_dir, 'val2017',
osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png')
Image.fromarray(mask_copy).save(seg_filename, 'PNG')
def parse_args():
parser = argparse.ArgumentParser(
description=\
'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa
parser.add_argument('coco_path', help='coco stuff path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=16, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
coco_path = args.coco_path
nproc = args.nproc
out_dir = args.out_dir or coco_path
out_img_dir = osp.join(out_dir, 'images')
out_mask_dir = osp.join(out_dir, 'annotations')
mkdir_or_exist(osp.join(out_mask_dir, 'train2017'))
mkdir_or_exist(osp.join(out_mask_dir, 'val2017'))
if out_dir != coco_path:
shutil.copytree(osp.join(coco_path, 'images'), out_img_dir)
train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png'))
train_list = [file for file in train_list if '_labelTrainIds' not in file]
test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png'))
test_list = [file for file in test_list if '_labelTrainIds' not in file]
assert (len(train_list) +
len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
len(train_list), len(test_list))
if args.nproc > 1:
track_parallel_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
train_list,
nproc=nproc)
track_parallel_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
test_list,
nproc=nproc)
else:
track_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
train_list)
track_progress(
partial(
convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
test_list)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,114 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import cv2
import mmcv
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert DRIVE dataset to mmsegmentation format')
parser.add_argument(
'training_path', help='the training part of DRIVE dataset')
parser.add_argument(
'testing_path', help='the testing part of DRIVE dataset')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
training_path = args.training_path
testing_path = args.testing_path
if args.out_dir is None:
out_dir = osp.join('data', 'DRIVE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
print('Extracting training.zip...')
zip_file = zipfile.ZipFile(training_path)
zip_file.extractall(tmp_dir)
print('Generating training dataset...')
now_dir = osp.join(tmp_dir, 'training', 'images')
for img_name in os.listdir(now_dir):
img = mmcv.imread(osp.join(now_dir, img_name))
mmcv.imwrite(
img,
osp.join(
out_dir, 'images', 'training',
osp.splitext(img_name)[0].replace('_training', '') +
'.png'))
now_dir = osp.join(tmp_dir, 'training', '1st_manual')
for img_name in os.listdir(now_dir):
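# the 1st_manual annotations are GIF images, which mmcv.imread cannot
# decode directly, so read the first frame with cv2.VideoCapture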
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(img_name)[0] + '.png'))
print('Extracting test.zip...')
zip_file = zipfile.ZipFile(testing_path)
zip_file.extractall(tmp_dir)
print('Generating validation dataset...')
now_dir = osp.join(tmp_dir, 'test', 'images')
for img_name in os.listdir(now_dir):
img = mmcv.imread(osp.join(now_dir, img_name))
mmcv.imwrite(
img,
osp.join(
out_dir, 'images', 'validation',
osp.splitext(img_name)[0].replace('_test', '') + '.png'))
now_dir = osp.join(tmp_dir, 'test', '1st_manual')
if osp.exists(now_dir):
for img_name in os.listdir(now_dir):
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
now_dir = osp.join(tmp_dir, 'test', '2nd_manual')
if osp.exists(now_dir):
for img_name in os.listdir(now_dir):
cap = cv2.VideoCapture(osp.join(now_dir, img_name))
ret, img = cap.read()
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(img_name)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,112 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
from mmengine.utils import mkdir_or_exist
HRF_LEN = 15
TRAINING_LEN = 5
def parse_args():
parser = argparse.ArgumentParser(
description='Convert HRF dataset to mmsegmentation format')
parser.add_argument('healthy_path', help='the path of healthy.zip')
parser.add_argument(
'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip')
parser.add_argument('glaucoma_path', help='the path of glaucoma.zip')
parser.add_argument(
'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip')
parser.add_argument(
'diabetic_retinopathy_path',
help='the path of diabetic_retinopathy.zip')
parser.add_argument(
'diabetic_retinopathy_manualsegm_path',
help='the path of diabetic_retinopathy_manualsegm.zip')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
images_path = [
args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path
]
annotations_path = [
args.healthy_manualsegm_path, args.glaucoma_manualsegm_path,
args.diabetic_retinopathy_manualsegm_path
]
if args.out_dir is None:
out_dir = osp.join('data', 'HRF')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
print('Generating images...')
for now_path in images_path:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(now_path)
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Generating annotations...')
for now_path in annotations_path:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(now_path)
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a
# threshold to convert the nonstandard annotation imgs. The
# value divided by 128 is equivalent to '1 if value >= 128
# else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(tmp_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,246 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import shutil
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
from PIL import Image
iSAID_palette = \
{
0: (0, 0, 0),
1: (0, 0, 63),
2: (0, 63, 63),
3: (0, 63, 0),
4: (0, 63, 127),
5: (0, 63, 191),
6: (0, 63, 255),
7: (0, 127, 63),
8: (0, 127, 127),
9: (0, 0, 127),
10: (0, 0, 191),
11: (0, 0, 255),
12: (0, 191, 127),
13: (0, 127, 191),
14: (0, 127, 255),
15: (0, 100, 155)
}
iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()}
def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette):
"""RGB-color encoding to grayscale labels."""
arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
for c, i in palette.items():
m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
arr_2d[m] = i
return arr_2d
def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap):
img = np.asarray(Image.open(src_path).convert('RGB'))
img_H, img_W, _ = img.shape
if img_H < patch_H and img_W > patch_W:
img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0)
img_H, img_W, _ = img.shape
elif img_H > patch_H and img_W < patch_W:
img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0)
img_H, img_W, _ = img.shape
elif img_H < patch_H and img_W < patch_W:
img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0)
img_H, img_W, _ = img.shape
for x in range(0, img_W, patch_W - overlap):
for y in range(0, img_H, patch_H - overlap):
x_str = x
x_end = x + patch_W
if x_end > img_W:
diff_x = x_end - img_W
x_str -= diff_x
x_end = img_W
y_str = y
y_end = y + patch_H
if y_end > img_H:
diff_y = y_end - img_H
y_str -= diff_y
y_end = img_H
img_patch = img[y_str:y_end, x_str:x_end, :]
img_patch = Image.fromarray(img_patch.astype(np.uint8))
image = osp.basename(src_path).split('.')[0] + '_' + str(
y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str(
x_end) + '.png'
save_path_image = osp.join(out_dir, 'img_dir', mode, str(image))
img_patch.save(save_path_image, format='BMP')
def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap):
label = mmcv.imread(src_path, channel_order='rgb')
label = iSAID_convert_from_color(label)
img_H, img_W = label.shape
if img_H < patch_H and img_W > patch_W:
label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255)
img_H = patch_H
elif img_H > patch_H and img_W < patch_W:
label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255)
img_W = patch_W
elif img_H < patch_H and img_W < patch_W:
label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255)
img_H = patch_H
img_W = patch_W
for x in range(0, img_W, patch_W - overlap):
for y in range(0, img_H, patch_H - overlap):
x_str = x
x_end = x + patch_W
if x_end > img_W:
diff_x = x_end - img_W
x_str -= diff_x
x_end = img_W
y_str = y
y_end = y + patch_H
if y_end > img_H:
diff_y = y_end - img_H
y_str -= diff_y
y_end = img_H
lab_patch = label[y_str:y_end, x_str:x_end]
lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P')
image = osp.basename(src_path).split('.')[0].split(
'_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str(
x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png'
lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image)))
def parse_args():
parser = argparse.ArgumentParser(
description='Convert iSAID dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='iSAID folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--patch_width',
default=896,
type=int,
help='Width of the cropped image patch')
parser.add_argument(
'--patch_height',
default=896,
type=int,
help='Height of the cropped image patch')
parser.add_argument(
'--overlap_area', default=384, type=int, help='Overlap area')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
# image patch width and height
patch_H, patch_W = args.patch_width, args.patch_height
overlap = args.overlap_area # overlap area
if args.out_dir is None:
out_dir = osp.join('data', 'iSAID')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test'))
assert os.path.exists(os.path.join(dataset_path, 'train')), \
f'train is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'val')), \
f'val is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'test')), \
f'test is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset_mode in ['train', 'val', 'test']:
print(f'Extracting {dataset_mode} data...')
img_zipp_list = glob.glob(
os.path.join(dataset_path, dataset_mode, 'images', '*.zip'))
print('Found the data', img_zipp_list)
for img_zipp in img_zipp_list:
zip_file = zipfile.ZipFile(img_zipp)
zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img'))
src_path_list = glob.glob(
os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png'))
src_prog_bar = ProgressBar(len(src_path_list))
for i, img_path in enumerate(src_path_list):
if dataset_mode != 'test':
slide_crop_image(img_path, out_dir, dataset_mode, patch_H,
patch_W, overlap)
else:
shutil.move(img_path,
os.path.join(out_dir, 'img_dir', dataset_mode))
src_prog_bar.update()
if dataset_mode != 'test':
label_zipp_list = glob.glob(
os.path.join(dataset_path, dataset_mode, 'Semantic_masks',
'*.zip'))
for label_zipp in label_zipp_list:
zip_file = zipfile.ZipFile(label_zipp)
zip_file.extractall(
os.path.join(tmp_dir, dataset_mode, 'lab'))
lab_path_list = glob.glob(
os.path.join(tmp_dir, dataset_mode, 'lab', 'images',
'*.png'))
lab_prog_bar = ProgressBar(len(lab_path_list))
for i, lab_path in enumerate(lab_path_list):
slide_crop_label(lab_path, out_dir, dataset_mode, patch_H,
patch_W, overlap)
lab_prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,99 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import mmcv
import numpy as np
from mmengine.utils import ProgressBar
def parse_args():
parser = argparse.ArgumentParser(
description='Convert levir-cd dataset to mmsegmentation format')
parser.add_argument('--dataset_path', help='LEVIR-CD folder path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=256)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def main():
args = parse_args()
input_folder = args.dataset_path
png_files = glob.glob(
os.path.join(input_folder, '**/*.png'), recursive=True)
output_folder = args.out_dir
prog_bar = ProgressBar(len(png_files))
for png_file in png_files:
new_path = os.path.join(
output_folder,
os.path.relpath(os.path.dirname(png_file), input_folder))
os.makedirs(os.path.dirname(new_path), exist_ok=True)
label = False
if 'label' in png_file:
label = True
clip_big_image(png_file, new_path, args, label)
prog_bar.update()
def clip_big_image(image_path, clip_save_dir, args, to_label=False):
image = mmcv.imread(image_path)
h, w, c = image.shape
clip_size = args.clip_size
stride_size = args.stride_size
num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
(h - clip_size) /
stride_size) * stride_size + clip_size >= h else math.ceil(
(h - clip_size) / stride_size) + 1
num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
(w - clip_size) /
stride_size) * stride_size + clip_size >= w else math.ceil(
(w - clip_size) / stride_size) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * clip_size
ymin = y * clip_size
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
np.zeros_like(xmin))
ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + clip_size, w),
np.minimum(ymin + clip_size, h)
],
axis=1)
if to_label:
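# the change-detection masks are assumed to be stored as 0/255; map 255
# to 1 so the saved labels use train ids {0, 1}, then keep one channel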
image[image == 255] = 1
image = image[:, :, 0]
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y, start_x:end_x] \
if to_label else image[start_y:end_y, start_x:end_x, :]
idx = osp.basename(image_path).split('.')[0]
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(clip_save_dir,
f'{idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,73 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import shutil
import tempfile
import zipfile
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert LoveDA dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='LoveDA folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'loveDA')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'img_dir'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
assert 'Train.zip' in os.listdir(dataset_path), \
f'Train.zip is not in {dataset_path}'
assert 'Val.zip' in os.listdir(dataset_path), \
f'Val.zip is not in {dataset_path}'
assert 'Test.zip' in os.listdir(dataset_path), \
f'Test.zip is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset in ['Train', 'Val', 'Test']:
zip_file = zipfile.ZipFile(
os.path.join(dataset_path, dataset + '.zip'))
zip_file.extractall(tmp_dir)
data_type = dataset.lower()
for location in ['Rural', 'Urban']:
for image_type in ['images_png', 'masks_png']:
if image_type == 'images_png':
dst = osp.join(out_dir, 'img_dir', data_type)
else:
dst = osp.join(out_dir, 'ann_dir', data_type)
if dataset == 'Test' and image_type == 'masks_png':
continue
else:
src_dir = osp.join(tmp_dir, dataset, location,
image_type)
src_lst = os.listdir(src_dir)
for file in src_lst:
shutil.move(osp.join(src_dir, file), dst)
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,89 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
import tempfile
import zipfile
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert NYU Depth dataset to mmsegmentation format')
parser.add_argument('raw_data', help='the path of raw data')
parser.add_argument(
'-o', '--out_dir', help='output path', default='./data/nyu')
args = parser.parse_args()
return args
def reorganize(raw_data_dir: str, out_dir: str):
"""Reorganize NYU Depth dataset files into the required directory
structure.
Args:
raw_data_dir (str): Path to the raw data directory.
out_dir (str): Output directory for the organized dataset.
"""
def move_data(data_list, dst_prefix, fname_func):
"""Move data files from source to destination directory.
Args:
data_list (list): List of data file paths.
dst_prefix (str): Prefix to be added to destination paths.
fname_func (callable): Function to process file names
"""
for data_item in data_list:
data_item = data_item.strip().strip('/')
new_item = fname_func(data_item)
shutil.move(
osp.join(raw_data_dir, data_item),
osp.join(out_dir, dst_prefix, new_item))
def process_phase(phase):
"""Process a dataset phase (e.g., 'train' or 'test')."""
with open(osp.join(raw_data_dir, f'nyu_{phase}.txt')) as f:
data = filter(lambda x: len(x.strip()) > 0, f.readlines())
data = map(lambda x: x.split()[:2], data)
images, annos = zip(*data)
move_data(images, f'images/{phase}',
lambda x: x.replace('/rgb', ''))
move_data(annos, f'annotations/{phase}',
lambda x: x.replace('/sync_depth', ''))
process_phase('train')
process_phase('test')
def main():
args = parse_args()
print('Making directories...')
mkdir_or_exist(args.out_dir)
for subdir in [
'images/train', 'images/test', 'annotations/train',
'annotations/test'
]:
mkdir_or_exist(osp.join(args.out_dir, subdir))
print('Generating images and annotations...')
if args.raw_data.endswith('.zip'):
with tempfile.TemporaryDirectory() as tmp_dir:
zip_file = zipfile.ZipFile(args.raw_data)
zip_file.extractall(tmp_dir)
reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir)
else:
assert osp.isdir(
args.raw_data
), 'the raw_data argument should be either a zip file or a directory.'
reorganize(args.raw_data, args.out_dir)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,87 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from functools import partial
import numpy as np
from detail import Detail
from mmengine.utils import mkdir_or_exist, track_progress
from PIL import Image
_mapping = np.sort(
np.array([
0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
]))
_key = np.array(range(len(_mapping))).astype('uint8')
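# _mapping holds the 60 sorted category ids kept from the original annotation
# (background plus 59 context classes); np.digitize below locates each pixel
# value in this sorted array and _key relabels it with the contiguous index
# 0-59.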
def generate_labels(img_id, detail, out_dir):
def _class_to_index(mask, _mapping, _key):
# assert the values
values = np.unique(mask)
for i in range(len(values)):
assert (values[i] in _mapping)
index = np.digitize(mask.ravel(), _mapping, right=True)
return _key[index].reshape(mask.shape)
mask = Image.fromarray(
_class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
filename = img_id['file_name']
mask.save(osp.join(out_dir, filename.replace('jpg', 'png')))
return osp.splitext(osp.basename(filename))[0]
def parse_args():
parser = argparse.ArgumentParser(
description='Convert PASCAL Context annotations to mmsegmentation format')
parser.add_argument('devkit_path', help='pascal voc devkit path')
parser.add_argument('json_path', help='annotation json filepath')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
devkit_path = args.devkit_path
if args.out_dir is None:
out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
else:
out_dir = args.out_dir
json_path = args.json_path
mkdir_or_exist(out_dir)
img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
train_detail = Detail(json_path, img_dir, 'train')
train_ids = train_detail.getImgs()
val_detail = Detail(json_path, img_dir, 'val')
val_ids = val_detail.getImgs()
mkdir_or_exist(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext'))
train_list = track_progress(
partial(generate_labels, detail=train_detail, out_dir=out_dir),
train_ids)
with open(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
'train.txt'), 'w') as f:
f.writelines(line + '\n' for line in sorted(train_list))
val_list = track_progress(
partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids)
with open(
osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
'val.txt'), 'w') as f:
f.writelines(line + '\n' for line in sorted(val_list))
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,158 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert potsdam dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='potsdam folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=512)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def clip_big_image(image_path, clip_save_dir, args, to_label=False):
# The original Potsdam images are very large, so they are pre-processed
# into smaller patches. Given a fixed clip size and stride size, the
# number of patches along the width and height is determined.
# For example, a 5120 x 5120 original image with a clip size of 512 and
# a stride size of 256 would generate about 20 x 20 = 400 patches, each
# of size 512 x 512.
image = mmcv.imread(image_path)
h, w, c = image.shape
clip_size = args.clip_size
stride_size = args.stride_size
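# e.g. for h = 6000, clip_size = 512 and stride_size = 256:
# ceil((6000 - 512) / 256) = 22 and 22 * 256 + 512 = 6144 >= 6000,
# so num_rows = 22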
num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
(h - clip_size) /
stride_size) * stride_size + clip_size >= h else math.ceil(
(h - clip_size) / stride_size) + 1
num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
(w - clip_size) /
stride_size) * stride_size + clip_size >= w else math.ceil(
(w - clip_size) / stride_size) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * clip_size
ymin = y * clip_size
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
np.zeros_like(xmin))
ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + clip_size, w),
np.minimum(ymin + clip_size, h)
],
axis=1)
if to_label:
color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
[255, 255, 0], [0, 255, 0], [0, 255, 255],
[0, 0, 255]])
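# reduce each pixel to a scalar via a dot product with (2, 3, 4); every color
# in the palette above maps to a distinct scalar, so class ids can be assigned
# by comparing scalars instead of full color triples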
flatten_v = np.matmul(
image.reshape(-1, c),
np.array([2, 3, 4]).reshape(3, 1))
out = np.zeros_like(flatten_v)
for idx, class_color in enumerate(color_map):
value_idx = np.matmul(class_color,
np.array([2, 3, 4]).reshape(3, 1))
out[flatten_v == value_idx] = idx
image = out.reshape(h, w)
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y,
start_x:end_x] if to_label else image[
start_y:end_y, start_x:end_x, :]
idx_i, idx_j = osp.basename(image_path).split('_')[2:4]
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(
clip_save_dir,
f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
def main():
args = parse_args()
splits = {
'train': [
'2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11',
'4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7',
'6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9'
],
'val': [
'5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13',
'4_15', '2_14', '5_13', '4_13', '3_14', '7_13'
]
}
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'potsdam')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
print('Found the data', zipp_list)
for zipp in zipp_list:
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
zip_file = zipfile.ZipFile(zipp)
zip_file.extractall(tmp_dir)
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
if not len(src_path_list):
sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0])
src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif'))
prog_bar = ProgressBar(len(src_path_list))
for i, src_path in enumerate(src_path_list):
idx_i, idx_j = osp.basename(src_path).split('_')[2:4]
data_type = 'train' if f'{idx_i}_{idx_j}' in splits[
'train'] else 'val'
if 'label' in src_path:
dst_dir = osp.join(out_dir, 'ann_dir', data_type)
clip_big_image(src_path, dst_dir, args, to_label=True)
else:
dst_dir = osp.join(out_dir, 'img_dir', data_type)
clip_big_image(src_path, dst_dir, args, to_label=False)
prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,110 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert REFUGE dataset to mmsegmentation format')
parser.add_argument('--raw_data_root', help='the root path of raw data')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def extract_img(root: str,
cur_dir: str,
out_dir: str,
mode: str = 'train',
file_type: str = 'img') -> None:
"""_summary_
Args:
Args:
root (str): root where the extracted data is saved
cur_dir (cur_dir): dir where the zip_file exists
out_dir (str): root dir where the data is saved
mode (str, optional): Defaults to 'train'.
file_type (str, optional): Defaults to 'img',else to 'mask'.
"""
zip_file = zipfile.ZipFile(cur_dir)
zip_file.extractall(root)
for cur_dir, dirs, files in os.walk(root):
# only process leaf directories, skipping 'Illustration' and 'MACOSX' folders
if len(dirs) == 0 and \
osp.basename(cur_dir).find('Illustration') == -1 and \
cur_dir.find('MACOSX') == -1:
file_names = [
file for file in files
if file.endswith('.jpg') or file.endswith('.bmp')
]
for filename in sorted(file_names):
img = mmcv.imread(osp.join(cur_dir, filename))
if file_type == 'annotations':
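# remap the grayscale mask values to train ids: 0 -> 1, 128 -> 2 and
# 255 -> 0 (255 is assumed to be the background in the raw REFUGE masks)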
img = img[:, :, 0]
img[np.where(img == 0)] = 1
img[np.where(img == 128)] = 2
img[np.where(img == 255)] = 0
mmcv.imwrite(
img,
osp.join(out_dir, file_type, mode,
osp.splitext(filename)[0] + '.png'))
def main():
args = parse_args()
raw_data_root = args.raw_data_root
if args.out_dir is None:
out_dir = osp.join('./data', 'REFUGE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'images', 'test'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'test'))
print('Generating images and annotations...')
# process data from the child dir on the first rank
cur_dir, dirs, files = list(os.walk(raw_data_root))[0]
print('====================')
files = list(filter(lambda x: x.endswith('.zip'), files))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for file in files:
# search data folders for training, validation and test
mode = list(
filter(lambda x: file.lower().find(x) != -1,
['training', 'test', 'validation']))[0]
file_root = osp.join(tmp_dir, file[:-4])
file_type = 'images' if file.find('Anno') == -1 and file.find(
'GT') == -1 else 'annotations'
extract_img(file_root, osp.join(cur_dir, file), out_dir, mode,
file_type)
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,167 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import gzip
import os
import os.path as osp
import tarfile
import tempfile
import mmcv
from mmengine.utils import mkdir_or_exist
STARE_LEN = 20
TRAINING_LEN = 10
def un_gz(src, dst):
g_file = gzip.GzipFile(src)
with open(dst, 'wb+') as f:
f.write(g_file.read())
g_file.close()
def parse_args():
parser = argparse.ArgumentParser(
description='Convert STARE dataset to mmsegmentation format')
parser.add_argument('image_path', help='the path of stare-images.tar')
parser.add_argument('labels_ah', help='the path of labels-ah.tar')
parser.add_argument('labels_vk', help='the path of labels-vk.tar')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
args = parser.parse_args()
return args
def main():
args = parse_args()
image_path = args.image_path
labels_ah = args.labels_ah
labels_vk = args.labels_vk
if args.out_dir is None:
out_dir = osp.join('data', 'STARE')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(out_dir)
mkdir_or_exist(osp.join(out_dir, 'images'))
mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
mkdir_or_exist(osp.join(out_dir, 'annotations'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting stare-images.tar...')
with tarfile.open(image_path) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img,
osp.join(out_dir, 'images', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting labels-ah.tar...')
with tarfile.open(labels_ah) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
# The annotation img should be divided by 128, because some of
# the annotation imgs are not standard. We should set a threshold
# to convert the nonstandard annotation imgs. The value divided by
# 128 is equivalent to '1 if value >= 128 else 0'
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
mkdir_or_exist(osp.join(tmp_dir, 'gz'))
mkdir_or_exist(osp.join(tmp_dir, 'files'))
print('Extracting labels-vk.tar...')
with tarfile.open(labels_vk) as f:
f.extractall(osp.join(tmp_dir, 'gz'))
for filename in os.listdir(osp.join(tmp_dir, 'gz')):
un_gz(
osp.join(tmp_dir, 'gz', filename),
osp.join(tmp_dir, 'files',
osp.splitext(filename)[0]))
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'training',
osp.splitext(filename)[0] + '.png'))
for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
img = mmcv.imread(osp.join(now_dir, filename))
mmcv.imwrite(
img[:, :, 0] // 128,
osp.join(out_dir, 'annotations', 'validation',
osp.splitext(filename)[0] + '.png'))
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,155 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import nibabel as nib
import numpy as np
from mmengine.utils import mkdir_or_exist
from PIL import Image
def read_files_from_txt(txt_path):
with open(txt_path) as f:
files = f.readlines()
files = [file.strip() for file in files]
return files
def read_nii_file(nii_path):
img = nib.load(nii_path).get_fdata()
return img
def split_3d_image(img):
c, _, _ = img.shape
res = []
for i in range(c):
res.append(img[i, :, :])
return res
def label_mapping(label):
"""Label mapping from TransUNet paper setting. It only has 9 classes, which
are 'background', 'aorta', 'gallbladder', 'left_kidney', 'right_kidney',
'liver', 'pancreas', 'spleen', 'stomach', respectively. Other foreground
classes in original dataset are all set to background.
More details could be found here: https://arxiv.org/abs/2102.04306
"""
mapped_label = np.zeros_like(label)
mapped_label[label == 8] = 1
mapped_label[label == 4] = 2
mapped_label[label == 3] = 3
mapped_label[label == 2] = 4
mapped_label[label == 6] = 5
mapped_label[label == 11] = 6
mapped_label[label == 1] = 7
mapped_label[label == 7] = 8
return mapped_label
def parse_args():
parser = argparse.ArgumentParser(
description='Convert synapse dataset to mmsegmentation format')
parser.add_argument(
'--dataset-path', type=str, help='synapse dataset path.')
parser.add_argument(
'--save-path',
default='data/synapse',
type=str,
help='save path of the dataset.')
args = parser.parse_args()
return args
def main():
args = parse_args()
dataset_path = args.dataset_path
save_path = args.save_path
if not osp.exists(dataset_path):
raise ValueError('The dataset path does not exist. '
'Please enter a correct dataset path.')
if not osp.exists(osp.join(dataset_path, 'img')) \
or not osp.exists(osp.join(dataset_path, 'label')):
raise FileNotFoundError('The dataset structure is incorrect. '
'Please check your dataset.')
train_id = read_files_from_txt(osp.join(dataset_path, 'train.txt'))
train_id = [idx[3:7] for idx in train_id]
test_id = read_files_from_txt(osp.join(dataset_path, 'val.txt'))
test_id = [idx[3:7] for idx in test_id]
mkdir_or_exist(osp.join(save_path, 'img_dir/train'))
mkdir_or_exist(osp.join(save_path, 'img_dir/val'))
mkdir_or_exist(osp.join(save_path, 'ann_dir/train'))
mkdir_or_exist(osp.join(save_path, 'ann_dir/val'))
# It follows data preparation pipeline from here:
# https://github.com/Beckschen/TransUNet/tree/main/datasets
for i, idx in enumerate(train_id):
img_3d = read_nii_file(
osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
label_3d = read_nii_file(
osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
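# window the intensities to [-125, 275] and rescale linearly to [0, 255]:
# (x + 125) / 400 maps [-125, 275] to [0, 1], then multiply by 255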
img_3d = np.clip(img_3d, -125, 275)
img_3d = (img_3d + 125) / 400
img_3d *= 255
img_3d = np.transpose(img_3d, [2, 0, 1])
img_3d = np.flip(img_3d, 2)
label_3d = np.transpose(label_3d, [2, 0, 1])
label_3d = np.flip(label_3d, 2)
label_3d = label_mapping(label_3d)
for c in range(img_3d.shape[0]):
img = img_3d[c]
label = label_3d[c]
img = Image.fromarray(img).convert('RGB')
label = Image.fromarray(label).convert('L')
img.save(
osp.join(
save_path, 'img_dir/train', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.jpg'))
label.save(
osp.join(
save_path, 'ann_dir/train', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.png'))
for i, idx in enumerate(test_id):
img_3d = read_nii_file(
osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
label_3d = read_nii_file(
osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
img_3d = np.clip(img_3d, -125, 275)
img_3d = (img_3d + 125) / 400
img_3d *= 255
img_3d = np.transpose(img_3d, [2, 0, 1])
img_3d = np.flip(img_3d, 2)
label_3d = np.transpose(label_3d, [2, 0, 1])
label_3d = np.flip(label_3d, 2)
label_3d = label_mapping(label_3d)
for c in range(img_3d.shape[0]):
img = img_3d[c]
label = label_3d[c]
img = Image.fromarray(img).convert('RGB')
label = Image.fromarray(label).convert('L')
img.save(
osp.join(
save_path, 'img_dir/val', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.jpg'))
label.save(
osp.join(
save_path, 'ann_dir/val', 'case' + idx.zfill(4) +
'_slice' + str(c).zfill(3) + '.png'))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,156 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
from mmengine.utils import ProgressBar, mkdir_or_exist
def parse_args():
parser = argparse.ArgumentParser(
description='Convert vaihingen dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='vaihingen folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=512)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def clip_big_image(image_path, clip_save_dir, to_label=False):
# The original Vaihingen images are very large, so they are pre-processed
# into smaller patches. Given a fixed clip size and stride size, the
# number of patches along the width and height is determined.
# For example, a 5120 x 5120 original image with a clip size of 512 and
# a stride size of 256 would generate about 20 x 20 = 400 patches, each
# of size 512 x 512.
image = mmcv.imread(image_path)
h, w, c = image.shape
cs = args.clip_size
ss = args.stride_size
num_rows = math.ceil((h - cs) / ss) if math.ceil(
(h - cs) / ss) * ss + cs >= h else math.ceil((h - cs) / ss) + 1
num_cols = math.ceil((w - cs) / ss) if math.ceil(
(w - cs) / ss) * ss + cs >= w else math.ceil((w - cs) / ss) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * cs
ymin = y * cs
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + cs > w, w - xmin - cs, np.zeros_like(xmin))
ymin_offset = np.where(ymin + cs > h, h - ymin - cs, np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + cs, w),
np.minimum(ymin + cs, h)
],
axis=1)
if to_label:
color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
[255, 255, 0], [0, 255, 0], [0, 255, 255],
[0, 0, 255]])
flatten_v = np.matmul(
image.reshape(-1, c),
np.array([2, 3, 4]).reshape(3, 1))
out = np.zeros_like(flatten_v)
for idx, class_color in enumerate(color_map):
value_idx = np.matmul(class_color,
np.array([2, 3, 4]).reshape(3, 1))
out[flatten_v == value_idx] = idx
image = out.reshape(h, w)
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y,
start_x:end_x] if to_label else image[
start_y:end_y, start_x:end_x, :]
area_idx = osp.basename(image_path).split('_')[3].strip('.tif')
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(clip_save_dir,
f'{area_idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
def main():
splits = {
'train': [
'area1', 'area11', 'area13', 'area15', 'area17', 'area21',
'area23', 'area26', 'area28', 'area3', 'area30', 'area32',
'area34', 'area37', 'area5', 'area7'
],
'val': [
'area6', 'area24', 'area35', 'area16', 'area14', 'area22',
'area10', 'area4', 'area2', 'area20', 'area8', 'area31', 'area33',
'area27', 'area38', 'area12', 'area29'
],
}
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'vaihingen')
else:
out_dir = args.out_dir
print('Making directories...')
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
print('Found the data', zipp_list)
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for zipp in zipp_list:
zip_file = zipfile.ZipFile(zipp)
zip_file.extractall(tmp_dir)
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen' in zipp:
src_path_list = glob.glob(
os.path.join(os.path.join(tmp_dir, 'top'), '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE' in zipp: # noqa
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
# delete unused area9 ground truth
src_path_list = [
path for path in src_path_list if 'area9' not in path
]
prog_bar = ProgressBar(len(src_path_list))
for i, src_path in enumerate(src_path_list):
area_idx = osp.basename(src_path).split('_')[3].strip('.tif')
data_type = 'train' if area_idx in splits['train'] else 'val'
if 'noBoundary' in src_path:
dst_dir = osp.join(out_dir, 'ann_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=True)
else:
dst_dir = osp.join(out_dir, 'img_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=False)
prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
args = parse_args()
main()

View File

@@ -0,0 +1,92 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from functools import partial
import numpy as np
from mmengine.utils import mkdir_or_exist, scandir, track_parallel_progress
from PIL import Image
from scipy.io import loadmat
AUG_LEN = 10582
def convert_mat(mat_file, in_dir, out_dir):
data = loadmat(osp.join(in_dir, mat_file))
mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8)
seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png'))
Image.fromarray(mask).save(seg_filename, 'PNG')
def generate_aug_list(merged_list, excluded_list):
return list(set(merged_list) - set(excluded_list))
def parse_args():
parser = argparse.ArgumentParser(
description='Convert PASCAL VOC annotations to mmsegmentation format')
parser.add_argument('devkit_path', help='pascal voc devkit path')
parser.add_argument('aug_path', help='pascal voc aug path')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--nproc', default=1, type=int, help='number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
devkit_path = args.devkit_path
aug_path = args.aug_path
nproc = args.nproc
if args.out_dir is None:
out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug')
else:
out_dir = args.out_dir
mkdir_or_exist(out_dir)
in_dir = osp.join(aug_path, 'dataset', 'cls')
track_parallel_progress(
partial(convert_mat, in_dir=in_dir, out_dir=out_dir),
list(scandir(in_dir, suffix='.mat')),
nproc=nproc)
full_aug_list = []
with open(osp.join(aug_path, 'dataset', 'train.txt')) as f:
full_aug_list += [line.strip() for line in f]
with open(osp.join(aug_path, 'dataset', 'val.txt')) as f:
full_aug_list += [line.strip() for line in f]
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'train.txt')) as f:
ori_train_list = [line.strip() for line in f]
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'val.txt')) as f:
val_list = [line.strip() for line in f]
aug_train_list = generate_aug_list(ori_train_list + full_aug_list,
val_list)
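# trainaug is the union of the original VOC train split and the SBD
# train/val lists with the VOC val images removed; it should contain
# exactly AUG_LEN = 10582 images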
assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format(
AUG_LEN)
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
'trainaug.txt'), 'w') as f:
f.writelines(line + '\n' for line in aug_train_list)
aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list)
assert len(aug_list) == AUG_LEN - len(
ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN -
len(ori_train_list))
with open(
osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'),
'w') as f:
f.writelines(line + '\n' for line in aug_list)
print('Done!')
if __name__ == '__main__':
main()