1.WHU-CD
WHU-CD 是一个公开的建筑物变化检测(Change Detection,CD)数据集。它包含一对尺寸为 32507×15354 的高分辨率(0.075 米)航空图像。通常将图像裁剪成大小为 256×256、互不重叠的小块,并将其随机分为三部分:6096/762/762,分别用于训练/验证/测试。
2.裁剪
import os
from PIL import Image
def crop_and_save_images(source_folder, target_folder, patch_size=256):
    """Cut every ``.tif`` image in *source_folder* into square PNG patches.

    Each image is walked row by row from the top-left corner in steps of
    *patch_size* pixels, so the patches do not overlap. Patch ``n`` of
    ``name.tif`` is written to ``target_folder/name_n.png``, with ``n``
    starting at 0 for every source image.

    Crop boxes on the right and bottom edges may extend past the image when
    its size is not a multiple of *patch_size*; they are still saved as full
    ``patch_size`` x ``patch_size`` tiles (Pillow is expected to fill the
    out-of-bounds area with zeros -- verify against the installed version).

    Args:
        source_folder: Directory scanned (non-recursively) for files whose
            name ends with ``.tif``; every other entry is ignored.
        target_folder: Directory that receives the patches. It is created
            if it does not exist yet.
        patch_size: Edge length of a patch in pixels. Defaults to 256.

    Raises:
        ValueError: If *patch_size* is not positive.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be a positive integer")

    # Sorted only to make the processing order deterministic; patch numbering
    # restarts per image, so the order does not affect the output names.
    tif_files = sorted(
        name for name in os.listdir(source_folder) if name.endswith('.tif')
    )

    # Image.save() does not create missing directories.
    os.makedirs(target_folder, exist_ok=True)
    if not tif_files:
        return

    # Pillow raises DecompressionBombError for images with more than twice
    # Image.MAX_IMAGE_PIXELS pixels, which very large aerial mosaics exceed.
    # The inputs are local, trusted files, so lift the limit while cropping
    # and put the previous value back afterwards.
    previous_limit = Image.MAX_IMAGE_PIXELS
    Image.MAX_IMAGE_PIXELS = None
    try:
        for file in tif_files:
            stem = os.path.splitext(file)[0]
            # The context manager releases the file handle of each source image.
            with Image.open(os.path.join(source_folder, file)) as img:
                width, height = img.size
                patch_num = 0
                for y in range(0, height, patch_size):
                    for x in range(0, width, patch_size):
                        patch = img.crop((x, y, x + patch_size, y + patch_size))
                        patch_name = "{}_{}.png".format(stem, patch_num)
                        patch.save(os.path.join(target_folder, patch_name))
                        patch_num += 1
    finally:
        Image.MAX_IMAGE_PIXELS = previous_limit
# Input folders with the full-size rasters (T1/T2 are presumably the two
# acquisition dates and label1 the change mask -- confirm against the data).
source_folder_T1, source_folder_T2, source_folder_label1 = "T1", "T2", "label1"

# Output folders that receive the cropped patches.
target_folder_A, target_folder_B, target_folder_label = "A", "B", "label"

# Crop the three inputs in the same order: T1 -> A, T2 -> B, label1 -> label.
for _src_dir, _dst_dir in (
    (source_folder_T1, target_folder_A),
    (source_folder_T2, target_folder_B),
    (source_folder_label1, target_folder_label),
):
    crop_and_save_images(_src_dir, _dst_dir)
3.按照 8:1:1 划分训练集、测试集与验证集
import os
import random
# Folder whose entries get distributed over the three list files below.
source_folder = "images"

# Output list files. Note that label_txt receives the entries left over after
# the train and test shares have been taken (the validation share).
train_txt, test_txt, label_txt = "train.txt", "test.txt", "label.txt"
def split_images_to_txt(source_folder, train_txt, test_txt, label_txt, seed=None):
    """Randomly split the entries of *source_folder* 8:1:1 into three lists.

    The directory listing is shuffled, then the first ``int(total * 0.8)``
    names go to *train_txt*, the next ``int(total * 0.1)`` names to
    *test_txt*, and all remaining names to *label_txt*. Each output file
    holds one file name (including its extension) per line. Existing output
    files are overwritten.

    Args:
        source_folder: Directory whose entries are split. Every entry
            returned by ``os.listdir`` is used; nothing is filtered out.
        train_txt: Path of the list file for the training share.
        test_txt: Path of the list file for the test share.
        label_txt: Path of the list file that receives the remaining
            entries (the validation share).
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        A ``(train, test, remaining)`` tuple with the number of names
        written to each file.
    """
    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise the same seed could still produce different splits.
    image_files = sorted(os.listdir(source_folder))
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    total_num = len(image_files)
    train_num = int(total_num * 0.8)
    test_num = int(total_num * 0.1)
    test_end = train_num + test_num

    # The last share takes whatever is left, so rounding never drops a file.
    shares = (
        (train_txt, image_files[:train_num]),
        (test_txt, image_files[train_num:test_end]),
        (label_txt, image_files[test_end:]),
    )
    for list_path, names in shares:
        with open(list_path, 'w') as list_file:
            list_file.writelines(name + '\n' for name in names)

    return tuple(len(names) for _, names in shares)
# Run the split with the module-level configuration.
split_images_to_txt(
    source_folder=source_folder,
    train_txt=train_txt,
    test_txt=test_txt,
    label_txt=label_txt,
)