变化检测WHU-CD数据集处理

1.WHU-CD
WHU-CD 是一个公开的建筑物变化检测(CD)数据集。它包含一对尺寸为 32507×15354 的高分辨率(0.075 米)航空图像。通常将图像裁剪成大小为 256×256、互不重叠的小块(每幅图 127×60 = 7620 块,右侧和底部不足 256 像素的边缘块需要填充),并将其随机分为三部分:6096/762/762 块分别用于训练/验证/测试。
2.裁剪

import os
from PIL import Image

def crop_and_save_images(source_folder, target_folder, patch_size=256):
    """Tile every ``.tif`` image in *source_folder* into square PNG patches.

    Each image is scanned top-to-bottom and, within a row, left-to-right in
    steps of *patch_size*, so the patches do not overlap.  Every patch is
    saved in *target_folder* as ``<image stem>_<index>.png``, where the index
    starts at 0 for each image and increases in scan order.

    Patches on the right and bottom borders are still emitted at full size
    even when they extend past the image; the area outside the image is
    whatever ``Image.crop`` produces there (zero-filled in practice).

    Note: Pillow refuses to open images whose pixel count is far above
    ``Image.MAX_IMAGE_PIXELS`` (``DecompressionBombError``).  For very large
    rasters the caller has to raise or disable that limit before calling.

    Args:
        source_folder: Directory that is listed for ``.tif`` files.
        target_folder: Directory the PNG patches are written to.  It is
            created (including parents) when it does not exist yet.
        patch_size: Edge length of the square patches in pixels.

    Raises:
        ValueError: If *patch_size* is not a positive number.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be positive, got {}".format(patch_size))

    # The original relied on the output directory already existing; create it
    # so a fresh checkout works.  An empty string means "current directory".
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)

    for file in os.listdir(source_folder):
        if not file.endswith('.tif'):
            continue

        stem = os.path.splitext(file)[0]
        image_path = os.path.join(source_folder, file)

        # The context manager releases the file handle once all patches of
        # this image have been written.
        with Image.open(image_path) as img:
            width, height = img.size

            patch_num = 0
            for y in range(0, height, patch_size):
                for x in range(0, width, patch_size):
                    cropped_img = img.crop((x, y, x + patch_size, y + patch_size))
                    cropped_img_name = stem + "_{}.png".format(patch_num)
                    cropped_img.save(os.path.join(target_folder, cropped_img_name))
                    patch_num += 1

# Input folders that are scanned for .tif files
# (presumably the two acquisition dates and the change mask - confirm).
source_folder_T1 = "T1"
source_folder_T2 = "T2"
source_folder_label1 = "label1"

# Output folders that receive the PNG patches.
target_folder_A = "A"
target_folder_B = "B"
target_folder_label = "label"

# Crop each input folder into its matching output folder, in this order.
for _src_dir, _dst_dir in (
    (source_folder_T1, target_folder_A),
    (source_folder_T2, target_folder_B),
    (source_folder_label1, target_folder_label),
):
    crop_and_save_images(_src_dir, _dst_dir)

3.按照 8:1:1 的比例划分训练集、测试集与验证集(验证集的文件列表写入 label.txt)

import os
import random

# Folder whose entries are listed and split by split_images_to_txt.
source_folder = "images"
# Output list files passed to split_images_to_txt.
train_txt = "train.txt"
test_txt = "test.txt"
# Receives the entries left over after the train and test portions.
label_txt = "label.txt"

def split_images_to_txt(source_folder, train_txt, test_txt, label_txt, seed=None):
    """Randomly split the entries of *source_folder* 8:1:1 into three lists.

    The directory entries are shuffled; the first 80 % (rounded down) are
    written to *train_txt*, the next 10 % (rounded down) to *test_txt*, and
    all remaining entries to *label_txt*.  Each output file holds one entry
    name (including its extension) per line and is overwritten if it exists.

    Args:
        source_folder: Directory whose entries are split.
        train_txt: Path of the list file for the first 80 % of the entries.
        test_txt: Path of the list file for the following 10 %.
        label_txt: Path of the list file for the remaining entries.
        seed: Optional seed.  When given, the same directory contents always
            produce the same split; when ``None`` the module-level ``random``
            state is used.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # a seed alone would not make the split reproducible.
    image_files = sorted(os.listdir(source_folder))
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    total_num = len(image_files)
    train_end = int(total_num * 0.8)
    test_end = train_end + int(total_num * 0.1)

    with open(train_txt, 'w') as f_train, \
            open(test_txt, 'w') as f_test, \
            open(label_txt, 'w') as f_label:
        f_train.writelines(name + '\n' for name in image_files[:train_end])
        f_test.writelines(name + '\n' for name in image_files[train_end:test_end])
        # Whatever is left after the train and test portions.
        f_label.writelines(name + '\n' for name in image_files[test_end:])
        

# Run the split with the paths configured above.
split_images_to_txt(
    source_folder=source_folder,
    train_txt=train_txt,
    test_txt=test_txt,
    label_txt=label_txt,
)