Source code for h3.dataprocessing.pre_processing

import numpy as np
import pandas as pd

import os
import rasterio as rio
# import cv2
# import scipy.ndimage
# from os.path import exists

from tqdm import tqdm
# from threading import Thread
# from h3 import logger
# from h3.constants import DMG_CLASSES_DICT
from h3.utils.directories import get_metadata_pickle_dir, get_xbd_hlabel_dir
from h3.utils.directories import get_xbd_dir, get_data_dir, get_processed_data_dir
from h3.dataprocessing.extract_metadata import load_and_save_df
from h3.dataprocessing.crop_images_img import crop_images


def image_loading(polygons_df, zoom_levels: list, pixel_num: int, zoomdir_dict: dict):
    """Crop every building out of its source image at each zoom level.

    Each distinct image referenced by ``polygons_df`` is opened once with
    rasterio. ``crop_images`` is then called for every building polygon
    belonging to that image and for every requested zoom level.

    Parameters
    ----------
    polygons_df : geopandas dataframe
        Dataframe containing metadata information about the pre-event
        polygons, combined with the damage class from the post-event data.
        The columns ``image_name``, ``json_link`` and ``geometry`` are read.
        The reference system is "xy", referring to the corresponding image
        file. An ``index`` column holding the row position is added to this
        dataframe **in place**.
    zoom_levels : list
        All required zoom levels. Every entry must be a key of
        ``zoomdir_dict``.
    pixel_num : int
        Side length of the square cropped image used as input for the model.
    zoomdir_dict : dict
        Maps each zoom level to the directory that receives its crops.

    Notes
    -----
    The output path handed to ``crop_images`` is ``<index>.png`` inside the
    directory of the zoom level, where ``<index>`` is the row position of the
    building in ``polygons_df``.
    """
    # Forwarded to ``crop_images`` as ``im_size``; presumably the side length
    # in pixels of the square source images -- TODO confirm.
    image_size = 1024

    # The row position doubles as a unique building id for output file names.
    polygons_df["index"] = np.arange(len(polygons_df))
    unique_images = polygons_df[["image_name", "json_link"]].drop_duplicates(
        subset=["image_name"])

    for json_path in tqdm(unique_images["json_link"], desc="Processing images",
                          position=0, leave=False):
        # Derive the image path straight from the label path. Joining the
        # derived path back onto ``json_path`` only works for absolute paths
        # (os.path.join then discards the first component) and yields a
        # nonsensical nested path for relative ones.
        # NOTE(review): plain substring replacement rewrites *every*
        # occurrence of "json" / "labels" in the path, including directory
        # names -- confirm no data directory contains those substrings.
        image_path = json_path.replace("json", "tif").replace("labels", "images")

        # Rows are matched on the image file name derived from the label file.
        name_img = os.path.basename(json_path).replace("json", "png")
        polygons_image = polygons_df.loc[
            polygons_df["image_name"] == name_img, ["geometry", "index"]]

        # Read the raster once per image; every crop reuses the same array.
        with rio.open(image_path) as img:
            img_metadata = img.meta
            img_array = img.read()

        buildings = zip(polygons_image["geometry"], polygons_image["index"])
        for polygon_for_img, polygon_num in tqdm(buildings,
                                                 total=len(polygons_image),
                                                 desc="Buildings",
                                                 position=1, leave=False):
            img_num = str(polygon_num) + ".png"
            for zoom_level in tqdm(zoom_levels, desc="zoom", position=2,
                                   leave=False):
                output_path = os.path.join(zoomdir_dict[zoom_level], img_num)
                crop_images(
                    image_array=img_array,
                    img_metadata=img_metadata,
                    polygon_df=polygon_for_img,
                    zoom_level=zoom_level,
                    pixel_num=pixel_num,
                    im_size=image_size,
                    output_path=output_path,
                )
def main():
    """Build the polygon metadata and write zoomed crops for every building.

    The label directories of the ``hold``, ``tier1``, ``tier3`` and ``test``
    splits under ``<xbd_dir>/geotiffs`` are handed to ``load_and_save_df``
    together with the metadata pickle directory. One output directory per
    zoom level is then created, and ``image_loading`` is run on the "xy"
    dataframe.
    """
    zoom_levels = [1, 2, 4, 0.5]
    pixel_num = 224

    # TODO: look/fix the geotiffs.old and all
    output_dir = get_metadata_pickle_dir()
    data_dir = get_data_dir()
    xbd_dir = get_xbd_dir()

    label_dirs = [
        os.path.join(xbd_dir, "geotiffs", split, "labels")
        for split in ("hold", "tier1", "tier3", "test")
    ]
    # ``load_and_save_df`` receives a dict keyed by label directory; the
    # values are all None at this point.
    filepaths_dict = dict.fromkeys(label_dirs)

    # The second dataframe (longitude & latitude) is meant for environmental
    # factors and is not needed here; only the xy frame is used for cropping.
    df_pre_post_hurr_xy, _ = load_and_save_df(filepaths_dict, output_dir)

    # where to save zoomed and cropped images
    save_dir_path = os.path.join(
        get_processed_data_dir(), "processed_xbd", "geotiffs_zoom", "images")

    zoomdir_dict = {}
    for zoom_num in zoom_levels:
        # NOTE(review): os.path.join drops ``data_dir`` whenever
        # ``save_dir_path`` is absolute -- confirm which one is intended.
        zoom_path = os.path.join(data_dir, save_dir_path, "zoom_" + str(zoom_num))
        zoomdir_dict[zoom_num] = zoom_path
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(zoom_path, exist_ok=True)

    image_loading(df_pre_post_hurr_xy, zoom_levels, pixel_num, zoomdir_dict)
if __name__ == "__main__":
    # Run the pre-processing pipeline only when executed as a script,
    # not when the module is imported.
    main()