"""Utility functions for merging datasets."""from__future__importannotationsimportosimportshutilfrompathlibimportPathfromtqdm.autoimporttqdmfromgeographer.connectorimportConnector
def merge_datasets(
    source_data_dir: Path | str,
    target_data_dir: Path | str,
    delete_source: bool = True,
) -> None:
    """Merge the dataset in ``source_data_dir`` into the one in ``target_data_dir``.

    A connector is loaded for each data dir. The raster directories of the
    two connectors are then walked pairwise and every source entry whose
    name is not already present in the matching target directory is copied
    over. Afterwards the source download directory is merged into the
    target download directory, the source polygons and rasters are added to
    the target connector, and the target connector is saved.

    Args:
        source_data_dir: data dir of source dataset
        target_data_dir: data dir of target dataset
        delete_source: Meant to control whether the source dataset is
            deleted after merging. Defaults to True. NOTE: the flag is not
            read anywhere in this function, so the source dataset is
            currently always left in place.
    """
    src_connector = Connector.from_data_dir(source_data_dir)
    dst_connector = Connector.from_data_dir(target_data_dir)

    # Copy over the raster data dirs, pairing source and target by position.
    paired_raster_dirs = zip(
        src_connector.raster_data_dirs,
        dst_connector.raster_data_dirs,
    )
    for src_raster_dir, dst_raster_dir in paired_raster_dirs:
        # Snapshot of the names already in the target; these are not overwritten.
        names_already_present = {
            entry.name for entry in dst_raster_dir.iterdir()
        }

        progress = tqdm(src_raster_dir.iterdir())
        progress.set_description(f"copying {str(src_raster_dir.name)}")
        for candidate_path in progress:
            if candidate_path.name in names_already_present:
                continue
            shutil.copy2(candidate_path, dst_raster_dir)

    # Merge/copy over downloads (e.g. safe_files).
    merge_dirs(
        str(src_connector.download_dir),
        str(dst_connector.download_dir),
    )

    # Register the source's vector and raster records with the target and persist.
    dst_connector.add_to_polygons_df(src_connector.polygons_df)
    dst_connector.add_to_rasters(src_connector.rasters)
    dst_connector.save()
# TODO rewrite using pathlib
def merge_dirs(root_src_dir: Path | str, root_dst_dir: Path | str) -> None:
    """Recursively merge two folders including subfolders.

    Every directory below ``root_src_dir`` is recreated at the same relative
    location below ``root_dst_dir`` (if it does not exist yet) and every file
    is copied into it. A file that already exists at the destination is
    removed first and then replaced by the source file. The source tree is
    left untouched.

    Args:
        root_src_dir: root source directory
        root_dst_dir: root target directory
    """
    # os.walk yields plain strings, so normalise both roots up front. This
    # lets callers pass pathlib.Path objects as the signature advertises.
    src_root = os.fspath(root_src_dir)
    dst_root = os.fspath(root_dst_dir)

    pbar = tqdm(os.walk(src_root))
    for src_dir, _dirs, files in pbar:
        pbar.set_description(str(src_dir))

        # Map the current directory onto the destination tree through its
        # path relative to the source root. Unlike a textual prefix
        # replacement, this stays correct when a root ends in a separator.
        rel_dir = os.path.relpath(src_dir, src_root)
        if rel_dir == os.curdir:
            dst_dir = dst_root
        else:
            dst_dir = os.path.join(dst_root, rel_dir)
        os.makedirs(dst_dir, exist_ok=True)

        for file_ in files:
            src_file = os.path.join(src_dir, file_)
            dst_file = os.path.join(dst_dir, file_)
            # Remove an existing entry first so the copy creates a fresh file
            # instead of writing into whatever is there (e.g. via a symlink).
            if os.path.exists(dst_file):
                os.remove(dst_file)
            shutil.copy(src_file, dst_dir)