Source code for geographer.creator_from_source_dataset_base
"""ABC for creating or updating a dataset from an existing source dataset."""fromabcimportABC,abstractmethodfrompathlibimportPathfromtypingimportOptionalfrompydanticimport(BaseModel,ConfigDict,Field,PrivateAttr,field_validator,model_validator,)fromgeographer.base_model_dict_conversion.save_load_base_model_mixinimport(SaveAndLoadBaseModelMixIn,)fromgeographer.connectorimport(DEFAULT_CONNECTOR_DIR_NAME,INFERRED_PATH_ATTR_FILENAMES,Connector,)
class DSCreatorFromSource(ABC, SaveAndLoadBaseModelMixIn, BaseModel):
    """ABC for creating or updating a dataset from an existing one.

    Subclasses implement ``_create`` and ``_update``. The public ``create``
    and ``update`` methods call them, run the ``_after_creating_or_updating``
    hook, save the target connector and return it.
    """

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    source_data_dir: Path
    target_data_dir: Path
    name: str = Field(
        title="Name",
        description="Name of dataset creator. Used as part of filename when saving.",
    )

    # Populated by validate_connectors once the public fields are validated.
    _source_connector: Optional[Connector] = PrivateAttr(default=None)
    _target_connector: Optional[Connector] = PrivateAttr(default=None)

    @field_validator("source_data_dir", mode="after")
    @classmethod
    def validate_source_data_dir(cls, value: Path) -> Path:
        """Ensure source_data_dir is an existing directory.

        Runs after pydantic's own coercion, so ``value`` is a ``Path`` even
        when the caller passed a string. (In "before" mode a string input
        would reach ``is_dir`` un-coerced and fail with an AttributeError.)

        Raises:
            ValueError: if ``value`` is not an existing directory.
        """
        if not value.is_dir():
            raise ValueError(f"Invalid source_data_dir: {value}")
        return value

    @model_validator(mode="after")
    def validate_connectors(self) -> "DSCreatorFromSource":
        """Initialize and validate the source and target connectors.

        The source connector is loaded with ``Connector.from_data_dir``. For
        the target, the connector files named in
        ``INFERRED_PATH_ATTR_FILENAMES`` are looked up inside the target's
        connector directory:

        - all present: the target connector is loaded from ``target_data_dir``;
        - none present: the target connector is obtained from the source
          connector's ``empty_connector_same_format``;
        - only some present: the target dataset is considered corrupted.

        Raises:
            ValueError: if only some of the connector files exist.
        """
        # Both directories are required Path fields and Path objects are
        # always truthy, so no emptiness guards are needed here.
        self._source_connector = Connector.from_data_dir(self.source_data_dir)

        target_connector_dir = self.target_data_dir / DEFAULT_CONNECTOR_DIR_NAME
        connector_file_paths_exist = [
            (target_connector_dir / filename).is_file()
            for filename in INFERRED_PATH_ATTR_FILENAMES.values()
        ]

        if all(connector_file_paths_exist):
            self._target_connector = Connector.from_data_dir(self.target_data_dir)
        elif not any(connector_file_paths_exist):
            self._target_connector = (
                self._source_connector.empty_connector_same_format(
                    self.target_data_dir
                )
            )
        else:
            raise ValueError(
                "Corrupted target dataset: only some of the connector files exist."
            )

        return self

    @abstractmethod
    def _create(self, *args, **kwargs) -> Connector:
        """Create a new dataset from source dataset."""

    @abstractmethod
    def _update(self, *args, **kwargs) -> Connector:
        """Update the target dataset from the source dataset."""

    def create(self, *args, **kwargs) -> Connector:
        """Create a new dataset from the source dataset.

        Delegates to ``_create``, runs the post-processing hook and saves the
        target connector.

        Returns:
            The target connector.
        """
        self._create(*args, **kwargs)
        self._after_creating_or_updating()
        self.target_connector.save()
        return self.target_connector

    def update(self, *args, **kwargs) -> Connector:
        """Update the target dataset from the source dataset.

        Delegates to ``_update``, runs the post-processing hook and saves the
        target connector.

        Returns:
            The target connector.
        """
        self._update(*args, **kwargs)
        self._after_creating_or_updating()
        self.target_connector.save()
        return self.target_connector

    def save(self):
        """Save this creator as ``<name>.json`` in the target connector dir."""
        json_file_path = self.target_connector.connector_dir / f"{self.name}.json"
        self._save(json_file_path)

    @property
    def source_connector(self) -> Optional[Connector]:
        """Connector in source_data_dir."""
        return self._source_connector

    @property
    def target_connector(self) -> Optional[Connector]:
        """Connector in target_data_dir."""
        return self._target_connector

    def _after_creating_or_updating(self):
        """Run hook after creating/updating.

        Does nothing by default. Can be overridden to e.g. save parameters
        to the target_connector.
        """

    def _add_missing_vectors_to_target(self):
        """Add missing vector features from source dataset to target dataset.

        Only checks vector feature names/indices, not whether entries differ.
        """
        source_vectors = self.source_connector.vectors
        target_vectors = self.target_connector.vectors
        # Keep only the source rows whose index is absent from the target.
        vectors_to_add = source_vectors[
            ~source_vectors.index.isin(target_vectors.index)
        ]
        self.target_connector.add_to_vectors(vectors_to_add)

    def _create_target_dirs(self):
        """Create the target connector dir and the raster data dirs."""
        self.target_connector.connector_dir.mkdir(parents=True, exist_ok=True)
        for dir_ in self.target_connector.raster_data_dirs:
            dir_.mkdir(parents=True, exist_ok=True)
class DSCreatorFromSourceWithBands(DSCreatorFromSource, ABC):
    """ABC for creating/updating a dataset from an existing one.

    Extends DSCreatorFromSource with an optional ``bands`` field.
    """

    # Maps raster directory names to the (1-based) band indices to keep;
    # both the dict itself and each per-directory list may be None.
    bands: Optional[dict[str, Optional[list[int]]]] = Field(
        title="Dict of band indices",
        description=(
            "keys: raster directory names, "
            "values: list of band indices to keep, starting with 1"
        ),
        default=None,
    )