Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions dcpy/lifecycle/package/esri.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
import yaml

import dcpy.models.product.dataset.metadata as models
from dcpy.lifecycle import product_metadata
from dcpy.models.data.shapefile_metadata import Attr, Edom
from dcpy.models.product.dataset.metadata import (
ColumnValue,
DatasetColumn,
)
from dcpy.models.product.metadata import OrgMetadata
from dcpy.utils.geospatial import esri_metadata, fgdb
from dcpy.utils.geospatial.shapefile import Shapefile
from dcpy.utils.logging import logger


Expand Down Expand Up @@ -83,3 +92,119 @@ def parse_pdf_text(
output_path = output_path or Path("columns.yml")
with open(output_path, "w") as outfile:
yaml.dump(fields, outfile, sort_keys=False)


@app.command("write_metadata")
def _write_metadata(
product_name: str,
dataset_name: str,
path: Path,
layer: str,
org_md_path: Path | None = typer.Option(
None,
"--org-md-path",
help="Path to organizational metadata",
),
zip_subdir: str | None = typer.Option(
None,
"--zip-subdir",
help="Directory structure within zip file, if relevant",
),
):
write_metadata(
product_name=product_name,
dataset_name=dataset_name,
path=path,
layer=layer,
zip_subdir=zip_subdir,
org_md=org_md_path,
)
logger.info(f"Wrote metadata to {layer} in {path}")


def write_metadata(
    product_name: str,
    dataset_name: str,
    path: Path,
    layer: str,
    zip_subdir: str | None,
    org_md: Path | OrgMetadata | None,  # Allow passing OrgMetadata for testing purposes
):
    """Write product metadata to the shapefile metadata XML. Generates a new XML with defaults,
    and applies additional product-specific values.

    Args:
        product_name (str): Name of product. e.g. "lion"
        dataset_name (str): Name of dataset within a product. e.g. "pseudo-lots"
        path (Path): Path to parent directory or zip file containing shapefile, or geodatabase.
        layer (str): Shapefile or feature class name.
        zip_subdir (str | None): Internal path if shp is nested within a zip file.
            Must be None when path is a file geodatabase.
        org_md (Path | OrgMetadata | None): Metadata reference used to populate shapefile metadata.

    Raises:
        ValueError: If `zip_subdir` is supplied for a file geodatabase, or if
            `path`/`layer` identify neither a geodatabase (.gdb) nor a shapefile (.shp).
    """
    # Resolve organizational metadata when given a path (or nothing at all).
    if isinstance(org_md, Path) or not org_md:
        org_md = product_metadata.load(org_md_path_override=org_md)

    product_md = org_md.product(product_name).dataset(dataset_name)

    metadata = esri_metadata.generate_metadata()

    # Set dataset-level values
    # TODO: define DCP organizationally required metadata fields
    metadata.md_hr_lv_name = product_md.attributes.display_name
    metadata.data_id_info.id_abs = product_md.attributes.description
    metadata.data_id_info.other_keys.keyword = product_md.attributes.tags
    metadata.data_id_info.search_keys.keyword = product_md.attributes.tags

    metadata.eainfo.detailed.name = product_md.id
    metadata.eainfo.detailed.enttyp.enttypl.value = product_md.id
    metadata.eainfo.detailed.enttyp.enttypt.value = "Feature Class"

    # Build attribute metadata for each column
    metadata.eainfo.detailed.attr = [
        _create_attr_metadata(column) for column in product_md.columns
    ]

    # Dispatch on target format. `suffixes` handles both "x.gdb" and "x.gdb.zip".
    if ".gdb" in path.suffixes:
        if zip_subdir is not None:
            raise ValueError(
                "Nested zipped GDBs are not supported. The GDB must be at the top level of the zip."
            )
        fgdb.write_metadata(gdb=path, layer=layer, metadata=metadata, overwrite=True)

    elif ".shp" in path.suffixes or layer.endswith(".shp"):
        shp = Shapefile(path=path, shp_name=layer, zip_subdir=zip_subdir)
        shp.write_metadata(metadata, overwrite=True)

    else:
        # Previously an unrecognized path/layer fell through and silently wrote
        # nothing; fail loudly so the caller knows no metadata was written.
        raise ValueError(
            f"Unsupported target: path={path}, layer={layer}. "
            "Expected a file geodatabase (.gdb) or a shapefile (.shp)."
        )


def _create_attr_metadata(column: DatasetColumn) -> Attr:
    """Create an Attr metadata object from a column specification.

    Maps a DCP dataset column onto the Esri attribute element: label, alias,
    definition, and (when declared) enumerated-domain values.
    """
    attr = Attr()

    # "uid" columns are surfaced as Esri's auto-generated FID field.
    # NOTE(review): the label check uses column.id while the alias check uses
    # column.name — confirm this asymmetry is intentional.
    attr.attrlabl.value = "FID" if column.id == "uid" else column.id
    attr.attalias.value = "FID" if column.name == "uid" else column.name
    attr.attrdef.value = column.description

    # TODO: define column-level defaults (e.g. attrdefs = 'Esri' if column.name == 'uid')
    # TODO: map DCP types to Esri types (e.g. attrtype = 'OID' if column.name == 'uid') Note DCP types != Esri types
    # attr.attrtype.value = column.data_type
    # attr.attwidth.value = None
    # attr.atprecis.value = None
    # attr.attscale.value = None
    # attr.attrdefs.value = ""

    # Handle domain values if present
    # hasattr guard tolerates column models without a 'values' field.
    if hasattr(column, "values") and column.values:
        attr.attrdomv.edom = [_create_edom_metadata(value) for value in column.values]

    # TODO: handle 'attrdomv.udom', with other esri value defaults
    return attr


def _create_edom_metadata(column_value: ColumnValue) -> Edom:
    """Build the Esri enumerated-domain (Edom) entry for one column value."""
    domain_entry = Edom()
    domain_entry.edomv = column_value.value
    domain_entry.edomvd = column_value.description
    return domain_entry
115 changes: 2 additions & 113 deletions dcpy/lifecycle/package/shapefiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,13 @@

import typer

from dcpy.lifecycle import product_metadata
from dcpy.models.data.shapefile_metadata import Attr, Edom
from dcpy.models.product.dataset.metadata import (
COLUMN_TYPES,
ColumnValue,
DatasetAttributes,
DatasetColumn,
Metadata,
)
from dcpy.models.product.metadata import OrgMetadata
from dcpy.utils.geospatial import shapefile as shp_utils
from dcpy.utils.geospatial.shapefile import Shapefile
from dcpy.utils.logging import logger

_shapefile_to_dcpy_types: dict[str, COLUMN_TYPES] = {
Expand Down Expand Up @@ -121,8 +116,9 @@ def parse_shapefile_metadata(file_path: Path) -> Metadata:
app = typer.Typer()


# TODO - delete?
@app.command("to_metadata")
def _write_metadata(
def _write_shapefile_metadata(
shp_xml_path: Path,
output_path: Path = typer.Option(
None,
Expand All @@ -134,110 +130,3 @@ def _write_metadata(
out_path = output_path or Path("./metadata.yml")
parse_shapefile_metadata(shp_xml_path).write_to_yaml(out_path)
logger.info(f"Wrote metadata to {out_path}")


@app.command("write_metadata")
def _write_shapefile_xml_metadata(
product_name: str,
dataset_name: str,
path: Path,
shp_name: str,
org_md_path: Path | None = typer.Option(
None,
"--org-md-path",
help="Path to organizational metadata",
),
zip_subdir: str | None = typer.Option(
None,
"--zip-subdir",
help="Directory structure within zip file, if relevant",
),
):
write_shapefile_xml_metadata(
product_name=product_name,
dataset_name=dataset_name,
path=path,
shp_name=shp_name,
zip_subdir=zip_subdir,
org_md=org_md_path,
)
logger.info(f"Wrote metadata to {shp_name} in {path}")


def write_shapefile_xml_metadata(
    product_name: str,
    dataset_name: str,
    path: Path,
    shp_name: str,
    zip_subdir: str | None,
    org_md: Path | OrgMetadata | None,  # Allow passing OrgMetadata for testing purposes
):
    """Write product metadata to the shapefile metadata XML. Generates a new XML with defaults,
    and applies additional product-specific values.

    Args:
        product_name (str): Name of product. e.g. "lion"
        dataset_name (str): Name of dataset within a product. e.g. "pseudo-lots"
        path (Path): Path to parent directory or zip file containing shapefile.
        shp_name (str): Shapefile name, ending in ".shp". e.g. "shapefile_name.shp"
        zip_subdir (str | None): Internal path, if shp is nested within a zip file.
        org_md (Path | OrgMetadata | None): Metadata reference used to populate shapefile metadata.
    """
    # Resolve organizational metadata when given a path (or nothing at all).
    if isinstance(org_md, Path) or not org_md:
        org_md = product_metadata.load(org_md_path_override=org_md)

    dataset_md = org_md.product(product_name).dataset(dataset_name)
    attrs = dataset_md.attributes

    esri_md = shp_utils.generate_metadata()

    # Dataset-level fields
    # TODO: define DCP organizationally required metadata fields
    esri_md.md_hr_lv_name = attrs.display_name
    esri_md.data_id_info.id_abs = attrs.description
    esri_md.data_id_info.other_keys.keyword = attrs.tags
    esri_md.data_id_info.search_keys.keyword = attrs.tags

    detailed = esri_md.eainfo.detailed
    detailed.name = dataset_md.id
    detailed.enttyp.enttypl.value = dataset_md.id
    detailed.enttyp.enttypt.value = "Feature Class"

    # One Attr entry per documented column
    detailed.attr = [_create_attr_metadata(col) for col in dataset_md.columns]

    target = Shapefile(path=path, shp_name=shp_name, zip_subdir=zip_subdir)
    target.write_metadata(esri_md, overwrite=True)


def _create_attr_metadata(column: DatasetColumn) -> Attr:
    """Create an Attr metadata object from a column specification.

    Maps a DCP dataset column onto the Esri attribute element: label, alias,
    definition, and (when declared) enumerated-domain values.
    """
    attr = Attr()

    # "uid" columns are surfaced as Esri's auto-generated FID field.
    # NOTE(review): the label check uses column.id while the alias check uses
    # column.name — confirm this asymmetry is intentional.
    attr.attrlabl.value = "FID" if column.id == "uid" else column.id
    attr.attalias.value = "FID" if column.name == "uid" else column.name
    attr.attrdef.value = column.description

    # TODO: define column-level defaults (e.g. attrdefs = 'Esri' if column.name == 'uid')
    # TODO: map DCP types to Esri types (e.g. attrtype = 'OID' if column.name == 'uid') Note DCP types != Esri types
    # attr.attrtype.value = column.data_type
    # attr.attwidth.value = None
    # attr.atprecis.value = None
    # attr.attscale.value = None
    # attr.attrdefs.value = ""

    # Handle domain values if present
    # hasattr guard tolerates column models without a 'values' field.
    if hasattr(column, "values") and column.values:
        attr.attrdomv.edom = [_create_edom_metadata(value) for value in column.values]

    # TODO: handle 'attrdomv.udom', with other esri value defaults
    return attr


def _create_edom_metadata(column_value: ColumnValue) -> Edom:
    """Build the Esri enumerated-domain (Edom) entry for one column value."""
    domain_entry = Edom()
    domain_entry.edomv = column_value.value
    domain_entry.edomvd = column_value.description
    return domain_entry
Loading