Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ authors = [
{name = "Jonatan Skogsfors", email = "jonatan.skogsfors@smhi.se"},
]
dependencies = [
"click>=8.3.0",
"nodc-codes",
"pyyaml>=6.0.3",
"sharkadm",
Expand All @@ -17,7 +18,7 @@ license = {file = "LICENSE"}


[project.scripts]
generate-text-from-yaml = "metadata.generate_text_file_from_yaml:main"
metadata-from-package = "delivery_metadata.metadata_from_package:cli"

[dependency-groups]
dev = [
Expand Down
30 changes: 16 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ charset-normalizer==3.4.3 \
click==8.3.0 \
--hash=sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc \
--hash=sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4
# via typer
# via
# delivery-metadata
# typer
colorama==0.4.6 ; sys_platform == 'win32' \
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
--hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
Expand All @@ -86,9 +88,9 @@ fastexcel==0.16.0 \
--hash=sha256:e1059eac593f4b92843ac9d10901677cccc2a8152c67e315c9dfbd7ce7c722e7 \
--hash=sha256:ec1c56b9b3b7b7ff2bde64dbe0e378a707287aff9deeb71ff6d0f8c3b7d24e34
# via sharkadm
filelock==3.19.1 \
--hash=sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58 \
--hash=sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d
filelock==3.20.0 \
--hash=sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2 \
--hash=sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4
# via virtualenv
geopandas==1.1.1 \
--hash=sha256:1745713f64d095c43e72e08e753dbd271678254b24f2e01db8cdb8debe1d293d \
Expand Down Expand Up @@ -289,9 +291,9 @@ pandas==2.3.3 \
# via
# geopandas
# sharkadm
platformdirs==4.4.0 \
--hash=sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85 \
--hash=sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf
platformdirs==4.5.0 \
--hash=sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312 \
--hash=sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3
# via virtualenv
pluggy==1.6.0 \
--hash=sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3 \
Expand Down Expand Up @@ -503,9 +505,9 @@ requests==2.32.5 \
# via
# nodc-codes
# sharkadm
rich==14.1.0 \
--hash=sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f \
--hash=sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8
rich==14.2.0 \
--hash=sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4 \
--hash=sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd
# via
# sharkadm
# typer
Expand Down Expand Up @@ -582,7 +584,7 @@ shapely==2.1.2 \
# via
# geopandas
# sharkadm
sharkadm @ git+https://github.com/nodc-sweden/SHARKadm@6f4c94ff7640f818e0b524223dd3a86a4db7efed
sharkadm @ git+https://github.com/nodc-sweden/SHARKadm@7f9cc05006425e6a463e8ddc00df3d4ac9ab8606
# via delivery-metadata
shellingham==1.5.4 \
--hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \
Expand All @@ -608,9 +610,9 @@ urllib3==2.5.0 \
--hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \
--hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc
# via requests
virtualenv==20.34.0 \
--hash=sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026 \
--hash=sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a
virtualenv==20.35.1 \
--hash=sha256:041dac43b6899858a91838b616599e80000e545dee01a21172a6a46746472cb2 \
--hash=sha256:1d9d93cd01d35b785476e2fa7af711a98d40d227a078941695bbae394f8737e2
# via pre-commit
xlsxwriter==3.2.9 \
--hash=sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c \
Expand Down
2 changes: 1 addition & 1 deletion src/delivery_metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from delivery_metadata.delivery_metadata import DeliveryMetadata
from delivery_metadata.delivery_data import DeliveryData
115 changes: 115 additions & 0 deletions src/delivery_metadata/delivery_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from pathlib import Path
from typing import Callable, Self

import polars as pl
from sharkadm.data import get_polars_data_holder


def _apply_on_column(function: Callable, column: str, dataframe: pl.DataFrame):
    """Apply ``function`` to the non-null values of one column of ``dataframe``.

    Args:
        function: Callable that receives the column as a polars Series with
            null values removed, e.g. the builtins ``min`` or ``max``.
        column: Name of the column to aggregate.
        dataframe: Frame to read the column from.

    Returns:
        The result of ``function``, or ``None`` when the column is absent or
        holds no non-null values.
    """
    if column not in dataframe.columns:
        return None

    # Builtin aggregates such as min/max raise ValueError on an empty input and
    # TypeError when None is compared with real values, so strip nulls first
    # and report "no value" the same way as for a missing column.
    values = dataframe[column].drop_nulls()
    if values.is_empty():
        return None

    return function(values)


class DeliveryData:
    """Data and delivery note of a delivery, from which metadata is generated.

    Instances hold a polars DataFrame with the delivered data, a mapping with
    the delivery note and the name of the source the data was read from.
    """

    # Names of all metadata fields, in the order they are reported.
    # NOTE(review): "data distributor" contains a space, unlike every other
    # field name. Confirm whether "data_distributor" was intended before
    # changing it, since consumers may rely on the current key.
    _fields = (
        "datatype",
        "abstract",
        "description",
        "dataset_filename",
        "discipline",
        "measuring_area_type",
        "coordinate_system",
        "platform_class",
        "access_constraints",
        "min_year",
        "max_year",
        "min_date",
        "max_date",
        "min_longitude_dd",
        "max_longitude_dd",
        "min_latitude_dd",
        "max_latitude_dd",
        "taxonomic_coverage",
        "originator",
        "contact",
        "orderer",
        "data_holding_centre",
        "data distributor",
        "database_reference",
        "internet_access",
        "address",
        "postal_code",
        "city",
        "phone",
        "email",
        "citation",
    )

    def __init__(
        self,
        data: pl.DataFrame | None = None,
        delivery_note: dict | None = None,
        source: str = "",
    ):
        """Create a delivery data object.

        Args:
            data: The delivered data. Defaults to an empty DataFrame.
            delivery_note: Delivery note values. Defaults to an empty dict.
            source: Name of the source, reported as ``dataset_filename``.
        """
        self._data = pl.DataFrame() if data is None else data
        self._delivery_note = delivery_note or {}
        self._source = source

    @property
    def data(self):
        """The DataFrame holding the delivered data."""
        return self._data

    @property
    def delivery_note(self):
        """The delivery note mapping."""
        return self._delivery_note

    @property
    def fields(self):
        """Tuple with the names of all metadata fields."""
        return self._fields

    def generate_metadata(self) -> dict:
        """Build the metadata mapping for this delivery.

        Deliberately writes nothing to stdout, so callers can emit the result
        (for example as JSON) without other output mixed in.

        Returns:
            A dict with one key per entry in ``_fields``, in that order.
            ``datatype`` comes from the ``DTYPE`` entry of the delivery note,
            ``dataset_filename`` from the source name, and the ``min_*`` /
            ``max_*`` entries from the corresponding data columns. Fields
            that cannot be derived are ``None``.
        """
        # Seed every known field with None so the result always carries
        # exactly the declared keys; assigning to an existing key keeps its
        # position, so the declared order is preserved.
        metadata = dict.fromkeys(self._fields)
        metadata["datatype"] = self._delivery_note.get("DTYPE")
        metadata["dataset_filename"] = self._source

        # Metadata field suffix -> data column the min/max is taken from.
        range_columns = {
            "year": "visit_year",
            "date": "sample_date",
            "longitude_dd": "sample_longitude_dd",
            "latitude_dd": "sample_latitude_dd",
        }
        for suffix, column in range_columns.items():
            metadata[f"min_{suffix}"] = _apply_on_column(min, column, self._data)
            metadata[f"max_{suffix}"] = _apply_on_column(max, column, self._data)

        return metadata

    @classmethod
    def from_shark_package(cls, package_path: Path) -> Self:
        """Create an instance from a SHARK package.

        Args:
            package_path: Path handed to sharkadm's ``get_polars_data_holder``.
                Its final component becomes the source name.

        Returns:
            An instance holding the data and delivery note of the package.
        """
        sharkadm_dataholder = get_polars_data_holder(package_path)
        return cls(
            data=sharkadm_dataholder.data,
            delivery_note=sharkadm_dataholder.delivery_note.data,
            source=package_path.name,
        )
60 changes: 0 additions & 60 deletions src/delivery_metadata/delivery_metadata.py

This file was deleted.

18 changes: 18 additions & 0 deletions src/delivery_metadata/metadata_from_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import json
from pathlib import Path

import click

from delivery_metadata import DeliveryData


@click.command()
@click.argument("package", type=click.Path(exists=True, path_type=Path))
def cli(package: Path):
    # Command-line entry point: load the SHARK package at ``package``, derive
    # its metadata and write it to stdout as JSON with two-space indentation.
    # (Kept as comments rather than a docstring so the --help text click
    # generates is unchanged.)
    metadata = DeliveryData.from_shark_package(package).generate_metadata()
    rendered = json.dumps(metadata, indent=2)
    print(rendered)


# Invoke the click command when this module is executed as a script.
if __name__ == "__main__":
    cli()
6 changes: 3 additions & 3 deletions tests/test_parse_shark_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import polars as pl

from delivery_metadata import DeliveryMetadata
from delivery_metadata import DeliveryData


def _write_data_to_package_folder(
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_parse_unpacked_folder(
)

# When parsing data using path
metadata = DeliveryMetadata.from_shark_package(package_path)
metadata = DeliveryData.from_shark_package(package_path)

# Then metadata holds data
assert not metadata.data.is_empty()
Expand Down Expand Up @@ -92,7 +92,7 @@ def test_parse_zipped_folder(
zipped_package_path = Path(shutil.make_archive(package_path, "zip", package_path))

# When parsing data using path
metadata = DeliveryMetadata.from_shark_package(zipped_package_path)
metadata = DeliveryData.from_shark_package(zipped_package_path)

# Then metadata holds data
assert not metadata.data.is_empty()
Expand Down
12 changes: 7 additions & 5 deletions tests/test_write_metadata.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from delivery_metadata import DeliveryMetadata
from delivery_metadata import DeliveryData


def test_metadata_has_expected_fields():
# Given a metadata object
metadata = DeliveryMetadata()
given_delivery_note_data = {"DTYPE": "Phytoplankton"}

# Given a delivery data object
delivery_data = DeliveryData(delivery_note=given_delivery_note_data)

# When looking at all available fields
metadata_fields = metadata.fields
metadata = delivery_data.generate_metadata()

# Then they correspond to the expected fields
orderered_expected_fields = (
Expand Down Expand Up @@ -43,4 +45,4 @@ def test_metadata_has_expected_fields():
"citation",
)

assert metadata_fields == orderered_expected_fields
assert set(metadata.keys()) == set(orderered_expected_fields)
Loading