From 868fc526c8dffffdd4f41bebaf02d72ed6b25cdb Mon Sep 17 00:00:00 2001 From: "stepan.moc" Date: Sun, 15 Feb 2026 20:03:11 +0100 Subject: [PATCH 01/10] fix(git): remove branch rebase bug --- ingress_server/setup_venv.sh | 28 ++++++++++++++++++++++++++++ ingress_server/tools/manual_run.py | 14 ++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 ingress_server/setup_venv.sh create mode 100644 ingress_server/tools/manual_run.py diff --git a/ingress_server/setup_venv.sh b/ingress_server/setup_venv.sh new file mode 100644 index 00000000..42d2ddb4 --- /dev/null +++ b/ingress_server/setup_venv.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# This would create a development virtual environment +# - uses requirements.txt +# - install endorse itself in development mode. +set -x + +echo "Creating python virtual environment." + +SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P )" +SRC_ROOT="$SCRIPTPATH" + +cd ${SRC_ROOT} || exit +rm -r venv +#virtualenv venv +python3 -m venv --system-site-packages venv +ls + +venv_pip=${SRC_ROOT}/venv/bin/pip +$venv_pip install wheel +$venv_pip install --upgrade pip +#source venv/bin/activate +$venv_pip install "Flask-SQLAlchemy>=3.0.1" +#$venv_pip install -r requirements.txt + +# TODO simplify dependency and submodules +# attrs somehow was broken after gmsh explicit installation, must force its reinstallation + +$venv_pip install -e . 
diff --git a/ingress_server/tools/manual_run.py b/ingress_server/tools/manual_run.py new file mode 100644 index 00000000..fca94ec6 --- /dev/null +++ b/ingress_server/tools/manual_run.py @@ -0,0 +1,14 @@ +import sys +from ingress_server.worker import _process_one, _iter_accepted_files_in_dir + +def main(): + accepted_dir = sys.argv[1] + for data_path in _iter_accepted_files_in_dir(accepted_dir): + err = _process_one(data_path) + if err: + print(f"Error processing {data_path}: {err}") + else: + print(f"Successfully processed {data_path}") + +if __name__ == "__main__": + main() From 1840d667c47f8415f3aa8b0e36f2b47f64d2025b Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Mon, 16 Feb 2026 23:31:52 +0100 Subject: [PATCH 02/10] S3 CESNET bandwidth measurements --- tests/s3_bandwidth/fs_bandwidth.sh | 6 + tests/s3_bandwidth/results.md | 70 ++++++++++ tests/s3_bandwidth/setup.sh | 21 +++ tests/s3_bandwidth/test_s3_bandwidth.py | 166 ++++++++++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 tests/s3_bandwidth/fs_bandwidth.sh create mode 100644 tests/s3_bandwidth/results.md create mode 100644 tests/s3_bandwidth/setup.sh create mode 100644 tests/s3_bandwidth/test_s3_bandwidth.py diff --git a/tests/s3_bandwidth/fs_bandwidth.sh b/tests/s3_bandwidth/fs_bandwidth.sh new file mode 100644 index 00000000..0882a67a --- /dev/null +++ b/tests/s3_bandwidth/fs_bandwidth.sh @@ -0,0 +1,6 @@ +FILE="$1/test_file" +dd if=/dev/zero of="$FILE" bs=64M count=16 oflag=direct status=progress + +dd if="$FILE" of=/dev/null bs=64M iflag=direct status=progress + +rm -f "$FILE" diff --git a/tests/s3_bandwidth/results.md b/tests/s3_bandwidth/results.md new file mode 100644 index 00000000..33313b64 --- /dev/null +++ b/tests/s3_bandwidth/results.md @@ -0,0 +1,70 @@ +# Theoretical and practical bandwidth for CESNET + +## CESNET Ceph CL4 info + +CL4 (Brno) + +Ceph-based object storage + +57 storage servers + +24 HDDs per server + +Total raw capacity: 26.607 PB + +Access layer: 5 
front-end servers + 9 application servers + +Theoretical max bandwidth per client 1-2 Gbps + + +## Brno - Charon network bandwidth + +Skirit (Brno) - Charon (Liberec) Bandwidth +(BOOKWORM)jan_brezina@skirit:~/lbc/workspace/zarr_test$ bash fs_bandwidth.sh . + +write to charon, 16 x 64MB +1073741824 bytes (1,1 GB, 1,0 GiB) copied, 18,6463 s, 57,6 MB/s + +read from charon 16x64MB +1073741824 bytes (1,1 GB, 1,0 GiB) copied, 15,4808 s, 69,4 MB/s + +## S3 access from Charon, Liberec + +Charon - CESNET (Liberec, 1 Gbit/s) + +Endpoint: https://s3.cl4.du.cesnet.cz +Bucket: test-zarr-storage +Key: bwtest/38c648e951bf461bad38f473ba22758e.bin +Size: 1073741824 bytes (1 GiB) + +UPLOAD: 26.474 s -> 38.7 MiB/s (0.32 Gbit/s) +DOWNLOAD: 7.460 s -> 137.3 MiB/s (1.15 Gbit/s) + +UPLOAD: 25.947 s -> 39.5 MiB/s (0.33 Gbit/s) +DOWNLOAD: 7.133 s -> 143.6 MiB/s (1.20 Gbit/s) + + +## S3 access from Skirit, Brno + +Endpoint: https://s3.cl4.du.cesnet.cz +Bucket: test-zarr-storage +Key: bwtest/d20d78d0be4a4520b1e4e9a94cde1cff.bin +Size: 1073741824 bytes (1 GiB) + +UPLOAD: 23.865 s -> 42.9 MiB/s (0.36 Gbit/s) +DOWNLOAD: 5.664 s -> 180.8 MiB/s (1.52 Gbit/s) + +UPLOAD: 24.012 s -> 42.6 MiB/s (0.36 Gbit/s) +DOWNLOAD: 5.478 s -> 186.9 MiB/s (1.57 Gbit/s) + +## Conclusions + +- CESNET [documents](https://docs.du.cesnet.cz/en/docs/object-storage-s3/s5cmd?utm_source=chatgpt.com) peak bandwidth for 1-2 Gbps network connections using the `s5cmd` utility +- The network itself is capable of such bandwidth (Charon - Brno) +- For read we are close to the limit, which in fact is very close to the limit of mechanical HDDs. +- For write there is probably room for improvement up to 4-5 times. +- Storage has 5 frontends so it probably can handle multiple parallel writes. + +## TODO: +- Measure serial zarr and zarr-fuse R/W bandwidth. +- Measure real parallel read, parallel write bandwidth. 
diff --git a/tests/s3_bandwidth/setup.sh b/tests/s3_bandwidth/setup.sh new file mode 100644 index 00000000..41b0c4df --- /dev/null +++ b/tests/s3_bandwidth/setup.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -xeuo pipefail + +module load python + +VENV_DIR="venv" + +# Create venv if missing +if [[ ! -d "$VENV_DIR" ]]; then + python3 -m venv "$VENV_DIR" +fi + +# Activate venv +# shellcheck disable=SC1090 +#source "$VENV_DIR/bin/activate" + +PY="$VENV_DIR/bin/python3" +# Minimal deps for the S3 test script +$PY -m pip install --upgrade pip >/dev/null +$PY -m pip install boto3 + diff --git a/tests/s3_bandwidth/test_s3_bandwidth.py b/tests/s3_bandwidth/test_s3_bandwidth.py new file mode 100644 index 00000000..552b3f58 --- /dev/null +++ b/tests/s3_bandwidth/test_s3_bandwidth.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +import json +import math +import sys +import time +import uuid + +import boto3 +from botocore.config import Config + +# === EDIT THESE TWO === +S3_URL = "https://s3.cl4.du.cesnet.cz" # e.g. 
"https://s3.cld.cesnet.cz" +BUCKET = "test-zarr-storage" +# ====================== + +GiB = 1024 ** 3 +MiB = 1024 ** 2 + +SIZE_BYTES = 1 * GiB +PART_SIZE = 64 * MiB # must be >= 5 MiB for multipart +DL_CHUNK = 8 * MiB + + +def die(msg: str, code: int = 2) -> None: + print(f"ERROR: {msg}", file=sys.stderr) + raise SystemExit(code) + + +def human_speed(bytes_per_sec: float) -> str: + mib_s = bytes_per_sec / MiB + gbit_s = (bytes_per_sec * 8) / 1e9 + return f"{mib_s:.1f} MiB/s ({gbit_s:.2f} Gbit/s)" + + +def make_client(creds: dict): + ak = creds.get("access_key") + sk = creds.get("secret_key") + st = creds.get("session_token") + + if not ak or not sk: + die('Secrets JSON must include "access_key" and "secret_key".') + + # CESNET / S3-compatible compatibility knobs: + # - region_name is used for SigV4 signing only + # - checksum settings avoid AWS-style streaming checksums/trailers that some endpoints reject + cfg = Config( + region_name="us-east-1", + s3={"addressing_style": "path"}, + retries={"max_attempts": 5, "mode": "standard"}, + request_checksum_calculation="when_required", + response_checksum_validation="when_required", + ) + + return boto3.client( + "s3", + endpoint_url=S3_URL, + aws_access_key_id=ak, + aws_secret_access_key=sk, + aws_session_token=st, + config=cfg, + ) + + +def multipart_upload(s3, bucket: str, key: str) -> float: + resp = s3.create_multipart_upload(Bucket=bucket, Key=key) + upload_id = resp["UploadId"] + + buf = b"\0" * PART_SIZE + num_parts = math.ceil(SIZE_BYTES / PART_SIZE) + + parts = [] + sent = 0 + start = time.perf_counter() + + try: + for part_number in range(1, num_parts + 1): + remaining = SIZE_BYTES - sent + this_size = PART_SIZE if remaining >= PART_SIZE else remaining + body = buf if this_size == PART_SIZE else (b"\0" * this_size) + + up = s3.upload_part( + Bucket=bucket, + Key=key, + PartNumber=part_number, + UploadId=upload_id, + Body=body, + ContentLength=this_size, # important for some S3-compatible servers (avoids 
MissingContentLength) + ) + parts.append({"PartNumber": part_number, "ETag": up["ETag"]}) + sent += this_size + + s3.complete_multipart_upload( + Bucket=bucket, + Key=key, + UploadId=upload_id, + MultipartUpload={"Parts": parts}, + ) + except Exception: + try: + s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id) + except Exception: + pass + raise + + return time.perf_counter() - start + + +def streaming_download(s3, bucket: str, key: str) -> float: + start = time.perf_counter() + obj = s3.get_object(Bucket=bucket, Key=key) + body = obj["Body"] + + while True: + chunk = body.read(DL_CHUNK) + if not chunk: + break + + body.close() + return time.perf_counter() - start + + +def main(): + if len(sys.argv) != 2: + die("Usage: python3 test_s3_bandwidth.py /path/to/secrets.json") + + # Read secrets from JSON file (single positional param = file path) + with open(sys.argv[1], "r", encoding="utf-8") as f: + raw = f.read() + + creds = json.loads(raw) + if not isinstance(creds, dict): + die("Secrets JSON must be an object/dict.") + + if not S3_URL.startswith("http"): + die("Set S3_URL in the script (must start with http/https).") + if not BUCKET: + die("Set BUCKET in the script.") + + s3 = make_client(creds) + key = f"bwtest/{uuid.uuid4().hex}.bin" + + print(f"Endpoint: {S3_URL}") + print(f"Bucket: {BUCKET}") + print(f"Key: {key}") + print(f"Size: {SIZE_BYTES} bytes (1 GiB)") + print() + + # Upload + t_up = multipart_upload(s3, BUCKET, key) + up_bps = SIZE_BYTES / t_up + print(f"UPLOAD: {t_up:.3f} s -> {human_speed(up_bps)}") + + # Download + t_dn = streaming_download(s3, BUCKET, key) + dn_bps = SIZE_BYTES / t_dn + print(f"DOWNLOAD: {t_dn:.3f} s -> {human_speed(dn_bps)}") + + # Cleanup + try: + s3.delete_object(Bucket=BUCKET, Key=key) + except Exception: + pass + + +if __name__ == "__main__": + main() From ffe7c3d192bf056ac12af51ea77035a16a6dcd76 Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Tue, 17 Feb 2026 01:29:50 +0100 Subject: [PATCH 03/10] Revert 
dasboard deployments --- .github/workflows/dashboard-pull-request.yaml | 19 +++++++++++++++++++ .github/workflows/dashboard-push-main.yaml | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 .github/workflows/dashboard-pull-request.yaml create mode 100644 .github/workflows/dashboard-push-main.yaml diff --git a/.github/workflows/dashboard-pull-request.yaml b/.github/workflows/dashboard-pull-request.yaml new file mode 100644 index 00000000..8f0c820f --- /dev/null +++ b/.github/workflows/dashboard-pull-request.yaml @@ -0,0 +1,19 @@ +name: Dashboard - pull request +on: + pull_request: + branches: + - "**" + paths: + - app/databuk/dashboard/** + workflow_dispatch: +jobs: + build: + name: Build and lint dashboard + uses: ./.github/workflows/dashboard-reusable-workflow.yaml + with: + tag: generate + deploy: true + namespace: zarr-fuse-dashboard-development + release-name: dashboard-development + s3-bucket-name: hlavo-release + secrets: inherit diff --git a/.github/workflows/dashboard-push-main.yaml b/.github/workflows/dashboard-push-main.yaml new file mode 100644 index 00000000..84b16c8b --- /dev/null +++ b/.github/workflows/dashboard-push-main.yaml @@ -0,0 +1,19 @@ +name: Dashboard - push main +on: + push: + branches: + - dashboard-main + paths: + - app/databuk/dashboard/** + workflow_dispatch: +jobs: + build-and-deploy: + name: Build and lint dashboard + uses: ./.github/workflows/dashboard-reusable-workflow.yaml + with: + tag: generate + deploy: true + namespace: zarr-fuse-dashboard + release-name: dashboard + s3-bucket-name: hlavo-release + secrets: inherit From 0b055f3a16d843b9432a88b277cadc577d9c28df Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Tue, 17 Feb 2026 01:33:42 +0100 Subject: [PATCH 04/10] Revert databuk/input/__init__.py --- app/databuk/inputs/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 app/databuk/inputs/__init__.py diff --git a/app/databuk/inputs/__init__.py 
b/app/databuk/inputs/__init__.py new file mode 100644 index 00000000..e122ab74 --- /dev/null +++ b/app/databuk/inputs/__init__.py @@ -0,0 +1,11 @@ +import pathlib + +# common output driectory +__script_dir__ = pathlib.Path(__file__).parent +work_dir = __script_dir__.parent / "workdir" +work_dir.mkdir(parents=True, exist_ok=True) + +# Following is public +input_dir = __script_dir__.parent / "inputs" +schema_bukov_yaml = input_dir / "schema_bukov.yaml" +bukov_locations_csv = input_dir / "bukov_locations.csv" From b3b6a2fd13127551bfd54949ea457ae5341be592 Mon Sep 17 00:00:00 2001 From: "stepan.moc" Date: Sun, 22 Feb 2026 16:00:10 +0100 Subject: [PATCH 05/10] fix(ci,oci): resolve threads --- .../dashboard-reusable-workflow.yaml | 16 ++++----- .../ingress-server-pull-request.yaml | 3 +- .../workflows/ingress-server-push-main.yaml | 1 + .../workflows/ingress-server-push-tag.yaml | 3 +- .../ingress-server-reusable-workflow.yaml | 34 ++++++------------- ingress_server/oci/Containerfile | 4 +-- ingress_server/oci/install_extras.sh | 4 +++ 7 files changed, 30 insertions(+), 35 deletions(-) diff --git a/.github/workflows/dashboard-reusable-workflow.yaml b/.github/workflows/dashboard-reusable-workflow.yaml index ab756e9f..d0ae79c3 100644 --- a/.github/workflows/dashboard-reusable-workflow.yaml +++ b/.github/workflows/dashboard-reusable-workflow.yaml @@ -36,7 +36,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Get version tag id: get_version_tag run: | @@ -61,7 +61,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Containerize uses: ./.github/actions/containerize with: @@ -87,7 +87,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: npm ci - name: Build frontend @@ -95,7 +95,7 @@ jobs: VITE_API_URL: "https://zarr-fuse-${{ 
inputs.release-name }}.dyn.cloud.e-infra.cz" run: npm run build - name: Upload artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dashboard-frontend path: app/databuk/dashboard/dist @@ -117,9 +117,9 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Download artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: dashboard-frontend path: app/databuk/dashboard/dist @@ -146,7 +146,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Lint helm chart uses: ./.github/actions/helm-lint with: @@ -172,7 +172,7 @@ jobs: cancel-in-progress: false steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Decode kubeconfig run: echo "${{ secrets.KUBECONFIG }}" | base64 -d > kubeconfig.yaml && chmod 600 kubeconfig.yaml - name: Ensure that the namespace exists diff --git a/.github/workflows/ingress-server-pull-request.yaml b/.github/workflows/ingress-server-pull-request.yaml index 169c59d8..ed9e5890 100644 --- a/.github/workflows/ingress-server-pull-request.yaml +++ b/.github/workflows/ingress-server-pull-request.yaml @@ -10,10 +10,11 @@ jobs: name: Build and deploy ingress server uses: ./.github/workflows/ingress-server-reusable-workflow.yaml concurrency: - group: "${{ github.workflow }}-${{ github.ref_name }}-build-and-deploy" + group: "${{ github.workflow }}-${{ github.head_ref }}-build-and-deploy" cancel-in-progress: false with: deploy: false push-container: false tag: generate + zarr-fuse-ref: ${{ github.head_ref }} secrets: inherit diff --git a/.github/workflows/ingress-server-push-main.yaml b/.github/workflows/ingress-server-push-main.yaml index 0ac241b9..0dc6ed41 100644 --- a/.github/workflows/ingress-server-push-main.yaml +++ b/.github/workflows/ingress-server-push-main.yaml @@ -18,4 +18,5 @@ jobs: namespace: 
ingress-server-latest release-name: ingress-server-latest s3-store-url: s3://app-databuk-test-service/bukov-main.zarr + zarr-fuse-ref: ${{ github.ref_name }} secrets: inherit diff --git a/.github/workflows/ingress-server-push-tag.yaml b/.github/workflows/ingress-server-push-tag.yaml index 7429ff16..3c3b69ae 100644 --- a/.github/workflows/ingress-server-push-tag.yaml +++ b/.github/workflows/ingress-server-push-tag.yaml @@ -14,7 +14,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Get version from tag id: get-version @@ -36,4 +36,5 @@ jobs: namespace: ingress-server release-name: ingress-server s3-store-url: s3://app-databuk-release-service/bukov.zarr + zarr-fuse-ref: ${{ github.ref_name }} secrets: inherit diff --git a/.github/workflows/ingress-server-reusable-workflow.yaml b/.github/workflows/ingress-server-reusable-workflow.yaml index aa6b4955..343ad235 100644 --- a/.github/workflows/ingress-server-reusable-workflow.yaml +++ b/.github/workflows/ingress-server-reusable-workflow.yaml @@ -53,7 +53,7 @@ on: description: "Ref/branch/tag repozitáře GeoMop/zarr_fuse for external use" required: false type: string - default: "" + default: "main" jobs: get-version-tag: name: Get version tag @@ -63,6 +63,9 @@ jobs: steps: - name: Checkout zarr_fuse uses: actions/checkout@v4 + with: + repository: GeoMop/zarr_fuse + ref: ${{ inputs.zarr-fuse-ref }} - name: Get version tag id: get_version_tag @@ -78,7 +81,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout caller repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Copy configuration files run: | @@ -95,7 +98,7 @@ jobs: cp -r "$SRC_DIR"/. 
"$DEST_DIR"/ - name: Upload configuration files as artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: ingress-server-configuration path: ci-inputs/ @@ -111,12 +114,7 @@ jobs: options: --privileged steps: - name: Checkout zarr_fuse - if: ${{ github.repository == 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 - - - name: Checkout zarr_fuse for external use - if: ${{ github.repository != 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: GeoMop/zarr_fuse ref: ${{ inputs.zarr-fuse-ref }} @@ -127,7 +125,7 @@ jobs: - name: Download configuration files artifact if: ${{ inputs.configuration-dir-path != '' }} - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: ingress-server-configuration path: ingress_server/inputs/ @@ -154,12 +152,7 @@ jobs: - containerize steps: - name: Checkout zarr_fuse - if: ${{ github.repository == 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 - - - name: Checkout zarr_fuse for external use - if: ${{ github.repository != 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: GeoMop/zarr_fuse ref: ${{ inputs.zarr-fuse-ref }} @@ -170,7 +163,7 @@ jobs: working-directory: ingress_server/charts/ingress-server values: --values values/minimal-required-values.yaml - eploy: + deploy: name: Deploy ingress server to e-infra rancher runs-on: ubuntu-latest if: ${{ inputs.deploy }} @@ -186,12 +179,7 @@ jobs: working-directory: ingress_server/charts steps: - name: Checkout zarr_fuse - if: ${{ github.repository == 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 - - - name: Checkout zarr_fuse for external use - if: ${{ github.repository != 'GeoMop/zarr_fuse' }} - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: GeoMop/zarr_fuse ref: ${{ inputs.zarr-fuse-ref }} diff --git a/ingress_server/oci/Containerfile b/ingress_server/oci/Containerfile index f9104f1f..db98b05f 100644 --- 
a/ingress_server/oci/Containerfile +++ b/ingress_server/oci/Containerfile @@ -2,7 +2,7 @@ FROM docker.io/library/python:3.11-slim-bullseye ARG APP_VERSION="devel" -ARG zarr_fuse_version="main" +ARG zarr_fuse_branch="main" ARG git_version="1:2.30.2-1+deb11u5" ARG waitress_version="3.0.2" @@ -25,7 +25,7 @@ RUN groupadd -r ingress && useradd -r -g ingress -u 11233 -m -d /ingress-server apt-get update && apt-get install -y --no-install-recommends git=${git_version} && \ apt-get clean && rm -rf /var/lib/apt/lists/* -RUN pip install --no-cache-dir "git+https://github.com/GeoMop/zarr_fuse.git@${zarr_fuse_version}" && \ +RUN pip install --no-cache-dir "git+https://github.com/GeoMop/zarr_fuse.git@${zarr_fuse_branch}" && \ pip install --no-cache-dir waitress==${waitress_version} # --- Install ingress server --- diff --git a/ingress_server/oci/install_extras.sh b/ingress_server/oci/install_extras.sh index 25b70c70..0b1040c5 100644 --- a/ingress_server/oci/install_extras.sh +++ b/ingress_server/oci/install_extras.sh @@ -1,4 +1,8 @@ #!/bin/sh + +# The script is used to install extra dependencies for ingress server extractors and is used +# in the OCI (Dockerfile) to improve readability and maintainability of the Dockerfile. 
+ set -eu set -x From 1321467b9b02a3e989a68c4986a3f23e23bb8f5f Mon Sep 17 00:00:00 2001 From: "stepan.moc" Date: Sun, 22 Feb 2026 16:36:27 +0100 Subject: [PATCH 06/10] feat(ingress_server): self review --- app/databuk/inputs/__init__.py | 11 ---------- app/databuk/inputs/paths.py | 22 ------------------- app/databuk/inputs/pyproject.toml | 11 ---------- ingress_server/README.MD | 6 ++--- ingress_server/ingress_server/worker.py | 22 +++++++++---------- ingress_server/oci/Containerfile | 6 ++--- ...s.sh => install_extractor_dependencies.sh} | 0 7 files changed, 17 insertions(+), 61 deletions(-) delete mode 100644 app/databuk/inputs/__init__.py delete mode 100644 app/databuk/inputs/paths.py delete mode 100644 app/databuk/inputs/pyproject.toml rename ingress_server/oci/{install_extras.sh => install_extractor_dependencies.sh} (100%) diff --git a/app/databuk/inputs/__init__.py b/app/databuk/inputs/__init__.py deleted file mode 100644 index e122ab74..00000000 --- a/app/databuk/inputs/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import pathlib - -# common output driectory -__script_dir__ = pathlib.Path(__file__).parent -work_dir = __script_dir__.parent / "workdir" -work_dir.mkdir(parents=True, exist_ok=True) - -# Following is public -input_dir = __script_dir__.parent / "inputs" -schema_bukov_yaml = input_dir / "schema_bukov.yaml" -bukov_locations_csv = input_dir / "bukov_locations.csv" diff --git a/app/databuk/inputs/paths.py b/app/databuk/inputs/paths.py deleted file mode 100644 index 4502138e..00000000 --- a/app/databuk/inputs/paths.py +++ /dev/null @@ -1,22 +0,0 @@ -import pathlib - - -# common output driectory -__script_dir__ = pathlib.Path(__file__).parent - -# Following is public -input_dir = __script_dir__ -schema_bukov_yaml = input_dir / "schemas" / "bukov_schema.yaml" -bukov_locations_csv = input_dir / "bukov_locations.csv" - -# test data -test_dir = __script_dir__ / "test" -work_dir = test_dir / "workdir" -work_dir.mkdir(parents=True, exist_ok=True) - 
-measurements_dir = test_dir / "test_measurements" -_measurements = ['20250915T111522_824a7f3dc0ad.json', '20250915T115149_8b4f1f4535aa.json', -'20250915T133948_121e738c86ab.json', 'T_123_partial.json'] -old_measurements = [measurements_dir / _measurements[-1]] -new_measurements = [measurements_dir / _m for _m in _measurements[:-1]] - diff --git a/app/databuk/inputs/pyproject.toml b/app/databuk/inputs/pyproject.toml deleted file mode 100644 index b619444e..00000000 --- a/app/databuk/inputs/pyproject.toml +++ /dev/null @@ -1,11 +0,0 @@ -[project] -name = "extract" -version = "0.1.0" -description = "Custom conversion module for ingress server" - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[tool.setuptools] -packages = ["extract"] diff --git a/ingress_server/README.MD b/ingress_server/README.MD index 68440888..c368d296 100644 --- a/ingress_server/README.MD +++ b/ingress_server/README.MD @@ -28,8 +28,9 @@ There is **no UI**; interaction happens via HTTP endpoints (multipart uploads). ``` . 
├─ ingress_server/ -│ ├─ src/ +│ ├─ ingress_server/ │ │ ├─ main.py # Flask backend server +| ├─ inputs/ │ │ ├─ endpoints_config.yaml # List of API endpoints and their schema paths │ │ └─ schemas/ # Schema files (prod/ci_test) │ ├─ docs/ @@ -146,8 +147,7 @@ The container already includes `schemas/` and `endpoints_config.yaml` from the b If you want to override them without rebuilding, mount them as read-only volumes: ```bash --v $(pwd)/inputs/schemas:/ingress-server/schemas:ro \ --v $(pwd)/inputs/endpoints_config.yaml:/ingress-server/endpoints_config.yaml:ro +-v $(pwd)/inputs/:/ingress-server/:ro ``` --- diff --git a/ingress_server/ingress_server/worker.py b/ingress_server/ingress_server/worker.py index 438c1138..ff2f4628 100644 --- a/ingress_server/ingress_server/worker.py +++ b/ingress_server/ingress_server/worker.py @@ -36,16 +36,6 @@ def _move_tree_contents(src: Path, dst: Path): except OSError: pass - -def _iter_accepted_files_in_dir(dir: Path): - for root, _, files in os.walk(dir): - for name in files: - if name.endswith(".meta.json"): - continue - LOG.info("Found accepted file: %s", Path(root) / name) - yield Path(root) / name - - def _iter_accepted_files(): settings = get_settings() @@ -53,7 +43,17 @@ def _iter_accepted_files(): LOG.warning("Accepted directory does not exist: %s", settings.accepted_dir) yield from () return - yield from _iter_accepted_files_in_dir(settings.accepted_dir) + + paths: list[Path] = [] + for root, _, files in os.walk(settings.accepted_dir): + for name in files: + if name.endswith(".meta.json"): + continue + paths.append(Path(root) / name) + + for path in sorted(paths, key=lambda p: p.name): + LOG.info("Found accepted file: %s", path) + yield path def _load_metadata(data_path: Path) -> tuple[MetadataModel | None, str | None]: diff --git a/ingress_server/oci/Containerfile b/ingress_server/oci/Containerfile index db98b05f..071e40bf 100644 --- a/ingress_server/oci/Containerfile +++ b/ingress_server/oci/Containerfile @@ -32,12 +32,12 
@@ RUN pip install --no-cache-dir "git+https://github.com/GeoMop/zarr_fuse.git@${za WORKDIR /ingress-server COPY --chown=ingress:ingress pyproject.toml ingress_server/ inputs/ ./ -COPY --chown=ingress:ingress oci/install_extras.sh ./scripts/install_extras.sh +COPY --chown=ingress:ingress oci/install_extractor_dependencies.sh ./scripts/install_extractor_dependencies.sh RUN pip install --no-cache-dir . -RUN chmod +x ./scripts/install_extras.sh && \ - ./scripts/install_extras.sh +RUN chmod +x ./scripts/install_extractor_dependencies.sh && \ + ./scripts/install_extractor_dependencies.sh ENV CONFIG_DIR_PATH="/ingress-server/inputs" diff --git a/ingress_server/oci/install_extras.sh b/ingress_server/oci/install_extractor_dependencies.sh similarity index 100% rename from ingress_server/oci/install_extras.sh rename to ingress_server/oci/install_extractor_dependencies.sh From ae3e9617601c270e3b46e41c602605b8b502b60c Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Tue, 17 Feb 2026 01:33:42 +0100 Subject: [PATCH 07/10] Revert databuk/input/__init__.py --- app/databuk/inputs/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 app/databuk/inputs/__init__.py diff --git a/app/databuk/inputs/__init__.py b/app/databuk/inputs/__init__.py new file mode 100644 index 00000000..e122ab74 --- /dev/null +++ b/app/databuk/inputs/__init__.py @@ -0,0 +1,11 @@ +import pathlib + +# common output driectory +__script_dir__ = pathlib.Path(__file__).parent +work_dir = __script_dir__.parent / "workdir" +work_dir.mkdir(parents=True, exist_ok=True) + +# Following is public +input_dir = __script_dir__.parent / "inputs" +schema_bukov_yaml = input_dir / "schema_bukov.yaml" +bukov_locations_csv = input_dir / "bukov_locations.csv" From 9fda37bc775f5ceb6650aadc4c1e749186786fb4 Mon Sep 17 00:00:00 2001 From: "stepan.moc" Date: Sun, 22 Feb 2026 16:36:27 +0100 Subject: [PATCH 08/10] feat(ingress_server): self review --- app/databuk/inputs/__init__.py | 11 ----------- 1 file 
changed, 11 deletions(-) delete mode 100644 app/databuk/inputs/__init__.py diff --git a/app/databuk/inputs/__init__.py b/app/databuk/inputs/__init__.py deleted file mode 100644 index e122ab74..00000000 --- a/app/databuk/inputs/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import pathlib - -# common output driectory -__script_dir__ = pathlib.Path(__file__).parent -work_dir = __script_dir__.parent / "workdir" -work_dir.mkdir(parents=True, exist_ok=True) - -# Following is public -input_dir = __script_dir__.parent / "inputs" -schema_bukov_yaml = input_dir / "schema_bukov.yaml" -bukov_locations_csv = input_dir / "bukov_locations.csv" From b7c8f0ce9b6ad41b85eab0587ba5a5ebedfa7d5c Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Tue, 17 Feb 2026 01:33:42 +0100 Subject: [PATCH 09/10] Revert databuk/input/__init__.py --- app/databuk/inputs/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 app/databuk/inputs/__init__.py diff --git a/app/databuk/inputs/__init__.py b/app/databuk/inputs/__init__.py new file mode 100644 index 00000000..e122ab74 --- /dev/null +++ b/app/databuk/inputs/__init__.py @@ -0,0 +1,11 @@ +import pathlib + +# common output driectory +__script_dir__ = pathlib.Path(__file__).parent +work_dir = __script_dir__.parent / "workdir" +work_dir.mkdir(parents=True, exist_ok=True) + +# Following is public +input_dir = __script_dir__.parent / "inputs" +schema_bukov_yaml = input_dir / "schema_bukov.yaml" +bukov_locations_csv = input_dir / "bukov_locations.csv" From e398d0be9e1655a1f71ef2a55553cccd6ddf46bf Mon Sep 17 00:00:00 2001 From: "stepan.moc" Date: Sun, 22 Feb 2026 16:36:27 +0100 Subject: [PATCH 10/10] feat(ingress_server): self review --- app/databuk/inputs/__init__.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 app/databuk/inputs/__init__.py diff --git a/app/databuk/inputs/__init__.py b/app/databuk/inputs/__init__.py deleted file mode 100644 index e122ab74..00000000 --- a/app/databuk/inputs/__init__.py +++ 
/dev/null @@ -1,11 +0,0 @@ -import pathlib - -# common output driectory -__script_dir__ = pathlib.Path(__file__).parent -work_dir = __script_dir__.parent / "workdir" -work_dir.mkdir(parents=True, exist_ok=True) - -# Following is public -input_dir = __script_dir__.parent / "inputs" -schema_bukov_yaml = input_dir / "schema_bukov.yaml" -bukov_locations_csv = input_dir / "bukov_locations.csv"