diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..6bf80ff --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +.git +.github +.devcontainer +.terraform +.terraform.lock.hcl +*.tfstate +*.tfstate.* +*.tfvars +__pycache__ +*.py[cod] +.pytest_cache +.tox +.coverage +htmlcov +build +dist +*.egg-info +.env diff --git a/.env-template b/.env-template index 2e0edc1..c87e2dd 100644 --- a/.env-template +++ b/.env-template @@ -43,6 +43,7 @@ export TF_VAR_CAT_ARCHITECTURE="x86_64" export TF_VAR_ECS_TASK_CPU=512 export TF_VAR_ECS_TASK_MEMORY=1024 export TF_VAR_ECS_DESIRED_COUNT=1 +export TF_VAR_ECS_HEALTHCHECK_GRACE_PERIOD_SECONDS=1800 export TF_VAR_ECS_LOGS_RETENTION_DAYS=14 export TF_VAR_CONTAINER_PORT=8000 export TF_VAR_HEALTHCHECK_PATH="/hello" @@ -57,7 +58,12 @@ export TF_VAR_CAT_DEPLOYMENT="prod" export TF_VAR_ECR_REPOSITORY_NAME="app" export TF_VAR_ECR_IMAGE_SCAN_ON_PUSH=true export TF_VAR_ECR_KEEP_IMAGE_COUNT=20 +export TF_VAR_EFS_PERFORMANCE_MODE="generalPurpose" +export TF_VAR_EFS_THROUGHPUT_MODE="bursting" # Keep this in sync with DOCKER_IMAGE_TAG. export TF_VAR_DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG}" + +# Must be globally unique across all AWS accounts. +export TF_VAR_S3_BUCKET_NAME="sbn-cat-lite-data" \ No newline at end of file diff --git a/.gitignore b/.gitignore index c20f9e7..52c8dbf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# dwd-dev stuff +AGENTS.md +.devcontainer + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -167,6 +171,7 @@ cython_debug/ *.tfstate.backup .terraform.lock.hcl *.tfvars +terraform.tfstate.* # IDE files diff --git a/Dockerfile b/Dockerfile index 461e096..a8fd6d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ FROM python:3.12-slim -# Set working directory WORKDIR /app -### Install system dependencies -RUN apt-get update && apt-get install -y \ +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + astrometry.net \ + source-extractor \ netcat-openbsd \ git \ wget \ @@ -13,33 +15,27 @@ RUN apt-get update && apt-get install -y \ libbz2-dev \ && rm -rf /var/lib/apt/lists/* -RUN pip install -U pip setuptools wheel "connexion[flask,swagger-ui,uvicorn]" +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -U pip setuptools wheel + +COPY requirements.local.txt /app/ -COPY . /app -RUN pip install /app -COPY requirements.local.txt . -RUN pip install -r requirements.local.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r requirements.local.txt +COPY pyproject.toml setup.py MANIFEST.in README.md /app/ +COPY catch_analysis_tools /app/catch_analysis_tools +COPY scripts /app/scripts +COPY docker /app/docker -# Checkout code -# RUN --mount=type=bind,source=./,target=/app/src/ -# COPY \ -# requirements.local.txt \ -# setup.py \ -# pyproject.toml \ -# . -# COPY catch_analysis_tools /app/catch_analysis_tools +ARG PACKAGE_VERSION=0.0.0 +ENV SETUPTOOLS_SCM_PRETEND_VERSION_FOR_CATCH_ANALYSIS_TOOLS=${PACKAGE_VERSION} -# # ARG CAT_DEPLOYMENT -# # RUN if [ "$CAT_DEPLOYMENT" = "prod" ]; then \ -# # git clone git+https://github.com -# # pip install -r src/requirements.prod.txt; \ -# # else \ -# # fi +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install --no-deps /app -# # Install dependencies -# #RUN pip install -r requirements.local.txt; +RUN chmod +x /app/docker/entrypoint.sh EXPOSE 8000 -CMD ["python3", "-m", "catch_analysis_tools.app.app"] +ENTRYPOINT ["/app/docker/entrypoint.sh"] diff --git a/README.md b/README.md index 9843226..530221b 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Running locally will install the currently checked out version of the CAT. ### DWD's changes: - Added terraform configuration. For now, we are depending on my local machine for state-files. -- Moved docker stuff to just two files in the root dir: Dockerfile and docker-compose.yml. We'll worry about different deployments later +- Moved docker stuff to just two files in the root dir: Dockerfile and docker-compose.yml. We'll worry about different deployments later. - To run the app locally, use `docker-compose up` - To deploy to AWS: - Don't do this for now -- rely on DWD to do it -- this is just FYI @@ -63,9 +63,10 @@ Running locally will install the currently checked out version of the CAT. - Update tf state with `./_tf apply` - I also made some changes adding a /hello route, and wiring up the flask code to get it to work - Added a simple script to ping the endpoint created by tf, `./_ping_endpoint` +- To make sure that the /astrometry endpoint does not run before the Astrometry.net index files are downloaded, I added a bunch of logic in `catch_analysis_tools/app/astrometry_readiness`. See README therein for details. It also involves a new route `/health` that lets you know if the files are downloaded. -## Astrometry Configuration +## Astrometry Configuration The astrometric calibration pipeline depends on **astrometry.net** index files and a corresponding configuration file. These are required for WCS solving. diff --git a/_ping_astrometry b/_ping_astrometry new file mode 100755 index 0000000..7d8ef92 --- /dev/null +++ b/_ping_astrometry @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# +# Resolve service URL from OpenTofu outputs and test POST /astrometry. +# Use --local to ping the locally running docker-compose app. + +set -euo pipefail +IFS=$'\n\t' + +local_mode=false + +print_help() { + cat <<'EOF' +Usage: ./_ping_astrometry [--local] + +Options: + --local Ping http://127.0.0.1:8000 instead of the deployed service. + -h, --help Show this help. +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --local) + local_mode=true + shift + ;; + -h|--help) + print_help + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + print_help >&2 + exit 1 + ;; + esac +done + +if ! command -v curl >/dev/null 2>&1; then + echo "curl command not found. Install curl and try again." >&2 + exit 1 +fi + +if [[ "${local_mode}" == true ]]; then + service_base_url="${LOCAL_SERVICE_BASE_URL:-http://127.0.0.1:8000}" +else + if [[ ! -f ".env" ]]; then + echo "Missing .env file. Copy .env-template and configure required values." >&2 + exit 1 + fi + + if ! command -v tofu >/dev/null 2>&1; then + echo "OpenTofu (tofu) is not installed or not in PATH." >&2 + exit 1 + fi + + # shellcheck disable=SC1091 + source .env + + service_base_url="$(tofu output -raw service_base_url)" + if [[ -z "${service_base_url}" ]]; then + echo "Failed to resolve service_base_url from tofu output." >&2 + exit 1 + fi +fi + +endpoint="${service_base_url%/}/astrometry" +echo "Pinging: ${endpoint}" + +# /wcs POST + +payload='{ + "image_url": "https://uxzqjwo0ye.execute-api.us-west-1.amazonaws.com/api/images/urn:nasa:pds:gbo.ast.loneos.survey:data_augmented:060104_1a_099_fits?ra=51.11064&dec=17.38378&size=12.60arcmin&format=fits", + "ra": 51.11064, + "dec": 17.38378, + "use_ra_dec": true, + "pixel_scale": 2.5, + "snr_threshold": 3.0, + "aperture_radius": 7.0, + "catalog": "PanSTARRS1", + "obs_band": "g", + "cal_band": "r", + "return_plot": false + }' + +printf 'Executing:\n' +printf 'curl -i -X POST %q -H %q --max-time 900 --data %q\n' \ + "${endpoint}" \ + "Content-Type: application/json" \ + "${payload}" + +curl -i -X POST "${endpoint}" \ + -H "Content-Type: application/json" \ + --max-time 900 \ + --data "${payload}" diff --git a/_ping_endpooint b/_ping_endpooint deleted file mode 100755 index 7257aca..0000000 --- a/_ping_endpooint +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# -# Resolve service URL from OpenTofu outputs and ping /hello. - -set -euo pipefail -IFS=$'\n\t' - -if [[ ! -f ".env" ]]; then - echo "Missing .env file. Copy .env-template and configure required values." >&2 - exit 1 -fi - -if ! command -v tofu >/dev/null 2>&1; then - echo "OpenTofu (tofu) is not installed or not in PATH." >&2 - exit 1 -fi - -if ! command -v curl >/dev/null 2>&1; then - echo "curl command not found. Install curl and try again." >&2 - exit 1 -fi - -# shellcheck disable=SC1091 -source .env - -service_base_url="$(tofu output -raw service_base_url)" -if [[ -z "${service_base_url}" ]]; then - echo "Failed to resolve service_base_url from tofu output." >&2 - exit 1 -fi - -endpoint="${service_base_url%/}/hello" -echo "Pinging: ${endpoint}" -curl -i "${endpoint}" diff --git a/_ping_health b/_ping_health new file mode 100755 index 0000000..c919db5 --- /dev/null +++ b/_ping_health @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# +# Resolve service URL from OpenTofu outputs and ping /health. +# Use --local to ping the locally running docker-compose app. + +set -euo pipefail +IFS=$'\n\t' + +local_mode=false + +print_help() { + cat <<'EOF' +Usage: ./_ping_health [--local] + +Options: + --local Ping http://127.0.0.1:8000 instead of the deployed service. + -h, --help Show this help. +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --local) + local_mode=true + shift + ;; + -h|--help) + print_help + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + print_help >&2 + exit 1 + ;; + esac +done + +if ! command -v curl >/dev/null 2>&1; then + echo "curl command not found. Install curl and try again." >&2 + exit 1 +fi + +if [[ "${local_mode}" == true ]]; then + service_base_url="${LOCAL_SERVICE_BASE_URL:-http://127.0.0.1:8000}" +else + if [[ ! -f ".env" ]]; then + echo "Missing .env file. Copy .env-template and configure required values." >&2 + exit 1 + fi + + if ! command -v tofu >/dev/null 2>&1; then + echo "OpenTofu (tofu) is not installed or not in PATH." >&2 + exit 1 + fi + + # shellcheck disable=SC1091 + source .env + + service_base_url="$(tofu output -raw service_base_url)" + if [[ -z "${service_base_url}" ]]; then + echo "Failed to resolve service_base_url from tofu output." >&2 + exit 1 + fi +fi + +endpoint="${service_base_url%/}/health" +echo "Pinging: ${endpoint}" + +printf 'Executing:\n' +printf 'curl -i %q\n' "${endpoint}" + +curl -i "${endpoint}" diff --git a/_ping_hello b/_ping_hello new file mode 100755 index 0000000..6d69094 --- /dev/null +++ b/_ping_hello @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Resolve service URL from OpenTofu outputs and ping /hello. +# Use --local to ping the locally running docker-compose app. + +set -euo pipefail +IFS=$'\n\t' + +local_mode=false + +print_help() { + cat <<'EOF' +Usage: ./_ping_hello [--local] + +Options: + --local Ping http://127.0.0.1:8000 instead of the deployed service. + -h, --help Show this help. +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --local) + local_mode=true + shift + ;; + -h|--help) + print_help + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + print_help >&2 + exit 1 + ;; + esac +done + +if ! command -v curl >/dev/null 2>&1; then + echo "curl command not found. Install curl and try again." >&2 + exit 1 +fi + +if [[ "${local_mode}" == true ]]; then + service_base_url="${LOCAL_SERVICE_BASE_URL:-http://127.0.0.1:8000}" +else + if [[ ! -f ".env" ]]; then + echo "Missing .env file. Copy .env-template and configure required values." >&2 + exit 1 + fi + + if ! command -v tofu >/dev/null 2>&1; then + echo "OpenTofu (tofu) is not installed or not in PATH." >&2 + exit 1 + fi + + # shellcheck disable=SC1091 + source .env + + service_base_url="$(tofu output -raw service_base_url)" + if [[ -z "${service_base_url}" ]]; then + echo "Failed to resolve service_base_url from tofu output." >&2 + exit 1 + fi +fi + +endpoint="${service_base_url%/}/hello" +echo "Pinging: ${endpoint}" +curl -i "${endpoint}" diff --git a/_ping_logs b/_ping_logs new file mode 100755 index 0000000..a0ba255 --- /dev/null +++ b/_ping_logs @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +# +# Resolve ECS/CloudWatch details from OpenTofu outputs and print a useful +# debugging snapshot for the deployed Fargate service. + +set -euo pipefail +IFS=$'\n\t' + +messages_only=false + +print_help() { + cat <<'EOF' +Usage: ./_ping_logs [--messages-only] + +Options: + --messages-only In the CloudWatch log section, print only log message text. + -h, --help Show this help. +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --messages-only) + messages_only=true + shift + ;; + -h|--help) + print_help + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + print_help >&2 + exit 1 + ;; + esac +done + +if [[ ! -f ".env" ]]; then + echo "Missing .env file. Copy .env-template and configure required values." >&2 + exit 1 +fi + +if ! command -v tofu >/dev/null 2>&1; then + echo "OpenTofu (tofu) is not installed or not in PATH." >&2 + exit 1 +fi + +if ! command -v aws >/dev/null 2>&1; then + echo "aws CLI not found. Install AWS CLI and try again." >&2 + exit 1 +fi + +# shellcheck disable=SC1091 +source .env + +unset AWS_PROFILE AWS_DEFAULT_PROFILE +export AWS_PAGER="" + +aws_region="${AWS_REGION:-${TF_VAR_AWS_REGION:-}}" +if [[ -z "${aws_region}" ]]; then + echo "Either AWS_REGION or TF_VAR_AWS_REGION must be set in .env" >&2 + exit 1 +fi + +service_base_url="$(tofu output -raw service_base_url)" +ecs_cluster_name="$(tofu output -raw ecs_cluster_name)" +ecs_service_name="$(tofu output -raw ecs_service_name)" + +if [[ -z "${service_base_url}" || -z "${ecs_cluster_name}" || -z "${ecs_service_name}" ]]; then + echo "Failed to resolve one or more OpenTofu outputs." >&2 + exit 1 +fi + +current_task_definition="$( + aws ecs describe-services \ + --region "${aws_region}" \ + --cluster "${ecs_cluster_name}" \ + --services "${ecs_service_name}" \ + --query 'services[0].taskDefinition' \ + --output text +)" + +current_image="$( + aws ecs describe-task-definition \ + --region "${aws_region}" \ + --task-definition "${current_task_definition}" \ + --query 'taskDefinition.containerDefinitions[0].image' \ + --output text +)" + +log_group="$( + aws ecs describe-task-definition \ + --region "${aws_region}" \ + --task-definition "${current_task_definition}" \ + --query 'taskDefinition.containerDefinitions[0].logConfiguration.options."awslogs-group"' \ + --output text +)" + +section() { + printf '\n=== %s ===\n' "$1" +} + +print_command() { + printf 'Executing:\n%s\n\n' "$1" +} + +describe_tasks() { + local desired_status="$1" + local max_results="$2" + local -a task_arns=() + + while IFS= read -r task_arn; do + task_arns+=("${task_arn}") + done < <( + aws ecs list-tasks \ + --region "${aws_region}" \ + --cluster "${ecs_cluster_name}" \ + --desired-status "${desired_status}" \ + --max-results "${max_results}" \ + --query 'taskArns[]' \ + --output text | tr '\t' '\n' | sed '/^$/d' + ) + + if [[ "${#task_arns[@]}" -eq 0 ]]; then + echo "No ${desired_status} tasks found." + return 0 + fi + + aws ecs describe-tasks \ + --region "${aws_region}" \ + --cluster "${ecs_cluster_name}" \ + --tasks "${task_arns[@]}" \ + --query 'sort_by(tasks,&createdAt)[].{ + task: taskArn, + taskDefinition: taskDefinitionArn, + lastStatus: lastStatus, + desiredStatus: desiredStatus, + healthStatus: healthStatus, + createdAt: createdAt, + startedAt: startedAt, + stoppedAt: stoppedAt, + stoppedReason: stoppedReason, + stopCode: stopCode, + exitCode: containers[0].exitCode, + containerReason: containers[0].reason + }' \ + --output table +} + +section "Context" +echo "Service URL: ${service_base_url}" +echo "AWS region: ${aws_region}" +echo "ECS cluster: ${ecs_cluster_name}" +echo "ECS service: ${ecs_service_name}" +echo "Task definition: ${current_task_definition}" +echo "Image: ${current_image}" +echo "Log group: ${log_group}" + +section "Service Summary" +print_command "aws ecs describe-services --region ${aws_region} --cluster ${ecs_cluster_name} --services ${ecs_service_name}" +aws ecs describe-services \ + --region "${aws_region}" \ + --cluster "${ecs_cluster_name}" \ + --services "${ecs_service_name}" \ + --query 'services[0].{ + status: status, + desiredCount: desiredCount, + runningCount: runningCount, + pendingCount: pendingCount, + taskDefinition: taskDefinition, + healthCheckGracePeriodSeconds: healthCheckGracePeriodSeconds, + deployments: deployments[].{ + status: status, + taskDefinition: taskDefinition, + desired: desiredCount, + running: runningCount, + pending: pendingCount, + failed: failedTasks, + rolloutState: rolloutState, + rolloutStateReason: rolloutStateReason + } + }' \ + --output json + +section "Recent Service Events" +print_command "aws ecs describe-services --region ${aws_region} --cluster ${ecs_cluster_name} --services ${ecs_service_name} --query services[0].events[0:10]" +aws ecs describe-services \ + --region "${aws_region}" \ + --cluster "${ecs_cluster_name}" \ + --services "${ecs_service_name}" \ + --query 'services[0].events[0:10].[createdAt,message]' \ + --output table + +section "Running Tasks" +print_command "aws ecs list-tasks --region ${aws_region} --cluster ${ecs_cluster_name} --desired-status RUNNING" +describe_tasks "RUNNING" 10 + +section "Recent Stopped Tasks" +print_command "aws ecs list-tasks --region ${aws_region} --cluster ${ecs_cluster_name} --desired-status STOPPED" +describe_tasks "STOPPED" 10 + +section "Recent Container Logs" +if [[ "${messages_only}" == true ]]; then + print_command "aws logs tail ${log_group} --region ${aws_region} --since 15m | sed -E 's/^[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]//'" + aws logs tail "${log_group}" \ + --region "${aws_region}" \ + --since 15m \ + | sed -E 's/^[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]//' +else + print_command "aws logs tail ${log_group} --region ${aws_region} --since 15m" + aws logs tail "${log_group}" \ + --region "${aws_region}" \ + --since 15m +fi diff --git a/catch_analysis_tools/app/api/openapi.yaml b/catch_analysis_tools/app/api/openapi.yaml index 969451f..221423c 100644 --- a/catch_analysis_tools/app/api/openapi.yaml +++ b/catch_analysis_tools/app/api/openapi.yaml @@ -10,7 +10,7 @@ info: paths: /hello: get: - operationId: catch_analysis_tools.app.services.health.hello + operationId: catch_analysis_tools.app.handlers.hello.hello summary: Basic service liveness endpoint. responses: "200": @@ -19,13 +19,38 @@ paths: text/plain: schema: type: string - example: Hello, World! + example: | + Catch Analysis Tools is running. + package_version=1.0.0 + docker_image_tag=2026-02-11-001 + /health: + get: + operationId: catch_analysis_tools.app.handlers.health.health + summary: Service health and astrometry data readiness. + responses: + "200": + description: Service is running. + content: + application/json: + schema: + type: object + /reset: + post: + operationId: catch_analysis_tools.app.handlers.reset.reset + summary: Force an astrometry data readiness recheck. + responses: + "202": + description: Recheck started. + content: + application/json: + schema: + type: object /astrometry: post: tags: - Astrometry summary: Perform astrometry and optionally return a plot image. - operationId: catch_analysis_tools.app.services.astrometry.do_astrometry + operationId: catch_analysis_tools.app.handlers.astrometry.do_astrometry requestBody: required: true @@ -198,5 +223,19 @@ paths: "400": description: Bad request (invalid input or FITS error) + "422": + description: Astrometry could not solve the submitted image. + content: + application/json: + schema: + type: object + + "503": + description: Astrometry index files are not ready yet. + content: + application/json: + schema: + type: object + "500": description: Internal server error diff --git a/catch_analysis_tools/app/app.py b/catch_analysis_tools/app/app.py index 926147f..8233fa7 100644 --- a/catch_analysis_tools/app/app.py +++ b/catch_analysis_tools/app/app.py @@ -18,14 +18,14 @@ #logger: logging.Logger = get_logger() app = connexion.FlaskApp(__name__, specification_dir="api/") -#app.add_middleware( -# CORSMiddleware, -# position=MiddlewarePosition.BEFORE_EXCEPTION, -# allow_origins=["*"], -# allow_credentials=True, -# allow_methods=["*"], -# allow_headers=["*"], -#) +app.add_middleware( + CORSMiddleware, + position=MiddlewarePosition.BEFORE_EXCEPTION, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) @@ -71,4 +71,8 @@ # for development #logger.info("Running " + ENV.APP_NAME) #logger.info(application.url_map) + from catch_analysis_tools.app.astrometry_readiness.start_astrometry_background_check import ( + start_astrometry_background_check, + ) + start_astrometry_background_check() app.run(host="0.0.0.0", port=8000)#"catch_analysis_tools.app:app")#, host=ENV.API_HOST, port=ENV.API_PORT) diff --git a/catch_analysis_tools/app/astrometry_readiness/_README.md b/catch_analysis_tools/app/astrometry_readiness/_README.md new file mode 100644 index 0000000..5618574 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/_README.md @@ -0,0 +1,228 @@ +# Astrometry Readiness + +This package is runtime plumbing for `/astrometry`. It exists so the app does +not call Astrometry.net's `solve-field` until the required index FITS files are +present on disk. + +This is not scientific astrometry code. The scientific route only taps into this plumbing with two simple questions answered by the following hooks: + +```python +is_astrometry_ready() +get_astrometry_readiness_status() +``` + +Everything else in this package is about making those answers reliable while +the app is starting, while files are downloading, and while more than one app +instance may be running. + +## Storage + +Inside the container, the index files always live here: + +```text +/root/.astrometry/data +``` + +That path is intentionally the same in every environment. + +In AWS Fargate, Terraform configures ECS to mount EFS at that container path. +All Fargate tasks for this service see the same EFS-backed directory. + +In local development, `docker compose up` uses this bind mount: + +```yaml +${HOME}/.astrometry/data:/root/.astrometry/data +``` + +So local downloads are written to the host machine at: + +```text +${HOME}/.astrometry/data +``` + +Those files persist across local container rebuilds and restarts. + +## Startup Flow + +The Docker entrypoint sets defaults for the index directory and Astrometry.net +config: + +```bash +ASTROMETRY_INDEX_DIR=/root/.astrometry/data +ASTROMETRY_CONFIG=/app/docker/astrometry.cfg +``` + +Then it starts the app with: + +```bash +python3 -m catch_analysis_tools.app.app +``` + +When `app.py` runs as `__main__`, it calls: + +```python +start_astrometry_background_check() +``` + +That starts one daemon thread in the current Python process. This thread runs +the index-file check/download work in the background, so the HTTP app can start +immediately instead of blocking until hundreds of index files are checked or +downloaded. + +While the background thread is still working: + +- `/hello` can respond normally. +- `/health` reports the current readiness status. +- `/astrometry` returns `503 not_ready` instead of trying `solve-field`. + +After the background thread confirms all expected index files exist, +`/astrometry` is allowed to run the scientific pipeline. + +## What State Lives In RAM + +Each app process has its own in-memory status object in `state.py`. + +That status includes fields such as: + +```text +state +ready +message +files_present +expected_files +error +updated_at +``` + +This RAM state is only local to one Python process. It is not shared across +Fargate tasks, Docker containers, or worker processes. + +That means one process cannot directly update another process's RAM. If two +Fargate tasks are running, each task has its own background thread and its own +readiness status. + +Cross-process coordination happens through the shared filesystem, not through +RAM. + +## The In-Memory Thread Lock + +`state.py` also owns a `threading.RLock`. + +That lock only protects the current Python process. It prevents the background +thread and HTTP routes in the same process from reading/writing the in-memory +status at the same time. + +It does not coordinate between Fargate tasks. + +It does not protect EFS. + +It does not make RAM global. + +## The Global File Lock + +The cross-process guard is the index-download file lock: + +```text +/root/.astrometry/.index-download.lock +``` + +This file sits next to the shared index directory. Because Fargate mounts EFS +at `/root/.astrometry/data`, every Fargate task sees the same parent directory +and therefore the same lock file. + +The same model is used locally because Docker Compose bind-mounts the host's +`${HOME}/.astrometry/data` to `/root/.astrometry/data`. + +When an app instance finds that index files are missing, it tries to acquire an +exclusive `flock` on the lock file. Only one process can hold that lock at a +time. + +The sequence is: + +1. Process A checks the index directory and sees missing files. +2. Process A acquires the file lock. +3. Process B also sees missing files and tries to acquire the same lock. +4. Process B blocks while Process A holds the lock. +5. Process A downloads the missing files. +6. Process A releases the lock. +7. Process B acquires the lock, rechecks the directory, sees the files are now + present, and skips the duplicate download. + +This is the part that makes simultaneous Fargate startups safe. + +## Readiness Check Details + +The readiness preparation function does this: + +1. Fetches the upstream directory listing from `INDEX_URL`. +2. Extracts the expected `index-*.fits` filenames. +3. Counts which expected files already exist locally and are non-empty. +4. If all files are present, marks this process ready. +5. If files are missing, acquires the global file lock. +6. Rechecks the files after acquiring the lock. +7. Downloads only missing files. +8. Recounts the files. +9. Marks this process ready if all expected files are now present. + +The current completeness check is intentionally simple: an expected file counts +as present if it exists and has size greater than zero. It does not currently +deep-validate every FITS file. + +## Sentinel File + +After a successful check/download, the code writes: + +```text +/root/.astrometry/data/.cat_astrometry_ready.json +``` + +This records that a successful readiness pass happened, along with counts and +timestamps. The core readiness decision still comes from checking the expected +index files and the current process's in-memory status. + +## Route Behavior + +`/health` returns the current process's readiness status. It does not force a +new expensive check by itself. + +`/reset` starts a forced readiness check in the background for the current app +process. + +`/astrometry` first checks the current process's in-memory readiness flag. If +that flag is false, it returns `503 not_ready` with the current readiness +status. It does not run the expensive file check on every request. + +This keeps normal `/astrometry` requests cheap once the process has marked +itself ready. + +## Failure Behavior + +If the background preparation fails, the current process records: + +```text +state=error +ready=false +error= +``` + +Then `/health` exposes that error and `/astrometry` continues returning +`503 not_ready`. + +The app process itself does not crash just because the background readiness +thread failed. + +## Mental Model + +There are two levels of coordination: + +```text +Within one Python process: + threading.RLock protects the in-memory status dictionary. + +Across Fargate tasks or containers: + flock on /root/.astrometry/.index-download.lock protects the shared + index directory. +``` + +The readiness state in RAM is deliberately cheap and local. The file lock and +shared mounted directory are what prevent duplicate or overlapping downloads. diff --git a/catch_analysis_tools/app/astrometry_readiness/__init__.py b/catch_analysis_tools/app/astrometry_readiness/__init__.py new file mode 100644 index 0000000..409a56b --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/__init__.py @@ -0,0 +1,18 @@ +""" +Runtime readiness helpers for Astrometry.net index files. + +This package is application/deployment plumbing, not scientific astrometry +logic. The /astrometry route needs Astrometry.net index FITS files on disk +before it can safely call solve-field. In AWS Fargate that directory is backed +by EFS and mounted at /root/.astrometry/data inside the container. + +The docker-compose.yml file is for local development/debugging. When running +locally with `docker compose up`, Docker bind-mounts `${HOME}/.astrometry/data` +from the host machine to /root/.astrometry/data inside the container. Missing +index files are downloaded into that host directory, so they persist across +container rebuilds and restarts. + +The helpers in this package discover the expected remote index files, download +missing files, expose cheap in-memory readiness status, and use a file lock so +multiple app instances do not download the same files at the same time. +""" diff --git a/catch_analysis_tools/app/astrometry_readiness/acquire_index_download_lock.py b/catch_analysis_tools/app/astrometry_readiness/acquire_index_download_lock.py new file mode 100644 index 0000000..768e35f --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/acquire_index_download_lock.py @@ -0,0 +1,12 @@ +import fcntl + +from catch_analysis_tools.app.astrometry_readiness.get_lock_path import get_lock_path + + +def acquire_index_download_lock(): + """Acquire the cross-process lock for mutating the shared index dir.""" + lock = get_lock_path() + lock.parent.mkdir(parents=True, exist_ok=True) + handle = open(lock, "w", encoding="utf-8") + fcntl.flock(handle.fileno(), fcntl.LOCK_EX) + return handle diff --git a/catch_analysis_tools/app/astrometry_readiness/background_prepare_astrometry_data.py b/catch_analysis_tools/app/astrometry_readiness/background_prepare_astrometry_data.py new file mode 100644 index 0000000..39249af --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/background_prepare_astrometry_data.py @@ -0,0 +1,11 @@ +from catch_analysis_tools.app.astrometry_readiness.prepare_astrometry_data import ( + prepare_astrometry_data, +) + + +def background_prepare_astrometry_data(force=False): + """Run astrometry readiness preparation without crashing the app process.""" + try: + prepare_astrometry_data(force=force) + except Exception: + pass diff --git a/catch_analysis_tools/app/astrometry_readiness/constants.py b/catch_analysis_tools/app/astrometry_readiness/constants.py new file mode 100644 index 0000000..d7c6e72 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/constants.py @@ -0,0 +1,2 @@ +INDEX_URL = "https://portal.nersc.gov/project/cosmo/temp/dstn/index-5200/LITE/" +DEFAULT_INDEX_DIR = "/root/.astrometry/data" diff --git a/catch_analysis_tools/app/astrometry_readiness/count_complete_index_files.py b/catch_analysis_tools/app/astrometry_readiness/count_complete_index_files.py new file mode 100644 index 0000000..9906a5d --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/count_complete_index_files.py @@ -0,0 +1,16 @@ +import glob +import os + +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir + + +def count_complete_index_files(expected_files=None): + """Count index files that exist locally and are non-empty.""" + if expected_files is None: + files = glob.glob(str(get_index_dir() / "index-*.fits")) + else: + files = [str(get_index_dir() / filename) for filename in expected_files] + complete_files = [ + path for path in files if os.path.exists(path) and os.path.getsize(path) > 0 + ] + return len(complete_files) diff --git a/catch_analysis_tools/app/astrometry_readiness/download_index_files.py b/catch_analysis_tools/app/astrometry_readiness/download_index_files.py new file mode 100644 index 0000000..49a1b12 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/download_index_files.py @@ -0,0 +1,41 @@ +import os +from urllib.parse import urljoin + +from catch_analysis_tools.app.astrometry_readiness.constants import INDEX_URL +from catch_analysis_tools.app.astrometry_readiness.count_complete_index_files import ( + count_complete_index_files, +) +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir +from catch_analysis_tools.app.astrometry_readiness.set_astrometry_readiness_status import ( + set_astrometry_readiness_status, +) + + +def download_index_files(session, expected_files): + """Download any missing Astrometry.net index files into the index dir.""" + target_dir = get_index_dir() + target_dir.mkdir(parents=True, exist_ok=True) + + for href in expected_files: + final_path = target_dir / href + if final_path.exists() and final_path.stat().st_size > 0: + continue + + tmp_path = final_path.with_name(f".{final_path.name}.tmp") + file_url = urljoin(INDEX_URL, href) + set_astrometry_readiness_status( + state="downloading", + ready=False, + message=f"Downloading {href}", + files_present=count_complete_index_files(expected_files), + error=None, + ) + + with session.get(file_url, stream=True, timeout=300) as download: + download.raise_for_status() + with open(tmp_path, "wb") as handle: + for chunk in download.iter_content(chunk_size=1024 * 1024): + if chunk: + handle.write(chunk) + + os.replace(tmp_path, final_path) diff --git a/catch_analysis_tools/app/astrometry_readiness/get_astrometry_readiness_status.py b/catch_analysis_tools/app/astrometry_readiness/get_astrometry_readiness_status.py new file mode 100644 index 0000000..a63861e --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_astrometry_readiness_status.py @@ -0,0 +1,10 @@ +from catch_analysis_tools.app.astrometry_readiness import state +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir + + +def get_astrometry_readiness_status(): + """Return a snapshot of the current astrometry index readiness state.""" + with state.state_lock: + status = dict(state.status) + status["index_dir"] = str(get_index_dir()) + return status diff --git a/catch_analysis_tools/app/astrometry_readiness/get_current_time.py b/catch_analysis_tools/app/astrometry_readiness/get_current_time.py new file mode 100644 index 0000000..9000282 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_current_time.py @@ -0,0 +1,6 @@ +from datetime import datetime, timezone + + +def get_current_time(): + """Return the current UTC time as an ISO-8601 string.""" + return datetime.now(timezone.utc).isoformat() diff --git a/catch_analysis_tools/app/astrometry_readiness/get_index_dir.py b/catch_analysis_tools/app/astrometry_readiness/get_index_dir.py new file mode 100644 index 0000000..755a5f7 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_index_dir.py @@ -0,0 +1,9 @@ +import os +from pathlib import Path + +from catch_analysis_tools.app.astrometry_readiness.constants import DEFAULT_INDEX_DIR + + +def get_index_dir(): + """Return the directory where Astrometry.net index files should live.""" + return Path(os.environ.get("ASTROMETRY_INDEX_DIR", DEFAULT_INDEX_DIR)) diff --git a/catch_analysis_tools/app/astrometry_readiness/get_lock_path.py b/catch_analysis_tools/app/astrometry_readiness/get_lock_path.py new file mode 100644 index 0000000..1cb7093 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_lock_path.py @@ -0,0 +1,6 @@ +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir + + +def get_lock_path(): + """Return the cross-process lock path used during index downloads.""" + return get_index_dir().parent / ".index-download.lock" diff --git a/catch_analysis_tools/app/astrometry_readiness/get_remote_index_files.py b/catch_analysis_tools/app/astrometry_readiness/get_remote_index_files.py new file mode 100644 index 0000000..cc89ec4 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_remote_index_files.py @@ -0,0 +1,18 @@ +from bs4 import BeautifulSoup + +from catch_analysis_tools.app.astrometry_readiness.constants import INDEX_URL + + +def get_remote_index_files(session): + """Return the Astrometry.net index FITS filenames advertised upstream.""" + response = session.get(INDEX_URL, timeout=60) + response.raise_for_status() + soup = BeautifulSoup(response.text, "html.parser") + files = sorted({ + link["href"] + for link in soup.find_all("a", href=True) + if link["href"].startswith("index-") and link["href"].endswith(".fits") + }) + if not files: + raise RuntimeError(f"No astrometry index files found at {INDEX_URL}") + return files diff --git a/catch_analysis_tools/app/astrometry_readiness/get_sentinel_path.py b/catch_analysis_tools/app/astrometry_readiness/get_sentinel_path.py new file mode 100644 index 0000000..b2eefc4 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/get_sentinel_path.py @@ -0,0 +1,6 @@ +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir + + +def get_sentinel_path(): + """Return the marker file path written after index files are ready.""" + return get_index_dir() / ".cat_astrometry_ready.json" diff --git a/catch_analysis_tools/app/astrometry_readiness/index_files_complete.py b/catch_analysis_tools/app/astrometry_readiness/index_files_complete.py new file mode 100644 index 0000000..4537757 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/index_files_complete.py @@ -0,0 +1,9 @@ +from catch_analysis_tools.app.astrometry_readiness.count_complete_index_files import ( + count_complete_index_files, +) + + +def index_files_complete(expected_files): + """Return whether all expected index files are present, plus the count.""" + files_present = count_complete_index_files(expected_files) + return files_present == len(expected_files), files_present diff --git a/catch_analysis_tools/app/astrometry_readiness/is_astrometry_ready.py b/catch_analysis_tools/app/astrometry_readiness/is_astrometry_ready.py new file mode 100644 index 0000000..6f480cc --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/is_astrometry_ready.py @@ -0,0 +1,7 @@ +from catch_analysis_tools.app.astrometry_readiness import state + + +def is_astrometry_ready(): + """Return whether the process currently considers astrometry ready.""" + with state.state_lock: + return bool(state.status["ready"]) diff --git a/catch_analysis_tools/app/astrometry_readiness/prepare_astrometry_data.py b/catch_analysis_tools/app/astrometry_readiness/prepare_astrometry_data.py new file mode 100644 index 0000000..073c247 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/prepare_astrometry_data.py @@ -0,0 +1,114 @@ +import fcntl + +import requests + +from catch_analysis_tools.app.astrometry_readiness.acquire_index_download_lock import ( + acquire_index_download_lock, +) +from catch_analysis_tools.app.astrometry_readiness.constants import INDEX_URL +from catch_analysis_tools.app.astrometry_readiness.count_complete_index_files import ( + count_complete_index_files, +) +from catch_analysis_tools.app.astrometry_readiness.download_index_files import ( + download_index_files, +) +from catch_analysis_tools.app.astrometry_readiness.get_astrometry_readiness_status import ( + get_astrometry_readiness_status, +) +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir +from catch_analysis_tools.app.astrometry_readiness.get_remote_index_files import ( + get_remote_index_files, +) +from catch_analysis_tools.app.astrometry_readiness.index_files_complete import ( + index_files_complete, +) +from catch_analysis_tools.app.astrometry_readiness.set_astrometry_readiness_status import ( + set_astrometry_readiness_status, +) +from catch_analysis_tools.app.astrometry_readiness.write_ready_sentinel import ( + write_ready_sentinel, +) + + +def prepare_astrometry_data(force=False): + """Check/download index files and mark astrometry ready when complete.""" + target_dir = get_index_dir() + target_dir.mkdir(parents=True, exist_ok=True) + + with requests.Session() as session: + expected_files = get_remote_index_files(session) + expected_count = len(expected_files) + + set_astrometry_readiness_status( + state="checking", + ready=False, + message="Checking astrometry index files.", + index_dir=str(target_dir), + index_url=INDEX_URL, + expected_files=expected_count, + error=None, + ) + + complete, files_present = index_files_complete(expected_files) + if complete and not force: + write_ready_sentinel(files_present, expected_files) + set_astrometry_readiness_status( + state="ready", + ready=True, + message="Astrometry index files are ready.", + files_present=files_present, + error=None, + ) + return get_astrometry_readiness_status() + + lock_handle = acquire_index_download_lock() + try: + complete, files_present = index_files_complete(expected_files) + if complete and not force: + write_ready_sentinel(files_present, expected_files) + set_astrometry_readiness_status( + state="ready", + ready=True, + message="Astrometry index files are ready.", + files_present=files_present, + error=None, + ) + return get_astrometry_readiness_status() + + set_astrometry_readiness_status( + state="downloading", + ready=False, + message="Astrometry index files are incomplete. Downloading missing files.", + files_present=files_present, + error=None, + ) + download_index_files(session, expected_files) + + complete, files_present = index_files_complete(expected_files) + if not complete: + raise RuntimeError( + "Astrometry index files incomplete: " + f"{files_present} / {expected_count}" + ) + + write_ready_sentinel(files_present, expected_files) + set_astrometry_readiness_status( + state="ready", + ready=True, + message="Astrometry index files are ready.", + files_present=files_present, + error=None, + ) + return get_astrometry_readiness_status() + except Exception as exc: + set_astrometry_readiness_status( + state="error", + ready=False, + message="Astrometry index preparation failed.", + files_present=count_complete_index_files(expected_files), + error=str(exc), + ) + raise + finally: + fcntl.flock(lock_handle.fileno(), fcntl.LOCK_UN) + lock_handle.close() diff --git a/catch_analysis_tools/app/astrometry_readiness/set_astrometry_readiness_status.py b/catch_analysis_tools/app/astrometry_readiness/set_astrometry_readiness_status.py new file mode 100644 index 0000000..22a02aa --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/set_astrometry_readiness_status.py @@ -0,0 +1,12 @@ +from catch_analysis_tools.app.astrometry_readiness import state +from catch_analysis_tools.app.astrometry_readiness.get_current_time import ( + get_current_time, +) + + +def set_astrometry_readiness_status(**updates): + """Update the in-memory astrometry readiness status atomically.""" + with state.state_lock: + state.status.update(updates) + state.status["updated_at"] = get_current_time() + return dict(state.status) diff --git a/catch_analysis_tools/app/astrometry_readiness/start_astrometry_background_check.py b/catch_analysis_tools/app/astrometry_readiness/start_astrometry_background_check.py new file mode 100644 index 0000000..c6d52be --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/start_astrometry_background_check.py @@ -0,0 +1,36 @@ +import threading + +from catch_analysis_tools.app.astrometry_readiness import state +from catch_analysis_tools.app.astrometry_readiness.background_prepare_astrometry_data import ( + background_prepare_astrometry_data, +) +from catch_analysis_tools.app.astrometry_readiness.constants import INDEX_URL +from catch_analysis_tools.app.astrometry_readiness.get_current_time import ( + get_current_time, +) +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir + + +def start_astrometry_background_check(force=False): + """Start one background thread to check/download astrometry index files.""" + with state.state_lock: + if state.worker is not None and state.worker.is_alive(): + return dict(state.status) + + state.status.update({ + "state": "checking", + "ready": False, + "message": "Astrometry data readiness check has started.", + "index_dir": str(get_index_dir()), + "index_url": INDEX_URL, + "expected_files": None, + "error": None, + "updated_at": get_current_time(), + }) + state.worker = threading.Thread( + target=background_prepare_astrometry_data, + kwargs={"force": force}, + daemon=True, + ) + state.worker.start() + return dict(state.status) diff --git a/catch_analysis_tools/app/astrometry_readiness/state.py b/catch_analysis_tools/app/astrometry_readiness/state.py new file mode 100644 index 0000000..ff5026e --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/state.py @@ -0,0 +1,21 @@ +import threading + +from catch_analysis_tools.app.astrometry_readiness.constants import ( + DEFAULT_INDEX_DIR, + INDEX_URL, +) + + +state_lock = threading.RLock() +worker = None +status = { + "state": "unknown", + "ready": False, + "message": "Astrometry data has not been checked yet.", + "files_present": 0, + "expected_files": None, + "index_dir": DEFAULT_INDEX_DIR, + "index_url": INDEX_URL, + "updated_at": None, + "error": None, +} diff --git a/catch_analysis_tools/app/astrometry_readiness/write_ready_sentinel.py b/catch_analysis_tools/app/astrometry_readiness/write_ready_sentinel.py new file mode 100644 index 0000000..dd819f7 --- /dev/null +++ b/catch_analysis_tools/app/astrometry_readiness/write_ready_sentinel.py @@ -0,0 +1,28 @@ +import json +import os + +from catch_analysis_tools.app.astrometry_readiness.constants import INDEX_URL +from catch_analysis_tools.app.astrometry_readiness.get_current_time import ( + get_current_time, +) +from catch_analysis_tools.app.astrometry_readiness.get_index_dir import get_index_dir +from catch_analysis_tools.app.astrometry_readiness.get_sentinel_path import ( + get_sentinel_path, +) + + +def write_ready_sentinel(files_present, expected_files): + """Write a small marker file recording that local index files are ready.""" + payload = { + "state": "ready", + "ready": True, + "files_present": files_present, + "expected_files": len(expected_files), + "index_url": INDEX_URL, + "index_dir": str(get_index_dir()), + "completed_at": get_current_time(), + } + sentinel = get_sentinel_path() + tmp = sentinel.with_suffix(".tmp") + tmp.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8") + os.replace(tmp, sentinel) diff --git a/catch_analysis_tools/app/handlers/__init__.py b/catch_analysis_tools/app/handlers/__init__.py new file mode 100644 index 0000000..31b11a7 --- /dev/null +++ b/catch_analysis_tools/app/handlers/__init__.py @@ -0,0 +1 @@ +"""HTTP route handlers for the Connexion app.""" diff --git a/catch_analysis_tools/app/handlers/astrometry.py b/catch_analysis_tools/app/handlers/astrometry.py new file mode 100644 index 0000000..41924de --- /dev/null +++ b/catch_analysis_tools/app/handlers/astrometry.py @@ -0,0 +1,115 @@ +import base64 +import json +import logging +import os +from tempfile import NamedTemporaryFile +from uuid import uuid4 + +import requests +from flask import Response +from werkzeug.exceptions import BadRequest + +from catch_analysis_tools.app.astrometry_readiness.get_astrometry_readiness_status import ( + get_astrometry_readiness_status, +) +from catch_analysis_tools.app.astrometry_readiness.is_astrometry_ready import ( + is_astrometry_ready, +) +from catch_analysis_tools.app.services.astrometry import ( + AstrometrySolveError, + AstrometryValidationError, + run_pipeline, + validate_and_normalize, +) + + +logger = logging.getLogger(__name__) + + +def do_astrometry(body): + """Handle POST /astrometry and translate service results to HTTP responses.""" + if not is_astrometry_ready(): + payload = { + "status": "not_ready", + "message": "Astrometry index files are not ready yet.", + "astrometry_data": get_astrometry_readiness_status(), + } + return Response( + json.dumps(payload), + status=503, + mimetype="application/json", + headers={"Retry-After": "30"}, + ) + + request_id = uuid4().hex[:12] + stage = "validate_request" + # Capture raw request context for error logs before validation normalizes it. + image_url = body.get("image_url") + return_plot = body.get("return_plot") + plot_type = body.get("plot_type") + + try: + cfg = validate_and_normalize(body) + + image_url = cfg["image_url"] + return_plot = cfg["meta"]["return_plot"] + plot_type = cfg["meta"]["plot_type"] + + stage = "fetch_fits" + try: + response = requests.get(image_url, timeout=60) + response.raise_for_status() + except requests.RequestException: + raise BadRequest("Could not retrieve FITS image") + + stage = "write_temp_fits" + with NamedTemporaryFile(suffix=".fits", delete=False) as tmp: + tmp.write(response.content) + tmp_path = tmp.name + + try: + stage = "run_pipeline" + results = run_pipeline(tmp_path, cfg) + + stage = "build_response" + if return_plot: + if plot_type not in results.get("plots", {}): + raise BadRequest(f"Unknown plot_type: {plot_type}") + + image_bytes = base64.b64decode(results["plots"][plot_type]) + return Response(image_bytes, mimetype="image/png") + + return results, 200, {"Content-Type": "application/json"} + finally: + os.remove(tmp_path) + except AstrometryValidationError as exc: + raise BadRequest(str(exc)) + except BadRequest: + raise + except AstrometrySolveError as exc: + logger.warning( + "Astrometry solve did not produce WCS " + "[request_id=%s stage=%s image_url=%r return_plot=%r plot_type=%r]", + request_id, + stage, + image_url, + return_plot, + plot_type, + ) + payload = { + "status": "solve_failed", + "message": str(exc), + "request_id": request_id, + } + return Response(json.dumps(payload), status=422, mimetype="application/json") + except Exception: + logger.exception( + "Astrometry request failed " + "[request_id=%s stage=%s image_url=%r return_plot=%r plot_type=%r]", + request_id, + stage, + image_url, + return_plot, + plot_type, + ) + raise diff --git a/catch_analysis_tools/app/handlers/health.py b/catch_analysis_tools/app/handlers/health.py new file mode 100644 index 0000000..654e599 --- /dev/null +++ b/catch_analysis_tools/app/handlers/health.py @@ -0,0 +1,11 @@ +from catch_analysis_tools.app.astrometry_readiness.get_astrometry_readiness_status import ( + get_astrometry_readiness_status, +) + + +def health(): + """Return service health plus astrometry index readiness details.""" + return { + "status": "ok", + "astrometry_data": get_astrometry_readiness_status(), + } diff --git a/catch_analysis_tools/app/handlers/hello.py b/catch_analysis_tools/app/handlers/hello.py new file mode 100644 index 0000000..9de4f24 --- /dev/null +++ b/catch_analysis_tools/app/handlers/hello.py @@ -0,0 +1,13 @@ +import os + +from catch_analysis_tools import __version__ + + +def hello(): + """Return a tiny liveness response with build identity.""" + docker_image_tag = os.environ.get("DOCKER_IMAGE_TAG", "unknown") + return ( + "Catch Analysis Tools is running.\n" + f"package_version={__version__}\n" + f"docker_image_tag={docker_image_tag}\n" + ) diff --git a/catch_analysis_tools/app/handlers/reset.py b/catch_analysis_tools/app/handlers/reset.py new file mode 100644 index 0000000..0ee0d75 --- /dev/null +++ b/catch_analysis_tools/app/handlers/reset.py @@ -0,0 +1,12 @@ +from catch_analysis_tools.app.astrometry_readiness.start_astrometry_background_check import ( + start_astrometry_background_check, +) + + +def reset(): + """Force an astrometry index readiness recheck in the background.""" + status = start_astrometry_background_check(force=True) + return { + "status": "reset_started", + "astrometry_data": status, + }, 202 diff --git a/catch_analysis_tools/app/services/astrometry.py b/catch_analysis_tools/app/services/astrometry.py index 7e67d5c..5d826d4 100644 --- a/catch_analysis_tools/app/services/astrometry.py +++ b/catch_analysis_tools/app/services/astrometry.py @@ -1,5 +1,4 @@ import numpy as np -import requests import os import subprocess import pandas as pd @@ -15,12 +14,18 @@ from astropy.table import Table from astropy.coordinates import SkyCoord from typing import Dict, Any -from werkzeug.exceptions import BadRequest -from tempfile import NamedTemporaryFile -from flask import Response from copy import deepcopy import calviacat as cvc + +class AstrometrySolveError(RuntimeError): + pass + + +class AstrometryValidationError(ValueError): + pass + + DEFAULT_CONFIG = { "wcs": { "use_ra_dec": True, @@ -110,6 +115,11 @@ def run_solve_field(input_fits, output_wcs, wcs_cfg): ] subprocess.run(command, check=True) + if not os.path.exists(output_wcs): + raise AstrometrySolveError( + f"solve-field did not produce a WCS solution: {output_wcs}" + ) + return True def find_sources(image, det_cfg): @@ -529,12 +539,12 @@ def get_float(name, default=None, required=False): val = body.get(name, default) if val is None: if required: - raise BadRequest(f"{name} is required") + raise AstrometryValidationError(f"{name} is required") return None try: return float(val) except Exception: - raise BadRequest(f"{name} must be a number") + raise AstrometryValidationError(f"{name} must be a number") def get_bool(name, default=False): return bool(body.get(name, default)) @@ -542,7 +552,7 @@ def get_bool(name, default=False): # --- Required --- image_url = body.get("image_url") if not image_url: - raise BadRequest("image_url is required") + raise AstrometryValidationError("image_url is required") # --- WCS --- use_ra_dec = get_bool("use_ra_dec", False) @@ -550,7 +560,7 @@ def get_bool(name, default=False): dec = get_float("dec") if use_ra_dec and (ra is None or dec is None): - raise BadRequest("ra/dec required when use_ra_dec=True") + raise AstrometryValidationError("ra/dec required when use_ra_dec=True") pixel_scale = get_float("pixel_scale") scale_low = get_float("scale_low") @@ -558,9 +568,11 @@ def get_bool(name, default=False): if pixel_scale is None: if scale_low is None or scale_high is None: - raise BadRequest("Provide pixel_scale OR (scale_low & scale_high)") + raise AstrometryValidationError( + "Provide pixel_scale OR (scale_low & scale_high)" + ) if scale_low > scale_high: - raise BadRequest("scale_low must be <= scale_high") + raise AstrometryValidationError("scale_low must be <= scale_high") # --- Detection --- snr = get_float("snr_threshold", 3.0) @@ -606,44 +618,6 @@ def get_bool(name, default=False): } -def do_astrometry(body: Dict[str, Any]): - - # --- Validate + normalize --- - cfg = validate_and_normalize(body) - - image_url = cfg["image_url"] - return_plot = cfg["meta"]["return_plot"] - plot_type = cfg["meta"]["plot_type"] - - # --- Fetch FITS --- - try: - response = requests.get(image_url, timeout=60) - response.raise_for_status() - except requests.RequestException: - raise BadRequest("Could not retrieve FITS image") - - with NamedTemporaryFile(suffix=".fits", delete=False) as tmp: - tmp.write(response.content) - tmp_path = tmp.name - - try: - # --- Run pipeline --- - results = run_pipeline(tmp_path, cfg) - - # --- Return plot --- - if return_plot: - if plot_type not in results.get("plots", {}): - raise BadRequest(f"Unknown plot_type: {plot_type}") - - image_bytes = base64.b64decode(results["plots"][plot_type]) - return Response(image_bytes, mimetype="image/png") - - return results, 200, {"Content-Type": "application/json"} - - finally: - os.remove(tmp_path) - - # ## when testing the code locally if __name__ == "__main__": @@ -696,4 +670,4 @@ def do_astrometry(body: Dict[str, Any]): plt.imshow(img) plt.axis("off") plt.title(plot_type) - plt.show() \ No newline at end of file + plt.show() diff --git a/catch_analysis_tools/app/services/health.py b/catch_analysis_tools/app/services/health.py deleted file mode 100644 index dea05ae..0000000 --- a/catch_analysis_tools/app/services/health.py +++ /dev/null @@ -1,2 +0,0 @@ -def hello(): - return "Hello, World!" diff --git a/catch_analysis_tools/astrometry.py b/catch_analysis_tools/astrometry.py index 2b10fea..706821e 100644 --- a/catch_analysis_tools/astrometry.py +++ b/catch_analysis_tools/astrometry.py @@ -1,5 +1,4 @@ import os -import glob import subprocess import argparse import numpy as np @@ -13,6 +12,7 @@ from astropy.coordinates import SkyCoord import calviacat as cvc + def run_solve_field(input_fits, output_wcs, pixel_scale, Ra_deg, Dec_deg, scale_units="arcsecperpix"): """ Execute the `solve-field` command to compute a WCS solution. @@ -34,9 +34,10 @@ def run_solve_field(input_fits, output_wcs, pixel_scale, Ra_deg, Dec_deg, scale_ True if the solve-field command succeeded or file already exists. """ if os.path.exists(output_wcs): - print(f"Output file '{output_wcs}' already exists. Skipping solve-field execution.") + print( + f"Output file '{output_wcs}' already exists. Skipping solve-field execution.") return True - + config_file = os.environ.get("ASTROMETRY_CONFIG") if config_file is None: raise RuntimeError( @@ -103,7 +104,8 @@ def find_sources(image_sub, bkg_err, snr, aperture_radius=7.0): ) source_list['aperture_sum'] = flux source_list['aperture_err'] = flux_err - source_list = source_list[source_list['aperture_sum'] > 0].reset_index(drop=True) + source_list = source_list[source_list['aperture_sum'] > 0].reset_index( + drop=True) return source_list, image_sub @@ -154,11 +156,11 @@ def retrieve_sources(source_list, wcs_solution): def calibrate_photometry( - sky_coords, - source_list, - catalog: str = 'PanSTARRS1', - obs_band: str = 'obs_band', - cal_band: str = 'g'): + sky_coords, + source_list, + catalog: str = 'PanSTARRS1', + obs_band: str = 'obs_band', + cal_band: str = 'g'): """ Calibrate instrumental magnitudes against a Pan-STARRS1 catalog. @@ -192,12 +194,12 @@ def calibrate_photometry( - distances : array_like, matching distances """ color_index = f"{obs_band}-{cal_band}" - + try: CatalogClass = getattr(cvc, catalog) except AttributeError: raise ValueError(f"Catalog '{catalog}' not found in calviacat") - + ref = CatalogClass('cat.db') results = ref.search(sky_coords) if len(results[0]) < 500: @@ -205,23 +207,23 @@ def calibrate_photometry( objids, distances = ref.xmatch(sky_coords) m_inst = -2.5 * np.log10(source_list['aperture_sum'].values) zp, C, unc, m_cal, color_mags, _ = ref.cal_color( - objids, - m_inst, - cal_band, - color_index, - ) + objids, + m_inst, + cal_band, + color_index, + ) return { - 'zp' : zp, - 'C' : C, - 'unc' : unc, - 'm' : m_cal, - 'm_inst' : m_inst, - 'obs_band' : obs_band, - 'cal_band' : cal_band, - 'color_mags' : color_mags, - 'color_index' : color_index, - 'objids' : objids, - 'distances' : distances, + 'zp': zp, + 'C': C, + 'unc': unc, + 'm': m_cal, + 'm_inst': m_inst, + 'obs_band': obs_band, + 'cal_band': cal_band, + 'color_mags': color_mags, + 'color_index': color_index, + 'objids': objids, + 'distances': distances, } @@ -259,7 +261,8 @@ def plot_color_correction( fig, ax = plt.subplots() ax.scatter(color_mags, m - m_inst, marker='.') x = np.linspace(0, 1.5, 100) - ax.plot(x, C * x + zp, color='red', label=f'$m = C\\times({color_index}) + ZP$') + ax.plot(x, C * x + zp, color='red', + label=f'$m = C\\times({color_index}) + ZP$') ax.set_xlabel(f'${color_index}$ (mag)') ax.set_ylabel(r'$m - m_{\mathrm{inst}}$ (mag)') plt.tight_layout() @@ -288,12 +291,16 @@ def plot_image(telescope_image_sub, source_list, matched_idx, colored_idx): """ fig, ax = plt.subplots() m, s = np.mean(telescope_image_sub), np.std(telescope_image_sub) - im = ax.imshow(telescope_image_sub, interpolation='nearest', origin='lower', cmap='gray') + im = ax.imshow(telescope_image_sub, interpolation='nearest', + origin='lower', cmap='gray') im.set_clim(vmin=m-s, vmax=m+s) fig.colorbar(im, ax=ax) - ax.plot(source_list['x'], source_list['y'], '+', markersize=5, label='Detected', color='red',) - ax.plot(source_list['x'].iloc[matched_idx], source_list['y'].iloc[matched_idx], 'o', markersize=10, color='blue', markerfacecolor='none', label='Matched') - ax.plot(source_list['x'].iloc[colored_idx], source_list['y'].iloc[colored_idx], 'o', markersize=15, color='yellow', markerfacecolor='none', label='Selected for Color Corr') + ax.plot(source_list['x'], source_list['y'], '+', + markersize=5, label='Detected', color='red',) + ax.plot(source_list['x'].iloc[matched_idx], source_list['y'].iloc[matched_idx], + 'o', markersize=10, color='blue', markerfacecolor='none', label='Matched') + ax.plot(source_list['x'].iloc[colored_idx], source_list['y'].iloc[colored_idx], 'o', + markersize=15, color='yellow', markerfacecolor='none', label='Selected for Color Corr') ax.legend() return fig, ax @@ -327,18 +334,23 @@ def create_header(image, wcs_solution, zp, unc, source_list, matched_idx, colore None """ image_arr = np.asarray(image) - primary_hdu = fits.PrimaryHDU(data=image_arr, header=wcs_solution.to_header()) + primary_hdu = fits.PrimaryHDU( + data=image_arr, header=wcs_solution.to_header()) primary_hdu.header['ZP'] = zp primary_hdu.header['ZP_STD'] = unc - primary_hdu.header['SUV_FLT'] = cal_band + primary_hdu.header['SUV_FLT'] = cal_band primary_hdu.header['REF_CATA'] = catalog - primary_hdu.header['REF_FLT'] = obj_band - primary_hdu.header['CAT_COR'] = f"{cal_band}-{obj_band}" - source_list_clean = source_list.map(lambda x: x.filled(np.nan) if hasattr(x, 'filled') else x) - detected_hdu = fits.BinTableHDU(Table.from_pandas(source_list_clean), name='DETECTED_SOURCES') + primary_hdu.header['REF_FLT'] = obj_band + primary_hdu.header['CAT_COR'] = f"{cal_band}-{obj_band}" + source_list_clean = source_list.map( + lambda x: x.filled(np.nan) if hasattr(x, 'filled') else x) + detected_hdu = fits.BinTableHDU(Table.from_pandas( + source_list_clean), name='DETECTED_SOURCES') if not source_list_clean.empty: - matched_hdu = fits.BinTableHDU(Table.from_pandas(source_list_clean.iloc[matched_idx].reset_index(drop=True)), name='SELECTED_STARS') - colored_hdu = fits.BinTableHDU(Table.from_pandas(source_list_clean.iloc[colored_idx].reset_index(drop=True)), name='START_COLOR_CORRECTION') + matched_hdu = fits.BinTableHDU(Table.from_pandas( + source_list_clean.iloc[matched_idx].reset_index(drop=True)), name='SELECTED_STARS') + colored_hdu = fits.BinTableHDU(Table.from_pandas( + source_list_clean.iloc[colored_idx].reset_index(drop=True)), name='START_COLOR_CORRECTION') else: matched_hdu = fits.BinTableHDU(name='SELECTED_STARS') colored_hdu = fits.BinTableHDU(name='START_COLOR_CORRECTION') @@ -369,10 +381,14 @@ def cleanup_files(file_base): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Photometric calibration on a background-subtracted image.") - parser.add_argument("input_fits", help="Path to background-subtracted FITS image") - parser.add_argument("--Ra", type=float, required=True, help="RA from CATCH") - parser.add_argument("--Dec", type=float, required=True, help="DEC from CATCH") + parser = argparse.ArgumentParser( + description="Photometric calibration on a background-subtracted image.") + parser.add_argument( + "input_fits", help="Path to background-subtracted FITS image") + parser.add_argument("--Ra", type=float, required=True, + help="RA from CATCH") + parser.add_argument("--Dec", type=float, required=True, + help="DEC from CATCH") parser.add_argument("--bkg_err", type=float, required=True, help="Global background RMS (float, required)") parser.add_argument("--pixel_scale", type=float, default=1.86, @@ -381,23 +397,24 @@ def cleanup_files(file_base): help="Detection S/N threshold (default: 7.0)") parser.add_argument("--catalog", default="PanSTARRS1", help="Photometric reference catalog (default: PanSTARRS1)") - parser.add_argument("--obs_band", default="obs_band", help="Observed image bandpass (used for labeling only; default: 'obs_band')") - parser.add_argument("--cal_band", default="r", help="Reference catalog bandpass (default: r)") + parser.add_argument("--obs_band", default="obs_band", + help="Observed image bandpass (used for labeling only; default: 'obs_band')") + parser.add_argument("--cal_band", default="r", + help="Reference catalog bandpass (default: r)") args = parser.parse_args() - input_fits = args.input_fits - file_base = os.path.splitext(input_fits)[0] - image = fitsio.read(input_fits).astype(np.float32) - Ra = args.Ra - Dec = args.Dec - bkg_err = args.bkg_err + input_fits = args.input_fits + file_base = os.path.splitext(input_fits)[0] + image = fitsio.read(input_fits).astype(np.float32) + Ra = args.Ra + Dec = args.Dec + bkg_err = args.bkg_err pixel_scale = args.pixel_scale - snr = args.snr - catalog = args.catalog - obs_band = args.obs_band - cal_band = args.cal_band - - + snr = args.snr + catalog = args.catalog + obs_band = args.obs_band + cal_band = args.cal_band + output_wcs = f"{file_base}.wcs" try: if run_solve_field(input_fits, output_wcs, pixel_scale, Ra, Dec): @@ -416,14 +433,14 @@ def cleanup_files(file_base): obs_band=obs_band, cal_band=cal_band, ) - zp = calibration["zp"] - C = calibration["C"] - unc = calibration["unc"] - m = calibration["m"] + zp = calibration["zp"] + C = calibration["C"] + unc = calibration["unc"] + m = calibration["m"] m_inst = calibration["m_inst"] - color_mags = calibration['color_mags'] + color_mags = calibration['color_mags'] color_index = calibration['color_index'] - objids = calibration['objids'] + objids = calibration['objids'] if hasattr(objids, "mask"): matched_idx = np.where(~objids.mask)[0] @@ -434,8 +451,10 @@ def cleanup_files(file_base): else: colored_idx = np.arange(len(source_list)) - fig1, ax1 = plot_color_correction(color_mags, m, m_inst, C, zp, color_index) - fig2, ax2 = plot_image(telescope_image_sub, source_list, matched_idx, colored_idx) + fig1, ax1 = plot_color_correction( + color_mags, m, m_inst, C, zp, color_index) + fig2, ax2 = plot_image(telescope_image_sub, + source_list, matched_idx, colored_idx) plt.show() create_header( @@ -446,10 +465,10 @@ def cleanup_files(file_base): source_list, matched_idx, colored_idx, - input_fits, + input_fits, obs_band, catalog, cal_band, ) - cleanup_files(file_base) \ No newline at end of file + cleanup_files(file_base) diff --git a/catch_analysis_tools/tests/.DS_Store b/catch_analysis_tools/tests/.DS_Store deleted file mode 100644 index e66682e..0000000 Binary files a/catch_analysis_tools/tests/.DS_Store and /dev/null differ diff --git a/docker-compose.yml b/docker-compose.yml index aec24db..d954b63 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,5 +4,9 @@ services: context: . dockerfile: Dockerfile image: "${DOCKER_IMAGE_NAME:-sbn-cat}:${DOCKER_IMAGE_TAG:-local}" + environment: + DOCKER_IMAGE_TAG: "${DOCKER_IMAGE_TAG:-local}" ports: - - "8000:8000" + - "127.0.0.1:8000:8000" + volumes: + - "${HOME}/.astrometry/data:/root/.astrometry/data" diff --git a/docker/astrometry.cfg b/docker/astrometry.cfg new file mode 100644 index 0000000..d90867f --- /dev/null +++ b/docker/astrometry.cfg @@ -0,0 +1,5 @@ +# Minimal Astrometry.net backend config for the containerized app. + +add_path /root/.astrometry/data +autoindex +cpulimit 300 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..233685c --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -euo pipefail +IFS=$'\n\t' + +export ASTROMETRY_INDEX_DIR="${ASTROMETRY_INDEX_DIR:-/root/.astrometry/data}" +export ASTROMETRY_CONFIG="${ASTROMETRY_CONFIG:-/app/docker/astrometry.cfg}" + +mkdir -p "${ASTROMETRY_INDEX_DIR}" + +exec python3 -m catch_analysis_tools.app.app diff --git a/pyproject.toml b/pyproject.toml index e5fb8fb..f7d4b0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "fitsio", "calviacat", "mastcasjobs", + "beautifulsoup4", "connexion[flask,swagger-ui,uvicorn]>=3.3.0", ] dynamic = ["version"] @@ -51,4 +52,4 @@ dev = ["build"] addopts = "--cov=catch_analysis_tools --cov-report=term-missing" markers = [ "integration: tests that run solve-field and require astrometry.net index files" -] \ No newline at end of file +] diff --git a/requirements.local.txt b/requirements.local.txt index ce7a7c6..3589457 100644 --- a/requirements.local.txt +++ b/requirements.local.txt @@ -12,4 +12,5 @@ sep fitsio calviacat mastcasjobs +beautifulsoup4 connexion[flask,swagger-ui,uvicorn] diff --git a/scripts/Checking_indexfiles.py b/scripts/Checking_indexfiles.py index fd9d453..76ed334 100644 --- a/scripts/Checking_indexfiles.py +++ b/scripts/Checking_indexfiles.py @@ -1,68 +1,14 @@ -import os -import glob -import requests -from pathlib import Path -from bs4 import BeautifulSoup -from urllib.parse import urljoin - -INDEX_URL = "https://portal.nersc.gov/project/cosmo/temp/dstn/index-5200/LITE/" -LOCAL_DIR = Path( - os.environ.get("ASTROMETRY_INDEX_DIR", "/tmp/astrometry/index_files") +from catch_analysis_tools.app.astrometry_readiness.get_astrometry_readiness_status import ( + get_astrometry_readiness_status, +) +from catch_analysis_tools.app.astrometry_readiness.prepare_astrometry_data import ( + prepare_astrometry_data, ) -LOCAL_DIR.mkdir(parents=True, exist_ok=True) -EXPECTED_FILES = 300 - - -def index_files_complete(directory): - files = glob.glob(os.path.join(directory, "index-*.fits")) - - if len(files) != EXPECTED_FILES: - print(f"Incomplete index set: {len(files)} / {EXPECTED_FILES}") - return False - - return True - - -def download_index_files(url, target_dir): - os.makedirs(target_dir, exist_ok=True) - - with requests.Session() as session: - response = session.get(url) - response.raise_for_status() - - soup = BeautifulSoup(response.text, "html.parser") - - for link in soup.find_all("a", href=True): - href = link["href"] - - if not (href.startswith("index-") and href.endswith(".fits")): - continue - - file_url = urljoin(url, href) - local_path = os.path.join(target_dir, href) - - print(f"Downloading: {href}") - - with session.get(file_url, stream=True) as r: - r.raise_for_status() - with open(local_path, "wb") as f: - for chunk in r.iter_content(chunk_size=1024 * 1024): - if chunk: - f.write(chunk) def main(): - if index_files_complete(LOCAL_DIR): - print("Index files are complete. Skipping download.") - return - - print("Index files incomplete or missing. Downloading...") - download_index_files(INDEX_URL, LOCAL_DIR) - - if not index_files_complete(LOCAL_DIR): - raise RuntimeError("Index files still incomplete after download") - - print("Index files downloaded successfully.") + prepare_astrometry_data() + print(get_astrometry_readiness_status()) if __name__ == "__main__": diff --git a/tf_main.tf b/tf_main.tf index 7c8ee9b..38c53a6 100644 --- a/tf_main.tf +++ b/tf_main.tf @@ -30,6 +30,7 @@ locals { log_group_name = "/aws/ecs/${local.name_prefix}app" alb_name = trimsuffix(substr("${local.normalized_name_prefix}alb", 0, 32), "-") target_group_name = trimsuffix(substr("${local.normalized_name_prefix}tg", 0, 32), "-") + efs_name = trimsuffix(substr("${local.normalized_name_prefix}efs", 0, 32), "-") execution_role_name = substr("${local.name_prefix}ecs-task-execution-role", 0, 64) } @@ -139,11 +140,51 @@ resource "aws_security_group" "ecs_tasks" { } } +resource "aws_security_group" "efs" { + name = "${local.name_prefix}efs-sg" + description = "Allow NFS traffic from ECS tasks" + vpc_id = aws_vpc.main.id + + ingress { + description = "NFS from ECS tasks" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + security_groups = [aws_security_group.ecs_tasks.id] + } + + egress { + description = "Allow all outbound traffic" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + resource "aws_cloudwatch_log_group" "ecs" { name = local.log_group_name retention_in_days = var.ECS_LOGS_RETENTION_DAYS } +resource "aws_efs_file_system" "astrometry_data" { + creation_token = "${local.name_prefix}astrometry-data" + performance_mode = var.EFS_PERFORMANCE_MODE + throughput_mode = var.EFS_THROUGHPUT_MODE + encrypted = true + + tags = { + Name = local.efs_name + } +} + +resource "aws_efs_mount_target" "astrometry_data" { + count = length(aws_subnet.public) + file_system_id = aws_efs_file_system.astrometry_data.id + subnet_id = aws_subnet.public[count.index].id + security_groups = [aws_security_group.efs.id] +} + resource "aws_iam_role" "ecs_task_execution" { name = local.execution_role_name @@ -183,11 +224,37 @@ resource "aws_ecs_task_definition" "app" { operating_system_family = "LINUX" } + volume { + name = "astrometry-data" + + efs_volume_configuration { + file_system_id = aws_efs_file_system.astrometry_data.id + transit_encryption = "ENABLED" + } + } + container_definitions = jsonencode([ { name = local.container_name image = "${aws_ecr_repository.app.repository_url}:${var.DOCKER_IMAGE_TAG}" essential = true + environment = [ + { + name = "ASTROMETRY_INDEX_DIR" + value = "/root/.astrometry/data" + }, + { + name = "DOCKER_IMAGE_TAG" + value = var.DOCKER_IMAGE_TAG + } + ] + mountPoints = [ + { + sourceVolume = "astrometry-data" + containerPath = "/root/.astrometry/data" + readOnly = false + } + ] portMappings = [ { containerPort = var.CONTAINER_PORT @@ -206,7 +273,10 @@ resource "aws_ecs_task_definition" "app" { } ]) - depends_on = [aws_iam_role_policy_attachment.ecs_task_execution_default] + depends_on = [ + aws_iam_role_policy_attachment.ecs_task_execution_default, + aws_efs_mount_target.astrometry_data, + ] } resource "aws_lb" "main" { @@ -249,6 +319,8 @@ resource "aws_ecs_service" "app" { task_definition = aws_ecs_task_definition.app.arn desired_count = var.ECS_DESIRED_COUNT launch_type = "FARGATE" + platform_version = "1.4.0" + health_check_grace_period_seconds = var.ECS_HEALTHCHECK_GRACE_PERIOD_SECONDS network_configuration { subnets = aws_subnet.public[*].id diff --git a/tf_outputs.tf b/tf_outputs.tf index 6d0215b..f77b007 100644 --- a/tf_outputs.tf +++ b/tf_outputs.tf @@ -22,3 +22,8 @@ output "ecs_service_name" { description = "ECS service name" value = aws_ecs_service.app.name } + +output "efs_file_system_id" { + description = "EFS file system ID used to persist astrometry index files" + value = aws_efs_file_system.astrometry_data.id +} diff --git a/tf_variables.tf b/tf_variables.tf index 4ebc38a..c5e3b5a 100644 --- a/tf_variables.tf +++ b/tf_variables.tf @@ -79,12 +79,30 @@ variable "ECS_DESIRED_COUNT" { default = 1 } +variable "ECS_HEALTHCHECK_GRACE_PERIOD_SECONDS" { + description = "Grace period to allow initial container startup tasks such as data downloads before ALB health checks count against the service" + type = number + default = 1800 +} + variable "ECS_LOGS_RETENTION_DAYS" { description = "CloudWatch Logs retention in days for ECS container logs" type = number default = 14 } +variable "EFS_PERFORMANCE_MODE" { + description = "Performance mode for the astrometry EFS file system" + type = string + default = "generalPurpose" +} + +variable "EFS_THROUGHPUT_MODE" { + description = "Throughput mode for the astrometry EFS file system" + type = string + default = "bursting" +} + variable "CAT_ARCHITECTURE" { description = "CPU architecture for ECS task runtime platform (x86_64 or arm64)" type = string