diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index ba7a2312..3ac7fcbb 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -88,6 +88,14 @@ services:
- VOIP_TICKET_TTL_SECONDS=${VOIP_TICKET_TTL_SECONDS:-180} # Media Streams WSS ticket TTL
- VOIP_INTENT_TTL_SECONDS=${VOIP_INTENT_TTL_SECONDS:-180} # staged Gemini-intent TTL
- GITHUB_PAT=${GITHUB_PAT}
+ # Log Retention & Archival Configuration (#1039)
+ # Declared here because prod launches standalone (no base-compose merge):
+ # without these entries an operator-set LOG_RETENTION_DAYS never reaches the
+ # container and retention silently falls back to the code default. Default
+ # is the 5-day community floor; enterprise `retention` unlocks longer.
+ - LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-5}
+ - LOG_ARCHIVE_ENABLED=${LOG_ARCHIVE_ENABLED:-true}
+ - LOG_CLEANUP_HOUR=${LOG_CLEANUP_HOUR:-3}
# Host paths for volumes (used when creating agent containers)
- HOST_TEMPLATES_PATH=${HOST_TEMPLATES_PATH:-${PWD}/config/agent-templates}
# Agent /tmp tmpfs size (#1231) — read by capabilities.py to build the
diff --git a/docker-compose.yml b/docker-compose.yml
index 33ba97cb..41309c10 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -69,7 +69,9 @@ services:
# SendGrid transport (EMAIL_PROVIDER=sendgrid) — read by src/backend/config.py (#771)
- SENDGRID_API_KEY=${SENDGRID_API_KEY:-}
# Log Retention & Archival Configuration
- - LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-90}
+ # #1039: 5-day community retention floor (was 90). Enterprise `retention`
+ # license unlocks longer windows; this env is the self-host escape hatch.
+ - LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-5}
- LOG_ARCHIVE_ENABLED=${LOG_ARCHIVE_ENABLED:-true}
- LOG_CLEANUP_HOUR=${LOG_CLEANUP_HOUR:-3}
- LOG_ARCHIVE_PATH=/data/archives # Local path for archived logs
diff --git a/docs/memory/architecture.md b/docs/memory/architecture.md
index e2a9d6fc..809500f7 100644
--- a/docs/memory/architecture.md
+++ b/docs/memory/architecture.md
@@ -420,7 +420,7 @@ Backend orchestration in `services/subscription_auto_switch.py`: `_hot_reload_su
**Admin recovery (Phase 1c):** metadata-only (`deleted_at → NULL`) via the `/api/admin/soft-deleted/*` endpoints. Agent recovery does NOT recreate the container (`needs_container_recreate=true`; operator runs `POST /api/agents/{name}/start`); schedule recovery rejoins the firing list next poll if enabled. Audit events `agent_lifecycle:recover` / `schedule_recover`. Response models `SoftDeletedAgent`/`SoftDeletedSchedule` in `models.py`.
-**Cleanup Service sweeps** (every 5 min): #772 retention — nulls `schedule_executions.execution_log` past `execution_log_retention_days` (default 30), DELETEs terminal `schedule_executions` past `execution_row_retention_days` (default 90), DELETEs `agent_health_checks` past `health_check_retention_days` (default 7). #834 purges — hard-deletes `agent_ownership` rows soft-deleted longer than `agent_soft_delete_retention_days` (default 180, `0`=disabled), cascading children via the #816 `purge_agent_ownership`/`cascade_delete` primitive; hard-deletes `agent_schedules` rows past `schedule_soft_delete_retention_days` (default 30, `0`=disabled) via `purge_schedule()`, which cascades the row's `schedule_executions` (no #816 chain — schedules have no registered children). Each sweep capped at 5000 rows/cycle (first post-deploy backfill spans hours, not minutes); `0` disables a sweep; `PRAGMA wal_checkpoint(TRUNCATE)` when any sweep reclaims rows. Also purges expired `idempotency_keys`. **Startup hook (#740):** one-shot `mark_orphan_loops_interrupted()` flips `agent_loops` rows left `queued`/`running` after a restart to `interrupted` (`stop_reason="interrupted"`); loops do not auto-resume.
+**Cleanup Service sweeps** (every 5 min): #772 retention — nulls `schedule_executions.execution_log` past `execution_log_retention_days` (default 5 — #1039 community floor, was 30), DELETEs terminal `schedule_executions` past `execution_row_retention_days` (default 5, was 90), DELETEs `agent_health_checks` past `health_check_retention_days` (default 5, was 7). #834 purges — hard-deletes `agent_ownership` rows soft-deleted longer than `agent_soft_delete_retention_days` (default 5 — #1039, was 180; `0`=disabled), cascading children via the #816 `purge_agent_ownership`/`cascade_delete` primitive; hard-deletes `agent_schedules` rows past `schedule_soft_delete_retention_days` (default 5, was 30; `0`=disabled) via `purge_schedule()`, which cascades the row's `schedule_executions` (no #816 chain — schedules have no registered children). Each sweep capped at 5000 rows/cycle (first post-deploy backfill spans hours, not minutes); `0` disables a sweep; `PRAGMA wal_checkpoint(TRUNCATE)` when any sweep reclaims rows. Also purges expired `idempotency_keys`. **Community retention floor (#1039):** these operator-tunable windows default to a 5-day community floor (was 30/90/7/180/30); the audit-log window is exempt (separate 365-day integrity floor). Precedence: an optional entitled override (via the entitlement registry, #847) → env → 5-day community default; OSS does not hard-clamp env/OPS (self-host escape hatch). Surfaced read-only at `GET /api/settings/retention`. **Startup hook (#740):** one-shot `mark_orphan_loops_interrupted()` flips `agent_loops` rows left `queued`/`running` after a restart to `interrupted` (`stop_reason="interrupted"`); loops do not auto-resume.
### Sequential Agent Loops (#740, UI #1106)
@@ -751,6 +751,7 @@ Coverage: agent lifecycle, auth, sharing, credentials, settings, rename; request
|--------|------|-------------|
| GET/PUT/DELETE | `/api/settings/mcp-url` | Get (any auth user) / set / reset-to-auto-detect (admin-only) MCP server URL |
| GET | `/api/settings/feature-flags` | Public-safe UI gating flags (any auth user): `session_tab_enabled`, `voice_available` (`VOICE_ENABLED && GEMINI_API_KEY`), `workspace_available` (voice AND `WORKSPACE_ENABLED`, opt-in #860), `voip_available` (#1056), `mcp_agent_chat_pull_enabled` (#946 pull-pilot routing; observability-only — the routing gate is the MCP server's own read of `MCP_AGENT_CHAT_PULL_ENABLED`; default OFF, not a UI surface), `enterprise_features` (registered enterprise modules; empty in OSS-only builds or under `TRINITY_OSS_ONLY=1`) (#847) |
+| GET | `/api/settings/retention` | Effective data-retention windows + active edition (admin-only, #1039). Reports log-archival (env `LOG_*`), execution log/row, health-check, and agent/schedule soft-delete (OPS settings) windows, plus the audit-log window (separate 365-day floor, exempt from the community floor). `edition` is `enterprise` when an entitled override is registered (via the #847 entitlement seam), else `community` (5-day floor); precedence: entitled override → env / OPS setting → 5-day default; OSS does not hard-clamp |
| GET/PUT | `/api/settings/agent-defaults/resources` | Fleet-wide default CPU/memory for new containers (admin-only; CPU 1/2/4/8/16, memory 1g–32g) (RES-001) |
| GET/PUT | `/api/settings/agent-defaults/access-policy` | Fleet-wide default `require_email` for new agents (admin-only, #1129). Stored in `system_settings`, **secure-by-default ON** (code fallback when unset — no migration); seeds `agent_ownership.require_email` at creation (`register_agent_owner`) for **new** agents only, never rewrites existing rows; owners still override per agent via `PUT /api/agents/{name}/access-policy` |
diff --git a/src/backend/routers/logs.py b/src/backend/routers/logs.py
index 588da50e..10c8caa8 100644
--- a/src/backend/routers/logs.py
+++ b/src/backend/routers/logs.py
@@ -80,7 +80,7 @@ async def get_retention_config(current_user: User = Depends(require_admin)):
import os
return RetentionConfig(
- retention_days=int(os.getenv("LOG_RETENTION_DAYS", "90")),
+ retention_days=int(os.getenv("LOG_RETENTION_DAYS", "5")),
archive_enabled=os.getenv("LOG_ARCHIVE_ENABLED", "true").lower() == "true",
cleanup_hour=int(os.getenv("LOG_CLEANUP_HOUR", "3")),
)
@@ -183,7 +183,7 @@ async def log_service_health(current_user: User = Depends(require_admin)):
"scheduler_running": log_archive_service.scheduler.running if log_archive_service.scheduler else False,
"archive_enabled": os.getenv("LOG_ARCHIVE_ENABLED", "true").lower() == "true",
"archive_path": os.getenv("LOG_ARCHIVE_PATH", "/data/archives"),
- "retention_days": int(os.getenv("LOG_RETENTION_DAYS", "90")),
+ "retention_days": int(os.getenv("LOG_RETENTION_DAYS", "5")),
"cleanup_hour": int(os.getenv("LOG_CLEANUP_HOUR", "3")),
}
diff --git a/src/backend/routers/settings.py b/src/backend/routers/settings.py
index 91287582..4a832dce 100644
--- a/src/backend/routers/settings.py
+++ b/src/backend/routers/settings.py
@@ -152,6 +152,61 @@ async def get_public_feature_flags(
}
+@router.get("/retention")
+async def get_retention_status(
+    current_user: User = Depends(get_current_user),
+):
+    """Effective data-retention windows actually in use, plus the active
+    edition (#1039). Admin-only.
+
+    Reports the value resolved for each operator-tunable class — log archival
+    (env LOG_*), execution log/row, health-check, and agent/schedule
+    soft-delete (OPS settings, DB → default precedence) — and the audit-log
+    window (separate 365-day integrity floor, exempt from the community floor).
+    Malformed or empty values never fail the request (env → default, OPS → 0).
+
+    ``edition`` is ``enterprise`` when the ``retention`` entitlement is present
+    (license-driven once #1040 lands; registry-driven today) and ``community``
+    otherwise. In the community edition the windows default to the 5-day floor;
+    the env/OPS values remain an unsupported self-host escape hatch — OSS does
+    not hard-clamp (the enterprise module is the managed, supported surface).
+    """
+    require_admin(current_user)
+
+    from services.entitlement_service import entitlement_service
+    from services.settings_service import COMMUNITY_RETENTION_FLOOR_DAYS, RETENTION_OPS_KEYS
+
+    def _int_or(raw, fallback: int) -> int:
+        # int() that yields ``fallback`` for unset (None), empty, or non-numeric input.
+        try:
+            return int(raw)
+        except (TypeError, ValueError):
+            return fallback
+
+    def _ops_int(key: str) -> int:
+        raw = db.get_setting_value(key, OPS_SETTINGS_DEFAULTS.get(key, "0"))
+        return max(_int_or(raw, 0), 0)  # negatives clamp to 0 (= sweep disabled)
+
+    entitled = entitlement_service.is_entitled("retention")
+    audit_days = max(_int_or(os.getenv("AUDIT_LOG_RETENTION_DAYS"), 365), 365)
+
+    return {
+        "edition": "enterprise" if entitled else "community",
+        "community_floor_days": COMMUNITY_RETENTION_FLOOR_DAYS,
+        # enterprise (license) DB setting → env var → 5-day community default
+        "precedence": "enterprise → env → community-default",
+        "windows": {
+            # Log archival (env-driven; LOG_* escape hatch)
+            "log_retention_days": _int_or(os.getenv("LOG_RETENTION_DAYS"), 5),
+            "log_archive_enabled": os.getenv("LOG_ARCHIVE_ENABLED", "true").lower() == "true",
+            # Execution + health + soft-delete (OPS settings, 0 = disabled)
+            **{k: _ops_int(k) for k in RETENTION_OPS_KEYS},
+            # Audit log — exempt from the community floor (365-day integrity floor)
+            "audit_log_retention_days": audit_days,
+        },
+    }
+
+
# ============================================================================
# API Keys Management Endpoints
# NOTE: These routes MUST be defined BEFORE the /{key} catch-all route
diff --git a/src/backend/services/log_archive_service.py b/src/backend/services/log_archive_service.py
index 3f92bb9d..8160441b 100644
--- a/src/backend/services/log_archive_service.py
+++ b/src/backend/services/log_archive_service.py
@@ -22,7 +22,9 @@
logger = logging.getLogger(__name__)
# Configuration from environment
-LOG_RETENTION_DAYS = int(os.getenv("LOG_RETENTION_DAYS", "90"))
+# #1039: 5-day community retention floor (was 90). Enterprise `retention`
+# license unlocks longer windows; LOG_* env remains a self-host escape hatch.
+LOG_RETENTION_DAYS = int(os.getenv("LOG_RETENTION_DAYS", "5"))
LOG_ARCHIVE_ENABLED = os.getenv("LOG_ARCHIVE_ENABLED", "true").lower() == "true"
LOG_CLEANUP_HOUR = int(os.getenv("LOG_CLEANUP_HOUR", "3"))
diff --git a/src/backend/services/settings_service.py b/src/backend/services/settings_service.py
index c1e7e071..2bfef092 100644
--- a/src/backend/services/settings_service.py
+++ b/src/backend/services/settings_service.py
@@ -28,6 +28,24 @@
# Ops Settings Configuration - moved from routers/settings.py
# ============================================================================
+# Issue #1039: community retention floor (days). Operator-tunable retention
+# windows default to this in the community edition; an enterprise `retention`
+# license unlocks longer/configurable windows. The audit log is EXEMPT — it
+# keeps a 365-day integrity floor (see audit_retention_service). This is the
+# shared default the enterprise `retention` module clamps unentitled writes to.
+COMMUNITY_RETENTION_FLOOR_DAYS = 5
+
+# The operator-tunable retention OPS-settings keys governed by the #1039 floor
+# (audit log excluded — separate env-driven 365-day floor).
+RETENTION_OPS_KEYS = (
+    "execution_log_retention_days",  # null `execution_log` TEXT after N days
+    "execution_row_retention_days",  # DELETE schedule_executions rows after N days
+    "health_check_retention_days",  # DELETE agent_health_checks rows after N days
+    "agent_soft_delete_retention_days",  # hard-delete soft-deleted agents after N days
+    "schedule_soft_delete_retention_days",  # hard-delete soft-deleted schedules after N days
+)
+
+
# Default values for ops settings (as specified in requirements)
OPS_SETTINGS_DEFAULTS = {
"ops_context_warning_threshold": "75", # Context % to trigger warning
@@ -39,24 +57,32 @@
"ops_log_retention_days": "7", # Days to keep container logs
"ops_health_check_interval": "60", # Seconds between health checks
"ssh_access_enabled": "false", # Enable SSH access via MCP tool
+ # Issue #1039: 5-day COMMUNITY retention floor. These operator-tunable
+ # windows default to 5 days in the community edition; an enterprise
+ # `retention` license unlocks longer/configurable windows. The audit log is
+ # EXEMPT — it keeps its 365-day integrity floor (audit_retention_service).
+ # Previous code defaults (execution-log 30 / execution-row 90 /
+ # health-check 7 / agent soft-delete 180 / schedule soft-delete 30) are
+ # documented in the #1039 release notes; self-hosters restore them via the
+ # OPS settings escape hatch (unsupported) or an enterprise license.
+ #
# Issue #772: retention policy for execution_log + agent_health_checks.
# "0" disables that prune step.
- "execution_log_retention_days": "30", # Null `execution_log` TEXT after N days
- "execution_row_retention_days": "90", # DELETE schedule_executions rows after N days
- "health_check_retention_days": "7", # DELETE agent_health_checks rows after N days
+ "execution_log_retention_days": "5", # Null `execution_log` TEXT after N days (#1039: was 30)
+ "execution_row_retention_days": "5", # DELETE schedule_executions rows after N days (#1039: was 90)
+ "health_check_retention_days": "5", # DELETE agent_health_checks rows after N days (#1039: was 7)
# Issue #834 Phase 1a: soft-delete retention for agents. After
# DELETE /api/agents/{name}, the agent_ownership row is marked
# `deleted_at = NOW` and child rows are preserved. The cleanup
# sweep hard-deletes rows older than this many days (cascading
# child tables via #816's purge primitive). "0" disables the
# sweep entirely — soft-deleted rows then persist until manually
- # purged.
- "agent_soft_delete_retention_days": "180",
- # Issue #834 Phase 1b: per-schedule soft-delete. Schedules are
- # higher-churn than agents (users tweak/replace cron expressions
- # often), so default is shorter than the agent window. "0"
- # disables the sweep.
- "schedule_soft_delete_retention_days": "30",
+ # purged. (#1039: community default lowered 180 → 5; recovery
+ # window shrinks accordingly — an enterprise license restores it.)
+ "agent_soft_delete_retention_days": "5",
+ # Issue #834 Phase 1b: per-schedule soft-delete. (#1039: lowered
+ # 30 → 5.) "0" disables the sweep.
+ "schedule_soft_delete_retention_days": "5",
}
# Descriptions for each ops setting
diff --git a/src/frontend/src/views/Settings.vue b/src/frontend/src/views/Settings.vue
index 49552197..81bfa350 100644
--- a/src/frontend/src/views/Settings.vue
+++ b/src/frontend/src/views/Settings.vue
@@ -42,6 +42,74 @@
+ How long Trinity keeps logs, executions, health checks, and soft-deleted agents/schedules. +
++ The community edition keeps a fixed + {{ retention.community_floor_days }}-day retention floor. + An enterprise license unlocks configurable, longer windows — set per class, applied live with no restart. +
+