From 7dced88b31fe2d9dfdd044cf28366d2d4f36ccb0 Mon Sep 17 00:00:00 2001 From: coodos Date: Mon, 18 May 2026 18:00:00 +0530 Subject: [PATCH 1/2] chore(awareness-service): cap webhook delivery at 3 attempts Lower the default AWARENESS_MAX_ATTEMPTS from 8 to 3. --- .env.example | 2 +- docs/docs/Services/Awareness-as-a-Service.md | 2 +- services/awareness-service/api/src/config.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index ebc131d80..fe924eff8 100644 --- a/.env.example +++ b/.env.example @@ -145,7 +145,7 @@ AAAS_ADMIN_ENAMES="" # Secret used to sign AaaS portal session JWTs AAAS_JWT_SECRET="replace-with-a-strong-secret" # Webhook delivery tuning -AWARENESS_MAX_ATTEMPTS=8 +AWARENESS_MAX_ATTEMPTS=3 AWARENESS_DELIVERY_POLL_MS=2000 # The one-time Neo4j backfill reuses the standard NEO4J_URI / NEO4J_USER / # NEO4J_PASSWORD vars at the top of this file - it reads evault-core's graph diff --git a/docs/docs/Services/Awareness-as-a-Service.md b/docs/docs/Services/Awareness-as-a-Service.md index 9173926c8..a00b10911 100644 --- a/docs/docs/Services/Awareness-as-a-Service.md +++ b/docs/docs/Services/Awareness-as-a-Service.md @@ -160,7 +160,7 @@ AaaS is designed to be dropped in with **zero receiver-side changes**: | `AWARENESS_SERVICE_URL` | (evault-core) where to POST packets | | `AAAS_ADMIN_ENAMES` | Comma-separated admin eNames | | `AAAS_JWT_SECRET` | Signs portal session JWTs | -| `AWARENESS_MAX_ATTEMPTS` | Delivery attempts before dead-lettering (default 8) | +| `AWARENESS_MAX_ATTEMPTS` | Delivery attempts before dead-lettering (default 3) | | `AWARENESS_DELIVERY_POLL_MS` | Delivery engine poll interval (default 2000) | | `NEO4J_URI` / `NEO4J_USER` / `NEO4J_PASSWORD` | Standard eVault Neo4j vars — reused by the one-time backfill | | `PUBLIC_AWARENESS_API_URL` | (portal) AaaS API base URL | diff --git a/services/awareness-service/api/src/config.ts b/services/awareness-service/api/src/config.ts index 4668685cc..71df8fc4b 100644 --- a/services/awareness-service/api/src/config.ts +++ b/services/awareness-service/api/src/config.ts @@ -27,7 +27,7 @@ export const config = { .filter(Boolean), /** Secret used to sign portal session JWTs. */ jwtSecret: process.env.AAAS_JWT_SECRET ?? "awareness-dev-secret", - maxAttempts: parseInt(process.env.AWARENESS_MAX_ATTEMPTS ?? "8", 10), + maxAttempts: parseInt(process.env.AWARENESS_MAX_ATTEMPTS ?? "3", 10), deliveryPollMs: parseInt( process.env.AWARENESS_DELIVERY_POLL_MS ?? "2000", 10, From f3f6f6fb234a9fc4fd48b54254e8689ee59ea2d1 Mon Sep 17 00:00:00 2001 From: coodos Date: Mon, 18 May 2026 18:20:00 +0530 Subject: [PATCH 2/2] fix(awareness-service): stop re-claiming exhausted deliveries A dead-lettered delivery was left with status 'failed', which claimBatch selects - so the engine re-claimed it every tick, re-attempted it, inflated its attempt count without bound and inserted a fresh dead-letter row each time. Mark exhausted deliveries with a terminal 'dead' status and exclude any delivery at/over the attempt cap from claiming. --- .../api/src/database/entities/Delivery.ts | 3 ++- services/awareness-service/api/src/openapi.ts | 8 +++++++- .../api/src/services/DeliveryEngine.ts | 11 +++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/services/awareness-service/api/src/database/entities/Delivery.ts b/services/awareness-service/api/src/database/entities/Delivery.ts index d16b6d33b..3010de772 100644 --- a/services/awareness-service/api/src/database/entities/Delivery.ts +++ b/services/awareness-service/api/src/database/entities/Delivery.ts @@ -11,7 +11,8 @@ export type DeliveryStatus = | "pending" | "delivering" | "delivered" - | "failed"; + | "failed" + | "dead"; /** * A queued webhook delivery of one packet to one subscription. The unique diff --git a/services/awareness-service/api/src/openapi.ts b/services/awareness-service/api/src/openapi.ts index 0955e3685..b7106888d 100644 --- a/services/awareness-service/api/src/openapi.ts +++ b/services/awareness-service/api/src/openapi.ts @@ -107,7 +107,13 @@ export const openApiDocument = { packetId: { type: "string" }, status: { type: "string", - enum: ["pending", "delivering", "delivered", "failed"], + enum: [ + "pending", + "delivering", + "delivered", + "failed", + "dead", + ], }, attempts: { type: "integer" }, nextAttemptAt: { type: "string", format: "date-time" }, diff --git a/services/awareness-service/api/src/services/DeliveryEngine.ts b/services/awareness-service/api/src/services/DeliveryEngine.ts index cd566b594..378d43eb5 100644 --- a/services/awareness-service/api/src/services/DeliveryEngine.ts +++ b/services/awareness-service/api/src/services/DeliveryEngine.ts @@ -92,15 +92,19 @@ export class DeliveryEngine { .update(Delivery) .set({ status: "delivering" }) .where( + // Only pending/failed deliveries still under the attempt + // limit are claimable. `dead` deliveries (and any that + // already hit the cap) are terminal and never re-claimed. `id IN ( SELECT id FROM deliveries WHERE status IN ('pending', 'failed') + AND attempts < :maxAttempts AND "nextAttemptAt" <= now() ORDER BY "nextAttemptAt" LIMIT :limit FOR UPDATE SKIP LOCKED )`, - { limit: BATCH_SIZE }, + { limit: BATCH_SIZE, maxAttempts: config.maxAttempts }, ) .returning("*") .execute(); @@ -181,8 +185,11 @@ export class DeliveryEngine { const deliveryRepo = AppDataSource.getRepository(Delivery); if (attempts >= config.maxAttempts) { + // Terminal: mark `dead` so the engine never re-claims it. (Using + // `failed` here let exhausted deliveries be picked up again every + // tick, inflating attempts and spawning duplicate dead letters.) await deliveryRepo.update(delivery.id, { - status: "failed", + status: "dead", attempts, lastError: message, lastResponseStatus: responseStatus,