From 59559fc7163a846bee1e2d3ac0cf295e41b4acbd Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Mar 2026 17:17:39 +0000 Subject: [PATCH 1/6] fix: reduce scraper/scheduler error log noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Eliminate duplicate Browserless error logging (extractWithBrowserless no longer logs since the caller checkMonitor already logs with fuller context) - Remove premature error logging from fetchWithCurl (it's a fallback — the pipeline continues to Browserless) - Downgrade transient network/DB errors in scraper outer catch from ERROR to WARNING (they're retried automatically) - Downgrade "rendered page extraction failed" to WARNING (expected for sites that block headless browsers, handled by circuit breaker) - Downgrade "check succeeded but failed to save result" to WARNING (marked for accelerated retry, self-heals on next cycle) - Wrap scheduler notification, digest, webhook, and cleanup DB operations in withDbRetry for transient connection errors - Downgrade all scheduler transient DB failures to WARNING level - Downgrade daily cleanup failures to WARNING (non-critical background tasks that retry tomorrow) https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.test.ts | 22 ++++----- server/services/scheduler.ts | 78 +++++++++++++++++++++---------- server/services/scraper.test.ts | 34 ++++---------- server/services/scraper.ts | 42 ++++++++++------- 4 files changed, 100 insertions(+), 76 deletions(-) diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index ccd3f9c8..7a5e002e 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -664,15 +664,14 @@ describe("daily metrics cleanup", () => { consoleSpy.mockRestore(); }); - it("logs error when cleanup query fails", async () => { + it("logs warning when cleanup query fails (non-critical background task)", async () => { await startScheduler(); mockDbExecute.mockRejectedValueOnce(new Error("DB timeout")); await runCron("0 3 * * *"); - expect(ErrorLogger.error).toHaveBeenCalledWith( + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - "monitor_metrics cleanup failed", - expect.any(Error), + "monitor_metrics cleanup failed (will retry tomorrow)", expect.objectContaining({ errorMessage: "DB timeout", retentionDays: 90, @@ -686,10 +685,9 @@ describe("daily metrics cleanup", () => { mockDbExecute.mockRejectedValueOnce("disk full"); await runCron("0 3 * * *"); - expect(ErrorLogger.error).toHaveBeenCalledWith( + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - "monitor_metrics cleanup failed", - null, + "monitor_metrics cleanup failed (will retry tomorrow)", expect.objectContaining({ errorMessage: "disk full", retentionDays: 90, @@ -931,7 +929,7 @@ describe("withDbRetry and re-entrancy guards", () => { ); }); - it("logs error when retry also fails on transient error", async () => { + it("logs warning when retry also fails on transient error", async () => { mockGetAllActiveMonitors .mockRejectedValueOnce(new Error("Connection terminated")) .mockRejectedValueOnce(new Error("Connection terminated again")); @@ -942,11 +940,11 @@ describe("withDbRetry and re-entrancy guards", () => { await cronPromise; expect(mockGetAllActiveMonitors).toHaveBeenCalledTimes(2); - expect(ErrorLogger.error).toHaveBeenCalledWith( + // Transient DB errors are downgraded to warnings since the next tick retries automatically + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - "Scheduler iteration failed", - expect.any(Error), - expect.objectContaining({ phase: "fetching active monitors" }) + expect.stringContaining("Scheduler iteration skipped"), + expect.objectContaining({ activeChecks: 0 }) ); }); diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 9341150e..fefd72e4 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -227,11 +227,21 @@ export async function startScheduler() { } } } catch (error) { - await ErrorLogger.error("scheduler", "Scheduler iteration failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - activeChecks, - phase: "fetching active monitors", - }); + // Transient DB errors (connection drops, pool exhaustion) are expected during + // spikes — the next tick will retry automatically. Only log as error for + // non-transient failures that indicate a real problem. + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "Scheduler iteration skipped (transient DB error, will retry next tick)", { + errorMessage: error instanceof Error ? error.message : String(error), + activeChecks, + }); + } else { + await ErrorLogger.error("scheduler", "Scheduler iteration failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + activeChecks, + phase: "fetching active monitors", + }); + } } finally { mainCronRunning = false; } @@ -249,18 +259,30 @@ export async function startScheduler() { notificationCronRunning = true; try { try { - await processQueuedNotifications(); + await withDbRetry(() => processQueuedNotifications()); } catch (error) { - await ErrorLogger.error("scheduler", "Queued notification processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "Queued notification processing skipped (transient DB error)", { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } else { + await ErrorLogger.error("scheduler", "Queued notification processing failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } } try { - await processDigestCron(); + await withDbRetry(() => processDigestCron()); } catch (error) { - await ErrorLogger.error("scheduler", "Digest processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "Digest processing skipped (transient DB error)", { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } else { + await ErrorLogger.error("scheduler", "Digest processing failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } } } finally { notificationCronRunning = false; @@ -361,9 +383,15 @@ export async function startScheduler() { } } } catch (error) { - await ErrorLogger.error("scheduler", "Webhook retry processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "Webhook retry processing skipped (transient DB error)", { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } else { + await ErrorLogger.error("scheduler", "Webhook retry processing failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + }); + } } finally { webhookCronRunning = false; } @@ -371,17 +399,19 @@ export async function startScheduler() { } // Daily cleanup: prune monitor_metrics older than 90 days to prevent unbounded growth + // All cleanup operations are best-effort background tasks — transient DB failures + // are logged as warnings since the next daily run will catch up. cronTasks.push(cron.schedule("0 3 * * *", async () => { try { - const result = await db.execute( + const result = await withDbRetry(() => db.execute( sql`DELETE FROM monitor_metrics WHERE checked_at < NOW() - INTERVAL '90 days'` - ); + )); const deleted = (result as any).rowCount ?? 0; if (deleted > 0) { console.log(`[Cleanup] Pruned ${deleted} monitor_metrics rows older than 90 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "monitor_metrics cleanup failed", error instanceof Error ? error : null, { + await ErrorLogger.warning("scheduler", "monitor_metrics cleanup failed (will retry tomorrow)", { errorMessage: error instanceof Error ? error.message : String(error), retentionDays: 90, table: "monitor_metrics", @@ -391,12 +421,12 @@ export async function startScheduler() { // Delivery log cleanup: prune entries older than 30 days try { const olderThan = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); - const entriesDeleted = await storage.cleanupOldDeliveryLogs(olderThan); + const entriesDeleted = await withDbRetry(() => storage.cleanupOldDeliveryLogs(olderThan)); if (entriesDeleted > 0) { console.log(`[Cleanup] Pruned ${entriesDeleted} delivery_log rows older than 30 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "delivery_log cleanup failed", error instanceof Error ? error : null, { + await ErrorLogger.warning("scheduler", "delivery_log cleanup failed (will retry tomorrow)", { errorMessage: error instanceof Error ? error.message : String(error), retentionDays: 30, table: "delivery_log", @@ -405,14 +435,14 @@ export async function startScheduler() { // Notification queue cleanup: prune permanently failed entries older than 7 days try { - const deleted = await storage.cleanupPermanentlyFailedQueueEntries( + const deleted = await withDbRetry(() => storage.cleanupPermanentlyFailedQueueEntries( new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) - ); + )); if (deleted > 0) { console.log(`[Cleanup] Pruned ${deleted} permanently failed notification_queue rows older than 7 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "notification_queue cleanup failed", error instanceof Error ? error : null, { + await ErrorLogger.warning("scheduler", "notification_queue cleanup failed (will retry tomorrow)", { errorMessage: error instanceof Error ? error.message : String(error), retentionDays: 7, table: "notification_queue", diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index e83d5e00..e1d94ccd 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -4214,11 +4214,10 @@ describe("checkMonitor outer catch resilience", () => { expect(result.currentValue).toBe("$49.99"); expect(result.changed).toBe(true); expect(result.error).toContain("server error prevented saving"); - // Verify enhanced logging includes extracted and previous values - expect(ErrorLogger.error).toHaveBeenCalledWith( + // Verify enhanced logging includes extracted and previous values (downgraded to warning) + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scraper", expect.stringContaining("check succeeded but failed to save result"), - expect.any(Error), expect.objectContaining({ monitorId: 1, extractedValue: "$49.99", @@ -5831,49 +5830,36 @@ describe("extractWithBrowserless error classification in logs", () => { delete process.env.BROWSERLESS_TOKEN; }); - it("logs classified timeout message to ErrorLogger", async () => { + it("does not log to ErrorLogger (caller handles logging)", async () => { mockConnectOverCDP.mockRejectedValue(new Error("Navigation timeout of 30000ms exceeded")); await expect( extractWithBrowserless("https://example.com", ".price", 1, "My Monitor") ).rejects.toThrow(); - expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining("took too long"), - expect.any(Error), - expect.objectContaining({ url: "https://example.com" }), - ); + // extractWithBrowserless no longer logs — the caller (checkMonitor) logs + // with fuller context to avoid duplicate error entries. + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); - it("logs classified ECONNREFUSED message to ErrorLogger", async () => { + it("re-throws ECONNREFUSED without logging", async () => { mockConnectOverCDP.mockRejectedValue(new Error("connect ECONNREFUSED 127.0.0.1:443")); await expect( extractWithBrowserless("https://example.com", ".price") ).rejects.toThrow(); - expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining("refused the connection"), - expect.any(Error), - expect.objectContaining({ url: "https://example.com" }), - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); - it("includes monitor name in error label when provided", async () => { + it("re-throws errors without logging even when monitor name provided", async () => { mockConnectOverCDP.mockRejectedValue(new Error("some error")); await expect( extractWithBrowserless("https://example.com", ".price", 1, "Price Tracker") ).rejects.toThrow(); - expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining('"Price Tracker"'), - expect.any(Error), - expect.objectContaining({ monitorName: "Price Tracker", monitorId: 1 }), - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); }); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index bcef4bff..d1d5cfb6 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -952,9 +952,8 @@ export async function extractWithBrowserless(url: string, selector: string, moni }; }, { pageTimeoutMs }); } catch (error) { - const label = monitorName ? `"${monitorName}" — browser` : "Browser"; - const classified = classifyBrowserlessError(error instanceof Error ? error.message : "Unknown error"); - await ErrorLogger.error("scraper", `${label}-based extraction failed: ${classified}`, error instanceof Error ? error : null, { url, selector, ...(monitorId ? { monitorId } : {}), ...(monitorName ? { monitorName } : {}) }); + // Don't log here — the caller (checkMonitor) logs with fuller context. + // Logging here too would create duplicate error entries for every failure. throw error; } } @@ -976,8 +975,8 @@ async function fetchWithCurl(url: string, monitorId?: number, monitorName?: stri const rethrow = isAbort ? new Error("Page took too long to respond (15s timeout)") : error; - const label = monitorName ? `"${monitorName}" — page` : "Page"; - await ErrorLogger.error("scraper", `${label} fetch with curl failed — the site returned an error or is blocking the request. Verify the URL is correct and the site is accessible.`, rethrow instanceof Error ? rethrow : null, { url, ...(monitorId ? { monitorId } : {}), ...(monitorName ? { monitorName } : {}) }); + // Don't log here — this is a fallback fetch. The caller decides + // whether to log based on the overall pipeline outcome. throw rethrow; } finally { clearTimeout(timeout); @@ -1194,7 +1193,10 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } if (lastBrowserlessErr) { - await ErrorLogger.error("scraper", `"${monitor.name}" — rendered page extraction failed. The site may block automated browsers or the page took too long to load. Try simplifying the selector or check if the site requires login.`, lastBrowserlessErr instanceof Error ? lastBrowserlessErr : null, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); + // Downgrade to warning: Browserless failures are expected for sites that + // block headless browsers. The circuit breaker and retry logic handle recovery. + const classified = classifyBrowserlessError(lastBrowserlessErr instanceof Error ? lastBrowserlessErr.message : "Unknown error"); + await ErrorLogger.warning("scraper", `"${monitor.name}" — rendered page extraction failed: ${classified}`, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); } const durationMs = Date.now() - startTime; @@ -1338,13 +1340,13 @@ export async function checkMonitor(monitor: Monitor): Promise<{ consecutiveFailures: 0, }); } catch (retryError) { - // Both attempts failed — log with full context + // Both attempts failed — log as warning since it's marked for + // accelerated retry and will self-heal on the next cycle. const dbErrMsg = dbError instanceof Error ? dbError.message : String(dbError); const retryErrMsg = retryError instanceof Error ? retryError.message : String(retryError); - await ErrorLogger.error( + await ErrorLogger.warning( "scraper", - `"${monitor.name}" check succeeded but failed to save result`, - dbError instanceof Error ? dbError : null, + `"${monitor.name}" check succeeded but failed to save result (will retry)`, { monitorId: monitor.id, monitorName: monitor.name, @@ -1445,12 +1447,20 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } catch (error) { const { userMessage, logContext } = classifyOuterError(error); - await ErrorLogger.error( - "scraper", - `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`, - error instanceof Error ? error : null, - { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector } - ).catch(() => {}); + // Transient network/DB errors are expected and retried automatically — + // log as warnings to avoid polluting the error log with recoverable conditions. + const isTransient = logContext === "network error" || logContext === "database error" || logContext === "database connection error"; + const logMessage = `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`; + if (isTransient) { + await ErrorLogger.warning("scraper", logMessage, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }).catch(() => {}); + } else { + await ErrorLogger.error( + "scraper", + logMessage, + error instanceof Error ? error : null, + { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector } + ).catch(() => {}); + } try { await handleMonitorFailure(monitor, "error", userMessage, false); From 8d03301ae5f41cecef43deeee7e15708e9a557e7 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Mar 2026 17:59:12 +0000 Subject: [PATCH 2/6] test: add coverage for transient error warning paths Add tests verifying that transient DB errors in scheduler (notification, digest, webhook processing) and scraper (outer catch classification) are correctly downgraded to warnings instead of errors. Update existing tests to match new warning-level logging for save failures and cleanup. https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.test.ts | 84 +++++++++++++++++++++++++++++++ server/services/scraper.test.ts | 16 ++++++ 2 files changed, 100 insertions(+) diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 7a5e002e..9bd64b92 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -818,6 +818,62 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { }) ); }); + + it("logs warning (not error) when processQueuedNotifications fails with transient DB error", async () => { + // withDbRetry retries once on transient error (1s delay via trackTimeout), + // then outer catch checks isTransientDbError for warning vs error. + mockProcessQueuedNotifications + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + const callbacks = cronCallbacks["*/1 * * * *"]; + // Run notification cron (index 0) without awaiting — withDbRetry's 1s timeout needs advancement + const cronPromise = callbacks[0](); + await vi.advanceTimersByTimeAsync(2000); + await cronPromise; + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Queued notification processing skipped"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + // Should NOT have called ErrorLogger.error for this transient failure + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("notification"), + expect.anything(), + expect.anything() + ); + }); + + it("logs warning (not error) when processDigestCron fails with transient DB error", async () => { + mockProcessDigestCron + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + const callbacks = cronCallbacks["*/1 * * * *"]; + const cronPromise = callbacks[0](); + await vi.advanceTimersByTimeAsync(2000); + await cronPromise; + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Digest processing skipped"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Digest"), + expect.anything(), + expect.anything() + ); + }); }); describe("stopScheduler", () => { @@ -1043,6 +1099,34 @@ describe("withDbRetry and re-entrancy guards", () => { resolveRetries([]); await firstRun; }); + + it("logs warning (not error) when webhook processing fails with transient DB error", async () => { + // Both withDbRetry attempts fail with transient error + mockStorage.getPendingWebhookRetries + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + const callbacks = cronCallbacks["*/1 * * * *"]; + await callbacks[0](); // notification cron + const webhookPromise = callbacks[1](); + await vi.advanceTimersByTimeAsync(2000); + await webhookPromise; + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Webhook retry processing skipped"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Webhook"), + expect.anything(), + expect.anything() + ); + }); }); describe("webhook retry cumulative backoff", () => { diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index e1d94ccd..7514d157 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -4261,6 +4261,22 @@ describe("checkMonitor outer catch resilience", () => { // Must NOT be the old generic message expect(result.error).not.toBe("Failed to fetch page"); }); + it("classifyOuterError returns 'network error' for transient network errors", () => { + // Verify the classification that drives the transient/non-transient logging split + const { userMessage, logContext } = classifyOuterError(new Error("Connection terminated due to connection timeout")); + expect(logContext).toBe("network error"); + expect(userMessage).toBe("Page took too long to respond"); + }); + + it("classifyOuterError returns 'database error' for DB-specific errors", () => { + const { logContext } = classifyOuterError(new Error("relation 'monitors' does not exist")); + expect(logContext).toBe("database error"); + }); + + it("classifyOuterError returns 'unclassified error' for non-transient errors", () => { + const { logContext } = classifyOuterError(new Error("Something totally unexpected")); + expect(logContext).toBe("unclassified error"); + }); }); // --------------------------------------------------------------------------- From fb0564f69add1d2aa84d34b6ca5f2d41c5ff7689 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Mar 2026 18:04:51 +0000 Subject: [PATCH 3/6] fix: separate SSRF from network errors, apply transient checks to all error downgrades MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security fixes from review: - SSRF-blocked errors now get dedicated "ssrf_blocked" logContext so they stay at error level instead of being downgraded as transient network errors - DB save failures in scraper apply isTransient check: only transient connection errors are warnings, schema/constraint errors remain errors - Cleanup operations in scheduler apply isTransientDbError: transient → warning, non-transient → error (was unconditionally warning) https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.test.ts | 31 +++++++++++++++--- server/services/scheduler.ts | 54 ++++++++++++++++++++++--------- server/services/scraper.test.ts | 22 ++++++++----- server/services/scraper.ts | 49 +++++++++++++++++++--------- 4 files changed, 113 insertions(+), 43 deletions(-) diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 9bd64b92..0f975747 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -664,16 +664,37 @@ describe("daily metrics cleanup", () => { consoleSpy.mockRestore(); }); - it("logs warning when cleanup query fails (non-critical background task)", async () => { + it("logs error when cleanup fails with non-transient DB error", async () => { await startScheduler(); mockDbExecute.mockRejectedValueOnce(new Error("DB timeout")); await runCron("0 3 * * *"); + expect(ErrorLogger.error).toHaveBeenCalledWith( + "scheduler", + "monitor_metrics cleanup failed", + expect.any(Error), + expect.objectContaining({ + errorMessage: "DB timeout", + retentionDays: 90, + table: "monitor_metrics", + }) + ); + }); + + it("logs warning when cleanup fails with transient DB error", async () => { + await startScheduler(); + mockDbExecute + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + const cronPromise = runCron("0 3 * * *"); + await vi.advanceTimersByTimeAsync(2000); + await cronPromise; + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", "monitor_metrics cleanup failed (will retry tomorrow)", expect.objectContaining({ - errorMessage: "DB timeout", + errorMessage: "Connection terminated", retentionDays: 90, table: "monitor_metrics", }) @@ -685,9 +706,11 @@ describe("daily metrics cleanup", () => { mockDbExecute.mockRejectedValueOnce("disk full"); await runCron("0 3 * * *"); - expect(ErrorLogger.warning).toHaveBeenCalledWith( + // Non-Error values are not transient, so logged as error + expect(ErrorLogger.error).toHaveBeenCalledWith( "scheduler", - "monitor_metrics cleanup failed (will retry tomorrow)", + "monitor_metrics cleanup failed", + null, expect.objectContaining({ errorMessage: "disk full", retentionDays: 90, diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index fefd72e4..9e1d3523 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -411,11 +411,19 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${deleted} monitor_metrics rows older than 90 days`); } } catch (error) { - await ErrorLogger.warning("scheduler", "monitor_metrics cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 90, - table: "monitor_metrics", - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "monitor_metrics cleanup failed (will retry tomorrow)", { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 90, + table: "monitor_metrics", + }); + } else { + await ErrorLogger.error("scheduler", "monitor_metrics cleanup failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 90, + table: "monitor_metrics", + }); + } } // Delivery log cleanup: prune entries older than 30 days @@ -426,11 +434,19 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${entriesDeleted} delivery_log rows older than 30 days`); } } catch (error) { - await ErrorLogger.warning("scheduler", "delivery_log cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 30, - table: "delivery_log", - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "delivery_log cleanup failed (will retry tomorrow)", { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 30, + table: "delivery_log", + }); + } else { + await ErrorLogger.error("scheduler", "delivery_log cleanup failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 30, + table: "delivery_log", + }); + } } // Notification queue cleanup: prune permanently failed entries older than 7 days @@ -442,11 +458,19 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${deleted} permanently failed notification_queue rows older than 7 days`); } } catch (error) { - await ErrorLogger.warning("scheduler", "notification_queue cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 7, - table: "notification_queue", - }); + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", "notification_queue cleanup failed (will retry tomorrow)", { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 7, + table: "notification_queue", + }); + } else { + await ErrorLogger.error("scheduler", "notification_queue cleanup failed", error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + retentionDays: 7, + table: "notification_queue", + }); + } } })); diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index 7514d157..97b920a4 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -4044,9 +4044,9 @@ describe("classifyOuterError", () => { expect(result.userMessage).toBe("Connection was reset by the target site"); }); - it("classifies SSRF blocked errors as network error", () => { + it("classifies SSRF blocked errors as ssrf_blocked (not network error)", () => { const result = classifyOuterError(new Error("SSRF blocked: This URL resolves to a private address")); - expect(result.logContext).toBe("network error"); + expect(result.logContext).toBe("ssrf_blocked"); expect(result.userMessage).toBe("URL is not allowed"); }); @@ -4201,9 +4201,9 @@ describe("checkMonitor outer catch resilience", () => { const html = `$49.99`; vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(new Response(html, { status: 200 })); - // Both attempts fail - mockStorage.updateMonitor.mockRejectedValueOnce(new Error("conn reset")); - mockStorage.updateMonitor.mockRejectedValueOnce(new Error("conn reset again")); + // Both attempts fail with transient connection error + mockStorage.updateMonitor.mockRejectedValueOnce(new Error("connection terminated")); + mockStorage.updateMonitor.mockRejectedValueOnce(new Error("connection terminated")); const { ErrorLogger } = await import("./logger"); @@ -4214,7 +4214,7 @@ describe("checkMonitor outer catch resilience", () => { expect(result.currentValue).toBe("$49.99"); expect(result.changed).toBe(true); expect(result.error).toContain("server error prevented saving"); - // Verify enhanced logging includes extracted and previous values (downgraded to warning) + // Transient DB errors are downgraded to warnings (will retry via accelerated retry) expect(ErrorLogger.warning).toHaveBeenCalledWith( "scraper", expect.stringContaining("check succeeded but failed to save result"), @@ -4223,8 +4223,8 @@ describe("checkMonitor outer catch resilience", () => { extractedValue: "$49.99", previousValue: "$39.99", changed: true, - dbError: "conn reset", - retryError: "conn reset again", + dbError: "connection terminated", + retryError: "connection terminated", }), ); }); @@ -4277,6 +4277,12 @@ describe("checkMonitor outer catch resilience", () => { const { logContext } = classifyOuterError(new Error("Something totally unexpected")); expect(logContext).toBe("unclassified error"); }); + + it("classifyOuterError returns 'ssrf_blocked' for SSRF errors (not grouped with network errors)", () => { + const { userMessage, logContext } = classifyOuterError(new Error("SSRF blocked: URL is not allowed")); + expect(logContext).toBe("ssrf_blocked"); + expect(userMessage).toBe("URL is not allowed"); + }); }); // --------------------------------------------------------------------------- diff --git a/server/services/scraper.ts b/server/services/scraper.ts index d1d5cfb6..75a62b51 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -486,8 +486,13 @@ export function classifyOuterError(error: unknown): { userMessage: string; logCo return { userMessage: "A temporary server error occurred. The check will be retried automatically.", logContext: "database connection error" }; } + // SSRF blocks — security-relevant, must stay at error level (not transient) + if (/SSRF blocked/i.test(msg)) { + return { userMessage: sanitizeErrorForClient(msg), logContext: "ssrf_blocked" }; + } + // Network errors — delegate to the existing fetch-error sanitizer - if (/abort|timeout|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|ECONNRESET|socket hang up|certificate|ssl|tls|SSRF|UND_ERR_HEADERS_OVERFLOW/i.test(msg)) { + if (/abort|timeout|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|ECONNRESET|socket hang up|certificate|ssl|tls|UND_ERR_HEADERS_OVERFLOW/i.test(msg)) { return { userMessage: sanitizeErrorForClient(msg), logContext: "network error" }; } @@ -1340,23 +1345,35 @@ export async function checkMonitor(monitor: Monitor): Promise<{ consecutiveFailures: 0, }); } catch (retryError) { - // Both attempts failed — log as warning since it's marked for - // accelerated retry and will self-heal on the next cycle. + // Both attempts failed. Transient DB errors (connection drops) are + // expected and will self-heal via accelerated retry — log as warning. + // Non-transient errors (schema/constraint) indicate a real problem — log as error. const dbErrMsg = dbError instanceof Error ? dbError.message : String(dbError); const retryErrMsg = retryError instanceof Error ? retryError.message : String(retryError); - await ErrorLogger.warning( - "scraper", - `"${monitor.name}" check succeeded but failed to save result (will retry)`, - { - monitorId: monitor.id, - monitorName: monitor.name, - extractedValue: newValue?.substring(0, 200) ?? null, - previousValue: oldValue?.substring(0, 200) ?? null, - changed, - dbError: dbErrMsg, - retryError: retryErrMsg, - }, - ).catch(() => {}); + const isTransientSave = /connection terminated|connection timeout|connection refused|econnreset|econnrefused|cannot acquire|timeout expired/i.test(retryErrMsg); + const saveContext = { + monitorId: monitor.id, + monitorName: monitor.name, + extractedValue: newValue?.substring(0, 200) ?? null, + previousValue: oldValue?.substring(0, 200) ?? null, + changed, + dbError: dbErrMsg, + retryError: retryErrMsg, + }; + if (isTransientSave) { + await ErrorLogger.warning( + "scraper", + `"${monitor.name}" check succeeded but failed to save result (will retry)`, + saveContext, + ).catch(() => {}); + } else { + await ErrorLogger.error( + "scraper", + `"${monitor.name}" check succeeded but failed to save result`, + retryError instanceof Error ? retryError : null, + saveContext, + ).catch(() => {}); + } saveFailed = true; } From 67e79663b49be8c9c431b4d3b1bf25baa58c0244 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Mar 2026 18:10:08 +0000 Subject: [PATCH 4/6] refactor: extract logSchedulerError helper, fix database error classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture review fixes: - Extract logSchedulerError() helper to eliminate duplicated isTransientDbError + conditional log level pattern (was repeated 7x) - Remove "database error" from transient classification in scraper outer catch — schema/constraint errors are NOT transient and must stay at error level; only "database connection error" is truly transient https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.test.ts | 12 ++-- server/services/scheduler.ts | 107 ++++++++---------------------- server/services/scraper.ts | 5 +- 3 files changed, 35 insertions(+), 89 deletions(-) diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 0f975747..d8da3ca3 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -692,7 +692,7 @@ describe("daily metrics cleanup", () => { expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - "monitor_metrics cleanup failed (will retry tomorrow)", + "monitor_metrics cleanup failed (transient, will retry)", expect.objectContaining({ errorMessage: "Connection terminated", retentionDays: 90, @@ -858,7 +858,7 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - expect.stringContaining("Queued notification processing skipped"), + expect.stringContaining("Queued notification processing failed (transient, will retry)"), expect.objectContaining({ errorMessage: "Connection terminated", }) @@ -885,7 +885,7 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - expect.stringContaining("Digest processing skipped"), + expect.stringContaining("Digest processing failed (transient, will retry)"), expect.objectContaining({ errorMessage: "Connection terminated", }) @@ -1019,10 +1019,10 @@ describe("withDbRetry and re-entrancy guards", () => { await cronPromise; expect(mockGetAllActiveMonitors).toHaveBeenCalledTimes(2); - // Transient DB errors are downgraded to warnings since the next tick retries automatically + // Transient DB errors are downgraded to warnings via logSchedulerError helper expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - expect.stringContaining("Scheduler iteration skipped"), + expect.stringContaining("Scheduler iteration failed (transient, will retry)"), expect.objectContaining({ activeChecks: 0 }) ); }); @@ -1138,7 +1138,7 @@ describe("withDbRetry and re-entrancy guards", () => { expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - expect.stringContaining("Webhook retry processing skipped"), + expect.stringContaining("Webhook retry processing failed (transient, will retry)"), expect.objectContaining({ errorMessage: "Connection terminated", }) diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 9e1d3523..e954a1b9 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -56,6 +56,25 @@ async function withDbRetry(fn: () => Promise): Promise { } } +/** Log a caught error as warning (transient) or error (non-transient) based on isTransientDbError. */ +async function logSchedulerError( + message: string, + error: unknown, + context?: Record, +): Promise { + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", `${message} (transient, will retry)`, { + errorMessage: error instanceof Error ? error.message : String(error), + ...context, + }); + } else { + await ErrorLogger.error("scheduler", message, error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + ...context, + }); + } +} + /** Schedule a callback with automatic cleanup from pendingTimeouts when it fires. */ function trackTimeout(callback: () => void, delayMs: number): ReturnType { const handle = setTimeout(() => { @@ -227,21 +246,7 @@ export async function startScheduler() { } } } catch (error) { - // Transient DB errors (connection drops, pool exhaustion) are expected during - // spikes — the next tick will retry automatically. Only log as error for - // non-transient failures that indicate a real problem. - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "Scheduler iteration skipped (transient DB error, will retry next tick)", { - errorMessage: error instanceof Error ? error.message : String(error), - activeChecks, - }); - } else { - await ErrorLogger.error("scheduler", "Scheduler iteration failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - activeChecks, - phase: "fetching active monitors", - }); - } + await logSchedulerError("Scheduler iteration failed", error, { activeChecks, phase: "fetching active monitors" }); } finally { mainCronRunning = false; } @@ -261,28 +266,12 @@ export async function startScheduler() { try { await withDbRetry(() => processQueuedNotifications()); } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "Queued notification processing skipped (transient DB error)", { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } else { - await ErrorLogger.error("scheduler", "Queued notification processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } + await logSchedulerError("Queued notification processing failed", error); } try { await withDbRetry(() => processDigestCron()); } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "Digest processing skipped (transient DB error)", { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } else { - await ErrorLogger.error("scheduler", "Digest processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } + await logSchedulerError("Digest processing failed", error); } } finally { notificationCronRunning = false; @@ -383,15 +372,7 @@ export async function startScheduler() { } } } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "Webhook retry processing skipped (transient DB error)", { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } else { - await ErrorLogger.error("scheduler", "Webhook retry processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); - } + await logSchedulerError("Webhook retry processing failed", error); } finally { webhookCronRunning = false; } @@ -411,19 +392,7 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${deleted} monitor_metrics rows older than 90 days`); } } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "monitor_metrics cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 90, - table: "monitor_metrics", - }); - } else { - await ErrorLogger.error("scheduler", "monitor_metrics cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 90, - table: "monitor_metrics", - }); - } + await logSchedulerError("monitor_metrics cleanup failed", error, { retentionDays: 90, table: "monitor_metrics" }); } // Delivery log cleanup: prune entries older than 30 days @@ -434,19 +403,7 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${entriesDeleted} delivery_log rows older than 30 days`); } } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "delivery_log cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 30, - table: "delivery_log", - }); - } else { - await ErrorLogger.error("scheduler", "delivery_log cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 30, - table: "delivery_log", - }); - } + await logSchedulerError("delivery_log cleanup failed", error, { retentionDays: 30, table: "delivery_log" }); } // Notification queue cleanup: prune permanently failed entries older than 7 days @@ -458,19 +415,7 @@ export async function startScheduler() { console.log(`[Cleanup] Pruned ${deleted} permanently failed notification_queue rows older than 7 days`); } } catch (error) { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", "notification_queue cleanup failed (will retry tomorrow)", { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 7, - table: "notification_queue", - }); - } else { - await ErrorLogger.error("scheduler", "notification_queue cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 7, - table: "notification_queue", - }); - } + await logSchedulerError("notification_queue cleanup failed", error, { retentionDays: 7, table: "notification_queue" }); } })); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 75a62b51..552ebb3e 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -1464,9 +1464,10 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } catch (error) { const { userMessage, logContext } = classifyOuterError(error); - // Transient network/DB errors are expected and retried automatically — + // Transient network/connection errors are expected and retried automatically — // log as warnings to avoid polluting the error log with recoverable conditions. - const isTransient = logContext === "network error" || logContext === "database error" || logContext === "database connection error"; + // Note: "database error" (schema/constraint issues) is NOT transient and stays at error level. + const isTransient = logContext === "network error" || logContext === "database connection error"; const logMessage = `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`; if (isTransient) { await ErrorLogger.warning("scraper", logMessage, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }).catch(() => {}); From a3ad14fd3f593f9a8e42f39fdcb3f3254f690804 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Mar 2026 18:21:27 +0000 Subject: [PATCH 5/6] fix: consolidate transient-error detection, harden error classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skeptic review fixes: - Extract isTransientDbError() to shared server/utils/dbErrors.ts to prevent drift between scraper inline regex and scheduler function - ENOTFOUND and certificate/ssl/tls errors are permanent misconfigs, not transient — exclude from warning downgrade in scraper outer catch - Add try/catch to logSchedulerError so logging failures during DB outage don't mask the original error https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.ts | 45 +++++++++++++----------------------- server/services/scraper.ts | 8 +++++-- server/utils/dbErrors.ts | 21 +++++++++++++++++ 3 files changed, 43 insertions(+), 31 deletions(-) create mode 100644 server/utils/dbErrors.ts diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index e954a1b9..4e7c9aee 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -7,6 +7,7 @@ import { ErrorLogger } from "./logger"; import { notificationTablesExist } from "./notificationReady"; import { browserlessCircuitBreaker } from "./browserlessCircuitBreaker"; import { ensureMonitorConditionsTable } from "./ensureTables"; +import { isTransientDbError } from "../utils/dbErrors"; import { db } from "../db"; import { sql } from "drizzle-orm"; @@ -21,25 +22,6 @@ let schedulerStarted = false; const cronTasks: ReturnType[] = []; const pendingTimeouts = new Set>(); -/** - * Transient DB errors that are safe to retry (connection drops, pool exhaustion). - * Checks both PostgreSQL error codes (stable across driver versions) and message - * substrings (fallback for connection-level errors that lack a code). - */ -function isTransientDbError(err: unknown): boolean { - if (!(err instanceof Error)) return false; - // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown - const code = (err as any).code; - if (typeof code === "string" && (/^08/.test(code) || code === "57P01")) return true; - const msg = err.message.toLowerCase(); - return msg.includes("connection terminated") - || msg.includes("connection timeout") - || msg.includes("connection refused") - || msg.includes("econnreset") - || msg.includes("econnrefused") - || msg.includes("cannot acquire") - || msg.includes("timeout expired"); -} /** Retry a DB operation once after a 1 s delay on transient connection errors. */ async function withDbRetry(fn: () => Promise): Promise { @@ -62,16 +44,21 @@ async function logSchedulerError( error: unknown, context?: Record, ): Promise { - if (isTransientDbError(error)) { - await ErrorLogger.warning("scheduler", `${message} (transient, will retry)`, { - errorMessage: error instanceof Error ? error.message : String(error), - ...context, - }); - } else { - await ErrorLogger.error("scheduler", message, error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - ...context, - }); + try { + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", `${message} (transient, will retry)`, { + errorMessage: error instanceof Error ? error.message : String(error), + ...context, + }); + } else { + await ErrorLogger.error("scheduler", message, error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? error.message : String(error), + ...context, + }); + } + } catch { + // If logging itself fails (e.g., logging DB also down), don't mask the original error + console.error(`[Scheduler] Failed to log error: ${message}`, error instanceof Error ? error.message : error); } } diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 552ebb3e..53c20031 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -5,6 +5,7 @@ import { processChangeNotification } from "./notification"; import { ErrorLogger } from "./logger"; import { BrowserlessUsageTracker } from "./browserlessTracker"; import { browserlessCircuitBreaker } from "./browserlessCircuitBreaker"; +import { isTransientDbError } from "../utils/dbErrors"; import { browserPool } from "./browserPool"; import { validateUrlBeforeFetch, ssrfSafeFetch } from "../utils/ssrf"; import { type Monitor, monitorMetrics, monitors } from "@shared/schema"; @@ -1350,7 +1351,7 @@ export async function checkMonitor(monitor: Monitor): Promise<{ // Non-transient errors (schema/constraint) indicate a real problem — log as error. const dbErrMsg = dbError instanceof Error ? dbError.message : String(dbError); const retryErrMsg = retryError instanceof Error ? retryError.message : String(retryError); - const isTransientSave = /connection terminated|connection timeout|connection refused|econnreset|econnrefused|cannot acquire|timeout expired/i.test(retryErrMsg); + const isTransientSave = isTransientDbError(retryError); const saveContext = { monitorId: monitor.id, monitorName: monitor.name, @@ -1467,7 +1468,10 @@ export async function checkMonitor(monitor: Monitor): Promise<{ // Transient network/connection errors are expected and retried automatically — // log as warnings to avoid polluting the error log with recoverable conditions. // Note: "database error" (schema/constraint issues) is NOT transient and stays at error level. - const isTransient = logContext === "network error" || logContext === "database connection error"; + // Note: ENOTFOUND, certificate/ssl/tls errors are permanent misconfigurations, not transient. + const errMsg = error instanceof Error ? error.message : ""; + const isPermanentNetworkError = /ENOTFOUND|EAI_AGAIN|certificate|ssl|tls/i.test(errMsg); + const isTransient = (logContext === "network error" && !isPermanentNetworkError) || logContext === "database connection error"; const logMessage = `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`; if (isTransient) { await ErrorLogger.warning("scraper", logMessage, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }).catch(() => {}); diff --git a/server/utils/dbErrors.ts b/server/utils/dbErrors.ts new file mode 100644 index 00000000..76cc1c66 --- /dev/null +++ b/server/utils/dbErrors.ts @@ -0,0 +1,21 @@ +/** + * Transient DB errors that are safe to retry (connection drops, pool exhaustion). + * Checks both PostgreSQL error codes (stable across driver versions) and message + * substrings (fallback for connection-level errors that lack a code). + * + * Shared between scheduler and scraper to ensure consistent transient classification. + */ +export function isTransientDbError(err: unknown): boolean { + if (!(err instanceof Error)) return false; + // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown + const code = (err as any).code; + if (typeof code === "string" && (/^08/.test(code) || code === "57P01")) return true; + const msg = err.message.toLowerCase(); + return msg.includes("connection terminated") + || msg.includes("connection timeout") + || msg.includes("connection refused") + || msg.includes("econnreset") + || msg.includes("econnrefused") + || msg.includes("cannot acquire") + || msg.includes("timeout expired"); +} From 2a8d20359e30266408210678a95d0ca962194079 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Mar 2026 06:36:48 +0000 Subject: [PATCH 6/6] fix: address CodeRabbit review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add PG pool exhaustion codes (53300, 57P03) and messages to isTransientDbError for complete transient error coverage - Remove EAI_AGAIN from permanent network errors (it's transient) - Add SSRF check in Browserless failure path — SSRF blocks stay at error level even in the rendered extraction warning path - Remove withDbRetry from processQueuedNotifications/processDigestCron to prevent duplicate notification deliveries on retry - Tighten test assertions: also verify ErrorLogger.error/warning not called where the opposite level is expected - Use unambiguous non-transient error message in cleanup test https://claude.ai/code/session_01CAV9VUnHbJ66A4oKNbr9Kk --- server/services/scheduler.test.ts | 43 +++++++++++-------------------- server/services/scheduler.ts | 7 +++-- server/services/scraper.test.ts | 4 +++ server/services/scraper.ts | 22 ++++++++++++---- server/utils/dbErrors.ts | 9 ++++--- 5 files changed, 47 insertions(+), 38 deletions(-) diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index d8da3ca3..18db4414 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -666,7 +666,7 @@ describe("daily metrics cleanup", () => { it("logs error when cleanup fails with non-transient DB error", async () => { await startScheduler(); - mockDbExecute.mockRejectedValueOnce(new Error("DB timeout")); + mockDbExecute.mockRejectedValueOnce(new Error('relation "monitor_metrics" does not exist')); await runCron("0 3 * * *"); expect(ErrorLogger.error).toHaveBeenCalledWith( @@ -674,7 +674,7 @@ describe("daily metrics cleanup", () => { "monitor_metrics cleanup failed", expect.any(Error), expect.objectContaining({ - errorMessage: "DB timeout", + errorMessage: 'relation "monitor_metrics" does not exist', retentionDays: 90, table: "monitor_metrics", }) @@ -699,6 +699,13 @@ describe("daily metrics cleanup", () => { table: "monitor_metrics", }) ); + // Verify the monitor_metrics cleanup itself didn't log an error (other cleanup tasks may) + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + "monitor_metrics cleanup failed", + expect.anything(), + expect.anything() + ); }); it("handles non-Error thrown in cleanup (uses String coercion)", async () => { @@ -843,18 +850,13 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { }); it("logs warning (not error) when processQueuedNotifications fails with transient DB error", async () => { - // withDbRetry retries once on transient error (1s delay via trackTimeout), - // then outer catch checks isTransientDbError for warning vs error. + // Not wrapped in withDbRetry (to prevent duplicate deliveries), but + // logSchedulerError still classifies transient errors as warnings. mockProcessQueuedNotifications - .mockRejectedValueOnce(new Error("Connection terminated")) .mockRejectedValueOnce(new Error("Connection terminated")); await startScheduler(); - const callbacks = cronCallbacks["*/1 * * * *"]; - // Run notification cron (index 0) without awaiting — withDbRetry's 1s timeout needs advancement - const cronPromise = callbacks[0](); - await vi.advanceTimersByTimeAsync(2000); - await cronPromise; + await runCron("*/1 * * * *"); expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", @@ -863,25 +865,15 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { errorMessage: "Connection terminated", }) ); - // Should NOT have called ErrorLogger.error for this transient failure - expect(ErrorLogger.error).not.toHaveBeenCalledWith( - "scheduler", - expect.stringContaining("notification"), - expect.anything(), - expect.anything() - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); it("logs warning (not error) when processDigestCron fails with transient DB error", async () => { mockProcessDigestCron - .mockRejectedValueOnce(new Error("Connection terminated")) .mockRejectedValueOnce(new Error("Connection terminated")); await startScheduler(); - const callbacks = cronCallbacks["*/1 * * * *"]; - const cronPromise = callbacks[0](); - await vi.advanceTimersByTimeAsync(2000); - await cronPromise; + await runCron("*/1 * * * *"); expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", @@ -890,12 +882,7 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { errorMessage: "Connection terminated", }) ); - expect(ErrorLogger.error).not.toHaveBeenCalledWith( - "scheduler", - expect.stringContaining("Digest"), - expect.anything(), - expect.anything() - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); }); diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 4e7c9aee..d25c04f5 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -251,12 +251,15 @@ export async function startScheduler() { notificationCronRunning = true; try { try { - await withDbRetry(() => processQueuedNotifications()); + // Not wrapped in withDbRetry: these functions deliver notifications + // before marking entries as delivered. Retrying the entire function + // could cause duplicate email/webhook/Slack deliveries. + await processQueuedNotifications(); } catch (error) { await logSchedulerError("Queued notification processing failed", error); } try { - await withDbRetry(() => processDigestCron()); + await processDigestCron(); } catch (error) { await logSchedulerError("Digest processing failed", error); } diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index 97b920a4..1fd55bc8 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -4227,6 +4227,7 @@ describe("checkMonitor outer catch resilience", () => { retryError: "connection terminated", }), ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); it("returns result even when ErrorLogger.error rejects in outer catch", async () => { @@ -5862,6 +5863,7 @@ describe("extractWithBrowserless error classification in logs", () => { // extractWithBrowserless no longer logs — the caller (checkMonitor) logs // with fuller context to avoid duplicate error entries. expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); it("re-throws ECONNREFUSED without logging", async () => { @@ -5872,6 +5874,7 @@ describe("extractWithBrowserless error classification in logs", () => { ).rejects.toThrow(); expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); it("re-throws errors without logging even when monitor name provided", async () => { @@ -5882,6 +5885,7 @@ describe("extractWithBrowserless error classification in logs", () => { ).rejects.toThrow(); expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); }); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 53c20031..3201c4a8 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -1199,10 +1199,21 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } if (lastBrowserlessErr) { - // Downgrade to warning: Browserless failures are expected for sites that - // block headless browsers. The circuit breaker and retry logic handle recovery. - const classified = classifyBrowserlessError(lastBrowserlessErr instanceof Error ? lastBrowserlessErr.message : "Unknown error"); - await ErrorLogger.warning("scraper", `"${monitor.name}" — rendered page extraction failed: ${classified}`, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); + const rawBrowserlessMsg = lastBrowserlessErr instanceof Error ? lastBrowserlessErr.message : "Unknown error"; + if (/SSRF blocked/i.test(rawBrowserlessMsg)) { + // SSRF blocks are security-relevant — keep at error level + await ErrorLogger.error( + "scraper", + `"${monitor.name}" — rendered page extraction blocked by SSRF protection`, + lastBrowserlessErr instanceof Error ? lastBrowserlessErr : null, + { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }, + ).catch(() => {}); + } else { + // Downgrade to warning: Browserless failures are expected for sites that + // block headless browsers. The circuit breaker and retry logic handle recovery. + const classified = classifyBrowserlessError(rawBrowserlessMsg); + await ErrorLogger.warning("scraper", `"${monitor.name}" — rendered page extraction failed: ${classified}`, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); + } } const durationMs = Date.now() - startTime; @@ -1469,8 +1480,9 @@ export async function checkMonitor(monitor: Monitor): Promise<{ // log as warnings to avoid polluting the error log with recoverable conditions. // Note: "database error" (schema/constraint issues) is NOT transient and stays at error level. // Note: ENOTFOUND, certificate/ssl/tls errors are permanent misconfigurations, not transient. + // Note: EAI_AGAIN is transient (temporary DNS resolver failure), so it is NOT in this list. const errMsg = error instanceof Error ? error.message : ""; - const isPermanentNetworkError = /ENOTFOUND|EAI_AGAIN|certificate|ssl|tls/i.test(errMsg); + const isPermanentNetworkError = /ENOTFOUND|certificate|ssl|tls/i.test(errMsg); const isTransient = (logContext === "network error" && !isPermanentNetworkError) || logContext === "database connection error"; const logMessage = `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`; if (isTransient) { diff --git a/server/utils/dbErrors.ts b/server/utils/dbErrors.ts index 76cc1c66..1b0d7a0c 100644 --- a/server/utils/dbErrors.ts +++ b/server/utils/dbErrors.ts @@ -7,9 +7,10 @@ */ export function isTransientDbError(err: unknown): boolean { if (!(err instanceof Error)) return false; - // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown + // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown, + // 57P03 = cannot_connect_now, 53300 = too_many_connections (pool exhaustion) const code = (err as any).code; - if (typeof code === "string" && (/^08/.test(code) || code === "57P01")) return true; + if (typeof code === "string" && (/^08/.test(code) || ["57P01", "57P03", "53300"].includes(code))) return true; const msg = err.message.toLowerCase(); return msg.includes("connection terminated") || msg.includes("connection timeout") @@ -17,5 +18,7 @@ export function isTransientDbError(err: unknown): boolean { || msg.includes("econnreset") || msg.includes("econnrefused") || msg.includes("cannot acquire") - || msg.includes("timeout expired"); + || msg.includes("timeout expired") + || msg.includes("too many clients") + || msg.includes("remaining connection slots"); }