diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index ccd3f9c8..18db4414 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -664,9 +664,9 @@ describe("daily metrics cleanup", () => { consoleSpy.mockRestore(); }); - it("logs error when cleanup query fails", async () => { + it("logs error when cleanup fails with non-transient DB error", async () => { await startScheduler(); - mockDbExecute.mockRejectedValueOnce(new Error("DB timeout")); + mockDbExecute.mockRejectedValueOnce(new Error('relation "monitor_metrics" does not exist')); await runCron("0 3 * * *"); expect(ErrorLogger.error).toHaveBeenCalledWith( @@ -674,18 +674,46 @@ describe("daily metrics cleanup", () => { "monitor_metrics cleanup failed", expect.any(Error), expect.objectContaining({ - errorMessage: "DB timeout", + errorMessage: 'relation "monitor_metrics" does not exist', retentionDays: 90, table: "monitor_metrics", }) ); }); + it("logs warning when cleanup fails with transient DB error", async () => { + await startScheduler(); + mockDbExecute + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + const cronPromise = runCron("0 3 * * *"); + await vi.advanceTimersByTimeAsync(2000); + await cronPromise; + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + "monitor_metrics cleanup failed (transient, will retry)", + expect.objectContaining({ + errorMessage: "Connection terminated", + retentionDays: 90, + table: "monitor_metrics", + }) + ); + // Verify the monitor_metrics cleanup itself didn't log an error (other cleanup tasks may) + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + "monitor_metrics cleanup failed", + expect.anything(), + expect.anything() + ); + }); + it("handles non-Error thrown in cleanup (uses String coercion)", async () => { await startScheduler(); mockDbExecute.mockRejectedValueOnce("disk full"); await 
runCron("0 3 * * *"); + // Non-Error values are not transient, so logged as error expect(ErrorLogger.error).toHaveBeenCalledWith( "scheduler", "monitor_metrics cleanup failed", @@ -820,6 +848,42 @@ describe("notification queue and digest cron (*/1 * * * *)", () => { }) ); }); + + it("logs warning (not error) when processQueuedNotifications fails with transient DB error", async () => { + // Not wrapped in withDbRetry (to prevent duplicate deliveries), but + // logSchedulerError still classifies transient errors as warnings. + mockProcessQueuedNotifications + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + await runCron("*/1 * * * *"); + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Queued notification processing failed (transient, will retry)"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); + }); + + it("logs warning (not error) when processDigestCron fails with transient DB error", async () => { + mockProcessDigestCron + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + await runCron("*/1 * * * *"); + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Digest processing failed (transient, will retry)"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); + }); }); describe("stopScheduler", () => { @@ -931,7 +995,7 @@ describe("withDbRetry and re-entrancy guards", () => { ); }); - it("logs error when retry also fails on transient error", async () => { + it("logs warning when retry also fails on transient error", async () => { mockGetAllActiveMonitors .mockRejectedValueOnce(new Error("Connection terminated")) .mockRejectedValueOnce(new Error("Connection terminated again")); @@ -942,11 +1006,11 @@ describe("withDbRetry and re-entrancy 
guards", () => { await cronPromise; expect(mockGetAllActiveMonitors).toHaveBeenCalledTimes(2); - expect(ErrorLogger.error).toHaveBeenCalledWith( + // Transient DB errors are downgraded to warnings via logSchedulerError helper + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scheduler", - "Scheduler iteration failed", - expect.any(Error), - expect.objectContaining({ phase: "fetching active monitors" }) + expect.stringContaining("Scheduler iteration failed (transient, will retry)"), + expect.objectContaining({ activeChecks: 0 }) ); }); @@ -1045,6 +1109,34 @@ describe("withDbRetry and re-entrancy guards", () => { resolveRetries([]); await firstRun; }); + + it("logs warning (not error) when webhook processing fails with transient DB error", async () => { + // Both withDbRetry attempts fail with transient error + mockStorage.getPendingWebhookRetries + .mockRejectedValueOnce(new Error("Connection terminated")) + .mockRejectedValueOnce(new Error("Connection terminated")); + + await startScheduler(); + const callbacks = cronCallbacks["*/1 * * * *"]; + await callbacks[0](); // notification cron + const webhookPromise = callbacks[1](); + await vi.advanceTimersByTimeAsync(2000); + await webhookPromise; + + expect(ErrorLogger.warning).toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Webhook retry processing failed (transient, will retry)"), + expect.objectContaining({ + errorMessage: "Connection terminated", + }) + ); + expect(ErrorLogger.error).not.toHaveBeenCalledWith( + "scheduler", + expect.stringContaining("Webhook"), + expect.anything(), + expect.anything() + ); + }); }); describe("webhook retry cumulative backoff", () => { diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 9341150e..d25c04f5 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -7,6 +7,7 @@ import { ErrorLogger } from "./logger"; import { notificationTablesExist } from "./notificationReady"; import { browserlessCircuitBreaker } 
from "./browserlessCircuitBreaker"; import { ensureMonitorConditionsTable } from "./ensureTables"; +import { isTransientDbError } from "../utils/dbErrors"; import { db } from "../db"; import { sql } from "drizzle-orm"; @@ -21,25 +22,6 @@ let schedulerStarted = false; const cronTasks: ReturnType<typeof cron.schedule>[] = []; const pendingTimeouts = new Set<ReturnType<typeof setTimeout>>(); -/** - * Transient DB errors that are safe to retry (connection drops, pool exhaustion). - * Checks both PostgreSQL error codes (stable across driver versions) and message - * substrings (fallback for connection-level errors that lack a code). - */ -function isTransientDbError(err: unknown): boolean { - if (!(err instanceof Error)) return false; - // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown - const code = (err as any).code; - if (typeof code === "string" && (/^08/.test(code) || code === "57P01")) return true; - const msg = err.message.toLowerCase(); - return msg.includes("connection terminated") - || msg.includes("connection timeout") - || msg.includes("connection refused") - || msg.includes("econnreset") - || msg.includes("econnrefused") - || msg.includes("cannot acquire") - || msg.includes("timeout expired"); -} /** Retry a DB operation once after a 1 s delay on transient connection errors. */ async function withDbRetry<T>(fn: () => Promise<T>): Promise<T> { @@ -56,6 +38,30 @@ async function withDbRetry<T>(fn: () => Promise<T>): Promise<T> { } } +/** Log a caught error as warning (transient) or error (non-transient) based on isTransientDbError. */ +async function logSchedulerError( + message: string, + error: unknown, + context?: Record<string, unknown>, +): Promise<void> { + try { + if (isTransientDbError(error)) { + await ErrorLogger.warning("scheduler", `${message} (transient, will retry)`, { + errorMessage: error instanceof Error ? error.message : String(error), + ...context, + }); + } else { + await ErrorLogger.error("scheduler", message, error instanceof Error ? error : null, { + errorMessage: error instanceof Error ? 
error.message : String(error), + ...context, + }); + } + } catch { + // If logging itself fails (e.g., logging DB also down), don't mask the original error + console.error(`[Scheduler] Failed to log error: ${message}`, error instanceof Error ? error.message : error); + } +} + /** Schedule a callback with automatic cleanup from pendingTimeouts when it fires. */ function trackTimeout(callback: () => void, delayMs: number): ReturnType<typeof setTimeout> { const handle = setTimeout(() => { @@ -227,11 +233,7 @@ export async function startScheduler() { } } } catch (error) { - await ErrorLogger.error("scheduler", "Scheduler iteration failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - activeChecks, - phase: "fetching active monitors", - }); + await logSchedulerError("Scheduler iteration failed", error, { activeChecks, phase: "fetching active monitors" }); } finally { mainCronRunning = false; } @@ -249,18 +251,17 @@ export async function startScheduler() { notificationCronRunning = true; try { try { + // Not wrapped in withDbRetry: these functions deliver notifications + // before marking entries as delivered. Retrying the entire function + // could cause duplicate email/webhook/Slack deliveries. await processQueuedNotifications(); } catch (error) { - await ErrorLogger.error("scheduler", "Queued notification processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); + await logSchedulerError("Queued notification processing failed", error); } try { await processDigestCron(); } catch (error) { - await ErrorLogger.error("scheduler", "Digest processing failed", error instanceof Error ? 
error.message : String(error), - }); + await logSchedulerError("Digest processing failed", error); } } finally { notificationCronRunning = false; @@ -361,9 +362,7 @@ export async function startScheduler() { } } } catch (error) { - await ErrorLogger.error("scheduler", "Webhook retry processing failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - }); + await logSchedulerError("Webhook retry processing failed", error); } finally { webhookCronRunning = false; } @@ -371,52 +370,42 @@ export async function startScheduler() { } // Daily cleanup: prune monitor_metrics older than 90 days to prevent unbounded growth + // All cleanup operations are best-effort background tasks — transient DB failures + // are logged as warnings since the next daily run will catch up. cronTasks.push(cron.schedule("0 3 * * *", async () => { try { - const result = await db.execute( + const result = await withDbRetry(() => db.execute( sql`DELETE FROM monitor_metrics WHERE checked_at < NOW() - INTERVAL '90 days'` - ); + )); const deleted = (result as any).rowCount ?? 0; if (deleted > 0) { console.log(`[Cleanup] Pruned ${deleted} monitor_metrics rows older than 90 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "monitor_metrics cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? 
error.message : String(error), - retentionDays: 90, - table: "monitor_metrics", - }); + await logSchedulerError("monitor_metrics cleanup failed", error, { retentionDays: 90, table: "monitor_metrics" }); } // Delivery log cleanup: prune entries older than 30 days try { const olderThan = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); - const entriesDeleted = await storage.cleanupOldDeliveryLogs(olderThan); + const entriesDeleted = await withDbRetry(() => storage.cleanupOldDeliveryLogs(olderThan)); if (entriesDeleted > 0) { console.log(`[Cleanup] Pruned ${entriesDeleted} delivery_log rows older than 30 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "delivery_log cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? error.message : String(error), - retentionDays: 30, - table: "delivery_log", - }); + await logSchedulerError("delivery_log cleanup failed", error, { retentionDays: 30, table: "delivery_log" }); } // Notification queue cleanup: prune permanently failed entries older than 7 days try { - const deleted = await storage.cleanupPermanentlyFailedQueueEntries( + const deleted = await withDbRetry(() => storage.cleanupPermanentlyFailedQueueEntries( new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) - ); + )); if (deleted > 0) { console.log(`[Cleanup] Pruned ${deleted} permanently failed notification_queue rows older than 7 days`); } } catch (error) { - await ErrorLogger.error("scheduler", "notification_queue cleanup failed", error instanceof Error ? error : null, { - errorMessage: error instanceof Error ? 
error.message : String(error), - retentionDays: 7, - table: "notification_queue", - }); + await logSchedulerError("notification_queue cleanup failed", error, { retentionDays: 7, table: "notification_queue" }); } })); diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index e83d5e00..1fd55bc8 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -4044,9 +4044,9 @@ describe("classifyOuterError", () => { expect(result.userMessage).toBe("Connection was reset by the target site"); }); - it("classifies SSRF blocked errors as network error", () => { + it("classifies SSRF blocked errors as ssrf_blocked (not network error)", () => { const result = classifyOuterError(new Error("SSRF blocked: This URL resolves to a private address")); - expect(result.logContext).toBe("network error"); + expect(result.logContext).toBe("ssrf_blocked"); expect(result.userMessage).toBe("URL is not allowed"); }); @@ -4201,9 +4201,9 @@ describe("checkMonitor outer catch resilience", () => { const html = `$49.99`; vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(new Response(html, { status: 200 })); - // Both attempts fail - mockStorage.updateMonitor.mockRejectedValueOnce(new Error("conn reset")); - mockStorage.updateMonitor.mockRejectedValueOnce(new Error("conn reset again")); + // Both attempts fail with transient connection error + mockStorage.updateMonitor.mockRejectedValueOnce(new Error("connection terminated")); + mockStorage.updateMonitor.mockRejectedValueOnce(new Error("connection terminated")); const { ErrorLogger } = await import("./logger"); @@ -4214,20 +4214,20 @@ describe("checkMonitor outer catch resilience", () => { expect(result.currentValue).toBe("$49.99"); expect(result.changed).toBe(true); expect(result.error).toContain("server error prevented saving"); - // Verify enhanced logging includes extracted and previous values - expect(ErrorLogger.error).toHaveBeenCalledWith( + // Transient DB errors are downgraded to 
warnings (will retry via accelerated retry) + expect(ErrorLogger.warning).toHaveBeenCalledWith( "scraper", expect.stringContaining("check succeeded but failed to save result"), - expect.any(Error), expect.objectContaining({ monitorId: 1, extractedValue: "$49.99", previousValue: "$39.99", changed: true, - dbError: "conn reset", - retryError: "conn reset again", + dbError: "connection terminated", + retryError: "connection terminated", }), ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); }); it("returns result even when ErrorLogger.error rejects in outer catch", async () => { @@ -4262,6 +4262,28 @@ describe("checkMonitor outer catch resilience", () => { // Must NOT be the old generic message expect(result.error).not.toBe("Failed to fetch page"); }); + it("classifyOuterError returns 'network error' for transient network errors", () => { + // Verify the classification that drives the transient/non-transient logging split + const { userMessage, logContext } = classifyOuterError(new Error("Connection terminated due to connection timeout")); + expect(logContext).toBe("network error"); + expect(userMessage).toBe("Page took too long to respond"); + }); + + it("classifyOuterError returns 'database error' for DB-specific errors", () => { + const { logContext } = classifyOuterError(new Error("relation 'monitors' does not exist")); + expect(logContext).toBe("database error"); + }); + + it("classifyOuterError returns 'unclassified error' for non-transient errors", () => { + const { logContext } = classifyOuterError(new Error("Something totally unexpected")); + expect(logContext).toBe("unclassified error"); + }); + + it("classifyOuterError returns 'ssrf_blocked' for SSRF errors (not grouped with network errors)", () => { + const { userMessage, logContext } = classifyOuterError(new Error("SSRF blocked: URL is not allowed")); + expect(logContext).toBe("ssrf_blocked"); + expect(userMessage).toBe("URL is not allowed"); + }); }); // 
--------------------------------------------------------------------------- @@ -5831,49 +5853,39 @@ describe("extractWithBrowserless error classification in logs", () => { delete process.env.BROWSERLESS_TOKEN; }); - it("logs classified timeout message to ErrorLogger", async () => { + it("does not log to ErrorLogger (caller handles logging)", async () => { mockConnectOverCDP.mockRejectedValue(new Error("Navigation timeout of 30000ms exceeded")); await expect( extractWithBrowserless("https://example.com", ".price", 1, "My Monitor") ).rejects.toThrow(); - expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining("took too long"), - expect.any(Error), - expect.objectContaining({ url: "https://example.com" }), - ); + // extractWithBrowserless no longer logs — the caller (checkMonitor) logs + // with fuller context to avoid duplicate error entries. + expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); - it("logs classified ECONNREFUSED message to ErrorLogger", async () => { + it("re-throws ECONNREFUSED without logging", async () => { mockConnectOverCDP.mockRejectedValue(new Error("connect ECONNREFUSED 127.0.0.1:443")); await expect( extractWithBrowserless("https://example.com", ".price") ).rejects.toThrow(); - expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining("refused the connection"), - expect.any(Error), - expect.objectContaining({ url: "https://example.com" }), - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); - it("includes monitor name in error label when provided", async () => { + it("re-throws errors without logging even when monitor name provided", async () => { mockConnectOverCDP.mockRejectedValue(new Error("some error")); await expect( extractWithBrowserless("https://example.com", ".price", 1, "Price Tracker") ).rejects.toThrow(); - 
expect(ErrorLogger.error).toHaveBeenCalledWith( - "scraper", - expect.stringContaining('"Price Tracker"'), - expect.any(Error), - expect.objectContaining({ monitorName: "Price Tracker", monitorId: 1 }), - ); + expect(ErrorLogger.error).not.toHaveBeenCalled(); + expect(ErrorLogger.warning).not.toHaveBeenCalled(); }); }); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index bcef4bff..3201c4a8 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -5,6 +5,7 @@ import { processChangeNotification } from "./notification"; import { ErrorLogger } from "./logger"; import { BrowserlessUsageTracker } from "./browserlessTracker"; import { browserlessCircuitBreaker } from "./browserlessCircuitBreaker"; +import { isTransientDbError } from "../utils/dbErrors"; import { browserPool } from "./browserPool"; import { validateUrlBeforeFetch, ssrfSafeFetch } from "../utils/ssrf"; import { type Monitor, monitorMetrics, monitors } from "@shared/schema"; @@ -486,8 +487,13 @@ export function classifyOuterError(error: unknown): { userMessage: string; logCo return { userMessage: "A temporary server error occurred. 
The check will be retried automatically.", logContext: "database connection error" }; } + // SSRF blocks — security-relevant, must stay at error level (not transient) + if (/SSRF blocked/i.test(msg)) { + return { userMessage: sanitizeErrorForClient(msg), logContext: "ssrf_blocked" }; + } + // Network errors — delegate to the existing fetch-error sanitizer - if (/abort|timeout|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|ECONNRESET|socket hang up|certificate|ssl|tls|SSRF|UND_ERR_HEADERS_OVERFLOW/i.test(msg)) { + if (/abort|timeout|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|ECONNRESET|socket hang up|certificate|ssl|tls|UND_ERR_HEADERS_OVERFLOW/i.test(msg)) { return { userMessage: sanitizeErrorForClient(msg), logContext: "network error" }; } @@ -952,9 +958,8 @@ export async function extractWithBrowserless(url: string, selector: string, moni }; }, { pageTimeoutMs }); } catch (error) { - const label = monitorName ? `"${monitorName}" — browser` : "Browser"; - const classified = classifyBrowserlessError(error instanceof Error ? error.message : "Unknown error"); - await ErrorLogger.error("scraper", `${label}-based extraction failed: ${classified}`, error instanceof Error ? error : null, { url, selector, ...(monitorId ? { monitorId } : {}), ...(monitorName ? { monitorName } : {}) }); + // Don't log here — the caller (checkMonitor) logs with fuller context. + // Logging here too would create duplicate error entries for every failure. throw error; } } @@ -976,8 +981,8 @@ async function fetchWithCurl(url: string, monitorId?: number, monitorName?: stri const rethrow = isAbort ? new Error("Page took too long to respond (15s timeout)") : error; - const label = monitorName ? `"${monitorName}" — page` : "Page"; - await ErrorLogger.error("scraper", `${label} fetch with curl failed — the site returned an error or is blocking the request. Verify the URL is correct and the site is accessible.`, rethrow instanceof Error ? rethrow : null, { url, ...(monitorId ? { monitorId } : {}), ...(monitorName ? 
{ monitorName } : {}) }); + // Don't log here — this is a fallback fetch. The caller decides + // whether to log based on the overall pipeline outcome. throw rethrow; } finally { clearTimeout(timeout); @@ -1194,7 +1199,21 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } if (lastBrowserlessErr) { - await ErrorLogger.error("scraper", `"${monitor.name}" — rendered page extraction failed. The site may block automated browsers or the page took too long to load. Try simplifying the selector or check if the site requires login.`, lastBrowserlessErr instanceof Error ? lastBrowserlessErr : null, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); + const rawBrowserlessMsg = lastBrowserlessErr instanceof Error ? lastBrowserlessErr.message : "Unknown error"; + if (/SSRF blocked/i.test(rawBrowserlessMsg)) { + // SSRF blocks are security-relevant — keep at error level + await ErrorLogger.error( + "scraper", + `"${monitor.name}" — rendered page extraction blocked by SSRF protection`, + lastBrowserlessErr instanceof Error ? lastBrowserlessErr : null, + { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }, + ).catch(() => {}); + } else { + // Downgrade to warning: Browserless failures are expected for sites that + // block headless browsers. The circuit breaker and retry logic handle recovery. + const classified = classifyBrowserlessError(rawBrowserlessMsg); + await ErrorLogger.warning("scraper", `"${monitor.name}" — rendered page extraction failed: ${classified}`, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }); + } } const durationMs = Date.now() - startTime; @@ -1338,23 +1357,35 @@ export async function checkMonitor(monitor: Monitor): Promise<{ consecutiveFailures: 0, }); } catch (retryError) { - // Both attempts failed — log with full context + // Both attempts failed. 
Transient DB errors (connection drops) are + // expected and will self-heal via accelerated retry — log as warning. + // Non-transient errors (schema/constraint) indicate a real problem — log as error. const dbErrMsg = dbError instanceof Error ? dbError.message : String(dbError); const retryErrMsg = retryError instanceof Error ? retryError.message : String(retryError); - await ErrorLogger.error( - "scraper", - `"${monitor.name}" check succeeded but failed to save result`, - dbError instanceof Error ? dbError : null, - { - monitorId: monitor.id, - monitorName: monitor.name, - extractedValue: newValue?.substring(0, 200) ?? null, - previousValue: oldValue?.substring(0, 200) ?? null, - changed, - dbError: dbErrMsg, - retryError: retryErrMsg, - }, - ).catch(() => {}); + const isTransientSave = isTransientDbError(retryError); + const saveContext = { + monitorId: monitor.id, + monitorName: monitor.name, + extractedValue: newValue?.substring(0, 200) ?? null, + previousValue: oldValue?.substring(0, 200) ?? null, + changed, + dbError: dbErrMsg, + retryError: retryErrMsg, + }; + if (isTransientSave) { + await ErrorLogger.warning( + "scraper", + `"${monitor.name}" check succeeded but failed to save result (will retry)`, + saveContext, + ).catch(() => {}); + } else { + await ErrorLogger.error( + "scraper", + `"${monitor.name}" check succeeded but failed to save result`, + retryError instanceof Error ? retryError : null, + saveContext, + ).catch(() => {}); + } saveFailed = true; } @@ -1445,12 +1476,25 @@ export async function checkMonitor(monitor: Monitor): Promise<{ } catch (error) { const { userMessage, logContext } = classifyOuterError(error); - await ErrorLogger.error( - "scraper", - `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`, - error instanceof Error ? 
error : null, - { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector } - ).catch(() => {}); + // Transient network/connection errors are expected and retried automatically — + // log as warnings to avoid polluting the error log with recoverable conditions. + // Note: "database error" (schema/constraint issues) is NOT transient and stays at error level. + // Note: ENOTFOUND, certificate/ssl/tls errors are permanent misconfigurations, not transient. + // Note: EAI_AGAIN is transient (temporary DNS resolver failure), so it is NOT in this list. + const errMsg = error instanceof Error ? error.message : ""; + const isPermanentNetworkError = /ENOTFOUND|certificate|ssl|tls/i.test(errMsg); + const isTransient = (logContext === "network error" && !isPermanentNetworkError) || logContext === "database connection error"; + const logMessage = `"${monitor.name}" check failed (${logContext}): ${error instanceof Error ? error.message : "Unknown error"}`; + if (isTransient) { + await ErrorLogger.warning("scraper", logMessage, { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector }).catch(() => {}); + } else { + await ErrorLogger.error( + "scraper", + logMessage, + error instanceof Error ? error : null, + { monitorId: monitor.id, monitorName: monitor.name, url: monitor.url, selector: monitor.selector } + ).catch(() => {}); + } try { await handleMonitorFailure(monitor, "error", userMessage, false); diff --git a/server/utils/dbErrors.ts b/server/utils/dbErrors.ts new file mode 100644 index 00000000..1b0d7a0c --- /dev/null +++ b/server/utils/dbErrors.ts @@ -0,0 +1,24 @@ +/** + * Transient DB errors that are safe to retry (connection drops, pool exhaustion). + * Checks both PostgreSQL error codes (stable across driver versions) and message + * substrings (fallback for connection-level errors that lack a code). + * + * Shared between scheduler and scraper to ensure consistent transient classification. 
+ */ +export function isTransientDbError(err: unknown): boolean { + if (!(err instanceof Error)) return false; + // PostgreSQL error codes: 08xxx = connection exceptions, 57P01 = admin shutdown, + // 57P03 = cannot_connect_now, 53300 = too_many_connections (pool exhaustion) + const code = (err as any).code; + if (typeof code === "string" && (/^08/.test(code) || ["57P01", "57P03", "53300"].includes(code))) return true; + const msg = err.message.toLowerCase(); + return msg.includes("connection terminated") + || msg.includes("connection timeout") + || msg.includes("connection refused") + || msg.includes("econnreset") + || msg.includes("econnrefused") + || msg.includes("cannot acquire") + || msg.includes("timeout expired") + || msg.includes("too many clients") + || msg.includes("remaining connection slots"); +}