diff --git a/Areas/Public/Controllers/TilesController.cs b/Areas/Public/Controllers/TilesController.cs index 4b6d09aa..93322731 100644 --- a/Areas/Public/Controllers/TilesController.cs +++ b/Areas/Public/Controllers/TilesController.cs @@ -31,8 +31,10 @@ public class TilesController : Controller /// Retry-After header value (in seconds) sent with HTTP 503 when the outbound budget is exhausted. /// Set to 5s to align with the outbound budget's refill rate: at 2 tokens/sec /// (ReplenishIntervalMs=500) with BurstCapacity=10, a full burst refills in ~5 seconds. + /// Also exposed to the client via wayfarerTileConfig.retryAfterSeconds so the + /// tile layer can derive its slow-retry interval without hardcoding the value. /// - private const string BudgetRetryAfterSeconds = "5"; + internal const int BudgetRetryAfterSeconds = 5; /// /// Thread-safe dictionary for rate limiting anonymous tile requests by IP address. @@ -163,7 +165,7 @@ public async Task GetTile(int z, int x, int y) if (result.BudgetExhausted) { _logger.LogWarning("Tile budget exhausted for {Z}/{X}/{Y}", z, x, y); - Response.Headers["Retry-After"] = BudgetRetryAfterSeconds; + Response.Headers["Retry-After"] = BudgetRetryAfterSeconds.ToString(); return StatusCode(503, "Tile server busy. 
Please retry shortly."); } diff --git a/CHANGELOG.md b/CHANGELOG.md index 14724aa2..8713dce5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # CHANGELOG +## [1.2.24] - 2026-03-26 + +### Changed +- Default per-IP outbound budget increased from 30 to 80 cache misses/min — 30 was too low for cold-cache zoom-17 loads (~35 tiles), causing immediate per-IP rejection before retries could succeed (#206) +- Slow retry interval reduced from 30s to 15s — derived from server's `retryAfterSeconds * 3` instead of hardcoded (#206) +- Client-side concurrency pool size now derived from server's `burstCapacity * 0.6` (injected via `wayfarerTileConfig`) instead of hardcoded 6 (#206) +- Slow retry delay now derived from server's `retryAfterSeconds * 3` (injected via `wayfarerTileConfig`) instead of hardcoded 30s (#206) +- `TilesController.BudgetRetryAfterSeconds` changed from `private string` to `internal int` for config injection (#206) +- `TileCacheService.OutboundBurstCapacity` added as public accessor for `OutboundBudget.BurstCapacity` (#206) +- `wayfarerTileConfig` in `_Layout.cshtml` now includes `burstCapacity` and `retryAfterSeconds` from server config (#206) + ## [1.2.23] - 2026-03-26 ### Fixed diff --git a/Models/ApplicationSettings.cs b/Models/ApplicationSettings.cs index 85409696..d146df16 100644 --- a/Models/ApplicationSettings.cs +++ b/Models/ApplicationSettings.cs @@ -16,7 +16,7 @@ public class ApplicationSettings public const string DefaultTileProviderAttribution = "© OpenStreetMap contributors"; public const int DefaultTileRateLimitPerMinute = 600; public const int DefaultTileRateLimitAuthenticatedPerMinute = 2000; - public const int DefaultTileOutboundBudgetPerIpPerMinute = 30; + public const int DefaultTileOutboundBudgetPerIpPerMinute = 80; public const int DefaultProxyImageRateLimitPerMinute = 200; public const int DefaultMaxProxyImageDownloadMB = 50; diff --git a/Services/TileCacheService.cs b/Services/TileCacheService.cs index 0c42e159..e24244c9 
100644 --- a/Services/TileCacheService.cs +++ b/Services/TileCacheService.cs @@ -307,6 +307,13 @@ internal static void DrainForTesting() /// public static void StopOutboundBudget() => OutboundBudget.Stop(); + /// + /// Exposes the outbound budget burst capacity for client-side configuration. + /// Injected into wayfarerTileConfig.burstCapacity by _Layout.cshtml so the + /// tile layer can derive its concurrency pool size without hardcoding the value. + /// + public static int OutboundBurstCapacity => OutboundBudget.BurstCapacity; + /// /// Reconciles with the authoritative database sum. /// Called periodically to correct drift diff --git a/Views/Shared/_Layout.cshtml b/Views/Shared/_Layout.cshtml index ffcc7f97..19a9c814 100644 --- a/Views/Shared/_Layout.cshtml +++ b/Views/Shared/_Layout.cshtml @@ -1,5 +1,7 @@ @using System.Text.Json @using Wayfarer.Models +@using Wayfarer.Services +@using Wayfarer.Areas.Public.Controllers @inject Wayfarer.Parsers.IApplicationSettingsService ApplicationSettingsService @{ bool embed = ViewBag.IsEmbed as bool? ?? false; @@ -199,11 +201,15 @@ var tileAttribution = tileSettings?.TileProviderAttribution ?? ApplicationSettings.DefaultTileProviderAttribution; } diff --git a/wwwroot/js/retryTileLayer.js b/wwwroot/js/retryTileLayer.js index bf05ed3a..5f51dcd6 100644 --- a/wwwroot/js/retryTileLayer.js +++ b/wwwroot/js/retryTileLayer.js @@ -4,20 +4,18 @@ * while treating 404 as permanent failure. * * Concurrency control: - * - Global pool limits concurrent tile fetches (default 6) to prevent overwhelming - * the server's outbound budget (10 burst, 2/sec) and per-IP budget (default 30/min). - * Without this, a cold-cache load at zoom 17 (~35 tiles) sends all requests - * simultaneously, exhausting both budgets and causing cascading 503 failures where - * retries also get rejected (the per-IP counter increments on every request, even - * rejected ones, so the count quickly snowballs past the limit). 
+ * - Global pool limits concurrent tile fetches to prevent overwhelming the server's + * outbound budget and per-IP budget. Pool size is derived from the server's burst + * capacity (injected via wayfarerTileConfig.burstCapacity) — set to 60% of burst + * to leave headroom for other concurrent users. * * Retry strategy (two phases): * - Fast phase: up to 5 retries with exponential backoff (respects Retry-After header) * - Slow phase: if fast retries exhaust on 503 or network error, enters indefinite - * 30-second polling until the tile loads or is removed (panned/zoomed away). - * This handles cold-cache scenarios where the per-IP budget (30/min) is exceeded - * by the number of tiles needed — tiles that can't be served within the fast retry - * window will load once the sliding-window budget decays. + * polling (interval derived from retryAfterSeconds * 3) until the tile loads or is + * removed (panned/zoomed away). This handles cold-cache scenarios where the per-IP + * budget is exceeded by the number of tiles needed — tiles that can't be served + * within the fast retry window will load once the sliding-window budget decays (~60s). * - 404 and other HTTP errors are permanent failures (no retry) * * Design note: upstream HTTP 500/502/504 errors are treated as permanent failures @@ -26,12 +24,14 @@ * up stale retry timers. Users will see gray tiles until upstream recovers. */ +// ---------- Server config (injected by _Layout.cshtml) ---------- +const _config = window.wayfarerTileConfig || {}; + // ---------- Global concurrency pool ---------- -// Limits concurrent tile fetches to prevent overwhelming the server's per-IP outbound -// budget (default 30/min) and global token budget (10 burst, 2/sec). Tiles beyond the -// limit queue client-side and proceed as slots free up, producing the progressive -// "stream-in" effect on cold-cache loads instead of a wall of 503s. 
-const _poolSize = 6; +// Pool size derived from server's outbound burst capacity: 60% of burst leaves headroom +// for other concurrent users while still allowing a cold-cache load to progress quickly. +// Falls back to a burst capacity of 10 (pool size 6) if config is unavailable (e.g., inline scripts outside _Layout). +const _poolSize = Math.ceil((_config.burstCapacity || 10) * 0.6); let _inFlight = 0; const _waiting = []; @@ -77,11 +77,18 @@ const _releaseSlot = () => { } }; +// ---------- Retry timing derived from server config ---------- +// retryAfterSeconds is the Retry-After value the server sends on 503 (matches the budget +// replenishment cycle). Slow retry uses 3x that interval to give the per-IP sliding window +// time to decay between attempts. Falls back to 5s if config is unavailable. +const _retryAfterSeconds = _config.retryAfterSeconds || 5; +const _defaultSlowRetryDelayMs = _retryAfterSeconds * 3 * 1000; + const RetryTileLayer = L.TileLayer.extend({ options: { maxRetries: 5, retryDelayMs: 1000, - slowRetryDelayMs: 30000, + slowRetryDelayMs: _defaultSlowRetryDelayMs, }, /** @@ -139,9 +146,9 @@ const RetryTileLayer = L.TileLayer.extend({ /** * Schedules a slow-phase retry for a tile whose fast retries have been exhausted. - * Fires every slowRetryDelayMs (default 30s) indefinitely until the tile either - * loads successfully or is removed (signal aborted). Resets the attempt counter - * to 0 so the tile gets a fresh fast-retry cycle on each slow-phase trigger. + * Fires every slowRetryDelayMs (derived from retryAfterSeconds * 3) indefinitely + * until the tile either loads successfully or is removed (signal aborted). Resets + * the attempt counter to 0 so the tile gets a fresh fast-retry cycle on each trigger. * @param {string} url - The tile URL. * @param {HTMLImageElement} tile - The tile image element. * @param {Function} done - Leaflet callback to signal completion. 
@@ -164,8 +171,9 @@ const RetryTileLayer = L.TileLayer.extend({ * Fetches a tile via fetch(), retries on 503 or network error with backoff. * Two retry phases: * - Fast: attempts 0..maxRetries with exponential backoff (seconds) - * - Slow: after fast retries exhaust on 503/network error, retries every 30s - * indefinitely until the tile loads or is removed + * - Slow: after fast retries exhaust on 503/network error, retries every ~15s + * (derived from server's retryAfterSeconds * 3) indefinitely until the tile + * loads or is removed * Acquires a concurrency slot before each fetch attempt to prevent overwhelming * the server's budget. Respects AbortSignal so removed tiles stop immediately. * @param {string} url - The tile URL. @@ -234,7 +242,7 @@ const RetryTileLayer = L.TileLayer.extend({ } // Slow phase: fast retries exhausted but 503 is transient (budget will - // recover). Keep retrying every ~30s until the tile loads or is removed. + // recover). Keep retrying until the tile loads or is removed. layer._scheduleSlowRetry(url, tile, done, signal); return; } @@ -272,13 +280,12 @@ const RetryTileLayer = L.TileLayer.extend({ * Reads URL and attribution from window.wayfarerTileConfig (injected by _Layout.cshtml). * @param {Object} [opts] - Additional L.TileLayer options to merge. Supports standard Leaflet * options (e.g., {zoomAnimation: true}) plus retry tuning: maxRetries (default 5), - * retryDelayMs (default 1000), slowRetryDelayMs (default 30000). + * retryDelayMs (default 1000), slowRetryDelayMs (derived from retryAfterSeconds * 3). * @returns {L.TileLayer} The tile layer instance (call .addTo(map) on the result). 
*/ export const createTileLayer = (opts) => { - const config = window.wayfarerTileConfig || {}; - const url = config.tilesUrl || (window.location.origin + '/Public/tiles/{z}/{x}/{y}.png'); - const attribution = config.attribution || '\u00a9 OpenStreetMap contributors'; + const url = _config.tilesUrl || (window.location.origin + '/Public/tiles/{z}/{x}/{y}.png'); + const attribution = _config.attribution || '\u00a9 OpenStreetMap contributors'; return new RetryTileLayer(url, Object.assign({ maxZoom: 19, attribution: attribution,