Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Areas/Public/Controllers/TilesController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ public class TilesController : Controller
/// Retry-After header value (in seconds) sent with HTTP 503 when the outbound budget is exhausted.
/// Set to 5s to align with <see cref="TileCacheService.OutboundBudget"/>: at 2 tokens/sec
/// (ReplenishIntervalMs=500) with BurstCapacity=10, a full burst refills in ~5 seconds.
/// Also exposed to the client via <c>wayfarerTileConfig.retryAfterSeconds</c> so the
/// tile layer can derive its slow-retry interval without hardcoding the value.
/// </summary>
private const string BudgetRetryAfterSeconds = "5";
internal const int BudgetRetryAfterSeconds = 5;

/// <summary>
/// Thread-safe dictionary for rate limiting anonymous tile requests by IP address.
Expand Down Expand Up @@ -163,7 +165,7 @@ public async Task<IActionResult> GetTile(int z, int x, int y)
if (result.BudgetExhausted)
{
_logger.LogWarning("Tile budget exhausted for {Z}/{X}/{Y}", z, x, y);
Response.Headers["Retry-After"] = BudgetRetryAfterSeconds;
Response.Headers["Retry-After"] = BudgetRetryAfterSeconds.ToString();
return StatusCode(503, "Tile server busy. Please retry shortly.");
}

Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# CHANGELOG

## [1.2.24] - 2026-03-26

### Changed
- Default per-IP outbound budget increased from 30 to 80 cache misses/min — 30 was too low for cold-cache zoom-17 loads (~35 tiles), causing immediate per-IP rejection before retries could succeed (#206)
- Slow retry interval reduced from 30s to 15s — derived from server's `retryAfterSeconds * 3` instead of hardcoded (#206)
- Client-side concurrency pool size now derived from server's `burstCapacity * 0.6` (injected via `wayfarerTileConfig`) instead of hardcoded 6 (#206)
- Slow retry delay now derived from server's `retryAfterSeconds * 3` (injected via `wayfarerTileConfig`) instead of hardcoded 30s (#206)
- `TilesController.BudgetRetryAfterSeconds` changed from `private const string` to `internal const int` for config injection (#206)
- `TileCacheService.OutboundBurstCapacity` added as public accessor for `OutboundBudget.BurstCapacity` (#206)
- `wayfarerTileConfig` in `_Layout.cshtml` now includes `burstCapacity` and `retryAfterSeconds` from server config (#206)

## [1.2.23] - 2026-03-26

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Models/ApplicationSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class ApplicationSettings
public const string DefaultTileProviderAttribution = "&copy; OpenStreetMap contributors";
public const int DefaultTileRateLimitPerMinute = 600;
public const int DefaultTileRateLimitAuthenticatedPerMinute = 2000;
public const int DefaultTileOutboundBudgetPerIpPerMinute = 30;
public const int DefaultTileOutboundBudgetPerIpPerMinute = 80;
public const int DefaultProxyImageRateLimitPerMinute = 200;
public const int DefaultMaxProxyImageDownloadMB = 50;

Expand Down
7 changes: 7 additions & 0 deletions Services/TileCacheService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,13 @@ internal static void DrainForTesting()
/// </summary>
public static void StopOutboundBudget() => OutboundBudget.Stop();

/// <summary>
/// Gets the burst capacity of the outbound budget so it can be passed to the client.
/// _Layout.cshtml serializes this value as <c>wayfarerTileConfig.burstCapacity</c>, which lets
/// the tile layer size its concurrency pool from server configuration instead of a hardcoded number.
/// </summary>
public static int OutboundBurstCapacity
{
    get { return OutboundBudget.BurstCapacity; }
}

/// <summary>
/// Reconciles <see cref="_currentCacheSize"/> with the authoritative database sum.
/// Called periodically by <see cref="Wayfarer.Jobs.RateLimitCleanupJob"/> to correct drift
Expand Down
10 changes: 8 additions & 2 deletions Views/Shared/_Layout.cshtml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
@using System.Text.Json
@using Wayfarer.Models
@using Wayfarer.Services
@using Wayfarer.Areas.Public.Controllers
@inject Wayfarer.Parsers.IApplicationSettingsService ApplicationSettingsService
@{
bool embed = ViewBag.IsEmbed as bool? ?? false;
Expand Down Expand Up @@ -199,11 +201,15 @@
var tileAttribution = tileSettings?.TileProviderAttribution ?? ApplicationSettings.DefaultTileProviderAttribution;
}
<script>
// Expose tile attribution for Leaflet maps that use the tile cache proxy.
// Expose tile configuration for Leaflet maps that use the tile cache proxy.
// burstCapacity and retryAfterSeconds are used by retryTileLayer.js to derive
// the concurrency pool size and slow-retry interval from actual server config.
window.wayfarerTileConfig = @Html.Raw(JsonSerializer.Serialize(new
{
tilesUrl = "/Public/tiles/{z}/{x}/{y}.png",
attribution = tileAttribution
attribution = tileAttribution,
burstCapacity = TileCacheService.OutboundBurstCapacity,
retryAfterSeconds = TilesController.BudgetRetryAfterSeconds
}));
</script>

Expand Down
59 changes: 33 additions & 26 deletions wwwroot/js/retryTileLayer.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,18 @@
* while treating 404 as permanent failure.
*
* Concurrency control:
* - Global pool limits concurrent tile fetches (default 6) to prevent overwhelming
* the server's outbound budget (10 burst, 2/sec) and per-IP budget (default 30/min).
* Without this, a cold-cache load at zoom 17 (~35 tiles) sends all requests
* simultaneously, exhausting both budgets and causing cascading 503 failures where
* retries also get rejected (the per-IP counter increments on every request, even
* rejected ones, so the count quickly snowballs past the limit).
* - Global pool limits concurrent tile fetches to prevent overwhelming the server's
* outbound budget and per-IP budget. Pool size is derived from the server's burst
* capacity (injected via wayfarerTileConfig.burstCapacity) — set to 60% of burst,
* rounded up, to leave headroom for other concurrent users.
*
* Retry strategy (two phases):
* - Fast phase: up to 5 retries with exponential backoff (respects Retry-After header)
* - Slow phase: if fast retries exhaust on 503 or network error, enters indefinite
* 30-second polling until the tile loads or is removed (panned/zoomed away).
* This handles cold-cache scenarios where the per-IP budget (30/min) is exceeded
* by the number of tiles needed — tiles that can't be served within the fast retry
* window will load once the sliding-window budget decays.
* polling (interval derived from retryAfterSeconds * 3) until the tile loads or is
* removed (panned/zoomed away). This handles cold-cache scenarios where the per-IP
* budget is exceeded by the number of tiles needed — tiles that can't be served
* within the fast retry window will load once the sliding-window budget decays (~60s).
* - 404 and other HTTP errors are permanent failures (no retry)
*
* Design note: upstream HTTP 500/502/504 errors are treated as permanent failures
Expand All @@ -26,12 +24,14 @@
* up stale retry timers. Users will see gray tiles until upstream recovers.
*/

// ---------- Server config (injected by _Layout.cshtml) ----------
const _config = window.wayfarerTileConfig || {};

// ---------- Global concurrency pool ----------
// Limits concurrent tile fetches to prevent overwhelming the server's per-IP outbound
// budget (default 30/min) and global token budget (10 burst, 2/sec). Tiles beyond the
// limit queue client-side and proceed as slots free up, producing the progressive
// "stream-in" effect on cold-cache loads instead of a wall of 503s.
const _poolSize = 6;
// Pool size derived from server's outbound burst capacity: 60% of burst leaves headroom
// for other concurrent users while still allowing a cold-cache load to progress quickly.
// Falls back to a burst capacity of 10 (pool size 6) if config is unavailable (e.g., inline scripts outside _Layout).
const _poolSize = Math.ceil((_config.burstCapacity || 10) * 0.6);
let _inFlight = 0;
const _waiting = [];

Expand Down Expand Up @@ -77,11 +77,18 @@ const _releaseSlot = () => {
}
};

// ---------- Retry timing derived from server config ----------
// retryAfterSeconds is the Retry-After value the server sends on 503 (matches the budget
// replenishment cycle). Slow retry uses 3x that interval to give the per-IP sliding window
// time to decay between attempts. Falls back to 5s if config unavailable.
const _retryAfterSeconds = _config.retryAfterSeconds || 5;
const _defaultSlowRetryDelayMs = _retryAfterSeconds * 3 * 1000;

const RetryTileLayer = L.TileLayer.extend({
options: {
maxRetries: 5,
retryDelayMs: 1000,
slowRetryDelayMs: 30000,
slowRetryDelayMs: _defaultSlowRetryDelayMs,
},

/**
Expand Down Expand Up @@ -139,9 +146,9 @@ const RetryTileLayer = L.TileLayer.extend({

/**
* Schedules a slow-phase retry for a tile whose fast retries have been exhausted.
* Fires every slowRetryDelayMs (default 30s) indefinitely until the tile either
* loads successfully or is removed (signal aborted). Resets the attempt counter
* to 0 so the tile gets a fresh fast-retry cycle on each slow-phase trigger.
* Fires every slowRetryDelayMs (derived from retryAfterSeconds * 3) indefinitely
* until the tile either loads successfully or is removed (signal aborted). Resets
* the attempt counter to 0 so the tile gets a fresh fast-retry cycle on each trigger.
* @param {string} url - The tile URL.
* @param {HTMLImageElement} tile - The tile image element.
* @param {Function} done - Leaflet callback to signal completion.
Expand All @@ -164,8 +171,9 @@ const RetryTileLayer = L.TileLayer.extend({
* Fetches a tile via fetch(), retries on 503 or network error with backoff.
* Two retry phases:
* - Fast: attempts 0..maxRetries with exponential backoff (seconds)
* - Slow: after fast retries exhaust on 503/network error, retries every 30s
* indefinitely until the tile loads or is removed
* - Slow: after fast retries exhaust on 503/network error, retries every ~15s
* (derived from server's retryAfterSeconds * 3) indefinitely until the tile
* loads or is removed
* Acquires a concurrency slot before each fetch attempt to prevent overwhelming
* the server's budget. Respects AbortSignal so removed tiles stop immediately.
* @param {string} url - The tile URL.
Expand Down Expand Up @@ -234,7 +242,7 @@ const RetryTileLayer = L.TileLayer.extend({
}

// Slow phase: fast retries exhausted but 503 is transient (budget will
// recover). Keep retrying every ~30s until the tile loads or is removed.
// recover). Keep retrying until the tile loads or is removed.
layer._scheduleSlowRetry(url, tile, done, signal);
return;
}
Expand Down Expand Up @@ -272,13 +280,12 @@ const RetryTileLayer = L.TileLayer.extend({
* Reads URL and attribution from window.wayfarerTileConfig (injected by _Layout.cshtml).
* @param {Object} [opts] - Additional L.TileLayer options to merge. Supports standard Leaflet
* options (e.g., {zoomAnimation: true}) plus retry tuning: maxRetries (default 5),
* retryDelayMs (default 1000), slowRetryDelayMs (default 30000).
* retryDelayMs (default 1000), slowRetryDelayMs (derived from retryAfterSeconds * 3).
* @returns {L.TileLayer} The tile layer instance (call .addTo(map) on the result).
*/
export const createTileLayer = (opts) => {
const config = window.wayfarerTileConfig || {};
const url = config.tilesUrl || (window.location.origin + '/Public/tiles/{z}/{x}/{y}.png');
const attribution = config.attribution || '\u00a9 OpenStreetMap contributors';
const url = _config.tilesUrl || (window.location.origin + '/Public/tiles/{z}/{x}/{y}.png');
const attribution = _config.attribution || '\u00a9 OpenStreetMap contributors';
return new RetryTileLayer(url, Object.assign({
maxZoom: 19,
attribution: attribution,
Expand Down
Loading