From 7dbe62b01854f6394fe89f90b43ebe31a6460c85 Mon Sep 17 00:00:00 2001
From: Mauro Antonio Sanz
Date: Thu, 26 Mar 2026 20:22:03 -0300
Subject: [PATCH] feat: add connection pooling and transient error handling for stale HTTP connections (FME-14487)

Use SocketsHttpHandler on .NET 5+ with PooledConnectionLifetime (10min)
and PooledConnectionIdleTimeout (30s) to prevent stale keep-alive
connections causing "response ended prematurely" errors on POST requests.
Also catch transient HttpRequestException/IOException as a warning
instead of error.

Includes integration tests with a PrematureCloseServer to reproduce
and verify the stale connection scenarios.
---
 .../Services/Common/SplitioHttpClient.cs      |  21 ++
 .../StaleConnectionTests.cs                   | 305 ++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 tests/Splitio.Integration-events-tests/StaleConnectionTests.cs

diff --git a/src/Splitio/Services/Common/SplitioHttpClient.cs b/src/Splitio/Services/Common/SplitioHttpClient.cs
index 32a96216..e52c456f 100644
--- a/src/Splitio/Services/Common/SplitioHttpClient.cs
+++ b/src/Splitio/Services/Common/SplitioHttpClient.cs
@@ -28,10 +28,27 @@ public SplitioHttpClient(string apiKey,
 #if NET45
             ServicePointManager.SecurityProtocol = (SecurityProtocolType)12288 | (SecurityProtocolType)3072;
 #endif
+
+#if NET5_0_OR_GREATER
+            // Use SocketsHttpHandler on modern .NET to configure connection pooling.
+            // This prevents "The response ended prematurely" (HttpIOException) errors
+            // caused by stale keep-alive connections. When the server or load balancer
+            // closes an idle connection, the client may try to reuse it for a POST request.
+            // .NET does NOT auto-retry POST requests on connection failures (unlike GET),
+            // so the request fails. PooledConnectionLifetime and PooledConnectionIdleTimeout
+            // ensure connections are recycled before server-side timeouts close them.
+            var handler = new SocketsHttpHandler()
+            {
+                AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
+                PooledConnectionLifetime = TimeSpan.FromMinutes(10),
+                PooledConnectionIdleTimeout = TimeSpan.FromSeconds(30),
+            };
+#else
             var handler = new HttpClientHandler()
             {
                 AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
             };
+#endif
 
             if (!string.IsNullOrEmpty(config.ProxyHost))
             {
@@ -103,6 +120,10 @@ public async Task<HTTPResult> PostAsync(string url, string data)
                     result.IsSuccessStatusCode = response.IsSuccessStatusCode;
                 }
             }
+            catch (HttpRequestException e) when (e.InnerException is System.IO.IOException)
+            {
+                _log.Warn($"Transient connection error executing POST {url}. The request will be retried.", e);
+            }
             catch (Exception e)
             {
                 _log.Error(string.Format("Exception caught executing POST {0}", url), e);
diff --git a/tests/Splitio.Integration-events-tests/StaleConnectionTests.cs b/tests/Splitio.Integration-events-tests/StaleConnectionTests.cs
new file mode 100644
index 00000000..78acfa3c
--- /dev/null
+++ b/tests/Splitio.Integration-events-tests/StaleConnectionTests.cs
@@ -0,0 +1,305 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Moq;
+using Splitio.Domain;
+using Splitio.Services.Common;
+using Splitio.Services.Impressions.Classes;
+using Splitio.Services.Shared.Classes;
+using Splitio.Services.Shared.Interfaces;
+using Splitio.Telemetry.Storages;
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Sockets;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Splitio.Integration_events_tests
+{
+    /// <summary>
+    /// Tests that reproduce the "response ended prematurely" (HttpIOException / IOException)
+    /// error caused by server-side connection closures on keep-alive connections.
+    ///
+    /// Root cause: When using HTTP keep-alive, if the server closes an idle connection
+    /// while the client's connection pool still considers it active, the next POST request
+    /// sent on that stale connection will fail with "The response ended prematurely".
+    ///
+    /// .NET does NOT auto-retry POST requests (non-idempotent) on connection failures,
+    /// unlike GET requests which are retried up to 3 times automatically.
+    ///
+    /// See: https://github.com/dotnet/runtime/issues/125489
+    ///      https://github.com/dotnet/runtime/issues/122583
+    /// </summary>
+    [TestClass, TestCategory("Integration")]
+    public class StaleConnectionTests
+    {
+#if NET5_0_OR_GREATER
+        [TestMethod]
+        public async Task PostAsync_WhenServerClosesWithoutResponding_ReturnsFailedResult()
+        {
+            // Arrange: A server that reads the POST request but closes the connection
+            // without sending a response. This simulates what happens when a load balancer
+            // or server closes a keep-alive connection while the client is sending a request.
+            using var server = new PrematureCloseServer(failFirstNRequests: int.MaxValue);
+            _ = Task.Run(() => server.AcceptConnectionsAsync());
+
+            var config = new SelfRefreshingConfig
+            {
+                HttpConnectionTimeout = 5000,
+                HttpReadTimeout = 5000
+            };
+            using var httpClient = new SplitioHttpClient("test-api-key", config, new Dictionary<string, string>());
+
+            // Act
+            var result = await httpClient.PostAsync(
+                $"{server.Url}/api/testImpressions/bulk",
+                "[{\"f\":\"feature\",\"i\":[{\"k\":\"key\",\"t\":\"on\",\"m\":1}]}]");
+
+            // Assert: PostAsync catches the exception and returns a failed result
+            Assert.IsFalse(result.IsSuccessStatusCode,
+                "PostAsync should return failed result when server closes connection prematurely");
+            Assert.AreEqual(0, (int)result.StatusCode,
+                "StatusCode should be 0 (default) since no HTTP response was received");
+        }
+
+        [TestMethod]
+        public async Task PostAsync_KeepAlive_SecondRequestFailsOnStaleConnection()
+        {
+            // Arrange: Server responds to the first POST, then closes the connection
+            // without responding to the second POST. This is the scenario reported
+            // from production: the keep-alive connection goes stale between
+            // impression flush intervals.
+            using var server = new PrematureCloseServer(failFirstNRequests: 0, respondOnSameConnection: 1);
+            _ = Task.Run(() => server.AcceptConnectionsAsync());
+
+            var config = new SelfRefreshingConfig
+            {
+                HttpConnectionTimeout = 5000,
+                HttpReadTimeout = 5000
+            };
+            using var httpClient = new SplitioHttpClient("test-api-key", config, new Dictionary<string, string>());
+
+            var url = $"{server.Url}/api/testImpressions/bulk";
+            var payload = "[{\"f\":\"feature\",\"i\":[{\"k\":\"key\",\"t\":\"on\",\"m\":1}]}]";
+
+            // Act: First POST succeeds
+            var result1 = await httpClient.PostAsync(url, payload);
+            Assert.IsTrue(result1.IsSuccessStatusCode, "First POST should succeed");
+
+            // Act: Second POST should fail because the server closes the keep-alive connection
+            var result2 = await httpClient.PostAsync(url, payload);
+
+            // Assert: Second request fails due to stale connection
+            Assert.IsFalse(result2.IsSuccessStatusCode,
+                "Second POST should fail when server closes the keep-alive connection");
+        }
+
+        [TestMethod]
+        public async Task SendBulkImpressionsAsync_WhenFirstAttemptFails_RetrySucceedsOnNewConnection()
+        {
+            // Arrange: Server fails the first request (closes without responding),
+            // but succeeds on the second (retry opens a new connection).
+            // This tests that the retry loop in BuildJsonAndPostAsync recovers
+            // from transient connection failures.
+            using var server = new PrematureCloseServer(failFirstNRequests: 1);
+            _ = Task.Run(() => server.AcceptConnectionsAsync());
+
+            var config = new SelfRefreshingConfig
+            {
+                HttpConnectionTimeout = 5000,
+                HttpReadTimeout = 5000
+            };
+            using var httpClient = new SplitioHttpClient("test-api-key", config, new Dictionary<string, string>());
+
+            var telemetry = new Mock<ITelemetryRuntimeProducer>();
+            var wrapperAdapter = WrapperAdapter.Instance();
+
+            var impressions = new List<KeyImpression>
+            {
+                new KeyImpression("key-1", "feature-1", "on", 1, 1, "label", "bucket", false)
+            };
+
+            var apiClient = new ImpressionsSdkApiClient(
+                httpClient, telemetry.Object, server.Url, wrapperAdapter, 5000);
+
+            // Act: SendBulkImpressionsAsync should retry and succeed
+            await apiClient.SendBulkImpressionsAsync(impressions);
+
+            // Assert: Server received 2 requests (1 failed + 1 retry succeeded)
+            // Allow a brief moment for the server to process
+            await Task.Delay(500);
+            Assert.AreEqual(2, server.TotalRequestsReceived,
+                "Server should receive 2 requests: 1 failed attempt + 1 successful retry");
+            Assert.AreEqual(1, server.SuccessfulResponses,
+                "Server should have sent 1 successful response (on the retry)");
+        }
+#endif
+
+        #region Helper: PrematureCloseServer
+
+        /// <summary>
+        /// A minimal TCP server that simulates premature connection closures.
+        ///
+        /// It accepts HTTP POST requests and can be configured to:
+        /// - Close the connection without responding (simulating server-side keep-alive timeout)
+        /// - Respond normally to some requests before closing
+        ///
+        /// This is used instead of WireMock because WireMock doesn't support
+        /// simulating mid-connection closures after reading the request body.
+        /// </summary>
+        private class PrematureCloseServer : IDisposable
+        {
+            private readonly TcpListener _listener;
+            private readonly int _failFirstNRequests;
+            private readonly int _respondOnSameConnection;
+            private int _globalRequestCount;
+            private int _totalRequestsReceived;
+            private int _successfulResponses;
+
+            /// <param name="failFirstNRequests">
+            /// Number of initial requests (globally, across all connections) to fail
+            /// by closing the connection without responding. Subsequent requests get 200 OK.
+            /// </param>
+            /// <param name="respondOnSameConnection">
+            /// Number of requests to respond to on the same keep-alive connection before
+            /// closing it. Use this to simulate the keep-alive stale connection scenario.
+            /// When set to > 0, overrides failFirstNRequests for per-connection behavior.
+            /// Default: 0 (use failFirstNRequests for global behavior).
+            /// </param>
+            public PrematureCloseServer(int failFirstNRequests, int respondOnSameConnection = 0)
+            {
+                _failFirstNRequests = failFirstNRequests;
+                _respondOnSameConnection = respondOnSameConnection;
+                _listener = new TcpListener(IPAddress.Loopback, 0);
+                _listener.Start();
+            }
+
+            public string Url => $"http://localhost:{((IPEndPoint)_listener.LocalEndpoint).Port}";
+            public int TotalRequestsReceived => _totalRequestsReceived;
+            public int SuccessfulResponses => _successfulResponses;
+
+            public async Task AcceptConnectionsAsync()
+            {
+                try
+                {
+                    while (true)
+                    {
+                        var client = await _listener.AcceptTcpClientAsync();
+                        _ = HandleClientAsync(client);
+                    }
+                }
+                catch (ObjectDisposedException) { }
+                catch (SocketException) { }
+            }
+
+            private async Task HandleClientAsync(TcpClient client)
+            {
+                int perConnectionCount = 0;
+
+                try
+                {
+                    using (client)
+                    {
+                        var stream = client.GetStream();
+
+                        while (true)
+                        {
+                            // Read HTTP request line
+                            var requestLine = await ReadLineAsync(stream);
+                            if (string.IsNullOrEmpty(requestLine)) break;
+
+                            // Read headers
+                            int contentLength = 0;
+                            string line;
+                            while ((line = await ReadLineAsync(stream)) != "")
+                            {
+                                if (line.StartsWith("Content-Length:", StringComparison.OrdinalIgnoreCase))
+                                {
+                                    // Culture-invariant parse; a malformed value is treated as "no body".
+                                    int.TryParse(
+                                        line.Substring("Content-Length:".Length).Trim(),
+                                        System.Globalization.NumberStyles.Integer,
+                                        System.Globalization.CultureInfo.InvariantCulture,
+                                        out contentLength);
+                                }
+                            }
+
+                            // Read body
+                            if (contentLength > 0)
+                            {
+                                var body = new byte[contentLength];
+                                int totalRead = 0;
+                                while (totalRead < contentLength)
+                                {
+                                    var read = await stream.ReadAsync(body, totalRead, contentLength - totalRead);
+                                    // Client went away mid-body: this is not a complete request,
+                                    // so do not count it or answer it.
+                                    if (read == 0) return;
+                                    totalRead += read;
+                                }
+                            }
+
+                            Interlocked.Increment(ref _totalRequestsReceived);
+                            perConnectionCount++;
+
+                            bool shouldRespond;
+                            if (_respondOnSameConnection > 0)
+                            {
+                                // Per-connection mode: respond to first N requests on this connection
+                                shouldRespond = perConnectionCount <= _respondOnSameConnection;
+                            }
+                            else
+                            {
+                                // Global mode: fail first N requests across all connections
+                                var globalCount = Interlocked.Increment(ref _globalRequestCount);
+                                shouldRespond = globalCount > _failFirstNRequests;
+                            }
+
+                            if (shouldRespond)
+                            {
+                                var response = "HTTP/1.1 200 OK\r\nContent-Length: 2\r\nConnection: keep-alive\r\n\r\nok";
+                                var responseBytes = Encoding.UTF8.GetBytes(response);
+                                await stream.WriteAsync(responseBytes, 0, responseBytes.Length);
+                                await stream.FlushAsync();
+                                Interlocked.Increment(ref _successfulResponses);
+                            }
+                            else
+                            {
+                                // Close without responding — simulates premature connection close
+                                break;
+                            }
+                        }
+                    }
+                }
+                // The handler runs fire-and-forget, so a client that drops the socket mid-read or
+                // mid-write must not surface as an unobserved task exception.
+                catch (System.IO.IOException) { }
+                catch (ObjectDisposedException) { }
+            }
+
+            private static async Task<string> ReadLineAsync(NetworkStream stream)
+            {
+                var sb = new StringBuilder();
+                var buffer = new byte[1];
+                while (true)
+                {
+                    int read;
+                    try { read = await stream.ReadAsync(buffer, 0, 1); }
+                    catch (System.IO.IOException) { return sb.ToString(); }
+                    catch (ObjectDisposedException) { return sb.ToString(); }
+                    if (read == 0) return sb.ToString();
+                    var c = (char)buffer[0];
+                    if (c == '\n') return sb.ToString().TrimEnd('\r');
+                    sb.Append(c);
+                }
+            }
+
+            public void Dispose()
+            {
+                _listener.Stop();
+            }
+        }
+
+        #endregion
+    }
+}