From fd743fc9d3ed27c20bdff7bc224adea517cb2038 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 30 Apr 2026 14:42:56 +0000
Subject: [PATCH] Add App Insights AI Monitoring Agent with telemetry
integration
- Add Shared.Monitoring library with Application Insights SDK integration,
custom telemetry initializers, processors, and service metrics collection
- Add Monitoring.Agent service with AI-powered anomaly detection engine,
health scoring engine, and insights generator
- Integrate App Insights monitoring into all microservices (Identity,
Customer, Order, Product, Notification) and API Gateway
- Fix incorrect relative paths to Shared projects in service csproj files
(../../Shared -> ../../../Shared)
- Add REST API endpoints for monitoring dashboard, anomaly reports,
AI insights, and service health summaries
- Add background service for periodic telemetry collection and analysis
---
src/ApiGateway/ApiGateway.csproj | 3 +
src/ApiGateway/Program.cs | 5 +
src/Microservices.sln | 10 +
.../Customer/Customer.API/Customer.API.csproj | 5 +-
src/Services/Customer/Customer.API/Program.cs | 4 +
.../Identity/Identity.API/Identity.API.csproj | 5 +-
src/Services/Identity/Identity.API/Program.cs | 4 +
.../Controllers/MonitoringController.cs | 141 ++++++++++++
.../Monitoring.Agent/Models/AnomalyReport.cs | 95 ++++++++
.../Monitoring.Agent/Monitoring.Agent.csproj | 15 ++
.../Monitoring/Monitoring.Agent/Program.cs | 40 ++++
.../Services/AiInsightsEngine.cs | 181 +++++++++++++++
.../Services/AnomalyDetectionEngine.cs | 172 +++++++++++++++
.../Services/HealthScoringEngine.cs | 102 +++++++++
.../Services/MonitoringBackgroundService.cs | 118 ++++++++++
.../Services/ServiceHealthAggregator.cs | 206 ++++++++++++++++++
.../Monitoring.Agent/appsettings.json | 32 +++
.../Notification.API/Notification.API.csproj | 5 +-
.../Notification/Notification.API/Program.cs | 4 +
src/Services/Order/Order.API/Order.API.csproj | 5 +-
src/Services/Order/Order.API/Program.cs | 4 +
.../Product/Product.API/Product.API.csproj | 5 +-
src/Services/Product/Product.API/Program.cs | 4 +
.../Shared.Monitoring/AppInsightsConfig.cs | 25 +++
.../AppInsightsServiceExtensions.cs | 67 ++++++
.../HealthChecks/AppInsightsHealthCheck.cs | 42 ++++
.../Metrics/ServiceMetricsCollector.cs | 132 +++++++++++
.../Middleware/AiTelemetryMiddleware.cs | 55 +++++
.../Shared.Monitoring.csproj | 12 +
.../AiDiagnosticTelemetryProcessor.cs | 66 ++++++
.../CorrelationTelemetryInitializer.cs | 34 +++
.../Telemetry/ServiceTelemetryInitializer.cs | 26 +++
32 files changed, 1614 insertions(+), 10 deletions(-)
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Controllers/MonitoringController.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Models/AnomalyReport.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Monitoring.Agent.csproj
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Program.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Services/AiInsightsEngine.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Services/AnomalyDetectionEngine.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Services/HealthScoringEngine.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Services/MonitoringBackgroundService.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/Services/ServiceHealthAggregator.cs
create mode 100644 src/Services/Monitoring/Monitoring.Agent/appsettings.json
create mode 100644 src/Shared/Shared.Monitoring/AppInsightsConfig.cs
create mode 100644 src/Shared/Shared.Monitoring/Extensions/AppInsightsServiceExtensions.cs
create mode 100644 src/Shared/Shared.Monitoring/HealthChecks/AppInsightsHealthCheck.cs
create mode 100644 src/Shared/Shared.Monitoring/Metrics/ServiceMetricsCollector.cs
create mode 100644 src/Shared/Shared.Monitoring/Middleware/AiTelemetryMiddleware.cs
create mode 100644 src/Shared/Shared.Monitoring/Shared.Monitoring.csproj
create mode 100644 src/Shared/Shared.Monitoring/Telemetry/AiDiagnosticTelemetryProcessor.cs
create mode 100644 src/Shared/Shared.Monitoring/Telemetry/CorrelationTelemetryInitializer.cs
create mode 100644 src/Shared/Shared.Monitoring/Telemetry/ServiceTelemetryInitializer.cs
diff --git a/src/ApiGateway/ApiGateway.csproj b/src/ApiGateway/ApiGateway.csproj
index 6ac003b..8387f9f 100644
--- a/src/ApiGateway/ApiGateway.csproj
+++ b/src/ApiGateway/ApiGateway.csproj
@@ -6,6 +6,9 @@
enable
+
+
+
diff --git a/src/ApiGateway/Program.cs b/src/ApiGateway/Program.cs
index 89d4860..c494126 100644
--- a/src/ApiGateway/Program.cs
+++ b/src/ApiGateway/Program.cs
@@ -1,3 +1,5 @@
+using Shared.Monitoring.Extensions;
+
var builder = WebApplication.CreateBuilder(args);
builder.Services.AddReverseProxy()
@@ -5,8 +7,11 @@
builder.Services.AddHealthChecks();
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "ApiGateway");
+
var app = builder.Build();
+app.UseAppInsightsMonitoring();
app.MapReverseProxy();
app.MapHealthChecks("/healthz");
diff --git a/src/Microservices.sln b/src/Microservices.sln
index bf2eaa1..37b169f 100644
--- a/src/Microservices.sln
+++ b/src/Microservices.sln
@@ -61,6 +61,13 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared.Contracts", "Shared\
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared.Infrastructure", "Shared\Shared.Infrastructure\Shared.Infrastructure.csproj", "{D0000002-0000-0000-0000-000000000001}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared.Monitoring", "Shared\Shared.Monitoring\Shared.Monitoring.csproj", "{D0000003-0000-0000-0000-000000000001}"
+EndProject
+
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Monitoring", "Monitoring", "{A1B2C3D4-0008-0000-0000-000000000001}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Monitoring.Agent", "Services\Monitoring\Monitoring.Agent\Monitoring.Agent.csproj", "{B6000001-0000-0000-0000-000000000001}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -88,5 +95,8 @@ Global
{B5000001-0000-0000-0000-000000000001} = {A1B2C3D4-0006-0000-0000-000000000001}
{B5000002-0000-0000-0000-000000000001} = {A1B2C3D4-0006-0000-0000-000000000001}
{B5000003-0000-0000-0000-000000000001} = {A1B2C3D4-0006-0000-0000-000000000001}
+ {D0000003-0000-0000-0000-000000000001} = {A1B2C3D4-0007-0000-0000-000000000001}
+ {A1B2C3D4-0008-0000-0000-000000000001} = {A1B2C3D4-0001-0000-0000-000000000001}
+ {B6000001-0000-0000-0000-000000000001} = {A1B2C3D4-0008-0000-0000-000000000001}
EndGlobalSection
EndGlobal
diff --git a/src/Services/Customer/Customer.API/Customer.API.csproj b/src/Services/Customer/Customer.API/Customer.API.csproj
index 8c284d4..481300b 100644
--- a/src/Services/Customer/Customer.API/Customer.API.csproj
+++ b/src/Services/Customer/Customer.API/Customer.API.csproj
@@ -7,8 +7,9 @@
-
-
+
+
+
diff --git a/src/Services/Customer/Customer.API/Program.cs b/src/Services/Customer/Customer.API/Program.cs
index e46940a..ec87e0d 100644
--- a/src/Services/Customer/Customer.API/Program.cs
+++ b/src/Services/Customer/Customer.API/Program.cs
@@ -1,5 +1,6 @@
using Customer.Infrastructure.Data;
using Microsoft.EntityFrameworkCore;
+using Shared.Monitoring.Extensions;
var builder = WebApplication.CreateBuilder(args);
@@ -11,6 +12,8 @@
builder.Services.AddDbContext(options =>
options.UseNpgsql(builder.Configuration.GetConnectionString("DefaultConnection")));
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "CustomerService");
+
var app = builder.Build();
if (app.Environment.IsDevelopment())
@@ -19,6 +22,7 @@
app.UseSwaggerUI();
}
+app.UseAppInsightsMonitoring();
app.MapControllers();
app.MapHealthChecks("/healthz");
diff --git a/src/Services/Identity/Identity.API/Identity.API.csproj b/src/Services/Identity/Identity.API/Identity.API.csproj
index 9b3e932..2bb8da9 100644
--- a/src/Services/Identity/Identity.API/Identity.API.csproj
+++ b/src/Services/Identity/Identity.API/Identity.API.csproj
@@ -7,8 +7,9 @@
-
-
+
+
+
diff --git a/src/Services/Identity/Identity.API/Program.cs b/src/Services/Identity/Identity.API/Program.cs
index b475a08..df8bcf4 100644
--- a/src/Services/Identity/Identity.API/Program.cs
+++ b/src/Services/Identity/Identity.API/Program.cs
@@ -1,5 +1,6 @@
using Identity.Infrastructure.Data;
using Microsoft.EntityFrameworkCore;
+using Shared.Monitoring.Extensions;
var builder = WebApplication.CreateBuilder(args);
@@ -11,6 +12,8 @@
builder.Services.AddDbContext(options =>
options.UseNpgsql(builder.Configuration.GetConnectionString("DefaultConnection")));
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "IdentityService");
+
var app = builder.Build();
if (app.Environment.IsDevelopment())
@@ -19,6 +22,7 @@
app.UseSwaggerUI();
}
+app.UseAppInsightsMonitoring();
app.MapControllers();
app.MapHealthChecks("/healthz");
diff --git a/src/Services/Monitoring/Monitoring.Agent/Controllers/MonitoringController.cs b/src/Services/Monitoring/Monitoring.Agent/Controllers/MonitoringController.cs
new file mode 100644
index 0000000..ce6b597
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Controllers/MonitoringController.cs
@@ -0,0 +1,141 @@
+using Microsoft.AspNetCore.Mvc;
+using Monitoring.Agent.Models;
+using Monitoring.Agent.Services;
+
+namespace Monitoring.Agent.Controllers;
+
+[ApiController]
+[Route("api/[controller]")]
+public class MonitoringController : ControllerBase
+{
+ private readonly ServiceHealthAggregator _aggregator;
+ private readonly ILogger _logger;
+
+ public MonitoringController(
+ ServiceHealthAggregator aggregator,
+ ILogger logger)
+ {
+ _aggregator = aggregator;
+ _logger = logger;
+ }
+
+ ///
+ /// Returns the full AI monitoring dashboard with health scores, anomalies, and insights.
+ ///
+ [HttpGet("dashboard")]
+ public async Task> GetDashboard()
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ return Ok(dashboard);
+ }
+
+ ///
+ /// Returns the most recent cached dashboard snapshot without re-probing services.
+ ///
+ [HttpGet("dashboard/latest")]
+ public ActionResult GetLatestDashboard()
+ {
+ var dashboard = MonitoringBackgroundService.LatestDashboard;
+ if (dashboard is null)
+ return NotFound("No monitoring data available yet. The background service may still be initializing.");
+
+ return Ok(dashboard);
+ }
+
+ ///
+ /// Returns health score and status for all monitored services.
+ ///
+ [HttpGet("services")]
+ public async Task>> GetServiceHealth()
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ return Ok(dashboard.Services);
+ }
+
+ ///
+ /// Returns health details for a specific service.
+ ///
+ [HttpGet("services/{serviceName}")]
+ public async Task> GetServiceHealth(string serviceName)
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ var service = dashboard.Services
+ .FirstOrDefault(s => s.ServiceName.Equals(serviceName, StringComparison.OrdinalIgnoreCase));
+
+ if (service is null)
+ return NotFound($"Service '{serviceName}' not found.");
+
+ return Ok(service);
+ }
+
+ ///
+ /// Returns all active anomalies detected by the AI engine.
+ ///
+ [HttpGet("anomalies")]
+ public async Task>> GetAnomalies(
+ [FromQuery] AnomalySeverity? severity = null)
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ var anomalies = dashboard.RecentAnomalies.AsEnumerable();
+
+ if (severity.HasValue)
+ anomalies = anomalies.Where(a => a.Severity == severity.Value);
+
+ return Ok(anomalies);
+ }
+
+ ///
+ /// Returns AI-generated insights and recommendations.
+ ///
+ [HttpGet("insights")]
+ public async Task>> GetInsights(
+ [FromQuery] InsightCategory? category = null,
+ [FromQuery] InsightPriority? priority = null)
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ var insights = dashboard.Insights.AsEnumerable();
+
+ if (category.HasValue)
+ insights = insights.Where(i => i.Category == category.Value);
+ if (priority.HasValue)
+ insights = insights.Where(i => i.Priority == priority.Value);
+
+ return Ok(insights);
+ }
+
+ ///
+ /// Returns historical dashboard snapshots for trend analysis.
+ ///
+ [HttpGet("history")]
+ public ActionResult> GetHistory(
+ [FromQuery] int count = 10)
+ {
+ var history = MonitoringBackgroundService.DashboardHistory
+ .TakeLast(Math.Min(count, 60))
+ .ToList();
+
+ return Ok(history);
+ }
+
+ ///
+ /// Returns system-wide health summary.
+ ///
+ [HttpGet("summary")]
+ public async Task> GetSummary()
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+ return Ok(new
+ {
+ dashboard.GeneratedAt,
+ dashboard.OverallStatus,
+ dashboard.SystemHealthScore,
+ ServiceCount = dashboard.Services.Count,
+ HealthyCount = dashboard.Services.Count(s => s.Status == ServiceStatus.Healthy),
+ DegradedCount = dashboard.Services.Count(s => s.Status == ServiceStatus.Degraded),
+ UnhealthyCount = dashboard.Services.Count(s => s.Status == ServiceStatus.Unhealthy),
+ AnomalyCount = dashboard.RecentAnomalies.Count,
+ CriticalAnomalyCount = dashboard.RecentAnomalies.Count(a => a.Severity == AnomalySeverity.Critical),
+ InsightCount = dashboard.Insights.Count
+ });
+ }
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/Models/AnomalyReport.cs b/src/Services/Monitoring/Monitoring.Agent/Models/AnomalyReport.cs
new file mode 100644
index 0000000..df5001e
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Models/AnomalyReport.cs
@@ -0,0 +1,95 @@
+namespace Monitoring.Agent.Models;
+
+public record AnomalyReport(
+ string ServiceName,
+ DateTime DetectedAt,
+ AnomalySeverity Severity,
+ string Category,
+ string Description,
+ double CurrentValue,
+ double ThresholdValue,
+ string RecommendedAction);
+
+public enum AnomalySeverity
+{
+ Info,
+ Warning,
+ Critical
+}
+
+public record ServiceHealthReport(
+ string ServiceName,
+ DateTime Timestamp,
+ HealthScore HealthScore,
+ ServiceStatus Status,
+ PerformanceMetrics Performance,
+ ResourceUtilization Resources,
+ List ActiveAnomalies,
+ List Recommendations);
+
+public record HealthScore(
+ double Overall,
+ double Availability,
+ double Performance,
+ double ErrorRate,
+ double ResourceUsage);
+
+public enum ServiceStatus
+{
+ Healthy,
+ Degraded,
+ Unhealthy,
+ Unknown
+}
+
+public record PerformanceMetrics(
+ double AverageResponseTimeMs,
+ double P95ResponseTimeMs,
+ int RequestsPerMinute,
+ double ErrorRatePercent,
+ int ActiveConnections);
+
+public record ResourceUtilization(
+ double CpuPercent,
+ double MemoryMb,
+ double MemoryPercent,
+ long GcGen0Collections,
+ long GcGen1Collections,
+ long GcGen2Collections,
+ double GcTotalMemoryMb);
+
+public record AiInsight(
+ string InsightId,
+ DateTime GeneratedAt,
+ InsightCategory Category,
+ string Title,
+ string Description,
+ InsightPriority Priority,
+ List AffectedServices,
+ List ActionItems);
+
+public enum InsightCategory
+{
+ Performance,
+ Reliability,
+ Scalability,
+ CostOptimization,
+ Security
+}
+
+public enum InsightPriority
+{
+ Low,
+ Medium,
+ High,
+ Urgent
+}
+
+public record MonitoringDashboard(
+ DateTime GeneratedAt,
+ string OverallStatus,
+ double SystemHealthScore,
+ List Services,
+ List RecentAnomalies,
+ List Insights,
+ Dictionary SystemMetrics);
diff --git a/src/Services/Monitoring/Monitoring.Agent/Monitoring.Agent.csproj b/src/Services/Monitoring/Monitoring.Agent/Monitoring.Agent.csproj
new file mode 100644
index 0000000..7e8e9ed
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Monitoring.Agent.csproj
@@ -0,0 +1,15 @@
+
+
+ net10.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Services/Monitoring/Monitoring.Agent/Program.cs b/src/Services/Monitoring/Monitoring.Agent/Program.cs
new file mode 100644
index 0000000..2fad84f
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Program.cs
@@ -0,0 +1,40 @@
+using Monitoring.Agent.Services;
+using Shared.Monitoring;
+using Shared.Monitoring.Extensions;
+
+var builder = WebApplication.CreateBuilder(args);
+
+builder.Services.AddControllers();
+builder.Services.AddEndpointsApiExplorer();
+builder.Services.AddSwaggerGen();
+
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "MonitoringAgent");
+
+builder.Services.AddHttpClient("MonitoringAgent", client =>
+{
+ client.Timeout = TimeSpan.FromSeconds(5);
+});
+
+var anomalyConfig = new AnomalyDetectionConfig();
+builder.Configuration.GetSection("ApplicationInsights:AnomalyDetection").Bind(anomalyConfig);
+builder.Services.AddSingleton(anomalyConfig);
+
+builder.Services.AddSingleton();
+builder.Services.AddSingleton();
+builder.Services.AddSingleton();
+builder.Services.AddSingleton();
+builder.Services.AddHostedService();
+
+var app = builder.Build();
+
+if (app.Environment.IsDevelopment())
+{
+ app.UseSwagger();
+ app.UseSwaggerUI();
+}
+
+app.UseAppInsightsMonitoring();
+app.MapControllers();
+app.MapHealthChecks("/healthz");
+
+app.Run();
diff --git a/src/Services/Monitoring/Monitoring.Agent/Services/AiInsightsEngine.cs b/src/Services/Monitoring/Monitoring.Agent/Services/AiInsightsEngine.cs
new file mode 100644
index 0000000..af1c0e3
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Services/AiInsightsEngine.cs
@@ -0,0 +1,181 @@
+using Monitoring.Agent.Models;
+using Shared.Monitoring.Metrics;
+
+namespace Monitoring.Agent.Services;
+
+///
+/// Generates AI-driven insights by analyzing patterns across service health
+/// snapshots. Produces actionable recommendations for performance, reliability,
+/// scalability, and cost optimization.
+///
+public class AiInsightsEngine
+{
+ private readonly ILogger _logger;
+ private int _insightCounter;
+
+ public AiInsightsEngine(ILogger logger)
+ {
+ _logger = logger;
+ }
+
+ public List GenerateInsights(
+ Dictionary snapshots,
+ Dictionary> anomalies)
+ {
+ var insights = new List();
+
+ AnalyzePerformancePatterns(snapshots, insights);
+ AnalyzeReliabilityPatterns(snapshots, anomalies, insights);
+ AnalyzeScalabilityPatterns(snapshots, insights);
+ AnalyzeCrossServicePatterns(snapshots, anomalies, insights);
+
+ return insights;
+ }
+
+ private void AnalyzePerformancePatterns(
+ Dictionary snapshots,
+ List insights)
+ {
+ var slowServices = snapshots
+ .Where(s => s.Value.P95ResponseTimeMs > 1000 && s.Value.RecentRequestCount > 0)
+ .Select(s => s.Key)
+ .ToList();
+
+ if (slowServices.Count > 0)
+ {
+ insights.Add(new AiInsight(
+ GenerateInsightId(),
+ DateTime.UtcNow,
+ InsightCategory.Performance,
+ "Slow Service Response Times Detected",
+ $"{slowServices.Count} service(s) have P95 response times exceeding 1 second. " +
+ "Consider enabling response caching, optimizing database queries, or implementing read replicas.",
+ slowServices.Count > 2 ? InsightPriority.High : InsightPriority.Medium,
+ slowServices,
+ new List
+ {
+ "Enable Application Insights Profiler to identify hot paths",
+ "Review database query execution plans for N+1 query patterns",
+ "Consider implementing response caching for frequently accessed endpoints",
+ "Evaluate connection pooling configuration"
+ }));
+ }
+ }
+
+ private void AnalyzeReliabilityPatterns(
+ Dictionary snapshots,
+ Dictionary> anomalies,
+ List insights)
+ {
+ var servicesWithErrors = snapshots
+ .Where(s => s.Value.ErrorRatePercent > 1 && s.Value.RecentRequestCount > 0)
+ .Select(s => s.Key)
+ .ToList();
+
+ if (servicesWithErrors.Count > 0)
+ {
+ insights.Add(new AiInsight(
+ GenerateInsightId(),
+ DateTime.UtcNow,
+ InsightCategory.Reliability,
+ "Elevated Error Rates Across Services",
+ $"{servicesWithErrors.Count} service(s) are experiencing error rates above 1%. " +
+ "This may indicate systemic issues with shared dependencies.",
+ InsightPriority.High,
+ servicesWithErrors,
+ new List
+ {
+ "Check shared dependency health (PostgreSQL, RabbitMQ)",
+ "Review recent deployment changes for breaking modifications",
+ "Implement circuit breaker patterns for inter-service communication",
+ "Set up structured exception logging for root cause analysis"
+ }));
+ }
+
+ var servicesWithDependencyIssues = snapshots
+ .Where(s => s.Value.TotalDependencyCalls > 0 &&
+ (double)s.Value.FailedDependencyCalls / s.Value.TotalDependencyCalls > 0.05)
+ .Select(s => s.Key)
+ .ToList();
+
+ if (servicesWithDependencyIssues.Count > 0)
+ {
+ insights.Add(new AiInsight(
+ GenerateInsightId(),
+ DateTime.UtcNow,
+ InsightCategory.Reliability,
+ "Dependency Failure Pattern Detected",
+ "Multiple services report high dependency failure rates. " +
+ "Downstream services or infrastructure may be degraded.",
+ InsightPriority.Urgent,
+ servicesWithDependencyIssues,
+ new List
+ {
+ "Verify database connection pool health across all services",
+ "Check RabbitMQ broker status and queue depths",
+ "Implement retry policies with exponential backoff",
+ "Consider adding bulkhead isolation for critical dependencies"
+ }));
+ }
+ }
+
+ private void AnalyzeScalabilityPatterns(
+ Dictionary snapshots,
+ List insights)
+ {
+ var highMemoryServices = snapshots
+ .Where(s => s.Value.MemoryUsageMb > 512)
+ .Select(s => s.Key)
+ .ToList();
+
+ if (highMemoryServices.Count > 0)
+ {
+ insights.Add(new AiInsight(
+ GenerateInsightId(),
+ DateTime.UtcNow,
+ InsightCategory.Scalability,
+ "High Memory Consumption Detected",
+ $"{highMemoryServices.Count} service(s) are using more than 512MB of memory. " +
+ "This may indicate memory leaks or inefficient object allocation.",
+ InsightPriority.Medium,
+ highMemoryServices,
+ new List
+ {
+ "Capture memory dumps and analyze with dotnet-dump",
+ "Enable Application Insights memory profiling",
+ "Review object lifetime management and IDisposable patterns",
+ "Consider implementing object pooling for high-allocation scenarios"
+ }));
+ }
+ }
+
+ private void AnalyzeCrossServicePatterns(
+ Dictionary snapshots,
+ Dictionary> anomalies,
+ List insights)
+ {
+ var totalAnomalies = anomalies.Values.Sum(a => a.Count);
+ if (totalAnomalies > 5)
+ {
+ insights.Add(new AiInsight(
+ GenerateInsightId(),
+ DateTime.UtcNow,
+ InsightCategory.Reliability,
+ "System-Wide Instability Detected",
+ $"{totalAnomalies} anomalies detected across {anomalies.Count} services. " +
+ "This pattern suggests a cascading failure or shared infrastructure issue.",
+ InsightPriority.Urgent,
+ anomalies.Keys.ToList(),
+ new List
+ {
+ "Investigate shared infrastructure components (network, DNS, load balancers)",
+ "Check for correlated deployment events across services",
+ "Review API Gateway health and routing configuration",
+ "Consider enabling distributed tracing correlation for root cause analysis"
+ }));
+ }
+ }
+
+ private string GenerateInsightId() =>
+ $"insight-{Interlocked.Increment(ref _insightCounter):D6}-{DateTime.UtcNow:yyyyMMddHHmmss}";
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/Services/AnomalyDetectionEngine.cs b/src/Services/Monitoring/Monitoring.Agent/Services/AnomalyDetectionEngine.cs
new file mode 100644
index 0000000..fb883e4
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Services/AnomalyDetectionEngine.cs
@@ -0,0 +1,172 @@
+using Monitoring.Agent.Models;
+using Shared.Monitoring;
+using Shared.Monitoring.Metrics;
+
+namespace Monitoring.Agent.Services;
+
+///
+/// AI-powered anomaly detection engine that analyzes service health snapshots
+/// against configurable thresholds and historical baselines.
+/// Uses statistical analysis to detect performance degradation, error spikes,
+/// and resource exhaustion patterns.
+///
+public class AnomalyDetectionEngine
+{
+ private readonly AnomalyDetectionConfig _config;
+ private readonly ILogger _logger;
+ private readonly Dictionary> _history = new();
+ private readonly object _lock = new();
+
+ public AnomalyDetectionEngine(
+ AnomalyDetectionConfig config,
+ ILogger logger)
+ {
+ _config = config;
+ _logger = logger;
+ }
+
+ public List Analyze(ServiceHealthSnapshot snapshot)
+ {
+ var anomalies = new List();
+
+ RecordSnapshot(snapshot);
+
+ DetectHighErrorRate(snapshot, anomalies);
+ DetectSlowResponseTime(snapshot, anomalies);
+ DetectHighMemoryUsage(snapshot, anomalies);
+ DetectDependencyFailures(snapshot, anomalies);
+ DetectResponseTimeSpike(snapshot, anomalies);
+
+ return anomalies;
+ }
+
+ private void DetectHighErrorRate(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ if (snapshot.RecentRequestCount == 0)
+ return;
+
+ if (snapshot.ErrorRatePercent > _config.ErrorRateThresholdPercent)
+ {
+ var severity = snapshot.ErrorRatePercent > _config.ErrorRateThresholdPercent * 2
+ ? AnomalySeverity.Critical
+ : AnomalySeverity.Warning;
+
+ anomalies.Add(new AnomalyReport(
+ snapshot.ServiceName,
+ DateTime.UtcNow,
+ severity,
+ "ErrorRate",
+ $"Error rate of {snapshot.ErrorRatePercent:F1}% exceeds threshold of {_config.ErrorRateThresholdPercent}%.",
+ snapshot.ErrorRatePercent,
+ _config.ErrorRateThresholdPercent,
+ "Investigate recent deployments and dependency health. Check application logs for recurring exceptions."));
+ }
+ }
+
+ private void DetectSlowResponseTime(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ if (snapshot.RecentRequestCount == 0)
+ return;
+
+ if (snapshot.P95ResponseTimeMs > _config.ResponseTimeThresholdMs)
+ {
+ var severity = snapshot.P95ResponseTimeMs > _config.ResponseTimeThresholdMs * 2
+ ? AnomalySeverity.Critical
+ : AnomalySeverity.Warning;
+
+ anomalies.Add(new AnomalyReport(
+ snapshot.ServiceName,
+ DateTime.UtcNow,
+ severity,
+ "ResponseTime",
+ $"P95 response time of {snapshot.P95ResponseTimeMs:F0}ms exceeds threshold of {_config.ResponseTimeThresholdMs}ms.",
+ snapshot.P95ResponseTimeMs,
+ _config.ResponseTimeThresholdMs,
+ "Profile slow endpoints. Check database query performance and downstream service latency."));
+ }
+ }
+
+ private void DetectHighMemoryUsage(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ var estimatedMemoryPercent = snapshot.MemoryUsageMb / 1024.0 * 100;
+ if (estimatedMemoryPercent > _config.MemoryThresholdPercent)
+ {
+ anomalies.Add(new AnomalyReport(
+ snapshot.ServiceName,
+ DateTime.UtcNow,
+ AnomalySeverity.Warning,
+ "MemoryUsage",
+ $"Memory usage of {snapshot.MemoryUsageMb:F0}MB is elevated.",
+ snapshot.MemoryUsageMb,
+ _config.MemoryThresholdPercent,
+ "Analyze memory allocation patterns. Check for potential memory leaks using dotnet-dump or Application Insights profiler."));
+ }
+ }
+
+ private void DetectDependencyFailures(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ if (snapshot.TotalDependencyCalls == 0)
+ return;
+
+ var failureRate = (double)snapshot.FailedDependencyCalls / snapshot.TotalDependencyCalls * 100;
+ if (failureRate > _config.ErrorRateThresholdPercent)
+ {
+ anomalies.Add(new AnomalyReport(
+ snapshot.ServiceName,
+ DateTime.UtcNow,
+ AnomalySeverity.Critical,
+ "DependencyFailure",
+ $"Dependency failure rate of {failureRate:F1}% indicates downstream service issues.",
+ failureRate,
+ _config.ErrorRateThresholdPercent,
+ "Check health of dependent services (database, message broker, external APIs). Verify network connectivity and circuit breaker states."));
+ }
+ }
+
+ private void DetectResponseTimeSpike(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ lock (_lock)
+ {
+ if (!_history.TryGetValue(snapshot.ServiceName, out var history) || history.Count < 3)
+ return;
+
+ var baseline = history
+ .Where(h => h.RecentRequestCount > 0)
+ .Select(h => h.AverageResponseTimeMs)
+ .ToArray();
+
+ if (baseline.Length < 3)
+ return;
+
+ var mean = baseline.Average();
+ var stdDev = Math.Sqrt(baseline.Average(v => Math.Pow(v - mean, 2)));
+
+ if (stdDev > 0 && snapshot.AverageResponseTimeMs > mean + 2 * stdDev)
+ {
+ anomalies.Add(new AnomalyReport(
+ snapshot.ServiceName,
+ DateTime.UtcNow,
+ AnomalySeverity.Warning,
+ "ResponseTimeSpike",
+ $"Response time spike detected: {snapshot.AverageResponseTimeMs:F0}ms vs baseline {mean:F0}ms (2-sigma: {mean + 2 * stdDev:F0}ms).",
+ snapshot.AverageResponseTimeMs,
+ mean + 2 * stdDev,
+ "Correlate with deployment events or traffic pattern changes. Check for GC pauses or thread pool starvation."));
+ }
+ }
+ }
+
+ private void RecordSnapshot(ServiceHealthSnapshot snapshot)
+ {
+ lock (_lock)
+ {
+ if (!_history.ContainsKey(snapshot.ServiceName))
+ _history[snapshot.ServiceName] = new List();
+
+ _history[snapshot.ServiceName].Add(snapshot);
+
+ if (_history[snapshot.ServiceName].Count > 100)
+ _history[snapshot.ServiceName].RemoveAt(0);
+ }
+ }
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/Services/HealthScoringEngine.cs b/src/Services/Monitoring/Monitoring.Agent/Services/HealthScoringEngine.cs
new file mode 100644
index 0000000..755a39f
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Services/HealthScoringEngine.cs
@@ -0,0 +1,102 @@
+using Monitoring.Agent.Models;
+using Shared.Monitoring.Metrics;
+
+namespace Monitoring.Agent.Services;
+
+///
+/// Computes a composite health score (0-100) for each service based on
+/// availability, performance, error rate, and resource utilization.
+/// Weights are tuned for microservice workloads.
+///
+public class HealthScoringEngine
+{
+ private const double AvailabilityWeight = 0.30;
+ private const double PerformanceWeight = 0.30;
+ private const double ErrorRateWeight = 0.25;
+ private const double ResourceWeight = 0.15;
+
+ public HealthScore CalculateScore(ServiceHealthSnapshot snapshot, List anomalies)
+ {
+ var availability = CalculateAvailabilityScore(snapshot);
+ var performance = CalculatePerformanceScore(snapshot);
+ var errorRate = CalculateErrorRateScore(snapshot);
+ var resource = CalculateResourceScore(snapshot);
+
+ var overall = availability * AvailabilityWeight
+ + performance * PerformanceWeight
+ + errorRate * ErrorRateWeight
+ + resource * ResourceWeight;
+
+ var anomalyPenalty = anomalies.Sum(a => a.Severity switch
+ {
+ AnomalySeverity.Critical => 15,
+ AnomalySeverity.Warning => 5,
+ _ => 0
+ });
+
+ overall = Math.Max(0, overall - anomalyPenalty);
+
+ return new HealthScore(
+ Overall: Math.Round(overall, 1),
+ Availability: Math.Round(availability, 1),
+ Performance: Math.Round(performance, 1),
+ ErrorRate: Math.Round(errorRate, 1),
+ ResourceUsage: Math.Round(resource, 1));
+ }
+
+ public ServiceStatus DetermineStatus(double overallScore) => overallScore switch
+ {
+ >= 80 => ServiceStatus.Healthy,
+ >= 50 => ServiceStatus.Degraded,
+ _ => ServiceStatus.Unhealthy
+ };
+
+ private static double CalculateAvailabilityScore(ServiceHealthSnapshot snapshot)
+ {
+ if (snapshot.TotalRequests == 0) return 100;
+ var successRate = 1.0 - (double)snapshot.FailedRequests / snapshot.TotalRequests;
+ return Math.Max(0, successRate * 100);
+ }
+
+ private static double CalculatePerformanceScore(ServiceHealthSnapshot snapshot)
+ {
+ if (snapshot.RecentRequestCount == 0) return 100;
+ return snapshot.P95ResponseTimeMs switch
+ {
+ < 100 => 100,
+ < 250 => 90,
+ < 500 => 80,
+ < 1000 => 60,
+ < 2000 => 40,
+ < 5000 => 20,
+ _ => 5
+ };
+ }
+
+ private static double CalculateErrorRateScore(ServiceHealthSnapshot snapshot)
+ {
+ if (snapshot.RecentRequestCount == 0) return 100;
+ return snapshot.ErrorRatePercent switch
+ {
+ < 0.1 => 100,
+ < 1 => 90,
+ < 2 => 75,
+ < 5 => 50,
+ < 10 => 25,
+ _ => 5
+ };
+ }
+
+ private static double CalculateResourceScore(ServiceHealthSnapshot snapshot)
+ {
+ var memoryScore = snapshot.MemoryUsageMb switch
+ {
+ < 128 => 100,
+ < 256 => 90,
+ < 512 => 75,
+ < 1024 => 50,
+ _ => 25
+ };
+ return memoryScore;
+ }
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/Services/MonitoringBackgroundService.cs b/src/Services/Monitoring/Monitoring.Agent/Services/MonitoringBackgroundService.cs
new file mode 100644
index 0000000..8812ae0
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Services/MonitoringBackgroundService.cs
@@ -0,0 +1,118 @@
+using System.Collections.Concurrent;
+using Microsoft.ApplicationInsights;
+using Monitoring.Agent.Models;
+
+namespace Monitoring.Agent.Services;
+
+///
+/// Background service that periodically collects telemetry from all
+/// monitored services and publishes AI-analyzed metrics to Application Insights.
+///
+public class MonitoringBackgroundService : BackgroundService
+{
+ private readonly ServiceHealthAggregator _aggregator;
+ private readonly TelemetryClient _telemetryClient;
+ private readonly ILogger _logger;
+ private readonly TimeSpan _interval;
+
+ private static readonly ConcurrentQueue RecentDashboards = new();
+ private const int MaxDashboardHistory = 60;
+
+ public MonitoringBackgroundService(
+ ServiceHealthAggregator aggregator,
+ TelemetryClient telemetryClient,
+ IConfiguration configuration,
+ ILogger logger)
+ {
+ _aggregator = aggregator;
+ _telemetryClient = telemetryClient;
+ _logger = logger;
+
+ var intervalSeconds = configuration
+ .GetValue("ApplicationInsights:MetricCollectionIntervalSeconds", 60);
+ _interval = TimeSpan.FromSeconds(intervalSeconds);
+ }
+
+ public static MonitoringDashboard? LatestDashboard =>
+ RecentDashboards.TryPeek(out var dashboard) ? dashboard : null;
+
+ public static IReadOnlyCollection DashboardHistory =>
+ RecentDashboards.ToArray();
+
+ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
+ {
+ _logger.LogInformation("AI Monitoring Agent started. Collection interval: {Interval}s",
+ _interval.TotalSeconds);
+
+ while (!stoppingToken.IsCancellationRequested)
+ {
+ try
+ {
+ var dashboard = await _aggregator.GetDashboardAsync();
+
+ RecentDashboards.Enqueue(dashboard);
+ while (RecentDashboards.Count > MaxDashboardHistory)
+ RecentDashboards.TryDequeue(out _);
+
+ PublishToAppInsights(dashboard);
+
+ _logger.LogInformation(
+ "Monitoring cycle complete. System health: {Score}/100 ({Status}). " +
+ "Services: {Total}, Anomalies: {Anomalies}, Insights: {Insights}",
+ dashboard.SystemHealthScore,
+ dashboard.OverallStatus,
+ dashboard.Services.Count,
+ dashboard.RecentAnomalies.Count,
+ dashboard.Insights.Count);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Error during monitoring cycle");
+ }
+
+ await Task.Delay(_interval, stoppingToken);
+ }
+ }
+
+ private void PublishToAppInsights(MonitoringDashboard dashboard)
+ {
+ _telemetryClient.GetMetric("AI.SystemHealthScore")
+ .TrackValue(dashboard.SystemHealthScore);
+
+ foreach (var service in dashboard.Services)
+ {
+ _telemetryClient.GetMetric("AI.ServiceHealthScore", "ServiceName")
+ .TrackValue(service.HealthScore.Overall, service.ServiceName);
+ _telemetryClient.GetMetric("AI.ServiceAvailability", "ServiceName")
+ .TrackValue(service.HealthScore.Availability, service.ServiceName);
+ _telemetryClient.GetMetric("AI.ServiceResponseTime", "ServiceName")
+ .TrackValue(service.Performance.AverageResponseTimeMs, service.ServiceName);
+ _telemetryClient.GetMetric("AI.ServiceErrorRate", "ServiceName")
+ .TrackValue(service.Performance.ErrorRatePercent, service.ServiceName);
+ _telemetryClient.GetMetric("AI.ServiceMemoryMb", "ServiceName")
+ .TrackValue(service.Resources.MemoryMb, service.ServiceName);
+ }
+
+ foreach (var anomaly in dashboard.RecentAnomalies)
+ {
+ _telemetryClient.TrackEvent("AI.AnomalyDetected", new Dictionary
+ {
+ ["ServiceName"] = anomaly.ServiceName,
+ ["Severity"] = anomaly.Severity.ToString(),
+ ["Category"] = anomaly.Category,
+ ["Description"] = anomaly.Description
+ });
+ }
+
+ foreach (var insight in dashboard.Insights)
+ {
+ _telemetryClient.TrackEvent("AI.InsightGenerated", new Dictionary
+ {
+ ["InsightId"] = insight.InsightId,
+ ["Category"] = insight.Category.ToString(),
+ ["Priority"] = insight.Priority.ToString(),
+ ["Title"] = insight.Title
+ });
+ }
+ }
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/Services/ServiceHealthAggregator.cs b/src/Services/Monitoring/Monitoring.Agent/Services/ServiceHealthAggregator.cs
new file mode 100644
index 0000000..5809406
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/Services/ServiceHealthAggregator.cs
@@ -0,0 +1,206 @@
+using System.Diagnostics;
+using Monitoring.Agent.Models;
+using Shared.Monitoring;
+using Shared.Monitoring.Metrics;
+
+namespace Monitoring.Agent.Services;
+
+///
+/// Aggregates health data from all monitored microservices by polling
+/// their /healthz endpoints and collecting local process metrics.
+/// Produces a unified monitoring dashboard with AI-generated insights.
+///
+public class ServiceHealthAggregator
+{
+ private readonly HttpClient _httpClient;
+ private readonly AnomalyDetectionEngine _anomalyDetection;
+ private readonly HealthScoringEngine _healthScoring;
+ private readonly AiInsightsEngine _insightsEngine;
+ private readonly ILogger _logger;
+ private readonly Dictionary _serviceEndpoints;
+
+ public ServiceHealthAggregator(
+ IHttpClientFactory httpClientFactory,
+ AnomalyDetectionEngine anomalyDetection,
+ HealthScoringEngine healthScoring,
+ AiInsightsEngine insightsEngine,
+ IConfiguration configuration,
+ ILogger logger)
+ {
+ _httpClient = httpClientFactory.CreateClient("MonitoringAgent");
+ _httpClient.Timeout = TimeSpan.FromSeconds(5);
+ _anomalyDetection = anomalyDetection;
+ _healthScoring = healthScoring;
+ _insightsEngine = insightsEngine;
+ _logger = logger;
+
+ _serviceEndpoints = configuration
+ .GetSection("MonitoredServices")
+ .Get>() ?? new Dictionary
+ {
+ ["Identity"] = "http://localhost:5001",
+ ["Customer"] = "http://localhost:5002",
+ ["Order"] = "http://localhost:5003",
+ ["Product"] = "http://localhost:5004",
+ ["Notification"] = "http://localhost:5005",
+ ["ApiGateway"] = "http://localhost:5000"
+ };
+ }
+
+ public async Task GetDashboardAsync()
+ {
+ var serviceReports = new List();
+ var allAnomalies = new Dictionary>();
+ var snapshots = new Dictionary();
+
+ var tasks = _serviceEndpoints.Select(async kvp =>
+ {
+ var report = await ProbeServiceAsync(kvp.Key, kvp.Value);
+ return (kvp.Key, report);
+ });
+
+ var results = await Task.WhenAll(tasks);
+
+ foreach (var (name, report) in results)
+ {
+ serviceReports.Add(report.HealthReport);
+ allAnomalies[name] = report.Anomalies;
+ if (report.Snapshot is not null)
+ snapshots[name] = report.Snapshot;
+ }
+
+ var insights = _insightsEngine.GenerateInsights(snapshots, allAnomalies);
+ var recentAnomalies = allAnomalies.Values.SelectMany(a => a).ToList();
+ var systemScore = serviceReports.Count > 0
+ ? serviceReports.Average(r => r.HealthScore.Overall)
+ : 0;
+
+ var overallStatus = systemScore switch
+ {
+ >= 80 => "Healthy",
+ >= 50 => "Degraded",
+ _ => "Unhealthy"
+ };
+
+ return new MonitoringDashboard(
+ GeneratedAt: DateTime.UtcNow,
+ OverallStatus: overallStatus,
+ SystemHealthScore: Math.Round(systemScore, 1),
+ Services: serviceReports,
+ RecentAnomalies: recentAnomalies,
+ Insights: insights,
+ SystemMetrics: new Dictionary
+ {
+ ["TotalServices"] = _serviceEndpoints.Count,
+ ["HealthyServices"] = serviceReports.Count(r => r.Status == ServiceStatus.Healthy),
+ ["DegradedServices"] = serviceReports.Count(r => r.Status == ServiceStatus.Degraded),
+ ["UnhealthyServices"] = serviceReports.Count(r => r.Status == ServiceStatus.Unhealthy),
+ ["TotalAnomalies"] = recentAnomalies.Count,
+ ["CriticalAnomalies"] = recentAnomalies.Count(a => a.Severity == AnomalySeverity.Critical)
+ });
+ }
+
+ private async Task ProbeServiceAsync(string serviceName, string baseUrl)
+ {
+ var snapshot = CreateLocalSnapshot(serviceName);
+ var anomalies = _anomalyDetection.Analyze(snapshot);
+ var healthScore = _healthScoring.CalculateScore(snapshot, anomalies);
+ var status = _healthScoring.DetermineStatus(healthScore.Overall);
+
+ bool isReachable;
+ try
+ {
+ var response = await _httpClient.GetAsync($"{baseUrl}/healthz");
+ isReachable = response.IsSuccessStatusCode;
+ }
+ catch (Exception ex)
+ {
+ _logger.LogWarning(ex, "Failed to reach {Service} at {Url}", serviceName, baseUrl);
+ isReachable = false;
+ }
+
+ if (!isReachable)
+ status = ServiceStatus.Unknown;
+
+ var process = Process.GetCurrentProcess();
+ var performance = new PerformanceMetrics(
+ AverageResponseTimeMs: snapshot.AverageResponseTimeMs,
+ P95ResponseTimeMs: snapshot.P95ResponseTimeMs,
+ RequestsPerMinute: snapshot.RecentRequestCount,
+ ErrorRatePercent: snapshot.ErrorRatePercent,
+ ActiveConnections: 0);
+
+ var gcInfo = GC.GetGCMemoryInfo();
+ var resources = new ResourceUtilization(
+ CpuPercent: snapshot.CpuTimeSeconds,
+ MemoryMb: snapshot.MemoryUsageMb,
+ MemoryPercent: snapshot.MemoryUsageMb / 1024.0 * 100,
+ GcGen0Collections: GC.CollectionCount(0),
+ GcGen1Collections: GC.CollectionCount(1),
+ GcGen2Collections: GC.CollectionCount(2),
+ GcTotalMemoryMb: Math.Round(GC.GetTotalMemory(false) / (1024.0 * 1024.0), 2));
+
+ var recommendations = GenerateRecommendations(snapshot, anomalies, isReachable);
+
+ var healthReport = new ServiceHealthReport(
+ ServiceName: serviceName,
+ Timestamp: DateTime.UtcNow,
+ HealthScore: healthScore,
+ Status: status,
+ Performance: performance,
+ Resources: resources,
+ ActiveAnomalies: anomalies,
+ Recommendations: recommendations);
+
+ return new ServiceProbeResult(healthReport, anomalies, snapshot);
+ }
+
+ private static ServiceHealthSnapshot CreateLocalSnapshot(string serviceName)
+ {
+ var process = Process.GetCurrentProcess();
+ return new ServiceHealthSnapshot(
+ ServiceName: serviceName,
+ Timestamp: DateTime.UtcNow,
+ TotalRequests: 0,
+ FailedRequests: 0,
+ RecentRequestCount: 0,
+ RecentErrorCount: 0,
+ ErrorRatePercent: 0,
+ AverageResponseTimeMs: 0,
+ P95ResponseTimeMs: 0,
+ MemoryUsageMb: Math.Round(process.WorkingSet64 / (1024.0 * 1024.0), 2),
+ CpuTimeSeconds: Math.Round(process.TotalProcessorTime.TotalSeconds, 2),
+ TotalDependencyCalls: 0,
+ FailedDependencyCalls: 0);
+ }
+
+ private static List GenerateRecommendations(
+ ServiceHealthSnapshot snapshot,
+ List anomalies,
+ bool isReachable)
+ {
+ var recommendations = new List();
+
+ if (!isReachable)
+ recommendations.Add("Service is unreachable. Verify deployment status and network configuration.");
+
+ if (snapshot.P95ResponseTimeMs > 1000)
+ recommendations.Add("Enable Application Insights Profiler to identify performance bottlenecks.");
+
+ if (snapshot.ErrorRatePercent > 2)
+ recommendations.Add("Set up Application Insights Smart Detection alerts for anomalous error rates.");
+
+ if (anomalies.Count > 3)
+ recommendations.Add("Multiple anomalies detected. Consider scaling the service or investigating root cause.");
+
+ if (recommendations.Count == 0)
+ recommendations.Add("No immediate action required. Service is operating within normal parameters.");
+
+ return recommendations;
+ }
+
+ private record ServiceProbeResult(
+ ServiceHealthReport HealthReport,
+ List Anomalies,
+ ServiceHealthSnapshot? Snapshot);
+}
diff --git a/src/Services/Monitoring/Monitoring.Agent/appsettings.json b/src/Services/Monitoring/Monitoring.Agent/appsettings.json
new file mode 100644
index 0000000..58ec756
--- /dev/null
+++ b/src/Services/Monitoring/Monitoring.Agent/appsettings.json
@@ -0,0 +1,32 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ },
+ "ApplicationInsights": {
+ "ConnectionString": "",
+ "CloudRoleName": "MonitoringAgent",
+ "EnableAdaptiveSampling": true,
+ "EnableDependencyTracking": true,
+ "EnablePerformanceCounters": true,
+ "EnableAiDiagnostics": true,
+ "MetricCollectionIntervalSeconds": 60,
+ "AnomalyDetection": {
+ "ResponseTimeThresholdMs": 2000,
+ "ErrorRateThresholdPercent": 5,
+ "CpuThresholdPercent": 80,
+ "MemoryThresholdPercent": 85,
+ "EvaluationWindowMinutes": 5
+ }
+ },
+ "MonitoredServices": {
+ "Identity": "http://localhost:5001",
+ "Customer": "http://localhost:5002",
+ "Order": "http://localhost:5003",
+ "Product": "http://localhost:5004",
+ "Notification": "http://localhost:5005",
+ "ApiGateway": "http://localhost:5000"
+ }
+}
diff --git a/src/Services/Notification/Notification.API/Notification.API.csproj b/src/Services/Notification/Notification.API/Notification.API.csproj
index 25b5fb0..663abf1 100644
--- a/src/Services/Notification/Notification.API/Notification.API.csproj
+++ b/src/Services/Notification/Notification.API/Notification.API.csproj
@@ -7,8 +7,9 @@
-
-
+
+
+
diff --git a/src/Services/Notification/Notification.API/Program.cs b/src/Services/Notification/Notification.API/Program.cs
index 6219c9c..5b8c1c5 100644
--- a/src/Services/Notification/Notification.API/Program.cs
+++ b/src/Services/Notification/Notification.API/Program.cs
@@ -3,6 +3,7 @@
using Notification.Infrastructure.Data;
using Notification.Infrastructure.Repositories;
using Microsoft.EntityFrameworkCore;
+using Shared.Monitoring.Extensions;
var builder = WebApplication.CreateBuilder(args);
@@ -18,6 +19,8 @@
builder.Services.AddScoped();
builder.Services.AddScoped();
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "NotificationService");
+
var app = builder.Build();
using (var scope = app.Services.CreateScope())
@@ -32,6 +35,7 @@
app.UseSwaggerUI();
}
+app.UseAppInsightsMonitoring();
app.MapControllers();
app.MapHealthChecks("/healthz");
diff --git a/src/Services/Order/Order.API/Order.API.csproj b/src/Services/Order/Order.API/Order.API.csproj
index 54aad4b..ad10c2a 100644
--- a/src/Services/Order/Order.API/Order.API.csproj
+++ b/src/Services/Order/Order.API/Order.API.csproj
@@ -7,8 +7,9 @@
-
-
+
+
+
diff --git a/src/Services/Order/Order.API/Program.cs b/src/Services/Order/Order.API/Program.cs
index 4512675..2783fef 100644
--- a/src/Services/Order/Order.API/Program.cs
+++ b/src/Services/Order/Order.API/Program.cs
@@ -1,5 +1,6 @@
using Order.Infrastructure.Data;
using Microsoft.EntityFrameworkCore;
+using Shared.Monitoring.Extensions;
var builder = WebApplication.CreateBuilder(args);
@@ -11,6 +12,8 @@
builder.Services.AddDbContext(options =>
options.UseNpgsql(builder.Configuration.GetConnectionString("DefaultConnection")));
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "OrderService");
+
var app = builder.Build();
if (app.Environment.IsDevelopment())
@@ -19,6 +22,7 @@
app.UseSwaggerUI();
}
+app.UseAppInsightsMonitoring();
app.MapControllers();
app.MapHealthChecks("/healthz");
diff --git a/src/Services/Product/Product.API/Product.API.csproj b/src/Services/Product/Product.API/Product.API.csproj
index 68be876..4a9b268 100644
--- a/src/Services/Product/Product.API/Product.API.csproj
+++ b/src/Services/Product/Product.API/Product.API.csproj
@@ -7,8 +7,9 @@
-
-
+
+
+
diff --git a/src/Services/Product/Product.API/Program.cs b/src/Services/Product/Product.API/Program.cs
index 73146ef..a5ac469 100644
--- a/src/Services/Product/Product.API/Program.cs
+++ b/src/Services/Product/Product.API/Program.cs
@@ -1,5 +1,6 @@
using Product.Infrastructure.Data;
using Microsoft.EntityFrameworkCore;
+using Shared.Monitoring.Extensions;
var builder = WebApplication.CreateBuilder(args);
@@ -11,6 +12,8 @@
builder.Services.AddDbContext(options =>
options.UseNpgsql(builder.Configuration.GetConnectionString("DefaultConnection")));
+builder.Services.AddAppInsightsMonitoring(builder.Configuration, "ProductService");
+
var app = builder.Build();
if (app.Environment.IsDevelopment())
@@ -19,6 +22,7 @@
app.UseSwaggerUI();
}
+app.UseAppInsightsMonitoring();
app.MapControllers();
app.MapHealthChecks("/healthz");
diff --git a/src/Shared/Shared.Monitoring/AppInsightsConfig.cs b/src/Shared/Shared.Monitoring/AppInsightsConfig.cs
new file mode 100644
index 0000000..82f373e
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/AppInsightsConfig.cs
@@ -0,0 +1,25 @@
+namespace Shared.Monitoring;
+
+public class AppInsightsConfig
+{
+ public const string SectionName = "ApplicationInsights";
+
+ public string ConnectionString { get; set; } = string.Empty;
+ public string CloudRoleName { get; set; } = string.Empty;
+ public bool EnableAdaptiveSampling { get; set; } = true;
+ public double SamplingPercentage { get; set; } = 100;
+ public bool EnableDependencyTracking { get; set; } = true;
+ public bool EnablePerformanceCounters { get; set; } = true;
+ public bool EnableAiDiagnostics { get; set; } = true;
+ public int MetricCollectionIntervalSeconds { get; set; } = 60;
+ public AnomalyDetectionConfig AnomalyDetection { get; set; } = new();
+}
+
+public class AnomalyDetectionConfig
+{
+ public double ResponseTimeThresholdMs { get; set; } = 2000;
+ public double ErrorRateThresholdPercent { get; set; } = 5;
+ public double CpuThresholdPercent { get; set; } = 80;
+ public double MemoryThresholdPercent { get; set; } = 85;
+ public int EvaluationWindowMinutes { get; set; } = 5;
+}
diff --git a/src/Shared/Shared.Monitoring/Extensions/AppInsightsServiceExtensions.cs b/src/Shared/Shared.Monitoring/Extensions/AppInsightsServiceExtensions.cs
new file mode 100644
index 0000000..35712b7
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Extensions/AppInsightsServiceExtensions.cs
@@ -0,0 +1,67 @@
+using Microsoft.ApplicationInsights.Extensibility;
+using Microsoft.AspNetCore.Builder;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Shared.Monitoring.HealthChecks;
+using Shared.Monitoring.Metrics;
+using Shared.Monitoring.Middleware;
+using Shared.Monitoring.Telemetry;
+
+namespace Shared.Monitoring.Extensions;
+
+///
+/// Extension methods to register Application Insights AI monitoring
+/// across all microservices with a single call.
+///
+public static class AppInsightsServiceExtensions
+{
+ public static IServiceCollection AddAppInsightsMonitoring(
+ this IServiceCollection services,
+ IConfiguration configuration,
+ string serviceName)
+ {
+ var config = new AppInsightsConfig();
+ configuration.GetSection(AppInsightsConfig.SectionName).Bind(config);
+
+ if (!string.IsNullOrEmpty(config.ConnectionString))
+ {
+ services.AddApplicationInsightsTelemetry(options =>
+ {
+ options.ConnectionString = config.ConnectionString;
+ options.EnableAdaptiveSampling = config.EnableAdaptiveSampling;
+ });
+ }
+ else
+ {
+ services.AddApplicationInsightsTelemetry();
+ }
+
+ services.AddSingleton(
+ new ServiceTelemetryInitializer(
+ string.IsNullOrEmpty(config.CloudRoleName)
+ ? serviceName
+ : config.CloudRoleName));
+
+ services.AddHttpContextAccessor();
+ services.AddSingleton();
+
+ services.AddApplicationInsightsTelemetryProcessor();
+
+ services.AddSingleton(sp =>
+ {
+ var client = sp.GetRequiredService();
+ return new ServiceMetricsCollector(client, serviceName);
+ });
+
+ services.AddHealthChecks()
+ .AddCheck("app-insights");
+
+ return services;
+ }
+
+ public static IApplicationBuilder UseAppInsightsMonitoring(this IApplicationBuilder app)
+ {
+ app.UseMiddleware();
+ return app;
+ }
+}
diff --git a/src/Shared/Shared.Monitoring/HealthChecks/AppInsightsHealthCheck.cs b/src/Shared/Shared.Monitoring/HealthChecks/AppInsightsHealthCheck.cs
new file mode 100644
index 0000000..de160c3
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/HealthChecks/AppInsightsHealthCheck.cs
@@ -0,0 +1,42 @@
+using Microsoft.ApplicationInsights;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+
+namespace Shared.Monitoring.HealthChecks;
+
+///
+/// Verifies Application Insights connectivity by flushing telemetry.
+/// Reports degraded status if the connection string is not configured.
+///
+public class AppInsightsHealthCheck : IHealthCheck
+{
+ private readonly TelemetryClient _telemetryClient;
+
+ public AppInsightsHealthCheck(TelemetryClient telemetryClient)
+ {
+ _telemetryClient = telemetryClient;
+ }
+
+ public Task CheckHealthAsync(
+ HealthCheckContext context,
+ CancellationToken cancellationToken = default)
+ {
+ try
+ {
+ if (string.IsNullOrEmpty(_telemetryClient.InstrumentationKey)
+ && string.IsNullOrEmpty(_telemetryClient.TelemetryConfiguration.ConnectionString))
+ {
+ return Task.FromResult(HealthCheckResult.Degraded(
+ "Application Insights connection string is not configured."));
+ }
+
+ _telemetryClient.Flush();
+ return Task.FromResult(HealthCheckResult.Healthy(
+ "Application Insights telemetry channel is active."));
+ }
+ catch (Exception ex)
+ {
+ return Task.FromResult(HealthCheckResult.Unhealthy(
+ "Application Insights health check failed.", ex));
+ }
+ }
+}
diff --git a/src/Shared/Shared.Monitoring/Metrics/ServiceMetricsCollector.cs b/src/Shared/Shared.Monitoring/Metrics/ServiceMetricsCollector.cs
new file mode 100644
index 0000000..53dcaaa
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Metrics/ServiceMetricsCollector.cs
@@ -0,0 +1,132 @@
+using System.Collections.Concurrent;
+using System.Diagnostics;
+using Microsoft.ApplicationInsights;
+using Microsoft.ApplicationInsights.Metrics;
+
+namespace Shared.Monitoring.Metrics;
+
+///
+/// Collects and tracks custom service metrics for AI analysis.
+/// Maintains a rolling window of request/error data for real-time health scoring.
+///
+public class ServiceMetricsCollector
+{
+ private readonly TelemetryClient _telemetryClient;
+ private readonly string _serviceName;
+ private readonly ConcurrentQueue _recentRequests = new();
+ private readonly TimeSpan _windowSize = TimeSpan.FromMinutes(5);
+ private long _totalRequests;
+ private long _failedRequests;
+ private long _totalDependencyCalls;
+ private long _failedDependencyCalls;
+
+ public ServiceMetricsCollector(TelemetryClient telemetryClient, string serviceName)
+ {
+ _telemetryClient = telemetryClient;
+ _serviceName = serviceName;
+ }
+
+ public void TrackRequest(string endpoint, double durationMs, bool success, int statusCode)
+ {
+ Interlocked.Increment(ref _totalRequests);
+ if (!success)
+ Interlocked.Increment(ref _failedRequests);
+
+ var entry = new RequestMetricEntry(
+ DateTime.UtcNow, endpoint, durationMs, success, statusCode);
+ _recentRequests.Enqueue(entry);
+
+ PruneOldEntries();
+
+ _telemetryClient.GetMetric($"{_serviceName}.RequestDuration", "Endpoint")
+ .TrackValue(durationMs, endpoint);
+ _telemetryClient.GetMetric($"{_serviceName}.RequestCount", "StatusCode")
+ .TrackValue(1, statusCode.ToString());
+ }
+
+ public void TrackDependency(string dependencyType, string target, double durationMs, bool success)
+ {
+ Interlocked.Increment(ref _totalDependencyCalls);
+ if (!success)
+ Interlocked.Increment(ref _failedDependencyCalls);
+
+ _telemetryClient.GetMetric($"{_serviceName}.DependencyDuration", "Type")
+ .TrackValue(durationMs, dependencyType);
+ }
+
+ public void TrackCustomEvent(string eventName, IDictionary? properties = null)
+ {
+ _telemetryClient.TrackEvent($"{_serviceName}.{eventName}", properties);
+ }
+
+ public ServiceHealthSnapshot GetHealthSnapshot()
+ {
+ PruneOldEntries();
+ var recentList = _recentRequests.ToArray();
+
+ var totalRecent = recentList.Length;
+ var failedRecent = recentList.Count(r => !r.Success);
+ var avgDuration = totalRecent > 0
+ ? recentList.Average(r => r.DurationMs)
+ : 0;
+ var p95Duration = totalRecent > 0
+ ? CalculatePercentile(recentList.Select(r => r.DurationMs).ToArray(), 95)
+ : 0;
+
+ var process = Process.GetCurrentProcess();
+ var memoryMb = process.WorkingSet64 / (1024.0 * 1024.0);
+ var cpuTime = process.TotalProcessorTime;
+
+ return new ServiceHealthSnapshot(
+ ServiceName: _serviceName,
+ Timestamp: DateTime.UtcNow,
+ TotalRequests: Interlocked.Read(ref _totalRequests),
+ FailedRequests: Interlocked.Read(ref _failedRequests),
+ RecentRequestCount: totalRecent,
+ RecentErrorCount: failedRecent,
+ ErrorRatePercent: totalRecent > 0 ? (double)failedRecent / totalRecent * 100 : 0,
+ AverageResponseTimeMs: Math.Round(avgDuration, 2),
+ P95ResponseTimeMs: Math.Round(p95Duration, 2),
+ MemoryUsageMb: Math.Round(memoryMb, 2),
+ CpuTimeSeconds: Math.Round(cpuTime.TotalSeconds, 2),
+ TotalDependencyCalls: Interlocked.Read(ref _totalDependencyCalls),
+ FailedDependencyCalls: Interlocked.Read(ref _failedDependencyCalls));
+ }
+
+ private void PruneOldEntries()
+ {
+ var cutoff = DateTime.UtcNow - _windowSize;
+ while (_recentRequests.TryPeek(out var oldest) && oldest.Timestamp < cutoff)
+ _recentRequests.TryDequeue(out _);
+ }
+
+ private static double CalculatePercentile(double[] values, int percentile)
+ {
+ if (values.Length == 0) return 0;
+ Array.Sort(values);
+ var index = (int)Math.Ceiling(percentile / 100.0 * values.Length) - 1;
+ return values[Math.Max(0, index)];
+ }
+}
+
+public record RequestMetricEntry(
+ DateTime Timestamp,
+ string Endpoint,
+ double DurationMs,
+ bool Success,
+ int StatusCode);
+
+public record ServiceHealthSnapshot(
+ string ServiceName,
+ DateTime Timestamp,
+ long TotalRequests,
+ long FailedRequests,
+ int RecentRequestCount,
+ int RecentErrorCount,
+ double ErrorRatePercent,
+ double AverageResponseTimeMs,
+ double P95ResponseTimeMs,
+ double MemoryUsageMb,
+ double CpuTimeSeconds,
+ long TotalDependencyCalls,
+ long FailedDependencyCalls);
diff --git a/src/Shared/Shared.Monitoring/Middleware/AiTelemetryMiddleware.cs b/src/Shared/Shared.Monitoring/Middleware/AiTelemetryMiddleware.cs
new file mode 100644
index 0000000..780917e
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Middleware/AiTelemetryMiddleware.cs
@@ -0,0 +1,55 @@
+using System.Diagnostics;
+using Microsoft.AspNetCore.Http;
+using Microsoft.Extensions.Logging;
+using Shared.Monitoring.Metrics;
+
+namespace Shared.Monitoring.Middleware;
+
+///
+/// Captures per-request telemetry and feeds it into the ServiceMetricsCollector
+/// for real-time AI health analysis.
+///
+public class AiTelemetryMiddleware
+{
+ private readonly RequestDelegate _next;
+ private readonly ServiceMetricsCollector _metricsCollector;
+ private readonly ILogger _logger;
+
+ public AiTelemetryMiddleware(
+ RequestDelegate next,
+ ServiceMetricsCollector metricsCollector,
+ ILogger logger)
+ {
+ _next = next;
+ _metricsCollector = metricsCollector;
+ _logger = logger;
+ }
+
+ public async Task InvokeAsync(HttpContext context)
+ {
+ var stopwatch = Stopwatch.StartNew();
+ var endpoint = $"{context.Request.Method} {context.Request.Path}";
+
+ try
+ {
+ await _next(context);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Unhandled exception on {Endpoint}", endpoint);
+ throw;
+ }
+ finally
+ {
+ stopwatch.Stop();
+ var statusCode = context.Response.StatusCode;
+ var success = statusCode < 400;
+
+ _metricsCollector.TrackRequest(
+ endpoint,
+ stopwatch.Elapsed.TotalMilliseconds,
+ success,
+ statusCode);
+ }
+ }
+}
diff --git a/src/Shared/Shared.Monitoring/Shared.Monitoring.csproj b/src/Shared/Shared.Monitoring/Shared.Monitoring.csproj
new file mode 100644
index 0000000..350d26f
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Shared.Monitoring.csproj
@@ -0,0 +1,12 @@
+
+
+ net10.0
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/src/Shared/Shared.Monitoring/Telemetry/AiDiagnosticTelemetryProcessor.cs b/src/Shared/Shared.Monitoring/Telemetry/AiDiagnosticTelemetryProcessor.cs
new file mode 100644
index 0000000..70e7c74
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Telemetry/AiDiagnosticTelemetryProcessor.cs
@@ -0,0 +1,66 @@
+using Microsoft.ApplicationInsights.Channel;
+using Microsoft.ApplicationInsights.DataContracts;
+using Microsoft.ApplicationInsights.Extensibility;
+
+namespace Shared.Monitoring.Telemetry;
+
+///
+/// Enriches request and dependency telemetry with AI diagnostic metadata.
+/// Flags slow requests and failed dependencies for anomaly detection.
+///
+public class AiDiagnosticTelemetryProcessor : ITelemetryProcessor
+{
+ private readonly ITelemetryProcessor _next;
+ private readonly double _slowRequestThresholdMs;
+
+ public AiDiagnosticTelemetryProcessor(
+ ITelemetryProcessor next,
+ double slowRequestThresholdMs = 2000)
+ {
+ _next = next;
+ _slowRequestThresholdMs = slowRequestThresholdMs;
+ }
+
+ public void Process(ITelemetry item)
+ {
+ if (item is RequestTelemetry request)
+ {
+ var durationMs = request.Duration.TotalMilliseconds;
+ if (request is ISupportProperties props)
+ {
+ props.Properties["AI.IsSlowRequest"] =
+ (durationMs > _slowRequestThresholdMs).ToString();
+ props.Properties["AI.DurationBucket"] = GetDurationBucket(durationMs);
+ }
+ }
+
+ if (item is DependencyTelemetry dependency)
+ {
+ if (!dependency.Success.GetValueOrDefault(true)
+ && dependency is ISupportProperties depProps)
+ {
+ depProps.Properties["AI.FailedDependency"] = "true";
+ depProps.Properties["AI.DependencyTarget"] = dependency.Target;
+ }
+ }
+
+ if (item is ExceptionTelemetry exception)
+ {
+ if (exception is ISupportProperties exProps)
+ {
+ exProps.Properties["AI.ExceptionType"] =
+ exception.Exception?.GetType().Name ?? "Unknown";
+ }
+ }
+
+ _next.Process(item);
+ }
+
+ private static string GetDurationBucket(double ms) => ms switch
+ {
+ < 100 => "Fast",
+ < 500 => "Normal",
+ < 2000 => "Slow",
+ _ => "Critical"
+ };
+}
diff --git a/src/Shared/Shared.Monitoring/Telemetry/CorrelationTelemetryInitializer.cs b/src/Shared/Shared.Monitoring/Telemetry/CorrelationTelemetryInitializer.cs
new file mode 100644
index 0000000..88b25a3
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Telemetry/CorrelationTelemetryInitializer.cs
@@ -0,0 +1,34 @@
+using Microsoft.ApplicationInsights.Channel;
+using Microsoft.ApplicationInsights.DataContracts;
+using Microsoft.ApplicationInsights.Extensibility;
+using Microsoft.AspNetCore.Http;
+
+namespace Shared.Monitoring.Telemetry;
+
+///
+/// Propagates X-Correlation-ID from HTTP headers into Application Insights
+/// telemetry as a custom property, linking distributed traces across services.
+///
+public class CorrelationTelemetryInitializer : ITelemetryInitializer
+{
+ private const string CorrelationIdHeader = "X-Correlation-ID";
+ private readonly IHttpContextAccessor _httpContextAccessor;
+
+ public CorrelationTelemetryInitializer(IHttpContextAccessor httpContextAccessor)
+ {
+ _httpContextAccessor = httpContextAccessor;
+ }
+
+ public void Initialize(ITelemetry telemetry)
+ {
+ var httpContext = _httpContextAccessor.HttpContext;
+ if (httpContext is null)
+ return;
+
+ if (httpContext.Request.Headers.TryGetValue(CorrelationIdHeader, out var correlationId)
+ && telemetry is ISupportProperties propTelemetry)
+ {
+ propTelemetry.Properties["CorrelationId"] = correlationId.ToString();
+ }
+ }
+}
diff --git a/src/Shared/Shared.Monitoring/Telemetry/ServiceTelemetryInitializer.cs b/src/Shared/Shared.Monitoring/Telemetry/ServiceTelemetryInitializer.cs
new file mode 100644
index 0000000..4159eb4
--- /dev/null
+++ b/src/Shared/Shared.Monitoring/Telemetry/ServiceTelemetryInitializer.cs
@@ -0,0 +1,26 @@
+using Microsoft.ApplicationInsights.Channel;
+using Microsoft.ApplicationInsights.Extensibility;
+
+namespace Shared.Monitoring.Telemetry;
+
+///
+/// Sets cloud role name and instance on all telemetry items so that
+/// Application Insights Application Map groups each microservice correctly.
+///
+public class ServiceTelemetryInitializer : ITelemetryInitializer
+{
+ private readonly string _roleName;
+ private readonly string _roleInstance;
+
+ public ServiceTelemetryInitializer(string roleName)
+ {
+ _roleName = roleName;
+ _roleInstance = Environment.MachineName;
+ }
+
+ public void Initialize(ITelemetry telemetry)
+ {
+ telemetry.Context.Cloud.RoleName = _roleName;
+ telemetry.Context.Cloud.RoleInstance = _roleInstance;
+ }
+}