From 557f6faff67373e22f0aca06851d6e4622bcd6a5 Mon Sep 17 00:00:00 2001 From: Samir Patil Date: Wed, 25 Jun 2025 15:29:18 +0530 Subject: [PATCH 1/2] Enhance log collection in Azure App Service script with size optimization and verbosity control. Update README with log level configuration details and examples. Add cursor file to .gitignore. --- .gitignore | 4 + .../azure-appservice-webapp-health/README.md | 47 +++++++- .../appservice_logs.sh | 111 +++++++++++++++++- .../runbook.robot | 18 ++- 4 files changed, 171 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 9b81cc2a6..3fa287b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,7 @@ build **gcp_credentials_json **bashfile-** **db.sqlite3** + + +# Ignore cursor files +cursor-docs/ \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/README.md b/codebundles/azure-appservice-webapp-health/README.md index daa6352a4..088569f01 100644 --- a/codebundles/azure-appservice-webapp-health/README.md +++ b/codebundles/azure-appservice-webapp-health/README.md @@ -1,18 +1,61 @@ # Azure App Service Triage + Checks key App Service metrics and the service plan, fetches logs, config and activities for the service and generates a report of present issues for any found. ## Configuration The TaskSet requires initialization to import necessary secrets, services, and user variables. The following variables should be set: -export APPSERVICE +export APP_SERVICE_NAME export AZ_RESOURCE_GROUP +## Log Collection Configuration (Optimized for Size) + +The log collection has been optimized to prevent large report files while maintaining diagnostic value. You can control the verbosity: + +### Log Levels + +- `ERROR`: Only errors and critical issues (minimal output) +- `WARN`: Warnings and errors +- `INFO`: Informational messages (default, filters for errors/warnings) +- `DEBUG`: Detailed debugging information +- `VERBOSE`: All logs including system events (use with caution) + +### Configuration Variables + +- `LOG_LEVEL`: Set log verbosity (default: INFO) +- `MAX_LOG_LINES`: Maximum lines per log file (default: 100) +- `MAX_TOTAL_SIZE`: Maximum total output size in bytes (default: 500000) + +### Examples + +```bash +# For production troubleshooting (minimal output) +export LOG_LEVEL=ERROR +export MAX_LOG_LINES=50 + +# For development debugging (more detailed) +export LOG_LEVEL=DEBUG +export MAX_LOG_LINES=200 +``` + +## Size Optimization + +The logs task now automatically: + +- Filters out verbose HTTP access logs +- Focuses on application-level logs and errors +- Limits output to 500KB by default +- Provides truncation warnings when limits are reached +- Directs users to Azure Portal for complete logs when needed + +This prevents report.jsonl files from exceeding UI rendering limits while maintaining diagnostic capability. + ## Notes This codebundle assumes the service principal authentication flow. ## TODO + - [ ] look for notable activities in list - [ ] config best practices check -- [ ] Add documentation \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/appservice_logs.sh b/codebundles/azure-appservice-webapp-health/appservice_logs.sh index 5cd22076a..6307b35d2 100755 --- a/codebundles/azure-appservice-webapp-health/appservice_logs.sh +++ b/codebundles/azure-appservice-webapp-health/appservice_logs.sh @@ -7,17 +7,120 @@ # AZ_TENANT # APP_SERVICE_NAME # AZ_RESOURCE_GROUP +# LOG_LEVEL (Optional, default is INFO) +# MAX_LOG_LINES (Optional, default is 100) + +# Set defaults +LOG_LEVEL="${LOG_LEVEL:-INFO}" +MAX_LOG_LINES="${MAX_LOG_LINES:-100}" +MAX_TOTAL_SIZE="${MAX_TOTAL_SIZE:-500000}" # 500KB limit LOG_PATH="_rw_logs_$APP_SERVICE_NAME.zip" subscription_id=$(az account show --query "id" -o tsv) -# # Set the subscription +# Set the subscription az account set --subscription $subscription_id +# Download and extract logs az webapp log download --name $APP_SERVICE_NAME --resource-group $AZ_RESOURCE_GROUP --subscription $subscription_id --log-file $LOG_PATH -log_contents=$(unzip -qq -c $LOG_PATH) -echo "Azure App Service $APP_SERVICE_NAME logs:" +TEMP_DIR="/tmp/_temp_logs_$$" +mkdir -p "$TEMP_DIR" +unzip -o $LOG_PATH -d "$TEMP_DIR" >/dev/null 2>&1 +# Fix permissions on extracted files +chmod -R 755 "$TEMP_DIR" 2>/dev/null || true + +output_size=0 +max_exceeded=false + +echo "Azure App Service $APP_SERVICE_NAME logs (Level: $LOG_LEVEL, Max Lines: $MAX_LOG_LINES):" echo "" + +# Function to add content with size check +add_content() { + local content="$1" + local content_size=${#content} + + if (( output_size + content_size > MAX_TOTAL_SIZE )); then + if [ "$max_exceeded" = false ]; then + echo "" + echo "⚠️ Output truncated - size limit reached (${MAX_TOTAL_SIZE} bytes)" + echo "💡 To see more logs, reduce LOG_LEVEL to ERROR or WARN, or download logs directly from Azure Portal" + max_exceeded=true + fi + return 1 + fi + + echo "$content" + output_size=$((output_size + content_size)) + return 0 +} + +# Define log level priorities for filtering (compatible with older bash) +case "$LOG_LEVEL" in + "ERROR") CURRENT_PRIORITY=1 ;; + "WARN") CURRENT_PRIORITY=2 ;; + "INFO") CURRENT_PRIORITY=3 ;; + "DEBUG") CURRENT_PRIORITY=4 ;; + "VERBOSE") CURRENT_PRIORITY=5 ;; + *) CURRENT_PRIORITY=3 ;; # Default to INFO +esac + +# Display Application logs (errors, warnings, app output) +if [ -d "$TEMP_DIR/LogFiles/Application" ]; then + add_content "=== Application Logs ===" || exit 0 + + for log_file in "$TEMP_DIR/LogFiles/Application"/*; do + if [ -f "$log_file" ]; then + add_content "--- $(basename "$log_file") ---" || exit 0 + + # Filter by log level - only show errors/warnings for INFO and above + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # For INFO level and higher, filter for important entries + filtered_content=$(grep -iE 'error|warn|exception|fail|critical' "$log_file" | tail -n "$MAX_LOG_LINES" 2>/dev/null || echo "No errors/warnings found in recent logs") + else + # For DEBUG/VERBOSE, show more content but still limited + filtered_content=$(tail -n "$MAX_LOG_LINES" "$log_file") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done +else + add_content "No Application logs directory found" || exit 0 +fi + +# Display Detailed Error logs (4xx/5xx errors) - always include if present +if [ -d "$TEMP_DIR/LogFiles/DetailedErrors" ]; then + add_content "=== Detailed Error Logs ===" || exit 0 + + for error_file in "$TEMP_DIR/LogFiles/DetailedErrors"/*; do + if [ -f "$error_file" ]; then + add_content "--- $(basename "$error_file") ---" || exit 0 + error_content=$(cat "$error_file") + add_content "$error_content" || exit 0 + add_content "" || exit 0 + fi + done +fi + +# Display System Event Log (summary only) - avoid verbose XML dumps +if [ -f "$TEMP_DIR/LogFiles/eventlog.xml" ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== System Events (Last 20 Events) ===" || exit 0 + if command -v xmllint &>/dev/null; then + event_summary=$(xmllint --xpath '//Event[position()<=20]/concat("Time=", System/TimeCreated/@SystemTime, " | Level=", System/Level/text(), " | Message=", substring(RenderingInfo/Message/text(), 1, 100), "\n")' "$TEMP_DIR/LogFiles/eventlog.xml" 2>/dev/null || echo "No recent system events") + add_content "$event_summary" || exit 0 + else + add_content "xmllint not available, skipping system events" || exit 0 + fi +fi + +# # Cleanup +# rm -rf "$TEMP_DIR" "$LOG_PATH" + echo "" -echo -e "$log_contents" \ No newline at end of file +echo "📊 Output size: ${output_size} bytes (Limit: ${MAX_TOTAL_SIZE} bytes)" +if [ "$max_exceeded" = true ]; then + echo "🔍 For complete logs, visit: https://portal.azure.com and navigate to your App Service > Logs" +fi \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/runbook.robot b/codebundles/azure-appservice-webapp-health/runbook.robot index cdb84007d..b76949eb6 100644 --- a/codebundles/azure-appservice-webapp-health/runbook.robot +++ b/codebundles/azure-appservice-webapp-health/runbook.robot @@ -134,8 +134,8 @@ Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `$ Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROUP}` - [Documentation] Fetch logs of appservice workload - [Tags] appservice logs tail access:read-only + [Documentation] Fetch filtered logs of appservice workload (application-level only, optimized for report size) + [Tags] appservice logs filtered access:read-only ${logs}= RW.CLI.Run Bash File ... bash_file=appservice_logs.sh ... env=${env} @@ -312,6 +312,16 @@ Suite Initialization ... description=The threshold of average response time (ms) in which to generate an issue. Higher than this value indicates slow response time. ... pattern=\w* ... default=300 + ${LOG_LEVEL}= RW.Core.Import User Variable LOG_LEVEL + ... type=string + ... description=Log verbosity level: ERROR, WARN, INFO, DEBUG, VERBOSE + ... pattern=\w* + ... default=INFO + ${MAX_LOG_LINES}= RW.Core.Import User Variable MAX_LOG_LINES + ... type=string + ... description=Maximum lines per log file to display + ... pattern=\w* + ... default=100 Set Suite Variable ${APP_SERVICE_NAME} ${APP_SERVICE_NAME} Set Suite Variable ${AZ_RESOURCE_GROUP} ${AZ_RESOURCE_GROUP} Set Suite Variable ${TIME_PERIOD_MINUTES} ${TIME_PERIOD_MINUTES} @@ -323,7 +333,9 @@ Suite Initialization Set Suite Variable ${HTTP4XX_THRESHOLD} ${HTTP4XX_THRESHOLD} Set Suite Variable ${DISK_USAGE_THRESHOLD} ${DISK_USAGE_THRESHOLD} Set Suite Variable ${AVG_RSP_TIME} ${AVG_RSP_TIME} + Set Suite Variable ${LOG_LEVEL} ${LOG_LEVEL} + Set Suite Variable ${MAX_LOG_LINES} ${MAX_LOG_LINES} Set Suite Variable ... ${env} - ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}"} \ No newline at end of file + ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}", "LOG_LEVEL":"${LOG_LEVEL}", "MAX_LOG_LINES":"${MAX_LOG_LINES}"} \ No newline at end of file From 6446f8b54b1a188c78d24145b9e77241ee64d30f Mon Sep 17 00:00:00 2001 From: Samir Patil Date: Wed, 25 Jun 2025 17:24:29 +0530 Subject: [PATCH 2/2] Enhance log collection configuration in Azure App Service with new features for Docker logs, deployment history, and performance traces. Update README and runbook to reflect changes in verbosity control and configuration examples. --- .../azure-appservice-webapp-health/README.md | 46 ++- .../appservice_logs_enhanced.sh | 381 ++++++++++++++++++ .../runbook.robot | 20 +- 3 files changed, 439 insertions(+), 8 deletions(-) create mode 100755 codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh diff --git a/codebundles/azure-appservice-webapp-health/README.md b/codebundles/azure-appservice-webapp-health/README.md index 088569f01..31a6f8326 100644 --- a/codebundles/azure-appservice-webapp-health/README.md +++ b/codebundles/azure-appservice-webapp-health/README.md @@ -9,9 +9,9 @@ The TaskSet requires initialization to import necessary secrets, services, and u export APP_SERVICE_NAME export AZ_RESOURCE_GROUP -## Log Collection Configuration (Optimized for Size) +## Log Collection Configuration (Enhanced & Optimized) -The log collection has been optimized to prevent large report files while maintaining diagnostic value. You can control the verbosity: +The log collection has been enhanced with multiple log sources while maintaining size optimization. You can control both verbosity and which log sources to include: ### Log Levels @@ -21,22 +21,54 @@ The log collection has been optimized to prevent large report files while mainta - `DEBUG`: Detailed debugging information - `VERBOSE`: All logs including system events (use with caution) +### Enhanced Features + +- **Docker Container Logs**: Container startup, runtime, and error diagnostics +- **Deployment History**: Recent deployment success/failure status and build logs +- **Performance Traces**: Slow requests and failed API calls (DEBUG+ only) + ### Configuration Variables +#### Core Settings + - `LOG_LEVEL`: Set log verbosity (default: INFO) - `MAX_LOG_LINES`: Maximum lines per log file (default: 100) - `MAX_TOTAL_SIZE`: Maximum total output size in bytes (default: 500000) -### Examples +#### Enhanced Features (New) + +- `INCLUDE_DOCKER_LOGS`: Include Docker container logs (default: true) +- `INCLUDE_DEPLOYMENT_LOGS`: Include deployment history (default: true) +- `INCLUDE_PERFORMANCE_TRACES`: Include performance traces (default: false) + +### Configuration Examples ```bash -# For production troubleshooting (minimal output) +# Production troubleshooting (minimal output) export LOG_LEVEL=ERROR -export MAX_LOG_LINES=50 +export INCLUDE_DOCKER_LOGS=false +export INCLUDE_DEPLOYMENT_LOGS=false + +# Standard configuration (recommended default) +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=true + +# Docker container troubleshooting +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=false + +# Deployment troubleshooting +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=false +export INCLUDE_DEPLOYMENT_LOGS=true -# For development debugging (more detailed) +# Full diagnostic mode (advanced) export LOG_LEVEL=DEBUG -export MAX_LOG_LINES=200 +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=true +export INCLUDE_PERFORMANCE_TRACES=true ``` ## Size Optimization diff --git a/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh b/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh new file mode 100755 index 000000000..24232bca7 --- /dev/null +++ b/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh @@ -0,0 +1,381 @@ +#!/bin/bash + +# Enhanced Azure App Service Logs Collection Script +# Implements all optimization phases while maintaining size limits +# +# ENV: +# AZ_USERNAME +# AZ_SECRET_VALUE +# AZ_SUBSCRIPTION +# AZ_TENANT +# APP_SERVICE_NAME +# AZ_RESOURCE_GROUP +# LOG_LEVEL (Optional, default is INFO) +# MAX_LOG_LINES (Optional, default is 100) +# INCLUDE_DOCKER_LOGS (Optional, default is true) +# INCLUDE_DEPLOYMENT_LOGS (Optional, default is true) +# INCLUDE_PERFORMANCE_TRACES (Optional, default is false) + +# Set defaults +LOG_LEVEL="${LOG_LEVEL:-INFO}" +MAX_LOG_LINES="${MAX_LOG_LINES:-100}" +MAX_TOTAL_SIZE="${MAX_TOTAL_SIZE:-500000}" # 500KB limit +INCLUDE_DOCKER_LOGS="${INCLUDE_DOCKER_LOGS:-true}" +INCLUDE_DEPLOYMENT_LOGS="${INCLUDE_DEPLOYMENT_LOGS:-true}" +INCLUDE_PERFORMANCE_TRACES="${INCLUDE_PERFORMANCE_TRACES:-false}" + +LOG_PATH="_rw_logs_$APP_SERVICE_NAME.zip" +subscription_id=$(az account show --query "id" -o tsv) + +# Set the subscription +az account set --subscription $subscription_id + +# Download and extract logs +az webapp log download --name $APP_SERVICE_NAME --resource-group $AZ_RESOURCE_GROUP --subscription $subscription_id --log-file $LOG_PATH + +TEMP_DIR="/tmp/_temp_logs_$$" +mkdir -p "$TEMP_DIR" +unzip -o $LOG_PATH -d "$TEMP_DIR" >/dev/null 2>&1 +# Fix permissions on extracted files +chmod -R 755 "$TEMP_DIR" 2>/dev/null || true + +output_size=0 +max_exceeded=false + +echo "Azure App Service $APP_SERVICE_NAME Enhanced Logs (Level: $LOG_LEVEL, Max Lines: $MAX_LOG_LINES):" +echo "Features: Docker[${INCLUDE_DOCKER_LOGS}] | Deployments[${INCLUDE_DEPLOYMENT_LOGS}] | Performance[${INCLUDE_PERFORMANCE_TRACES}]" +echo "" + +# Function to add content with size check +add_content() { + local content="$1" + local content_size=${#content} + + if (( output_size + content_size > MAX_TOTAL_SIZE )); then + if [ "$max_exceeded" = false ]; then + echo "" + echo "⚠️ Output truncated - size limit reached (${MAX_TOTAL_SIZE} bytes)" + echo "💡 To see more logs, reduce LOG_LEVEL to ERROR or WARN, or download logs directly from Azure Portal" + max_exceeded=true + fi + return 1 + fi + + echo "$content" + output_size=$((output_size + content_size)) + return 0 +} + +# Define log level priorities for filtering (compatible with older bash) +case "$LOG_LEVEL" in + "ERROR") CURRENT_PRIORITY=1 ;; + "WARN") CURRENT_PRIORITY=2 ;; + "INFO") CURRENT_PRIORITY=3 ;; + "DEBUG") CURRENT_PRIORITY=4 ;; + "VERBOSE") CURRENT_PRIORITY=5 ;; + *) CURRENT_PRIORITY=3 ;; # Default to INFO +esac + +# ============================================================================= +# PHASE 1: APPLICATION LOGS (CORE - ALWAYS INCLUDED) +# ============================================================================= +# Debug: Check what's in the temp directory +if [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== Debug: Temp Directory Contents ===" || exit 0 + add_content "Temp dir: $TEMP_DIR" || exit 0 + add_content "Contents: $(ls -la "$TEMP_DIR" 2>/dev/null || echo 'No temp dir found')" || exit 0 + if [ -d "$TEMP_DIR/LogFiles" ]; then + add_content "LogFiles contents: $(ls -la "$TEMP_DIR/LogFiles" 2>/dev/null || echo 'No LogFiles dir')" || exit 0 + fi + add_content "" || exit 0 +fi + +if [ -d "$TEMP_DIR/LogFiles/Application" ]; then + add_content "=== Application Logs ===" || exit 0 + + for log_file in "$TEMP_DIR/LogFiles/Application"/*; do + if [ -f "$log_file" ]; then + add_content "--- $(basename "$log_file") ---" || exit 0 + + # Filter by log level - only show errors/warnings for INFO and above + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # For INFO level and higher, filter for important entries + filtered_content=$(grep -iE 'error|warn|exception|fail|critical' "$log_file" | tail -n "$MAX_LOG_LINES" 2>/dev/null || echo "No errors/warnings found in recent logs") + else + # For DEBUG/VERBOSE, show more content but still limited + filtered_content=$(tail -n "$MAX_LOG_LINES" "$log_file") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done +else + add_content "No Application logs directory found" || exit 0 +fi + +# ============================================================================= +# DOCKER CONTAINER LOGS +# ============================================================================= +if [ "$INCLUDE_DOCKER_LOGS" = "true" ] && [ -d "$TEMP_DIR/LogFiles" ]; then + docker_logs_found=false + + for docker_log in "$TEMP_DIR/LogFiles"/*_default_docker.log "$TEMP_DIR/LogFiles"/*_docker.log; do + if [ -f "$docker_log" ]; then + if [ "$docker_logs_found" = false ]; then + add_content "=== Docker Container Logs ===" || exit 0 + docker_logs_found=true + fi + + add_content "--- $(basename "$docker_log") ---" || exit 0 + + # Smart filtering based on log level - FOCUS ON ACTUAL PROBLEMS + if [ "$CURRENT_PRIORITY" -le 2 ]; then + # ERROR/WARN: Only critical failures (deduplicated) + filtered_content=$(grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -8 2>/dev/null || echo "No critical issues found") + elif [ "$CURRENT_PRIORITY" -eq 3 ]; then + # INFO: Show unique errors + key startup info + temp_content=$( + # Show unique critical errors + grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -5 + # Show application startup attempts (unique) + grep -iE 'npm start|next start|blog.*start' "$docker_log" | sort -u | head -3 + # Show ONE port configuration line, not all 40 + grep -E 'export PORT=' "$docker_log" | head -1 + ) + # Add restart/failure summary separately + restart_count=$(grep -c 'export PORT=' "$docker_log" 2>/dev/null || echo "0") + error_count=$(grep -c 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" 2>/dev/null || echo "0") + # Ensure we have valid integers + restart_count=${restart_count//[^0-9]/} + error_count=${error_count//[^0-9]/} + restart_count=${restart_count:-0} + error_count=${error_count:-0} + if [ "$restart_count" -gt 1 ]; then + temp_content="$temp_content"$'\n'"INFO: Container restarted $restart_count times with $error_count error logs" + fi + filtered_content=$(echo "$temp_content" | grep -v '^$' | head -10 2>/dev/null || echo "No significant events found") + else + # DEBUG/VERBOSE: More detailed but still avoid excessive repetition + temp_content=$( + # Unique errors and failures + grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -8 + # Key startup events (unique) + grep -iE 'app.*service.*on.*linux|npm.*start|build.*operation|manifest' "$docker_log" | sort -u | head -5 + # Port and environment info (limited) + grep -E 'export PORT=|NODE_PATH=' "$docker_log" | head -2 + ) + # Container restart summary separately + restart_count=$(grep -c 'A P P S E R V I C E O N L I N U X' "$docker_log" 2>/dev/null || echo "0") + # Ensure we have a valid integer + restart_count=${restart_count//[^0-9]/} + restart_count=${restart_count:-0} + if [ "$restart_count" -gt 1 ]; then + temp_content="$temp_content"$'\n'"DEBUG: Container restarted $restart_count times during log period" + fi + filtered_content=$(echo "$temp_content" | grep -v '^$' | head -15 2>/dev/null || tail -n 15 "$docker_log") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done + + if [ "$docker_logs_found" = false ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== Docker Container Logs ===" || exit 0 + add_content "No Docker container logs found" || exit 0 + add_content "" || exit 0 + fi +fi + +# ============================================================================= +# RECENT DEPLOYMENT HISTORY +# ============================================================================= +if [ "$INCLUDE_DEPLOYMENT_LOGS" = "true" ] && [ -d "$TEMP_DIR/deployments" ] && [ "$CURRENT_PRIORITY" -ge 3 ]; then + add_content "=== Recent Deployments (Last 3) ===" || exit 0 + + deployment_count=0 + # Sort by modification time, newest first + for deployment_dir in $(find "$TEMP_DIR/deployments" -name "log.log" -exec ls -t {} \; 2>/dev/null | head -3); do + if [ -f "$deployment_dir" ]; then + deployment_id=$(basename "$(dirname "$deployment_dir")") + add_content "--- Deployment: ${deployment_id:0:8}... ---" || exit 0 + + # Show key deployment events and outcomes + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # INFO: Focus on outcomes and errors + deployment_content=$(grep -iE 'successful|failed|error|warning|deployment.*complete|build.*complete|exception' "$deployment_dir" | head -8 2>/dev/null || echo "No deployment status found") + else + # DEBUG/VERBOSE: More detailed deployment steps + deployment_content=$(grep -iE 'successful|failed|error|warning|deployment|build|predeployment|package|npm|dotnet|restore' "$deployment_dir" | head -12 2>/dev/null || head -8 "$deployment_dir") + fi + + add_content "$deployment_content" || exit 0 + add_content "" || exit 0 + + deployment_count=$((deployment_count + 1)) + [ $deployment_count -ge 3 ] && break + fi + done + + if [ $deployment_count -eq 0 ]; then + add_content "No recent deployment logs found" || exit 0 + add_content "" || exit 0 + fi +fi + +# ============================================================================= +# DETAILED ERROR LOGS +# ============================================================================= +if [ -d "$TEMP_DIR/LogFiles/DetailedErrors" ]; then + add_content "=== Detailed Error Logs ===" || exit 0 + + error_count=0 + for error_file in "$TEMP_DIR/LogFiles/DetailedErrors"/*; do + if [ -f "$error_file" ] && [ $error_count -lt 5 ]; then + add_content "--- $(basename "$error_file") ---" || exit 0 + + # Always show detailed errors, but limit size + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # Truncate very large error files for INFO and above + error_content=$(head -c 2000 "$error_file") + if [ $(wc -c < "$error_file") -gt 2000 ]; then + error_content="$error_content... [truncated - full error in Azure Portal]" + fi + else + # Show more for DEBUG/VERBOSE + error_content=$(head -c 4000 "$error_file") + if [ $(wc -c < "$error_file") -gt 4000 ]; then + error_content="$error_content... [truncated]" + fi + fi + + add_content "$error_content" || exit 0 + add_content "" || exit 0 + + error_count=$((error_count + 1)) + fi + done +fi + +# ============================================================================= +# PERFORMANCE & API TRACES +# ============================================================================= +if [ "$INCLUDE_PERFORMANCE_TRACES" = "true" ] && [ -d "$TEMP_DIR/LogFiles/kudu/trace" ] && [ "$CURRENT_PRIORITY" -ge 3 ]; then + add_content "=== Performance Issues ===" || exit 0 + + # Find slow requests (>5s) and failed requests + performance_issues_found=false + + # Look for slow requests (files with timing indicators) + # For INFO level: show requests >30s, for DEBUG/VERBOSE: show requests >10s + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # INFO level: show slower requests (30s+) + trace_files=$(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_[3-9][0-9]s.xml" -o -name "*_[0-9][0-9][0-9]s.xml" 2>/dev/null | head -5) + else + # DEBUG/VERBOSE: show more requests (10s+) + trace_files=$(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_[1-9][0-9]s.xml" -o -name "*_[0-9][0-9][0-9]s.xml" 2>/dev/null | head -5) + fi + + for trace_file in $trace_files; do + if [ -f "$trace_file" ]; then + performance_issues_found=true + filename=$(basename "$trace_file") + # Extract timing info from filename (simple and reliable) + timing=$(echo "$filename" | grep -o '_[0-9]*s\.xml' | tr -d '_s.xml' || echo "unknown") + + # Add severity indicator based on timing + if [ "$timing" != "unknown" ] && [ "$timing" -gt 120 ]; then + severity="🔴 CRITICAL" + elif [ "$timing" != "unknown" ] && [ "$timing" -gt 60 ]; then + severity="🟠 HIGH" + else + severity="🟡 MEDIUM" + fi + + # Simple, reliable format that works for all filename patterns + add_content "⚠️ $severity Slow Request (${timing}s): $(echo "$filename" | cut -c1-90)" || exit 0 + fi + done + + # Look for failed requests (HTTP error codes) + for trace_file in $(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_500_*.xml" -o -name "*_404_*.xml" -o -name "*_pending.xml" 2>/dev/null | head -5); do + if [ -f "$trace_file" ]; then + performance_issues_found=true + filename=$(basename "$trace_file") + + # Extract status code from filename (simple and reliable) + status=$(echo "$filename" | grep -o '_[45][0-9][0-9]_' | tr -d '_' || echo "unknown") + + # Add error severity indicator + if [ "$status" = "500" ]; then + error_type="🔴 SERVER ERROR" + elif [ "$status" = "404" ]; then + error_type="🟡 NOT FOUND" + elif [[ "$filename" =~ pending ]]; then + error_type="⏳ PENDING" + else + error_type="🟠 CLIENT ERROR" + fi + + # Simple, reliable format that works for all filename patterns + if [[ "$filename" =~ pending ]]; then + add_content "⏳ Pending Request: $(echo "$filename" | cut -c1-90)" || exit 0 + else + add_content "❌ $error_type (HTTP $status): $(echo "$filename" | cut -c1-90)" || exit 0 + fi + fi + done + + if [ "$performance_issues_found" = false ]; then + add_content "No significant performance issues detected" || exit 0 + fi + + add_content "" || exit 0 +fi + +# ============================================================================= +# SYSTEM EVENT LOG (SUMMARY ONLY - AVOID VERBOSE XML) +# ============================================================================= +if [ -f "$TEMP_DIR/LogFiles/eventlog.xml" ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== System Events (Last 10 Events) ===" || exit 0 + if command -v xmllint &>/dev/null; then + event_summary=$(xmllint --xpath '//Event[position()<=10]/concat("Time=", System/TimeCreated/@SystemTime, " | Level=", System/Level/text(), " | Message=", substring(RenderingInfo/Message/text(), 1, 80), "\n")' "$TEMP_DIR/LogFiles/eventlog.xml" 2>/dev/null || echo "No recent system events") + add_content "$event_summary" || exit 0 + else + # Fallback: simple grep for basic event info + event_summary=$(grep -o ' Logs" + +# Cleanup +rm -rf "$TEMP_DIR" "$LOG_PATH" 2>/dev/null || true \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/runbook.robot b/codebundles/azure-appservice-webapp-health/runbook.robot index b76949eb6..ba387d33b 100644 --- a/codebundles/azure-appservice-webapp-health/runbook.robot +++ b/codebundles/azure-appservice-webapp-health/runbook.robot @@ -322,6 +322,21 @@ Suite Initialization ... description=Maximum lines per log file to display ... pattern=\w* ... default=100 + ${INCLUDE_DOCKER_LOGS}= RW.Core.Import User Variable INCLUDE_DOCKER_LOGS + ... type=string + ... description=Include Docker container logs in output (true/false) + ... pattern=\w* + ... default=true + ${INCLUDE_DEPLOYMENT_LOGS}= RW.Core.Import User Variable INCLUDE_DEPLOYMENT_LOGS + ... type=string + ... description=Include deployment history logs in output (true/false) + ... pattern=\w* + ... default=true + ${INCLUDE_PERFORMANCE_TRACES}= RW.Core.Import User Variable INCLUDE_PERFORMANCE_TRACES + ... type=string + ... description=Include performance traces in output (true/false) + ... pattern=\w* + ... default=false Set Suite Variable ${APP_SERVICE_NAME} ${APP_SERVICE_NAME} Set Suite Variable ${AZ_RESOURCE_GROUP} ${AZ_RESOURCE_GROUP} Set Suite Variable ${TIME_PERIOD_MINUTES} ${TIME_PERIOD_MINUTES} @@ -335,7 +350,10 @@ Suite Initialization Set Suite Variable ${AVG_RSP_TIME} ${AVG_RSP_TIME} Set Suite Variable ${LOG_LEVEL} ${LOG_LEVEL} Set Suite Variable ${MAX_LOG_LINES} ${MAX_LOG_LINES} + Set Suite Variable ${INCLUDE_DOCKER_LOGS} ${INCLUDE_DOCKER_LOGS} + Set Suite Variable ${INCLUDE_DEPLOYMENT_LOGS} ${INCLUDE_DEPLOYMENT_LOGS} + Set Suite Variable ${INCLUDE_PERFORMANCE_TRACES} ${INCLUDE_PERFORMANCE_TRACES} Set Suite Variable ... ${env} - ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}", "LOG_LEVEL":"${LOG_LEVEL}", "MAX_LOG_LINES":"${MAX_LOG_LINES}"} \ No newline at end of file + ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}", "LOG_LEVEL":"${LOG_LEVEL}", "MAX_LOG_LINES":"${MAX_LOG_LINES}", "INCLUDE_DOCKER_LOGS":"${INCLUDE_DOCKER_LOGS}", "INCLUDE_DEPLOYMENT_LOGS":"${INCLUDE_DEPLOYMENT_LOGS}", "INCLUDE_PERFORMANCE_TRACES":"${INCLUDE_PERFORMANCE_TRACES}"} \ No newline at end of file