From 89996ea421306688e54dd03668f387f7f80ac524 Mon Sep 17 00:00:00 2001
From: Nbarola
Date: Tue, 20 May 2025 16:28:53 +0530
Subject: [PATCH 01/19] add scripts and runbook file

---
 .../azure-devops-triage/agent-pools.sh        | 194 +++++++++++
 .../long-running-pipelines.sh                 | 237 ++++++++++++++
 .../azure-devops-triage/pipeline-logs.sh      | 188 +++++++++++
 .../azure-devops-triage/policy-standards.json |  83 +++++
 .../azure-devops-triage/queued-pipelines.sh   | 234 +++++++++++++
 .../azure-devops-triage/repo-policies.sh      | 308 ++++++++++++++++++
 codebundles/azure-devops-triage/runbook.robot | 278 ++++++++++++++++
 .../service-connections.sh                    | 199 +++++++++++
 8 files changed, 1721 insertions(+)
 create mode 100644 codebundles/azure-devops-triage/agent-pools.sh
 create mode 100644 codebundles/azure-devops-triage/long-running-pipelines.sh
 create mode 100644 codebundles/azure-devops-triage/pipeline-logs.sh
 create mode 100644 codebundles/azure-devops-triage/policy-standards.json
 create mode 100644 codebundles/azure-devops-triage/queued-pipelines.sh
 create mode 100644 codebundles/azure-devops-triage/repo-policies.sh
 create mode 100755 codebundles/azure-devops-triage/runbook.robot
 create mode 100644 codebundles/azure-devops-triage/service-connections.sh

diff --git a/codebundles/azure-devops-triage/agent-pools.sh b/codebundles/azure-devops-triage/agent-pools.sh
new file mode 100644
index 000000000..ca950d015
--- /dev/null
+++ b/codebundles/azure-devops-triage/agent-pools.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# NOTE: do not enable tracing (set -x) or dump the environment (env) here; both write every variable, credentials included, to the task output.
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS:
+#   AZURE_DEVOPS_ORG
+#
+# OPTIONAL ENV VARS:
+#   HIGH_UTILIZATION_THRESHOLD - Percentage threshold for agent utilization (default: 80)
+#
+# This script:
+#   1) Lists all agent pools in the specified Azure DevOps organization
+#   2) Checks the status of agents in each pool
+#   3) Identifies offline, disabled, or unhealthy agents
+#   4) Outputs results in JSON format
+# -----------------------------------------------------------------------------
+
+: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}"
+: "${HIGH_UTILIZATION_THRESHOLD:=80}" # Default to 80% if not specified
+
+OUTPUT_FILE="agent_pools_issues.json"
+issues_json='[]'
+ORG_URL="https://dev.azure.com/$AZURE_DEVOPS_ORG"
+
+echo "Analyzing Azure DevOps Agent Pools..."
+echo "Organization: $AZURE_DEVOPS_ORG"
+echo "High Utilization Threshold: ${HIGH_UTILIZATION_THRESHOLD}%"
+
+# Ensure the Azure DevOps CLI extension is installed (login itself is not verified here)
+if ! az extension show --name azure-devops &>/dev/null; then
+  echo "Installing Azure DevOps CLI extension..."
+  az extension add --name azure-devops --output none
+fi
+
+# Configure Azure DevOps CLI defaults
+az devops configure --defaults organization="$ORG_URL" --output none
+
+# Get list of agent pools
+echo "Retrieving agent pools in organization..."
+if ! pools=$(az pipelines pool list --org "$ORG_URL" --output json 2>pools_err.log); then
+  err_msg=$(cat pools_err.log)
+  rm -f pools_err.log
+
+  echo "ERROR: Could not list agent pools."
+  issues_json=$(echo "$issues_json" | jq \
+    --arg title "Failed to List Agent Pools" \
+    --arg details "$err_msg" \
+    --arg severity "4" \
+    --arg nextStep "Check if you have sufficient permissions to view agent pools." \
+    '. += [{
+      "title": $title,
+      "details": $details,
+      "next_step": $nextStep,
+      "severity": ($severity | tonumber)
+    }]')
+  echo "$issues_json" > "$OUTPUT_FILE"
+  exit 1
+fi
+rm -f pools_err.log
+
+# Save pools to a file to avoid subshell issues
+echo "$pools" > pools.json
+
+# Get the number of pools
+pool_count=$(jq '. | length' pools.json)
+
+# Process each agent pool using a for loop instead of pipe to while
+for ((i=0; i<pool_count; i++)); do
+  # NOTE(review): in the copy under review the text between this loop header and
+  # the agent listing below was unreadable. The assignments that follow are a
+  # minimal reconstruction based on how pool_id, pool_name and agents are used
+  # later in the loop, and on the --org "$ORG_URL" usage above. Confirm them
+  # against the original commit before merging.
+  pool_json=$(jq -c ".[$i]" pools.json)
+
+  # Identifiers referenced by every issue raised for this pool
+  pool_id=$(jq -r '.id' <<<"$pool_json")
+  pool_name=$(jq -r '.name' <<<"$pool_json")
+
+  echo "Processing Agent Pool: $pool_name (ID: $pool_id)"
+
+  # List the agents registered in this pool. stderr is redirected to
+  # agents_err.log so that, if the call fails, the CLI error text can be
+  # attached to the issue raised for this pool; the loop then moves on to the
+  # next pool instead of aborting the whole run.
+  if ! agents=$(az pipelines agent list --pool-id "$pool_id" --org "$ORG_URL" --output json 2>agents_err.log); then
+    err_msg=$(cat agents_err.log)
+    rm -f agents_err.log
+
+    # Failed to list agents in pool
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Failed to List Agents in Pool \`$pool_name\`" \
+      --arg details "$err_msg" \
+      --arg severity "3" \
+      --arg nextStep "Check if you have sufficient permissions to view agents in this pool." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+    continue
+  fi
+  rm -f agents_err.log
+
+  # Check if pool has no agents
+  agent_count=$(echo "$agents" | jq '. | length')
+  if [[ "$agent_count" -eq 0 ]]; then
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "No Agents Found in Pool \`$pool_name\`" \
+      --arg details "Agent pool $pool_name (ID: $pool_id) has no registered agents." \
+      --arg severity "3" \
+      --arg nextStep "Add agents to this pool or remove the pool if it's no longer needed." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+    continue
+  fi
+
+  # Check for offline agents
+  offline_agents=$(echo "$agents" | jq '[.[] | select(.status != "online")]')
+  offline_count=$(echo "$offline_agents" | jq '. | length')
+
+  if [[ "$offline_count" -gt 0 ]]; then
+    offline_details=$(echo "$offline_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled, version: .version}]')
+
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Offline Agents Found in Pool \`$pool_name\`" \
+      --arg details "$offline_details" \
+      --arg severity "3" \
+      --arg nextStep "Check the agent machines and restart the agent service if needed. Verify network connectivity between agents and Azure DevOps." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+  fi
+
+  # Check for disabled agents
+  disabled_agents=$(echo "$agents" | jq '[.[] | select(.enabled == false)]')
+  disabled_count=$(echo "$disabled_agents" | jq '. | length')
+
+  if [[ "$disabled_count" -gt 0 ]]; then
+    disabled_details=$(echo "$disabled_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled, version: .version}]')
+
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Disabled Agents Found in Pool \`$pool_name\`" \
+      --arg details "$disabled_details" \
+      --arg severity "2" \
+      --arg nextStep "Enable these agents if they should be available for builds, or remove them if they're no longer needed." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+  fi
+
+  # Check for agents with high job count (potentially overloaded)
+  busy_agents=$(echo "$agents" | jq '[.[] | select(.assignedRequest != null)]')
+  busy_count=$(echo "$busy_agents" | jq '. | length')
+  total_online=$(echo "$agents" | jq '[.[] | select(.status == "online")] | length')
+
+  # If more than HIGH_UTILIZATION_THRESHOLD% of agents are busy, flag as potential capacity issue
+  if [[ "$total_online" -gt 0 && "$busy_count" -gt 0 ]]; then
+    busy_percentage=$((busy_count * 100 / total_online))
+    if [[ "$busy_percentage" -gt "$HIGH_UTILIZATION_THRESHOLD" ]]; then
+      busy_details=$(echo "$busy_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled}]')
+
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "High Agent Utilization in Pool \`$pool_name\`" \
+        --arg details "Pool has $busy_count out of $total_online agents currently busy ($busy_percentage% utilization)" \
+        --arg severity "2" \
+        --arg nextStep "Consider adding more agents to this pool to handle the workload or optimize your pipelines to reduce build times." \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber)
+        }]')
+    fi
+  fi
+done
+
+# Clean up temporary file
+rm -f pools.json
+
+# Write final JSON
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Azure DevOps agent pool analysis completed. Saved results to $OUTPUT_FILE"
diff --git a/codebundles/azure-devops-triage/long-running-pipelines.sh b/codebundles/azure-devops-triage/long-running-pipelines.sh
new file mode 100644
index 000000000..f7b0469c2
--- /dev/null
+++ b/codebundles/azure-devops-triage/long-running-pipelines.sh
@@ -0,0 +1,237 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS:
+#   AZURE_DEVOPS_ORG
+#   AZURE_DEVOPS_PROJECT
+#
+# OPTIONAL ENV VARS:
+#   DAYS_TO_LOOK_BACK - Number of days to look back for pipeline runs (default: 7)
+#   DURATION_THRESHOLD - Threshold in minutes or hours (e.g., "60m" or "2h") for long-running pipelines (default: "60m")
+#
+# This script:
+#   1) Lists all pipelines in the specified Azure DevOps project
+#   2) Checks for runs that exceed the specified duration threshold
+#   3) Outputs results in JSON format
+# -----------------------------------------------------------------------------
+
+: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}"
+: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}"
+: "${DAYS_TO_LOOK_BACK:=7}"
+: "${DURATION_THRESHOLD:=60m}"
+
+OUTPUT_FILE="long_running_pipelines.json"
+issues_json='[]'
+
+# Convert a duration threshold such as "60m" or "2h" to minutes (printed on stdout); exits 1 on an unknown format
+convert_to_minutes() {
+  local threshold=$1
+  local number=${threshold//[^0-9]/}
+  local unit=${threshold//[0-9]/}
+  [[ -n "$number" ]] || unit="" # no digits at all: fall through to the error branch below
+  case "$unit" in
+    m|min|mins)
+      echo "$number"
+      ;;
+    h|hr|hrs|hour|hours)
+      echo $((10#$number * 60)) # 10# keeps values like "08" from being parsed as octal
+      ;;
+    *)
+      echo "Invalid duration format. 
Use format like '60m' or '2h'" >&2
+      exit 1
+      ;;
+  esac
+}
+
+THRESHOLD_MINUTES=$(convert_to_minutes "$DURATION_THRESHOLD")
+
+echo "Analyzing Azure DevOps Pipeline Durations..."
+echo "Organization: $AZURE_DEVOPS_ORG"
+echo "Project: $AZURE_DEVOPS_PROJECT"
+echo "Look Back: $DAYS_TO_LOOK_BACK days"
+echo "Threshold: $THRESHOLD_MINUTES minutes"
+
+# Ensure the Azure DevOps CLI extension is installed (login itself is not verified here)
+if ! az extension show --name azure-devops &>/dev/null; then
+  echo "Installing Azure DevOps CLI extension..."
+  az extension add --name azure-devops --output none
+fi
+
+# Configure Azure DevOps CLI defaults
+az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none
+
+# Get list of pipelines
+echo "Retrieving pipelines in project..."
+if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then
+  err_msg=$(cat pipelines_err.log)
+  rm -f pipelines_err.log
+
+  echo "ERROR: Could not list pipelines."
+  issues_json=$(echo "$issues_json" | jq \
+    --arg title "Failed to List Pipelines" \
+    --arg details "$err_msg" \
+    --arg severity "4" \
+    --arg nextStep "Check if the project exists and you have the right permissions." \
+    '. += [{
+      "title": $title,
+      "details": $details,
+      "next_step": $nextStep,
+      "severity": ($severity | tonumber)
+    }]')
+  echo "$issues_json" > "$OUTPUT_FILE"
+  exit 1
+fi
+rm -f pipelines_err.log
+
+# Process each pipeline: one compact JSON object per line, read on fd 3 so names containing spaces are not word-split and stdin stays free
+while IFS= read -r row <&3; do
+  pipeline_id=$(jq -r '.id' <<<"$row")
+  pipeline_name=$(jq -r '.name' <<<"$row")
+
+  echo "Processing Pipeline: $pipeline_name (ID: $pipeline_id)"
+
+  # Calculate date for filtering runs (in ISO format)
+  from_date=$(date -d "$DAYS_TO_LOOK_BACK days ago" -u +"%Y-%m-%dT%H:%M:%SZ")
+
+  # Get recent pipeline runs (NOTE(review): confirm the installed azure-devops extension accepts --min-created-time)
+  if ! runs=$(az pipelines runs list --pipeline-id "$pipeline_id" --min-created-time "$from_date" --output json 2>runs_err.log); then
+    err_msg=$(cat runs_err.log)
+    rm -f runs_err.log
+
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Failed to List Runs for Pipeline $pipeline_name" \
+      --arg details "$err_msg" \
+      --arg severity "3" \
+      --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+    continue
+  fi
+  rm -f runs_err.log
+
+  # Check for currently running pipelines (NOTE(review): Build resources appear to expose .status/.queueTime; the original .state/.createdDate names are kept as fallbacks -- confirm against real CLI output)
+  while IFS= read -r run <&4; do
+    run_id=$(jq -r '.id' <<<"$run")
+    run_name=$(jq -r '.name // "Run #\(.id)"' <<<"$run")
+    web_url=$(jq -r '.url' <<<"$run")
+    branch=$(jq -r '.sourceBranch // "unknown"' <<<"$run" | sed 's|refs/heads/||')
+    created_date=$(jq -r '.queueTime // .createdDate // empty' <<<"$run")
+    [[ -n "$created_date" ]] || continue # no usable timestamp: skip instead of aborting on date(1)
+    # Calculate run duration in minutes
+    created_timestamp=$(date -d "$created_date" +%s)
+    current_timestamp=$(date +%s)
+    duration_seconds=$((current_timestamp - created_timestamp))
+    duration_minutes=$((duration_seconds / 60))
+
+    # Format duration for display
+    if (( duration_minutes >= 1440 )); then
+      days=$((duration_minutes / 1440))
+      hours=$(((duration_minutes % 1440) / 60))
+      mins=$((duration_minutes % 60))
+      formatted_duration="${days}d ${hours}h ${mins}m"
+    elif (( duration_minutes >= 60 )); then
+      hours=$((duration_minutes / 60))
+      mins=$((duration_minutes % 60))
+      formatted_duration="${hours}h ${mins}m"
+    else
+      formatted_duration="${duration_minutes}m"
+    fi
+
+    echo "  Checking running pipeline: $run_name (ID: $run_id, Branch: $branch, Duration: $formatted_duration)"
+
+    # Check if duration exceeds threshold
+    if (( duration_minutes >= THRESHOLD_MINUTES )); then
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "Long Running Pipeline: \`$pipeline_name\` (Branch: \`$branch\`)" \
+        --arg details "Pipeline has been running for $formatted_duration (exceeds threshold of $THRESHOLD_MINUTES minutes)" \
+        --arg severity "3" \
+        --arg nextStep "Investigate why pipeline \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\` is taking longer than expected. Check for resource constraints or inefficient tasks." \
+        --arg resource_url "$web_url" \
+        --arg duration "$formatted_duration" \
+        --arg duration_minutes "$duration_minutes" \
+        --arg pipeline_id "$pipeline_id" \
+        --arg run_id "$run_id" \
+        --arg branch "$branch" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber),
+          "resource_url": $resource_url,
+          "duration": $duration,
+          "duration_minutes": ($duration_minutes | tonumber),
+          "pipeline_id": $pipeline_id,
+          "run_id": $run_id,
+          "branch": $branch
+        }]')
+    fi
+  done 4< <(jq -c '.[] | select((.status // .state) == "inProgress")' <<<"$runs")
+
+  # Also check for completed runs that took longer than the threshold
+  while IFS= read -r run <&4; do
+    run_id=$(jq -r '.id' <<<"$run")
+    run_name=$(jq -r '.name // "Run #\(.id)"' <<<"$run")
+    web_url=$(jq -r '.url' <<<"$run")
+    branch=$(jq -r '.sourceBranch // "unknown"' <<<"$run" | sed 's|refs/heads/||')
+
+    # Duration = finish - start, parsed with date(1) as in the in-progress branch; runs missing either timestamp are skipped
+    started_at=$(jq -r '.startTime // empty' <<<"$run")
+    finished_at=$(jq -r '.finishTime // .finishedDate // empty' <<<"$run")
+    [[ -n "$started_at" && -n "$finished_at" ]] || continue
+    start_ts=$(date -d "$started_at" +%s) || continue
+    finish_ts=$(date -d "$finished_at" +%s) || continue
+    duration_minutes=$(((finish_ts - start_ts) / 60))
+
+    # Format duration for display
+    if (( duration_minutes >= 1440 )); then
+      days=$((duration_minutes / 1440))
+      hours=$(((duration_minutes % 1440) / 60))
+      mins=$((duration_minutes % 60))
+      formatted_duration="${days}d ${hours}h ${mins}m"
+    elif (( duration_minutes >= 60 )); then
+      hours=$((duration_minutes / 60))
+      mins=$((duration_minutes % 60))
+      formatted_duration="${hours}h ${mins}m"
+    else
+      formatted_duration="${duration_minutes}m"
+    fi
+
+    # Check if duration exceeds threshold
+    if (( duration_minutes >= THRESHOLD_MINUTES )); then
+      echo "  Found long-running completed pipeline: $run_name (ID: $run_id, Branch: $branch, Duration: $formatted_duration)"
+
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "Long Running Completed Pipeline: \`$pipeline_name\` (Branch: \`$branch\`)" \
+        --arg details "Pipeline run completed in $formatted_duration (exceeds threshold of $THRESHOLD_MINUTES minutes)" \
+        --arg severity "2" \
+        --arg nextStep "Review pipeline \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\` for optimization opportunities. Consider parallelizing tasks or upgrading agent resources." \
+        --arg resource_url "$web_url" \
+        --arg duration "$formatted_duration" \
+        --arg duration_minutes "$duration_minutes" \
+        --arg pipeline_id "$pipeline_id" \
+        --arg run_id "$run_id" \
+        --arg branch "$branch" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber),
+          "resource_url": $resource_url,
+          "duration": $duration,
+          "duration_minutes": ($duration_minutes | tonumber),
+          "pipeline_id": $pipeline_id,
+          "run_id": $run_id,
+          "branch": $branch
+        }]')
+    fi
+  done 4< <(jq -c '.[] | select((.status // .state) == "completed")' <<<"$runs")
+done 3< <(jq -c '.[]' <<<"$pipelines")
+
+# Write final JSON
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Azure DevOps long-running pipeline analysis completed. 
Saved results to $OUTPUT_FILE"
\ No newline at end of file
diff --git a/codebundles/azure-devops-triage/pipeline-logs.sh b/codebundles/azure-devops-triage/pipeline-logs.sh
new file mode 100644
index 000000000..6313f5acb
--- /dev/null
+++ b/codebundles/azure-devops-triage/pipeline-logs.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS:
+#   AZURE_DEVOPS_ORG
+#   AZURE_DEVOPS_PROJECT
+# OPTIONAL ENV VARS: DAYS_TO_LOOK_BACK - Number of days to look back for pipeline runs (default: 7)
+# This script:
+#   1) Lists all pipelines in the specified Azure DevOps project
+#   2) Checks for failed runs within the specified time period
+#   3) Retrieves logs for each failed run
+#   4) Outputs results in JSON format
+# -----------------------------------------------------------------------------
+
+: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}"
+: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}"
+: "${DAYS_TO_LOOK_BACK:=7}"
+
+OUTPUT_FILE="pipeline_logs_issues.json"
+TEMP_LOG_FILE="pipeline_log_temp.json"
+issues_json='[]'
+
+echo "Analyzing Azure DevOps Pipeline Logs..."
+echo "Organization: $AZURE_DEVOPS_ORG"
+echo "Project: $AZURE_DEVOPS_PROJECT"
+echo "Look Back: $DAYS_TO_LOOK_BACK days"
+
+# Ensure the Azure DevOps CLI extension is installed (login itself is not verified here)
+if ! az extension show --name azure-devops &>/dev/null; then
+  echo "Installing Azure DevOps CLI extension..."
+  az extension add --name azure-devops --output none
+fi
+
+# Configure Azure DevOps CLI defaults
+az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none
+
+# Get list of pipelines
+echo "Retrieving pipelines in project..."
+if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then
+  err_msg=$(cat pipelines_err.log)
+  rm -f pipelines_err.log
+
+  echo "ERROR: Could not list pipelines."
+  issues_json=$(echo "$issues_json" | jq \
+    --arg title "Failed to List Pipelines" \
+    --arg details "$err_msg" \
+    --arg severity "4" \
+    --arg nextStep "Check if the project exists and you have the right permissions." \
+    '. += [{
+      "title": $title,
+      "details": $details,
+      "next_step": $nextStep,
+      "severity": ($severity | tonumber)
+    }]')
+  echo "$issues_json" > "$OUTPUT_FILE"
+  exit 1
+fi
+rm -f pipelines_err.log
+
+# Process each pipeline: one compact JSON object per line, read on fd 3 so names containing spaces are not word-split and stdin stays free
+while IFS= read -r row <&3; do
+  pipeline_id=$(jq -r '.id' <<<"$row")
+  pipeline_name=$(jq -r '.name' <<<"$row")
+
+  echo "Processing Pipeline: $pipeline_name (ID: $pipeline_id)"
+
+  # Calculate date for filtering runs (in ISO format)
+  from_date=$(date -d "$DAYS_TO_LOOK_BACK days ago" -u +"%Y-%m-%dT%H:%M:%SZ")
+
+  # Get recent pipeline runs (NOTE(review): confirm the installed azure-devops extension accepts --min-created-time)
+  if ! runs=$(az pipelines runs list --pipeline-id "$pipeline_id" --min-created-time "$from_date" --output json 2>runs_err.log); then
+    err_msg=$(cat runs_err.log)
+    rm -f runs_err.log
+
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Failed to List Runs for Pipeline $pipeline_name" \
+      --arg details "$err_msg" \
+      --arg severity "3" \
+      --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+    continue
+  fi
+  rm -f runs_err.log
+
+  # Check for failed runs (the filter lives on the matching "done" line below)
+  while IFS= read -r run <&4; do
+    run_id=$(jq -r '.id' <<<"$run")
+    run_name=$(jq -r '.name // "Run #\(.id)"' <<<"$run")
+    web_url=$(jq -r '.url' <<<"$run")
+    branch=$(jq -r '.sourceBranch // "unknown"' <<<"$run" | sed 's|refs/heads/||')
+
+    echo "  Checking failed run: $run_name (ID: $run_id, Branch: $branch)"
+
+    # Get log content (NOTE(review): confirm "runs show-logs" exists in the installed azure-devops extension and returns objects with a .line field)
+    if ! log_content=$(az pipelines runs show-logs --id "$run_id" --output json 2>log_content_err.log); then
+      err_msg=$(cat log_content_err.log)
+      rm -f log_content_err.log
+
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "Failed to Get Logs for Run $run_name in Pipeline $pipeline_name" \
+        --arg details "$err_msg" \
+        --arg severity "3" \
+        --arg nextStep "Check if you have sufficient permissions to view pipeline logs." \
+        --arg resource_url "$web_url" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber),
+          "resource_url": $resource_url
+        }]')
+      continue
+    fi
+    rm -f log_content_err.log
+
+    # Save log content to temp file for processing
+    echo "$log_content" > "$TEMP_LOG_FILE"
+
+    # Extract error information from logs
+    if [[ -s "$TEMP_LOG_FILE" ]]; then
+      # Extract at most 50 error lines; the limit is applied inside jq so no early-exiting head can break the pipeline under pipefail
+      error_lines=$(jq -r '[.[] | .line // empty | select(test("error|exception|failed"; "i"))] | .[:50][]' "$TEMP_LOG_FILE")
+
+      if [[ -n "$error_lines" ]]; then
+        issues_json=$(echo "$issues_json" | jq \
+          --arg title "Failed Pipeline Run: \`$pipeline_name\` (Branch: \`$branch\`)" \
+          --arg details "$error_lines" \
+          --arg severity "3" \
+          --arg nextStep "Review pipeline configuration for \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\`. Check branch \`$branch\` for recent changes that might have caused the failure." \
+          --arg resource_url "$web_url" \
+          '. += [{
+            "title": $title,
+            "details": $details,
+            "next_step": $nextStep,
+            "severity": ($severity | tonumber),
+            "resource_url": $resource_url
+          }]')
+      fi
+
+      # Check for timeout issues
+      timeout_lines=$(jq -r '.[] | .line // empty | select(test("timeout|timed out|canceled after|cancelled after"; "i"))' "$TEMP_LOG_FILE")
+      if [[ -n "$timeout_lines" ]]; then
+        issues_json=$(echo "$issues_json" | jq \
+          --arg title "Pipeline Timeout Detected: \`$pipeline_name\` (Branch: \`$branch\`)" \
+          --arg details "$timeout_lines" \
+          --arg severity "3" \
+          --arg nextStep "Increase timeout settings for the pipeline or optimize the build process to complete faster." \
+          --arg resource_url "$web_url" \
+          '. += [{
+            "title": $title,
+            "details": $details,
+            "next_step": $nextStep,
+            "severity": ($severity | tonumber),
+            "resource_url": $resource_url
+          }]')
+      fi
+
+      # Check for dependency issues: both tests run against the same line text (jq's "|" binds looser than "and")
+      dependency_lines=$(jq -r '.[] | .line // empty | select(test("package|dependency|module|nuget|npm|pip|maven"; "i") and test("failed|error|not found|missing"; "i"))' "$TEMP_LOG_FILE")
+      if [[ -n "$dependency_lines" ]]; then
+        issues_json=$(echo "$issues_json" | jq \
+          --arg title "Dependency Issues Detected: \`$pipeline_name\` (Branch: \`$branch\`)" \
+          --arg details "$dependency_lines" \
+          --arg severity "3" \
+          --arg nextStep "Check package references and ensure all dependencies are available and correctly versioned." \
+          --arg resource_url "$web_url" \
+          '. += [{
+            "title": $title,
+            "details": $details,
+            "next_step": $nextStep,
+            "severity": ($severity | tonumber),
+            "resource_url": $resource_url
+          }]')
+      fi
+    fi
+    rm -f "$TEMP_LOG_FILE"
+  done 4< <(jq -c '.[] | select(.result == "failed")' <<<"$runs")
+done 3< <(jq -c '.[]' <<<"$pipelines")
+
+# Write final JSON
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Azure DevOps pipeline log analysis completed. 
Saved results to $OUTPUT_FILE"
diff --git a/codebundles/azure-devops-triage/policy-standards.json b/codebundles/azure-devops-triage/policy-standards.json
new file mode 100644
index 000000000..c85858a17
--- /dev/null
+++ b/codebundles/azure-devops-triage/policy-standards.json
@@ -0,0 +1,83 @@
+{
+  "requiredPolicies": {
+    "minimumReviewers": {
+      "typeId": "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd",
+      "displayName": "Minimum number of reviewers",
+      "settings": {
+        "minimumApproverCount": 2,
+        "creatorVoteCounts": false,
+        "allowDownvotes": false,
+        "resetOnSourcePush": true
+      }
+    },
+    "workItemLinking": {
+      "typeId": "40e92b44-2fe1-4dd6-b3d8-74a9c21d0c6e",
+      "displayName": "Work item linking",
+      "settings": {
+        "enabled": true,
+        "workItemType": "Any"
+      }
+    },
+    "commentRequirements": {
+      "typeId": "c6a1889d-b943-4856-b76f-9e46bb6b0df2",
+      "displayName": "Comment requirements",
+      "settings": {
+        "blockCommentsDuringPush": true,
+        "rejectionErrorMessage": "Comments must be provided with your changes"
+      }
+    },
+    "buildValidation": {
+      "typeId": "0609b952-1397-4640-95ec-e00a01b2c241",
+      "displayName": "Build validation",
+      "settings": {
+        "buildDefinitionId": 0,
+        "queueOnSourceUpdateOnly": true,
+        "manualQueueOnly": false,
+        "displayName": "Build validation",
+        "validDuration": 720,
+        "scope": [
+          {
+            "repositoryId": null,
+            "refName": null,
+            "matchKind": "Exact"
+          }
+        ]
+      }
+    },
+    "requiredReviewers": {
+      "typeId": "fd2167ab-b0be-447a-8ec8-39368250530e",
+      "displayName": "Required reviewers",
+      "settings": {
+        "requiredReviewerIds": [],
+        "minimumApproverCount": 1,
+        "creatorVoteCounts": false,
+        "message": "Code owners review required"
+      }
+    }
+  },
+  "branchPolicies": {
+    "defaultBranch": {
+      "isLocked": true,
+      "requirePullRequest": true,
+      "resetOnSourcePush": true
+    },
+    "featureBranches": {
+      "isLocked": false,
+      "requirePullRequest": false
+    }
+  },
+  "namingConventions": {
+    "repositories": "^[a-z0-9][-a-z0-9]*$",
+    "branches": {
+      "feature": "^feature/[A-Z]+-[0-9]+-.+$",
+      "bugfix": "^bugfix/[A-Z]+-[0-9]+-.+$",
+      "release": "^release/[0-9]+\\.[0-9]+\\.[0-9]+$",
+      "hotfix": "^hotfix/[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$"
+    }
+  },
+  "securitySettings": {
+    "enableCrossRepoMerge": false,
+    "enableForkSync": false,
+    "enableWebHooks": true
+  }
+}
\ No newline at end of file
diff --git a/codebundles/azure-devops-triage/queued-pipelines.sh b/codebundles/azure-devops-triage/queued-pipelines.sh
new file mode 100644
index 000000000..a7b45e816
--- /dev/null
+++ b/codebundles/azure-devops-triage/queued-pipelines.sh
@@ -0,0 +1,234 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS:
+#   AZURE_DEVOPS_ORG
+#   AZURE_DEVOPS_PROJECT
+#
+# OPTIONAL ENV VARS:
+#   QUEUE_THRESHOLD - Threshold in minutes or hours (e.g., "10m" or "1h") for queued pipelines (default: "10m")
+#
+# This script:
+#   1) Lists all pipelines in the specified Azure DevOps project
+#   2) Checks for runs that are queued longer than the specified threshold
+#   3) Outputs results in JSON format
+# -----------------------------------------------------------------------------
+
+: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}"
+: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}"
+: "${QUEUE_THRESHOLD:=10m}"
+
+OUTPUT_FILE="queued_pipelines.json"
+issues_json='[]'
+
+# Convert a duration threshold such as "10m" or "1h" to minutes (printed on stdout); exits 1 on an unknown format
+convert_to_minutes() {
+  local threshold=$1
+  local number=${threshold//[^0-9]/}
+  local unit=${threshold//[0-9]/}
+  [[ -n "$number" ]] || unit="" # no digits at all: fall through to the error branch below
+  case "$unit" in
+    m|min|mins)
+      echo "$number"
+      ;;
+    h|hr|hrs|hour|hours)
+      echo $((10#$number * 60)) # 10# keeps values like "08" from being parsed as octal
+      ;;
+    *)
+      echo "Invalid duration format. Use format like '10m' or '1h'" >&2
+      exit 1
+      ;;
+  esac
+}
+
+THRESHOLD_MINUTES=$(convert_to_minutes "$QUEUE_THRESHOLD")
+
+echo "Analyzing Azure DevOps Queued Pipelines..."
+echo "Organization: $AZURE_DEVOPS_ORG"
+echo "Project: $AZURE_DEVOPS_PROJECT"
+echo "Threshold: $THRESHOLD_MINUTES minutes"
+
+# Ensure the Azure DevOps CLI extension is installed (login itself is not verified here)
+if ! az extension show --name azure-devops &>/dev/null; then
+  echo "Installing Azure DevOps CLI extension..."
+  az extension add --name azure-devops --output none
+fi
+
+# Configure Azure DevOps CLI defaults
+az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none
+
+# Get list of pipelines
+echo "Retrieving pipelines in project..."
+if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then
+  err_msg=$(cat pipelines_err.log)
+  rm -f pipelines_err.log
+
+  echo "ERROR: Could not list pipelines."
+  issues_json=$(echo "$issues_json" | jq \
+    --arg title "Failed to List Pipelines" \
+    --arg details "$err_msg" \
+    --arg severity "4" \
+    --arg nextStep "Check if the project exists and you have the right permissions." \
+    '. += [{
+      "title": $title,
+      "details": $details,
+      "next_step": $nextStep,
+      "severity": ($severity | tonumber)
+    }]')
+  echo "$issues_json" > "$OUTPUT_FILE"
+  exit 1
+fi
+rm -f pipelines_err.log
+
+# Process each pipeline: one compact JSON object per line, read on fd 3 so names containing spaces are not word-split and stdin stays free
+while IFS= read -r row <&3; do
+  pipeline_id=$(jq -r '.id' <<<"$row")
+  pipeline_name=$(jq -r '.name' <<<"$row")
+
+  echo "Processing Pipeline: $pipeline_name (ID: $pipeline_id)"
+
+  # Get queued runs for this pipeline
+  if ! runs=$(az pipelines runs list --pipeline-id "$pipeline_id" --output json 2>runs_err.log); then
+    err_msg=$(cat runs_err.log)
+    rm -f runs_err.log
+
+    issues_json=$(echo "$issues_json" | jq \
+      --arg title "Failed to List Runs for Pipeline $pipeline_name" \
+      --arg details "$err_msg" \
+      --arg severity "3" \
+      --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \
+      '. += [{
+        "title": $title,
+        "details": $details,
+        "next_step": $nextStep,
+        "severity": ($severity | tonumber)
+      }]')
+    continue
+  fi
+  rm -f runs_err.log
+
+  # Check for queued runs (NOTE(review): Build resources appear to expose .status/.queueTime; the original .state/.createdDate names are kept as fallbacks -- confirm against real CLI output)
+  while IFS= read -r run <&4; do
+    run_id=$(jq -r '.id' <<<"$run")
+    run_name=$(jq -r '.name // "Run #\(.id)"' <<<"$run")
+    web_url=$(jq -r '.url' <<<"$run")
+    branch=$(jq -r '.sourceBranch // "unknown"' <<<"$run" | sed 's|refs/heads/||')
+    created_date=$(jq -r '.queueTime // .createdDate // empty' <<<"$run")
+    [[ -n "$created_date" ]] || continue # no usable timestamp: skip instead of aborting on date(1)
+    # Calculate queue time in minutes
+    created_timestamp=$(date -d "$created_date" +%s)
+    current_timestamp=$(date +%s)
+    queue_seconds=$((current_timestamp - created_timestamp))
+    queue_minutes=$((queue_seconds / 60))
+
+    # Format queue time for display
+    if (( queue_minutes >= 1440 )); then
+      days=$((queue_minutes / 1440))
+      hours=$(((queue_minutes % 1440) / 60))
+      mins=$((queue_minutes % 60))
+      formatted_queue_time="${days}d ${hours}h ${mins}m"
+    elif (( queue_minutes >= 60 )); then
+      hours=$((queue_minutes / 60))
+      mins=$((queue_minutes % 60))
+      formatted_queue_time="${hours}h ${mins}m"
+    else
+      formatted_queue_time="${queue_minutes}m"
+    fi
+
+    echo "  Checking queued pipeline: $run_name (ID: $run_id, Branch: $branch, Queue Time: $formatted_queue_time)"
+
+    # Check if queue time exceeds threshold
+    if (( queue_minutes >= THRESHOLD_MINUTES )); then
+      # Try to get more details about why it's queued
+      queue_reason="Unknown"
+      if ! run_details=$(az pipelines runs show --id "$run_id" --output json 2>/dev/null); then
+        queue_reason="Could not retrieve detailed information"
+      else
+        # Extract queue position if available
+        queue_position=$(jq -r '.queuePosition // "Unknown"' <<<"$run_details")
+        if [[ "$queue_position" != "null" && "$queue_position" != "Unknown" ]]; then
+          queue_reason="Queue position: $queue_position"
+        fi
+
+        # Try to extract any waiting reason
+        waiting_reason=$(jq -r '.reason // "Unknown"' <<<"$run_details")
+        if [[ "$waiting_reason" != "null" && "$waiting_reason" != "Unknown" ]]; then
+          queue_reason="$queue_reason, Reason: $waiting_reason"
+        fi
+      fi
+
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "Pipeline Queued Too Long: \`$pipeline_name\` (Branch: \`$branch\`)" \
+        --arg details "Pipeline has been queued for $formatted_queue_time (exceeds threshold of $THRESHOLD_MINUTES minutes). $queue_reason" \
+        --arg severity "3" \
+        --arg nextStep "Check agent pool capacity and availability. Consider adding more agents or optimizing pipeline concurrency limits." \
+        --arg resource_url "$web_url" \
+        --arg queue_time "$formatted_queue_time" \
+        --arg queue_minutes "$queue_minutes" \
+        --arg pipeline_id "$pipeline_id" \
+        --arg run_id "$run_id" \
+        --arg branch "$branch" \
+        --arg queue_reason "$queue_reason" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber),
+          "resource_url": $resource_url,
+          "queue_time": $queue_time,
+          "queue_minutes": ($queue_minutes | tonumber),
+          "pipeline_id": $pipeline_id,
+          "run_id": $run_id,
+          "branch": $branch,
+          "queue_reason": $queue_reason
+        }]')
+    fi
+  done 4< <(jq -c '.[] | select((.status // .state) == "notStarted")' <<<"$runs")
+done 3< <(jq -c '.[]' <<<"$pipelines")
+
+# Get agent pools to check for capacity issues
+echo "Checking agent pools for capacity issues..."
+if ! pools=$(az pipelines pool list --output json 2>/dev/null); then
+  echo "WARNING: Could not list agent pools to check capacity."
+else
+  # Process each pool to check capacity (read line by line: pool names commonly contain spaces)
+  while IFS= read -r pool <&3; do
+    pool_id=$(jq -r '.id' <<<"$pool")
+    pool_name=$(jq -r '.name' <<<"$pool")
+
+    # Skip Microsoft-hosted pools as we can't manage their capacity
+    is_hosted=$(jq -r '.isHosted // false' <<<"$pool")
+    if [[ "$is_hosted" == "true" ]]; then
+      continue
+    fi
+
+    # Get agents in the pool
+    if ! agents=$(az pipelines agent list --pool-id "$pool_id" --output json 2>/dev/null); then
+      continue
+    fi
+
+    # Count available (online + enabled) agents and the busy ones among them (NOTE(review): assignedRequest may only be populated when the list is requested with --include-assigned-request -- confirm)
+    total_agents=$(jq '[.[] | select(.status == "online" and .enabled == true)] | length' <<<"$agents")
+    busy_agents=$(jq '[.[] | select(.status == "online" and .enabled == true and .assignedRequest != null)] | length' <<<"$agents")
+
+    # If all available agents are busy, report capacity issue
+    if [[ "$total_agents" -gt 0 && "$busy_agents" -eq "$total_agents" ]]; then
+      issues_json=$(echo "$issues_json" | jq \
+        --arg title "Agent Pool \`$pool_name\` at Full Capacity" \
+        --arg details "All $total_agents online agents in pool are currently busy. This may be causing pipeline queuing." \
+        --arg severity "3" \
+        --arg nextStep "Consider adding more agents to pool \`$pool_name\` to reduce queue times." \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "next_step": $nextStep,
+          "severity": ($severity | tonumber)
+        }]')
+    fi
+  done 3< <(jq -c '.[]' <<<"$pools")
+fi
+
+# Write final JSON
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Azure DevOps queued pipeline analysis completed. Saved results to $OUTPUT_FILE"
\ No newline at end of file
diff --git a/codebundles/azure-devops-triage/repo-policies.sh b/codebundles/azure-devops-triage/repo-policies.sh
new file mode 100644
index 000000000..c29087b8f
--- /dev/null
+++ b/codebundles/azure-devops-triage/repo-policies.sh
@@ -0,0 +1,308 @@
+#!/usr/bin/env bash
+# set -x
+
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS:
+#   AZURE_DEVOPS_ORG - Azure DevOps organization name
+#   AZURE_DEVOPS_PROJECT - Azure DevOps project name (optional, checks all projects if not specified)
+#
+# This script:
+#   1) Lists all repositories in the specified Azure DevOps organization/project
+#   2) Checks branch policies against the standards defined in policy-standards.json
+#   3) Identifies missing or misconfigured policies
+#   4) Outputs results in JSON format
+# -----------------------------------------------------------------------------
+
+: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}"
+
+OUTPUT_FILE="repo_policies_issues.json"
+issues_json='[]'
+ORG_URL="https://dev.azure.com/$AZURE_DEVOPS_ORG"
+
+echo "Analyzing Azure DevOps Repository Policies..."
+echo "Organization: $AZURE_DEVOPS_ORG"
+if [[ -n "$AZURE_DEVOPS_PROJECT" ]]; then
+  echo "Project: $AZURE_DEVOPS_PROJECT"
+fi
+
+# Ensure Azure CLI is logged in and DevOps extension is installed
+if ! az extension show --name azure-devops &>/dev/null; then
+  echo "Installing Azure DevOps CLI extension..."
+  az extension add --name azure-devops --output none
+fi
+
+# Configure Azure DevOps CLI defaults
+az devops configure --defaults organization="$ORG_URL" --output none
+
+# Load policy standards
+if [[ -f "policy-standards.json" ]]; then
+  policy_standards=$(cat policy-standards.json)
+  echo "Loaded policy standards from policy-standards.json"
+else
+  echo "WARNING: policy-standards.json not found. Using default standards."
+ # Default minimal standards if file not found + policy_standards='{ + "requiredPolicies": { + "minimumReviewers": { + "typeId": "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd", + "displayName": "Minimum number of reviewers", + "settings": { + "minimumApproverCount": 2, + "creatorVoteCounts": false, + "allowDownvotes": false, + "resetOnSourcePush": true + } + }, + "workItemLinking": { + "typeId": "40e92b44-2fe1-4dd6-b3d8-74a9c21d0c6e", + "displayName": "Work item linking", + "settings": { + "enabled": true, + "workItemType": "Any" + } + } + }, + "branchPolicies": { + "defaultBranch": { + "isLocked": true, + "requirePullRequest": true, + "resetOnSourcePush": true + } + } + }' +fi + +# Get list of projects +if [[ -n "$AZURE_DEVOPS_PROJECT" ]]; then + projects_json="[{\"name\": \"$AZURE_DEVOPS_PROJECT\"}]" +else + echo "Retrieving all projects in organization..." + if ! projects_json=$(az devops project list --org "$ORG_URL" --output json 2>projects_err.log); then + err_msg=$(cat projects_err.log) + rm -f projects_err.log + + echo "ERROR: Could not list projects." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Projects" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if you have sufficient permissions to view projects." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 + fi + projects_json=$(echo "$projects_json" | jq '.value') + rm -f projects_err.log +fi + +# Save projects to a file to avoid subshell issues +echo "$projects_json" > projects.json + +# Get the number of projects +project_count=$(jq '. 
| length' projects.json) +echo "Found $project_count project(s) to analyze" + +# Process each project +for ((p=0; prepos_err.log); then + err_msg=$(cat repos_err.log) + rm -f repos_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Repositories in Project \`$project_name\`" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view repositories in this project." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f repos_err.log + + # Save repos to a file to avoid subshell issues + echo "$repos_json" > repos.json + + # Get the number of repos + repo_count=$(jq '. | length' repos.json) + echo "Found $repo_count repositories in project $project_name" + + # Process each repository + for ((r=0; rpolicies_err.log); then + err_msg=$(cat policies_err.log) + rm -f policies_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Policies for Repository \`$repo_name\`" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view policies in this repository." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f policies_err.log + + # Check if default branch is locked + if [[ $(echo "$policy_standards" | jq -r '.branchPolicies.defaultBranch.isLocked') == "true" ]]; then + # Check if branch has lock policy + if ! echo "$policies_json" | jq -e '[.[] | select(.type.id == "fa4e907d-c16b-4a4c-9dfa-4916e5d171ab")] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Default Branch Not Locked" \ + --arg details "Default branch \`$repo_default_branch\` in repository \`$repo_name\` (project \`$project_name\`) is not locked as required by policy." 
\ + --arg severity "3" \ + --arg nextStep "Enable branch lock policy for the default branch." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # Check if default branch requires pull request + if [[ $(echo "$policy_standards" | jq -r '.branchPolicies.defaultBranch.requirePullRequest') == "true" ]]; then + # Check if branch has PR policy + if ! echo "$policies_json" | jq -e '[.[] | select(.type.id == "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd")] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Default Branch Does Not Require Pull Requests" \ + --arg details "Default branch \`$repo_default_branch\` in repository \`$repo_name\` (project \`$project_name\`) does not require pull requests as required by policy." \ + --arg severity "3" \ + --arg nextStep "Enable pull request policy for the default branch." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # Check for required policies + required_policies=$(echo "$policy_standards" | jq -r '.requiredPolicies | keys[]') + for policy_key in $required_policies; do + policy_type_id=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.typeId") + policy_display_name=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.displayName") + + # Check if policy exists + if ! echo "$policies_json" | jq -e --arg type_id "$policy_type_id" '[.[] | select(.type.id == $type_id)] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Missing Required Policy: $policy_display_name" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) is missing the required policy: $policy_display_name" \ + --arg severity "3" \ + --arg nextStep "Add the required policy to the repository's default branch." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + else + # Policy exists, check settings + policy_settings=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.settings") + actual_policy=$(echo "$policies_json" | jq --arg type_id "$policy_type_id" '[.[] | select(.type.id == $type_id)][0]') + + # For minimum reviewers policy, check the count + if [[ "$policy_key" == "minimumReviewers" ]]; then + required_count=$(echo "$policy_settings" | jq -r '.minimumApproverCount') + actual_count=$(echo "$actual_policy" | jq -r '.settings.minimumApproverCount') + + if [[ "$actual_count" -lt "$required_count" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Insufficient Minimum Reviewers" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) requires only $actual_count reviewers, but policy requires $required_count." \ + --arg severity "2" \ + --arg nextStep "Increase the minimum number of required reviewers to $required_count." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # For build validation, check if it's configured + if [[ "$policy_key" == "buildValidation" ]]; then + if echo "$actual_policy" | jq -e '.settings.buildDefinitionId == 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Build Validation Not Configured" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) has build validation policy but no build definition is selected." \ + --arg severity "2" \ + --arg nextStep "Configure a build definition for the build validation policy." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + fi + done + done + + # Clean up repos file + rm -f repos.json +done + +# Clean up projects file +rm -f projects.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps repository policy analysis completed. Saved results to $OUTPUT_FILE" \ No newline at end of file diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot new file mode 100755 index 000000000..29a1aea49 --- /dev/null +++ b/codebundles/azure-devops-triage/runbook.robot @@ -0,0 +1,278 @@ +*** Settings *** +Documentation Check Azure DevOps health by examining pipeline status, agent pools, and build logs +Metadata Author saurabh3460 +Metadata Display Name Azure DevOps Triage +Metadata Supports Azure DevOps Pipelines Health +Force Tags Azure DevOps Pipelines Health + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + + +*** Tasks *** +Check Agent pool availability in organisation `${AZURE_DEVOPS_ORG}` in resource group `${AZURE_RESOURCE_GROUP}` + [Documentation] Check the health status of Agent Pools in the specified organization + [Tags] DevOps Azure Health access:read-only + ${agent_pool}= RW.CLI.Run Bash File + ... bash_file=agent-pools.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + ${issues}= RW.CLI.Run Cli + ... cmd=cat agent_pools_issues.json + + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + + IF len(@{issue_list}) > 0 + FOR ${agent} IN @{issue_list} + RW.Core.Add Issue + ... severity=${agent['severity']} + ... expected=Agent Pool should be available in organization `${AZURE_DEVOPS_ORG}` + ... actual=Agent Pool is unhealthy in organization `${AZURE_DEVOPS_ORG}` + ... 
title=Azure DevOps reports an Issue for Agent Pool in organization `${AZURE_DEVOPS_ORG}` + ... reproduce_hint=${agent_pool.cmd} + ... details=${agent} + ... next_steps=Please escalate to the Azure DevOps service owner or check back later. + END + END + +Check for Failed Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Identify failed pipeline runs in the specified project + [Tags] DevOps Azure Pipelines Failures access:read-only + ${failed_pipelines}= RW.CLI.Run Bash File + ... bash_file=pipeline-logs.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${failed_pipelines.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat pipeline_logs_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should complete successfully + ... actual=Pipeline failed with errors + ... title=${issue['title']} + ... reproduce_hint=${failed_pipelines.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... resource_url=${issue['resource_url']} + END + END + +Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Identify pipelines that are running longer than expected + [Tags] DevOps Azure Pipelines Performance access:read-only + ${long_running}= RW.CLI.Run Bash File + ... bash_file=long-running-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${long_running.stdout} + + ${issues}= RW.CLI.Run Cli + ... 
cmd=cat long_running_pipelines.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should complete within the expected time frame + ... actual=Pipeline is running longer than expected (${issue['duration']}) + ... title=${issue['title']} + ... reproduce_hint=${long_running.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... resource_url=${issue['resource_url']} + END + END + +Check for Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Identify pipelines that are queued for longer than expected + [Tags] DevOps Azure Pipelines Queue access:read-only + ${queued_pipelines}= RW.CLI.Run Bash File + ... bash_file=queued-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${queued_pipelines.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat queued_pipelines.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should start execution promptly + ... actual=Pipeline has been queued for ${issue['queue_time']} + ... title=${issue['title']} + ... reproduce_hint=${queued_pipelines.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... 
resource_url=${issue['resource_url']} + END + END + +Check for Repository Policy Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Verify repository branch policies against best practices + [Tags] DevOps Azure Repository Policies access:read-only + ${repo_policies}= RW.CLI.Run Bash File + ... bash_file=repo-policies.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${repo_policies.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat repo_policies_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Repository should have proper branch policies configured + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${repo_policies.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + END + END + +Check for Service Connection Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Verify the health of service connections used by pipelines + [Tags] DevOps Azure ServiceConnections access:read-only + ${service_connections}= RW.CLI.Run Bash File + ... bash_file=service-connections.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${service_connections.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat service_connections_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Service connections should be healthy and accessible + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${service_connections.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + END + END + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + ${AZURE_DEVOPS_ORG}= RW.Core.Import User Variable AZURE_DEVOPS_ORG + ... type=string + ... description=Azure DevOps organization. + ... pattern=\w* + ${AZURE_DEVOPS_PROJECT}= RW.Core.Import User Variable AZURE_DEVOPS_PROJECT + ... type=string + ... description=Azure DevOps project. + ... pattern=\w* + ${DAYS_TO_LOOK_BACK}= RW.Core.Import User Variable DAYS_TO_LOOK_BACK + ... type=integer + ... description=Number of days to look back for pipeline runs + ... default=7 + ${DURATION_THRESHOLD}= RW.Core.Import User Variable DURATION_THRESHOLD + ... type=string + ... description=Threshold for long-running pipelines (format: 60m, 2h) + ... default=60m + ${QUEUE_THRESHOLD}= RW.Core.Import User Variable QUEUE_THRESHOLD + ... type=string + ... description=Threshold for queued pipelines (format: 10m, 1h) + ... default=10m + ${SEVERITY_FAILED}= RW.Core.Import User Variable SEVERITY_FAILED + ... type=string + ... description=Severity level for failed pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) + ... default=3 + ${SEVERITY_LONG_RUNNING}= RW.Core.Import User Variable SEVERITY_LONG_RUNNING + ... type=string + ... 
description=Severity level for long-running pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) + ... default=2 + ${SEVERITY_QUEUED}= RW.Core.Import User Variable SEVERITY_QUEUED + ... type=string + ... description=Severity level for queued pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) + ... default=2 + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} + Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} + Set Suite Variable ${DAYS_TO_LOOK_BACK} ${DAYS_TO_LOOK_BACK} + Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} + Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} + Set Suite Variable ${SEVERITY_FAILED} ${SEVERITY_FAILED} + Set Suite Variable ${SEVERITY_LONG_RUNNING} ${SEVERITY_LONG_RUNNING} + Set Suite Variable ${SEVERITY_QUEUED} ${SEVERITY_QUEUED} + Set Suite Variable + ... ${env} + ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DAYS_TO_LOOK_BACK":"${DAYS_TO_LOOK_BACK}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}", "SEVERITY_FAILED":"${SEVERITY_FAILED}", "SEVERITY_LONG_RUNNING":"${SEVERITY_LONG_RUNNING}", "SEVERITY_QUEUED":"${SEVERITY_QUEUED}"} diff --git a/codebundles/azure-devops-triage/service-connections.sh b/codebundles/azure-devops-triage/service-connections.sh new file mode 100644 index 000000000..043a7de4f --- /dev/null +++ b/codebundles/azure-devops-triage/service-connections.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# AZURE_DEVOPS_PROJECT +# +# This script: +# 1) Lists all service connections in the specified Azure DevOps project +# 2) Checks the status of each service connection +# 3) Validates connectivity to external services +# 4) Outputs 
results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" + +OUTPUT_FILE="service_connections_issues.json" +issues_json='[]' + +echo "Analyzing Azure DevOps Service Connections..." +echo "Organization: $AZURE_DEVOPS_ORG" +echo "Project: $AZURE_DEVOPS_PROJECT" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." + az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none + +# Get list of service connections +echo "Retrieving service connections in project..." +if ! connections=$(az devops service-endpoint list --output json 2>connections_err.log); then + err_msg=$(cat connections_err.log) + rm -f connections_err.log + + echo "ERROR: Could not list service connections." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Service Connections" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if you have sufficient permissions to view service connections." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f connections_err.log + +# Process each service connection +for connection in $(echo "${connections}" | jq -c '.[]'); do + conn_id=$(echo $connection | jq -r '.id') + conn_name=$(echo $connection | jq -r '.name') + conn_type=$(echo $connection | jq -r '.type') + conn_url=$(echo $connection | jq -r '.url // "N/A"') + is_ready=$(echo $connection | jq -r '.isReady // false') + created_by=$(echo $connection | jq -r '.createdBy.displayName // "Unknown"') + + echo "Processing Service Connection: $conn_name (ID: $conn_id, Type: $conn_type)" + + # Check if connection is not ready + if [[ "$is_ready" != "true" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Service Connection \`$conn_name\` is Not Ready" \ + --arg details "Connection type: $conn_type, URL: $conn_url, Created by: $created_by" \ + --arg severity "3" \ + --arg nextStep "Verify the service connection configuration and credentials. Try to refresh or recreate the connection." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + + # Check connection health by type + if [[ "$conn_type" == "azurerm" || "$conn_type" == "azure" ]]; then + # For Azure connections, try to validate + if ! validation=$(az devops service-endpoint test --id "$conn_id" --output json 2>validation_err.log); then + err_msg=$(cat validation_err.log) + rm -f validation_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to Validate Azure Service Connection \`$conn_name\`" \ + --arg details "Connection type: $conn_type, Error: $err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if the service principal credentials are valid and not expired. Verify the Azure subscription is active." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + else + # Check validation result + is_valid=$(echo "$validation" | jq -r '.isValid // false') + if [[ "$is_valid" != "true" ]]; then + error_message=$(echo "$validation" | jq -r '.errorMessage // "Unknown error"') + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Invalid Azure Service Connection \`$conn_name\`" \ + --arg details "Connection type: $conn_type, Error: $error_message" \ + --arg severity "3" \ + --arg nextStep "Update the service connection with valid credentials. Check if the service principal has the required permissions." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + elif [[ "$conn_type" == "github" || "$conn_type" == "githubenterprise" ]]; then + # For GitHub connections, we can't directly test but can check usage + if ! usage=$(az devops service-endpoint get --id "$conn_id" --output json 2>usage_err.log); then + err_msg=$(cat usage_err.log) + rm -f usage_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to Get Details for GitHub Connection \`$conn_name\`" \ + --arg details "Connection type: $conn_type, Error: $err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if the connection still exists and you have permissions to view it." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + else + # Check if authorization is using a personal access token that might expire + auth_scheme=$(echo "$usage" | jq -r '.authorization.scheme // "Unknown"') + if [[ "$auth_scheme" == "PersonalAccessToken" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "GitHub Connection \`$conn_name\` Uses Personal Access Token" \ + --arg details "Connection type: $conn_type, Authorization scheme: $auth_scheme" \ + --arg severity "2" \ + --arg nextStep "Consider using GitHub Apps or OAuth instead of PAT for better security and to avoid token expiration issues." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + elif [[ "$conn_type" == "dockerregistry" ]]; then + # For Docker registry connections, check if it's using a username/password that might expire + if ! registry_details=$(az devops service-endpoint get --id "$conn_id" --output json 2>registry_err.log); then + err_msg=$(cat registry_err.log) + rm -f registry_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to Get Details for Docker Registry Connection \`$conn_name\`" \ + --arg details "Connection type: $conn_type, Error: $err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if the connection still exists and you have permissions to view it." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + else + # Check if it's using basic authentication + auth_scheme=$(echo "$registry_details" | jq -r '.authorization.scheme // "Unknown"') + if [[ "$auth_scheme" == "UsernamePassword" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Docker Registry Connection \`$conn_name\` Uses Username/Password Authentication" \ + --arg details "Connection type: $conn_type, Authorization scheme: $auth_scheme" \ + --arg severity "2" \ + --arg nextStep "Consider using service principals or managed identities for Azure Container Registry, or access tokens with limited scope for other registries." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + fi + + # Check for unused service connections (no recent usage) + # Note: This would require additional API calls to check usage history, which is not directly available via az CLI + # This is a placeholder for that functionality +done + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps service \ No newline at end of file From 296219e50caf987ebc824bd7f929b51f92cf0da0 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 22 May 2025 15:31:12 +0530 Subject: [PATCH 02/19] minor fixes --- .../azure-devops-triage/agent-pools.sh | 3 +- .../long-running-pipelines.sh | 104 +++++++--- .../azure-devops-triage/pipeline-logs.sh | 177 ++++++++++-------- .../azure-devops-triage/queued-pipelines.sh | 106 +++++------ .../service-connections.sh | 147 +++------------ 5 files changed, 244 insertions(+), 293 deletions(-) diff --git a/codebundles/azure-devops-triage/agent-pools.sh b/codebundles/azure-devops-triage/agent-pools.sh index ca950d015..0aaa92b0a 100644 --- a/codebundles/azure-devops-triage/agent-pools.sh +++ b/codebundles/azure-devops-triage/agent-pools.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -set -x -env + # 
----------------------------------------------------------------------------- # REQUIRED ENV VARS: # AZURE_DEVOPS_ORG diff --git a/codebundles/azure-devops-triage/long-running-pipelines.sh b/codebundles/azure-devops-triage/long-running-pipelines.sh index f7b0469c2..ec866cbbc 100644 --- a/codebundles/azure-devops-triage/long-running-pipelines.sh +++ b/codebundles/azure-devops-triage/long-running-pipelines.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash set -euo pipefail - +set -x # ----------------------------------------------------------------------------- # REQUIRED ENV VARS: # AZURE_DEVOPS_ORG # AZURE_DEVOPS_PROJECT # # OPTIONAL ENV VARS: -# DAYS_TO_LOOK_BACK - Number of days to look back for pipeline runs (default: 7) # DURATION_THRESHOLD - Threshold in minutes or hours (e.g., "60m" or "2h") for long-running pipelines (default: "60m") # # This script: @@ -18,8 +17,7 @@ set -euo pipefail : "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" : "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" -: "${DAYS_TO_LOOK_BACK:=7}" -: "${DURATION_THRESHOLD:=60m}" +: "${DURATION_THRESHOLD:=1m}" OUTPUT_FILE="long_running_pipelines.json" issues_json='[]' @@ -49,7 +47,6 @@ THRESHOLD_MINUTES=$(convert_to_minutes "$DURATION_THRESHOLD") echo "Analyzing Azure DevOps Pipeline Durations..." echo "Organization: $AZURE_DEVOPS_ORG" echo "Project: $AZURE_DEVOPS_PROJECT" -echo "Look Back: $DAYS_TO_LOOK_BACK days" echo "Threshold: $THRESHOLD_MINUTES minutes" # Ensure Azure CLI is logged in and DevOps extension is installed @@ -84,18 +81,27 @@ if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then fi rm -f pipelines_err.log -# Process each pipeline -for row in $(echo "${pipelines}" | jq -c '.[]'); do - pipeline_id=$(echo $row | jq -r '.id') - pipeline_name=$(echo $row | jq -r '.name') +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. 
| length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; iruns_err.log); then + if ! runs=$(az pipelines runs list --pipeline-id "$pipeline_id" --output json 2>runs_err.log); then err_msg=$(cat runs_err.log) rm -f runs_err.log @@ -114,13 +120,27 @@ for row in $(echo "${pipelines}" | jq -c '.[]'); do fi rm -f runs_err.log + # Save runs to a file to avoid subshell issues + echo "$runs" > runs.json + + # Get the number of runs + run_count=$(jq '. | length' runs.json) + # Check for currently running pipelines - for run in $(echo "${runs}" | jq -c '.[] | select(.state == "inProgress")'); do - run_id=$(echo $run | jq -r '.id') - run_name=$(echo $run | jq -r '.name // "Run #\(.id)"') - web_url=$(echo $run | jq -r '.url') - branch=$(echo $run | jq -r '.sourceBranch // "unknown"' | sed 's|refs/heads/||') - created_date=$(echo $run | jq -r '.createdDate') + for ((j=0; j/dev/null || echo 0) - if [ "$duration_seconds" = "null" ] || [ -z "$duration_seconds" ]; then + # Check if run is completed + run_state=$(echo "$run_json" | jq -r '.status') + if [[ "$run_state" != "completed" ]]; then continue fi + run_id=$(echo "$run_json" | jq -r '.id') + run_name=$(echo "$run_json" | jq -r '.name // "Run #\(.id)"') + web_url=$(echo "$run_json" | jq -r '.url') + branch=$(echo "$run_json" | jq -r '.sourceBranch // "unknown"' | sed 's|refs/heads/||') + + # Get start and finish times + start_time=$(echo "$run_json" | jq -r '.startTime') + finish_time=$(echo "$run_json" | jq -r '.finishTime') + + # Check if both times are valid + if [ "$start_time" != "null" ] && [ -n "$start_time" ] && [ "$finish_time" != "null" ] && [ -n "$finish_time" ]; then + # Convert ISO timestamps to Unix timestamps using date + start_timestamp=$(date -d "$start_time" +%s 2>/dev/null || echo 0) + finish_timestamp=$(date -d "$finish_time" +%s 2>/dev/null || echo 0) + + # Calculate duration in seconds + if [ "$start_timestamp" -gt 0 ] && [ "$finish_timestamp" -gt 0 ]; 
then + duration_seconds=$((finish_timestamp - start_timestamp)) + else + duration_seconds=0 + echo " Warning: Could not parse timestamps for run $run_id" + fi + else + duration_seconds=0 + echo " Warning: Missing start or finish time for run $run_id" + fi + duration_minutes=$((duration_seconds / 60)) # Format duration for display @@ -230,8 +274,14 @@ for row in $(echo "${pipelines}" | jq -c '.[]'); do }]') fi done + + # Clean up runs file + rm -f runs.json done +# Clean up pipelines file +rm -f pipelines.json + # Write final JSON echo "$issues_json" > "$OUTPUT_FILE" -echo "Azure DevOps long-running pipeline analysis completed. Saved results to $OUTPUT_FILE" \ No newline at end of file +echo "Azure DevOps long-running pipeline analysis completed. Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/pipeline-logs.sh b/codebundles/azure-devops-triage/pipeline-logs.sh index 6313f5acb..d33cc14a0 100644 --- a/codebundles/azure-devops-triage/pipeline-logs.sh +++ b/codebundles/azure-devops-triage/pipeline-logs.sh @@ -1,21 +1,20 @@ #!/usr/bin/env bash -set -euo pipefail - +#set -euo pipefail +set -x # ----------------------------------------------------------------------------- # REQUIRED ENV VARS: # AZURE_DEVOPS_ORG # AZURE_DEVOPS_PROJECT # +# # This script: # 1) Lists all pipelines in the specified Azure DevOps project -# 2) Checks for failed runs within the specified time period -# 3) Retrieves logs for each failed run -# 4) Outputs results in JSON format +# 2) Retrieves logs for each failed run +# 3) Outputs results in JSON format # ----------------------------------------------------------------------------- : "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" : "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" -: "${DAYS_TO_LOOK_BACK:=7}" OUTPUT_FILE="pipeline_logs_issues.json" TEMP_LOG_FILE="pipeline_log_temp.json" @@ -24,7 +23,6 @@ issues_json='[]' echo "Analyzing Azure DevOps Pipeline Logs..." 
echo "Organization: $AZURE_DEVOPS_ORG" echo "Project: $AZURE_DEVOPS_PROJECT" -echo "Look Back: $DAYS_TO_LOOK_BACK days" # Ensure Azure CLI is logged in and DevOps extension is installed if ! az extension show --name azure-devops &>/dev/null; then @@ -58,18 +56,24 @@ if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then fi rm -f pipelines_err.log -# Process each pipeline -for row in $(echo "${pipelines}" | jq -c '.[]'); do - pipeline_id=$(echo $row | jq -r '.id') - pipeline_name=$(echo $row | jq -r '.name') +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. | length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; iruns_err.log); then + if ! runs=$(az pipelines runs list --pipeline-id "$pipeline_id" --output json 2>runs_err.log); then err_msg=$(cat runs_err.log) rm -f runs_err.log @@ -88,19 +92,36 @@ for row in $(echo "${pipelines}" | jq -c '.[]'); do fi rm -f runs_err.log + # Save runs to a file to avoid subshell issues + echo "$runs" > runs.json + + # Get the number of runs + run_count=$(jq '. | length' runs.json) + # Check for failed runs - for run in $(echo "${runs}" | jq -c '.[] | select(.result == "failed")'); do - run_id=$(echo $run | jq -r '.id') - run_name=$(echo $run | jq -r '.name // "Run #\(.id)"') - web_url=$(echo $run | jq -r '.url') - branch=$(echo $run | jq -r '.sourceBranch // "unknown"' | sed 's|refs/heads/||') + for ((j=0; jlog_content_err.log); then - err_msg=$(cat log_content_err.log) - rm -f log_content_err.log + # Get all logs for the run using the new API + if ! 
all_logs=$(az devops invoke --org "https://dev.azure.com/$AZURE_DEVOPS_ORG" --area pipelines --resource logs --route-parameters project="$AZURE_DEVOPS_PROJECT" pipelineId="$pipeline_id" runId="$run_id" --api-version=7.0 --output json 2>logs_err.log); then + err_msg=$(cat logs_err.log) + rm -f logs_err.log issues_json=$(echo "$issues_json" | jq \ --arg title "Failed to Get Logs for Run $run_name in Pipeline $pipeline_name" \ @@ -117,72 +138,62 @@ for row in $(echo "${pipelines}" | jq -c '.[]'); do }]') continue fi - rm -f log_content_err.log + rm -f logs_err.log + + # Save all logs to a file for processing + echo "$all_logs" > all_logs.json + + # Get log with highest line count + if ! log_info=$(jq -c '.logs[] | {id: .id, lineCount: .lineCount}' all_logs.json | sort -r -k2,2 | head -1); then + echo "Failed to find logs with line count information" + continue + fi + + # Extract log ID with highest line count + log_id=$(echo "$log_info" | jq -r '.id') + echo " Selected log ID with highest line count: $log_id" + + # Get detailed log content for the selected log + if ! log_content=$(az devops invoke --org "https://dev.azure.com/$AZURE_DEVOPS_ORG" --area build --resource logs --route-parameters project="$AZURE_DEVOPS_PROJECT" buildId="$run_id" logId="$log_id" --api-version=7.0 --output json --only-show-errors 2>log_content_err.log); then + echo " Failed to get log content for log ID $log_id, skipping..." 
+ continue + fi # Save log content to temp file for processing echo "$log_content" > "$TEMP_LOG_FILE" - # Extract error information from logs - if [[ -s "$TEMP_LOG_FILE" ]]; then - # Extract error lines from logs - error_lines=$(jq -r '.[] | select(.line | test("error|exception|failed|Error|Exception|Failed"; "i")) | .line' "$TEMP_LOG_FILE" | head -n 50) - - if [[ -n "$error_lines" ]]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "Failed Pipeline Run: \`$pipeline_name\` (Branch: \`$branch\`)" \ - --arg details "$error_lines" \ - --arg severity "3" \ - --arg nextStep "Review pipeline configuration for \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\`. Check branch \`$branch\` for recent changes that might have caused the failure." \ - --arg resource_url "$web_url" \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber), - "resource_url": $resource_url - }]') - fi - - # Check for timeout issues - timeout_lines=$(jq -r '.[] | select(.line | test("timeout|timed out|canceled after|cancelled after"; "i")) | .line' "$TEMP_LOG_FILE") - if [[ -n "$timeout_lines" ]]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "Pipeline Timeout Detected: \`$pipeline_name\` (Branch: \`$branch\`)" \ - --arg details "$timeout_lines" \ - --arg severity "3" \ - --arg nextStep "Increase timeout settings for the pipeline or optimize the build process to complete faster." \ - --arg resource_url "$web_url" \ - '. 
+= [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber), - "resource_url": $resource_url - }]') - fi - - # Check for dependency issues - dependency_lines=$(jq -r '.[] | select(.line | test("package|dependency|module|nuget|npm|pip|maven"; "i") and .line | test("failed|error|not found|missing"; "i")) | .line' "$TEMP_LOG_FILE") - if [[ -n "$dependency_lines" ]]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "Dependency Issues Detected: \`$pipeline_name\` (Branch: \`$branch\`)" \ - --arg details "$dependency_lines" \ - --arg severity "3" \ - --arg nextStep "Check package references and ensure all dependencies are available and correctly versioned." \ - --arg resource_url "$web_url" \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber), - "resource_url": $resource_url - }]') - fi - fi - rm -f "$TEMP_LOG_FILE" + # Extract all log lines and join them with newlines + log_details=$(jq -r '.value | join("\n")' "$TEMP_LOG_FILE") + + # Construct the correct log URL format + error_log_url="https://dev.azure.com/$AZURE_DEVOPS_ORG/$project_id/_apis/build/builds/$run_id/logs/$log_id" + + # Clean up temp files + rm -f "$TEMP_LOG_FILE" all_logs.json + + # Add an issue with the full log content + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed Pipeline Run: \`$pipeline_name\` (Branch: \`$branch\`)" \ + --arg details "$log_details" \ + --arg severity "3" \ + --arg nextStep "Review pipeline configuration for \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\`. Check branch \`$branch\` for recent changes that might have caused the failure." \ + --arg resource_url "$error_log_url" \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber), + "resource_url": $resource_url + }]') done + + # Clean up runs file + rm -f runs.json done +# Clean up pipelines file +rm -f pipelines.json + # Write final JSON echo "$issues_json" > "$OUTPUT_FILE" echo "Azure DevOps pipeline log analysis completed. Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/queued-pipelines.sh b/codebundles/azure-devops-triage/queued-pipelines.sh index a7b45e816..26a93abda 100644 --- a/codebundles/azure-devops-triage/queued-pipelines.sh +++ b/codebundles/azure-devops-triage/queued-pipelines.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash set -euo pipefail - +set -x # ----------------------------------------------------------------------------- # REQUIRED ENV VARS: # AZURE_DEVOPS_ORG # AZURE_DEVOPS_PROJECT # # OPTIONAL ENV VARS: -# QUEUE_THRESHOLD - Threshold in minutes or hours (e.g., "10m" or "1h") for queued pipelines (default: "10m") # # This script: # 1) Lists all pipelines in the specified Azure DevOps project @@ -17,7 +16,7 @@ set -euo pipefail : "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" : "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" -: "${QUEUE_THRESHOLD:=10m}" +: "${QUEUE_THRESHOLD:=1m}" OUTPUT_FILE="queued_pipelines.json" issues_json='[]' @@ -81,10 +80,19 @@ if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then fi rm -f pipelines_err.log -# Process each pipeline -for row in $(echo "${pipelines}" | jq -c '.[]'); do - pipeline_id=$(echo $row | jq -r '.id') - pipeline_name=$(echo $row | jq -r '.name') +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. | length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; i runs.json + + # Get the number of runs + run_count=$(jq '. 
| length' runs.json) + # Check for queued runs - for run in $(echo "${runs}" | jq -c '.[] | select(.state == "notStarted")'); do - run_id=$(echo $run | jq -r '.id') - run_name=$(echo $run | jq -r '.name // "Run #\(.id)"') - web_url=$(echo $run | jq -r '.url') - branch=$(echo $run | jq -r '.sourceBranch // "unknown"' | sed 's|refs/heads/||') - created_date=$(echo $run | jq -r '.createdDate') + for ((j=0; j/dev/null); then queue_reason="Could not retrieve detailed information" else + # Save run details to a file + echo "$run_details" > run_details.json + # Extract queue position if available - queue_position=$(echo "$run_details" | jq -r '.queuePosition // "Unknown"') + queue_position=$(jq -r '.queuePosition // "Unknown"' run_details.json) if [ "$queue_position" != "null" ] && [ "$queue_position" != "Unknown" ]; then queue_reason="Queue position: $queue_position" fi # Try to extract any waiting reason - waiting_reason=$(echo "$run_details" | jq -r '.reason // "Unknown"') + waiting_reason=$(jq -r '.reason // "Unknown"' run_details.json) if [ "$waiting_reason" != "null" ] && [ "$waiting_reason" != "Unknown" ]; then queue_reason="$queue_reason, Reason: $waiting_reason" fi + + # Clean up run details file + rm -f run_details.json fi issues_json=$(echo "$issues_json" | jq \ @@ -185,50 +213,14 @@ for row in $(echo "${pipelines}" | jq -c '.[]'); do }]') fi done + + # Clean up runs file + rm -f runs.json done -# Get agent pools to check for capacity issues -echo "Checking agent pools for capacity issues..." -if ! pools=$(az pipelines pool list --output json 2>/dev/null); then - echo "WARNING: Could not list agent pools to check capacity." 
-else - # Process each pool to check capacity - for pool in $(echo "${pools}" | jq -c '.[]'); do - pool_id=$(echo $pool | jq -r '.id') - pool_name=$(echo $pool | jq -r '.name') - - # Skip Microsoft-hosted pools as we can't manage their capacity - is_hosted=$(echo $pool | jq -r '.isHosted // false') - if [ "$is_hosted" = "true" ]; then - continue - } - - # Get agents in the pool - if ! agents=$(az pipelines agent list --pool-id "$pool_id" --output json 2>/dev/null); then - continue - fi - - # Count total and busy agents - total_agents=$(echo "$agents" | jq 'length') - busy_agents=$(echo "$agents" | jq '[.[] | select(.status == "online" and .enabled == true and .assignedRequest != null)] | length') - - # If all agents are busy, report capacity issue - if [ "$total_agents" -gt 0 ] && [ "$busy_agents" -eq "$total_agents" ]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "Agent Pool \`$pool_name\` at Full Capacity" \ - --arg details "All $total_agents agents in pool are currently busy. This may be causing pipeline queuing." \ - --arg severity "3" \ - --arg nextStep "Consider adding more agents to pool \`$pool_name\` to reduce queue times." \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - fi - done -fi +# Clean up pipelines file +rm -f pipelines.json # Write final JSON echo "$issues_json" > "$OUTPUT_FILE" -echo "Azure DevOps queued pipeline analysis completed. Saved results to $OUTPUT_FILE" \ No newline at end of file +echo "Azure DevOps queued pipeline analysis completed. 
Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/service-connections.sh b/codebundles/azure-devops-triage/service-connections.sh index 043a7de4f..fc4e88c4b 100644 --- a/codebundles/azure-devops-triage/service-connections.sh +++ b/codebundles/azure-devops-triage/service-connections.sh @@ -8,9 +8,8 @@ set -euo pipefail # # This script: # 1) Lists all service connections in the specified Azure DevOps project -# 2) Checks the status of each service connection -# 3) Validates connectivity to external services -# 4) Outputs results in JSON format +# 2) Checks if each service connection is ready +# 3) Outputs results in JSON format # ----------------------------------------------------------------------------- : "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" @@ -55,14 +54,23 @@ if ! connections=$(az devops service-endpoint list --output json 2>connections_e fi rm -f connections_err.log -# Process each service connection -for connection in $(echo "${connections}" | jq -c '.[]'); do - conn_id=$(echo $connection | jq -r '.id') - conn_name=$(echo $connection | jq -r '.name') - conn_type=$(echo $connection | jq -r '.type') - conn_url=$(echo $connection | jq -r '.url // "N/A"') - is_ready=$(echo $connection | jq -r '.isReady // false') - created_by=$(echo $connection | jq -r '.createdBy.displayName // "Unknown"') +# Save connections to a file to avoid subshell issues +echo "$connections" > connections.json + +# Get the number of connections +connection_count=$(jq '. 
| length' connections.json) + +# Process each service connection using a for loop instead of pipe to while +for ((i=0; ivalidation_err.log); then - err_msg=$(cat validation_err.log) - rm -f validation_err.log - - issues_json=$(echo "$issues_json" | jq \ - --arg title "Failed to Validate Azure Service Connection \`$conn_name\`" \ - --arg details "Connection type: $conn_type, Error: $err_msg" \ - --arg severity "3" \ - --arg nextStep "Check if the service principal credentials are valid and not expired. Verify the Azure subscription is active." \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - else - # Check validation result - is_valid=$(echo "$validation" | jq -r '.isValid // false') - if [[ "$is_valid" != "true" ]]; then - error_message=$(echo "$validation" | jq -r '.errorMessage // "Unknown error"') - - issues_json=$(echo "$issues_json" | jq \ - --arg title "Invalid Azure Service Connection \`$conn_name\`" \ - --arg details "Connection type: $conn_type, Error: $error_message" \ - --arg severity "3" \ - --arg nextStep "Update the service connection with valid credentials. Check if the service principal has the required permissions." \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - fi - fi - elif [[ "$conn_type" == "github" || "$conn_type" == "githubenterprise" ]]; then - # For GitHub connections, we can't directly test but can check usage - if ! usage=$(az devops service-endpoint get --id "$conn_id" --output json 2>usage_err.log); then - err_msg=$(cat usage_err.log) - rm -f usage_err.log - - issues_json=$(echo "$issues_json" | jq \ - --arg title "Failed to Get Details for GitHub Connection \`$conn_name\`" \ - --arg details "Connection type: $conn_type, Error: $err_msg" \ - --arg severity "3" \ - --arg nextStep "Check if the connection still exists and you have permissions to view it." \ - '. 
+= [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - else - # Check if authorization is using a personal access token that might expire - auth_scheme=$(echo "$usage" | jq -r '.authorization.scheme // "Unknown"') - if [[ "$auth_scheme" == "PersonalAccessToken" ]]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "GitHub Connection \`$conn_name\` Uses Personal Access Token" \ - --arg details "Connection type: $conn_type, Authorization scheme: $auth_scheme" \ - --arg severity "2" \ - --arg nextStep "Consider using GitHub Apps or OAuth instead of PAT for better security and to avoid token expiration issues." \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - fi - fi - elif [[ "$conn_type" == "dockerregistry" ]]; then - # For Docker registry connections, check if it's using a username/password that might expire - if ! registry_details=$(az devops service-endpoint get --id "$conn_id" --output json 2>registry_err.log); then - err_msg=$(cat registry_err.log) - rm -f registry_err.log - - issues_json=$(echo "$issues_json" | jq \ - --arg title "Failed to Get Details for Docker Registry Connection \`$conn_name\`" \ - --arg details "Connection type: $conn_type, Error: $err_msg" \ - --arg severity "3" \ - --arg nextStep "Check if the connection still exists and you have permissions to view it." \ - '. 
+= [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - else - # Check if it's using basic authentication - auth_scheme=$(echo "$registry_details" | jq -r '.authorization.scheme // "Unknown"') - if [[ "$auth_scheme" == "UsernamePassword" ]]; then - issues_json=$(echo "$issues_json" | jq \ - --arg title "Docker Registry Connection \`$conn_name\` Uses Username/Password Authentication" \ - --arg details "Connection type: $conn_type, Authorization scheme: $auth_scheme" \ - --arg severity "2" \ - --arg nextStep "Consider using service principals or managed identities for Azure Container Registry, or access tokens with limited scope for other registries." \ - '. += [{ - "title": $title, - "details": $details, - "next_step": $nextStep, - "severity": ($severity | tonumber) - }]') - fi - fi - fi - - # Check for unused service connections (no recent usage) - # Note: This would require additional API calls to check usage history, which is not directly available via az CLI - # This is a placeholder for that functionality done +# Clean up temporary files +rm -f connections.json + # Write final JSON echo "$issues_json" > "$OUTPUT_FILE" -echo "Azure DevOps service \ No newline at end of file +echo "Azure DevOps service connections analysis completed. 
Saved results to $OUTPUT_FILE" From abfe234a3bbba5e807959b8b5ced76080897658c Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 22 May 2025 16:24:34 +0530 Subject: [PATCH 03/19] add terraform code --- .../.test/terraform/Taskfile.yaml | 69 +++++ .../.test/terraform/backend.tf | 5 + .../.test/terraform/main.tf | 291 ++++++++++++++++++ .../.test/terraform/providers.tf | 37 +++ .../.test/terraform/variables.tf | 58 ++++ codebundles/azure-devops-triage/README.md | 106 +++++++ codebundles/azure-devops-triage/runbook.robot | 25 +- 7 files changed, 571 insertions(+), 20 deletions(-) create mode 100755 codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml create mode 100755 codebundles/azure-devops-triage/.test/terraform/backend.tf create mode 100755 codebundles/azure-devops-triage/.test/terraform/main.tf create mode 100644 codebundles/azure-devops-triage/.test/terraform/providers.tf create mode 100644 codebundles/azure-devops-triage/.test/terraform/variables.tf create mode 100755 codebundles/azure-devops-triage/README.md diff --git a/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml b/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml new file mode 100755 index 000000000..08e0e835d --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml @@ -0,0 +1,69 @@ +version: '3' + +env: + TERM: screen-256color + +tasks: + default: + cmds: + - task: test + + test: + desc: Run tests. + cmds: + - task: test-terraform + + clean: + desc: Clean the environment. + cmds: + - task: clean-go + - task: clean-terraform + + clean-terraform: + desc: Clean the terraform environment (remove terraform directories and files) + cmds: + - find . -type d -name .terraform -exec rm -rf {} + + - find . -type f -name .terraform.lock.hcl -delete + + format-and-init-terraform: + desc: Run Terraform fmt and init + cmds: + - | + terraform fmt + terraform init + test-terraform: + desc: Run tests for all terraform directories. 
+ silent: true + env: + DIRECTORIES: + sh: find . -path '*/.terraform/*' -prune -o -name '*.tf' -type f -exec dirname {} \; | sort -u + cmds: + - | + BOLD=$(tput bold) + NORM=$(tput sgr0) + + CWD=$PWD + + for d in $DIRECTORIES; do + cd $d + echo "${BOLD}$PWD:${NORM}" + if ! terraform fmt -check=true -list=false -recursive=false; then + echo " ✗ terraform fmt" && exit 1 + else + echo " √ terraform fmt" + fi + + if ! terraform init -backend=false -input=false -get=true -no-color > /dev/null; then + echo " ✗ terraform init" && exit 1 + else + echo " √ terraform init" + fi + + if ! terraform validate > /dev/null; then + echo " ✗ terraform validate" && exit 1 + else + echo " √ terraform validate" + fi + + cd $CWD + done \ No newline at end of file diff --git a/codebundles/azure-devops-triage/.test/terraform/backend.tf b/codebundles/azure-devops-triage/.test/terraform/backend.tf new file mode 100755 index 000000000..3d0c056bc --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} \ No newline at end of file diff --git a/codebundles/azure-devops-triage/.test/terraform/main.tf b/codebundles/azure-devops-triage/.test/terraform/main.tf new file mode 100755 index 000000000..b1d283d69 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/main.tf @@ -0,0 +1,291 @@ +resource "azurerm_resource_group" "rg" { + name = var.resource_group + location = var.location + tags = var.tags +} + +data "azurerm_client_config" "current" {} + +# Azure DevOps Organization and Project setup +resource "azuredevops_project" "test_project" { + name = "DevOps-Triage-Test" + visibility = "private" + version_control = "Git" + work_item_template = "Agile" + description = "Project for testing Azure DevOps triage scripts" +} + +# Create a Git repository in the project with proper initialization +resource "azuredevops_git_repository" "test_repo" { + project_id = 
azuredevops_project.test_project.id + name = "test-pipeline-repo" + initialization { + init_type = "Clean" # This creates an initial commit and main branch + } +} + +# Create a variable group for pipeline variables +resource "azuredevops_variable_group" "test_vars" { + project_id = azuredevops_project.test_project.id + name = "Test Pipeline Variables" + description = "Variables for test pipelines" + allow_access = true + + variable { + name = "TEST_VAR" + value = "test-value" + } + + variable { + name = "RESOURCE_GROUP" + value = azurerm_resource_group.rg.name + } + + variable { + name = "AZURE_SUBSCRIPTION_ID" + value = data.azurerm_client_config.current.subscription_id + } +} + +# Create a self-hosted agent pool +resource "azuredevops_agent_pool" "test_pool" { + name = "Test-Agent-Pool" + auto_provision = false + auto_update = true +} + +# Create an agent queue for the project +resource "azuredevops_agent_queue" "test_queue" { + project_id = azuredevops_project.test_project.id + agent_pool_id = azuredevops_agent_pool.test_pool.id +} + +# Authorize the queue for use by all pipelines +resource "azuredevops_pipeline_authorization" "test_auth" { + project_id = azuredevops_project.test_project.id + resource_id = azuredevops_agent_queue.test_queue.id + type = "queue" +} + +# Output the agent pool information for manual agent setup +output "agent_pool_setup_instructions" { + value = <<-EOT + To set up a self-hosted agent: + + 1. Download the agent from: https://dev.azure.com/${var.azure_devops_org}/_settings/agentpools?poolId=${azuredevops_agent_pool.test_pool.id}&_a=agents + + 2. Or follow these steps: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. 
Configure: ./config.sh + - Server URL: https://dev.azure.com/${var.azure_devops_org} + - PAT: (your PAT) + - Agent pool: ${azuredevops_agent_pool.test_pool.name} + e. Run as a service: ./svc.sh install && ./svc.sh start + EOT +} + +# Create a service connection to Azure +resource "azuredevops_serviceendpoint_azurerm" "test_endpoint" { + project_id = azuredevops_project.test_project.id + service_endpoint_name = "Test-Azure-Connection" + description = "Managed by Terraform" + azurerm_spn_tenantid = data.azurerm_client_config.current.tenant_id + azurerm_subscription_id = data.azurerm_client_config.current.subscription_id + azurerm_subscription_name = "Test Subscription" + credentials { + serviceprincipalid = var.service_principal_id + serviceprincipalkey = var.service_principal_key + } +} + +# Create YAML files for pipelines +resource "local_file" "success_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: ${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Running successful pipeline" + echo "This pipeline will succeed" + echo "Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + displayName: 'Run successful script' + EOT + filename = "${path.module}/success-pipeline.yml" +} + +resource "local_file" "failing_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: ${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Running failing pipeline" + echo "This pipeline will fail" + echo "Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + exit 1 + displayName: 'Run failing script' + EOT + filename = "${path.module}/failing-pipeline.yml" +} + +resource "local_file" "long_running_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: 
${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Starting long-running pipeline" + echo "This pipeline will sleep for 5 minutes" # Reduced time for testing + echo "Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + sleep 300 + echo "Long-running pipeline completed" + displayName: 'Run long script' + EOT + filename = "${path.module}/long-running-pipeline.yml" +} + +# Upload YAML files to the repository +resource "azuredevops_git_repository_file" "success_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "success-pipeline.yml" + content = local_file.success_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add success pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +resource "azuredevops_git_repository_file" "failing_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "failing-pipeline.yml" + content = local_file.failing_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add failing pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +resource "azuredevops_git_repository_file" "long_running_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "long-running-pipeline.yml" + content = local_file.long_running_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add long-running pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +# Create the pipelines +resource "azuredevops_build_definition" "success_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Success-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + 
repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "success-pipeline.yml" + } + + variable_groups = [ + azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.success_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +resource "azuredevops_build_definition" "failing_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Failing-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "failing-pipeline.yml" + } + + variable_groups = [ + azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.failing_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +resource "azuredevops_build_definition" "long_running_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Long-Running-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "long-running-pipeline.yml" + } + + variable_groups = [ + azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.long_running_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +# Outputs +output "project_name" { + value = azuredevops_project.test_project.name +} + +output "project_url" { + value = "https://dev.azure.com/${var.azure_devops_org}/${azuredevops_project.test_project.name}" +} + +output "agent_pool_name" { + value = azuredevops_agent_pool.test_pool.name +} diff --git a/codebundles/azure-devops-triage/.test/terraform/providers.tf b/codebundles/azure-devops-triage/.test/terraform/providers.tf new file mode 100644 index 
000000000..4c85e2705 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/providers.tf @@ -0,0 +1,37 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 3.0" + } + azuredevops = { + source = "microsoft/azuredevops" + version = "~> 1.8.1" + } + time = { + source = "hashicorp/time" + version = "~> 0.9.1" + } + } + required_version = ">= 1.0.0" +} + +provider "azurerm" { + features {} +} + +provider "azuredevops" { + org_service_url = var.azure_devops_org_url != null ? var.azure_devops_org_url : "https://dev.azure.com/${var.azure_devops_org}" + client_id = var.service_principal_id + tenant_id = var.tenant_id + client_secret = var.service_principal_key +} + +# provider "azapi" { +# } + +# provider "local" { +# } + +# provider "null" { +# } diff --git a/codebundles/azure-devops-triage/.test/terraform/variables.tf b/codebundles/azure-devops-triage/.test/terraform/variables.tf new file mode 100644 index 000000000..641a00c82 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/variables.tf @@ -0,0 +1,58 @@ +variable "azure_devops_org" { + description = "Azure DevOps organization name" + type = string +} + +variable "azure_devops_org_url" { + description = "Azure DevOps organization URL" + type = string + default = null +} + +variable "service_principal_id" { + description = "Service Principal ID for Azure DevOps service connection" + type = string + sensitive = true +} + +variable "service_principal_key" { + description = "Service Principal Key for Azure DevOps service connection" + type = string + sensitive = true +} + +variable "tenant_id" { + description = "Azure AD tenant ID for service principal authentication" + type = string + sensitive = true +} + +# variable "subscription_id" { +# description = "Azure subscription ID" +# type = string +# sensitive = true +# } + + + +variable "tags" { + description = "Tags to apply to resources" + type = map(string) + default = {} +} + +variable 
"resource_group" { + description = "Name of the Azure resource group" + type = string +} + +variable "location" { + description = "Azure region where resources will be created" + type = string +} + +variable "trigger_pipelines" { + description = "Whether to trigger the pipelines after creation" + type = bool + default = true +} diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md new file mode 100755 index 000000000..02919d0c9 --- /dev/null +++ b/codebundles/azure-devops-triage/README.md @@ -0,0 +1,106 @@ +# Azure DevOps Triage + +This codebundle runs a suite of health checks for Azure DevOps. It identifies: + +- Agent Pool Availability +- Failed Pipeline Runs +- Long-Running Pipelines +- Queued Pipelines +- Repository Policies +- Service Connection Health + +## Configuration + +The runbook requires initialization to import necessary secrets and user variables. The following variables should be set: + +- `AZURE_RESOURCE_GROUP`: The Azure resource group where DevOps resources are deployed +- `AZURE_DEVOPS_ORG`: Your Azure DevOps organization name +- `AZURE_DEVOPS_PROJECT`: Your Azure DevOps project name +- `DURATION_THRESHOLD`: Threshold for long-running pipelines (format: 60m, 2h) (default: 60m) +- `QUEUE_THRESHOLD`: Threshold for queued pipelines (format: 10m, 1h) (default: 30m) + +## Testing + +The `.test` directory contains infrastructure test code using Terraform to set up a test environment. + +### Prerequisites for Testing + +1. An existing Azure subscription +2. An existing Azure DevOps organization +3. Permissions to create resources in Azure and Azure DevOps +4. Azure CLI installed and configured +5. 
Terraform installed (v1.0.0+) + +### Test Environment Setup + +The test environment creates: +- A new Azure DevOps project +- A new agent pool +- Git repositories with sample pipeline definitions +- Variable groups for testing + +#### Step 1: Configure Terraform Variables + +Create a `terraform.tfvars` file in the `.test/terraform` directory: + +```hcl +azure_devops_org = "your-org-name" +azure_devops_pat = "your-personal-access-token" +azure_subscription_id = "your-subscription-id" +azure_tenant_id = "your-tenant-id" +azure_client_id = "your-client-id" +azure_client_secret = "your-client-secret" +resource_group_name = "your-resource-group" +location = "eastus" +``` + +#### Step 2: Initialize and Apply Terraform + +```bash +cd .test/terraform +terraform init +terraform apply +``` + +#### Step 3: Set Up Self-Hosted Agent (Manual Step) + +After Terraform creates the agent pool, you need to manually set up at least one self-hosted agent: + +1. In Azure DevOps, navigate to Project Settings > Agent pools > [Your Pool Name] +2. Click "New agent" +3. Follow the instructions to download and configure the agent on your machine +4. Start the agent and verify it's online + +#### Step 4: Trigger Test Pipelines (Manual Step) + +The test environment includes several pipeline definitions: +- Success Pipeline: A pipeline that completes successfully +- Failed Pipeline: A pipeline that intentionally fails +- Long-Running Pipeline: A pipeline that runs for longer than the threshold + +To trigger these pipelines: +1. Navigate to Pipelines in your Azure DevOps project +2. Select each pipeline and click "Run pipeline" + +#### Step 5: Run the Triage Runbook + +Once the test environment is set up and pipelines are running, you can execute the triage runbook to verify it correctly identifies issues. 
+ +### Cleaning Up + +To remove the test environment: + +```bash +cd .test/terraform +terraform destroy +``` + +Note: This will not remove the Azure DevOps organization, as it was a prerequisite. + +## Notes + +- The codebundle uses the Azure CLI with the Azure DevOps extension to interact with Azure DevOps. +- Service principal authentication is used for Azure resources. +- The runbook focuses on identifying issues rather than fixing them. +- For queued pipelines, the threshold is measured from when the pipeline was created to the current time. +- For long-running pipelines, the threshold is measured from start time to finish time (or current time if still running). diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot index 29a1aea49..b9a28fec3 100755 --- a/codebundles/azure-devops-triage/runbook.robot +++ b/codebundles/azure-devops-triage/runbook.robot @@ -79,7 +79,7 @@ Check for Failed Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation END END -Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` +Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` (Threshold: ${DURATION_THRESHOLD}) [Documentation] Identify pipelines that are running longer than expected [Tags] DevOps Azure Pipelines Performance access:read-only ${long_running}= RW.CLI.Run Bash File @@ -105,7 +105,7 @@ Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organis FOR ${issue} IN @{issue_list} RW.Core.Add Issue ... severity=${issue['severity']} - ... expected=Pipeline should complete within the expected time frame + ... expected=Pipeline should complete within the expected time frame (${DURATION_THRESHOLD}) ... actual=Pipeline is running longer than expected (${issue['duration']}) ... title=${issue['title']} ... 
reproduce_hint=${long_running.cmd} @@ -115,7 +115,7 @@ Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organis END END -Check for Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` +Check for Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` (Threshold: ${QUEUE_THRESHOLD}) [Documentation] Identify pipelines that are queued for longer than expected [Tags] DevOps Azure Pipelines Queue access:read-only ${queued_pipelines}= RW.CLI.Run Bash File @@ -141,7 +141,7 @@ Check for Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation FOR ${issue} IN @{issue_list} RW.Core.Add Issue ... severity=${issue['severity']} - ... expected=Pipeline should start execution promptly + ... expected=Pipeline should start execution promptly (within ${QUEUE_THRESHOLD}) ... actual=Pipeline has been queued for ${issue['queue_time']} ... title=${issue['title']} ... reproduce_hint=${queued_pipelines.cmd} @@ -252,27 +252,12 @@ Suite Initialization ... type=string ... description=Threshold for queued pipelines (format: 10m, 1h) ... default=10m - ${SEVERITY_FAILED}= RW.Core.Import User Variable SEVERITY_FAILED - ... type=string - ... description=Severity level for failed pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) - ... default=3 - ${SEVERITY_LONG_RUNNING}= RW.Core.Import User Variable SEVERITY_LONG_RUNNING - ... type=string - ... description=Severity level for long-running pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) - ... default=2 - ${SEVERITY_QUEUED}= RW.Core.Import User Variable SEVERITY_QUEUED - ... type=string - ... description=Severity level for queued pipeline issues (1=Low, 2=Medium, 3=High, 4=Critical) - ... 
default=2 Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} Set Suite Variable ${DAYS_TO_LOOK_BACK} ${DAYS_TO_LOOK_BACK} Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} - Set Suite Variable ${SEVERITY_FAILED} ${SEVERITY_FAILED} - Set Suite Variable ${SEVERITY_LONG_RUNNING} ${SEVERITY_LONG_RUNNING} - Set Suite Variable ${SEVERITY_QUEUED} ${SEVERITY_QUEUED} Set Suite Variable ... ${env} - ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DAYS_TO_LOOK_BACK":"${DAYS_TO_LOOK_BACK}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}", "SEVERITY_FAILED":"${SEVERITY_FAILED}", "SEVERITY_LONG_RUNNING":"${SEVERITY_LONG_RUNNING}", "SEVERITY_QUEUED":"${SEVERITY_QUEUED}"} + ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DAYS_TO_LOOK_BACK":"${DAYS_TO_LOOK_BACK}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"} From b8a01f84a9145e9bf9a5c881d31be1383ff8d0b2 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 22 May 2025 16:31:31 +0530 Subject: [PATCH 04/19] update README file --- codebundles/azure-devops-triage/README.md | 23 +++++++++++++++++++ codebundles/azure-devops-triage/runbook.robot | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md index 02919d0c9..8db6c04ea 100755 --- a/codebundles/azure-devops-triage/README.md +++ b/codebundles/azure-devops-triage/README.md @@ -31,6 +31,29 @@ The `.test` directory contains infrastructure test code using Terraform to set u 4. 
Azure CLI installed and configured 5. Terraform installed (v1.0.0+) +### Azure DevOps Organization Setup (Before Running Terraform) + +Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: + +#### 1. Organization Settings Configuration + +1. Navigate to your Azure DevOps organization settings +2. Add the user who will be running Terraform to the organization +3. Add the service principal that Terraform will use as a user + +#### 2. Agent Pool Permissions + +1. Go to Organization Settings > Agent Pools > Security +2. Add your user (service principal) account with Administrator permissions + +#### 3. Organization-Level Security Permissions + +1. Go to Organization Settings > Security > Permissions +2. Find your user (service principal) +3. Ensure they have "Create new projects" permission set to "Allow" + +These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. + ### Test Environment Setup The test environment creates: diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot index b9a28fec3..afeee2259 100755 --- a/codebundles/azure-devops-triage/runbook.robot +++ b/codebundles/azure-devops-triage/runbook.robot @@ -251,7 +251,7 @@ Suite Initialization ${QUEUE_THRESHOLD}= RW.Core.Import User Variable QUEUE_THRESHOLD ... type=string ... description=Threshold for queued pipelines (format: 10m, 1h) - ... default=10m + ... 
default=30m Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} From e7149b580ca297aa32bf3cbf4fd3f42bc3c5201a Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 22 May 2025 16:35:45 +0530 Subject: [PATCH 05/19] update README --- .../azure-devops-triage/.test/terraform/main.tf | 2 +- codebundles/azure-devops-triage/README.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/codebundles/azure-devops-triage/.test/terraform/main.tf b/codebundles/azure-devops-triage/.test/terraform/main.tf index b1d283d69..b83af8e36 100755 --- a/codebundles/azure-devops-triage/.test/terraform/main.tf +++ b/codebundles/azure-devops-triage/.test/terraform/main.tf @@ -80,7 +80,7 @@ output "agent_pool_setup_instructions" { c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz d. Configure: ./config.sh - Server URL: https://dev.azure.com/${var.azure_devops_org} - - PAT: (your PAT) + - PAT: (your PAT) #generate PAT from the your azure devops org - Agent pool: ${azuredevops_agent_pool.test_pool.name} e. Run as a service: ./svc.sh install && ./svc.sh start EOT diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md index 8db6c04ea..6acd29350 100755 --- a/codebundles/azure-devops-triage/README.md +++ b/codebundles/azure-devops-triage/README.md @@ -94,6 +94,16 @@ After Terraform creates the agent pool, you need to manually set up at least one 3. Follow the instructions to download and configure the agent on your machine 4. Start the agent and verify it's online +Or follow these steps: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. 
Configure: ./config.sh + - Server URL: https://dev.azure.com/<your-org-name> + - PAT: (your PAT) # generate a PAT in your Azure DevOps organization + - Agent pool: <agent pool name from the `agent_pool_name` Terraform output> + e. Run as a service: ./svc.sh install && ./svc.sh start + #### Step 4: Trigger Test Pipelines (Manual Step) The test environment includes several pipeline definitions: From 95241cee3d8c0ba22f5ad458f715755c3881025a Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 12:18:49 +0530 Subject: [PATCH 06/19] add generation rule and templates --- .gitignore | 3 + .../generation-rules/azure-devops-triage.yaml | 22 + .../templates/azure-devops-triage-sli.yaml | 62 +++ .../templates/azure-devops-triage-slx.yaml | 42 ++ .../azure-devops-triage-taskset.yaml | 43 ++ .../azure-devops-triage/.test/README.md | 118 +++++ .../azure-devops-triage/.test/Taskfile.yaml | 409 ++++++++++++++++++ codebundles/azure-devops-triage/README.md | 12 +- 8 files changed, 705 insertions(+), 6 deletions(-) create mode 100755 codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml create mode 100755 codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml create mode 100755 codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml create mode 100755 codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml create mode 100755 codebundles/azure-devops-triage/.test/README.md create mode 100755 codebundles/azure-devops-triage/.test/Taskfile.yaml diff --git a/.gitignore b/.gitignore index 9b81cc2a6..7a64caddb 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,9 @@ build **/.terraform/ **/.terraform.lock.hcl +# Ignore terraform tfvars +**/*.tfvars + # Ignore crash logs **/crash.log diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml new file mode 100755 index 
000000000..a7cae8d49 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -0,0 +1,22 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: azure + generationRules: + - resourceTypes: + - azure_devops_organization + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: az-devops-triage + qualifiers: ["resource_group"] + baseTemplateName: azure-devops-triage + levelOfDetail: basic + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: azure-devops-triage-taskset.yaml diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml new file mode 100755 index 000000000..4138c9a85 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml @@ -0,0 +1,62 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{default_location}} + description: Checks Azure DevOps health by examining pipeline status, agent pools, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-devops-triage/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{ match_resource.resource_group.name }}" + - name: AZURE_DEVOPS_ORG + value: "{{ custom.devops_org }}" + - name: AZURE_DEVOPS_PROJECT + value: "{{ 
custom.devops_project }}" + - name: DAYS_TO_LOOK_BACK + value: "7" + - name: DURATION_THRESHOLD + value: "60m" + - name: QUEUE_THRESHOLD + value: "10m" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} + + alerts: + warning: + operator: < + threshold: '1' + for: '20m' + ticket: + operator: < + threshold: '1' + for: '40m' + page: + operator: '==' + threshold: '0' + for: '' diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml new file mode 100755 index 000000000..121afed96 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -0,0 +1,42 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{ slx_name }} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/security/10245-icon-service-Key-Vaults.svg + alias: >- + {{ match_resource.resource_group.name }} Azure DevOps Health + asMeasuredBy: Composite health score of Azure DevOps resources & activities. + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{ match_resource.resource_group.name }}" + - name: AZURE_DEVOPS_ORG + value: "{{ custom.devops_org }}" + - name: AZURE_DEVOPS_PROJECT + value: "{{ custom.devops_project }}" + - name: DAYS_TO_LOOK_BACK + value: "7" + - name: DURATION_THRESHOLD + value: "60m" + - name: QUEUE_THRESHOLD + value: "10m" + owners: + - {{ workspace.owner_email }} + statement: >- + Measure Azure DevOps health by checking agent pools, pipeline status, repository policies, + and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }}. 
+ additionalContext: + {% include "azure-hierarchy.yaml" ignore missing %} + qualified_name: "{{ match_resource.qualified_name }}" + tags: + {% include "azure-tags.yaml" ignore missing %} + - name: cloud + value: azure + - name: service + value: devops + - name: access + value: read-only diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml new file mode 100755 index 000000000..0cad58e1d --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml @@ -0,0 +1,43 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: Check Azure DevOps health by examining pipeline status, agent pools, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-devops-triage/runbook.robot + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{ match_resource.resource_group.name }}" + - name: AZURE_DEVOPS_ORG + value: "{{ custom.devops_org }}" + - name: AZURE_DEVOPS_PROJECT + value: "{{ custom.devops_project }}" + - name: DAYS_TO_LOOK_BACK + value: "7" + - name: DURATION_THRESHOLD + value: "60m" + - name: QUEUE_THRESHOLD + value: "10m" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} diff --git a/codebundles/azure-devops-triage/.test/README.md 
b/codebundles/azure-devops-triage/.test/README.md new file mode 100755 index 000000000..b53189ac0 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/README.md @@ -0,0 +1,118 @@ +## Testing + +The `.test` directory contains infrastructure test code using Terraform to set up a test environment. + +### Prerequisites for Testing + +1. An existing Azure subscription +2. An existing Azure DevOps organization +3. Permissions to create resources in Azure and Azure DevOps +4. Azure CLI installed and configured +5. Terraform installed (v1.0.0+) + +### Azure DevOps Organization Setup (Before Running Terraform) + +Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: + +#### 1. Organization Settings Configuration + +1. Navigate to your Azure DevOps organization settings +2. Add the user who will be running Terraform to the organization +3. Add the service principal that Terraform will use as a user + +#### 2. Agent Pool Permissions + +1. Go to Organization Settings > Agent Pools > Security +2. Add your user (service principal) account with Administrator permissions + +#### 3. Organization-Level Security Permissions + +1. Go to Organization Settings > Security > Permissions +2. Find your user (service principal) +3. Ensure they have "Create new projects" permission set to "Allow" + +These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. 
+ +### Test Environment Setup + +The test environment creates: +- A new Azure DevOps project +- A new agent pool +- Git repositories with sample pipeline definitions +- Variable groups for testing + +#### Step 1: Configure Terraform Variables + +Create a `terraform.tfvars` file in the `.test/terraform` directory: + +```hcl +azure_devops_org = "your-org-name" +azure_devops_org_url = "https://dev.azure.com/your-org-name" +service_principal_id = "your-service-principal-id" +service_principal_key = "your-service-principal-key" +tenant_id = "your-tenant-id" +resource_group = "your-resource-group" +location = "eastus" +tags = { environment = "test" } +``` + +#### Step 2: Initialize and Apply Terraform + +```bash +cd .test/terraform +terraform init +terraform apply +``` + +#### Step 3: Set Up Self-Hosted Agent (Manual Step) + +After Terraform creates the agent pool, you need to manually set up at least one self-hosted agent: + +1. In Azure DevOps, navigate to Project Settings > Agent pools > [Your Pool Name] +2. Click "New agent" +3. Follow the instructions to download and configure the agent on your machine +4. Start the agent and verify it's online + +Or follow these steps: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. Configure: ./config.sh + - Server URL: https://dev.azure.com/<your-org-name> + - PAT: (your PAT) # generate a PAT in your Azure DevOps organization + - Agent pool: <agent pool name from the `agent_pool_name` Terraform output> + e. 
Run as a service: ./svc.sh install && ./svc.sh start + +#### Step 4: Trigger Test Pipelines (Manual Step) + +The test environment includes several pipeline definitions: +- Success Pipeline: A pipeline that completes successfully +- Failed Pipeline: A pipeline that intentionally fails +- Long-Running Pipeline: A pipeline that runs for longer than the threshold + +To trigger these pipelines: +1. Navigate to Pipelines in your Azure DevOps project +2. Select each pipeline and click "Run pipeline" + +#### Step 5: Run the Triage Runbook + +Once the test environment is set up and pipelines are running, you can execute the triage runbook to verify it correctly identifies issues. + +### Cleaning Up + +To remove the test environment: + +```bash +cd .test/terraform +terraform destroy +``` + +Note: This will not remove the Azure DevOps organization, as it was a prerequisite. + +## Notes + +- The codebundle uses the Azure CLI with the Azure DevOps extension to interact with Azure DevOps. +- Service principal authentication is used for Azure resources. +- The runbook focuses on identifying issues rather than fixing them. +- For queued pipelines, the threshold is measured from when the pipeline was created to the current time. +- For long-running pipelines, the threshold is measured from start time to finish time (or current time if still running). 
diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml new file mode 100755 index 000000000..a18660ee6 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -0,0 +1,409 @@ +version: "3" + +tasks: + default: + desc: "Run/refresh config" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: check-and-cleanup-terraform + - task: delete-slxs + - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: build-terraform-infra + + check-unpushed-commits: + desc: Check if outstanding commits or file updates need to be pushed before testing. + vars: + # Specify the base directory relative to your Taskfile location + BASE_DIR: "../" + cmds: + - | + echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED_FILES" ]; then + echo "✗" + echo "Uncommitted changes found:" + echo "$UNCOMMITTED_FILES" + echo 'Remember to commit & push changes before executing the `run-rwl-discovery` task.' # single-quoted so the backticks print literally instead of running a command substitution + echo "------------" + exit 1 + else + echo "√" + echo "No uncommitted changes in specified directories." + echo "------------" + fi + - | + echo "Checking for unpushed commits in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + git fetch origin + UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNPUSHED_FILES" ]; then + echo "✗" + echo "Unpushed commits found:" + echo "$UNPUSHED_FILES" + echo 'Remember to push changes before executing the `run-rwl-discovery` task.' # single-quoted so the backticks print literally instead of running a command substitution + echo "------------" + exit 1 + else + echo "√" + echo "No unpushed commits in specified directories." 
+ echo "------------" + fi + silent: true + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + ARM_SUBSCRIPTION_ID: "{{.ARM_SUBSCRIPTION_ID}}" + AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" + AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" + AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + cmds: + - | + source terraform/tf.secret + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID + subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) + # Read the azurerm_resource_group name from the local Terraform state file + pushd terraform > /dev/null + resource_group=$(terraform show -json terraform.tfstate | jq -r ' + .values.root_module.resources[] | + select(.type == "azurerm_resource_group") | .values.name') + popd > /dev/null + + # Abort if no resource group name was found in the Terraform state + if [ -z "$resource_group" ]; then + echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." 
+ exit 1 + fi + source terraform/tf.secret + # Generate workspaceInfo.yaml with fetched resource_group details (unquoted EOF delimiter so the shell expands the $variables below) + cat <<EOF > workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01-us-west1 + defaultLOD: detailed + cloudConfig: + azure: + subscriptionId: "$ARM_SUBSCRIPTION_ID" + tenantId: "$AZ_TENANT_ID" + clientId: "$AZ_CLIENT_ID" + clientSecret: "$AZ_CLIENT_SECRET" + resourceGroupLevelOfDetails: + $resource_group: detailed + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + subscription_name: $subscription_name + EOF + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + source terraform/tf.secret + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Stopping and removing existing container $CONTAINER_NAME..." + docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Removing existing stopped container $CONTAINER_NAME..." + docker rm $CONTAINER_NAME + else + echo "No existing container named $CONTAINER_NAME found." + fi + + echo "Cleaning up output directory..." + sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; } + mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; } + + echo "Starting new container $CONTAINER_NAME..." + + docker run --name $CONTAINER_NAME -p 8081:8081 -v "$(pwd)":/shared -d ghcr.io/runwhen-contrib/runwhen-local:latest || { + echo "Failed to start container"; exit 1; + } + + echo "Running workspace builder script in container..." 
+ docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { + echo "Error executing script in container"; exit 1; + } + + echo "Review generated config files under output/workspaces/" + silent: true + + validate-generation-rules: + desc: "Validate YAML files in .runwhen/generation-rules" + cmds: + - | + for cmd in curl yq ajv; do + if ! command -v $cmd &> /dev/null; then + echo "Error: $cmd is required but not installed." + exit 1 + fi + done + + temp_dir=$(mktemp -d) + curl -s -o "$temp_dir/generation-rule-schema.json" https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json + + for yaml_file in ../.runwhen/generation-rules/*.yaml; do + echo "Validating $yaml_file" + json_file="$temp_dir/$(basename "${yaml_file%.*}.json")" + yq -o=json "$yaml_file" > "$json_file" + ajv validate -s "$temp_dir/generation-rule-schema.json" -d "$json_file" --spec=draft2020 --strict=false \ + && echo "$yaml_file is valid." || echo "$yaml_file is invalid." + done + + rm -rf "$temp_dir" + silent: true + + check-rwp-config: + desc: Check if env vars are set for RunWhen Platform + cmds: + - | + source terraform/tf.secret + missing_vars=() + + if [ -z "$RW_WORKSPACE" ]; then + missing_vars+=("RW_WORKSPACE") + fi + + if [ -z "$RW_API_URL" ]; then + missing_vars+=("RW_API_URL") + fi + + if [ -z "$RW_PAT" ]; then + missing_vars+=("RW_PAT") + fi + + if [ ${#missing_vars[@]} -ne 0 ]; then + echo "The following required environment variables are missing: ${missing_vars[*]}" + exit 1 + fi + silent: true + + upload-slxs: + desc: "Upload SLX files to the appropriate URL" + env: + RW_WORKSPACE: "{{.RW_WORKSPACE}}" + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/tf.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Upload aborted." 
+ exit 1 + fi + + # Create Secrets + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" + PAYLOAD="{\"secrets\": {\"az_subscriptionId\": \"${ARM_SUBSCRIPTION_ID}\", \"az_clientId\": \"${AZ_CLIENT_ID}\", \"az_tenantId\": \"${AZ_TENANT_ID}\", \"az_clientSecret\": \"${AZ_CLIENT_SECRET}\"}}" + echo "Uploading secrets to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded secrets to $URL" + else + echo "Failed to upload secrets: $SLX_NAME to $URL. Unexpected response code: $response_code" + fi + + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + PAYLOAD=$(jq -n --arg commitMsg "Creating new SLX $SLX_NAME" '{ commitMsg: $commitMsg, files: {} }') + for file in slx.yaml runbook.yaml sli.yaml; do + if [ -f "$dir/$file" ]; then + CONTENT=$(cat "$dir/$file") + PAYLOAD=$(echo "$PAYLOAD" | jq --arg fileContent "$CONTENT" --arg fileName "$file" '.files[$fileName] = $fileContent') + fi + done + + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Uploading SLX: $SLX_NAME to $URL" + + response=$(curl -v -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|201 ]]; then + echo "Successfully uploaded SLX: $SLX_NAME to $URL" + else + echo "Failed to upload SLX: $SLX_NAME to $URL. 
Response:" + echo "$response" + fi + fi + done + silent: true + delete-slxs: + desc: "Delete SLX objects from the appropriate URL" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/tf.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Deletion aborted." + exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Deleting SLX: $SLX_NAME from $URL" + response=$(curl -v -X DELETE "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|204 ]]; then + echo "Successfully deleted SLX: $SLX_NAME from $URL" + else + echo "Failed to delete SLX: $SLX_NAME from $URL. Response:" + echo "$response" + fi + fi + done + silent: true + + check-terraform-infra: + desc: "Check if Terraform has any deployed infrastructure in the terraform subdirectory" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ ! -d "terraform" ]; then + echo "Terraform directory not found." + exit 1 + fi + cd terraform + + # Check if Terraform state file exists + if [ ! -f "terraform.tfstate" ]; then + echo "No Terraform state file found in the terraform directory. No infrastructure is deployed." 
+ exit 0 + fi + + # List resources in Terraform state + resources=$(terraform state list) + + # Check if any resources are listed in the state file + if [ -n "$resources" ]; then + echo "Deployed infrastructure detected." + echo "$resources" + exit 0 + else + echo "No deployed infrastructure found in Terraform state." + exit 0 + fi + silent: true + + build-terraform-infra: + desc: "Run terraform apply" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Terraform apply aborted." + exit 1 + fi + task format-and-init-terraform + echo "Starting Terraform Build of Terraform infrastructure..." + terraform apply -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure build completed." + silent: true + + cleanup-terraform-infra: + desc: "Cleanup deployed Terraform infrastructure" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Cleanup aborted." + exit 1 + fi + + echo "Starting cleanup of Terraform infrastructure..." + terraform destroy -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure cleanup completed." 
+ silent: true + + check-and-cleanup-terraform: + desc: "Check and clean up deployed Terraform infrastructure if it exists" + cmds: + - | + # Capture the output of check-terraform-infra + infra_output=$(task check-terraform-infra | tee /dev/tty) + + # Check if output contains indication of deployed infrastructure + if echo "$infra_output" | grep -q "Deployed infrastructure detected"; then + echo "Infrastructure detected; proceeding with cleanup." + task cleanup-terraform-infra + else + echo "No deployed infrastructure found; no cleanup required." + fi + silent: true + + clean-rwl-discovery: + desc: "Check and clean up RunWhen Local discovery output" + cmds: + - | + sudo rm -rf output + rm workspaceInfo.yaml + silent: true diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md index 6acd29350..c326c88b5 100755 --- a/codebundles/azure-devops-triage/README.md +++ b/codebundles/azure-devops-triage/README.md @@ -68,13 +68,13 @@ Create a `terraform.tfvars` file in the `.test/terraform` directory: ```hcl azure_devops_org = "your-org-name" -azure_devops_pat = "your-personal-access-token" -azure_subscription_id = "your-subscription-id" -azure_tenant_id = "your-tenant-id" -azure_client_id = "your-client-id" -azure_client_secret = "your-client-secret" -resource_group_name = "your-resource-group" +azure_devops_org_url = "https://dev.azure.com/your-org-name" +service_principal_id = "your-service-principal-id" +service_principal_key = "your-service-principal-key" +tenant_id = "your-tenant-id" +resource_group = "your-resource-group" location = "eastus" +tags = "your-tags" ``` #### Step 2: Initialize and Apply Terraform From 1c0f17a51285b3cfabb184ee2d40589eeb7d9d7c Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 12:24:12 +0530 Subject: [PATCH 07/19] update README.md --- codebundles/azure-devops-triage/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/codebundles/azure-devops-triage/README.md 
b/codebundles/azure-devops-triage/README.md index c326c88b5..dfdc95852 100755 --- a/codebundles/azure-devops-triage/README.md +++ b/codebundles/azure-devops-triage/README.md @@ -31,6 +31,13 @@ The `.test` directory contains infrastructure test code using Terraform to set u 4. Azure CLI installed and configured 5. Terraform installed (v1.0.0+) +## tf.secret file + +export RW_PAT="" +export RW_WORKSPACE="ifc-sre-stack" +export RW_API_URL="papi.beta.runwhen.com" + + ### Azure DevOps Organization Setup (Before Running Terraform) Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: From 6c5f6bdd491bad1be244be29db9e22e785001b41 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 12:41:00 +0530 Subject: [PATCH 08/19] update templates --- .../templates/azure-devops-triage-sli.yaml | 6 -- .../templates/azure-devops-triage-slx.yaml | 6 -- .../azure-devops-triage-taskset.yaml | 6 -- .../azure-devops-triage/.test/Taskfile.yaml | 63 ++++++++++++++++--- codebundles/azure-devops-triage/runbook.robot | 7 +-- 5 files changed, 54 insertions(+), 34 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml index 4138c9a85..0866e0887 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml @@ -33,12 +33,6 @@ spec: value: "{{ custom.devops_org }}" - name: AZURE_DEVOPS_PROJECT value: "{{ custom.devops_project }}" - - name: DAYS_TO_LOOK_BACK - value: "7" - - name: DURATION_THRESHOLD - value: "60m" - - name: QUEUE_THRESHOLD - value: "10m" secretsProvided: {% if wb_version %} {% include "azure-auth.yaml" ignore missing %} diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml 
b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml index 121afed96..b59f3f57a 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -18,12 +18,6 @@ spec: value: "{{ custom.devops_org }}" - name: AZURE_DEVOPS_PROJECT value: "{{ custom.devops_project }}" - - name: DAYS_TO_LOOK_BACK - value: "7" - - name: DURATION_THRESHOLD - value: "60m" - - name: QUEUE_THRESHOLD - value: "10m" owners: - {{ workspace.owner_email }} statement: >- diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml index 0cad58e1d..fde52854a 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml @@ -28,12 +28,6 @@ spec: value: "{{ custom.devops_org }}" - name: AZURE_DEVOPS_PROJECT value: "{{ custom.devops_project }}" - - name: DAYS_TO_LOOK_BACK - value: "7" - - name: DURATION_THRESHOLD - value: "60m" - - name: QUEUE_THRESHOLD - value: "10m" secretsProvided: {% if wb_version %} {% include "azure-auth.yaml" ignore missing %} diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml index a18660ee6..796cfeb88 100755 --- a/codebundles/azure-devops-triage/.test/Taskfile.yaml +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -60,34 +60,56 @@ tasks: silent: true generate-rwl-config: desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" - env: - ARM_SUBSCRIPTION_ID: "{{.ARM_SUBSCRIPTION_ID}}" - AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" - AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" - AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" - RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + # env: + # ARM_SUBSCRIPTION_ID: 
"{{.ARM_SUBSCRIPTION_ID}}" + # AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" + # AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" + # AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" + # RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' cmds: - | source terraform/tf.secret repo_url=$(git config --get remote.origin.url) branch_name=$(git rev-parse --abbrev-ref HEAD) codebundle=$(basename "$(dirname "$PWD")") - AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID - subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) + #AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID + #subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) # Fetch individual cluster details from Terraform state pushd terraform > /dev/null resource_group=$(terraform show -json terraform.tfstate | jq -r ' .values.root_module.resources[] | select(.type == "azurerm_resource_group") | .values.name') + + # Get Azure DevOps organization and project from Terraform state + devops_org=$(terraform show -json terraform.tfstate | jq -r ' + .values.root_module.resources[] | + select(.type == "azuredevops_project") | + .values.name' | head -n 1 | xargs -I{} echo "azure-devops-test-org") + + devops_project=$(terraform show -json terraform.tfstate | jq -r ' + .values.root_module.resources[] | + select(.type == "azuredevops_project") | + .values.name' | head -n 1) popd > /dev/null - # Check if any of the required cluster variables are empty + # Check if any of the required variables are empty if [ -z "$resource_group" ]; then echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." exit 1 fi + + if [ -z "$devops_org" ]; then + echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." + exit 1 + fi + + if [ -z "$devops_project" ]; then + echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." 
+ exit 1 + fi + source terraform/tf.secret - # Generate workspaceInfo.yaml with fetched resource_group details + # Generate workspaceInfo.yaml with fetched details cat < workspaceInfo.yaml workspaceName: "$RW_WORKSPACE" workspaceOwnerEmail: authors@runwhen.com @@ -107,6 +129,10 @@ tasks: codeBundles: ["$codebundle"] custom: subscription_name: $subscription_name + devops_org: $devops_org + devops_project: $devops_project + duration_threshold: "60m" + queue_threshold: "10m" EOF silent: true @@ -354,6 +380,23 @@ tasks: exit 1 } echo "Terraform infrastructure build completed." + + # Extract and display Azure DevOps project details + echo "Azure DevOps Project Details:" + project_name=$(terraform output -json | jq -r '.project_name.value') + org_service_url=$(terraform output -json | jq -r '.org_service_url.value') + echo "Organization URL: $org_service_url" + echo "Project Name: $project_name" + + # Extract and display agent pool details + echo "Agent Pool Details:" + agent_pool_name=$(terraform output -json | jq -r '.agent_pool_name.value') + echo "Agent Pool Name: $agent_pool_name" + + echo "Next Steps:" + echo "1. Set up a self-hosted agent in the '$agent_pool_name' pool" + echo "2. Configure the agent according to the README instructions" + echo "3. Run the test pipelines to generate data for the triage codebundle" silent: true cleanup-terraform-infra: diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot index afeee2259..c413bdb34 100755 --- a/codebundles/azure-devops-triage/runbook.robot +++ b/codebundles/azure-devops-triage/runbook.robot @@ -240,10 +240,6 @@ Suite Initialization ... type=string ... description=Azure DevOps project. ... pattern=\w* - ${DAYS_TO_LOOK_BACK}= RW.Core.Import User Variable DAYS_TO_LOOK_BACK - ... type=integer - ... description=Number of days to look back for pipeline runs - ... default=7 ${DURATION_THRESHOLD}= RW.Core.Import User Variable DURATION_THRESHOLD ... type=string ... 
description=Threshold for long-running pipelines (format: 60m, 2h) @@ -255,9 +251,8 @@ Suite Initialization Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} - Set Suite Variable ${DAYS_TO_LOOK_BACK} ${DAYS_TO_LOOK_BACK} Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} Set Suite Variable ... ${env} - ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DAYS_TO_LOOK_BACK":"${DAYS_TO_LOOK_BACK}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"} + ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"} From 9907943e03345156ed6f96ca97c55a548afa04cb Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 12:51:09 +0530 Subject: [PATCH 09/19] update Taskfile --- .../azure-devops-triage/.test/Taskfile.yaml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml index 796cfeb88..9b23991f9 100755 --- a/codebundles/azure-devops-triage/.test/Taskfile.yaml +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -60,12 +60,12 @@ tasks: silent: true generate-rwl-config: desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" - # env: - # ARM_SUBSCRIPTION_ID: "{{.ARM_SUBSCRIPTION_ID}}" - # AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" - # AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" - # AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" - # RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + env: + ARM_SUBSCRIPTION_ID: 
"{{.ARM_SUBSCRIPTION_ID}}" + AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" + AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" + AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' cmds: - | source terraform/tf.secret @@ -128,11 +128,8 @@ tasks: branch: "$branch_name" codeBundles: ["$codebundle"] custom: - subscription_name: $subscription_name devops_org: $devops_org devops_project: $devops_project - duration_threshold: "60m" - queue_threshold: "10m" EOF silent: true From f8e225d2613ec3e4219b8da58dbd032e0e68db03 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 13:00:11 +0530 Subject: [PATCH 10/19] update generation rule --- .../.runwhen/generation-rules/azure-devops-triage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index a7cae8d49..c15e6e549 100755 --- a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -4,7 +4,7 @@ spec: platform: azure generationRules: - resourceTypes: - - azure_devops_organization + - azure_resources_resource_groups matchRules: - type: pattern pattern: ".+" From b0d559133aeb153b29be502340576acb76ff2067 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 13:26:30 +0530 Subject: [PATCH 11/19] temp change --- .../.runwhen/templates/azure-devops-triage-slx.yaml | 6 +++--- codebundles/azure-devops-triage/.test/Taskfile.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml index b59f3f57a..9f46a0411 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml +++ 
b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -23,9 +23,9 @@ spec: statement: >- Measure Azure DevOps health by checking agent pools, pipeline status, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }}. - additionalContext: - {% include "azure-hierarchy.yaml" ignore missing %} - qualified_name: "{{ match_resource.qualified_name }}" + # additionalContext: + # {% include "azure-hierarchy.yaml" ignore missing %} + # qualified_name: "{{ match_resource.qualified_name }}" tags: {% include "azure-tags.yaml" ignore missing %} - name: cloud diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml index 9b23991f9..e5bf4bfaa 100755 --- a/codebundles/azure-devops-triage/.test/Taskfile.yaml +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -72,7 +72,7 @@ tasks: repo_url=$(git config --get remote.origin.url) branch_name=$(git rev-parse --abbrev-ref HEAD) codebundle=$(basename "$(dirname "$PWD")") - #AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID + AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID #subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) # Fetch individual cluster details from Terraform state pushd terraform > /dev/null From 83a5368f176876ce8931feb3d5253948c9299ea6 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 20:04:03 +0530 Subject: [PATCH 12/19] update README --- codebundles/azure-devops-triage/README.md | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md index dfdc95852..501ab3e49 100755 --- a/codebundles/azure-devops-triage/README.md +++ b/codebundles/azure-devops-triage/README.md @@ -31,33 +31,26 @@ The `.test` directory contains infrastructure test code using Terraform to set u 4. 
Azure CLI installed and configured 5. Terraform installed (v1.0.0+) -## tf.secret file - -export RW_PAT="" -export RW_WORKSPACE="ifc-sre-stack" -export RW_API_URL="papi.beta.runwhen.com" - - ### Azure DevOps Organization Setup (Before Running Terraform) Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: #### 1. Organization Settings Configuration -1. Navigate to your Azure DevOps organization settings -2. Add the user who will be running Terraform to the organization -3. Add the service principal as user that will be used by Terraform +1. Navigate to your Azure DevOps organization settings (To Add the user who will be running Terraform to the organization) +2. Navigate to Users and Add the service principal as user with Basic Access level. #### 2. Agent Pool Permissions 1. Go to Organization Settings > Agent Pools > Security 2. Add your user (service principal) account with Administrator permissions +3. Click on Save. #### 3. Organization-Level Security Permissions 1. Go to Organization Settings > Security > Permissions -2. Find your user (service principal) -3. Ensure they have "Create new projects" permission set to "Allow" +2. Navigate to Users and Find your user (service principal) +3. Click on the user and Ensure they have "Create new projects" permission set to "Allow" These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. 
@@ -76,9 +69,6 @@ Create a `terraform.tfvars` file in the `.test/terraform` directory: ```hcl azure_devops_org = "your-org-name" azure_devops_org_url = "https://dev.azure.com/your-org-name" -service_principal_id = "your-service-principal-id" -service_principal_key = "your-service-principal-key" -tenant_id = "your-tenant-id" resource_group = "your-resource-group" location = "eastus" tags = "your-tags" From 83320acf5f72fef702720f199cf1f0e84dfe155f Mon Sep 17 00:00:00 2001 From: Nbarola Date: Tue, 27 May 2025 21:06:56 +0530 Subject: [PATCH 13/19] update generation rule --- .../.runwhen/generation-rules/azure-devops-triage.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index c15e6e549..89a41ddeb 100755 --- a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -4,7 +4,7 @@ spec: platform: azure generationRules: - resourceTypes: - - azure_resources_resource_groups + - azure_devops_pipeline_template_definitions matchRules: - type: pattern pattern: ".+" @@ -12,7 +12,7 @@ spec: mode: substring slxs: - baseName: az-devops-triage - qualifiers: ["resource_group"] + qualifiers: ["*"] baseTemplateName: azure-devops-triage levelOfDetail: basic outputItems: From e0997253c43c57399c6b5c432b40c041209b3ced Mon Sep 17 00:00:00 2001 From: Nbarola Date: Wed, 28 May 2025 15:29:29 +0530 Subject: [PATCH 14/19] WIP generetaion rule --- .../.runwhen/generation-rules/azure-devops-triage.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index 89a41ddeb..c15e6e549 100755 --- 
a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -4,7 +4,7 @@ spec: platform: azure generationRules: - resourceTypes: - - azure_devops_pipeline_template_definitions + - azure_resources_resource_groups matchRules: - type: pattern pattern: ".+" @@ -12,7 +12,7 @@ spec: mode: substring slxs: - baseName: az-devops-triage - qualifiers: ["*"] + qualifiers: ["resource_group"] baseTemplateName: azure-devops-triage levelOfDetail: basic outputItems: From 39863ef336e042d9ccb785d56d9ba153b768ac5b Mon Sep 17 00:00:00 2001 From: Nbarola Date: Wed, 28 May 2025 16:55:56 +0530 Subject: [PATCH 15/19] WIP template --- .../.runwhen/templates/azure-devops-triage-slx.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml index 9f46a0411..0979f5666 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -23,11 +23,12 @@ spec: statement: >- Measure Azure DevOps health by checking agent pools, pipeline status, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }}. 
- # additionalContext: + additionalContext: + name: "{{ match_resource.resource.name }}" # {% include "azure-hierarchy.yaml" ignore missing %} # qualified_name: "{{ match_resource.qualified_name }}" tags: - {% include "azure-tags.yaml" ignore missing %} + #{% include "azure-tags.yaml" ignore missing %} - name: cloud value: azure - name: service From c12e4f8934dc44736bd7ab4e340018b2172857d1 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Wed, 28 May 2025 17:29:15 +0530 Subject: [PATCH 16/19] WIP generation rule --- .../.runwhen/generation-rules/azure-devops-triage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index c15e6e549..f9480082a 100755 --- a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -16,7 +16,7 @@ spec: baseTemplateName: azure-devops-triage levelOfDetail: basic outputItems: - - type: slx + #- type: slx - type: sli - type: runbook templateName: azure-devops-triage-taskset.yaml From 5efe59ad36e405656e0da543f7e0edd048784456 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 29 May 2025 11:36:23 +0530 Subject: [PATCH 17/19] WIP --- .gitignore | 3 - .../generation-rules/azure-devops-triage.yaml | 4 +- .../templates/azure-devops-triage-sli.yaml | 2 +- .../azure-devops-triage/.test/README.md | 13 ++- .../azure-devops-triage/.test/Taskfile.yaml | 86 +++++++++++++------ .../.test/terraform/main.tf | 4 +- .../.test/terraform/providers.tf | 4 +- .../.test/terraform/variables.tf | 15 +++- 8 files changed, 86 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index 7a64caddb..9b81cc2a6 100644 --- a/.gitignore +++ b/.gitignore @@ -24,9 +24,6 @@ build **/.terraform/ **/.terraform.lock.hcl -# Ignore terraform tfvars -**/*.tfvars - # Ignore 
crash logs **/crash.log diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index f9480082a..cac21339d 100755 --- a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -16,7 +16,7 @@ spec: baseTemplateName: azure-devops-triage levelOfDetail: basic outputItems: - #- type: slx - - type: sli + - type: slx + #- type: sli - type: runbook templateName: azure-devops-triage-taskset.yaml diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml index 0866e0887..cdad62583 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml @@ -28,7 +28,7 @@ spec: intervalSeconds: 600 configProvided: - name: AZURE_RESOURCE_GROUP - value: "{{ match_resource.resource_group.name }}" + value: "{{ match_resource.resource.name }}" - name: AZURE_DEVOPS_ORG value: "{{ custom.devops_org }}" - name: AZURE_DEVOPS_PROJECT diff --git a/codebundles/azure-devops-triage/.test/README.md b/codebundles/azure-devops-triage/.test/README.md index b53189ac0..b2d3189da 100755 --- a/codebundles/azure-devops-triage/.test/README.md +++ b/codebundles/azure-devops-triage/.test/README.md @@ -16,20 +16,20 @@ Before running Terraform, you need to configure your Azure DevOps organization w #### 1. Organization Settings Configuration -1. Navigate to your Azure DevOps organization settings -2. Add the user who will be running Terraform to the organization -3. Add the service principal as user that will be used by Terraform +1. 
Navigate to your Azure DevOps organization settings (To Add the user who will be running Terraform to the organization) +2. Navigate to Users and Add the service principal as user with Basic Access level. #### 2. Agent Pool Permissions 1. Go to Organization Settings > Agent Pools > Security 2. Add your user (service principal) account with Administrator permissions +3. Click on Save. #### 3. Organization-Level Security Permissions 1. Go to Organization Settings > Security > Permissions -2. Find your user (service principal) -3. Ensure they have "Create new projects" permission set to "Allow" +2. Navigate to Users and Find your user (service principal) +3. Click on the user and Ensure they have "Create new projects" permission set to "Allow" These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. @@ -48,9 +48,6 @@ Create a `terraform.tfvars` file in the `.test/terraform` directory: ```hcl azure_devops_org = "your-org-name" azure_devops_org_url = "https://dev.azure.com/your-org-name" -service_principal_id = "your-service-principal-id" -service_principal_key = "your-service-principal-key" -tenant_id = "your-tenant-id" resource_group = "your-resource-group" location = "eastus" tags = "your-tags" diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml index e5bf4bfaa..5a0731f56 100755 --- a/codebundles/azure-devops-triage/.test/Taskfile.yaml +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -72,44 +72,78 @@ tasks: repo_url=$(git config --get remote.origin.url) branch_name=$(git rev-parse --abbrev-ref HEAD) codebundle=$(basename "$(dirname "$PWD")") - AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID - #subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) - # Fetch individual cluster details from Terraform state + + # Check if terraform state exists + if [ ! 
-f "terraform/terraform.tfstate" ]; then + echo "❌ ERROR: Terraform state file not found." + echo "Required infrastructure is missing. Please run 'task build-infra' first." + exit 1 + fi + + # Check if terraform state is valid JSON + if ! jq empty terraform/terraform.tfstate 2>/dev/null; then + echo "❌ ERROR: Terraform state file is not valid JSON." + echo "Please run 'task build-infra' to create proper infrastructure." + exit 1 + fi + + # Check if required resources exist in terraform state pushd terraform > /dev/null - resource_group=$(terraform show -json terraform.tfstate | jq -r ' - .values.root_module.resources[] | - select(.type == "azurerm_resource_group") | .values.name') - # Get Azure DevOps organization and project from Terraform state - devops_org=$(terraform show -json terraform.tfstate | jq -r ' - .values.root_module.resources[] | - select(.type == "azuredevops_project") | - .values.name' | head -n 1 | xargs -I{} echo "azure-devops-test-org") + # Check for resource group + if ! jq '.resources' terraform.tfstate | grep -q "azurerm_resource_group"; then + echo "❌ ERROR: No Azure Resource Group found in Terraform state." + echo "Please run 'task build-infra' to create required infrastructure." + exit 1 + fi - devops_project=$(terraform show -json terraform.tfstate | jq -r ' - .values.root_module.resources[] | - select(.type == "azuredevops_project") | - .values.name' | head -n 1) - popd > /dev/null - - # Check if any of the required variables are empty - if [ -z "$resource_group" ]; then - echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." + # Check for Azure DevOps project + if ! jq '.resources' terraform.tfstate | grep -q "azuredevops_project"; then + echo "❌ ERROR: No Azure DevOps Project found in Terraform state." + echo "Please run 'task build-infra' to create required infrastructure." exit 1 fi - if [ -z "$devops_org" ]; then - echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." 
+ # Extract resource values from terraform state + resource_group=$(terraform show -json terraform.tfstate | jq -r '.values.root_module.resources[] | select(.type == "azurerm_resource_group") | .values.name') + if [ -z "$resource_group" ]; then + echo "❌ ERROR: Failed to extract Resource Group name from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." exit 1 fi + # Extract DevOps project name + devops_project=$(terraform show -json terraform.tfstate | jq -r '.values.root_module.resources[] | select(.type == "azuredevops_project") | .values.name' | head -n 1) if [ -z "$devops_project" ]; then - echo "Error: Missing resource_group details. Ensure Terraform plan has been applied." + echo "❌ ERROR: Failed to extract Azure DevOps Project name from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." exit 1 fi - source terraform/tf.secret - # Generate workspaceInfo.yaml with fetched details + # For DevOps org, we need to extract from the service URL + org_service_url=$(terraform show -json terraform.tfstate | jq -r ' + .values.outputs["project_url"].value' | head -n 1) + if [ -z "$org_service_url" ]; then + echo "❌ ERROR: Failed to extract Azure DevOps Organization URL from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." + exit 1 + fi + + # Extract org name from URL + devops_org=$(echo "$org_service_url" | sed -n 's/.*dev\.azure\.com\/\([^\/]*\).*/\1/p') + if [ -z "$devops_org" ]; then + echo "❌ ERROR: Failed to extract Azure DevOps Organization name from URL." + echo "Please run 'task build-infra' to recreate the infrastructure." 
+ exit 1 + fi + popd > /dev/null + + echo "Using the following values:" + echo "Resource Group: $resource_group" + echo "DevOps Organization: $devops_org" + echo "DevOps Project: $devops_project" + + # Generate workspaceInfo.yaml with fetched or default details cat <<EOF > workspaceInfo.yaml workspaceName: "$RW_WORKSPACE" workspaceOwnerEmail: authors@runwhen.com @@ -131,6 +165,8 @@ tasks: devops_org: $devops_org devops_project: $devops_project EOF + + echo "Generated workspaceInfo.yaml with configuration for RunWhen Local." silent: true run-rwl-discovery: diff --git a/codebundles/azure-devops-triage/.test/terraform/main.tf b/codebundles/azure-devops-triage/.test/terraform/main.tf index b83af8e36..7103d1a60 100755 --- a/codebundles/azure-devops-triage/.test/terraform/main.tf +++ b/codebundles/azure-devops-triage/.test/terraform/main.tf @@ -95,8 +95,8 @@ resource "azuredevops_serviceendpoint_azurerm" "test_endpoint" { azurerm_subscription_id = data.azurerm_client_config.current.subscription_id azurerm_subscription_name = "Test Subscription" credentials { - serviceprincipalid = var.service_principal_id - serviceprincipalkey = var.service_principal_key + serviceprincipalid = var.client_id + serviceprincipalkey = var.client_secret } } diff --git a/codebundles/azure-devops-triage/.test/terraform/providers.tf b/codebundles/azure-devops-triage/.test/terraform/providers.tf index 4c85e2705..6c32a8037 100644 --- a/codebundles/azure-devops-triage/.test/terraform/providers.tf +++ b/codebundles/azure-devops-triage/.test/terraform/providers.tf @@ -22,9 +22,9 @@ provider "azurerm" { provider "azuredevops" { org_service_url = var.azure_devops_org_url != null ?
var.azure_devops_org_url : "https://dev.azure.com/${var.azure_devops_org}" - client_id = var.service_principal_id + client_id = var.client_id tenant_id = var.tenant_id - client_secret = var.service_principal_key + client_secret = var.client_secret } # provider "azapi" { diff --git a/codebundles/azure-devops-triage/.test/terraform/variables.tf b/codebundles/azure-devops-triage/.test/terraform/variables.tf index 641a00c82..9f3817ac8 100644 --- a/codebundles/azure-devops-triage/.test/terraform/variables.tf +++ b/codebundles/azure-devops-triage/.test/terraform/variables.tf @@ -15,8 +15,19 @@ variable "service_principal_id" { sensitive = true } -variable "service_principal_key" { - description = "Service Principal Key for Azure DevOps service connection" +# variable "service_principal_key" { +# description = "Service Principal Key for Azure DevOps service connection" +# type = string +# sensitive = true +# } +variable "client_id" { + description = "Client ID for Azure DevOps service connection" + type = string + sensitive = true +} + +variable "client_secret" { + description = "Client Secret for Azure DevOps service connection" type = string sensitive = true } From 2469d6cb53c08458d86a3089fd7a984a7a1f3195 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Thu, 29 May 2025 13:33:39 +0530 Subject: [PATCH 18/19] WIP --- .../.runwhen/templates/azure-devops-triage-slx.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml index 0979f5666..3c29dd417 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -12,12 +12,8 @@ spec: {{ match_resource.resource_group.name }} Azure DevOps Health asMeasuredBy: Composite health score of Azure DevOps resources & activities. 
configProvided: - - name: AZURE_RESOURCE_GROUP - value: "{{ match_resource.resource_group.name }}" - - name: AZURE_DEVOPS_ORG - value: "{{ custom.devops_org }}" - - name: AZURE_DEVOPS_PROJECT - value: "{{ custom.devops_project }}" + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER owners: - {{ workspace.owner_email }} statement: >- From 9e0098e67b100f4aef6e9af44eb4ebf348e6d470 Mon Sep 17 00:00:00 2001 From: Nbarola Date: Sat, 5 Jul 2025 01:25:00 +0530 Subject: [PATCH 19/19] add terraform tfvars file --- .../generation-rules/azure-devops-triage.yaml | 2 +- .../azure-devops-triage-taskset.yaml | 2 +- .../.test/terraform/terraform.tfvars | 9 + codebundles/azure-devops-triage/runbook.robot | 2 +- codebundles/azure-devops-triage/sli.robot | 156 ++++++++++++++++++ 5 files changed, 168 insertions(+), 3 deletions(-) create mode 100755 codebundles/azure-devops-triage/.test/terraform/terraform.tfvars create mode 100755 codebundles/azure-devops-triage/sli.robot diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml index cac21339d..424dedaa8 100755 --- a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -12,7 +12,7 @@ spec: mode: substring slxs: - baseName: az-devops-triage - qualifiers: ["resource_group"] + qualifiers: ["resource"] baseTemplateName: azure-devops-triage levelOfDetail: basic outputItems: diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml index fde52854a..a6779c4c0 100755 --- a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml @@ -23,7 +23,7 @@ spec: pathToRobot: 
codebundles/azure-devops-triage/runbook.robot configProvided: - name: AZURE_RESOURCE_GROUP - value: "{{ match_resource.resource_group.name }}" + value: "{{ resource_group.name }}" - name: AZURE_DEVOPS_ORG value: "{{ custom.devops_org }}" - name: AZURE_DEVOPS_PROJECT diff --git a/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars b/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars new file mode 100755 index 000000000..871c4cdb5 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars @@ -0,0 +1,9 @@ +resource_group = "azure-devops-triage" +location = "East US" +azure_devops_org = "nishant0471" +azure_devops_org_url = "https://dev.azure.com/nishant0471" +tags = { + "env" : "test", + "lifecycle" : "deleteme", + "product" : "runwhen" +} \ No newline at end of file diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot index c413bdb34..ac3960c13 100755 --- a/codebundles/azure-devops-triage/runbook.robot +++ b/codebundles/azure-devops-triage/runbook.robot @@ -1,6 +1,6 @@ *** Settings *** Documentation Check Azure DevOps health by examining pipeline status, agent pools, and build logs -Metadata Author saurabh3460 +Metadata Author Nbarola Metadata Display Name Azure DevOps Triage Metadata Supports Azure DevOps Pipelines Health Force Tags Azure DevOps Pipelines Health diff --git a/codebundles/azure-devops-triage/sli.robot b/codebundles/azure-devops-triage/sli.robot new file mode 100755 index 000000000..2957caf5b --- /dev/null +++ b/codebundles/azure-devops-triage/sli.robot @@ -0,0 +1,156 @@ +*** Settings *** +Documentation Counts Azure DevOps health issues by examining pipeline status, agent pools, and build logs +Metadata Author Nbarola +Metadata Display Name Azure DevOps Health +Metadata Supports Azure DevOps Pipelines Health +Force Tags Azure DevOps Pipelines Health + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite 
Setup Suite Initialization + +*** Tasks *** +Count Agent Pool Health Issues in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts the health issues of Agent Pools in the specified organization + [Tags] DevOps Azure Health access:read-only + ${agent_pool}= RW.CLI.Run Bash File + ... bash_file=agent-pools.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('agent_pools_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${agent_pool_health_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${agent_pool_health_score} + +Count Failed Pipeline Runs in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts the number of failed pipeline runs in the specified project + [Tags] DevOps Azure Pipelines Failures access:read-only + ${failed_pipelines}= RW.CLI.Run Bash File + ... bash_file=pipeline-logs.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('pipeline_logs_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${failed_pipelines_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${failed_pipelines_score} + +Count Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts pipelines that are running longer than expected + [Tags] DevOps Azure Pipelines Performance access:read-only + ${long_running}= RW.CLI.Run Bash File + ... bash_file=long-running-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... 
include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('long_running_pipelines.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${long_running_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${long_running_score} + +Count Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts pipelines that are queued for longer than expected + [Tags] DevOps Azure Pipelines Queue access:read-only + ${queued_pipelines}= RW.CLI.Run Bash File + ... bash_file=queued-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('queued_pipelines.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${queued_pipelines_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${queued_pipelines_score} + +Count Repository Policy Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts repository branch policy issues against best practices + [Tags] DevOps Azure Pipelines Policies access:read-only + ${repo_policy}= RW.CLI.Run Bash File + ... bash_file=repo-policies.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('repo_policy_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${repo_policy_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${repo_policy_score} + +Generate Comprehensive Azure DevOps Health Score + ${devops_health_score}= Evaluate (${agent_pool_health_score} + ${failed_pipelines_score} + ${long_running_score} + ${queued_pipelines_score} + ${repo_policy_score}) / 5 + ${health_score}= Convert to Number ${devops_health_score} 2 + RW.Core.Push Metric ${health_score} + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + ${AZURE_DEVOPS_ORG}= RW.Core.Import User Variable AZURE_DEVOPS_ORG + ... type=string + ... description=Azure DevOps organization. + ... pattern=\w* + ${AZURE_DEVOPS_PROJECT}= RW.Core.Import User Variable AZURE_DEVOPS_PROJECT + ... type=string + ... description=Azure DevOps project. + ... pattern=\w* + ${DURATION_THRESHOLD}= RW.Core.Import User Variable DURATION_THRESHOLD + ... type=string + ... description=Threshold for long-running pipelines (format: 60m, 2h) + ... default=60m + ${QUEUE_THRESHOLD}= RW.Core.Import User Variable QUEUE_THRESHOLD + ... type=string + ... description=Threshold for queued pipelines (format: 10m, 1h) + ... default=30m + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} + Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} + Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} + Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} + Set Suite Variable + ... ${env} + ...
{"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"}