diff --git a/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml new file mode 100755 index 000000000..424dedaa8 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/generation-rules/azure-devops-triage.yaml @@ -0,0 +1,22 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: azure + generationRules: + - resourceTypes: + - azure_resources_resource_groups + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: az-devops-triage + qualifiers: ["resource"] + baseTemplateName: azure-devops-triage + levelOfDetail: basic + outputItems: + - type: slx + #- type: sli + - type: runbook + templateName: azure-devops-triage-taskset.yaml diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml new file mode 100755 index 000000000..cdad62583 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-sli.yaml @@ -0,0 +1,56 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{default_location}} + description: Checks Azure DevOps health by examining pipeline status, agent pools, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-devops-triage/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + 
configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{ match_resource.resource.name }}" + - name: AZURE_DEVOPS_ORG + value: "{{ custom.devops_org }}" + - name: AZURE_DEVOPS_PROJECT + value: "{{ custom.devops_project }}" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} + + alerts: + warning: + operator: < + threshold: '1' + for: '20m' + ticket: + operator: < + threshold: '1' + for: '40m' + page: + operator: '==' + threshold: '0' + for: '' diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml new file mode 100755 index 000000000..3c29dd417 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-slx.yaml @@ -0,0 +1,33 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{ slx_name }} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/security/10245-icon-service-Key-Vaults.svg + alias: >- + {{ match_resource.resource_group.name }} Azure DevOps Health + asMeasuredBy: Composite health score of Azure DevOps resources & activities. + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{ workspace.owner_email }} + statement: >- + Measure Azure DevOps health by checking agent pools, pipeline status, repository policies, + and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }}. 
+ additionalContext: + name: "{{ match_resource.resource.name }}" + {# disabled (a YAML '#' does not stop Jinja from rendering the tag): {% include "azure-hierarchy.yaml" ignore missing %} #} + {# disabled: qualified_name: "{{ match_resource.qualified_name }}" #} + tags: + {# disabled (a YAML '#' does not stop Jinja from rendering the tag): {% include "azure-tags.yaml" ignore missing %} #} + - name: cloud + value: azure + - name: service + value: devops + - name: access + value: read-only diff --git a/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml new file mode 100755 index 000000000..a6779c4c0 --- /dev/null +++ b/codebundles/azure-devops-triage/.runwhen/templates/azure-devops-triage-taskset.yaml @@ -0,0 +1,37 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: Check Azure DevOps health by examining pipeline status, agent pools, repository policies, and service connections in project {{ custom.devops_project }} of organization {{ custom.devops_org }} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-devops-triage/runbook.robot + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{ resource_group.name }}" + - name: AZURE_DEVOPS_ORG + value: "{{ custom.devops_org }}" + - name: AZURE_DEVOPS_PROJECT + value: "{{ custom.devops_project }}" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} diff --git a/codebundles/azure-devops-triage/.test/README.md b/codebundles/azure-devops-triage/.test/README.md new file mode 100755 index 000000000..b2d3189da --- /dev/null +++ 
b/codebundles/azure-devops-triage/.test/README.md @@ -0,0 +1,115 @@ +## Testing + +The `.test` directory contains Terraform code that sets up a test environment for this codebundle. + +### Prerequisites for Testing + +1. An existing Azure subscription +2. An existing Azure DevOps organization +3. Permissions to create resources in Azure and Azure DevOps +4. Azure CLI installed and configured +5. Terraform installed (v1.0.0+) + +### Azure DevOps Organization Setup (Before Running Terraform) + +Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: + +#### 1. Organization Settings Configuration + +1. Navigate to your Azure DevOps organization settings to add the identity (service principal) that will run Terraform to the organization. +2. Go to Users and add the service principal as a user with the Basic access level. + +#### 2. Agent Pool Permissions + +1. Go to Organization Settings > Agent Pools > Security +2. Add your user (service principal) account with Administrator permissions +3. Click Save. + +#### 3. Organization-Level Security Permissions + +1. Go to Organization Settings > Security > Permissions +2. Go to Users and find your user (service principal) +3. Click the user and ensure the "Create new projects" permission is set to "Allow" + +These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. 
+ +### Test Environment Setup + +The test environment creates: +- A new Azure DevOps project +- A new agent pool +- A Git repository with sample pipeline definitions +- A variable group for testing + +#### Step 1: Configure Terraform Variables + +Create a `terraform.tfvars` file in the `.test/terraform` directory: + +```hcl +azure_devops_org = "your-org-name" +azure_devops_org_url = "https://dev.azure.com/your-org-name" +resource_group = "your-resource-group" +location = "eastus" +tags = "your-tags" +``` + +#### Step 2: Initialize and Apply Terraform + +```bash +cd .test/terraform +terraform init +terraform apply +``` + +#### Step 3: Set Up Self-Hosted Agent (Manual Step) + +After Terraform creates the agent pool, you need to manually set up at least one self-hosted agent: + +1. In Azure DevOps, navigate to Project Settings > Agent pools > [Your Pool Name] +2. Click "New agent" +3. Follow the instructions to download and configure the agent on your machine +4. Start the agent and verify it's online + +Alternatively, set the agent up from the command line: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. Configure: ./config.sh + - Server URL: https://dev.azure.com/your-org-name + - PAT: a personal access token generated in your Azure DevOps organization + - Agent pool: the pool created by Terraform (`Test-Agent-Pool`; also shown by `terraform output agent_pool_name`) + e. Run as a service: ./svc.sh install && ./svc.sh start + +#### Step 4: Trigger Test Pipelines (Manual Step) + +The test environment includes several pipeline definitions: +- Success Pipeline: A pipeline that completes successfully +- Failing Pipeline: A pipeline that intentionally fails +- Long-Running Pipeline: A pipeline that runs for longer than the threshold + +To trigger these pipelines: +1. Navigate to Pipelines in your Azure DevOps project +2. 
Select each pipeline and click "Run pipeline" + +#### Step 5: Run the Triage Runbook + +Once the test environment is set up and pipelines are running, you can execute the triage runbook to verify it correctly identifies issues. + +### Cleaning Up + +To remove the test environment: + +```bash +cd .test/terraform +terraform destroy +``` + +Note: This will not remove the Azure DevOps organization, as it was a prerequisite. + +## Notes + +- The codebundle uses the Azure CLI with the Azure DevOps extension to interact with Azure DevOps. +- Service principal authentication is used for Azure resources. +- The runbook focuses on identifying issues rather than fixing them. +- For queued pipelines, the threshold is measured from when the pipeline was created to the current time. +- For long-running pipelines, the threshold is measured from start time to finish time (or current time if still running). diff --git a/codebundles/azure-devops-triage/.test/Taskfile.yaml b/codebundles/azure-devops-triage/.test/Taskfile.yaml new file mode 100755 index 000000000..5a0731f56 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/Taskfile.yaml @@ -0,0 +1,485 @@ +version: "3" + +tasks: + default: + desc: "Run/refresh config" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: check-and-cleanup-terraform + - task: delete-slxs + - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: build-terraform-infra + + check-unpushed-commits: + desc: Check if outstanding commits or file updates need to be pushed before testing. + vars: + # Specify the base directory relative to your Taskfile location + BASE_DIR: "../" + cmds: + - | + echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." 
+ UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED_FILES" ]; then + echo "✗" + echo "Uncommitted changes found:" + echo "$UNCOMMITTED_FILES" + echo "Remember to commit & push changes before executing the \`run-rwl-discovery\` task." # backticks escaped so the shell prints them instead of running a command substitution + echo "------------" + exit 1 + else + echo "√" + echo "No uncommitted changes in specified directories." + echo "------------" + fi + - | + echo "Checking for unpushed commits in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + git fetch origin + UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNPUSHED_FILES" ]; then + echo "✗" + echo "Unpushed commits found:" + echo "$UNPUSHED_FILES" + echo "Remember to push changes before executing the \`run-rwl-discovery\` task." # backticks escaped (see above) + echo "------------" + exit 1 + else + echo "√" + echo "No unpushed commits in specified directories." + echo "------------" + fi + silent: true + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + ARM_SUBSCRIPTION_ID: "{{.ARM_SUBSCRIPTION_ID}}" + AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" + AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" + AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + cmds: + - | + source terraform/tf.secret + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + + # Check if terraform state exists + if [ ! -f "terraform/terraform.tfstate" ]; then + echo "❌ ERROR: Terraform state file not found." + echo "Required infrastructure is missing. Please run 'task build-infra' first." + exit 1 + fi + + # Check if terraform state is valid JSON + if ! jq empty terraform/terraform.tfstate 2>/dev/null; then + echo "❌ ERROR: Terraform state file is not valid JSON." 
+ echo "Please run 'task build-infra' to create proper infrastructure." + exit 1 + fi + + # Check if required resources exist in terraform state + pushd terraform > /dev/null + + # Check for resource group + if ! jq '.resources' terraform.tfstate | grep -q "azurerm_resource_group"; then + echo "❌ ERROR: No Azure Resource Group found in Terraform state." + echo "Please run 'task build-infra' to create required infrastructure." + exit 1 + fi + + # Check for Azure DevOps project + if ! jq '.resources' terraform.tfstate | grep -q "azuredevops_project"; then + echo "❌ ERROR: No Azure DevOps Project found in Terraform state." + echo "Please run 'task build-infra' to create required infrastructure." + exit 1 + fi + + # Extract resource values from terraform state + resource_group=$(terraform show -json terraform.tfstate | jq -r '.values.root_module.resources[] | select(.type == "azurerm_resource_group") | .values.name') + if [ -z "$resource_group" ]; then + echo "❌ ERROR: Failed to extract Resource Group name from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." + exit 1 + fi + + # Extract DevOps project name + devops_project=$(terraform show -json terraform.tfstate | jq -r '.values.root_module.resources[] | select(.type == "azuredevops_project") | .values.name' | head -n 1) + if [ -z "$devops_project" ]; then + echo "❌ ERROR: Failed to extract Azure DevOps Project name from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." + exit 1 + fi + + # For DevOps org, we need to extract from the service URL + org_service_url=$(terraform show -json terraform.tfstate | jq -r ' + .values.outputs["project_url"].value' | head -n 1) + if [ -z "$org_service_url" ]; then + echo "❌ ERROR: Failed to extract Azure DevOps Organization URL from Terraform state." + echo "Please run 'task build-infra' to recreate the infrastructure." 
+ exit 1 + fi + + # Extract org name from URL + devops_org=$(echo "$org_service_url" | sed -n 's/.*dev\.azure\.com\/\([^\/]*\).*/\1/p') + if [ -z "$devops_org" ]; then + echo "❌ ERROR: Failed to extract Azure DevOps Organization name from URL." + echo "Please run 'task build-infra' to recreate the infrastructure." + exit 1 + fi + popd > /dev/null + + echo "Using the following values:" + echo "Resource Group: $resource_group" + echo "DevOps Organization: $devops_org" + echo "DevOps Project: $devops_project" + + # Generate workspaceInfo.yaml with fetched or default details (here-document, terminated by EOF, redirected into the file) + cat <<EOF > workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01-us-west1 + defaultLOD: detailed + cloudConfig: + azure: + subscriptionId: "$ARM_SUBSCRIPTION_ID" + tenantId: "$AZ_TENANT_ID" + clientId: "$AZ_CLIENT_ID" + clientSecret: "$AZ_CLIENT_SECRET" + resourceGroupLevelOfDetails: + $resource_group: detailed + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + devops_org: $devops_org + devops_project: $devops_project + EOF + + echo "Generated workspaceInfo.yaml with configuration for RunWhen Local." + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + source terraform/tf.secret + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Stopping and removing existing container $CONTAINER_NAME..." + docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Removing existing stopped container $CONTAINER_NAME..." + docker rm $CONTAINER_NAME + else + echo "No existing container named $CONTAINER_NAME found." + fi + + echo "Cleaning up output directory..." 
+ sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; } + mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; } + + echo "Starting new container $CONTAINER_NAME..." + + docker run --name $CONTAINER_NAME -p 8081:8081 -v "$(pwd)":/shared -d ghcr.io/runwhen-contrib/runwhen-local:latest || { + echo "Failed to start container"; exit 1; + } + + echo "Running workspace builder script in container..." + docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { + echo "Error executing script in container"; exit 1; + } + + echo "Review generated config files under output/workspaces/" + silent: true + + validate-generation-rules: + desc: "Validate YAML files in .runwhen/generation-rules" + cmds: + - | + for cmd in curl yq ajv; do + if ! command -v $cmd &> /dev/null; then + echo "Error: $cmd is required but not installed." + exit 1 + fi + done + + temp_dir=$(mktemp -d) + curl -s -o "$temp_dir/generation-rule-schema.json" https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json + + for yaml_file in ../.runwhen/generation-rules/*.yaml; do + echo "Validating $yaml_file" + json_file="$temp_dir/$(basename "${yaml_file%.*}.json")" + yq -o=json "$yaml_file" > "$json_file" + ajv validate -s "$temp_dir/generation-rule-schema.json" -d "$json_file" --spec=draft2020 --strict=false \ + && echo "$yaml_file is valid." || echo "$yaml_file is invalid." 
+ done + + rm -rf "$temp_dir" + silent: true + + check-rwp-config: + desc: Check if env vars are set for RunWhen Platform + cmds: + - | + source terraform/tf.secret + missing_vars=() + + if [ -z "$RW_WORKSPACE" ]; then + missing_vars+=("RW_WORKSPACE") + fi + + if [ -z "$RW_API_URL" ]; then + missing_vars+=("RW_API_URL") + fi + + if [ -z "$RW_PAT" ]; then + missing_vars+=("RW_PAT") + fi + + if [ ${#missing_vars[@]} -ne 0 ]; then + echo "The following required environment variables are missing: ${missing_vars[*]}" + exit 1 + fi + silent: true + + upload-slxs: + desc: "Upload SLX files to the appropriate URL" + env: + RW_WORKSPACE: "{{.RW_WORKSPACE}}" + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/tf.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Upload aborted." + exit 1 + fi + + # Create Secrets (payload is built with jq so every value is JSON-escaped) + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" + PAYLOAD=$(jq -cn --arg sub "$ARM_SUBSCRIPTION_ID" --arg cid "$AZ_CLIENT_ID" --arg tid "$AZ_TENANT_ID" --arg secret "$AZ_CLIENT_SECRET" '{secrets: {az_subscriptionId: $sub, az_clientId: $cid, az_tenantId: $tid, az_clientSecret: $secret}}') + echo "Uploading secrets to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded secrets to $URL" + else + echo "Failed to upload secrets to $URL. 
Unexpected response code: $response_code" + fi + + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + PAYLOAD=$(jq -n --arg commitMsg "Creating new SLX $SLX_NAME" '{ commitMsg: $commitMsg, files: {} }') + for file in slx.yaml runbook.yaml sli.yaml; do + if [ -f "$dir/$file" ]; then + CONTENT=$(cat "$dir/$file") + PAYLOAD=$(echo "$PAYLOAD" | jq --arg fileContent "$CONTENT" --arg fileName "$file" '.files[$fileName] = $fileContent') + fi + done + + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Uploading SLX: $SLX_NAME to $URL" + + response=$(curl -v -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|201 ]]; then + echo "Successfully uploaded SLX: $SLX_NAME to $URL" + else + echo "Failed to upload SLX: $SLX_NAME to $URL. Response:" + echo "$response" + fi + fi + done + silent: true + delete-slxs: + desc: "Delete SLX objects from the appropriate URL" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/tf.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Deletion aborted." + exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Deleting SLX: $SLX_NAME from $URL" + response=$(curl -v -X DELETE "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|204 ]]; then + echo "Successfully deleted SLX: $SLX_NAME from $URL" + else + echo "Failed to delete SLX: $SLX_NAME from $URL. 
Response:" + echo "$response" + fi + fi + done + silent: true + + check-terraform-infra: + desc: "Check if Terraform has any deployed infrastructure in the terraform subdirectory" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ ! -d "terraform" ]; then + echo "Terraform directory not found." + exit 1 + fi + cd terraform + + # Check if Terraform state file exists + if [ ! -f "terraform.tfstate" ]; then + echo "No Terraform state file found in the terraform directory. No infrastructure is deployed." + exit 0 + fi + + # List resources in Terraform state + resources=$(terraform state list) + + # Check if any resources are listed in the state file + if [ -n "$resources" ]; then + echo "Deployed infrastructure detected." + echo "$resources" + exit 0 + else + echo "No deployed infrastructure found in Terraform state." + exit 0 + fi + silent: true + + build-terraform-infra: + desc: "Run terraform apply" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Terraform apply aborted." + exit 1 + fi + task format-and-init-terraform + echo "Starting Terraform Build of Terraform infrastructure..." + terraform apply -auto-approve || { + echo "Failed to build Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure build completed." 
+ + # Extract and display Azure DevOps project details (reads the project_name / project_url Terraform outputs) + echo "Azure DevOps Project Details:" + project_name=$(terraform output -json | jq -r '.project_name.value') + project_url=$(terraform output -json | jq -r '.project_url.value') + echo "Project URL: $project_url" + echo "Project Name: $project_name" + + # Extract and display agent pool details + echo "Agent Pool Details:" + agent_pool_name=$(terraform output -json | jq -r '.agent_pool_name.value') + echo "Agent Pool Name: $agent_pool_name" + + echo "Next Steps:" + echo "1. Set up a self-hosted agent in the '$agent_pool_name' pool" + echo "2. Configure the agent according to the README instructions" + echo "3. Run the test pipelines to generate data for the triage codebundle" + silent: true + + cleanup-terraform-infra: + desc: "Cleanup deployed Terraform infrastructure" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + # export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + # export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + # export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Cleanup aborted." + exit 1 + fi + + echo "Starting cleanup of Terraform infrastructure..." + terraform destroy -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure cleanup completed." + silent: true + + check-and-cleanup-terraform: + desc: "Check and clean up deployed Terraform infrastructure if it exists" + cmds: + - | + # Capture the output of check-terraform-infra + infra_output=$(task check-terraform-infra | tee /dev/tty) + + # Check if output contains indication of deployed infrastructure + if echo "$infra_output" | grep -q "Deployed infrastructure detected"; then + echo "Infrastructure detected; proceeding with cleanup." 
+ task cleanup-terraform-infra + else + echo "No deployed infrastructure found; no cleanup required." + fi + silent: true + + clean-rwl-discovery: + desc: "Check and clean up RunWhen Local discovery output" + cmds: + - | + sudo rm -rf output + rm workspaceInfo.yaml + silent: true diff --git a/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml b/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml new file mode 100755 index 000000000..08e0e835d --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/Taskfile.yaml @@ -0,0 +1,69 @@ +version: '3' + +env: + TERM: screen-256color + +tasks: + default: + cmds: + - task: test + + test: + desc: Run tests. + cmds: + - task: test-terraform + + clean: + desc: Clean the environment. + cmds: + - task: clean-go + - task: clean-terraform + + clean-terraform: + desc: Clean the terraform environment (remove terraform directories and files) + cmds: + - find . -type d -name .terraform -exec rm -rf {} + + - find . -type f -name .terraform.lock.hcl -delete + + format-and-init-terraform: + desc: Run Terraform fmt and init + cmds: + - | + terraform fmt + terraform init + test-terraform: + desc: Run tests for all terraform directories. + silent: true + env: + DIRECTORIES: + sh: find . -path '*/.terraform/*' -prune -o -name '*.tf' -type f -exec dirname {} \; | sort -u + cmds: + - | + BOLD=$(tput bold) + NORM=$(tput sgr0) + + CWD=$PWD + + for d in $DIRECTORIES; do + cd $d + echo "${BOLD}$PWD:${NORM}" + if ! terraform fmt -check=true -list=false -recursive=false; then + echo " ✗ terraform fmt" && exit 1 + else + echo " √ terraform fmt" + fi + + if ! terraform init -backend=false -input=false -get=true -no-color > /dev/null; then + echo " ✗ terraform init" && exit 1 + else + echo " √ terraform init" + fi + + if ! 
terraform validate > /dev/null; then + echo " ✗ terraform validate" && exit 1 + else + echo " √ terraform validate" + fi + + cd $CWD + done \ No newline at end of file diff --git a/codebundles/azure-devops-triage/.test/terraform/backend.tf b/codebundles/azure-devops-triage/.test/terraform/backend.tf new file mode 100755 index 000000000..3d0c056bc --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} \ No newline at end of file diff --git a/codebundles/azure-devops-triage/.test/terraform/main.tf b/codebundles/azure-devops-triage/.test/terraform/main.tf new file mode 100755 index 000000000..7103d1a60 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/main.tf @@ -0,0 +1,291 @@ +resource "azurerm_resource_group" "rg" { + name = var.resource_group + location = var.location + tags = var.tags +} + +data "azurerm_client_config" "current" {} + +# Azure DevOps Organization and Project setup +resource "azuredevops_project" "test_project" { + name = "DevOps-Triage-Test" + visibility = "private" + version_control = "Git" + work_item_template = "Agile" + description = "Project for testing Azure DevOps triage scripts" +} + +# Create a Git repository in the project with proper initialization +resource "azuredevops_git_repository" "test_repo" { + project_id = azuredevops_project.test_project.id + name = "test-pipeline-repo" + initialization { + init_type = "Clean" # Initializes the repo with an initial commit. NOTE(review): the files and pipelines below target refs/heads/master - confirm that is this repo's default branch + } +} + +# Create a variable group for pipeline variables +resource "azuredevops_variable_group" "test_vars" { + project_id = azuredevops_project.test_project.id + name = "Test Pipeline Variables" + description = "Variables for test pipelines" + allow_access = true + + variable { + name = "TEST_VAR" + value = "test-value" + } + + variable { + name = "RESOURCE_GROUP" + value = azurerm_resource_group.rg.name + } + + variable { + name = 
"AZURE_SUBSCRIPTION_ID" + value = data.azurerm_client_config.current.subscription_id + } +} + +# Create a self-hosted agent pool +resource "azuredevops_agent_pool" "test_pool" { + name = "Test-Agent-Pool" + auto_provision = false + auto_update = true +} + +# Create an agent queue for the project +resource "azuredevops_agent_queue" "test_queue" { + project_id = azuredevops_project.test_project.id + agent_pool_id = azuredevops_agent_pool.test_pool.id +} + +# Authorize the queue for use by all pipelines +resource "azuredevops_pipeline_authorization" "test_auth" { + project_id = azuredevops_project.test_project.id + resource_id = azuredevops_agent_queue.test_queue.id + type = "queue" +} + +# Output the agent pool information for manual agent setup +output "agent_pool_setup_instructions" { + value = <<-EOT + To set up a self-hosted agent: + + 1. Download the agent from: https://dev.azure.com/${var.azure_devops_org}/_settings/agentpools?poolId=${azuredevops_agent_pool.test_pool.id}&_a=agents + + 2. Or follow these steps: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. Configure: ./config.sh + - Server URL: https://dev.azure.com/${var.azure_devops_org} + - PAT: (your PAT) #generate PAT from the your azure devops org + - Agent pool: ${azuredevops_agent_pool.test_pool.name} + e. 
Run as a service: ./svc.sh install && ./svc.sh start + EOT +} + +# Create a service connection to Azure +resource "azuredevops_serviceendpoint_azurerm" "test_endpoint" { + project_id = azuredevops_project.test_project.id + service_endpoint_name = "Test-Azure-Connection" + description = "Managed by Terraform" + azurerm_spn_tenantid = data.azurerm_client_config.current.tenant_id + azurerm_subscription_id = data.azurerm_client_config.current.subscription_id + azurerm_subscription_name = "Test Subscription" + credentials { + serviceprincipalid = var.client_id + serviceprincipalkey = var.client_secret + } +} + +# Create YAML files for pipelines +resource "local_file" "success_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: ${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Running successful pipeline" + echo "This pipeline will succeed" + echo "Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + displayName: 'Run successful script' + EOT + filename = "${path.module}/success-pipeline.yml" +} + +resource "local_file" "failing_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: ${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Running failing pipeline" + echo "This pipeline will fail" + echo "Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + exit 1 + displayName: 'Run failing script' + EOT + filename = "${path.module}/failing-pipeline.yml" +} + +resource "local_file" "long_running_pipeline_yaml" { + content = <<-EOT + trigger: + - master + + pool: + name: ${azuredevops_agent_pool.test_pool.name} # Use self-hosted agent pool + + steps: + - script: | + echo "Starting long-running pipeline" + echo "This pipeline will sleep for 5 minutes" # Reduced time for testing + echo 
"Using resource group: $(RESOURCE_GROUP)" + echo "Agent name: $(Agent.Name)" + echo "Agent machine name: $(Agent.MachineName)" + sleep 300 + echo "Long-running pipeline completed" + displayName: 'Run long script' + EOT + filename = "${path.module}/long-running-pipeline.yml" +} + +# Upload YAML files to the repository +resource "azuredevops_git_repository_file" "success_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "success-pipeline.yml" + content = local_file.success_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add success pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +resource "azuredevops_git_repository_file" "failing_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "failing-pipeline.yml" + content = local_file.failing_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add failing pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +resource "azuredevops_git_repository_file" "long_running_pipeline_file" { + repository_id = azuredevops_git_repository.test_repo.id + file = "long-running-pipeline.yml" + content = local_file.long_running_pipeline_yaml.content + branch = "refs/heads/master" # Use full ref format + commit_message = "Add long-running pipeline YAML" + overwrite_on_create = true + + depends_on = [azuredevops_git_repository.test_repo] +} + +# Create the pipelines +resource "azuredevops_build_definition" "success_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Success-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "success-pipeline.yml" + } + + variable_groups = [ + 
azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.success_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +resource "azuredevops_build_definition" "failing_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Failing-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "failing-pipeline.yml" + } + + variable_groups = [ + azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.failing_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +resource "azuredevops_build_definition" "long_running_pipeline" { + project_id = azuredevops_project.test_project.id + name = "Long-Running-Pipeline" + path = "\\Test" + + ci_trigger { + use_yaml = true + } + + repository { + repo_type = "TfsGit" + repo_id = azuredevops_git_repository.test_repo.id + branch_name = "refs/heads/master" + yml_path = "long-running-pipeline.yml" + } + + variable_groups = [ + azuredevops_variable_group.test_vars.id + ] + + depends_on = [ + azuredevops_git_repository_file.long_running_pipeline_file, + azuredevops_pipeline_authorization.test_auth + ] +} + +# Outputs +output "project_name" { + value = azuredevops_project.test_project.name +} + +output "project_url" { + value = "https://dev.azure.com/${var.azure_devops_org}/${azuredevops_project.test_project.name}" +} + +output "agent_pool_name" { + value = azuredevops_agent_pool.test_pool.name +} diff --git a/codebundles/azure-devops-triage/.test/terraform/providers.tf b/codebundles/azure-devops-triage/.test/terraform/providers.tf new file mode 100644 index 000000000..6c32a8037 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/providers.tf @@ -0,0 +1,37 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + 
version = "~> 3.0" + } + azuredevops = { + source = "microsoft/azuredevops" + version = "~> 1.8.1" + } + time = { + source = "hashicorp/time" + version = "~> 0.9.1" + } + } + required_version = ">= 1.0.0" +} + +provider "azurerm" { + features {} +} + +provider "azuredevops" { + org_service_url = var.azure_devops_org_url != null ? var.azure_devops_org_url : "https://dev.azure.com/${var.azure_devops_org}" + client_id = var.client_id + tenant_id = var.tenant_id + client_secret = var.client_secret +} + +# provider "azapi" { +# } + +# provider "local" { +# } + +# provider "null" { +# } diff --git a/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars b/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars new file mode 100755 index 000000000..871c4cdb5 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/terraform.tfvars @@ -0,0 +1,9 @@ +resource_group = "azure-devops-triage" +location = "East US" +azure_devops_org = "nishant0471" +azure_devops_org_url = "https://dev.azure.com/nishant0471" +tags = { + "env" : "test", + "lifecycle" : "deleteme", + "product" : "runwhen" +} \ No newline at end of file diff --git a/codebundles/azure-devops-triage/.test/terraform/variables.tf b/codebundles/azure-devops-triage/.test/terraform/variables.tf new file mode 100644 index 000000000..9f3817ac8 --- /dev/null +++ b/codebundles/azure-devops-triage/.test/terraform/variables.tf @@ -0,0 +1,69 @@ +variable "azure_devops_org" { + description = "Azure DevOps organization name" + type = string +} + +variable "azure_devops_org_url" { + description = "Azure DevOps organization URL" + type = string + default = null +} + +variable "service_principal_id" { + description = "Service Principal ID for Azure DevOps service connection" + type = string + sensitive = true +} + +# variable "service_principal_key" { +# description = "Service Principal Key for Azure DevOps service connection" +# type = string +# sensitive = true +# } +variable "client_id" { + description 
= "Client ID for Azure DevOps service connection" + type = string + sensitive = true +} + +variable "client_secret" { + description = "Client Secret for Azure DevOps service connection" + type = string + sensitive = true +} + +variable "tenant_id" { + description = "Azure AD tenant ID for service principal authentication" + type = string + sensitive = true +} + +# variable "subscription_id" { +# description = "Azure subscription ID" +# type = string +# sensitive = true +# } + + + +variable "tags" { + description = "Tags to apply to resources" + type = map(string) + default = {} +} + +variable "resource_group" { + description = "Name of the Azure resource group" + type = string +} + +variable "location" { + description = "Azure region where resources will be created" + type = string +} + +variable "trigger_pipelines" { + description = "Whether to trigger the pipelines after creation" + type = bool + default = true +} diff --git a/codebundles/azure-devops-triage/README.md b/codebundles/azure-devops-triage/README.md new file mode 100755 index 000000000..501ab3e49 --- /dev/null +++ b/codebundles/azure-devops-triage/README.md @@ -0,0 +1,136 @@ +# Azure DevOps Triage + +This codebundle runs a suite of health checks for Azure DevOps. It identifies: + +- Agent Pool Availability +- Failed Pipeline Runs +- Long-Running Pipelines +- Queued Pipelines +- Repository Policies +- Service Connection Health + +## Configuration + +The runbook requires initialization to import necessary secrets and user variables. 
The following variables should be set: + +- `AZURE_RESOURCE_GROUP`: The Azure resource group where DevOps resources are deployed +- `AZURE_DEVOPS_ORG`: Your Azure DevOps organization name +- `AZURE_DEVOPS_PROJECT`: Your Azure DevOps project name +- `DURATION_THRESHOLD`: Threshold for long-running pipelines (format: 60m, 2h) (default: 60m) +- `QUEUE_THRESHOLD`: Threshold for queued pipelines (format: 10m, 1h) (default: 30m) + +## Testing + +The `.test` directory contains infrastructure test code using Terraform to set up a test environment. + +### Prerequisites for Testing + +1. An existing Azure subscription +2. An existing Azure DevOps organization +3. Permissions to create resources in Azure and Azure DevOps +4. Azure CLI installed and configured +5. Terraform installed (v1.0.0+) + +### Azure DevOps Organization Setup (Before Running Terraform) + +Before running Terraform, you need to configure your Azure DevOps organization with the necessary permissions: + +#### 1. Organization Settings Configuration + +1. Navigate to your Azure DevOps organization settings (this is where you add the user that will run Terraform to the organization). +2. Navigate to Users and add the service principal as a user with the Basic access level. + +#### 2. Agent Pool Permissions + +1. Go to Organization Settings > Agent Pools > Security +2. Add your user (service principal) account with Administrator permissions +3. Click Save. + +#### 3. Organization-Level Security Permissions + +1. Go to Organization Settings > Security > Permissions +2. Navigate to Users and find your user (service principal) +3. Click on the user and ensure the "Create new projects" permission is set to "Allow" + +These permissions are required for Terraform to successfully create and configure resources in your Azure DevOps organization. 
+ +### Test Environment Setup + +The test environment creates: +- A new Azure DevOps project +- A new agent pool +- Git repositories with sample pipeline definitions +- Variable groups for testing + +#### Step 1: Configure Terraform Variables + +Create a `terraform.tfvars` file in the `.test/terraform` directory: + +```hcl +azure_devops_org = "your-org-name" +azure_devops_org_url = "https://dev.azure.com/your-org-name" +resource_group = "your-resource-group" +location = "eastus" +tags = { env = "test" } +``` + +#### Step 2: Initialize and Apply Terraform + +```bash +cd .test/terraform +terraform init +terraform apply +``` + +#### Step 3: Set Up Self-Hosted Agent (Manual Step) + +After Terraform creates the agent pool, you need to manually set up at least one self-hosted agent: + +1. In Azure DevOps, navigate to Project Settings > Agent pools > [Your Pool Name] +2. Click "New agent" +3. Follow the instructions to download and configure the agent on your machine +4. Start the agent and verify it's online + +Or follow these steps: + a. Create a folder on your machine (e.g., mkdir ~/azagent && cd ~/azagent) + b. Download the agent: curl -O https://vstsagentpackage.azureedge.net/agent/2.214.1/vsts-agent-linux-x64-2.214.1.tar.gz + c. Extract: tar zxvf vsts-agent-linux-x64-2.214.1.tar.gz + d. Configure: ./config.sh + - Server URL: https://dev.azure.com/<your-org-name> + - PAT: (your PAT) # generate a PAT in your Azure DevOps organization + - Agent pool: <name of the agent pool created by Terraform; see the `agent_pool_name` output> + e. Run as a service: ./svc.sh install && ./svc.sh start + +#### Step 4: Trigger Test Pipelines (Manual Step) + +The test environment includes several pipeline definitions: +- Success Pipeline: A pipeline that completes successfully +- Failing Pipeline: A pipeline that intentionally fails +- Long-Running Pipeline: A pipeline that runs for longer than the threshold + +To trigger these pipelines: +1. Navigate to Pipelines in your Azure DevOps project +2. 
Select each pipeline and click "Run pipeline" + +#### Step 5: Run the Triage Runbook + +Once the test environment is set up and pipelines are running, you can execute the triage runbook to verify it correctly identifies issues. + +### Cleaning Up + +To remove the test environment: + +```bash +cd .test/terraform +terraform destroy +``` + +Note: This will not remove the Azure DevOps organization, as it was a prerequisite. + +## Notes + +- The codebundle uses the Azure CLI with the Azure DevOps extension to interact with Azure DevOps. +- Service principal authentication is used for Azure resources. +- The runbook focuses on identifying issues rather than fixing them. +- For queued pipelines, the threshold is measured from when the pipeline was created to the current time. +- For long-running pipelines, the threshold is measured from start time to finish time (or current time if still running). diff --git a/codebundles/azure-devops-triage/agent-pools.sh b/codebundles/azure-devops-triage/agent-pools.sh new file mode 100644 index 000000000..0aaa92b0a --- /dev/null +++ b/codebundles/azure-devops-triage/agent-pools.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash + +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# +# OPTIONAL ENV VARS: +# HIGH_UTILIZATION_THRESHOLD - Percentage threshold for agent utilization (default: 80) +# +# This script: +# 1) Lists all agent pools in the specified Azure DevOps organization +# 2) Checks the status of agents in each pool +# 3) Identifies offline, disabled, or unhealthy agents +# 4) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${HIGH_UTILIZATION_THRESHOLD:=80}" # Default to 80% if not specified + +OUTPUT_FILE="agent_pools_issues.json" +issues_json='[]' +ORG_URL="https://dev.azure.com/$AZURE_DEVOPS_ORG" + +echo "Analyzing Azure DevOps Agent 
Pools..." +echo "Organization: $AZURE_DEVOPS_ORG" +echo "High Utilization Threshold: ${HIGH_UTILIZATION_THRESHOLD}%" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." + az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="$ORG_URL" --output none + +# Get list of agent pools +echo "Retrieving agent pools in organization..." +if ! pools=$(az pipelines pool list --org "$ORG_URL" --output json 2>pools_err.log); then + err_msg=$(cat pools_err.log) + rm -f pools_err.log + + echo "ERROR: Could not list agent pools." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Agent Pools" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if you have sufficient permissions to view agent pools." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f pools_err.log + +# Save pools to a file to avoid subshell issues +echo "$pools" > pools.json + +# Get the number of pools +pool_count=$(jq '. | length' pools.json) + +# Process each agent pool using a for loop instead of pipe to while +for ((i=0; iagents_err.log); then + err_msg=$(cat agents_err.log) + rm -f agents_err.log + + # Failed to list agents in pool + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Agents in Pool \`$pool_name\`" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view agents in this pool." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f agents_err.log + + # Check if pool has no agents + agent_count=$(echo "$agents" | jq '. 
| length') + if [[ "$agent_count" -eq 0 ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "No Agents Found in Pool \`$pool_name\`" \ + --arg details "Agent pool $pool_name (ID: $pool_id) has no registered agents." \ + --arg severity "3" \ + --arg nextStep "Add agents to this pool or remove the pool if it's no longer needed." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + + # Check for offline agents + offline_agents=$(echo "$agents" | jq '[.[] | select(.status != "online")]') + offline_count=$(echo "$offline_agents" | jq '. | length') + + if [[ "$offline_count" -gt 0 ]]; then + offline_details=$(echo "$offline_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled, version: .version}]') + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Offline Agents Found in Pool \`$pool_name\`" \ + --arg details "$offline_details" \ + --arg severity "3" \ + --arg nextStep "Check the agent machines and restart the agent service if needed. Verify network connectivity between agents and Azure DevOps." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + + # Check for disabled agents + disabled_agents=$(echo "$agents" | jq '[.[] | select(.enabled == false)]') + disabled_count=$(echo "$disabled_agents" | jq '. | length') + + if [[ "$disabled_count" -gt 0 ]]; then + disabled_details=$(echo "$disabled_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled, version: .version}]') + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Disabled Agents Found in Pool \`$pool_name\`" \ + --arg details "$disabled_details" \ + --arg severity "2" \ + --arg nextStep "Enable these agents if they should be available for builds, or remove them if they're no longer needed." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + + # Check for agents with high job count (potentially overloaded) + busy_agents=$(echo "$agents" | jq '[.[] | select(.assignedRequest != null)]') + busy_count=$(echo "$busy_agents" | jq '. | length') + total_online=$(echo "$agents" | jq '[.[] | select(.status == "online")] | length') + + # If more than HIGH_UTILIZATION_THRESHOLD% of agents are busy, flag as potential capacity issue + if [[ "$total_online" -gt 0 && "$busy_count" -gt 0 ]]; then + busy_percentage=$((busy_count * 100 / total_online)) + if [[ "$busy_percentage" -gt "$HIGH_UTILIZATION_THRESHOLD" ]]; then + busy_details=$(echo "$busy_agents" | jq -c '[.[] | {name: .name, status: .status, enabled: .enabled}]') + + issues_json=$(echo "$issues_json" | jq \ + --arg title "High Agent Utilization in Pool \`$pool_name\`" \ + --arg details "Pool has $busy_count out of $total_online agents currently busy ($busy_percentage% utilization)" \ + --arg severity "2" \ + --arg nextStep "Consider adding more agents to this pool to handle the workload or optimize your pipelines to reduce build times." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi +done + +# Clean up temporary file +rm -f pools.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps agent pool analysis completed. 
Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/long-running-pipelines.sh b/codebundles/azure-devops-triage/long-running-pipelines.sh new file mode 100644 index 000000000..ec866cbbc --- /dev/null +++ b/codebundles/azure-devops-triage/long-running-pipelines.sh @@ -0,0 +1,287 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# AZURE_DEVOPS_PROJECT +# +# OPTIONAL ENV VARS: +# DURATION_THRESHOLD - Threshold in minutes or hours (e.g., "60m" or "2h") for long-running pipelines (default: "60m") +# +# This script: +# 1) Lists all pipelines in the specified Azure DevOps project +# 2) Checks for runs that exceed the specified duration threshold +# 3) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" +: "${DURATION_THRESHOLD:=1m}" + +OUTPUT_FILE="long_running_pipelines.json" +issues_json='[]' + +# Convert duration threshold to minutes +convert_to_minutes() { + local threshold=$1 + local number=$(echo "$threshold" | sed -E 's/[^0-9]//g') + local unit=$(echo "$threshold" | sed -E 's/[0-9]//g') + + case $unit in + m|min|mins) + echo $number + ;; + h|hr|hrs|hour|hours) + echo $((number * 60)) + ;; + *) + echo "Invalid duration format. Use format like '60m' or '2h'" >&2 + exit 1 + ;; + esac +} + +THRESHOLD_MINUTES=$(convert_to_minutes "$DURATION_THRESHOLD") + +echo "Analyzing Azure DevOps Pipeline Durations..." +echo "Organization: $AZURE_DEVOPS_ORG" +echo "Project: $AZURE_DEVOPS_PROJECT" +echo "Threshold: $THRESHOLD_MINUTES minutes" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." 
+ az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none + +# Get list of pipelines +echo "Retrieving pipelines in project..." +if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then + err_msg=$(cat pipelines_err.log) + rm -f pipelines_err.log + + echo "ERROR: Could not list pipelines." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Pipelines" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if the project exists and you have the right permissions." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f pipelines_err.log + +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. | length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; iruns_err.log); then + err_msg=$(cat runs_err.log) + rm -f runs_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Runs for Pipeline $pipeline_name" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f runs_err.log + + # Save runs to a file to avoid subshell issues + echo "$runs" > runs.json + + # Get the number of runs + run_count=$(jq '. 
| length' runs.json) + + # Check for currently running pipelines + for ((j=0; j/dev/null || echo 0) + finish_timestamp=$(date -d "$finish_time" +%s 2>/dev/null || echo 0) + + # Calculate duration in seconds + if [ "$start_timestamp" -gt 0 ] && [ "$finish_timestamp" -gt 0 ]; then + duration_seconds=$((finish_timestamp - start_timestamp)) + else + duration_seconds=0 + echo " Warning: Could not parse timestamps for run $run_id" + fi + else + duration_seconds=0 + echo " Warning: Missing start or finish time for run $run_id" + fi + + duration_minutes=$((duration_seconds / 60)) + + # Format duration for display + if [ $duration_minutes -ge 1440 ]; then + days=$((duration_minutes / 1440)) + hours=$(((duration_minutes % 1440) / 60)) + mins=$((duration_minutes % 60)) + formatted_duration="${days}d ${hours}h ${mins}m" + elif [ $duration_minutes -ge 60 ]; then + hours=$((duration_minutes / 60)) + mins=$((duration_minutes % 60)) + formatted_duration="${hours}h ${mins}m" + else + formatted_duration="${duration_minutes}m" + fi + + # Check if duration exceeds threshold + if [ $duration_minutes -ge $THRESHOLD_MINUTES ]; then + echo " Found long-running completed pipeline: $run_name (ID: $run_id, Branch: $branch, Duration: $formatted_duration)" + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Long Running Completed Pipeline: \`$pipeline_name\` (Branch: \`$branch\`)" \ + --arg details "Pipeline run completed in $formatted_duration (exceeds threshold of $THRESHOLD_MINUTES minutes)" \ + --arg severity "2" \ + --arg nextStep "Review pipeline \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\` for optimization opportunities. Consider parallelizing tasks or upgrading agent resources." \ + --arg resource_url "$web_url" \ + --arg duration "$formatted_duration" \ + --arg duration_minutes "$duration_minutes" \ + --arg pipeline_id "$pipeline_id" \ + --arg run_id "$run_id" \ + --arg branch "$branch" \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber), + "resource_url": $resource_url, + "duration": $duration, + "duration_minutes": ($duration_minutes | tonumber), + "pipeline_id": $pipeline_id, + "run_id": $run_id, + "branch": $branch + }]') + fi + done + + # Clean up runs file + rm -f runs.json +done + +# Clean up pipelines file +rm -f pipelines.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps long-running pipeline analysis completed. Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/pipeline-logs.sh b/codebundles/azure-devops-triage/pipeline-logs.sh new file mode 100644 index 000000000..d33cc14a0 --- /dev/null +++ b/codebundles/azure-devops-triage/pipeline-logs.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +#set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# AZURE_DEVOPS_PROJECT +# +# +# This script: +# 1) Lists all pipelines in the specified Azure DevOps project +# 2) Retrieves logs for each failed run +# 3) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" + +OUTPUT_FILE="pipeline_logs_issues.json" +TEMP_LOG_FILE="pipeline_log_temp.json" +issues_json='[]' + +echo "Analyzing Azure DevOps Pipeline Logs..." +echo "Organization: $AZURE_DEVOPS_ORG" +echo "Project: $AZURE_DEVOPS_PROJECT" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." 
+ az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none + +# Get list of pipelines +echo "Retrieving pipelines in project..." +if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then + err_msg=$(cat pipelines_err.log) + rm -f pipelines_err.log + + echo "ERROR: Could not list pipelines." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Pipelines" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if the project exists and you have the right permissions." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f pipelines_err.log + +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. | length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; iruns_err.log); then + err_msg=$(cat runs_err.log) + rm -f runs_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Runs for Pipeline $pipeline_name" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f runs_err.log + + # Save runs to a file to avoid subshell issues + echo "$runs" > runs.json + + # Get the number of runs + run_count=$(jq '. 
| length' runs.json) + + # Check for failed runs + for ((j=0; jlogs_err.log); then + err_msg=$(cat logs_err.log) + rm -f logs_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to Get Logs for Run $run_name in Pipeline $pipeline_name" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view pipeline logs." \ + --arg resource_url "$web_url" \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber), + "resource_url": $resource_url + }]') + continue + fi + rm -f logs_err.log + + # Save all logs to a file for processing + echo "$all_logs" > all_logs.json + + # Get log with highest line count + if ! log_info=$(jq -c '.logs[] | {id: .id, lineCount: .lineCount}' all_logs.json | sort -r -k2,2 | head -1); then + echo "Failed to find logs with line count information" + continue + fi + + # Extract log ID with highest line count + log_id=$(echo "$log_info" | jq -r '.id') + echo " Selected log ID with highest line count: $log_id" + + # Get detailed log content for the selected log + if ! log_content=$(az devops invoke --org "https://dev.azure.com/$AZURE_DEVOPS_ORG" --area build --resource logs --route-parameters project="$AZURE_DEVOPS_PROJECT" buildId="$run_id" logId="$log_id" --api-version=7.0 --output json --only-show-errors 2>log_content_err.log); then + echo " Failed to get log content for log ID $log_id, skipping..." 
+ continue + fi + + # Save log content to temp file for processing + echo "$log_content" > "$TEMP_LOG_FILE" + + # Extract all log lines and join them with newlines + log_details=$(jq -r '.value | join("\n")' "$TEMP_LOG_FILE") + + # Construct the correct log URL format + error_log_url="https://dev.azure.com/$AZURE_DEVOPS_ORG/$project_id/_apis/build/builds/$run_id/logs/$log_id" + + # Clean up temp files + rm -f "$TEMP_LOG_FILE" all_logs.json + + # Add an issue with the full log content + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed Pipeline Run: \`$pipeline_name\` (Branch: \`$branch\`)" \ + --arg details "$log_details" \ + --arg severity "3" \ + --arg nextStep "Review pipeline configuration for \`$pipeline_name\` in project \`$AZURE_DEVOPS_PROJECT\`. Check branch \`$branch\` for recent changes that might have caused the failure." \ + --arg resource_url "$error_log_url" \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber), + "resource_url": $resource_url + }]') + done + + # Clean up runs file + rm -f runs.json +done + +# Clean up pipelines file +rm -f pipelines.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps pipeline log analysis completed. 
Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/policy-standards.json b/codebundles/azure-devops-triage/policy-standards.json new file mode 100644 index 000000000..c85858a17 --- /dev/null +++ b/codebundles/azure-devops-triage/policy-standards.json @@ -0,0 +1,83 @@ +{ + "requiredPolicies": { + "minimumReviewers": { + "typeId": "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd", + "displayName": "Minimum number of reviewers", + "settings": { + "minimumApproverCount": 2, + "creatorVoteCounts": false, + "allowDownvotes": false, + "resetOnSourcePush": true + } + }, + "workItemLinking": { + "typeId": "40e92b44-2fe1-4dd6-b3d8-74a9c21d0c6e", + "displayName": "Work item linking", + "settings": { + "enabled": true, + "workItemType": "Any" + } + }, + "commentRequirements": { + "typeId": "c6a1889d-b943-4856-b76f-9e46bb6b0df2", + "displayName": "Comment requirements", + "settings": { + "blockCommentsDuringPush": true, + "rejectionErrorMessage": "Comments must be provided with your changes" + } + }, + "buildValidation": { + "typeId": "0609b952-1397-4640-95ec-e00a01b2c241", + "displayName": "Build validation", + "settings": { + "buildDefinitionId": 0, + "queueOnSourceUpdateOnly": true, + "manualQueueOnly": false, + "displayName": "Build validation", + "validDuration": 720, + "scope": [ + { + "repositoryId": null, + "refName": null, + "matchKind": "Exact" + } + ] + } + }, + "requiredReviewers": { + "typeId": "fd2167ab-b0be-447a-8ec8-39368250530e", + "displayName": "Required reviewers", + "settings": { + "requiredReviewerIds": [], + "minimumApproverCount": 1, + "creatorVoteCounts": false, + "message": "Code owners review required" + } + } + }, + "branchPolicies": { + "defaultBranch": { + "isLocked": true, + "requirePullRequest": true, + "resetOnSourcePush": true + }, + "featureBranches": { + "isLocked": false, + "requirePullRequest": false + } + }, + "namingConventions": { + "repositories": "^[a-z0-9][-a-z0-9]*$", + "branches": { + "feature": 
"^feature/[A-Z]+-[0-9]+-.+$", + "bugfix": "^bugfix/[A-Z]+-[0-9]+-.+$", + "release": "^release/[0-9]+\\.[0-9]+\\.[0-9]+$", + "hotfix": "^hotfix/[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$" + } + }, + "securitySettings": { + "enableCrossRepoMerge": false, + "enableForkSync": false, + "enableWebHooks": true + } +} \ No newline at end of file diff --git a/codebundles/azure-devops-triage/queued-pipelines.sh b/codebundles/azure-devops-triage/queued-pipelines.sh new file mode 100644 index 000000000..26a93abda --- /dev/null +++ b/codebundles/azure-devops-triage/queued-pipelines.sh @@ -0,0 +1,226 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# AZURE_DEVOPS_PROJECT +# +# OPTIONAL ENV VARS: +# +# This script: +# 1) Lists all pipelines in the specified Azure DevOps project +# 2) Checks for runs that are queued longer than the specified threshold +# 3) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" +: "${QUEUE_THRESHOLD:=1m}" + +OUTPUT_FILE="queued_pipelines.json" +issues_json='[]' + +# Convert duration threshold to minutes +convert_to_minutes() { + local threshold=$1 + local number=$(echo "$threshold" | sed -E 's/[^0-9]//g') + local unit=$(echo "$threshold" | sed -E 's/[0-9]//g') + + case $unit in + m|min|mins) + echo $number + ;; + h|hr|hrs|hour|hours) + echo $((number * 60)) + ;; + *) + echo "Invalid duration format. Use format like '10m' or '1h'" >&2 + exit 1 + ;; + esac +} + +THRESHOLD_MINUTES=$(convert_to_minutes "$QUEUE_THRESHOLD") + +echo "Analyzing Azure DevOps Queued Pipelines..." 
+echo "Organization: $AZURE_DEVOPS_ORG" +echo "Project: $AZURE_DEVOPS_PROJECT" +echo "Threshold: $THRESHOLD_MINUTES minutes" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." + az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none + +# Get list of pipelines +echo "Retrieving pipelines in project..." +if ! pipelines=$(az pipelines list --output json 2>pipelines_err.log); then + err_msg=$(cat pipelines_err.log) + rm -f pipelines_err.log + + echo "ERROR: Could not list pipelines." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Pipelines" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if the project exists and you have the right permissions." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f pipelines_err.log + +# Save pipelines to a file to avoid subshell issues +echo "$pipelines" > pipelines.json + +# Get the number of pipelines +pipeline_count=$(jq '. | length' pipelines.json) + +# Process each pipeline using a for loop instead of pipe to while +for ((i=0; iruns_err.log); then + err_msg=$(cat runs_err.log) + rm -f runs_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Runs for Pipeline $pipeline_name" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view pipeline runs." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f runs_err.log + + # Save runs to a file to avoid subshell issues + echo "$runs" > runs.json + + # Get the number of runs + run_count=$(jq '. | length' runs.json) + + # Check for queued runs + for ((j=0; j/dev/null); then + queue_reason="Could not retrieve detailed information" + else + # Save run details to a file + echo "$run_details" > run_details.json + + # Extract queue position if available + queue_position=$(jq -r '.queuePosition // "Unknown"' run_details.json) + if [ "$queue_position" != "null" ] && [ "$queue_position" != "Unknown" ]; then + queue_reason="Queue position: $queue_position" + fi + + # Try to extract any waiting reason + waiting_reason=$(jq -r '.reason // "Unknown"' run_details.json) + if [ "$waiting_reason" != "null" ] && [ "$waiting_reason" != "Unknown" ]; then + queue_reason="$queue_reason, Reason: $waiting_reason" + fi + + # Clean up run details file + rm -f run_details.json + fi + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Pipeline Queued Too Long: \`$pipeline_name\` (Branch: \`$branch\`)" \ + --arg details "Pipeline has been queued for $formatted_queue_time (exceeds threshold of $THRESHOLD_MINUTES minutes). $queue_reason" \ + --arg severity "3" \ + --arg nextStep "Check agent pool capacity and availability. Consider adding more agents or optimizing pipeline concurrency limits." \ + --arg resource_url "$web_url" \ + --arg queue_time "$formatted_queue_time" \ + --arg queue_minutes "$queue_minutes" \ + --arg pipeline_id "$pipeline_id" \ + --arg run_id "$run_id" \ + --arg branch "$branch" \ + --arg queue_reason "$queue_reason" \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber), + "resource_url": $resource_url, + "queue_time": $queue_time, + "queue_minutes": ($queue_minutes | tonumber), + "pipeline_id": $pipeline_id, + "run_id": $run_id, + "branch": $branch, + "queue_reason": $queue_reason + }]') + fi + done + + # Clean up runs file + rm -f runs.json +done + +# Clean up pipelines file +rm -f pipelines.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps queued pipeline analysis completed. Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/repo-policies.sh b/codebundles/azure-devops-triage/repo-policies.sh new file mode 100644 index 000000000..c29087b8f --- /dev/null +++ b/codebundles/azure-devops-triage/repo-policies.sh @@ -0,0 +1,308 @@ +#!/usr/bin/env bash +# set -x + +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG - Azure DevOps organization name +# AZURE_DEVOPS_PROJECT - Azure DevOps project name (optional, checks all projects if not specified) +# +# This script: +# 1) Lists all repositories in the specified Azure DevOps organization/project +# 2) Checks branch policies against the standards defined in policy-standards.json +# 3) Identifies missing or misconfigured policies +# 4) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" + +OUTPUT_FILE="repo_policies_issues.json" +issues_json='[]' +ORG_URL="https://dev.azure.com/$AZURE_DEVOPS_ORG" + +echo "Analyzing Azure DevOps Repository Policies..." +echo "Organization: $AZURE_DEVOPS_ORG" +if [[ -n "$AZURE_DEVOPS_PROJECT" ]]; then + echo "Project: $AZURE_DEVOPS_PROJECT" +fi + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! 
az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." + az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="$ORG_URL" --output none + +# Load policy standards +if [[ -f "policy-standards.json" ]]; then + policy_standards=$(cat policy-standards.json) + echo "Loaded policy standards from policy-standards.json" +else + echo "WARNING: policy-standards.json not found. Using default standards." + # Default minimal standards if file not found + policy_standards='{ + "requiredPolicies": { + "minimumReviewers": { + "typeId": "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd", + "displayName": "Minimum number of reviewers", + "settings": { + "minimumApproverCount": 2, + "creatorVoteCounts": false, + "allowDownvotes": false, + "resetOnSourcePush": true + } + }, + "workItemLinking": { + "typeId": "40e92b44-2fe1-4dd6-b3d8-74a9c21d0c6e", + "displayName": "Work item linking", + "settings": { + "enabled": true, + "workItemType": "Any" + } + } + }, + "branchPolicies": { + "defaultBranch": { + "isLocked": true, + "requirePullRequest": true, + "resetOnSourcePush": true + } + } + }' +fi + +# Get list of projects +if [[ -n "$AZURE_DEVOPS_PROJECT" ]]; then + projects_json="[{\"name\": \"$AZURE_DEVOPS_PROJECT\"}]" +else + echo "Retrieving all projects in organization..." + if ! projects_json=$(az devops project list --org "$ORG_URL" --output json 2>projects_err.log); then + err_msg=$(cat projects_err.log) + rm -f projects_err.log + + echo "ERROR: Could not list projects." + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Projects" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if you have sufficient permissions to view projects." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 + fi + projects_json=$(echo "$projects_json" | jq '.value') + rm -f projects_err.log +fi + +# Save projects to a file to avoid subshell issues +echo "$projects_json" > projects.json + +# Get the number of projects +project_count=$(jq '. | length' projects.json) +echo "Found $project_count project(s) to analyze" + +# Process each project +for ((p=0; prepos_err.log); then + err_msg=$(cat repos_err.log) + rm -f repos_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Repositories in Project \`$project_name\`" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view repositories in this project." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f repos_err.log + + # Save repos to a file to avoid subshell issues + echo "$repos_json" > repos.json + + # Get the number of repos + repo_count=$(jq '. | length' repos.json) + echo "Found $repo_count repositories in project $project_name" + + # Process each repository + for ((r=0; rpolicies_err.log); then + err_msg=$(cat policies_err.log) + rm -f policies_err.log + + issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Policies for Repository \`$repo_name\`" \ + --arg details "$err_msg" \ + --arg severity "3" \ + --arg nextStep "Check if you have sufficient permissions to view policies in this repository." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + continue + fi + rm -f policies_err.log + + # Check if default branch is locked + if [[ $(echo "$policy_standards" | jq -r '.branchPolicies.defaultBranch.isLocked') == "true" ]]; then + # Check if branch has lock policy + if ! 
echo "$policies_json" | jq -e '[.[] | select(.type.id == "fa4e907d-c16b-4a4c-9dfa-4916e5d171ab")] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Default Branch Not Locked" \ + --arg details "Default branch \`$repo_default_branch\` in repository \`$repo_name\` (project \`$project_name\`) is not locked as required by policy." \ + --arg severity "3" \ + --arg nextStep "Enable branch lock policy for the default branch." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # Check if default branch requires pull request + if [[ $(echo "$policy_standards" | jq -r '.branchPolicies.defaultBranch.requirePullRequest') == "true" ]]; then + # Check if branch has PR policy + if ! echo "$policies_json" | jq -e '[.[] | select(.type.id == "fa4e907d-c16b-4a4c-9dfa-4906e5d171dd")] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Default Branch Does Not Require Pull Requests" \ + --arg details "Default branch \`$repo_default_branch\` in repository \`$repo_name\` (project \`$project_name\`) does not require pull requests as required by policy." \ + --arg severity "3" \ + --arg nextStep "Enable pull request policy for the default branch." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # Check for required policies + required_policies=$(echo "$policy_standards" | jq -r '.requiredPolicies | keys[]') + for policy_key in $required_policies; do + policy_type_id=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.typeId") + policy_display_name=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.displayName") + + # Check if policy exists + if ! 
echo "$policies_json" | jq -e --arg type_id "$policy_type_id" '[.[] | select(.type.id == $type_id)] | length > 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Missing Required Policy: $policy_display_name" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) is missing the required policy: $policy_display_name" \ + --arg severity "3" \ + --arg nextStep "Add the required policy to the repository's default branch." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + else + # Policy exists, check settings + policy_settings=$(echo "$policy_standards" | jq -r ".requiredPolicies.$policy_key.settings") + actual_policy=$(echo "$policies_json" | jq --arg type_id "$policy_type_id" '[.[] | select(.type.id == $type_id)][0]') + + # For minimum reviewers policy, check the count + if [[ "$policy_key" == "minimumReviewers" ]]; then + required_count=$(echo "$policy_settings" | jq -r '.minimumApproverCount') + actual_count=$(echo "$actual_policy" | jq -r '.settings.minimumApproverCount') + + if [[ "$actual_count" -lt "$required_count" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Insufficient Minimum Reviewers" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) requires only $actual_count reviewers, but policy requires $required_count." \ + --arg severity "2" \ + --arg nextStep "Increase the minimum number of required reviewers to $required_count." \ + '. 
+= [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + + # For build validation, check if it's configured + if [[ "$policy_key" == "buildValidation" ]]; then + if echo "$actual_policy" | jq -e '.settings.buildDefinitionId == 0' > /dev/null; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Build Validation Not Configured" \ + --arg details "Repository \`$repo_name\` (project \`$project_name\`) has build validation policy but no build definition is selected." \ + --arg severity "2" \ + --arg nextStep "Configure a build definition for the build validation policy." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + fi + fi + fi + done + done + + # Clean up repos file + rm -f repos.json +done + +# Clean up projects file +rm -f projects.json + +# Write final JSON +echo "$issues_json" > "$OUTPUT_FILE" +echo "Azure DevOps repository policy analysis completed. 
Saved results to $OUTPUT_FILE" \ No newline at end of file diff --git a/codebundles/azure-devops-triage/runbook.robot b/codebundles/azure-devops-triage/runbook.robot new file mode 100755 index 000000000..ac3960c13 --- /dev/null +++ b/codebundles/azure-devops-triage/runbook.robot @@ -0,0 +1,263 @@ +*** Settings *** +Documentation Check Azure DevOps health by examining pipeline status, agent pools, and build logs +Metadata Author Nbarola +Metadata Display Name Azure DevOps Triage +Metadata Supports Azure DevOps Pipelines Health +Force Tags Azure DevOps Pipelines Health + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + + +*** Tasks *** +Check Agent pool availability in organisation `${AZURE_DEVOPS_ORG}` in resource group `${AZURE_RESOURCE_GROUP}` + [Documentation] Check the health status of Agent Pools in the specified organization + [Tags] DevOps Azure Health access:read-only + ${agent_pool}= RW.CLI.Run Bash File + ... bash_file=agent-pools.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + ${issues}= RW.CLI.Run Cli + ... cmd=cat agent_pools_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${agent} IN @{issue_list} + RW.Core.Add Issue + ... severity=${agent['severity']} + ... expected=Agent Pool should be available in organization `${AZURE_DEVOPS_ORG}` + ... actual=Agent Pool is unhealthy in organization `${AZURE_DEVOPS_ORG}` + ... title=Azure DevOps reports an Issue for Agent Pool in organization `${AZURE_DEVOPS_ORG}` + ... reproduce_hint=${agent_pool.cmd} + ... details=${agent} + ... next_steps=Please escalate to the Azure DevOps service owner or check back later.
+ END + END + +Check for Failed Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Identify failed pipeline runs in the specified project + [Tags] DevOps Azure Pipelines Failures access:read-only + ${failed_pipelines}= RW.CLI.Run Bash File + ... bash_file=pipeline-logs.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${failed_pipelines.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat pipeline_logs_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should complete successfully + ... actual=Pipeline failed with errors + ... title=${issue['title']} + ... reproduce_hint=${failed_pipelines.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... resource_url=${issue['resource_url']} + END + END + +Check for Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` (Threshold: ${DURATION_THRESHOLD}) + [Documentation] Identify pipelines that are running longer than expected + [Tags] DevOps Azure Pipelines Performance access:read-only + ${long_running}= RW.CLI.Run Bash File + ... bash_file=long-running-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${long_running.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat long_running_pipelines.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should complete within the expected time frame (${DURATION_THRESHOLD}) + ... actual=Pipeline is running longer than expected (${issue['duration']}) + ... title=${issue['title']} + ... reproduce_hint=${long_running.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... resource_url=${issue['resource_url']} + END + END + +Check for Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` (Threshold: ${QUEUE_THRESHOLD}) + [Documentation] Identify pipelines that are queued for longer than expected + [Tags] DevOps Azure Pipelines Queue access:read-only + ${queued_pipelines}= RW.CLI.Run Bash File + ... bash_file=queued-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${queued_pipelines.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat queued_pipelines.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Pipeline should start execution promptly (within ${QUEUE_THRESHOLD}) + ... actual=Pipeline has been queued for ${issue['queue_time']} + ... title=${issue['title']} + ... reproduce_hint=${queued_pipelines.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + ... 
resource_url=${issue['resource_url']} + END + END + +Check for Repository Policy Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Verify repository branch policies against best practices + [Tags] DevOps Azure Repository Policies access:read-only + ${repo_policies}= RW.CLI.Run Bash File + ... bash_file=repo-policies.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${repo_policies.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat repo_policies_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Repository should have proper branch policies configured + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${repo_policies.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + END + END + +Check for Service Connection Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Verify the health of service connections used by pipelines + [Tags] DevOps Azure ServiceConnections access:read-only + ${service_connections}= RW.CLI.Run Bash File + ... bash_file=service-connections.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + + RW.Core.Add Pre To Report ${service_connections.stdout} + + ${issues}= RW.CLI.Run Cli + ... cmd=cat service_connections_issues.json + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Service connections should be healthy and accessible + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${service_connections.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_step']} + END + END + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + ${AZURE_DEVOPS_ORG}= RW.Core.Import User Variable AZURE_DEVOPS_ORG + ... type=string + ... description=Azure DevOps organization. + ... pattern=\w* + ${AZURE_DEVOPS_PROJECT}= RW.Core.Import User Variable AZURE_DEVOPS_PROJECT + ... type=string + ... description=Azure DevOps project. + ... pattern=\w* + ${DURATION_THRESHOLD}= RW.Core.Import User Variable DURATION_THRESHOLD + ... type=string + ... description=Threshold for long-running pipelines (format: 60m, 2h) + ... default=60m + ${QUEUE_THRESHOLD}= RW.Core.Import User Variable QUEUE_THRESHOLD + ... type=string + ... description=Threshold for queued pipelines (format: 10m, 1h) + ... default=30m + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} + Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} + Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} + Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} + Set Suite Variable + ... ${env} + ... 
{"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"} diff --git a/codebundles/azure-devops-triage/service-connections.sh b/codebundles/azure-devops-triage/service-connections.sh new file mode 100644 index 000000000..fc4e88c4b --- /dev/null +++ b/codebundles/azure-devops-triage/service-connections.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# REQUIRED ENV VARS: +# AZURE_DEVOPS_ORG +# AZURE_DEVOPS_PROJECT +# +# This script: +# 1) Lists all service connections in the specified Azure DevOps project +# 2) Checks if each service connection is ready +# 3) Outputs results in JSON format +# ----------------------------------------------------------------------------- + +: "${AZURE_DEVOPS_ORG:?Must set AZURE_DEVOPS_ORG}" +: "${AZURE_DEVOPS_PROJECT:?Must set AZURE_DEVOPS_PROJECT}" + +OUTPUT_FILE="service_connections_issues.json" +issues_json='[]' + +echo "Analyzing Azure DevOps Service Connections..." +echo "Organization: $AZURE_DEVOPS_ORG" +echo "Project: $AZURE_DEVOPS_PROJECT" + +# Ensure Azure CLI is logged in and DevOps extension is installed +if ! az extension show --name azure-devops &>/dev/null; then + echo "Installing Azure DevOps CLI extension..." + az extension add --name azure-devops --output none +fi + +# Configure Azure DevOps CLI defaults +az devops configure --defaults organization="https://dev.azure.com/$AZURE_DEVOPS_ORG" project="$AZURE_DEVOPS_PROJECT" --output none + +# Get list of service connections +echo "Retrieving service connections in project..." +if ! connections=$(az devops service-endpoint list --output json 2>connections_err.log); then + err_msg=$(cat connections_err.log) + rm -f connections_err.log + + echo "ERROR: Could not list service connections." 
+ issues_json=$(echo "$issues_json" | jq \ + --arg title "Failed to List Service Connections" \ + --arg details "$err_msg" \ + --arg severity "4" \ + --arg nextStep "Check if you have sufficient permissions to view service connections." \ + '. += [{ + "title": $title, + "details": $details, + "next_step": $nextStep, + "severity": ($severity | tonumber) + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 1 +fi +rm -f connections_err.log + +# Save connections to a file to avoid subshell issues +echo "$connections" > connections.json + +# Get the number of connections +connection_count=$(jq '. | length' connections.json) + +# Process each service connection using a for loop instead of pipe to while +for ((i=0; i "$OUTPUT_FILE" +echo "Azure DevOps service connections analysis completed. Saved results to $OUTPUT_FILE" diff --git a/codebundles/azure-devops-triage/sli.robot b/codebundles/azure-devops-triage/sli.robot new file mode 100755 index 000000000..2957caf5b --- /dev/null +++ b/codebundles/azure-devops-triage/sli.robot @@ -0,0 +1,156 @@ +*** Settings *** +Documentation Counts Azure DevOps health issues by examining pipeline status, agent pools, and build logs +Metadata Author Nbarola +Metadata Display Name Azure DevOps Health +Metadata Supports Azure DevOps Pipelines Health +Force Tags Azure DevOps Pipelines Health + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + +*** Tasks *** +Count Agent Pool Health Issues in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts the health issues of Agent Pools in the specified organization + [Tags] DevOps Azure Health access:read-only + ${agent_pool}= RW.CLI.Run Bash File + ... bash_file=agent-pools.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('agent_pools_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${agent_pool_health_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${agent_pool_health_score} + +Count Failed Pipeline Runs in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts the number of failed pipeline runs in the specified project + [Tags] DevOps Azure Pipelines Failures access:read-only + ${failed_pipelines}= RW.CLI.Run Bash File + ... bash_file=pipeline-logs.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('pipeline_logs_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${failed_pipelines_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${failed_pipelines_score} + +Count Long Running Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts pipelines that are running longer than expected + [Tags] DevOps Azure Pipelines Performance access:read-only + ${long_running}= RW.CLI.Run Bash File + ... bash_file=long-running-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('long_running_pipelines.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. 
WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${long_running_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${long_running_score} + +Count Queued Pipelines in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts pipelines that are queued for longer than expected + [Tags] DevOps Azure Pipelines Queue access:read-only + ${queued_pipelines}= RW.CLI.Run Bash File + ... bash_file=queued-pipelines.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('queued_pipelines.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${queued_pipelines_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${queued_pipelines_score} + +Count Repository Policy Issues in project `${AZURE_DEVOPS_PROJECT}` in organisation `${AZURE_DEVOPS_ORG}` + [Documentation] Counts repository branch policy issues against best practices + [Tags] DevOps Azure Pipelines Policies access:read-only + ${repo_policy}= RW.CLI.Run Bash File + ... bash_file=repo-policies.sh + ... env=${env} + ... timeout_seconds=180 + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(open('repo_policies_issues.json').read()) json + EXCEPT + Log Failed to load JSON file, defaulting to empty list.
WARN + ${issue_list}= Create List + END + + ${issue_count}= Get Length ${issue_list} + ${repo_policy_score}= Evaluate 1 if ${issue_count} == 0 else 0 + Set Global Variable ${repo_policy_score} + +Generate Comprehensive Azure DevOps Health Score + ${devops_health_score}= Evaluate (${agent_pool_health_score} + ${failed_pipelines_score} + ${long_running_score} + ${queued_pipelines_score} + ${repo_policy_score}) / 5 + ${health_score}= Convert to Number ${devops_health_score} 2 + RW.Core.Push Metric ${health_score} + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + ${AZURE_DEVOPS_ORG}= RW.Core.Import User Variable AZURE_DEVOPS_ORG + ... type=string + ... description=Azure DevOps organization. + ... pattern=\w* + ${AZURE_DEVOPS_PROJECT}= RW.Core.Import User Variable AZURE_DEVOPS_PROJECT + ... type=string + ... description=Azure DevOps project. + ... pattern=\w* + ${DURATION_THRESHOLD}= RW.Core.Import User Variable DURATION_THRESHOLD + ... type=string + ... description=Threshold for long-running pipelines (format: 60m, 2h) + ... default=60m + ${QUEUE_THRESHOLD}= RW.Core.Import User Variable QUEUE_THRESHOLD + ... type=string + ... description=Threshold for queued pipelines (format: 10m, 1h) + ... default=30m + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable ${AZURE_DEVOPS_ORG} ${AZURE_DEVOPS_ORG} + Set Suite Variable ${AZURE_DEVOPS_PROJECT} ${AZURE_DEVOPS_PROJECT} + Set Suite Variable ${DURATION_THRESHOLD} ${DURATION_THRESHOLD} + Set Suite Variable ${QUEUE_THRESHOLD} ${QUEUE_THRESHOLD} + Set Suite Variable + ... ${env} + ...
{"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_DEVOPS_ORG":"${AZURE_DEVOPS_ORG}", "AZURE_DEVOPS_PROJECT":"${AZURE_DEVOPS_PROJECT}", "DURATION_THRESHOLD":"${DURATION_THRESHOLD}", "QUEUE_THRESHOLD":"${QUEUE_THRESHOLD}"}