Skip to content

E2E

E2E #23

Workflow file for this run

# AgentOps end-to-end demo workflow.
#
# Manual-only: trigger via "Run workflow" on the Actions page.
# Uploads the resulting evidence/ folder as a downloadable artifact and
# writes a Markdown summary to the run page.
#
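# With the default `scenarios: offline-only` input, this workflow does NOT
# contact Azure or Foundry: it validates the CLI surface (init, eval run,
# eval run --baseline, report generate) against a local in-process HTTP
# echo agent. Any other `scenarios` choice opts in to the live Azure jobs
# defined further down.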
# Workflow identity. The only trigger is a manual dispatch.
name: E2E

on:
  workflow_dispatch:
    inputs:
      # Selects which job groups run. Every value other than offline-only
      # enables the bootstrap-live job and the matching live scenario job(s).
      scenarios:
        type: choice
        description: 'Which live scenario(s) to run'
        default: offline-only
        options:
          - offline-only
          - all
          - foundry-prompt
          - foundry-hosted
          - http-aca
          - model-direct
      # When set, the teardown-live job is skipped so the per-run resources
      # remain available for inspection.
      keep_resources:
        type: boolean
        description: 'Skip teardown of per-run resources (debug)'
        default: false

# Workflow-wide default token scope. The live jobs declare their own
# permissions block to add id-token: write.
permissions:
  contents: read

env:
  # Silence "Node.js 20 actions are deprecated" warnings from azure/login@v2 etc.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'

jobs:
# Offline demo job: runs scripts/e2e_demo.py, then publishes whatever landed
# in evidence/ both as a run-page summary and as a downloadable artifact.
demo:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Install AgentOps
      run: uv sync --group dev

    - name: Install runtime evaluator dependencies
      run: uv pip install azure-ai-evaluation pandas

    - name: Run E2E demo
      run: uv run python scripts/e2e_demo.py

    # always(): executed regardless of the outcome of the earlier steps.
    # Appends the first SUMMARY.md found under evidence/ (if any) to the
    # job summary; does nothing when no such file exists.
    - name: Render summary on run page
      if: always()
      run: |
        summary_file=$(find evidence -name SUMMARY.md | head -n 1)
        if [ -z "$summary_file" ]; then
          exit 0
        fi
        cat "$summary_file" >> "$GITHUB_STEP_SUMMARY"

    # always(): executed regardless of the outcome of the earlier steps.
    # An empty or missing evidence/ directory is reported as an error.
    - name: Upload evidence artifact
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: agentops-e2e-evidence
        path: evidence/
        retention-days: 14
        if-no-files-found: error
# Unit-test job: runs pytest over tests/ with coverage for the agentops
# package and uploads the JUnit and coverage XML reports.
unit-tests-with-coverage:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Install dependencies
      run: uv sync --group dev

    - name: Install runtime evaluator dependencies
      run: uv pip install azure-ai-evaluation pandas

    # Folded scalar: the lines below are joined with single spaces into one
    # pytest command line. tests/unit/test_browse.py is excluded.
    - name: Run pytest with coverage
      run: >-
        uv run pytest tests/
        --ignore=tests/unit/test_browse.py
        --junitxml=test-results.xml
        --cov=agentops
        --cov-report=xml
        --cov-report=term-missing

    # always(): reports are uploaded even when pytest fails. Missing files
    # only produce a warning.
    - name: Upload coverage + JUnit
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: agentops-test-results
        path: |
          test-results.xml
          coverage.xml
        retention-days: 14
        if-no-files-found: warn
# =====================================================================
# Live Azure scenarios — opt-in via the `scenarios` workflow input.
# Auth: OIDC federated credential. No secrets stored in the repo.
# See docs/e2e-live-setup.md for the one-time configuration steps.
# =====================================================================
# Provisions the per-run Azure resources consumed by the live scenario jobs
# and publishes their identifiers as job outputs. Skipped for the default
# offline-only selection.
#
# Values coming from `vars.*` / `steps.*` are handed to the inline scripts
# through `env:` instead of being spliced into the script text, so their
# content is never parsed as shell code.
bootstrap-live:
  if: ${{ github.event.inputs.scenarios != 'offline-only' }}
  runs-on: ubuntu-latest
  environment: e2e
  permissions:
    id-token: write  # needed by the OIDC-based azure/login step
    contents: read
  outputs:
    aca_url: ${{ steps.deploy_perrun.outputs.aca_url }}
    aca_app_name: ${{ steps.deploy_perrun.outputs.aca_app_name }}
    hosted_agent_id: ${{ steps.create_hosted_agent.outputs.agent_id }}
    hosted_agent_name: ${{ steps.create_hosted_agent.outputs.agent_name }}
    suffix: ${{ steps.suffix.outputs.value }}
  steps:
    - uses: actions/checkout@v6

    # GITHUB_RUN_ID is the runner-provided equivalent of github.run_id.
    - id: suffix
      name: Compute per-run suffix
      run: echo "value=run${GITHUB_RUN_ID}" >> "$GITHUB_OUTPUT"

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    # Only for the `all` and `http-aca` selections.
    - id: deploy_perrun
      name: Deploy per-run ACA echo app
      if: ${{ github.event.inputs.scenarios == 'all' || github.event.inputs.scenarios == 'http-aca' }}
      env:
        E2E_RESOURCE_GROUP: ${{ vars.AZURE_E2E_RESOURCE_GROUP }}
        E2E_ACA_ENV_ID: ${{ vars.AZURE_E2E_ACA_ENV_ID }}
        E2E_RUN_SUFFIX: ${{ steps.suffix.outputs.value }}
      run: |
        set -euo pipefail
        deployment_name="agentops-e2e-perrun-${GITHUB_RUN_ID}"
        az deployment group create \
          --resource-group "$E2E_RESOURCE_GROUP" \
          --name "$deployment_name" \
          --template-file infra/e2e/perrun.bicep \
          --parameters \
            acaEnvironmentId="$E2E_ACA_ENV_ID" \
            suffix="$E2E_RUN_SUFFIX" \
          --output json > deployment.json
        # -e: exit non-zero when the output is missing/null, instead of
        # publishing the literal string "null" to downstream jobs.
        aca_url=$(jq -er '.properties.outputs.echoUrl.value' deployment.json)
        aca_app=$(jq -er '.properties.outputs.appName.value' deployment.json)
        echo "aca_url=$aca_url" >> "$GITHUB_OUTPUT"
        echo "aca_app_name=$aca_app" >> "$GITHUB_OUTPUT"
        echo "Deployed ACA echo app: $aca_url"

    # Polls the app root up to 30 times, 5 s apart, and fails the step if
    # it never answers successfully.
    - name: Wait for ACA app to be reachable
      if: steps.deploy_perrun.outputs.aca_url != ''
      env:
        ACA_URL: ${{ steps.deploy_perrun.outputs.aca_url }}
      run: |
        set -e
        for i in $(seq 1 30); do
          if curl -fsS -o /dev/null --max-time 5 "${ACA_URL}/"; then
            echo "ACA reachable after ${i} attempts"
            exit 0
          fi
          sleep 5
        done
        echo "ACA app did not become reachable" >&2
        exit 1

    # The remaining steps run only for the `all` and `foundry-hosted`
    # selections.
    - name: Set up Python (for hosted agent provisioning)
      if: ${{ github.event.inputs.scenarios == 'all' || github.event.inputs.scenarios == 'foundry-hosted' }}
      uses: actions/setup-python@v6
      with:
        python-version: "3.12"

    - name: Install azure-ai-projects (for hosted agent provisioning)
      if: ${{ github.event.inputs.scenarios == 'all' || github.event.inputs.scenarios == 'foundry-hosted' }}
      run: pip install --quiet "azure-ai-projects>=2.0.1" azure-identity

    # NOTE(review): the job outputs above expect this step to emit
    # `agent_id` and `agent_name` step outputs; presumably the script writes
    # them to $GITHUB_OUTPUT — confirm in scripts/e2e_hosted_agent.py.
    - id: create_hosted_agent
      name: Create transient hosted agent (with get_weather tool)
      if: ${{ github.event.inputs.scenarios == 'all' || github.event.inputs.scenarios == 'foundry-hosted' }}
      env:
        AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ vars.AZURE_E2E_FOUNDRY_PROJECT_ENDPOINT }}
        E2E_RUN_SUFFIX: ${{ steps.suffix.outputs.value }}
        E2E_MODEL_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}
      run: |
        python scripts/e2e_hosted_agent.py create \
          --name "e2e-hosted-${E2E_RUN_SUFFIX}" \
          --model "${E2E_MODEL_DEPLOYMENT}"
# Live scenario "foundry-prompt": runs `agentops eval run` from
# e2e-runs/foundry-prompt and uploads the results. Selected by the `all`
# and `foundry-prompt` choices; waits for bootstrap-live.
live-foundry-prompt:
  runs-on: ubuntu-latest
  environment: e2e
  needs: bootstrap-live
  if: >-
    ${{ github.event.inputs.scenarios == 'all'
    || github.event.inputs.scenarios == 'foundry-prompt' }}
  permissions:
    contents: read
    id-token: write  # needed by the OIDC-based azure/login step
  steps:
    - uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    - run: uv sync --group dev

    - run: uv pip install azure-ai-evaluation pandas azure-ai-projects azure-identity

    # NOTE(review): presumably this script writes the
    # e2e-runs/foundry-prompt/ directory used below — confirm in
    # scripts/e2e_render_config.py.
    - name: Render scenario config
      run: uv run python scripts/e2e_render_config.py
      env:
        AGENTOPS_E2E_FOUNDRY_PROMPT_AGENT: ${{ vars.AGENTOPS_E2E_FOUNDRY_PROMPT_AGENT }}

    - name: Run AgentOps eval
      working-directory: e2e-runs/foundry-prompt
      run: uv run agentops eval run --config agentops.yaml
      env:
        AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ vars.AZURE_E2E_FOUNDRY_PROJECT_ENDPOINT }}
        AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_E2E_OPENAI_ENDPOINT }}
        AZURE_OPENAI_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}

    # The two steps below run regardless of the eval outcome.
    - name: Render transcript
      if: always()
      run: uv run python scripts/e2e_make_transcript.py e2e-runs/foundry-prompt

    - if: always()
      uses: actions/upload-artifact@v7
      with:
        name: e2e-foundry-prompt
        path: |
          e2e-runs/foundry-prompt/.agentops/results/
          e2e-runs/foundry-prompt/transcript.md
          e2e-runs/foundry-prompt/HEADER.md
          e2e-runs/foundry-prompt/agentops.yaml
        retention-days: 14
        if-no-files-found: warn
# Live scenario "foundry-hosted": runs `agentops eval run` from
# e2e-runs/foundry-hosted against the agent id published by bootstrap-live.
# Selected by the `all` and `foundry-hosted` choices, and only when
# bootstrap-live actually produced a non-empty hosted_agent_id.
live-foundry-hosted:
  runs-on: ubuntu-latest
  environment: e2e
  needs: bootstrap-live
  if: >-
    ${{ (github.event.inputs.scenarios == 'all'
    || github.event.inputs.scenarios == 'foundry-hosted')
    && needs.bootstrap-live.outputs.hosted_agent_id != '' }}
  permissions:
    contents: read
    id-token: write  # needed by the OIDC-based azure/login step
  steps:
    - uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    - run: uv sync --group dev

    - run: uv pip install azure-ai-evaluation pandas azure-ai-projects azure-identity

    # NOTE(review): presumably this script writes the
    # e2e-runs/foundry-hosted/ directory used below — confirm in
    # scripts/e2e_render_config.py.
    - name: Render scenario config
      run: uv run python scripts/e2e_render_config.py
      env:
        AGENTOPS_E2E_FOUNDRY_HOSTED_AGENT: ${{ needs.bootstrap-live.outputs.hosted_agent_id }}

    - name: Run AgentOps eval
      working-directory: e2e-runs/foundry-hosted
      run: uv run agentops eval run --config agentops.yaml
      env:
        AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ vars.AZURE_E2E_FOUNDRY_PROJECT_ENDPOINT }}
        AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_E2E_OPENAI_ENDPOINT }}
        AZURE_OPENAI_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}

    # The two steps below run regardless of the eval outcome.
    - name: Render transcript
      if: always()
      run: uv run python scripts/e2e_make_transcript.py e2e-runs/foundry-hosted

    - if: always()
      uses: actions/upload-artifact@v7
      with:
        name: e2e-foundry-hosted
        path: |
          e2e-runs/foundry-hosted/.agentops/results/
          e2e-runs/foundry-hosted/transcript.md
          e2e-runs/foundry-hosted/HEADER.md
          e2e-runs/foundry-hosted/agentops.yaml
          e2e-runs/foundry-hosted/agent-info.json
        retention-days: 14
        if-no-files-found: warn
# Live scenario "http-aca": runs `agentops eval run` from e2e-runs/http-aca
# against the ACA URL published by bootstrap-live. Selected by the `all`
# and `http-aca` choices.
live-http-aca:
  runs-on: ubuntu-latest
  environment: e2e
  needs: bootstrap-live
  if: >-
    ${{ github.event.inputs.scenarios == 'all'
    || github.event.inputs.scenarios == 'http-aca' }}
  permissions:
    contents: read
    id-token: write  # needed by the OIDC-based azure/login step
  steps:
    - uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    - run: uv sync --group dev

    - run: uv pip install azure-ai-evaluation pandas

    # NOTE(review): presumably this script writes the e2e-runs/http-aca/
    # directory used below — confirm in scripts/e2e_render_config.py.
    - name: Render scenario config
      run: uv run python scripts/e2e_render_config.py
      env:
        AGENTOPS_E2E_ACA_URL: ${{ needs.bootstrap-live.outputs.aca_url }}

    - name: Run AgentOps eval
      working-directory: e2e-runs/http-aca
      run: uv run agentops eval run --config agentops.yaml
      env:
        AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_E2E_OPENAI_ENDPOINT }}
        AZURE_OPENAI_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}

    # The two steps below run regardless of the eval outcome.
    - name: Render transcript
      if: always()
      run: uv run python scripts/e2e_make_transcript.py e2e-runs/http-aca

    - if: always()
      uses: actions/upload-artifact@v7
      with:
        name: e2e-http-aca
        path: |
          e2e-runs/http-aca/.agentops/results/
          e2e-runs/http-aca/transcript.md
          e2e-runs/http-aca/HEADER.md
          e2e-runs/http-aca/agentops.yaml
        retention-days: 14
        if-no-files-found: warn
# Live scenario "model-direct": runs `agentops eval run` from
# e2e-runs/model-direct using the configured model deployment. Selected by
# the `all` and `model-direct` choices; waits for bootstrap-live.
live-model-direct:
  runs-on: ubuntu-latest
  environment: e2e
  needs: bootstrap-live
  if: >-
    ${{ github.event.inputs.scenarios == 'all'
    || github.event.inputs.scenarios == 'model-direct' }}
  permissions:
    contents: read
    id-token: write  # needed by the OIDC-based azure/login step
  steps:
    - uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: '3.12'

    - uses: astral-sh/setup-uv@v7
      with:
        version: '>=0.9.0'

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    - run: uv sync --group dev

    - run: uv pip install azure-ai-evaluation pandas azure-ai-projects azure-identity

    # NOTE(review): presumably this script writes the
    # e2e-runs/model-direct/ directory used below — confirm in
    # scripts/e2e_render_config.py.
    - name: Render scenario config
      run: uv run python scripts/e2e_render_config.py
      env:
        AGENTOPS_E2E_MODEL_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}

    - name: Run AgentOps eval
      working-directory: e2e-runs/model-direct
      run: uv run agentops eval run --config agentops.yaml
      env:
        AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ vars.AZURE_E2E_FOUNDRY_PROJECT_ENDPOINT }}
        AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_E2E_OPENAI_ENDPOINT }}
        AZURE_OPENAI_DEPLOYMENT: ${{ vars.AZURE_E2E_MODEL_DEPLOYMENT }}

    # The two steps below run regardless of the eval outcome.
    - name: Render transcript
      if: always()
      run: uv run python scripts/e2e_make_transcript.py e2e-runs/model-direct

    - if: always()
      uses: actions/upload-artifact@v7
      with:
        name: e2e-model-direct
        path: |
          e2e-runs/model-direct/.agentops/results/
          e2e-runs/model-direct/transcript.md
          e2e-runs/model-direct/HEADER.md
          e2e-runs/model-direct/agentops.yaml
        retention-days: 14
        if-no-files-found: warn
# Removes the per-run resources created by bootstrap-live and sweeps stale
# echo apps. Runs after all live jobs whatever their outcome (always()),
# unless the selection was offline-only or keep_resources was requested.
#
# Every deletion is best-effort: a failure is surfaced as a warning
# annotation but does not fail the job.
teardown-live:
  needs:
    - bootstrap-live
    - live-foundry-prompt
    - live-foundry-hosted
    - live-http-aca
    - live-model-direct
  if: ${{ always() && github.event.inputs.scenarios != 'offline-only' && github.event.inputs.keep_resources != 'true' }}
  runs-on: ubuntu-latest
  environment: e2e
  permissions:
    id-token: write  # needed by the OIDC-based azure/login step
    contents: read
  steps:
    # Required: the hosted-agent delete step below executes
    # scripts/e2e_hosted_agent.py from the repository. Without a checkout
    # the script is absent and the agent is never deleted.
    - uses: actions/checkout@v6

    - name: Azure login (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    - name: Delete per-run ACA app
      if: needs.bootstrap-live.outputs.aca_app_name != ''
      env:
        E2E_RESOURCE_GROUP: ${{ vars.AZURE_E2E_RESOURCE_GROUP }}
        ACA_APP_NAME: ${{ needs.bootstrap-live.outputs.aca_app_name }}
      run: |
        az containerapp delete \
          --resource-group "$E2E_RESOURCE_GROUP" \
          --name "$ACA_APP_NAME" \
          --yes || echo "::warning::Failed to delete ACA app ${ACA_APP_NAME}"

    - name: Set up Python (for hosted agent teardown)
      if: needs.bootstrap-live.outputs.hosted_agent_name != ''
      uses: actions/setup-python@v6
      with:
        python-version: "3.12"

    - name: Install azure-ai-projects (for hosted agent teardown)
      if: needs.bootstrap-live.outputs.hosted_agent_name != ''
      run: pip install --quiet "azure-ai-projects>=2.0.1" azure-identity

    - name: Delete transient hosted agent
      if: needs.bootstrap-live.outputs.hosted_agent_name != ''
      env:
        AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ vars.AZURE_E2E_FOUNDRY_PROJECT_ENDPOINT }}
        HOSTED_AGENT_NAME: ${{ needs.bootstrap-live.outputs.hosted_agent_name }}
      run: |
        python scripts/e2e_hosted_agent.py delete \
          --name "$HOSTED_AGENT_NAME" \
          || echo "::warning::Failed to delete hosted agent ${HOSTED_AGENT_NAME}"

    # Deletes container apps whose name starts with 'aca-echo-run' and
    # whose systemData.createdAt sorts before "one day ago" (UTC); the
    # comparison is a plain string comparison done by jq.
    - name: Sweep stale e2e ACA apps (>1d old)
      env:
        E2E_RESOURCE_GROUP: ${{ vars.AZURE_E2E_RESOURCE_GROUP }}
      run: |
        set -e
        cutoff=$(date -u -d '1 day ago' +%FT%TZ)
        mapfile -t apps < <(az containerapp list \
          --resource-group "$E2E_RESOURCE_GROUP" \
          --query "[?starts_with(name,'aca-echo-run') && properties.latestRevisionFqdn!=null].{name:name,created:systemData.createdAt}" \
          -o json | jq -r --arg cutoff "$cutoff" '.[] | select(.created < $cutoff) | .name')
        for app in "${apps[@]}"; do
          [ -z "$app" ] && continue
          echo "Deleting stale app: $app"
          az containerapp delete -g "$E2E_RESOURCE_GROUP" -n "$app" --yes \
            || echo "::warning::Failed to delete stale ACA app ${app}"
        done