diff --git a/.github/workflows/acr-control-plane-release.yml b/.github/workflows/acr-control-plane-release.yml new file mode 100644 index 0000000..5974464 --- /dev/null +++ b/.github/workflows/acr-control-plane-release.yml @@ -0,0 +1,164 @@ +name: ACR Control Plane Release + +on: + push: + tags: + - "v*.*.*" + +permissions: + contents: read + +env: + CONTROL_PLANE_DIR: implementations/acr-control-plane + COMPLIANCE_OUTPUT_DIR: dist/compliance + PACKAGE_BASENAME: acr-control-plane-compliance-package-${{ github.ref_name }} + +jobs: + docker-publish: + name: Publish ${{ matrix.service }} image + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + attestations: write + strategy: + matrix: + include: + - service: gateway + dockerfile: Dockerfile + - service: killswitch + dockerfile: Dockerfile.killswitch + + steps: + - uses: actions/checkout@v4 + + - name: Normalize image name + id: image + shell: bash + run: | + repo_lc="$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')" + echo "uri=ghcr.io/${repo_lc}/acr-${{ matrix.service }}" >> "${GITHUB_OUTPUT}" + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ steps.image.outputs.uri }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha,prefix=sha- + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install Cosign + uses: sigstore/cosign-installer@v4.1.0 + + - name: Build and push ${{ matrix.service }} + id: build + uses: docker/build-push-action@v6 + with: + context: ${{ env.CONTROL_PLANE_DIR }} + file: ${{ env.CONTROL_PLANE_DIR }}/${{ matrix.dockerfile }} + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels 
}} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Attest build provenance + uses: actions/attest-build-provenance@v3 + with: + subject-name: ${{ steps.image.outputs.uri }} + subject-digest: ${{ steps.build.outputs.digest }} + push-to-registry: true + + - name: Sign image with GitHub OIDC + run: cosign sign --yes "${IMAGE_URI}@${DIGEST}" + env: + IMAGE_URI: ${{ steps.image.outputs.uri }} + DIGEST: ${{ steps.build.outputs.digest }} + + compliance-package: + name: Build signed compliance package + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + attestations: write + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Cosign + uses: sigstore/cosign-installer@v4.1.0 + + - name: Build compliance package + run: | + python scripts/build_compliance_package.py \ + --implementation-dir . \ + --version "${GITHUB_REF_NAME}" \ + --source-ref "${GITHUB_SHA}" \ + --output-dir "../../${COMPLIANCE_OUTPUT_DIR}" + working-directory: ${{ env.CONTROL_PLANE_DIR }} + + - name: Sign compliance package tarball + run: | + cosign sign-blob --yes \ + --bundle "${COMPLIANCE_OUTPUT_DIR}/${PACKAGE_BASENAME}.sigstore.json" \ + "${COMPLIANCE_OUTPUT_DIR}/${PACKAGE_BASENAME}.tar.gz" + + - name: Attest compliance package provenance + uses: actions/attest-build-provenance@v3 + with: + subject-path: ${{ env.COMPLIANCE_OUTPUT_DIR }}/${{ env.PACKAGE_BASENAME }}.tar.gz + + - name: Upload compliance release assets + uses: actions/upload-artifact@v4 + with: + name: acr-control-plane-compliance-assets + path: | + ${{ env.COMPLIANCE_OUTPUT_DIR }}/${{ env.PACKAGE_BASENAME }}.tar.gz + ${{ env.COMPLIANCE_OUTPUT_DIR }}/${{ env.PACKAGE_BASENAME }}.manifest.json + ${{ env.COMPLIANCE_OUTPUT_DIR }}/${{ env.PACKAGE_BASENAME }}.sha256 + ${{ env.COMPLIANCE_OUTPUT_DIR }}/${{ env.PACKAGE_BASENAME }}.sigstore.json + retention-days: 30 + + github-release: + name: Create GitHub Release + runs-on: 
ubuntu-latest + needs: [docker-publish, compliance-package] + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - name: Download compliance assets + uses: actions/download-artifact@v4 + with: + name: acr-control-plane-compliance-assets + path: dist/release-assets + + - name: Create release + uses: softprops/action-gh-release@v2 + with: + generate_release_notes: true + make_latest: true + files: | + dist/release-assets/${{ env.PACKAGE_BASENAME }}.tar.gz + dist/release-assets/${{ env.PACKAGE_BASENAME }}.manifest.json + dist/release-assets/${{ env.PACKAGE_BASENAME }}.sha256 + dist/release-assets/${{ env.PACKAGE_BASENAME }}.sigstore.json diff --git a/implementations/acr-control-plane/.env.example b/implementations/acr-control-plane/.env.example index 6e46db2..982bbc6 100644 --- a/implementations/acr-control-plane/.env.example +++ b/implementations/acr-control-plane/.env.example @@ -76,6 +76,8 @@ WEBHOOK_URL= # Receivers must verify this header before trusting the payload. # Minimum 32 random bytes (hex). Leave blank to skip signing. WEBHOOK_HMAC_SECRET= +# Minimum 32 random bytes (hex). Used to sign audit/evidence bundles. +AUDIT_SIGNING_SECRET=dev_audit_signing_secret_change_me # ── OpenTelemetry ───────────────────────────────────────────────────────────── # Leave blank to disable OTLP trace export (e.g. http://localhost:4318). 
@@ -106,6 +108,7 @@ EXECUTOR_TIMEOUT_SECONDS=8.0 # Supported backends: # local : write bundles to the local filesystem # s3 : write bundles to S3 or an S3-compatible object store +REQUIRE_BUNDLE_AUTH=true POLICY_BUNDLE_BACKEND=local POLICY_BUNDLE_LOCAL_DIR=./var/policy_bundles # Required when POLICY_BUNDLE_BACKEND=s3 diff --git a/implementations/acr-control-plane/.env.production.example b/implementations/acr-control-plane/.env.production.example index 0dc4cd5..9df035a 100644 --- a/implementations/acr-control-plane/.env.production.example +++ b/implementations/acr-control-plane/.env.production.example @@ -55,9 +55,10 @@ EXECUTOR_INTEGRATIONS_JSON= WEBHOOK_URL= OTEL_EXPORTER_OTLP_ENDPOINT= +REQUIRE_BUNDLE_AUTH=false POLICY_BUNDLE_BACKEND=s3 POLICY_BUNDLE_S3_BUCKET=CHANGE_ME POLICY_BUNDLE_S3_PREFIX=acr/policy-bundles POLICY_BUNDLE_S3_REGION=us-east-1 POLICY_BUNDLE_S3_ENDPOINT_URL= -POLICY_BUNDLE_PUBLIC_BASE_URL= +POLICY_BUNDLE_PUBLIC_BASE_URL=https://acr.example.com diff --git a/implementations/acr-control-plane/README.md b/implementations/acr-control-plane/README.md index 2519b43..b944414 100644 --- a/implementations/acr-control-plane/README.md +++ b/implementations/acr-control-plane/README.md @@ -102,6 +102,12 @@ With the included stack, you can: The included sample agent shows the control plane denying unsafe actions and escalating high-risk ones. +There is now an official [SDK and adapter guide](docs/sdk.md) covering: + +- a Python SDK +- a LangGraph/LangChain-style tool guard +- a TypeScript SDK + There is also a runnable [protected executor example](examples/protected_executor/README.md) that verifies both `X-ACR-Execution-Token` and `X-ACR-Brokered-Credential`, so downstream services can reject direct-bypass requests that were not explicitly authorized by the gateway. 
For workflow builders and orchestration tools, there is now an explicit [orchestrator integration guide](docs/orchestrators.md) plus an [n8n reference example](examples/n8n/README.md) showing how to put ACR underneath the workflow layer instead of relying on optional user behavior. @@ -212,6 +218,10 @@ Current highlights: - the repo includes a dedicated security workflow for CodeQL, Semgrep, Gitleaks, Trivy, and SBOM generation - production secret generation and secret-hygiene checks are in place - brokered downstream credential minting is implemented +- release provenance and signing guidance now live in [docs/provenance-and-verification.md](docs/provenance-and-verification.md) +- the enterprise review artifacts now live in [docs/compliance/README.md](docs/compliance/README.md) +- the blessed production deployment path now lives in [deploy/k8s/overlays/production/README.md](deploy/k8s/overlays/production/README.md) +- official Python and TypeScript SDKs now live in [docs/sdk.md](docs/sdk.md) - the next major gap is reliability semantics under dependency failure ## Architecture diff --git a/implementations/acr-control-plane/deploy/k8s/base/gateway-configmap.yaml b/implementations/acr-control-plane/deploy/k8s/base/gateway-configmap.yaml index e39c9a0..a2961c0 100644 --- a/implementations/acr-control-plane/deploy/k8s/base/gateway-configmap.yaml +++ b/implementations/acr-control-plane/deploy/k8s/base/gateway-configmap.yaml @@ -11,9 +11,12 @@ data: KILLSWITCH_URL: "http://acr-killswitch:8443" OTEL_SERVICE_NAME: "acr-control-plane" EXECUTE_ALLOWED_ACTIONS: "true" + REQUIRE_BUNDLE_AUTH: "false" POLICY_BUNDLE_BACKEND: "s3" POLICY_BUNDLE_S3_BUCKET: "change-me" POLICY_BUNDLE_S3_PREFIX: "acr/policy-bundles" + POLICY_BUNDLE_S3_REGION: "us-east-1" + POLICY_BUNDLE_PUBLIC_BASE_URL: "https://acr.example.com" OIDC_ENABLED: "true" OIDC_ISSUER: "https://login.example.com/" OIDC_CLIENT_ID: "acr-control-plane" diff --git 
a/implementations/acr-control-plane/deploy/k8s/base/gateway-secret.example.yaml b/implementations/acr-control-plane/deploy/k8s/base/gateway-secret.example.yaml index cf1e2ed..2775db4 100644 --- a/implementations/acr-control-plane/deploy/k8s/base/gateway-secret.example.yaml +++ b/implementations/acr-control-plane/deploy/k8s/base/gateway-secret.example.yaml @@ -13,7 +13,9 @@ stringData: OPERATOR_SESSION_SECRET: "change-me-32-bytes-minimum" OIDC_CLIENT_SECRET: "change-me" WEBHOOK_HMAC_SECRET: "change-me-32-bytes-minimum" + AUDIT_SIGNING_SECRET: "change-me-32-bytes-minimum" EXECUTOR_HMAC_SECRET: "change-me-32-bytes-minimum" + EXECUTOR_CREDENTIAL_SECRET: "change-me-32-bytes-minimum" FINANCE_EXECUTOR_API_KEY: "change-me" EMAIL_EXECUTOR_API_KEY: "change-me" TICKET_EXECUTOR_API_KEY: "change-me" diff --git a/implementations/acr-control-plane/deploy/k8s/base/kustomization.yaml b/implementations/acr-control-plane/deploy/k8s/base/kustomization.yaml index d2b3a10..9e28aa5 100644 --- a/implementations/acr-control-plane/deploy/k8s/base/kustomization.yaml +++ b/implementations/acr-control-plane/deploy/k8s/base/kustomization.yaml @@ -5,7 +5,6 @@ resources: - namespace.yaml - rbac.yaml - gateway-configmap.yaml - - gateway-secret.example.yaml - gateway-deployment.yaml - gateway-service.yaml - killswitch-deployment.yaml diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/README.md b/implementations/acr-control-plane/deploy/k8s/overlays/production/README.md new file mode 100644 index 0000000..f8e1576 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/README.md @@ -0,0 +1,84 @@ +# Blessed Production Deployment + +This overlay is the supported production deployment path for the ACR control plane. 
+ +It assumes: + +- Kubernetes as the runtime platform +- External Secrets Operator for application secret material +- managed PostgreSQL and managed Redis +- OIDC-first operator authentication +- S3 or an S3-compatible object store for policy bundle publishing +- ingress-nginx plus cert-manager for north-south traffic +- internal executors reachable only on private networks + +It is opinionated on purpose. This path exists so platform and security teams have one deployment model to review, harden, and automate. + +## What This Overlay Changes + +Relative to `deploy/k8s/base`, this overlay: + +- removes the example secret from the runtime path and expects a real `ExternalSecret` +- keeps bundle delivery on object storage and disables bundle auth so OPA can poll directly +- assumes OIDC is the normal operator login path +- replaces the base gateway and kill-switch network policy with a production egress allowlist +- adds namespace pod-security labels +- adds rollout settings like `minReadySeconds`, `progressDeadlineSeconds`, and topology spreading +- points workloads at release images rather than local `acr-gateway:1.0.0` tags + +## Before You Apply It + +1. Create a `ClusterSecretStore` or `SecretStore` for your secret manager. +2. Edit [external-secret.yaml](external-secret.yaml): + - `secretStoreRef.name` + - remote secret keys and property names +3. Edit [networkpolicy-production.yaml](networkpolicy-production.yaml): + - replace all `203.0.113.x/32` placeholder CIDRs + - add or remove executor namespaces as needed +4. Edit [gateway-configmap-patch.yaml](gateway-configmap-patch.yaml): + - hostnames + - object-storage details + - OIDC details + - OTLP endpoint if used +5. Edit [ingress-patch.yaml](ingress-patch.yaml): + - public host + - TLS secret +6. Make sure your cluster can pull the release images: + - mirror them into your internal registry, or + - create a registry pull secret if your GHCR packages are private +7. 
Replace the example release tags in [kustomization.yaml](kustomization.yaml) with your promoted image digests before production promotion. + +## Build and Review + +```bash +kubectl kustomize deploy/k8s/overlays/production +``` + +## Apply + +```bash +kubectl apply -k deploy/k8s/overlays/production +``` + +## Post-Deploy Validation + +- `kubectl -n acr-system get externalsecret,secret` +- `kubectl -n acr-system get pods` +- `kubectl -n acr-system rollout status deploy/acr-gateway` +- `kubectl -n acr-system rollout status deploy/acr-killswitch` +- `kubectl -n acr-system rollout status deploy/acr-opa` +- `curl https://acr.example.com/acr/health` +- `curl https://acr.example.com/acr/ready` +- validate OIDC login, policy activation, approval flow, evidence export, and executor reachability + +## Why This Is The Blessed Path + +This is the path that closes the earlier gaps: + +- no runtime application of example secrets +- one explicit secrets-management model +- one explicit network-enforcement story +- object storage instead of local bundle state +- one ingress and rollout model for production + +If you diverge from this overlay, treat it as a custom platform variant and review it accordingly. 
diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/external-secret.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/external-secret.yaml new file mode 100644 index 0000000..bf65189 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/external-secret.yaml @@ -0,0 +1,83 @@ +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: acr-gateway-secret +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: acr-secrets + target: + name: acr-gateway-secret + creationPolicy: Owner + template: + type: Opaque + data: + - secretKey: DATABASE_URL + remoteRef: + key: acr/control-plane/production + property: DATABASE_URL + - secretKey: REDIS_URL + remoteRef: + key: acr/control-plane/production + property: REDIS_URL + - secretKey: JWT_SECRET_KEY + remoteRef: + key: acr/control-plane/production + property: JWT_SECRET_KEY + - secretKey: KILLSWITCH_SECRET + remoteRef: + key: acr/control-plane/production + property: KILLSWITCH_SECRET + - secretKey: SERVICE_OPERATOR_API_KEY + remoteRef: + key: acr/control-plane/production + property: SERVICE_OPERATOR_API_KEY + - secretKey: OPERATOR_API_KEYS_JSON + remoteRef: + key: acr/control-plane/production + property: OPERATOR_API_KEYS_JSON + - secretKey: OPERATOR_SESSION_SECRET + remoteRef: + key: acr/control-plane/production + property: OPERATOR_SESSION_SECRET + - secretKey: OIDC_CLIENT_SECRET + remoteRef: + key: acr/control-plane/production + property: OIDC_CLIENT_SECRET + - secretKey: WEBHOOK_HMAC_SECRET + remoteRef: + key: acr/control-plane/production + property: WEBHOOK_HMAC_SECRET + - secretKey: AUDIT_SIGNING_SECRET + remoteRef: + key: acr/control-plane/production + property: AUDIT_SIGNING_SECRET + - secretKey: EXECUTOR_HMAC_SECRET + remoteRef: + key: acr/control-plane/production + property: EXECUTOR_HMAC_SECRET + - secretKey: EXECUTOR_CREDENTIAL_SECRET + remoteRef: + key: acr/control-plane/production + 
property: EXECUTOR_CREDENTIAL_SECRET + - secretKey: FINANCE_EXECUTOR_API_KEY + remoteRef: + key: acr/control-plane/production + property: FINANCE_EXECUTOR_API_KEY + - secretKey: EMAIL_EXECUTOR_API_KEY + remoteRef: + key: acr/control-plane/production + property: EMAIL_EXECUTOR_API_KEY + - secretKey: TICKET_EXECUTOR_API_KEY + remoteRef: + key: acr/control-plane/production + property: TICKET_EXECUTOR_API_KEY + - secretKey: AWS_ACCESS_KEY_ID + remoteRef: + key: acr/control-plane/production + property: AWS_ACCESS_KEY_ID + - secretKey: AWS_SECRET_ACCESS_KEY + remoteRef: + key: acr/control-plane/production + property: AWS_SECRET_ACCESS_KEY diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-configmap-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-configmap-patch.yaml new file mode 100644 index 0000000..ee78c6d --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-configmap-patch.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: acr-gateway-config +data: + ACR_ENV: "production" + LOG_LEVEL: "INFO" + SCHEMA_BOOTSTRAP_MODE: "validate" + STRICT_DEPENDENCY_STARTUP: "true" + REQUIRE_BUNDLE_AUTH: "false" + OPA_URL: "http://acr-opa:8181" + KILLSWITCH_URL: "http://acr-killswitch:8443" + OTEL_SERVICE_NAME: "acr-control-plane" + OTEL_EXPORTER_OTLP_ENDPOINT: "https://otel.example.com" + EXECUTE_ALLOWED_ACTIONS: "true" + POLICY_BUNDLE_BACKEND: "s3" + POLICY_BUNDLE_S3_BUCKET: "acr-control-plane-production" + POLICY_BUNDLE_S3_PREFIX: "acr/policy-bundles" + POLICY_BUNDLE_S3_REGION: "us-east-1" + POLICY_BUNDLE_PUBLIC_BASE_URL: "https://acr.example.com" + OIDC_ENABLED: "true" + OIDC_ISSUER: "https://login.example.com/" + OIDC_CLIENT_ID: "acr-control-plane" + OIDC_AUTHORIZE_URL: "https://login.example.com/oauth2/v1/authorize" + OIDC_TOKEN_URL: "https://login.example.com/oauth2/v1/token" + OIDC_JWKS_URL: "https://login.example.com/oauth2/v1/keys" + 
OIDC_REDIRECT_URI: "https://acr.example.com/acr/auth/oidc/callback" + OIDC_SCOPES: "openid profile email groups" + OIDC_ROLES_CLAIM: "groups" + OIDC_SUBJECT_CLAIM: "email" + OIDC_ROLE_MAPPING_JSON: > + {"acr-platform-admin":["agent_admin","security_admin","auditor","killswitch_operator","approver"], + "acr-approver":["approver"], + "acr-auditor":["auditor"]} diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-deployment-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-deployment-patch.yaml new file mode 100644 index 0000000..cd14c64 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/gateway-deployment-patch.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: acr-gateway +spec: + revisionHistoryLimit: 5 + minReadySeconds: 10 + progressDeadlineSeconds: 600 + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + spec: + automountServiceAccountToken: false + terminationGracePeriodSeconds: 30 + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: acr-gateway diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/ingress-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/ingress-patch.yaml new file mode 100644 index 0000000..447c74a --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/ingress-patch.yaml @@ -0,0 +1,27 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: acr-gateway-ingress + namespace: acr-system + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "1m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "60" +spec: + ingressClassName: nginx + tls: + - hosts: + - 
acr.example.com + secretName: acr-gateway-tls + rules: + - host: acr.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: acr-gateway + port: + number: 8000 diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/killswitch-deployment-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/killswitch-deployment-patch.yaml new file mode 100644 index 0000000..6567d83 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/killswitch-deployment-patch.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: acr-killswitch +spec: + revisionHistoryLimit: 5 + minReadySeconds: 10 + progressDeadlineSeconds: 600 + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + spec: + automountServiceAccountToken: false + terminationGracePeriodSeconds: 30 + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: acr-killswitch diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/kustomization.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/kustomization.yaml new file mode 100644 index 0000000..7b93ef1 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/kustomization.yaml @@ -0,0 +1,25 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../base + - external-secret.yaml + - networkpolicy-production.yaml +patches: + - path: namespace-patch.yaml + - path: gateway-configmap-patch.yaml + - path: gateway-deployment-patch.yaml + - path: killswitch-deployment-patch.yaml + - path: opa-deployment-patch.yaml + - path: ingress-patch.yaml + - path: remove-base-gateway-netpol.yaml + - path: remove-base-killswitch-netpol.yaml +images: + - name: acr-gateway + newName: 
ghcr.io/adamdistefanoai/acr-framework/acr-gateway + newTag: v1.0.1 + - name: acr-killswitch + newName: ghcr.io/adamdistefanoai/acr-framework/acr-killswitch + newTag: v1.0.1 + - name: openpolicyagent/opa + newName: openpolicyagent/opa + newTag: 0.63.0-rootless diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/namespace-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/namespace-patch.yaml new file mode 100644 index 0000000..2b499ba --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/namespace-patch.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: acr-system + labels: + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/warn: restricted + acr.io/network-zone: control-plane diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/networkpolicy-production.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/networkpolicy-production.yaml new file mode 100644 index 0000000..50f789e --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/networkpolicy-production.yaml @@ -0,0 +1,127 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: acr-gateway-production-netpol + namespace: acr-system +spec: + podSelector: + matchLabels: + app: acr-gateway + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: ingress-nginx + ports: + - protocol: TCP + port: 8000 + egress: + - to: + - namespaceSelector: {} + ports: + - protocol: UDP + port: 53 + - protocol: TCP + port: 53 + - to: + - podSelector: + matchLabels: + app: acr-opa + ports: + - protocol: TCP + port: 8181 + - to: + - podSelector: + matchLabels: + app: acr-killswitch + ports: + - protocol: TCP + port: 8443 + - to: + - namespaceSelector: + matchLabels: + acr.io/network-zone: 
protected-executors + ports: + - protocol: TCP + port: 443 + - to: + - ipBlock: + cidr: 203.0.113.10/32 + ports: + - protocol: TCP + port: 5432 + - to: + - ipBlock: + cidr: 203.0.113.20/32 + ports: + - protocol: TCP + port: 6379 + - to: + - ipBlock: + cidr: 203.0.113.30/32 + ports: + - protocol: TCP + port: 443 + - to: + - ipBlock: + cidr: 203.0.113.40/32 + ports: + - protocol: TCP + port: 443 + - to: + - ipBlock: + cidr: 203.0.113.50/32 + ports: + - protocol: TCP + port: 443 + - to: + - ipBlock: + cidr: 203.0.113.60/32 + ports: + - protocol: TCP + port: 443 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: acr-killswitch-production-netpol + namespace: acr-system +spec: + podSelector: + matchLabels: + app: acr-killswitch + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + app: acr-gateway + ports: + - protocol: TCP + port: 8443 + - from: + - namespaceSelector: + matchLabels: + acr.io/network-zone: operations + ports: + - protocol: TCP + port: 8443 + egress: + - to: + - namespaceSelector: {} + ports: + - protocol: UDP + port: 53 + - protocol: TCP + port: 53 + - to: + - ipBlock: + cidr: 203.0.113.20/32 + ports: + - protocol: TCP + port: 6379 diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/opa-deployment-patch.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/opa-deployment-patch.yaml new file mode 100644 index 0000000..0223724 --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/opa-deployment-patch.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: acr-opa +spec: + revisionHistoryLimit: 5 + minReadySeconds: 10 + progressDeadlineSeconds: 600 + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + spec: + automountServiceAccountToken: false + terminationGracePeriodSeconds: 30 + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: 
topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: acr-opa diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-gateway-netpol.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-gateway-netpol.yaml new file mode 100644 index 0000000..a535b2b --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-gateway-netpol.yaml @@ -0,0 +1,6 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: acr-gateway-netpol + namespace: acr-system +$patch: delete diff --git a/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-killswitch-netpol.yaml b/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-killswitch-netpol.yaml new file mode 100644 index 0000000..68a798d --- /dev/null +++ b/implementations/acr-control-plane/deploy/k8s/overlays/production/remove-base-killswitch-netpol.yaml @@ -0,0 +1,6 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: acr-killswitch-netpol + namespace: acr-system +$patch: delete diff --git a/implementations/acr-control-plane/docs/api.md b/implementations/acr-control-plane/docs/api.md index 7d363da..119b517 100644 --- a/implementations/acr-control-plane/docs/api.md +++ b/implementations/acr-control-plane/docs/api.md @@ -42,7 +42,7 @@ Downstream executors can require: The helper dependency `acr.gateway.executor_auth.require_gateway_execution` verifies that the token is valid and that it authorizes the exact payload being executed. The helper dependency `acr.gateway.executor_auth.require_brokered_execution_credential` verifies the downstream credential and its intended audience. -If you are integrating with workflow builders like `n8n`, treat these executor controls as the protected downstream boundary. See [orchestrators.md](/Users/adamdistefano/Desktop/control_plane/docs/orchestrators.md). 
+If you are integrating with workflow builders like `n8n`, treat these executor controls as the protected downstream boundary. See [orchestrators.md](orchestrators.md). --- diff --git a/implementations/acr-control-plane/docs/compliance/README.md b/implementations/acr-control-plane/docs/compliance/README.md new file mode 100644 index 0000000..3e95831 --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/README.md @@ -0,0 +1,56 @@ +# Compliance Package + +This folder is the enterprise review package for the ACR control plane implementation. + +It is designed to give security, risk, audit, and platform teams a starting set of artifacts for architecture review, control mapping, and deployment approval. + +## Included Artifacts + +- [threat-model.md](threat-model.md) +- [shared-responsibility-matrix.md](shared-responsibility-matrix.md) +- [control-mapping.md](control-mapping.md) +- [evidence-package.md](evidence-package.md) +- [external-assessment-scope.md](external-assessment-scope.md) +- [provenance-and-verification.md](../provenance-and-verification.md) +- [failure-load-dr-validation-2026-04-08.md](../failure-load-dr-validation-2026-04-08.md) + +## Intended Use + +Use this package when: + +- a security team is reviewing the control plane before deployment +- a risk or compliance team needs a control narrative +- an enterprise buyer requests architecture, evidence, or shared-responsibility material +- an assessor needs an agreed scope for penetration testing or architecture review + +## Important Limits + +This package is: + +- a technical and operational evidence set +- a reference mapping, not a certification claim +- deployment-aware, not deployment-complete + +This package is not: + +- legal advice +- a substitute for an organization’s own risk acceptance process +- proof that every deployment of ACR is compliant by default + +## Build the Versioned Package + +From the implementation root: + +```bash +python scripts/build_compliance_package.py \ + 
--implementation-dir . \ + --version v1.0.1 \ + --source-ref "$(git rev-parse HEAD)" \ + --output-dir dist/compliance +``` + +That command creates: + +- a release tarball +- a manifest with file digests +- a checksum file for the release assets diff --git a/implementations/acr-control-plane/docs/compliance/control-mapping.md b/implementations/acr-control-plane/docs/compliance/control-mapping.md new file mode 100644 index 0000000..e7c38ae --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/control-mapping.md @@ -0,0 +1,58 @@ +# Control Mapping + +This is a reference mapping for enterprise review. It is not a certification claim, legal opinion, or guarantee that every deployment is compliant out of the box. + +## Mapping Approach + +The goal of this document is to show where the control plane materially supports common governance and security frameworks, and where operator implementation work is still required. + +## NIST AI RMF Reference Mapping + +| NIST AI RMF Theme | ACR Capability | Operator Evidence Needed | +|---|---|---| +| Govern | policy releases, operator roles, approval workflows, containment authority | role design, policy approval process, release approvals | +| Map | agent manifests, purpose binding, allowed tools, risk tiering | agent inventory, business-purpose approval, data classification | +| Measure | telemetry, evidence export, drift scoring, validation results | alert thresholds, review cadence, issue tracking | +| Manage | allow/deny/escalate or modify decisions, kill switch, release rollback | incident response procedures, runtime exception handling, promotion controls | + +## SOC 2 Common Criteria Reference Mapping + +| SOC 2 Area | ACR Capability | Shared Responsibility Note | +|---|---|---| +| CC6 Logical Access | operator RBAC, signed sessions, API keys, agent token validation | identity-provider configuration and access reviews are external | +| CC7 System Operations | health and readiness endpoints, telemetry, evidence bundles, drift signals 
| monitoring, alerting, and on-call response remain deployment responsibilities | +| CC8 Change Management | versioned policy drafts, releases, activation history, rollback | change approvals and segregation of duties must be implemented by the operator | +| CC9 Risk Mitigation | fail-secure behavior, containment, approval gating, downstream authorization | enterprise risk acceptance and exception management remain external | + +## ISO/IEC 42001 Theme-Level Mapping + +| ISO 42001 Theme | ACR Capability | Shared Responsibility Note | +|---|---|---| +| Governance and leadership | operator roles, approval authority, policy lifecycle | organizational policy, leadership review, and accountability are external | +| Planning and risk treatment | agent boundaries, runtime guardrails, escalation paths | risk registers and treatment acceptance remain operator-managed | +| Operational controls | hot-path policy enforcement, output filtering, containment, brokered credentials | deployment hardening and downstream system control must be implemented externally | +| Monitoring and evaluation | telemetry, evidence export, drift scoring, failure/load/DR validation | periodic review, audit collection, and corrective action tracking remain external | +| Improvement | governed baseline lifecycle, policy versioning, rollback | continuous improvement process is organizational, not product-only | + +## EU AI Act Support Mapping + +| EU AI Act Obligation Theme | ACR Capability | Shared Responsibility Note | +|---|---|---| +| Risk management | policy enforcement, containment, approval gating, failure validation | deployers still need a formal risk-management process | +| Logging and traceability | telemetry, evidence bundles, approval records, signed integrity chain | retention, access control, and legal hold remain external | +| Human oversight | escalation queues, approval endpoints, operator console, kill switch | operators must define when human review is mandatory | +| Accuracy, 
robustness, cybersecurity | runtime enforcement, dependency checks, brokered credentials, signed releases | infrastructure hardening, testing depth, and production controls remain shared | + +## What This Package Does Not Claim + +It does not claim: + +- SOC 2 certification +- ISO 42001 certification +- automatic EU AI Act compliance +- completeness of organizational governance outside the control plane + +It does claim: + +- the implementation provides technical controls that map cleanly into those programs +- the repo now includes artifacts that make those controls easier to assess and verify diff --git a/implementations/acr-control-plane/docs/compliance/evidence-package.md b/implementations/acr-control-plane/docs/compliance/evidence-package.md new file mode 100644 index 0000000..55fcc53 --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/evidence-package.md @@ -0,0 +1,56 @@ +# Evidence Package + +This document describes the evidence set an enterprise reviewer should expect for a release or deployment review. 
+ +## Core Product Evidence + +- release tag and merge commit +- signed container images +- build provenance attestations +- signed compliance package tarball and checksum file +- policy release history and active bundle alias +- failure/load/DR validation report + +## Runtime Governance Evidence + +- sample approval records +- sample evidence bundle for a single correlation id +- telemetry integrity verification result +- drift baseline and drift score outputs +- containment event history + +## Deployment Evidence + +- production configuration values from a secret manager, not committed files +- OIDC and role-mapping configuration +- network policy or equivalent ingress/egress controls +- digest-pinned deployment manifest or Helm values +- backup and restore verification logs + +## Security Review Evidence + +- CI security scan output +- SBOM generation record +- dependency audit output +- static-analysis findings and resolution notes +- release verification record showing cosign and attestation checks + +## Suggested Review Packet + +For an internal approval board or customer security review, package: + +1. [provenance-and-verification.md](../provenance-and-verification.md) +2. [failure-load-dr-validation-2026-04-08.md](../failure-load-dr-validation-2026-04-08.md) +3. [threat-model.md](threat-model.md) +4. [shared-responsibility-matrix.md](shared-responsibility-matrix.md) +5. [control-mapping.md](control-mapping.md) + +## Known Limits + +Evidence generated by the product is only part of the package. 
Enterprise reviewers will still expect: + +- identity governance records +- change approvals +- vulnerability remediation process +- incident-management process +- external assessment results diff --git a/implementations/acr-control-plane/docs/compliance/external-assessment-scope.md b/implementations/acr-control-plane/docs/compliance/external-assessment-scope.md new file mode 100644 index 0000000..36060c3 --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/external-assessment-scope.md @@ -0,0 +1,56 @@ +# External Assessment Scope + +This document is a scoping package for independent review. It is not a substitute for an actual assessment report. + +## Recommended Assessment Types + +- application penetration test of operator and runtime APIs +- authenticated architecture review of runtime control boundaries +- cloud and Kubernetes configuration review for the target deployment +- release integrity and provenance verification review + +## In Scope + +- `POST /acr/evaluate` decision path +- operator authentication and authorization paths +- policy draft, release, and activation APIs +- approval and containment APIs +- telemetry and evidence export paths +- downstream authorization token and brokered credential patterns +- release workflow provenance and signature verification + +## Explicit Questions for Assessors + +- Can an agent bypass the gateway and still reach protected systems? +- Can operator roles be escalated through session or API-key misuse? +- Can policy or evidence records be tampered with undetected? +- What happens if Redis, OPA, PostgreSQL, or the kill-switch service is attacked or degraded? +- Do release artifacts provide enough provenance to support admission or promotion policies?
+ +## Deployment Preconditions + +The following should be available to the assessor: + +- deployment diagram and trust boundaries +- sample operator roles +- policy bundle source and active release id +- staging environment with representative downstream integrations +- digest-pinned deployed images +- recent validation report and release verification output + +## Expected Deliverables + +- executive summary +- findings with severity and evidence +- exploit or reproduction notes where appropriate +- architectural observations and compensating controls +- retest notes after remediation + +## Current Known Focus Areas + +Based on the latest validation pass, assessors should pay special attention to: + +- generic `500` behavior during PostgreSQL outage +- readiness blind spot for kill-switch service loss +- full compose deployment startup behavior around OPA health gating +- hot-path latency under higher concurrency and more realistic policy/data load diff --git a/implementations/acr-control-plane/docs/compliance/shared-responsibility-matrix.md b/implementations/acr-control-plane/docs/compliance/shared-responsibility-matrix.md new file mode 100644 index 0000000..41a86e9 --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/shared-responsibility-matrix.md @@ -0,0 +1,27 @@ +# Shared Responsibility Matrix + +The control plane provides a strong enforcement core, but enterprise readiness depends on clear operator responsibilities. 
+ +| Control Area | ACR Implementation Responsibility | Deployer / Operator Responsibility | +|---|---|---| +| Agent identity validation | Validate registered agents, lifecycle state, and runtime tokens | Establish who may register agents and how agent ownership is approved | +| Runtime policy enforcement | Evaluate policy, enforce allow/deny/escalate or modify, persist decision traces | Author policy, approve releases, and govern activation rights | +| Operator authentication | Provide API key, OIDC session, and RBAC surfaces | Integrate OIDC, manage break-glass keys, and review role assignments | +| Secret handling | Reject weak production secrets and provide generation templates | Store secrets in a real secret manager and rotate them on schedule | +| Kill-switch containment | Enforce Redis-backed kill checks and operator containment APIs | Protect access to containment roles and monitor containment events | +| Downstream authorization | Mint payload-bound execution tokens and brokered credentials | Ensure downstream systems verify those credentials and cannot be reached directly | +| Observability and evidence | Capture telemetry, export evidence bundles, and sign records | Define retention, immutability, access control, and long-term storage policy | +| Data persistence | Persist policy, approval, telemetry, and drift records | Operate backups, restore tests, partition lifecycle, and database hardening | +| Release integrity | Publish signed images, provenance attestations, and signed compliance package artifacts | Verify signatures, pin by digest, and block unverified promotions | +| Network isolation | Provide reference K8s manifests and deployment guidance | Enforce ingress, egress, and service-to-service restrictions in the target environment | +| Compliance narrative | Provide control mappings, threat model, and evidence checklist | Map ACR into the organization’s own control framework and evidence program | +| External assessment support | Provide a 
scoping package and technical review artifacts | Commission penetration testing, architecture review, and remediation tracking | + +## Practical Reading + +If a control depends on external identity, storage, networking, or change management, it is almost certainly shared. + +Governance is strongest when: + +- ACR owns decision enforcement and evidence generation +- the operator owns environment trust, identity, storage, and release promotion diff --git a/implementations/acr-control-plane/docs/compliance/threat-model.md b/implementations/acr-control-plane/docs/compliance/threat-model.md new file mode 100644 index 0000000..4ada3c5 --- /dev/null +++ b/implementations/acr-control-plane/docs/compliance/threat-model.md @@ -0,0 +1,150 @@ +# Threat Model + +This threat model is focused on the runtime control plane implementation, not only the abstract standard. + +## Protected Assets + +- agent manifests and registered boundaries +- agent JWTs +- operator API keys and signed sessions +- policy bundles and active release aliases +- approval records and override decisions +- telemetry events and evidence bundles +- execution authorization tokens and brokered downstream credentials +- Postgres and Redis runtime state + +## Trust Boundaries + +1. Agent runtime to gateway +2. Operator to gateway and console +3. Gateway to OPA +4. Gateway to PostgreSQL and Redis +5. Gateway to kill-switch service +6. Gateway to protected executors and downstream enterprise systems +7. Release pipeline to container registry and release assets + +## Primary Threats + +### Direct Tool Bypass + +Scenario: +An agent or compromised service calls a protected downstream tool directly without passing through the control plane. 
+ +Relevant controls: + +- gateway-issued execution authorization tokens +- payload-bound execution verification +- brokered downstream credentials +- network segmentation expectations in deployment guidance + +Residual risk: +If downstream services are reachable directly and do not verify gateway-issued credentials, bypass remains possible. + +### Stolen Agent Identity + +Scenario: +An attacker obtains an agent JWT and replays or abuses it. + +Relevant controls: + +- short-lived JWTs +- agent registration and lifecycle validation +- kill-switch containment +- server-side rate and spend controls + +Residual risk: +Token theft remains impactful until expiration or containment unless deployers add stronger issuer controls and network restrictions. + +### Operator Credential Misuse + +Scenario: +An attacker obtains operator API keys or session material and attempts approvals, policy changes, or containment actions. + +Relevant controls: + +- operator RBAC +- OIDC support +- signed operator sessions +- operator key rotation and audit trails + +Residual risk: +Bootstrap API keys remain powerful and should be limited to break-glass and automation scenarios. + +### Policy Tampering + +Scenario: +An attacker or unauthorized insider changes policy content or activation state. + +Relevant controls: + +- versioned drafts and releases +- activation and rollback history +- operator-authenticated policy management +- release provenance and container signing for shipped artifacts + +Residual risk: +Bundle hosting and operator access control remain deployment responsibilities. + +### Evidence Tampering + +Scenario: +An attacker attempts to alter telemetry or exported evidence after the fact. + +Relevant controls: + +- chained telemetry integrity metadata +- bundle signatures +- exported checksums + +Residual risk: +Retention immutability and long-term custody controls still depend on the operator’s storage and retention model. 
+ +### Dependency Failure as an Availability Attack + +Scenario: +OPA, Redis, PostgreSQL, or the kill-switch service becomes unavailable due to outage or hostile interference. + +Relevant controls: + +- fail-secure policy behavior +- Redis-backed hot-path kill checks +- readiness endpoints +- documented failure/load/DR validation + +Residual risk: +The current validation showed two open items: + +- PostgreSQL loss currently surfaces as generic `500 INTERNAL_ERROR` on evaluate +- readiness does not currently include kill-switch service availability + +### Supply-Chain Compromise + +Scenario: +Release artifacts are modified, replaced, or built from untrusted workflow state. + +Relevant controls: + +- GitHub Actions release workflow identity +- container image signing +- build provenance attestations +- signed compliance-package blob + +Residual risk: +Deployers still need to verify signatures and attestations before promotion. + +## Assumptions + +- downstream systems enforce gateway-issued credentials or are otherwise network-isolated +- production deployments replace development secrets and use a real secret-management flow +- operator roles are assigned through an approved identity process +- release consumers verify signatures and provenance rather than trusting tags alone + +## Recommended Review Focus + +For enterprise review, pay special attention to: + +- downstream bypass resistance +- bootstrap key governance +- policy release authorization +- retention and evidence custody +- dependency failure semantics under production load diff --git a/implementations/acr-control-plane/docs/configuration.md b/implementations/acr-control-plane/docs/configuration.md index 27c2851..527f076 100644 --- a/implementations/acr-control-plane/docs/configuration.md +++ b/implementations/acr-control-plane/docs/configuration.md @@ -29,6 +29,7 @@ All configuration is via environment variables.
| `STRICT_DEPENDENCY_STARTUP` | `false` | If true, fail startup when Redis initialization fails. Recommended in production. | | `WEBHOOK_URL` | `` | HTTP endpoint to notify on new approval requests | | `WEBHOOK_HMAC_SECRET` | `` | HMAC-SHA256 signing key for webhook `X-ACR-Signature` header. Required if `WEBHOOK_URL` is set. | +| `AUDIT_SIGNING_SECRET` | `dev_audit_signing_secret_change_me` | HMAC signing key for evidence bundles and audit-integrity metadata. Must be strong outside development/test. | | `OTEL_EXPORTER_OTLP_ENDPOINT` | `` | OTLP endpoint for traces (e.g. `http://jaeger:4318`) | | `OTEL_SERVICE_NAME` | `acr-control-plane` | OpenTelemetry service name | | `OPERATOR_API_KEYS_JSON` | `` | JSON object mapping operator API keys to `{subject, roles}` identities | @@ -55,6 +56,7 @@ All configuration is via environment variables. | `EXECUTOR_CREDENTIAL_SECRET` | `` | Shared secret used to mint short-lived brokered downstream credentials in `X-ACR-Brokered-Credential` | | `EXECUTOR_CREDENTIAL_TTL_SECONDS` | `300` | Lifetime of the brokered downstream credential in seconds | | `EXECUTOR_TIMEOUT_SECONDS` | `8.0` | Timeout for downstream executor HTTP calls | +| `REQUIRE_BUNDLE_AUTH` | `true` | Require operator auth on bundle/discovery endpoints. Set `false` when OPA pulls bundles directly and network policy is the enforcement boundary. | | `POLICY_BUNDLE_BACKEND` | `local` | Bundle publishing backend. Supported values: `local`, `s3`. | | `POLICY_BUNDLE_LOCAL_DIR` | `./var/policy_bundles` | Filesystem destination for published policy bundles | | `POLICY_BUNDLE_S3_BUCKET` | `` | S3 bucket or S3-compatible object-store bucket used when `POLICY_BUNDLE_BACKEND=s3` | @@ -75,10 +77,12 @@ All configuration is via environment variables. 
- [ ] Configure OIDC and `OPERATOR_SESSION_SECRET` for production operator login - [ ] Set `SERVICE_OPERATOR_API_KEY` to a key with `killswitch_operator` or `security_admin` - [ ] Configure `WEBHOOK_URL` for approval notifications +- [ ] Set `AUDIT_SIGNING_SECRET` for signed evidence export - [ ] Set `OTEL_EXPORTER_OTLP_ENDPOINT` for distributed tracing - [ ] Set `EXECUTE_ALLOWED_ACTIONS=true` and populate `EXECUTOR_INTEGRATIONS_JSON` or `TOOL_EXECUTOR_MAP_JSON` - [ ] Set a strong `EXECUTOR_HMAC_SECRET` and require downstream executors to verify `X-ACR-Execution-Token` - [ ] Set a strong `EXECUTOR_CREDENTIAL_SECRET` and require downstream executors to verify `X-ACR-Brokered-Credential` +- [ ] Set `REQUIRE_BUNDLE_AUTH=false` if OPA will poll bundles directly without auth headers - [ ] Set `POLICY_BUNDLE_BACKEND` and a durable bundle destination - [ ] Use a managed PostgreSQL instance (PgBouncer for connection pooling) - [ ] Use a managed Redis instance (Redis Sentinel or Cluster for HA) diff --git a/implementations/acr-control-plane/docs/deployment.md b/implementations/acr-control-plane/docs/deployment.md index 242a200..4c97140 100644 --- a/implementations/acr-control-plane/docs/deployment.md +++ b/implementations/acr-control-plane/docs/deployment.md @@ -16,6 +16,9 @@ docker-compose logs -f acr-gateway ## Production Deployment +The supported production path is the Kubernetes overlay at [deploy/k8s/overlays/production/README.md](../deploy/k8s/overlays/production/README.md). +Use `deploy/k8s/base` as the reusable foundation, not as the final production install target. + ### Security hardening 1. 
**Rotate all secrets** before deploying: @@ -129,8 +132,8 @@ If you are layering ACR under `n8n`, LangGraph, or a custom workflow tool: Reference guide: -- [orchestrator integration guide](/Users/adamdistefano/Desktop/control_plane/docs/orchestrators.md) -- [n8n example](/Users/adamdistefano/Desktop/control_plane/examples/n8n/README.md) +- [orchestrator integration guide](orchestrators.md) +- [n8n example](../examples/n8n/README.md) ### Scaling @@ -155,18 +158,13 @@ Key metrics to alert on: ### Kubernetes manifests -This repo includes a production-oriented Kubernetes base at -[kustomization.yaml](/Users/adamdistefano/Desktop/control_plane/deploy/k8s/base/kustomization.yaml). +This repo includes: -It includes: -- gateway deployment/service -- kill-switch deployment/service -- OPA deployment/service in discovery mode -- config and secret templates -- starter network policy +- a reusable base at [deploy/k8s/base/kustomization.yaml](../deploy/k8s/base/kustomization.yaml) +- the blessed production overlay at [deploy/k8s/overlays/production/kustomization.yaml](../deploy/k8s/overlays/production/kustomization.yaml) -Apply with: +Production apply path: ```bash -kubectl apply -k deploy/k8s/base +kubectl apply -k deploy/k8s/overlays/production ``` diff --git a/implementations/acr-control-plane/docs/enterprise-roadmap.md b/implementations/acr-control-plane/docs/enterprise-roadmap.md index 3fe8a94..9e8122f 100644 --- a/implementations/acr-control-plane/docs/enterprise-roadmap.md +++ b/implementations/acr-control-plane/docs/enterprise-roadmap.md @@ -51,13 +51,25 @@ Remaining follow-up: - tighten IAM and network controls so agents cannot talk to protected systems directly ### 5. 
Dependency Degradation Semantics -Status: `next` +Status: `in_progress` Scope: - explicit fail-open / fail-closed matrix by subsystem - runbooks for Redis, OPA, Postgres, and kill-switch degradation - load-tested latency and dependency-failure benchmarks +Initial validation completed: +- local failure / load / disaster-recovery validation was run on `2026-04-08` +- report: `docs/failure-load-dr-validation-2026-04-08.md` + +Findings from that run: +- OPA outage failed secure with `503 POLICY_ENGINE_ERROR` +- Redis outage failed secure with `503 KILLSWITCH_ERROR` +- PostgreSQL outage denied safely but surfaced as `500 INTERNAL_ERROR` instead of a dependency-specific contract +- the independent kill-switch service can be unavailable while `/acr/ready` still reports `ready` +- the documented full compose startup path is not yet dependable because the OPA service never becomes healthy in that gated startup flow on the validated machine +- load stayed stable at `1000` requests / `25` concurrency, but p95 / p99 latency is still above an enterprise-grade hot-path target + ### 6. Governed Baseline Lifecycle Status: `done` @@ -84,7 +96,7 @@ What landed: - stronger protected executor packaging and deployment guidance Remaining follow-up: -- provider-specific adapters beyond `n8n` +- provider-specific adapters beyond LangGraph and `n8n` - deeper copy-paste examples for LangGraph and similar stacks Why it matters: @@ -105,12 +117,14 @@ Why it matters: - behavior alone is often too late; intent metadata gives operators earlier context about why the system is attempting an action ### 9. 
Provenance and Artifact Signing -Status: `planned` +Status: `done` -Scope: -- signed container images -- release provenance / attestation -- verification guidance for deployers +What landed: +- active root release workflow for the control-plane release path +- keyless Cosign signing for published container images +- GitHub build provenance attestations for release images +- signed release-time compliance package tarball +- verification guidance for deployers in `docs/provenance-and-verification.md` ### 10. Kubernetes Policy Validation Status: `planned` @@ -137,12 +151,28 @@ Scope: - cleaner approval and escalation operations for on-call teams ### 13. Reference Production Deployment -Status: `planned` +Status: `done` -Scope: -- one clearly blessed deployment model -- network enforcement story -- identity, secrets, observability, and rollback guidance +What landed: +- one clearly blessed Kubernetes production overlay under `deploy/k8s/overlays/production` +- External Secrets based runtime secret management instead of applying example secrets +- explicit production egress allowlists for managed dependencies and protected executors +- OIDC-first operator auth, object-storage-backed bundle delivery, and rollout guidance in the production install docs + +Remaining follow-up: +- optional admission-policy examples for clusters enforcing extra guardrails +- provider-specific examples for secret stores and workload identity + +### 14. 
Compliance Package +Status: `done` + +What landed: +- compliance review package under `docs/compliance/` +- threat model, shared-responsibility matrix, control mapping, evidence package, and external assessment scope +- build script for a versioned compliance package release artifact + +Why it matters: +- enterprise review teams need an assessable package, not only source code and marketing claims ## Adoption Test diff --git a/implementations/acr-control-plane/docs/failure-load-dr-validation-2026-04-08.md b/implementations/acr-control-plane/docs/failure-load-dr-validation-2026-04-08.md new file mode 100644 index 0000000..f0e0f37 --- /dev/null +++ b/implementations/acr-control-plane/docs/failure-load-dr-validation-2026-04-08.md @@ -0,0 +1,230 @@ +# Failure / Load / DR Validation — 2026-04-08 + +Version under test: `v1.0.1` + +## Scope + +This validation pass exercised the control plane in `production` mode with: + +- PostgreSQL, Redis, OPA, and the independent kill-switch service running locally +- Alembic migrations applied to an empty database +- the main gateway running locally with `STRICT_DEPENDENCY_STARTUP=true` +- authorization-only execution (`EXECUTE_ALLOWED_ACTIONS=false`) to isolate control-plane behavior + +The validation covered: + +- baseline runtime behavior +- hot-path load on `POST /acr/evaluate` +- dependency failure behavior for OPA, Redis, PostgreSQL, and the kill-switch service +- disaster recovery via `pg_dump` backup, full dependency teardown, restore, and post-restore verification + +## Environment + +- Date: `2026-04-08` +- Gateway mode: `production` +- Dependency topology: local Docker services for PostgreSQL, Redis, OPA, and kill-switch +- Gateway startup path used for runtime validation: + - `docker compose up -d postgres redis opa acr-killswitch` + - `alembic -c alembic.ini upgrade head` + - `python3 -m uvicorn acr.main:app --host 127.0.0.1 --port 8000` + +Note: +The full `docker compose` startup path that includes `acr-gateway` and 
`acr-migrate` was also attempted, but the compose-gated path did not become runnable because the OPA service never reached a healthy state in that startup flow on this machine. That is tracked as a deployment finding below. + +## Baseline Validation + +- Gateway health: `200 healthy` +- Gateway readiness: `200 ready` with `database=ok`, `redis=ok`, `opa=ok` +- Baseline allow path: + - tool: `query_customer_db` + - result: `200 allow` + - observed server latency: `89ms` +- Baseline escalation path: + - tool: `issue_refund` + - amount: `$250` + - result: `202 escalate` + - approval created successfully in queue `finance-approvals` + - approval request id: `apr-0b7c6d35-c136-40f9-af0d-217e74d3edf9` + +## Load Validation + +Workload: + +- endpoint: `POST /acr/evaluate` +- action: `query_customer_db` +- total requests: `1000` +- concurrency: `25` + +Results: + +- success rate: `1000 / 1000` (`100%`) +- decision mix: `1000 allow` +- throughput: `102.54 requests/sec` + +Client-observed latency: + +- mean: `241.90ms` +- p50: `210.05ms` +- p95: `405.99ms` +- p99: `848.24ms` +- max: `926.63ms` + +Gateway-reported latency: + +- mean: `204.89ms` +- p50: `176.5ms` +- p95: `366.0ms` +- p99: `809.13ms` +- max: `861.0ms` + +Assessment: + +- The gateway stayed stable under sustained concurrent load and returned correct decisions. +- The current hot-path latency is functional but not yet at an enterprise-grade p99 target if the expectation is sub-100ms policy gating under pressure. + +## Failure Validation + +### OPA Outage + +Observed behavior: + +- readiness: `503 not_ready` +- checks: `database=ok`, `redis=ok`, `opa=error` +- evaluate result: `503 deny` +- error code: `POLICY_ENGINE_ERROR` +- reason: `OPA unreachable after 3 attempts` + +Recovery: + +- after OPA restart, readiness returned to `200 ready` +- next evaluation returned `200 allow` + +Assessment: + +- Pass. This is the expected fail-secure behavior. 
+ +### Redis Outage + +Observed behavior: + +- readiness: `503 not_ready` +- checks: `database=ok`, `redis=error`, `opa=ok` +- evaluate result: `503 deny` +- error code: `KILLSWITCH_ERROR` +- reason: `Kill switch state unavailable: Redis read failed` + +Recovery: + +- after Redis restart, readiness returned to `200 ready` +- next evaluation returned `200 allow` + +Assessment: + +- Pass. The hot path failed secure when Redis-backed runtime control state was unavailable. + +### PostgreSQL Outage + +Observed behavior: + +- readiness: `503 not_ready` +- checks: `database=error`, `redis=ok`, `opa=ok` +- evaluate result: `500 deny` +- error code: `INTERNAL_ERROR` + +Recovery: + +- after PostgreSQL restart, readiness returned to `200 ready` +- next evaluation returned `200 allow` + +Assessment: + +- Partial pass. The system denied execution during database loss, which is safe, but it surfaced as a generic `500 INTERNAL_ERROR` instead of an explicit dependency failure code such as `503`. + +### Kill-Switch Service Outage + +Observed behavior: + +- readiness: `200 ready` +- evaluate result: `200 allow` +- containment status call: `503` + +Recovery: + +- after kill-switch service restart, evaluate remained `200 allow` +- containment status returned `200` + +Assessment: + +- Mixed result: + - runtime evaluation stayed available because hot-path kill checks read Redis directly + - operator containment writes/reads through the independent service were unavailable + - readiness did not report this dependency loss + +This is an enterprise gap: the current readiness contract does not reflect independent kill-switch service availability. + +## Disaster Recovery Validation + +Backup artifact: + +- method: `pg_dump --clean --if-exists --no-owner --no-privileges` +- artifact size: `2,351,336 bytes` + +Recovery flow: + +1. created baseline agent, telemetry, and one pending approval +2. took a PostgreSQL backup +3. tore down the dependency stack and removed volumes +4. 
brought PostgreSQL, Redis, OPA, and kill-switch back up +5. restored the SQL backup into a fresh database +6. verified that the running gateway returned to `ready` +7. verified persisted entities and a fresh evaluation + +Post-restore verification: + +- readiness: `200 ready` +- agent fetch: `200` +- approval fetch: `200` +- telemetry events returned: `50` +- post-restore evaluation: `200 allow` +- gateway readiness after restore completion: `0.10s` + +Assessment: + +- Pass. Data needed for runtime governance survived backup and restore, and the live gateway resumed normal decisioning after dependencies were restored. + +## Deployment Finding + +The full compose startup path is currently not reliable in this environment: + +- `docker compose up -d --build postgres redis opa acr-killswitch acr-migrate acr-gateway` +- observed result: the gateway never became startable because compose blocked on an unhealthy OPA service + +This appears to be a compose/dependency-gating problem rather than a runtime policy-engine problem, because: + +- OPA answered `GET /health` successfully when started on its own +- runtime validation worked once dependencies were started and the gateway was launched outside that compose-gated path + +## Summary + +What passed: + +- baseline allow and escalate paths +- load stability at `1000` requests / `25` concurrency +- fail-secure behavior for OPA outage +- fail-secure behavior for Redis outage +- recovery after OPA, Redis, and PostgreSQL restart +- PostgreSQL backup / restore with persisted governance data + +What still blocks a stronger enterprise-ready claim: + +- PostgreSQL outage returns a generic `500 INTERNAL_ERROR` instead of an explicit dependency failure contract +- readiness does not include kill-switch service availability +- the compose full-stack deployment path is not currently dependable +- load latency, especially p95/p99, is still higher than a hardened control-plane target would ideally be + +## Recommended Next Actions + +1. 
Add explicit database dependency failure handling so PostgreSQL loss returns a controlled `503` contract instead of `500 INTERNAL_ERROR`. +2. Extend `/acr/ready` to include the independent kill-switch service, not just Redis. +3. Fix the compose OPA health / dependency gating path so `acr-gateway` can be started through the documented full-stack deployment flow. +4. Repeat the load test after deployment-path fixes and capture a second run with higher concurrency and a published latency budget. diff --git a/implementations/acr-control-plane/docs/orchestrators.md b/implementations/acr-control-plane/docs/orchestrators.md index b2fb280..1b03a85 100644 --- a/implementations/acr-control-plane/docs/orchestrators.md +++ b/implementations/acr-control-plane/docs/orchestrators.md @@ -2,6 +2,8 @@ This guide explains how to place the ACR control plane underneath tools like `n8n`, LangGraph, custom agent runtimes, or internal workflow builders. +If you want a typed integration surface instead of direct HTTP calls, start with [sdk.md](sdk.md). 
+ The key design choice is: **ACR should be the mandatory enforcement layer for sensitive actions, not an optional best-practice inside each workflow.** @@ -87,8 +89,8 @@ Recommended node flow: If you want a reference shape, see: -- [n8n README](/Users/adamdistefano/Desktop/control_plane/examples/n8n/README.md) -- [n8n workflow JSON](/Users/adamdistefano/Desktop/control_plane/examples/n8n/acr_sensitive_action_workflow.json) +- [n8n README](../examples/n8n/README.md) +- [n8n workflow JSON](../examples/n8n/acr_sensitive_action_workflow.json) ## Protected Executor Pattern @@ -103,8 +105,8 @@ Instead: Reference implementation: -- [protected executor app](/Users/adamdistefano/Desktop/control_plane/examples/protected_executor/app.py) -- [protected executor guide](/Users/adamdistefano/Desktop/control_plane/examples/protected_executor/README.md) +- [protected executor app](../examples/protected_executor/app.py) +- [protected executor guide](../examples/protected_executor/README.md) ## Adoption Guidance For Non-Engineering Teams @@ -132,6 +134,9 @@ Before claiming orchestrator integration is production-ready, confirm: This repo already provides: - the gateway decision API +- an official Python SDK +- a LangGraph/LangChain-style tool guard +- a TypeScript SDK - approval workflows - evidence export - protected executor verification helpers diff --git a/implementations/acr-control-plane/docs/production_install.md b/implementations/acr-control-plane/docs/production_install.md index 87306ca..2b85db7 100644 --- a/implementations/acr-control-plane/docs/production_install.md +++ b/implementations/acr-control-plane/docs/production_install.md @@ -1,142 +1,171 @@ # ACR Control Plane — Production Install Guide -This is the shortest end-to-end path for taking the repo from GitHub to a real production deployment. +This is the supported end-to-end production path. 
+ +Use the production overlay at [deploy/k8s/overlays/production](../deploy/k8s/overlays/production/README.md), not `deploy/k8s/base`, for real deployments. ## 1. Prerequisites -- Kubernetes cluster with ingress/load balancing -- Managed PostgreSQL -- Managed Redis +- Kubernetes cluster +- `ingress-nginx` +- `cert-manager` +- External Secrets Operator +- managed PostgreSQL +- managed Redis - OIDC identity provider -- Internal business APIs for refunds, outbound email, and ticket creation -- Object storage for policy bundles if using `POLICY_BUNDLE_BACKEND=s3` +- S3 or S3-compatible object storage for policy bundles +- internal executor endpoints for refunds, email, tickets, or other governed actions ## 2. Clone and install ```bash -git clone https://github.com/SynergeiaLabs/acr-framework.git +git clone https://github.com/AdamDiStefanoAI/acr-framework.git cd acr-framework/implementations/acr-control-plane pip install -e ".[dev]" ``` -## 3. Configure secrets +## 3. Start from the blessed overlay + +The production overlay lives at: + +- [deploy/k8s/overlays/production/kustomization.yaml](../deploy/k8s/overlays/production/kustomization.yaml) + +It assumes: + +- OIDC-first operator auth +- `ExternalSecret` as the source of `acr-gateway-secret` +- object-storage-backed policy bundles +- explicit network allowlists for managed services and internal executors -Start with [gateway-secret.example.yaml](/Users/adamdistefano/Desktop/control_plane/deploy/k8s/base/gateway-secret.example.yaml) and replace every placeholder. +## 4. Configure secrets -Generate strong values for: +Edit [external-secret.yaml](../deploy/k8s/overlays/production/external-secret.yaml) to point at your real secret store. 
+ +At minimum, populate: + +- `DATABASE_URL` +- `REDIS_URL` - `JWT_SECRET_KEY` - `KILLSWITCH_SECRET` +- `SERVICE_OPERATOR_API_KEY` +- `OPERATOR_API_KEYS_JSON` - `OPERATOR_SESSION_SECRET` +- `OIDC_CLIENT_SECRET` - `WEBHOOK_HMAC_SECRET` +- `AUDIT_SIGNING_SECRET` - `EXECUTOR_HMAC_SECRET` +- `EXECUTOR_CREDENTIAL_SECRET` -## 4. Configure OIDC operator login +If you use S3 credentials instead of workload identity, also provide: -Set these in [gateway-configmap.yaml](/Users/adamdistefano/Desktop/control_plane/deploy/k8s/base/gateway-configmap.yaml): +- `AWS_ACCESS_KEY_ID` +- `AWS_SECRET_ACCESS_KEY` -- `OIDC_ENABLED=true` -- `OIDC_ISSUER` -- `OIDC_CLIENT_ID` -- `OIDC_CLIENT_SECRET` -- `OIDC_AUTHORIZE_URL` -- `OIDC_TOKEN_URL` -- `OIDC_JWKS_URL` -- `OIDC_REDIRECT_URI` -- `OIDC_ROLE_MAPPING_JSON` - -Recommended role mapping: -- platform admins -> `agent_admin`, `security_admin`, `auditor`, `approver`, `killswitch_operator` -- approvers -> `approver` -- auditors -> `auditor` - -## 5. Configure executor integrations - -Set `EXECUTE_ALLOWED_ACTIONS=true` and define `EXECUTOR_INTEGRATIONS_JSON`. - -Supported providers: -- `refund_api` -- `email_api` -- `ticket_api` -- `http` - -Example: - -```json -{ - "issue_refund": { - "provider": "refund_api", - "url": "https://refunds.internal/api/refunds", - "api_key": "env:FINANCE_EXECUTOR_API_KEY", - "default_currency": "USD" - }, - "send_email": { - "provider": "email_api", - "url": "https://messaging.internal/api/send", - "api_key": "env:EMAIL_EXECUTOR_API_KEY", - "from_address": "ops@example.com" - }, - "create_ticket": { - "provider": "ticket_api", - "url": "https://tickets.internal/api/tickets", - "api_key": "env:TICKET_EXECUTOR_API_KEY", - "default_queue": "operations" - } -} +## 5. Configure runtime settings + +Edit [gateway-configmap-patch.yaml](../deploy/k8s/overlays/production/gateway-configmap-patch.yaml). 
+ +Set: + +- public hostname and redirect URI +- OIDC issuer and endpoint URLs +- object-storage bucket details +- OTLP endpoint if used + +Important production defaults in this path: + +- `SCHEMA_BOOTSTRAP_MODE=validate` +- `STRICT_DEPENDENCY_STARTUP=true` +- `REQUIRE_BUNDLE_AUTH=false` + +That last setting is intentional: OPA polls bundles directly, so network policy becomes the enforcement boundary for bundle delivery. + +## 6. Configure network enforcement + +Edit [networkpolicy-production.yaml](../deploy/k8s/overlays/production/networkpolicy-production.yaml). + +Replace the placeholder CIDRs for: + +- managed PostgreSQL +- managed Redis +- OIDC provider +- object storage +- OTLP collector +- webhook endpoints if used + +If your protected executors run in-cluster, place them in a namespace labeled: + +```yaml +acr.io/network-zone: protected-executors ``` -## 6. Configure policy bundle delivery +If they run outside the cluster, add their CIDRs to the gateway egress allowlist instead. -If using object storage, set: -- `POLICY_BUNDLE_BACKEND=s3` -- `POLICY_BUNDLE_S3_BUCKET` -- `POLICY_BUNDLE_S3_PREFIX` +## 7. Pin release images -## 7. Run migrations +The overlay ships with release tags as defaults. Before production promotion, replace them with digests: ```bash -PYTHONPATH=src alembic upgrade head +kustomize edit set image \ + acr-gateway=ghcr.io/adamdistefanoai/acr-framework/acr-gateway@sha256:<digest> \ + acr-killswitch=ghcr.io/adamdistefanoai/acr-framework/acr-killswitch@sha256:<digest> ``` -## 8. Deploy Kubernetes base +Then verify those digests using [provenance-and-verification.md](provenance-and-verification.md). + +If your registry packages are private, make sure the cluster has image-pull credentials or mirror the images into your internal registry before deployment. + +## 8. Build and review the rendered manifests ```bash -kubectl apply -k deploy/k8s/base +kubectl kustomize deploy/k8s/overlays/production ``` -## 9. 
Confirm runtime policy wiring +Review for: + +- no example secrets +- correct hostnames and CIDRs +- expected image references +- expected external secret mappings + +## 9. Apply the overlay + +```bash +kubectl apply -k deploy/k8s/overlays/production +``` + +## 10. Confirm policy delivery + +The production OPA path uses: -The provided OPA deployment uses: - `/acr/policy-bundles/discovery.json` Production policy flow: -1. Create or edit a draft in the GUI -2. Validate it -3. Publish a versioned release -4. Activate the release -5. OPA pulls the active runtime bundle automatically -## 10. First operator login +1. Create or edit a draft in the console. +2. Validate it. +3. Publish a versioned release. +4. Activate the release. +5. OPA pulls the active runtime bundle automatically. + +## 11. First operator login Open: + - `https://your-domain/console` Use OIDC for normal login. Keep API keys for bootstrap and break-glass only. -## 11. First production agent - -From the console: -- register the agent -- issue an agent token -- create or load a policy draft -- publish and activate the policy -- verify the agent can call `/acr/evaluate` - ## 12. 
Readiness checklist +- `kubectl -n acr-system get externalsecret,secret` +- `kubectl -n acr-system rollout status deploy/acr-gateway` +- `kubectl -n acr-system rollout status deploy/acr-killswitch` +- `kubectl -n acr-system rollout status deploy/acr-opa` - OIDC login works - approval queue works - kill/restore works - OPA discovery endpoint is reachable - active runtime bundle contains the activated release -- refund/email/ticket executor endpoints succeed +- executor endpoints succeed - alerts and dashboards are configured diff --git a/implementations/acr-control-plane/docs/provenance-and-verification.md b/implementations/acr-control-plane/docs/provenance-and-verification.md new file mode 100644 index 0000000..fc21561 --- /dev/null +++ b/implementations/acr-control-plane/docs/provenance-and-verification.md @@ -0,0 +1,98 @@ +# Provenance and Verification + +This implementation now includes a release path designed for enterprise verification, not just distribution. + +## What Gets Produced + +The active release workflow is [`acr-control-plane-release.yml`](../../../.github/workflows/acr-control-plane-release.yml). 
+ +For each tagged release it is intended to produce: + +- signed container images for: + - `ghcr.io/<owner>/<repo>/acr-gateway` + - `ghcr.io/<owner>/<repo>/acr-killswitch` +- GitHub build provenance attestations for those images +- a versioned compliance package tarball +- a Sigstore bundle for the compliance package tarball +- release assets containing the compliance package, manifest, checksums, and signature bundle + +## Trust Model + +Release verification assumes: + +- images are built from the tagged repository state by GitHub Actions +- GitHub OIDC is used for keyless signing +- Sigstore and GitHub artifact attestations are the source of truth for build identity +- deployers verify by digest, not just by tag + +## Verify a Container Signature + +Use `cosign` against the image digest: + +```bash +cosign verify \ + --certificate-identity-regexp "^https://github.com/<owner>/<repo>/.github/workflows/acr-control-plane-release.yml@refs/tags/v.*$" \ + --certificate-oidc-issuer "https://token.actions.githubusercontent.com" \ + ghcr.io/<owner>/<repo>/acr-gateway@sha256:<digest> +``` + +Repeat for `acr-killswitch` with its own digest. + +Why this matters: + +- the digest prevents tag-rebinding attacks +- the workflow identity ties the signature to the intended release workflow + +## Verify GitHub Build Provenance + +Use the GitHub CLI against the image reference: + +```bash +docker login ghcr.io +gh attestation verify \ + oci://ghcr.io/<owner>/<repo>/acr-gateway@sha256:<digest> \ + -R <owner>/<repo> +``` + +Use the same pattern for `acr-killswitch`. 
+ +## Verify the Compliance Package Signature + +The compliance package is signed as a blob and shipped with a Sigstore bundle: + +```bash +cosign verify-blob \ + acr-control-plane-compliance-package-v1.0.1.tar.gz \ + --bundle acr-control-plane-compliance-package-v1.0.1.sigstore.json \ + --certificate-identity-regexp "^https://github.com/<owner>/<repo>/.github/workflows/acr-control-plane-release.yml@refs/tags/v.*$" \ + --certificate-oidc-issuer "https://token.actions.githubusercontent.com" +``` + +Then verify the tarball checksum: + +```bash +shasum -a 256 -c acr-control-plane-compliance-package-v1.0.1.sha256 +``` + +## Operational Guidance + +- Pin deployments by digest, not mutable tags. +- Verify signatures and provenance before promotion into staging or production. +- Store verification output as part of the release approval record. +- Treat signature failure, missing attestations, or digest drift as release blockers. + +## Known Scope + +This release path proves: + +- who built the release artifact +- what workflow identity signed it +- what immutable image or blob digest was produced + +It does not by itself prove: + +- that the deployment environment is correctly segmented +- that runtime policy was configured safely after install +- that downstream systems are protected from direct access + +Those remain part of the deployment and compliance package. diff --git a/implementations/acr-control-plane/docs/sdk.md b/implementations/acr-control-plane/docs/sdk.md new file mode 100644 index 0000000..8478237 --- /dev/null +++ b/implementations/acr-control-plane/docs/sdk.md @@ -0,0 +1,113 @@ +# ACR SDKs and Adapters + +This repo now includes official client surfaces for integrating ACR into agent runtimes and workflow tooling. 
+ +## Python SDK + +Import from: + +```python +from acr.sdk import ACRClient, AsyncACRClient +``` + +What it covers: + +- agent registration +- token issuance +- runtime calls to `POST /acr/evaluate` +- bound per-agent sessions +- typed request and decision models + +Core modules: + +- [client.py](../src/acr/sdk/client.py) +- [errors.py](../src/acr/sdk/errors.py) +- [langgraph.py](../src/acr/sdk/langgraph.py) + +Basic example: + +```python +from acr.pillar1_identity.models import AgentRegisterRequest +from acr.sdk import ACRClient + +client = ACRClient( + base_url="https://acr.example.com", + operator_api_key="operator-key", +) + +client.ensure_agent_registered( + AgentRegisterRequest( + agent_id="support-bot", + owner="support@example.com", + purpose="Handle governed support actions", + allowed_tools=["send_email", "create_ticket"], + ) +) + +session = client.issue_agent_session("support-bot") +result = session.evaluate_action( + tool_name="send_email", + parameters={"to": "alice@example.com", "subject": "Update", "body": "Resolved"}, + context={"session_id": "sess-123"}, +) +``` + +## LangGraph Adapter + +The Python SDK also includes a real adapter layer for LangGraph/LangChain-style tools. 
+ +Use: + +- `guard_tool(...)` +- `guard_async_tool(...)` +- `build_langchain_tool(...)` + +Example: + +```python +from acr.sdk import ACRClient, guard_tool + +client = ACRClient(base_url="https://acr.example.com", operator_api_key="operator-key") +session = client.issue_agent_session("refund-agent") + +def issue_refund(customer_id: str, amount: float) -> dict: + return {"status": "queued", "customer_id": customer_id, "amount": amount} + +guarded_refund = guard_tool( + issue_refund, + session=session, + context_builder=lambda params: {"workflow": "refund_graph"}, + intent_builder=lambda params: { + "goal": "Resolve customer issue with a refund", + "justification": f"Refund {params['amount']} to customer {params['customer_id']}", + }, +) +``` + +That wrapper will: + +- call ACR before the tool executes +- raise on `deny` +- raise on `escalate` +- apply modified parameters on `modify` +- return gateway-managed execution output when configured not to execute locally + +## TypeScript SDK + +The TypeScript package lives at: + +- [sdks/typescript/README.md](../sdks/typescript/README.md) + +It mirrors the same concepts: + +- `ACRClient` +- `ACRAgentSession` +- `evaluate(...)` +- `evaluateAction(...)` +- typed decision and error objects + +## Why This Matters + +The goal is not to make ACR optional glue code. + +The goal is to make the control plane easy to place at the action boundary, so teams can integrate it into their existing runtime without re-implementing auth, evaluation payloads, or decision handling every time. 
diff --git a/implementations/acr-control-plane/examples/n8n/README.md b/implementations/acr-control-plane/examples/n8n/README.md index 7d09fcf..7c0af83 100644 --- a/implementations/acr-control-plane/examples/n8n/README.md +++ b/implementations/acr-control-plane/examples/n8n/README.md @@ -24,8 +24,8 @@ n8n Trigger / Workflow ## Files In This Example -- [workflow JSON](/Users/adamdistefano/Desktop/control_plane/examples/n8n/acr_sensitive_action_workflow.json) -- [protected executor example](/Users/adamdistefano/Desktop/control_plane/examples/protected_executor/README.md) +- [workflow JSON](acr_sensitive_action_workflow.json) +- [protected executor example](../protected_executor/README.md) The workflow JSON is meant as a starting point, not a complete production export. diff --git a/implementations/acr-control-plane/examples/protected_executor/README.md b/implementations/acr-control-plane/examples/protected_executor/README.md index ab673f6..614528a 100644 --- a/implementations/acr-control-plane/examples/protected_executor/README.md +++ b/implementations/acr-control-plane/examples/protected_executor/README.md @@ -68,14 +68,14 @@ That is the recommended production model: - protected executor verifies that ACR explicitly authorized the exact payload - internal system executes only after both checks pass -For a higher-level integration view, see [docs/orchestrators.md](/Users/adamdistefano/Desktop/control_plane/docs/orchestrators.md). +For a higher-level integration view, see [docs/orchestrators.md](../../docs/orchestrators.md). 
## Make It More Reusable This example now supports a small amount of packaging and runtime configuration: -- [Dockerfile](/Users/adamdistefano/Desktop/control_plane/examples/protected_executor/Dockerfile) -- [env example](/Users/adamdistefano/Desktop/control_plane/examples/protected_executor/.env.example) +- [Dockerfile](Dockerfile) +- [env example](.env.example) - `PROTECTED_EXECUTOR_ALLOWED_TOOLS` to restrict which demo tools are exposed Example container run: diff --git a/implementations/acr-control-plane/examples/sample_agent/README.md b/implementations/acr-control-plane/examples/sample_agent/README.md index 1c366ce..90aec00 100644 --- a/implementations/acr-control-plane/examples/sample_agent/README.md +++ b/implementations/acr-control-plane/examples/sample_agent/README.md @@ -10,9 +10,9 @@ A simple demonstration agent that exercises all six ACR control pillars. docker-compose up --build ``` -2. Install the HTTP client: +2. Install the control plane package: ```bash - pip install httpx + pip install -e ".[dev]" ``` 3. Export the development operator API key used for agent onboarding: @@ -26,6 +26,12 @@ A simple demonstration agent that exercises all six ACR control pillars. 
python examples/sample_agent/agent.py ``` +The sample now uses the official Python SDK: + +- `ACRClient.ensure_agent_registered(...)` +- `ACRClient.issue_agent_session(...)` +- `ACRAgentSession.evaluate_action(...)` + ## What it demonstrates | Step | Action | Expected result | diff --git a/implementations/acr-control-plane/examples/sample_agent/agent.py b/implementations/acr-control-plane/examples/sample_agent/agent.py index 9ebd231..7009056 100644 --- a/implementations/acr-control-plane/examples/sample_agent/agent.py +++ b/implementations/acr-control-plane/examples/sample_agent/agent.py @@ -16,28 +16,28 @@ import os import sys -import httpx +from acr.pillar1_identity.models import AgentBoundaries, AgentRegisterRequest +from acr.sdk import ACRClient +from acr.gateway.models import EvaluateResponse ACR_GATEWAY_URL = "http://localhost:8000" AGENT_ID = "customer-support-01" OPERATOR_API_KEY = os.getenv("ACR_OPERATOR_API_KEY", "dev-operator-key") -def print_result(action_name: str, response: httpx.Response) -> None: - status = response.status_code - data = response.json() - decision = data.get("decision", "unknown") - symbol = {"allow": "✓", "deny": "✗", "escalate": "⏳"}.get(decision, "?") - print(f"\n {symbol} [{status}] {action_name}") +def print_result(action_name: str, result: EvaluateResponse) -> None: + decision = result.decision + symbol = {"allow": "✓", "modify": "~", "deny": "✗", "escalate": "⏳"}.get(decision, "?") + print(f"\n {symbol} [{decision.upper()}] {action_name}") print(f" Decision: {decision}") - if data.get("reason"): - print(f" Reason: {data['reason']}") - if data.get("approval_request_id"): - print(f" Approval ID: {data['approval_request_id']}") - if data.get("latency_ms"): - print(f" Latency: {data['latency_ms']}ms") - if data.get("correlation_id"): - print(f" Correlation: {data['correlation_id']}") + if result.reason: + print(f" Reason: {result.reason}") + if result.approval_request_id: + print(f" Approval ID: {result.approval_request_id}") + if 
result.latency_ms is not None: + print(f" Latency: {result.latency_ms}ms") + if result.correlation_id: + print(f" Correlation: {result.correlation_id}") def main() -> None: @@ -45,134 +45,101 @@ def main() -> None: print(" ACR Sample Agent — Customer Support Bot") print("=" * 60) - client = httpx.Client(base_url=ACR_GATEWAY_URL, timeout=10.0) - operator_headers = {"X-Operator-API-Key": OPERATOR_API_KEY} - - # ── Step 1: Register the agent ──────────────────────────────────────────── - print("\n[1] Registering agent with ACR gateway...") - reg_resp = client.post( - "/acr/agents", - headers=operator_headers, - json={ - "agent_id": AGENT_ID, - "owner": "support-engineering@example.com", - "purpose": "Handle customer support tickets and issue resolutions", - "risk_tier": "medium", - "allowed_tools": ["query_customer_db", "send_email", "create_ticket", "issue_refund"], - "forbidden_tools": ["delete_customer"], - "boundaries": { - "max_actions_per_minute": 30, - "max_cost_per_hour_usd": 5.0, - "credential_rotation_days": 90, + with ACRClient(base_url=ACR_GATEWAY_URL, operator_api_key=OPERATOR_API_KEY) as client: + print("\n[1] Registering agent with ACR gateway...") + manifest = AgentRegisterRequest( + agent_id=AGENT_ID, + owner="support-engineering@example.com", + purpose="Handle customer support tickets and issue resolutions", + risk_tier="medium", + allowed_tools=["query_customer_db", "send_email", "create_ticket", "issue_refund"], + forbidden_tools=["delete_customer"], + boundaries=AgentBoundaries( + max_actions_per_minute=30, + max_cost_per_hour_usd=5.0, + credential_rotation_days=90, + ), + ) + agent = client.ensure_agent_registered(manifest) + print(f" ✓ Agent ready ({agent.agent_id})") + + print("\n[2] Issuing agent JWT through the SDK...") + session = client.issue_agent_session(AGENT_ID) + token = session.access_token + print(f" ✓ Token issued (length={len(token)} chars)") + + print("\n[3] Executing allowed tool calls...") + context = {"session_id": 
"sess-demo-001", "actions_this_minute": 1, "hourly_spend_usd": 0.10} + + result = session.evaluate_action( + tool_name="query_customer_db", + parameters={"customer_id": "C-12345"}, + description="Look up customer record", + context=context, + ) + print_result("query_customer_db (customer C-12345)", result) + + context["actions_this_minute"] += 1 + result = session.evaluate_action( + tool_name="send_email", + parameters={ + "to": "alice@example.com", + "subject": "Your ticket", + "body": "We have resolved your issue.", }, - }, - ) - if reg_resp.status_code in (201, 409): - print(f" ✓ Agent registered (status={reg_resp.status_code})") - else: - print(f" ✗ Registration failed: {reg_resp.status_code} — {reg_resp.text}") - sys.exit(1) - - # ── Step 2: Issue a JWT and attach it to all evaluate requests ──────────── - print("\n[2] Issuing agent JWT...") - token_resp = client.post(f"/acr/agents/{AGENT_ID}/token", headers=operator_headers) - if token_resp.status_code != 200: - print(f" ✗ Token issuance failed: {token_resp.status_code} — {token_resp.text}") - sys.exit(1) - token_data = token_resp.json() - access_token = token_data["access_token"] - expires_in = token_data["expires_in_seconds"] - print(f" ✓ Token issued (expires in {expires_in}s)") - - auth_headers = {"Authorization": f"Bearer {access_token}"} - - # ── Step 3: Allowed actions ─────────────────────────────────────────────── - print("\n[3] Executing allowed tool calls...") - - context = {"session_id": "sess-demo-001", "actions_this_minute": 1, "hourly_spend_usd": 0.10} - - resp = client.post("/acr/evaluate", headers=auth_headers, json={ - "agent_id": AGENT_ID, - "action": { - "tool_name": "query_customer_db", - "parameters": {"customer_id": "C-12345"}, - "description": "Look up customer record", - }, - "context": context, - }) - print_result("query_customer_db (customer C-12345)", resp) - - context["actions_this_minute"] += 1 - resp = client.post("/acr/evaluate", headers=auth_headers, json={ - "agent_id": 
AGENT_ID, - "action": { - "tool_name": "send_email", - "parameters": {"to": "alice@example.com", "subject": "Your ticket", "body": "We have resolved your issue."}, - "description": "Send resolution email", - }, - "context": context, - }) - print_result("send_email (resolution notification)", resp) - - context["actions_this_minute"] += 1 - resp = client.post("/acr/evaluate", headers=auth_headers, json={ - "agent_id": AGENT_ID, - "action": { - "tool_name": "create_ticket", - "parameters": {"customer_id": "C-12345", "subject": "Follow-up required"}, - "description": "Create follow-up ticket", - }, - "context": context, - }) - print_result("create_ticket (follow-up)", resp) - - # ── Step 4: Forbidden action ────────────────────────────────────────────── - print("\n[4] Attempting forbidden action (delete_customer)...") - context["actions_this_minute"] += 1 - resp = client.post("/acr/evaluate", headers=auth_headers, json={ - "agent_id": AGENT_ID, - "action": { - "tool_name": "delete_customer", - "parameters": {"customer_id": "C-12345"}, - "description": "Delete customer record", - }, - "context": context, - }) - print_result("delete_customer (should be DENIED)", resp) - assert resp.json().get("decision") == "deny", "Expected policy denial!" - - # ── Step 5: High-value refund → approval escalation ─────────────────────── - print("\n[5] Requesting high-value refund (>$100 → human approval required)...") - context["actions_this_minute"] += 1 - resp = client.post("/acr/evaluate", headers=auth_headers, json={ - "agent_id": AGENT_ID, - "action": { - "tool_name": "issue_refund", - "parameters": {"customer_id": "C-12345", "amount": 250.00, "reason": "Product defect"}, - "description": "Issue $250 refund", - }, - "context": context, - }) - print_result("issue_refund $250 (should ESCALATE)", resp) - assert resp.json().get("decision") == "escalate", "Expected escalation!" 
- - # ── Step 6: Check health ────────────────────────────────────────────────── - print("\n[6] Checking control plane health...") - health_resp = client.get("/acr/health") - print(f" ✓ Health: {health_resp.json()}") - - print("\n" + "=" * 60) - print(" Sample agent run complete.") - print(" All six ACR pillars exercised:") - print(" ✓ Pillar 1: Identity — agent registered and JWT issued") - print(" ✓ Pillar 2: Policy — tool allowlist + forbidden tool blocked") - print(" ✓ Pillar 3: Drift — metrics recorded (async)") - print(" ✓ Pillar 4: Observability — telemetry events logged") - print(" ✓ Pillar 5: Containment — kill switch checked each request") - print(" ✓ Pillar 6: Authority — refund escalated to approval queue") - print("=" * 60) - - client.close() + description="Send resolution email", + context=context, + ) + print_result("send_email (resolution notification)", result) + + context["actions_this_minute"] += 1 + result = session.evaluate_action( + tool_name="create_ticket", + parameters={"customer_id": "C-12345", "subject": "Follow-up required"}, + description="Create follow-up ticket", + context=context, + ) + print_result("create_ticket (follow-up)", result) + + print("\n[4] Attempting forbidden action (delete_customer)...") + context["actions_this_minute"] += 1 + result = session.evaluate_action( + tool_name="delete_customer", + parameters={"customer_id": "C-12345"}, + description="Delete customer record", + context=context, + ) + print_result("delete_customer (should be DENIED)", result) + assert result.decision == "deny", "Expected policy denial!" 
+ + print("\n[5] Requesting high-value refund (>$100 → human approval required)...") + context["actions_this_minute"] += 1 + result = session.evaluate_action( + tool_name="issue_refund", + parameters={"customer_id": "C-12345", "amount": 250.00, "reason": "Product defect"}, + description="Issue $250 refund", + context=context, + ) + print_result("issue_refund $250 (should ESCALATE)", result) + assert result.decision == "escalate", "Expected escalation!" + + print("\n[6] Checking control plane health...") + try: + health = client.get_health() + except Exception as exc: + print(f" ✗ Health check failed: {exc}") + sys.exit(1) + print(f" ✓ Health: {health}") + print("\n" + "=" * 60) + print(" Sample agent run complete.") + print(" All six ACR pillars exercised:") + print(" ✓ Pillar 1: Identity — agent registered and JWT issued") + print(" ✓ Pillar 2: Policy — tool allowlist + forbidden tool blocked") + print(" ✓ Pillar 3: Drift — metrics recorded (async)") + print(" ✓ Pillar 4: Observability — telemetry events logged") + print(" ✓ Pillar 5: Containment — kill switch checked each request") + print(" ✓ Pillar 6: Authority — refund escalated to approval queue") + print("=" * 60) if __name__ == "__main__": diff --git a/implementations/acr-control-plane/pyproject.toml b/implementations/acr-control-plane/pyproject.toml index b2e3fa3..ffe5ffe 100644 --- a/implementations/acr-control-plane/pyproject.toml +++ b/implementations/acr-control-plane/pyproject.toml @@ -72,6 +72,9 @@ dev = [ # Linting / formatting "ruff>=0.4.0,<1.0.0", ] +integrations = [ + "langchain-core>=0.3.0,<1.0.0", +] [tool.hatch.build.targets.wheel] packages = ["src/acr"] diff --git a/implementations/acr-control-plane/scripts/build_compliance_package.py b/implementations/acr-control-plane/scripts/build_compliance_package.py new file mode 100644 index 0000000..5428913 --- /dev/null +++ b/implementations/acr-control-plane/scripts/build_compliance_package.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +"""Build a 
versioned compliance package for the ACR control plane release.""" +from __future__ import annotations + +import argparse +import hashlib +import json +import tarfile +from datetime import datetime, timezone +from pathlib import Path + + +INCLUDED_FILES = [ + "README.md", + "docs/provenance-and-verification.md", + "docs/enterprise-roadmap.md", + "docs/failure-load-dr-validation-2026-04-08.md", + "docs/deployment.md", + "docs/production_install.md", + "docs/compliance/README.md", + "docs/compliance/threat-model.md", + "docs/compliance/shared-responsibility-matrix.md", + "docs/compliance/control-mapping.md", + "docs/compliance/evidence-package.md", + "docs/compliance/external-assessment-scope.md", +] + + +def sha256_bytes(data: bytes) -> str: + return hashlib.sha256(data).hexdigest() + + +def sha256_file(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def build_manifest( + *, + implementation_dir: Path, + version: str, + source_ref: str, + package_basename: str, +) -> tuple[dict, bytes]: + files: list[dict[str, object]] = [] + for rel in INCLUDED_FILES: + path = implementation_dir / rel + if not path.exists(): + raise FileNotFoundError(f"Required compliance package file is missing: {path}") + files.append( + { + "path": rel, + "bytes": path.stat().st_size, + "sha256": sha256_file(path), + } + ) + + manifest = { + "package_name": package_basename, + "version": version, + "source_ref": source_ref, + "generated_at": datetime.now(timezone.utc).isoformat(), + "files": files, + } + manifest_bytes = json.dumps(manifest, indent=2, sort_keys=True).encode() + b"\n" + return manifest, manifest_bytes + + +def create_archive( + *, + implementation_dir: Path, + package_basename: str, + archive_path: Path, + manifest_bytes: bytes, +) -> None: + archive_path.parent.mkdir(parents=True, exist_ok=True) + with tarfile.open(archive_path, 
"w:gz") as archive: + for rel in INCLUDED_FILES: + source = implementation_dir / rel + archive.add(source, arcname=f"{package_basename}/{rel}") + + manifest_info = tarfile.TarInfo(name=f"{package_basename}/package-manifest.json") + manifest_info.size = len(manifest_bytes) + archive.addfile(manifest_info, fileobj=_BytesReader(manifest_bytes)) + + +class _BytesReader: + def __init__(self, payload: bytes) -> None: + self._payload = payload + self._offset = 0 + + def read(self, size: int = -1) -> bytes: + if size < 0: + size = len(self._payload) - self._offset + chunk = self._payload[self._offset : self._offset + size] + self._offset += len(chunk) + return chunk + + +def write_checksums(paths: list[Path], destination: Path) -> None: + lines = [f"{sha256_file(path)} {path.name}" for path in paths] + destination.write_text("\n".join(lines) + "\n") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--implementation-dir", + required=True, + help="Path to implementations/acr-control-plane", + ) + parser.add_argument( + "--version", + required=True, + help="Release version or tag, for example v1.0.1", + ) + parser.add_argument( + "--source-ref", + required=True, + help="Git commit SHA or other immutable source reference", + ) + parser.add_argument( + "--output-dir", + default="dist/compliance", + help="Directory where the package, manifest, and checksums will be written", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + implementation_dir = Path(args.implementation_dir).resolve() + output_dir = Path(args.output_dir).resolve() + package_basename = f"acr-control-plane-compliance-package-{args.version}" + + _, manifest_bytes = build_manifest( + implementation_dir=implementation_dir, + version=args.version, + source_ref=args.source_ref, + package_basename=package_basename, + ) + + output_dir.mkdir(parents=True, exist_ok=True) + manifest_path = output_dir / 
f"{package_basename}.manifest.json" + manifest_path.write_bytes(manifest_bytes) + + archive_path = output_dir / f"{package_basename}.tar.gz" + create_archive( + implementation_dir=implementation_dir, + package_basename=package_basename, + archive_path=archive_path, + manifest_bytes=manifest_bytes, + ) + + checksum_path = output_dir / f"{package_basename}.sha256" + write_checksums([archive_path, manifest_path], checksum_path) + + summary = { + "package": str(archive_path), + "manifest": str(manifest_path), + "checksums": str(checksum_path), + "package_sha256": sha256_file(archive_path), + } + print(json.dumps(summary, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/implementations/acr-control-plane/sdks/typescript/README.md b/implementations/acr-control-plane/sdks/typescript/README.md new file mode 100644 index 0000000..8e0f376 --- /dev/null +++ b/implementations/acr-control-plane/sdks/typescript/README.md @@ -0,0 +1,69 @@ +# ACR TypeScript SDK + +This package provides the official TypeScript client for the ACR control plane. + +## What It Covers + +- operator-side agent registration +- token issuance +- runtime calls to `POST /acr/evaluate` +- session-bound agent helpers +- decision-aware error helpers + +## Install + +```bash +npm install acr-control-plane-sdk +``` + +## Basic Usage + +```ts +import { ACRClient } from "acr-control-plane-sdk"; + +const client = new ACRClient({ + baseUrl: "https://acr.example.com", + operatorApiKey: process.env.ACR_OPERATOR_API_KEY!, +}); + +await client.ensureAgentRegistered({ + agent_id: "support-bot", + owner: "support@example.com", + purpose: "Handle support actions", + risk_tier: "medium", + allowed_tools: ["query_customer_db", "send_email"], +}); + +const session = await client.issueAgentSession("support-bot"); + +const result = await session.evaluateAction({ + tool_name: "send_email", + parameters: { + to: "alice@example.com", + subject: "Ticket update", + body: "We resolved your issue." 
+ }, + description: "Send support resolution email" +}, { + session_id: "sess-123" +}); + +console.log(result.decision); +``` + +## Decision Handling + +The SDK returns normal decision payloads for: + +- `allow` +- `modify` +- `deny` +- `escalate` + +If you want exception-based control flow, use `assertRunnableDecision(result)`. + +## Package Layout + +- [src/index.ts](src/index.ts): source SDK +- [dist/index.js](dist/index.js): runtime JS entrypoint +- [dist/index.d.ts](dist/index.d.ts): published TypeScript declarations diff --git a/implementations/acr-control-plane/sdks/typescript/dist/index.d.ts b/implementations/acr-control-plane/sdks/typescript/dist/index.d.ts new file mode 100644 index 0000000..46e1750 --- /dev/null +++ b/implementations/acr-control-plane/sdks/typescript/dist/index.d.ts @@ -0,0 +1,129 @@ +export type Decision = "allow" | "deny" | "modify" | "escalate"; +export interface ActionRequest { + tool_name: string; + parameters?: Record; + description?: string; +} +export interface IntentRequest { + goal?: string; + justification?: string; + expected_effects?: string[]; + requested_by_step?: string; + metadata?: Record; +} +export interface EvaluateRequest { + agent_id: string; + action: ActionRequest; + context?: Record; + intent?: IntentRequest; +} +export interface PolicyDecision { + policy_id: string; + decision: Decision; + reason?: string | null; + latency_ms?: number | null; +} +export interface EvaluateResponse { + decision: Decision; + correlation_id?: string | null; + reason?: string | null; + error_code?: string | null; + approval_request_id?: string | null; + approval_queue?: string | null; + sla_minutes?: number | null; + policy_decisions?: PolicyDecision[]; + drift_score?: number | null; + latency_ms?: number | null; + estimated_cost_usd?: number | null; + authoritative_hourly_spend_usd?: number | null; + modified_action?: ActionRequest | null; + execution_result?: Record | null; +} +export interface AgentBoundaries { + 
max_actions_per_minute?: number; + max_cost_per_hour_usd?: number; + default_action_cost_usd?: number | null; + tool_costs_usd?: Record; + allowed_regions?: string[]; + credential_rotation_days?: number; +} +export interface DataAccessEntry { + resource: string; + permission?: "READ" | "READ_WRITE" | "WRITE" | "NONE"; +} +export interface AgentRegisterRequest { + agent_id: string; + owner: string; + purpose: string; + risk_tier?: "low" | "medium" | "high"; + allowed_tools?: string[]; + forbidden_tools?: string[]; + data_access?: DataAccessEntry[]; + boundaries?: AgentBoundaries; + version?: string; + parent_agent_id?: string | null; + capabilities?: string[]; + lifecycle_state?: "draft" | "active" | "deprecated" | "retired"; +} +export interface AgentResponse extends AgentRegisterRequest { + is_active: boolean; + health_status: "unknown" | "healthy" | "degraded" | "unhealthy"; + last_heartbeat_at?: string | null; + created_at: string; + updated_at: string; +} +export interface TokenResponse { + agent_id: string; + access_token: string; + token_type: string; + expires_in_seconds: number; +} +export interface ACRClientOptions { + baseUrl: string; + operatorApiKey?: string; + fetchImpl?: typeof fetch; +} +export declare class ACRHttpError extends Error { + readonly statusCode: number; + readonly body: unknown; + constructor(statusCode: number, message: string, body?: unknown); +} +export declare class ACRDecisionError extends Error { + readonly response: EvaluateResponse; + constructor(response: EvaluateResponse); +} +export declare class ACRDeniedError extends ACRDecisionError { + constructor(response: EvaluateResponse); +} +export declare class ACREscalatedError extends ACRDecisionError { + constructor(response: EvaluateResponse); +} +export declare function assertRunnableDecision(response: EvaluateResponse): EvaluateResponse; +export declare class ACRClient { + private readonly baseUrl; + private readonly operatorApiKey?; + private readonly fetchImpl; + 
constructor(options: ACRClientOptions); + private operatorHeaders; + private agentHeaders; + private requestJson; + registerAgent(request: AgentRegisterRequest): Promise; + ensureAgentRegistered(request: AgentRegisterRequest): Promise; + getAgent(agentId: string): Promise; + issueAgentToken(agentId: string): Promise; + createAgentSession(agentId: string, accessToken: string): ACRAgentSession; + issueAgentSession(agentId: string): Promise; + evaluate(request: EvaluateRequest, accessToken: string): Promise; + evaluateAction(agentId: string, accessToken: string, action: ActionRequest, context?: Record, intent?: IntentRequest): Promise; + getHealth(): Promise>; + getReady(): Promise>; +} +export declare class ACRAgentSession { + private readonly client; + readonly agentId: string; + accessToken: string; + constructor(client: ACRClient, agentId: string, accessToken: string); + refreshToken(): Promise; + evaluate(request: EvaluateRequest): Promise; + evaluateAction(action: ActionRequest, context?: Record, intent?: IntentRequest): Promise; +} diff --git a/implementations/acr-control-plane/sdks/typescript/dist/index.js b/implementations/acr-control-plane/sdks/typescript/dist/index.js new file mode 100644 index 0000000..b5b89e8 --- /dev/null +++ b/implementations/acr-control-plane/sdks/typescript/dist/index.js @@ -0,0 +1,197 @@ +export class ACRHttpError extends Error { + constructor(statusCode, message, body) { + super(message); + this.name = "ACRHttpError"; + this.statusCode = statusCode; + this.body = body; + } +} + +export class ACRDecisionError extends Error { + constructor(response) { + super(response.reason ?? 
`ACR decision '${response.decision}' blocked execution`); + this.name = "ACRDecisionError"; + this.response = response; + } +} + +export class ACRDeniedError extends ACRDecisionError { + constructor(response) { + super(response); + this.name = "ACRDeniedError"; + } +} + +export class ACREscalatedError extends ACRDecisionError { + constructor(response) { + super(response); + this.name = "ACREscalatedError"; + } +} + +function normalizeBaseUrl(baseUrl) { + return baseUrl.replace(/\/+$/, ""); +} + +function ensureOperatorApiKey(apiKey) { + if (!apiKey) { + throw new Error("operatorApiKey is required for operator endpoints"); + } + return apiKey; +} + +async function parseResponseBody(response) { + const contentType = response.headers.get("content-type") ?? ""; + if (contentType.includes("application/json")) { + return response.json(); + } + return response.text(); +} + +export function assertRunnableDecision(response) { + if (response.decision === "deny") { + throw new ACRDeniedError(response); + } + if (response.decision === "escalate") { + throw new ACREscalatedError(response); + } + return response; +} + +export class ACRClient { + constructor(options) { + this.baseUrl = normalizeBaseUrl(options.baseUrl); + this.operatorApiKey = options.operatorApiKey; + this.fetchImpl = options.fetchImpl ?? 
fetch; + } + + operatorHeaders() { + return { + "Content-Type": "application/json", + "X-Operator-API-Key": ensureOperatorApiKey(this.operatorApiKey) + }; + } + + agentHeaders(accessToken) { + return { + "Content-Type": "application/json", + "Authorization": `Bearer ${accessToken}` + }; + } + + async requestJson(path, init, expectedStatuses = [200, 201]) { + const response = await this.fetchImpl(`${this.baseUrl}${path}`, init); + const body = await parseResponseBody(response); + if (!expectedStatuses.includes(response.status)) { + throw new ACRHttpError(response.status, `ACR API request failed with status ${response.status}`, body); + } + return body; + } + + async registerAgent(request) { + return this.requestJson("/acr/agents", { + method: "POST", + headers: this.operatorHeaders(), + body: JSON.stringify(request) + }); + } + + async ensureAgentRegistered(request) { + const response = await this.fetchImpl(`${this.baseUrl}/acr/agents`, { + method: "POST", + headers: this.operatorHeaders(), + body: JSON.stringify(request) + }); + const body = await parseResponseBody(response); + if (response.status === 409) { + return this.getAgent(request.agent_id); + } + if (response.status !== 201) { + throw new ACRHttpError(response.status, `ACR API request failed with status ${response.status}`, body); + } + return body; + } + + async getAgent(agentId) { + return this.requestJson(`/acr/agents/${agentId}`, { + method: "GET", + headers: this.operatorHeaders() + }); + } + + async issueAgentToken(agentId) { + return this.requestJson(`/acr/agents/${agentId}/token`, { + method: "POST", + headers: this.operatorHeaders() + }); + } + + createAgentSession(agentId, accessToken) { + return new ACRAgentSession(this, agentId, accessToken); + } + + async issueAgentSession(agentId) { + const token = await this.issueAgentToken(agentId); + return this.createAgentSession(token.agent_id, token.access_token); + } + + async evaluate(request, accessToken) { + const response = await 
{
  "name": "acr-control-plane-sdk",
  "version": "1.0.1",
  "description": "Official TypeScript SDK for the ACR control plane",
  "type": "module",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js"
    }
  },
  "files": [
    "dist",
    "src",
    "README.md"
  ],
  "scripts": {
    "build": "tsc -p tsconfig.json",
    "typecheck": "tsc --noEmit -p tsconfig.json"
  },
  "engines": {
    "node": ">=18"
  }
}
tool_costs_usd?: Record; + allowed_regions?: string[]; + credential_rotation_days?: number; +} + +export interface DataAccessEntry { + resource: string; + permission?: "READ" | "READ_WRITE" | "WRITE" | "NONE"; +} + +export interface AgentRegisterRequest { + agent_id: string; + owner: string; + purpose: string; + risk_tier?: "low" | "medium" | "high"; + allowed_tools?: string[]; + forbidden_tools?: string[]; + data_access?: DataAccessEntry[]; + boundaries?: AgentBoundaries; + version?: string; + parent_agent_id?: string | null; + capabilities?: string[]; + lifecycle_state?: "draft" | "active" | "deprecated" | "retired"; +} + +export interface AgentResponse extends AgentRegisterRequest { + is_active: boolean; + health_status: "unknown" | "healthy" | "degraded" | "unhealthy"; + last_heartbeat_at?: string | null; + created_at: string; + updated_at: string; +} + +export interface TokenResponse { + agent_id: string; + access_token: string; + token_type: string; + expires_in_seconds: number; +} + +export interface ACRClientOptions { + baseUrl: string; + operatorApiKey?: string; + fetchImpl?: typeof fetch; +} + +export class ACRHttpError extends Error { + readonly statusCode: number; + readonly body: unknown; + + constructor(statusCode: number, message: string, body?: unknown) { + super(message); + this.name = "ACRHttpError"; + this.statusCode = statusCode; + this.body = body; + } +} + +export class ACRDecisionError extends Error { + readonly response: EvaluateResponse; + + constructor(response: EvaluateResponse) { + super(response.reason ?? 
`ACR decision '${response.decision}' blocked execution`); + this.name = "ACRDecisionError"; + this.response = response; + } +} + +export class ACRDeniedError extends ACRDecisionError { + constructor(response: EvaluateResponse) { + super(response); + this.name = "ACRDeniedError"; + } +} + +export class ACREscalatedError extends ACRDecisionError { + constructor(response: EvaluateResponse) { + super(response); + this.name = "ACREscalatedError"; + } +} + +function normalizeBaseUrl(baseUrl: string): string { + return baseUrl.replace(/\/+$/, ""); +} + +function ensureOperatorApiKey(apiKey: string | undefined): string { + if (!apiKey) { + throw new Error("operatorApiKey is required for operator endpoints"); + } + return apiKey; +} + +async function parseResponseBody(response: Response): Promise { + const contentType = response.headers.get("content-type") ?? ""; + if (contentType.includes("application/json")) { + return response.json(); + } + return response.text(); +} + +function assertRunnableDecision(response: EvaluateResponse): EvaluateResponse { + if (response.decision === "deny") { + throw new ACRDeniedError(response); + } + if (response.decision === "escalate") { + throw new ACREscalatedError(response); + } + return response; +} + +export { assertRunnableDecision }; + +export class ACRClient { + private readonly baseUrl: string; + private readonly operatorApiKey?: string; + private readonly fetchImpl: typeof fetch; + + constructor(options: ACRClientOptions) { + this.baseUrl = normalizeBaseUrl(options.baseUrl); + this.operatorApiKey = options.operatorApiKey; + this.fetchImpl = options.fetchImpl ?? 
/**
 * Headers for operator-authenticated endpoints.
 * Throws (via ensureOperatorApiKey) when the client has no operatorApiKey.
 */
private operatorHeaders(): HeadersInit {
  return {
    "Content-Type": "application/json",
    "X-Operator-API-Key": ensureOperatorApiKey(this.operatorApiKey)
  };
}

/** Headers for agent-authenticated endpoints (Bearer token). */
private agentHeaders(accessToken: string): HeadersInit {
  return {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${accessToken}`
  };
}

/**
 * Send a request relative to baseUrl and return the parsed body cast to T.
 * The cast is unchecked: the body is not validated against T at runtime.
 * @throws ACRHttpError when the status is not listed in expectedStatuses.
 */
private async requestJson<T>(path: string, init: RequestInit, expectedStatuses: number[] = [200, 201]): Promise<T> {
  const response = await this.fetchImpl(`${this.baseUrl}${path}`, init);
  const body = await parseResponseBody(response);
  if (!expectedStatuses.includes(response.status)) {
    throw new ACRHttpError(response.status, `ACR API request failed with status ${response.status}`, body);
  }
  return body as T;
}

/** Register a new agent via POST /acr/agents. */
async registerAgent(request: AgentRegisterRequest): Promise<AgentResponse> {
  return this.requestJson<AgentResponse>("/acr/agents", {
    method: "POST",
    headers: this.operatorHeaders(),
    body: JSON.stringify(request)
  });
}

/**
 * Register the agent, or fetch the existing record when the server answers
 * 409. Any status other than 201 or 409 raises ACRHttpError.
 */
async ensureAgentRegistered(request: AgentRegisterRequest): Promise<AgentResponse> {
  const response = await this.fetchImpl(`${this.baseUrl}/acr/agents`, {
    method: "POST",
    headers: this.operatorHeaders(),
    body: JSON.stringify(request)
  });
  const body = await parseResponseBody(response);
  if (response.status === 409) {
    return this.getAgent(request.agent_id);
  }
  if (response.status !== 201) {
    throw new ACRHttpError(response.status, `ACR API request failed with status ${response.status}`, body);
  }
  return body as AgentResponse;
}

/**
 * Fetch one agent record.
 * agentId is percent-encoded so characters such as "/", "?" or "#" stay
 * inside the path segment instead of changing the requested route.
 * Pass the raw (not pre-encoded) identifier.
 */
async getAgent(agentId: string): Promise<AgentResponse> {
  return this.requestJson<AgentResponse>(`/acr/agents/${encodeURIComponent(agentId)}`, {
    method: "GET",
    headers: this.operatorHeaders()
  });
}

/**
 * Issue an access token for the agent.
 * agentId is percent-encoded for the same reason as in getAgent.
 */
async issueAgentToken(agentId: string): Promise<TokenResponse> {
  return this.requestJson<TokenResponse>(`/acr/agents/${encodeURIComponent(agentId)}/token`, {
    method: "POST",
    headers: this.operatorHeaders()
  });
}

/** Bind an already-issued token to a session object; performs no I/O. */
createAgentSession(agentId: string, accessToken: string): ACRAgentSession {
  return new ACRAgentSession(this, agentId, accessToken);
}
async issueAgentSession(agentId: string): Promise { + const token = await this.issueAgentToken(agentId); + return this.createAgentSession(token.agent_id, token.access_token); + } + + async evaluate(request: EvaluateRequest, accessToken: string): Promise { + const response = await this.fetchImpl(`${this.baseUrl}/acr/evaluate`, { + method: "POST", + headers: this.agentHeaders(accessToken), + body: JSON.stringify(request) + }); + const body = await parseResponseBody(response); + const expectedStatuses = new Set([200, 202, 403, 500, 503]); + if (!expectedStatuses.has(response.status)) { + throw new ACRHttpError(response.status, `Unexpected evaluate response status ${response.status}`, body); + } + return body as EvaluateResponse; + } + + async evaluateAction( + agentId: string, + accessToken: string, + action: ActionRequest, + context: Record = {}, + intent?: IntentRequest + ): Promise { + return this.evaluate({ + agent_id: agentId, + action: { + tool_name: action.tool_name, + parameters: action.parameters ?? 
{}, + description: action.description + }, + context, + intent + }, accessToken); + } + + async getHealth(): Promise> { + return this.requestJson>("/acr/health", { + method: "GET" + }, [200]); + } + + async getReady(): Promise> { + return this.requestJson>("/acr/ready", { + method: "GET" + }, [200, 503]); + } +} + +export class ACRAgentSession { + private readonly client: ACRClient; + readonly agentId: string; + accessToken: string; + + constructor(client: ACRClient, agentId: string, accessToken: string) { + this.client = client; + this.agentId = agentId; + this.accessToken = accessToken; + } + + async refreshToken(): Promise { + const token = await this.client.issueAgentToken(this.agentId); + this.accessToken = token.access_token; + return token; + } + + async evaluate(request: EvaluateRequest): Promise { + if (request.agent_id !== this.agentId) { + throw new Error("EvaluateRequest.agent_id does not match the bound session agentId"); + } + return this.client.evaluate(request, this.accessToken); + } + + async evaluateAction( + action: ActionRequest, + context: Record = {}, + intent?: IntentRequest + ): Promise { + return this.client.evaluateAction(this.agentId, this.accessToken, action, context, intent); + } +} diff --git a/implementations/acr-control-plane/sdks/typescript/tsconfig.json b/implementations/acr-control-plane/sdks/typescript/tsconfig.json new file mode 100644 index 0000000..f10a0ba --- /dev/null +++ b/implementations/acr-control-plane/sdks/typescript/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Node", + "declaration": true, + "strict": true, + "outDir": "dist", + "rootDir": "src", + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": ["src/**/*.ts"] +} diff --git a/implementations/acr-control-plane/src/acr/gateway/models.py b/implementations/acr-control-plane/src/acr/gateway/models.py new file mode 100644 index 0000000..57673e5 --- /dev/null +++ 
# Closed set of decision values used by EvaluateResponse.decision.
DecisionLiteral = Literal["allow", "deny", "modify", "escalate"]


# Action payload carried in EvaluateRequest.action (and echoed back, possibly
# altered, in EvaluateResponse.modified_action).
class ActionRequest(BaseModel):
    tool_name: str
    # default_factory gives every instance its own empty dict.
    parameters: dict[str, Any] = Field(default_factory=dict)
    description: str | None = None


# Optional intent details attached to an EvaluateRequest; every field has a
# default, so an empty object is valid.
class IntentRequest(BaseModel):
    goal: str | None = None
    justification: str | None = None
    expected_effects: list[str] = Field(default_factory=list)
    requested_by_step: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


# Request body accepted by the gateway's POST /evaluate handler.
class EvaluateRequest(BaseModel):
    agent_id: str
    action: ActionRequest
    context: dict[str, Any] = Field(default_factory=dict)
    intent: IntentRequest | None = None


# Response body of POST /evaluate (the router registers it as response_model).
# Only `decision` is required; every other field defaults to None or empty.
class EvaluateResponse(BaseModel):
    decision: DecisionLiteral
    correlation_id: str | None = None
    reason: str | None = None
    error_code: str | None = None
    approval_request_id: str | None = None
    approval_queue: str | None = None
    sla_minutes: int | None = None
    policy_decisions: list[PolicyDecision] = Field(default_factory=list)
    drift_score: float | None = None
    latency_ms: int | None = None
    estimated_cost_usd: float | None = None
    authoritative_hourly_spend_usd: float | None = None
    modified_action: ActionRequest | None = None
    execution_result: dict[str, Any] | None = None

    # extra="allow": fields not declared above are kept on the model instead
    # of being rejected.
    model_config = {"extra": "allow"}

    @property
    def is_allowed(self) -> bool:
        """True when the decision is "allow" or "modify"."""
        return self.decision in {"allow", "modify"}

    @property
    def requires_approval(self) -> bool:
        """True when the decision is "escalate"."""
        return self.decision == "escalate"

    @property
    def was_modified(self) -> bool:
        """True when the decision is "modify"."""
        return self.decision == "modify"
b/implementations/acr-control-plane/src/acr/gateway/router.py index 957e5bd..ad955a7 100644 --- a/implementations/acr-control-plane/src/acr/gateway/router.py +++ b/implementations/acr-control-plane/src/acr/gateway/router.py @@ -21,7 +21,6 @@ from fastapi import APIRouter, BackgroundTasks, Depends from fastapi.responses import JSONResponse -from pydantic import BaseModel, Field from sqlalchemy.ext.asyncio import AsyncSession import structlog @@ -42,6 +41,7 @@ from acr.db.models import PolicyDecisionRecord from acr.gateway.auth import require_agent_token from acr.gateway.executor import execute_action +from acr.gateway.models import EvaluateRequest, EvaluateResponse from acr.gateway.spend_control import ( adjust_authoritative_spend, get_authoritative_projected_spend, @@ -72,29 +72,6 @@ _DRIFT_CHECK_EVERY_N = 10 -# ── Request / Response models ───────────────────────────────────────────────── - -class ActionRequest(BaseModel): - tool_name: str - parameters: dict = Field(default_factory=dict) - description: str | None = None - - -class IntentRequest(BaseModel): - goal: str | None = None - justification: str | None = None - expected_effects: list[str] = Field(default_factory=list) - requested_by_step: str | None = None - metadata: dict = Field(default_factory=dict) - - -class EvaluateRequest(BaseModel): - agent_id: str - action: ActionRequest - context: dict = Field(default_factory=dict) - intent: IntentRequest | None = None - - # ── Background tasks ────────────────────────────────────────────────────────── async def _record_drift_sample( @@ -254,7 +231,7 @@ async def _should_run_drift_check(agent_id: str) -> bool: # ── Main endpoint ───────────────────────────────────────────────────────────── -@router.post("/evaluate") +@router.post("/evaluate", response_model=EvaluateResponse) async def evaluate( req: EvaluateRequest, background_tasks: BackgroundTasks, diff --git a/implementations/acr-control-plane/src/acr/sdk/__init__.py 
b/implementations/acr-control-plane/src/acr/sdk/__init__.py new file mode 100644 index 0000000..9dde0c9 --- /dev/null +++ b/implementations/acr-control-plane/src/acr/sdk/__init__.py @@ -0,0 +1,35 @@ +"""Public SDK exports for ACR integrations.""" +from acr.gateway.models import ActionRequest, EvaluateRequest, EvaluateResponse, IntentRequest +from acr.sdk.client import ( + ACRAgentSession, + ACRClient, + AsyncACRAgentSession, + AsyncACRClient, +) +from acr.sdk.errors import ( + ACRDecisionError, + ACRDeniedError, + ACRHTTPError, + ACREscalatedError, + ACRSDKError, +) +from acr.sdk.langgraph import build_langchain_tool, guard_async_tool, guard_tool + +__all__ = [ + "ACRAgentSession", + "ACRClient", + "ACRDecisionError", + "ACRDeniedError", + "ACRHTTPError", + "ACRSDKError", + "ACREscalatedError", + "ActionRequest", + "AsyncACRAgentSession", + "AsyncACRClient", + "EvaluateRequest", + "EvaluateResponse", + "IntentRequest", + "build_langchain_tool", + "guard_async_tool", + "guard_tool", +] diff --git a/implementations/acr-control-plane/src/acr/sdk/client.py b/implementations/acr-control-plane/src/acr/sdk/client.py new file mode 100644 index 0000000..6de7768 --- /dev/null +++ b/implementations/acr-control-plane/src/acr/sdk/client.py @@ -0,0 +1,405 @@ +"""Official Python SDK for the ACR control plane.""" +from __future__ import annotations + +from typing import Any, Mapping + +import httpx +from pydantic import BaseModel + +from acr.gateway.models import ActionRequest, EvaluateRequest, EvaluateResponse, IntentRequest +from acr.pillar1_identity.models import AgentRegisterRequest, AgentResponse, TokenResponse +from acr.sdk.errors import ACRHTTPError + +_EVALUATE_STATUS_CODES = {200, 202, 403, 500, 503} + + +def _jsonable(payload: BaseModel | Mapping[str, Any] | None) -> dict[str, Any]: + if payload is None: + return {} + if isinstance(payload, BaseModel): + return payload.model_dump(mode="json", exclude_none=True) + return dict(payload) + + +def 
def _raise_http_error(response: httpx.Response) -> None:
    """Raise ``ACRHTTPError`` describing ``response``; never returns normally.

    The message always carries the status code. When the body is a JSON
    object, the first truthy value among its ``detail``, ``reason`` and
    ``message`` keys is appended.

    Raises:
        ACRHTTPError: Always.
    """
    body = _parse_json_response(response)
    message = f"ACR API request failed with status {response.status_code}"
    if isinstance(body, dict):
        detail = body.get("detail") or body.get("reason") or body.get("message")
        if detail:
            message = f"{message}: {detail}"
    raise ACRHTTPError(status_code=response.status_code, message=message, body=body)


def _parse_model(response: httpx.Response, model_type):
    """Validate a successful response body against ``model_type``.

    Args:
        response: Response to decode.
        model_type: Pydantic model class; its ``model_validate`` is called
            with the decoded JSON payload.

    Returns:
        The validated ``model_type`` instance.

    Raises:
        ACRHTTPError: If ``response.is_error`` is true, or if the body of a
            non-error response is not valid JSON.
    """
    if response.is_error:
        _raise_http_error(response)
    try:
        payload = response.json()
    except ValueError as exc:
        # A non-JSON success body used to escape as a bare ValueError; report
        # it through the SDK's HTTP error type like the other parsers do.
        raise ACRHTTPError(
            status_code=response.status_code,
            message="Response body was not JSON",
            body=response.text,
        ) from exc
    return model_type.model_validate(payload)


def _parse_evaluate_response(response: httpx.Response) -> EvaluateResponse:
    """Decode the reply of ``POST /acr/evaluate``.

    Statuses listed in ``_EVALUATE_STATUS_CODES`` are treated as carrying a
    decision payload and are validated as ``EvaluateResponse``.

    Raises:
        ACRHTTPError: If the status is not in ``_EVALUATE_STATUS_CODES`` or
            the decoded body is not a JSON object.
    """
    payload = _parse_json_response(response)
    if response.status_code not in _EVALUATE_STATUS_CODES:
        raise ACRHTTPError(
            status_code=response.status_code,
            message=f"Unexpected evaluate response status {response.status_code}",
            body=payload,
        )
    if not isinstance(payload, dict):
        raise ACRHTTPError(
            status_code=response.status_code,
            message="Evaluate response body was not JSON",
            body=payload,
        )
    return EvaluateResponse.model_validate(payload)
_operator_headers(self) -> dict[str, str]: + if not self.operator_api_key: + raise ValueError("operator_api_key is required for operator endpoints") + return {"X-Operator-API-Key": self.operator_api_key} + + @staticmethod + def _agent_headers(access_token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {access_token}"} + + def register_agent(self, request: AgentRegisterRequest) -> AgentResponse: + response = self._client.post( + "/acr/agents", + headers=self._operator_headers(), + json=_jsonable(request), + ) + return _parse_model(response, AgentResponse) + + def ensure_agent_registered(self, request: AgentRegisterRequest) -> AgentResponse: + response = self._client.post( + "/acr/agents", + headers=self._operator_headers(), + json=_jsonable(request), + ) + if response.status_code == 409: + return self.get_agent(request.agent_id) + return _parse_model(response, AgentResponse) + + def get_agent(self, agent_id: str) -> AgentResponse: + response = self._client.get(f"/acr/agents/{agent_id}", headers=self._operator_headers()) + return _parse_model(response, AgentResponse) + + def issue_agent_token(self, agent_id: str) -> TokenResponse: + response = self._client.post( + f"/acr/agents/{agent_id}/token", + headers=self._operator_headers(), + ) + return _parse_model(response, TokenResponse) + + def get_health(self) -> dict[str, Any]: + response = self._client.get("/acr/health") + if response.is_error: + _raise_http_error(response) + payload = _parse_json_response(response) + if not isinstance(payload, dict): + raise ACRHTTPError( + status_code=response.status_code, + message="Health response body was not JSON", + body=payload, + ) + return payload + + def get_ready(self) -> dict[str, Any]: + response = self._client.get("/acr/ready") + if response.status_code not in {200, 503}: + _raise_http_error(response) + payload = _parse_json_response(response) + if not isinstance(payload, dict): + raise ACRHTTPError( + status_code=response.status_code, + message="Ready 
response body was not JSON", + body=payload, + ) + return payload + + def create_agent_session(self, agent_id: str, access_token: str) -> ACRAgentSession: + return ACRAgentSession(client=self, agent_id=agent_id, access_token=access_token) + + def issue_agent_session(self, agent_id: str) -> ACRAgentSession: + token = self.issue_agent_token(agent_id) + return self.create_agent_session(token.agent_id, token.access_token) + + def evaluate(self, request: EvaluateRequest, *, access_token: str) -> EvaluateResponse: + response = self._client.post( + "/acr/evaluate", + headers=self._agent_headers(access_token), + json=_jsonable(request), + ) + return _parse_evaluate_response(response) + + def evaluate_action( + self, + *, + agent_id: str, + access_token: str, + tool_name: str, + parameters: Mapping[str, Any] | None = None, + description: str | None = None, + context: Mapping[str, Any] | None = None, + intent: IntentRequest | Mapping[str, Any] | None = None, + ) -> EvaluateResponse: + request = EvaluateRequest( + agent_id=agent_id, + action=ActionRequest( + tool_name=tool_name, + parameters=dict(parameters or {}), + description=description, + ), + context=dict(context or {}), + intent=IntentRequest.model_validate(intent) if intent is not None else None, + ) + return self.evaluate(request, access_token=access_token) + + +class AsyncACRClient: + """Async ACR client for agent onboarding and runtime decisions.""" + + def __init__( + self, + *, + base_url: str, + operator_api_key: str | None = None, + timeout: float = 10.0, + client: httpx.AsyncClient | None = None, + transport: httpx.AsyncBaseTransport | None = None, + ) -> None: + self.base_url = base_url.rstrip("/") + self.operator_api_key = operator_api_key + self._owns_client = client is None + self._client = client or httpx.AsyncClient( + base_url=self.base_url, + timeout=timeout, + transport=transport, + ) + + async def aclose(self) -> None: + if self._owns_client: + await self._client.aclose() + + async def 
__aenter__(self) -> AsyncACRClient: + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: + await self.aclose() + + def _operator_headers(self) -> dict[str, str]: + if not self.operator_api_key: + raise ValueError("operator_api_key is required for operator endpoints") + return {"X-Operator-API-Key": self.operator_api_key} + + @staticmethod + def _agent_headers(access_token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {access_token}"} + + async def register_agent(self, request: AgentRegisterRequest) -> AgentResponse: + response = await self._client.post( + "/acr/agents", + headers=self._operator_headers(), + json=_jsonable(request), + ) + return _parse_model(response, AgentResponse) + + async def ensure_agent_registered(self, request: AgentRegisterRequest) -> AgentResponse: + response = await self._client.post( + "/acr/agents", + headers=self._operator_headers(), + json=_jsonable(request), + ) + if response.status_code == 409: + return await self.get_agent(request.agent_id) + return _parse_model(response, AgentResponse) + + async def get_agent(self, agent_id: str) -> AgentResponse: + response = await self._client.get(f"/acr/agents/{agent_id}", headers=self._operator_headers()) + return _parse_model(response, AgentResponse) + + async def issue_agent_token(self, agent_id: str) -> TokenResponse: + response = await self._client.post( + f"/acr/agents/{agent_id}/token", + headers=self._operator_headers(), + ) + return _parse_model(response, TokenResponse) + + async def get_health(self) -> dict[str, Any]: + response = await self._client.get("/acr/health") + if response.is_error: + _raise_http_error(response) + payload = _parse_json_response(response) + if not isinstance(payload, dict): + raise ACRHTTPError( + status_code=response.status_code, + message="Health response body was not JSON", + body=payload, + ) + return payload + + async def get_ready(self) -> dict[str, Any]: + response = await self._client.get("/acr/ready") + if 
response.status_code not in {200, 503}: + _raise_http_error(response) + payload = _parse_json_response(response) + if not isinstance(payload, dict): + raise ACRHTTPError( + status_code=response.status_code, + message="Ready response body was not JSON", + body=payload, + ) + return payload + + def create_agent_session(self, agent_id: str, access_token: str) -> AsyncACRAgentSession: + return AsyncACRAgentSession(client=self, agent_id=agent_id, access_token=access_token) + + async def issue_agent_session(self, agent_id: str) -> AsyncACRAgentSession: + token = await self.issue_agent_token(agent_id) + return self.create_agent_session(token.agent_id, token.access_token) + + async def evaluate(self, request: EvaluateRequest, *, access_token: str) -> EvaluateResponse: + response = await self._client.post( + "/acr/evaluate", + headers=self._agent_headers(access_token), + json=_jsonable(request), + ) + return _parse_evaluate_response(response) + + async def evaluate_action( + self, + *, + agent_id: str, + access_token: str, + tool_name: str, + parameters: Mapping[str, Any] | None = None, + description: str | None = None, + context: Mapping[str, Any] | None = None, + intent: IntentRequest | Mapping[str, Any] | None = None, + ) -> EvaluateResponse: + request = EvaluateRequest( + agent_id=agent_id, + action=ActionRequest( + tool_name=tool_name, + parameters=dict(parameters or {}), + description=description, + ), + context=dict(context or {}), + intent=IntentRequest.model_validate(intent) if intent is not None else None, + ) + return await self.evaluate(request, access_token=access_token) + + +class ACRAgentSession: + """Bound sync session for a specific agent and token.""" + + def __init__(self, *, client: ACRClient, agent_id: str, access_token: str) -> None: + self.client = client + self.agent_id = agent_id + self.access_token = access_token + + def refresh_token(self) -> TokenResponse: + token = self.client.issue_agent_token(self.agent_id) + self.access_token = 
token.access_token + return token + + def evaluate(self, request: EvaluateRequest) -> EvaluateResponse: + if request.agent_id != self.agent_id: + raise ValueError("EvaluateRequest.agent_id does not match the bound session agent_id") + return self.client.evaluate(request, access_token=self.access_token) + + def evaluate_action( + self, + *, + tool_name: str, + parameters: Mapping[str, Any] | None = None, + description: str | None = None, + context: Mapping[str, Any] | None = None, + intent: IntentRequest | Mapping[str, Any] | None = None, + ) -> EvaluateResponse: + return self.client.evaluate_action( + agent_id=self.agent_id, + access_token=self.access_token, + tool_name=tool_name, + parameters=parameters, + description=description, + context=context, + intent=intent, + ) + + +class AsyncACRAgentSession: + """Bound async session for a specific agent and token.""" + + def __init__(self, *, client: AsyncACRClient, agent_id: str, access_token: str) -> None: + self.client = client + self.agent_id = agent_id + self.access_token = access_token + + async def refresh_token(self) -> TokenResponse: + token = await self.client.issue_agent_token(self.agent_id) + self.access_token = token.access_token + return token + + async def evaluate(self, request: EvaluateRequest) -> EvaluateResponse: + if request.agent_id != self.agent_id: + raise ValueError("EvaluateRequest.agent_id does not match the bound session agent_id") + return await self.client.evaluate(request, access_token=self.access_token) + + async def evaluate_action( + self, + *, + tool_name: str, + parameters: Mapping[str, Any] | None = None, + description: str | None = None, + context: Mapping[str, Any] | None = None, + intent: IntentRequest | Mapping[str, Any] | None = None, + ) -> EvaluateResponse: + return await self.client.evaluate_action( + agent_id=self.agent_id, + access_token=self.access_token, + tool_name=tool_name, + parameters=parameters, + description=description, + context=context, + intent=intent, + ) diff 
--git a/implementations/acr-control-plane/src/acr/sdk/errors.py b/implementations/acr-control-plane/src/acr/sdk/errors.py new file mode 100644 index 0000000..c92f1f4 --- /dev/null +++ b/implementations/acr-control-plane/src/acr/sdk/errors.py @@ -0,0 +1,34 @@ +"""SDK-specific errors for client and adapter consumers.""" +from __future__ import annotations + +from acr.gateway.models import EvaluateResponse + + +class ACRSDKError(RuntimeError): + """Base exception for SDK consumers.""" + + +class ACRHTTPError(ACRSDKError): + """Raised when an HTTP call fails before a normal gateway decision is returned.""" + + def __init__(self, *, status_code: int, message: str, body: object | None = None) -> None: + super().__init__(message) + self.status_code = status_code + self.body = body + + +class ACRDecisionError(ACRSDKError): + """Base exception for non-runnable control-plane decisions.""" + + def __init__(self, response: EvaluateResponse) -> None: + message = response.reason or f"ACR decision '{response.decision}' blocked execution" + super().__init__(message) + self.response = response + + +class ACRDeniedError(ACRDecisionError): + """Raised when ACR returns a deny decision.""" + + +class ACREscalatedError(ACRDecisionError): + """Raised when ACR requires human approval before execution can continue.""" diff --git a/implementations/acr-control-plane/src/acr/sdk/langgraph.py b/implementations/acr-control-plane/src/acr/sdk/langgraph.py new file mode 100644 index 0000000..c3ff5d8 --- /dev/null +++ b/implementations/acr-control-plane/src/acr/sdk/langgraph.py @@ -0,0 +1,173 @@ +"""LangGraph/LangChain-style tool guards built on top of the ACR SDK.""" +from __future__ import annotations + +import inspect +from functools import wraps +from typing import Any, Callable + +from acr.gateway.models import EvaluateResponse, IntentRequest +from acr.sdk.client import ACRAgentSession, AsyncACRAgentSession +from acr.sdk.errors import ACRDeniedError, ACREscalatedError + + +def 
_bind_arguments(func: Callable[..., Any], *args: Any, **kwargs: Any) -> dict[str, Any]: + bound = inspect.signature(func).bind(*args, **kwargs) + bound.apply_defaults() + return dict(bound.arguments) + + +def _coerce_intent(intent: IntentRequest | dict[str, Any] | None) -> IntentRequest | None: + if intent is None: + return None + return IntentRequest.model_validate(intent) + + +def _resolve_effective_parameters( + original_parameters: dict[str, Any], + response: EvaluateResponse, +) -> dict[str, Any]: + if response.modified_action is None: + return original_parameters + return dict(response.modified_action.parameters) + + +def _handle_blocking_decision(response: EvaluateResponse) -> None: + if response.decision == "deny": + raise ACRDeniedError(response) + if response.decision == "escalate": + raise ACREscalatedError(response) + + +def guard_tool( + func: Callable[..., Any], + *, + session: ACRAgentSession, + tool_name: str | None = None, + description: str | None = None, + context_builder: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + intent_builder: Callable[[dict[str, Any]], IntentRequest | dict[str, Any] | None] | None = None, + execute_locally_on_allow: bool = True, +) -> Callable[..., Any]: + """ + Wrap a local tool so ACR authorizes it before execution. + + This works well for LangGraph/LangChain tool functions even when the graph + itself is not tightly coupled to ACR-specific logic. 
+ """ + resolved_tool_name = tool_name or func.__name__ + resolved_description = description or inspect.getdoc(func) + + @wraps(func) + def wrapped(*args: Any, **kwargs: Any) -> Any: + parameters = _bind_arguments(func, *args, **kwargs) + context = context_builder(parameters) if context_builder else {} + intent = _coerce_intent(intent_builder(parameters)) if intent_builder else None + response = session.evaluate_action( + tool_name=resolved_tool_name, + parameters=parameters, + description=resolved_description, + context=context, + intent=intent, + ) + _handle_blocking_decision(response) + if response.execution_result is not None and not execute_locally_on_allow: + return response.execution_result + effective_parameters = _resolve_effective_parameters(parameters, response) + return func(**effective_parameters) + + return wrapped + + +def guard_async_tool( + func: Callable[..., Any], + *, + session: AsyncACRAgentSession, + tool_name: str | None = None, + description: str | None = None, + context_builder: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + intent_builder: Callable[[dict[str, Any]], IntentRequest | dict[str, Any] | None] | None = None, + execute_locally_on_allow: bool = True, +) -> Callable[..., Any]: + """Async version of ``guard_tool`` for coroutine-based LangGraph tools.""" + resolved_tool_name = tool_name or func.__name__ + resolved_description = description or inspect.getdoc(func) + + @wraps(func) + async def wrapped(*args: Any, **kwargs: Any) -> Any: + parameters = _bind_arguments(func, *args, **kwargs) + context = context_builder(parameters) if context_builder else {} + intent = _coerce_intent(intent_builder(parameters)) if intent_builder else None + response = await session.evaluate_action( + tool_name=resolved_tool_name, + parameters=parameters, + description=resolved_description, + context=context, + intent=intent, + ) + _handle_blocking_decision(response) + if response.execution_result is not None and not execute_locally_on_allow: + 
return response.execution_result + effective_parameters = _resolve_effective_parameters(parameters, response) + result = func(**effective_parameters) + if inspect.isawaitable(result): + return await result + return result + + return wrapped + + +def build_langchain_tool( + func: Callable[..., Any], + *, + session: ACRAgentSession | AsyncACRAgentSession, + name: str | None = None, + description: str | None = None, + context_builder: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + intent_builder: Callable[[dict[str, Any]], IntentRequest | dict[str, Any] | None] | None = None, + execute_locally_on_allow: bool = True, +): + """ + Optional helper that returns a LangChain StructuredTool when + ``langchain-core`` is installed. + """ + try: + from langchain_core.tools import StructuredTool + except ImportError as exc: # pragma: no cover - exercised by users, not CI + raise RuntimeError( + "langchain-core is not installed. Install the integration extra or " + "add langchain-core to your environment." 
+ ) from exc + + resolved_name = name or func.__name__ + resolved_description = description or inspect.getdoc(func) or f"Guarded ACR tool: {resolved_name}" + + if isinstance(session, AsyncACRAgentSession): + coroutine = guard_async_tool( + func, + session=session, + tool_name=resolved_name, + description=resolved_description, + context_builder=context_builder, + intent_builder=intent_builder, + execute_locally_on_allow=execute_locally_on_allow, + ) + return StructuredTool.from_function( + coroutine=coroutine, + name=resolved_name, + description=resolved_description, + ) + + wrapped = guard_tool( + func, + session=session, + tool_name=resolved_name, + description=resolved_description, + context_builder=context_builder, + intent_builder=intent_builder, + execute_locally_on_allow=execute_locally_on_allow, + ) + return StructuredTool.from_function( + func=wrapped, + name=resolved_name, + description=resolved_description, + ) diff --git a/implementations/acr-control-plane/tests/test_sdk.py b/implementations/acr-control-plane/tests/test_sdk.py new file mode 100644 index 0000000..06c609b --- /dev/null +++ b/implementations/acr-control-plane/tests/test_sdk.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from typing import cast + +import httpx +import pytest + +from acr.gateway.models import ActionRequest, EvaluateResponse +from acr.pillar1_identity.models import AgentRegisterRequest +from acr.sdk import ( + ACRAgentSession, + ACRClient, + ACRDeniedError, + ACREscalatedError, + AsyncACRAgentSession, + AsyncACRClient, + guard_async_tool, + guard_tool, +) + + +def _agent_response_body(agent_id: str) -> dict: + now = datetime.now(timezone.utc).isoformat() + return { + "agent_id": agent_id, + "owner": "support@example.com", + "purpose": "Handle governed support actions", + "risk_tier": "medium", + "allowed_tools": ["query_customer_db", "send_email"], + "forbidden_tools": ["delete_customer"], + "data_access": [], + 
"boundaries": { + "max_actions_per_minute": 30, + "max_cost_per_hour_usd": 5.0, + "default_action_cost_usd": None, + "tool_costs_usd": {}, + "allowed_regions": [], + "credential_rotation_days": 90, + }, + "is_active": True, + "version": "1.0.0", + "parent_agent_id": None, + "capabilities": [], + "lifecycle_state": "active", + "health_status": "healthy", + "last_heartbeat_at": None, + "created_at": now, + "updated_at": now, + } + + +def test_sync_client_ensure_agent_registered_fetches_existing_agent() -> None: + calls: list[tuple[str, str]] = [] + + def handler(request: httpx.Request) -> httpx.Response: + calls.append((request.method, request.url.path)) + if request.method == "POST" and request.url.path == "/acr/agents": + return httpx.Response(409, json={"detail": "already exists"}) + if request.method == "GET" and request.url.path == "/acr/agents/support-bot": + return httpx.Response(200, json=_agent_response_body("support-bot")) + raise AssertionError(f"Unexpected request: {request.method} {request.url.path}") + + client = ACRClient( + base_url="http://test", + operator_api_key="operator-key", + transport=httpx.MockTransport(handler), + ) + request = AgentRegisterRequest( + agent_id="support-bot", + owner="support@example.com", + purpose="Handle governed support actions", + allowed_tools=["query_customer_db", "send_email"], + ) + + agent = client.ensure_agent_registered(request) + + assert agent.agent_id == "support-bot" + assert calls == [("POST", "/acr/agents"), ("GET", "/acr/agents/support-bot")] + + +@pytest.mark.asyncio +async def test_async_client_can_issue_session_and_evaluate(async_client, sample_agent) -> None: + sdk = AsyncACRClient( + base_url="http://test", + operator_api_key="operator-key", + client=async_client, + ) + + session = await sdk.issue_agent_session(sample_agent.agent_id) + result = await session.evaluate_action( + tool_name="query_customer_db", + parameters={"customer_id": "C-001"}, + context={"session_id": "sess-sdk-001"}, + ) + + assert 
result.decision == "allow" + assert result.correlation_id + + +def test_guard_tool_uses_modified_parameters() -> None: + class _FakeSession: + def evaluate_action(self, **kwargs): + return EvaluateResponse( + decision="modify", + modified_action=ActionRequest( + tool_name="issue_refund", + parameters={"customer_id": kwargs["parameters"]["customer_id"], "amount": 25.0}, + description=kwargs["description"], + ), + ) + + def issue_refund(customer_id: str, amount: float) -> dict: + return {"customer_id": customer_id, "amount": amount} + + guarded = guard_tool( + issue_refund, + session=cast(ACRAgentSession, _FakeSession()), + ) + + result = guarded(customer_id="C-100", amount=250.0) + + assert result == {"customer_id": "C-100", "amount": 25.0} + + +def test_guard_tool_raises_on_deny() -> None: + class _FakeSession: + def evaluate_action(self, **kwargs): + return EvaluateResponse(decision="deny", reason="policy blocked") + + def create_ticket(subject: str) -> dict: + return {"subject": subject} + + guarded = guard_tool( + create_ticket, + session=cast(ACRAgentSession, _FakeSession()), + ) + + with pytest.raises(ACRDeniedError, match="policy blocked"): + guarded(subject="Escalate to finance") + + +@pytest.mark.asyncio +async def test_guard_async_tool_returns_execution_result_without_local_call() -> None: + class _FakeAsyncSession: + async def evaluate_action(self, **kwargs): + return EvaluateResponse( + decision="allow", + execution_result={"status": "executed-by-gateway", "tool": kwargs["tool_name"]}, + ) + + async def send_email(to: str, subject: str) -> dict: + raise AssertionError("local tool should not execute when execution_result is returned") + + guarded = guard_async_tool( + send_email, + session=cast(AsyncACRAgentSession, _FakeAsyncSession()), + execute_locally_on_allow=False, + ) + + result = await guarded(to="alice@example.com", subject="Status update") + + assert result == {"status": "executed-by-gateway", "tool": "send_email"} + + +@pytest.mark.asyncio 
async def test_guard_async_tool_raises_on_escalate() -> None:
    """An ``escalate`` decision must abort the guarded call before the tool runs.

    The fake session always answers ``escalate``. The guarded coroutine is
    expected to:

    * raise ``ACREscalatedError`` whose message is the decision's ``reason``;
    * expose the original decision on ``.response`` so a caller can follow up
      on the pending approval request;
    * never invoke the wrapped local tool.
    """
    local_calls: list[tuple[str, str]] = []

    class _FakeAsyncSession:
        async def evaluate_action(self, **kwargs):
            return EvaluateResponse(
                decision="escalate",
                reason="manager approval required",
                approval_request_id="apr_123",
            )

    async def send_email(to: str, subject: str) -> dict:
        # Record the invocation so the test can prove the guard short-circuited.
        local_calls.append((to, subject))
        return {"to": to, "subject": subject}

    guarded = guard_async_tool(
        send_email,
        session=cast(AsyncACRAgentSession, _FakeAsyncSession()),
    )

    with pytest.raises(ACREscalatedError, match="manager approval required") as excinfo:
        await guarded(to="alice@example.com", subject="Status update")

    # The error carries the full decision, including the approval handle.
    assert excinfo.value.response.decision == "escalate"
    assert excinfo.value.response.approval_request_id == "apr_123"
    # A blocking decision must never reach the local tool.
    assert local_calls == []