diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..d22ad88
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,34 @@
+name: tests
+
+on:
+  pull_request:
+  push:
+    branches: ["main", "hack_demo"]
+  workflow_dispatch:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        node-version: ["20", "22"]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ matrix.node-version }}
+          cache: "npm"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run type check
+        run: npm run typecheck
+
+      - name: Run tests
+        run: npm test
diff --git a/src/index.ts b/src/index.ts
index eb979be..4934646 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,7 @@ export * from "./config.js";
 export * from "./control-plane-sync.js";
 export * from "./errors.js";
 export * from "./non-web-evidence.js";
+export * from "./web-evidence.js";
 export * from "./openclaw-hooks.js";
 export * from "./openclaw-plugin-api.js";
 export * from "./provider.js";
diff --git a/src/web-evidence.ts b/src/web-evidence.ts
new file mode 100644
index 0000000..b7c7a3e
--- /dev/null
+++ b/src/web-evidence.ts
@@ -0,0 +1,104 @@
+import crypto from "node:crypto";
+import {
+  buildWebStateEvidence,
+  buildWebStateEvidenceFromRuntimeSnapshot,
+  type RuntimeSnapshotLike,
+  type StateEvidence,
+  type WebStateSnapshot,
+} from "@predicatesystems/authority";
+
+/**
+ * Runtime context captured from OpenClaw web agent execution environment.
+ * Maps to ts-predicate-authority WebStateSnapshot contract.
+ */
+export interface WebRuntimeContext {
+  url?: string;
+  title?: string;
+  domHtml?: string;
+  domHash?: string;
+  visibleText?: string;
+  visibleTextHash?: string;
+  eventId?: string;
+  observedAt?: string;
+  dominantGroupKey?: string;
+  snapshotTimestamp?: string;
+  confidence?: number;
+  confidenceReasons?: string[];
+}
+
+/**
+ * Provider interface for web state evidence capture.
+ * Implementations should capture browser/DOM state from the agent runtime.
+ */
+export interface WebEvidenceProvider {
+  captureWebSnapshot(): Promise<WebStateSnapshot>;
+}
+
+/**
+ * OpenClaw-specific web evidence provider.
+ * Captures web state from the agent runtime and maps to TS SDK contract.
+ */
+export class OpenClawWebEvidenceProvider implements WebEvidenceProvider {
+  constructor(
+    private readonly capture: () =>
+      | Promise<WebRuntimeContext>
+      | WebRuntimeContext,
+  ) {}
+
+  /**
+   * Invokes the capture callback and maps its camelCase fields onto the
+   * snake_case snapshot shape. A missing hash is computed from the raw
+   * content (absent `domHtml`/`visibleText` hashes as the empty string),
+   * and `observed_at` falls back to the current time in ISO-8601 form.
+   */
+  async captureWebSnapshot(): Promise<WebStateSnapshot> {
+    const runtime = await this.capture();
+    return {
+      url: runtime.url,
+      title: runtime.title,
+      dom_hash: runtime.domHash ?? sha256(runtime.domHtml ?? ""),
+      visible_text_hash:
+        runtime.visibleTextHash ?? sha256(runtime.visibleText ?? ""),
+      event_id: runtime.eventId,
+      observed_at: runtime.observedAt ?? new Date().toISOString(),
+      dominant_group_key: runtime.dominantGroupKey,
+      snapshot_timestamp: runtime.snapshotTimestamp,
+      confidence: runtime.confidence,
+      confidence_reasons: runtime.confidenceReasons,
+    };
+  }
+}
+
+/**
+ * Build StateEvidence from an OpenClaw web evidence provider.
+ * `options.schemaVersion` defaults to "v1" when omitted.
+ */
+export async function buildWebEvidenceFromProvider(
+  provider: WebEvidenceProvider,
+  options?: { schemaVersion?: string },
+): Promise<StateEvidence> {
+  const snapshot = await provider.captureWebSnapshot();
+  return buildWebStateEvidence({
+    snapshot,
+    schemaVersion: options?.schemaVersion ?? "v1",
+  });
+}
+
+/**
+ * Convenience adapter for predicate-runtime snapshot output.
+ * Maps RuntimeSnapshotLike to StateEvidence directly.
+ * `options.schemaVersion` defaults to "v1" when omitted.
+ */
+export function buildWebEvidenceFromRuntimeSnapshot(
+  snapshot: RuntimeSnapshotLike,
+  options?: { schemaVersion?: string },
+): StateEvidence {
+  return buildWebStateEvidenceFromRuntimeSnapshot(snapshot, {
+    schemaVersion: options?.schemaVersion ?? "v1",
+  });
+}
+
+/** Returns the hex-encoded SHA-256 digest of `input`. */
+function sha256(input: string): string {
+  return crypto.createHash("sha256").update(input).digest("hex");
+}
diff --git a/tests/audit-event-e2e.test.ts b/tests/audit-event-e2e.test.ts
new file mode 100644
index 0000000..8c630df
--- /dev/null
+++ b/tests/audit-event-e2e.test.ts
@@ -0,0 +1,258 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  type DecisionAuditExporter,
+  type DecisionTelemetryEvent,
+  GuardedProvider,
+  ActionDeniedError,
+} from "../src/provider.js";
+
+/**
+ * End-to-end audit event visibility tests.
+ *
+ * These tests verify that decision events flow correctly from the provider
+ * through to audit exporters in a format compatible with control-plane
+ * audit pipelines.
+ */
+describe("audit event visibility (e2e)", () => {
+  it("exports allow decisions with full context to audit sink", async () => {
+    const exportedEvents: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({
+        allow: true,
+        reason: "policy_matched",
+        mandateId: "mandate-12345",
+      }),
+    };
+
+    const auditExporter: DecisionAuditExporter = {
+      exportDecision: async (event) => {
+        exportedEvents.push(event);
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:e2e-test-agent",
+      authorityClient: mockClient,
+      auditExporter,
+    });
+
+    await provider.authorize({
+      action: "shell.execute",
+      resource: "npm install lodash",
+      args: { cmd: "npm install lodash" },
+      context: {
+        tenant_id: "tenant-prod",
+        session_id: "sess-e2e-001",
+        user_id: "user-developer",
+        trace_id: "trace-e2e-xyz",
+        source: "trusted_ui",
+      },
+    });
+
+    // Verify event was exported
+    expect(exportedEvents).toHaveLength(1);
+
+    const event = exportedEvents[0];
+
+    // Verify control-plane compatible fields
+    expect(event.principal).toBe("agent:e2e-test-agent");
+    expect(event.action).toBe("shell.execute");
+    expect(event.resource).toBe("npm install lodash");
+    expect(event.outcome).toBe("allow");
+    expect(event.reason).toBe("policy_matched");
+    expect(event.mandateId).toBe("mandate-12345");
+
+    // Verify tenant/session context
+    expect(event.tenantId).toBe("tenant-prod");
+    expect(event.sessionId).toBe("sess-e2e-001");
+    expect(event.userId).toBe("user-developer");
+    expect(event.traceId).toBe("trace-e2e-xyz");
+    expect(event.source).toBe("trusted_ui");
+
+    // Verify timestamp is ISO format
+    expect(event.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+  });
+
+  it("exports deny decisions with redacted sensitive resources", async () => {
+    const exportedEvents: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({
+        allow: false,
+        reason: "sensitive_path_blocked",
+      }),
+    };
+
+    const auditExporter: DecisionAuditExporter = {
+      exportDecision: async (event) => {
+        exportedEvents.push(event);
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:sensitive-test",
+      authorityClient: mockClient,
+      auditExporter,
+    });
+
+    await expect(
+      provider.authorize({
+        action: "fs.read",
+        resource: "/home/user/.ssh/id_rsa",
+        args: { path: "/home/user/.ssh/id_rsa" },
+        context: { tenant_id: "tenant-sec" },
+      }),
+    ).rejects.toThrow(ActionDeniedError);
+
+    expect(exportedEvents).toHaveLength(1);
+
+    const event = exportedEvents[0];
+    expect(event.outcome).toBe("deny");
+    expect(event.reason).toBe("sensitive_path_blocked");
+    // Resource should be redacted for sensitive paths
+    expect(event.resource).toBe("[REDACTED]");
+  });
+
+  it("exports error events when sidecar is unavailable", async () => {
+    const exportedEvents: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockRejectedValue(new Error("Connection refused")),
+    };
+
+    const auditExporter: DecisionAuditExporter = {
+      exportDecision: async (event) => {
+        exportedEvents.push(event);
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:error-test",
+      authorityClient: mockClient,
+      auditExporter,
+    });
+
+    await expect(
+      provider.authorize({
+        action: "net.http",
+        resource: "https://api.example.com",
+        args: { url: "https://api.example.com" },
+        context: { tenant_id: "tenant-error" },
+      }),
+    ).rejects.toThrow();
+
+    expect(exportedEvents).toHaveLength(1);
+
+    const event = exportedEvents[0];
+    expect(event.outcome).toBe("error");
+    expect(event.tenantId).toBe("tenant-error");
+  });
+
+  it("handles audit exporter failures gracefully (best-effort)", async () => {
+    let callCount = 0;
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ allow: true }),
+    };
+
+    const failingExporter: DecisionAuditExporter = {
+      exportDecision: async () => {
+        callCount++;
+        throw new Error("Audit sink unavailable");
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:failing-audit",
+      authorityClient: mockClient,
+      auditExporter: failingExporter,
+    });
+
+    // Should not throw even though exporter fails
+    const result = await provider.authorize({
+      action: "fs.read",
+      resource: "/workspace/safe-file.txt",
+      args: { path: "/workspace/safe-file.txt" },
+    });
+
+    // Authorization should succeed despite audit failure
+    expect(result).toBeNull(); // No mandate ID returned in this case
+    expect(callCount).toBe(1); // Exporter was called
+  });
+
+  it("chains multiple decision events with consistent trace context", async () => {
+    const exportedEvents: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi
+        .fn()
+        .mockResolvedValueOnce({ allow: true, mandateId: "m1" })
+        .mockResolvedValueOnce({ allow: true, mandateId: "m2" })
+        .mockResolvedValueOnce({ allow: false, reason: "rate_limited" }),
+    };
+
+    const auditExporter: DecisionAuditExporter = {
+      exportDecision: async (event) => {
+        exportedEvents.push(event);
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:chained-ops",
+      authorityClient: mockClient,
+      auditExporter,
+    });
+
+    const sharedContext = {
+      tenant_id: "tenant-chain",
+      session_id: "sess-chain",
+      trace_id: "trace-chain-root",
+    };
+
+    // First operation - allowed
+    await provider.authorize({
+      action: "fs.read",
+      resource: "/workspace/config.json",
+      args: { path: "/workspace/config.json" },
+      context: sharedContext,
+    });
+
+    // Second operation - allowed
+    await provider.authorize({
+      action: "net.http",
+      resource: "https://api.internal/fetch",
+      args: { url: "https://api.internal/fetch" },
+      context: sharedContext,
+    });
+
+    // Third operation - denied
+    try {
+      await provider.authorize({
+        action: "shell.execute",
+        resource: "curl external.com",
+        args: { cmd: "curl external.com" },
+        context: sharedContext,
+      });
+    } catch {
+      // Expected denial
+    }
+
+    expect(exportedEvents).toHaveLength(3);
+
+    // All events should share the same trace context
+    for (const event of exportedEvents) {
+      expect(event.tenantId).toBe("tenant-chain");
+      expect(event.sessionId).toBe("sess-chain");
+      expect(event.traceId).toBe("trace-chain-root");
+    }
+
+    // Verify outcomes
+    expect(exportedEvents[0].outcome).toBe("allow");
+    expect(exportedEvents[0].mandateId).toBe("m1");
+    expect(exportedEvents[1].outcome).toBe("allow");
+    expect(exportedEvents[1].mandateId).toBe("m2");
+    expect(exportedEvents[2].outcome).toBe("deny");
+    expect(exportedEvents[2].reason).toBe("rate_limited");
+  });
+});
diff --git a/tests/multi-tenant-isolation.test.ts b/tests/multi-tenant-isolation.test.ts
new file mode 100644
index 0000000..c516967
--- /dev/null
+++ b/tests/multi-tenant-isolation.test.ts
@@ -0,0 +1,183 @@
+import { describe, expect, it, vi } from "vitest";
+import type { AuthorizationRequest } from "@predicatesystems/authority";
+import {
+  type DecisionAuditExporter,
+  type DecisionTelemetryEvent,
+  GuardedProvider,
+} from "../src/provider.js";
+
+describe("multi-tenant isolation", () => {
+  it("propagates tenant_id through authorization request", async () => {
+    const capturedRequests: AuthorizationRequest[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation((req: AuthorizationRequest) => {
+        capturedRequests.push(req);
+        return Promise.resolve({ allow: true, reason: "policy_pass" });
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:test-agent",
+      authorityClient: mockClient,
+    });
+
+    await provider.authorize({
+      action: "fs.read",
+      resource: "/workspace/file.txt",
+      args: { path: "/workspace/file.txt" },
+      context: {
+        tenant_id: "tenant-alpha",
+        session_id: "session-123",
+        source: "trusted_ui",
+      },
+    });
+
+    expect(capturedRequests).toHaveLength(1);
+    expect(capturedRequests[0].labels).toContain("source:trusted_ui");
+  });
+
+  it("isolates decisions by tenant in telemetry events", async () => {
+    const events: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi
+        .fn()
+        .mockResolvedValue({ allow: true, reason: "tenant_policy" }),
+    };
+
+    const telemetry = {
+      onDecision: (event: DecisionTelemetryEvent) => events.push(event),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:multi-tenant-agent",
+      authorityClient: mockClient,
+      telemetry,
+    });
+
+    // Authorize as tenant A
+    await provider.authorize({
+      action: "shell.execute",
+      resource: "echo hello",
+      args: { cmd: "echo hello" },
+      context: { tenant_id: "tenant-a", user_id: "user-a1" },
+    });
+
+    // Authorize as tenant B
+    await provider.authorize({
+      action: "shell.execute",
+      resource: "echo world",
+      args: { cmd: "echo world" },
+      context: { tenant_id: "tenant-b", user_id: "user-b1" },
+    });
+
+    expect(events).toHaveLength(2);
+    expect(events[0].tenantId).toBe("tenant-a");
+    expect(events[0].userId).toBe("user-a1");
+    expect(events[1].tenantId).toBe("tenant-b");
+    expect(events[1].userId).toBe("user-b1");
+  });
+
+  it("audit exports include tenant isolation context", async () => {
+    const exportedEvents: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ allow: true }),
+    };
+
+    const auditExporter: DecisionAuditExporter = {
+      exportDecision: async (event) => {
+        exportedEvents.push(event);
+      },
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:audited-agent",
+      authorityClient: mockClient,
+      auditExporter,
+    });
+
+    await provider.authorize({
+      action: "net.http",
+      resource: "https://api.example.com/data",
+      args: { method: "GET", url: "https://api.example.com/data" },
+      context: {
+        tenant_id: "tenant-enterprise",
+        session_id: "sess-456",
+        trace_id: "trace-abc",
+        source: "trusted_ui",
+      },
+    });
+
+    expect(exportedEvents).toHaveLength(1);
+    expect(exportedEvents[0].tenantId).toBe("tenant-enterprise");
+    expect(exportedEvents[0].sessionId).toBe("sess-456");
+    expect(exportedEvents[0].traceId).toBe("trace-abc");
+    expect(exportedEvents[0].source).toBe("trusted_ui");
+  });
+
+  it("denials preserve tenant context in error events", async () => {
+    const events: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({
+        allow: false,
+        reason: "tenant_quota_exceeded",
+      }),
+    };
+
+    const telemetry = {
+      onDecision: (event: DecisionTelemetryEvent) => events.push(event),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:quota-agent",
+      authorityClient: mockClient,
+      telemetry,
+    });
+
+    await expect(
+      provider.authorize({
+        action: "shell.execute",
+        resource: "rm -rf /",
+        args: { cmd: "rm -rf /" },
+        context: { tenant_id: "tenant-restricted" },
+      }),
+    ).rejects.toThrow();
+
+    expect(events).toHaveLength(1);
+    expect(events[0].outcome).toBe("deny");
+    expect(events[0].tenantId).toBe("tenant-restricted");
+    expect(events[0].reason).toBe("tenant_quota_exceeded");
+  });
+
+  it("handles missing tenant context gracefully", async () => {
+    const events: DecisionTelemetryEvent[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ allow: true }),
+    };
+
+    const telemetry = {
+      onDecision: (event: DecisionTelemetryEvent) => events.push(event),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:no-tenant",
+      authorityClient: mockClient,
+      telemetry,
+    });
+
+    await provider.authorize({
+      action: "fs.read",
+      resource: "/tmp/file.txt",
+      args: { path: "/tmp/file.txt" },
+      // No context provided
+    });
+
+    expect(events).toHaveLength(1);
+    expect(events[0].tenantId).toBeUndefined();
+    expect(events[0].sessionId).toBeUndefined();
+  });
+});
diff --git a/tests/web-evidence.test.ts b/tests/web-evidence.test.ts
new file mode 100644
index 0000000..45784a4
--- /dev/null
+++ b/tests/web-evidence.test.ts
@@ -0,0 +1,113 @@
+import crypto from "node:crypto";
+import { describe, expect, it } from "vitest";
+import {
+  buildWebEvidenceFromProvider,
+  buildWebEvidenceFromRuntimeSnapshot,
+  OpenClawWebEvidenceProvider,
+  type WebRuntimeContext,
+} from "../src/web-evidence.js";
+
+function sha256(input: string): string {
+  return crypto.createHash("sha256").update(input).digest("hex");
+}
+
+describe("web evidence providers", () => {
+  it("builds web state evidence from OpenClaw runtime context", async () => {
+    const mockContext: WebRuntimeContext = {
+      url: "https://example.com/dashboard",
+      title: "Dashboard - Example App",
+      domHtml: "<html><body><h1>Dashboard</h1></body></html>",
+      visibleText: "Dashboard",
+      eventId: "evt-123",
+      observedAt: "2026-02-20T12:00:00Z",
+      dominantGroupKey: "main-content",
+      confidence: 0.95,
+      confidenceReasons: ["stable_dom", "no_pending_requests"],
+    };
+
+    const provider = new OpenClawWebEvidenceProvider(() => mockContext);
+    const evidence = await buildWebEvidenceFromProvider(provider);
+
+    expect(evidence.source).toBe("browser");
+    expect(evidence.schema_version).toBe("v1");
+    expect(evidence.state_hash).toBeDefined();
+    expect(typeof evidence.state_hash).toBe("string");
+    expect(evidence.confidence).toBe(0.95);
+  });
+
+  it("computes dom_hash when domHtml provided without domHash", async () => {
+    const domHtml = "Test";
+    const expectedHash = sha256(domHtml);
+
+    const provider = new OpenClawWebEvidenceProvider(() => ({
+      url: "https://example.com",
+      domHtml,
+    }));
+
+    const snapshot = await provider.captureWebSnapshot();
+    expect(snapshot.dom_hash).toBe(expectedHash);
+  });
+
+  it("computes visible_text_hash when visibleText provided without hash", async () => {
+    const visibleText = "Hello World";
+    const expectedHash = sha256(visibleText);
+
+    const provider = new OpenClawWebEvidenceProvider(() => ({
+      url: "https://example.com",
+      visibleText,
+    }));
+
+    const snapshot = await provider.captureWebSnapshot();
+    expect(snapshot.visible_text_hash).toBe(expectedHash);
+  });
+
+  it("uses provided hashes when available", async () => {
+    const precomputedDomHash = "abc123";
+    const precomputedTextHash = "def456";
+
+    const provider = new OpenClawWebEvidenceProvider(() => ({
+      url: "https://example.com",
+      domHash: precomputedDomHash,
+      visibleTextHash: precomputedTextHash,
+    }));
+
+    const snapshot = await provider.captureWebSnapshot();
+    expect(snapshot.dom_hash).toBe(precomputedDomHash);
+    expect(snapshot.visible_text_hash).toBe(precomputedTextHash);
+  });
+
+  it("builds evidence from predicate-runtime snapshot format", () => {
+    const runtimeSnapshot = {
+      url: "https://example.com/page",
+      timestamp: "2026-02-20T12:00:00Z",
+      dominant_group_key: "content-area",
+      diagnostics: {
+        confidence: 0.88,
+        reasons: ["dom_stable"],
+      },
+    };
+
+    const evidence = buildWebEvidenceFromRuntimeSnapshot(runtimeSnapshot);
+
+    expect(evidence.source).toBe("browser");
+    expect(evidence.schema_version).toBe("v1");
+    expect(evidence.confidence).toBe(0.88);
+  });
+
+  it("handles async capture functions", async () => {
+    const asyncCapture = async (): Promise<WebRuntimeContext> => {
+      await new Promise((resolve) => setTimeout(resolve, 1));
+      return {
+        url: "https://async.example.com",
+        title: "Async Page",
+      };
+    };
+
+    const provider = new OpenClawWebEvidenceProvider(asyncCapture);
+    const snapshot = await provider.captureWebSnapshot();
+
+    expect(snapshot.url).toBe("https://async.example.com");
+    expect(snapshot.title).toBe("Async Page");
+    expect(snapshot.observed_at).toBeDefined();
+  });
+});