From 654154c769e3f06d4c174d4bc6e536b33e058976 Mon Sep 17 00:00:00 2001 From: Kacy Fortner Date: Sat, 11 Apr 2026 01:01:46 +0000 Subject: [PATCH] Refine app operator UX --- README.md | 14 +- docs/cluster-guide.md | 5 +- docs/users-guide.md | 15 +- src/api/routes/cluster_agents/app_routes.zig | 218 +++++++++++- src/lib/command_registry.zig | 4 +- src/lib/completion.zig | 4 +- src/lib/json_helpers.zig | 14 + src/manifest/cli/ops.zig | 304 ++++++++++++++--- src/manifest/release_history.zig | 11 +- src/manifest/release_plan.zig | 28 +- src/manifest/rollback_snapshot.zig | 337 +++++++++++++++++++ src/runtime/cli/status_command.zig | 217 ++++++++++-- src/state/store.zig | 2 + src/state/store/deployments.zig | 98 ++++++ src/test_root.zig | 1 + 15 files changed, 1175 insertions(+), 97 deletions(-) create mode 100644 src/manifest/rollback_snapshot.zig diff --git a/README.md b/README.md index 66c63f6..53846f8 100644 --- a/README.md +++ b/README.md @@ -195,9 +195,9 @@ yoq validate [-f manifest.toml] [-q] validate a manifest ```text yoq rollback roll back a service deployment -yoq rollback --app [name] print the last app release snapshot -yoq rollback --app [name] --server host:port --release - re-apply a prior app release remotely +yoq rollback --app [name] re-apply the previous successful app release +yoq rollback --app [name] [--release ] [--print] +yoq rollback --app [name] --server host:port [--release ] [--print] yoq history show service deployment history yoq history --app [name] show local app release history yoq history --app [name] --server host:port [--json] @@ -206,8 +206,10 @@ yoq status [--verbose] show service status and resources yoq status --app [name] show local app release status yoq status --app [name] --server host:port show remote app release status -yoq apps [--json] list local app release summaries -yoq apps --server host:port [--json] list remote app release summaries +yoq apps [--json] [--status s|--failed|--in-progress] + list local app release 
summaries +yoq apps --server host:port [--json] [--status <s>|--failed|--in-progress] + list remote app release summaries yoq metrics [service] show service metrics yoq metrics --pairs show service-to-service metrics yoq policy deny block traffic between services @@ -235,6 +237,8 @@ yoq cert rm remove a certificate If `--email` is omitted for the standalone ACME flow, yoq uses `YOQ_ACME_EMAIL` when set and otherwise falls back to `admin@<domain>`. +For app rollbacks, omitting `--release` picks the previous successful release before the current one. Use `--print` to inspect the selected stored app snapshot without applying it. + ### server and cluster ```text diff --git a/docs/cluster-guide.md b/docs/cluster-guide.md index cf16595..73d7c2d 100644 --- a/docs/cluster-guide.md +++ b/docs/cluster-guide.md @@ -144,10 +144,10 @@ after deploy, use the app-first day-2 commands: yoq apps --server 10.0.0.1:7700 yoq status --app [name] --server 10.0.0.1:7700 yoq history --app [name] --server 10.0.0.1:7700 -yoq rollback --app [name] --server 10.0.0.1:7700 --release <release-id> +yoq rollback --app [name] --server 10.0.0.1:7700 [--release <release-id>] [--print] ``` -`yoq apps` shows the latest release summary for every app, `status --app` shows the latest release metadata for one app, `history --app` lists prior releases, and remote `rollback --app ... --release` re-applies a stored app snapshot. `yoq run-worker --server ...` and `yoq train ... --server ...` now resolve workers and training jobs from the current app release on the server. Clustered app applies also register cron schedules from the current app snapshot, and the app summary/status views include live training runtime counts for the app. +`yoq apps` shows the latest release summary for every app, `status --app` shows the latest release metadata for one app, `history --app` lists prior releases, and remote `rollback --app` re-applies the previous successful app release by default. 
Add `--release` to target a specific stored release or `--print` to inspect the selected snapshot without applying it. `yoq run-worker --server ...` and `yoq train ... --server ...` resolve workers and training jobs from the current app release on the server. Clustered app applies also register cron schedules from the current app snapshot, and the app summary/status views include live training runtime counts plus previous-successful release context for the app. --- @@ -354,6 +354,7 @@ the important read paths are: - `GET /apps` - `GET /apps/<name>/status` - `GET /apps/<name>/history` +- `POST /apps/<name>/rollback` - `GET /apps/<name>/training/<job>/status` - `GET /apps/<name>/training/<job>/logs` diff --git a/docs/users-guide.md b/docs/users-guide.md index eee2b6a..febf57d 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -167,13 +167,16 @@ this gives the operator one app-first day-2 model: - `yoq status --app [name]` — current app release status - `yoq history --app [name]` — app release history -- `yoq rollback --app [name]` — print the last successful local app snapshot -- `yoq rollback --app [name] --server host:port --release <release-id>` — re-apply a prior remote app release -- `yoq apps` — list app release summaries across all known apps +- `yoq rollback --app [name]` — re-apply the previous successful app release +- `yoq rollback --app [name] [--release <release-id>] [--print]` — target a specific stored release or print the selected app snapshot without applying it +- `yoq rollback --app [name] --server host:port [--release <release-id>] [--print]` — do the same against a cluster +- `yoq apps [--status <s> | --failed | --in-progress]` — list app release summaries across all known apps with optional rollout filtering - `yoq run-worker [--server host:port] <worker>` — run a worker from the current app release - `yoq train start|status|stop|pause|resume|scale|logs [--server host:port] <job>` — manage training jobs from the current app release -`yoq apps` and `yoq status --app` now show both the desired workload mix from the latest app 
release and the current training runtime summary for that app. On clustered applies, cron definitions from the app snapshot are also registered in cluster state, so rollback restores the active cron schedule set along with the rest of the app snapshot. +`yoq apps` and `yoq status --app` now show the current release, previous successful release, desired workload mix, rollout target counts, and the current training runtime summary for each app. On clustered applies, cron definitions from the app snapshot are also registered in cluster state, so rollback restores the active cron schedule set along with the rest of the app snapshot. + +When `--app` is present and you omit the app name, yoq defaults to the current working directory name for `status --app`, `history --app`, and `rollback --app`. When `rollback --app` omits `--release`, yoq selects the previous successful release before the current one. Remote `yoq train logs --server ...` now proxies the request to the agent that hosts the selected rank. If that agent is unreachable or does not expose the log endpoint, the API returns an explicit hosting-agent error instead of a misleading empty or missing result. 
@@ -230,12 +233,12 @@ the cluster API also exposes app-scoped day-2 reads and rollback: - `GET /apps` — latest release summary per app - `GET /apps/<name>/status` — latest app release metadata - `GET /apps/<name>/history` — app release history -- `POST /apps/<name>/rollback` with `{"release_id":"..."}` — re-apply a stored app release snapshot +- `POST /apps/<name>/rollback` with optional `{"release_id":"...","print":true|false}` — re-apply or print a stored app release snapshot - `POST /apps/<name>/workers/<worker>/run` — run a worker from the current app release - `POST /apps/<name>/training/<job>/start|stop|pause|resume|scale` — manage training jobs for the current app release - `GET /apps/<name>/training/<job>/status|logs` — inspect training jobs for the current app release -The app status surfaces (`GET /apps`, `GET /apps/<name>/status`, `yoq apps`, and `yoq status --app`) also report live training runtime counts for the app: active, paused, and failed jobs. +The app status surfaces (`GET /apps`, `GET /apps/<name>/status`, `yoq apps`, and `yoq status --app`) also report live training runtime counts for the app: active, paused, and failed jobs. Their JSON output now includes nested `current_release`, `previous_successful_release`, `workloads`, and `training_runtime` sections while keeping the older top-level fields for compatibility. For `GET /apps/<name>/training/<job>/logs`, the control plane now proxies the request to the hosting agent for the selected rank. If that agent is unreachable or does not expose the log endpoint, the route returns an explicit hosting-agent error. 
### rolling upgrades diff --git a/src/api/routes/cluster_agents/app_routes.zig b/src/api/routes/cluster_agents/app_routes.zig index 9bcac14..0e7075d 100644 --- a/src/api/routes/cluster_agents/app_routes.zig +++ b/src/api/routes/cluster_agents/app_routes.zig @@ -95,18 +95,24 @@ pub fn handleAppRollback( ctx: RouteContext, ) Response { const node = ctx.cluster orelse return common.badRequest("not running in cluster mode"); - const release_id = json_helpers.extractJsonString(request.body, "release_id") orelse - return common.badRequest("missing release_id"); - if (!common.validateContainerId(release_id)) return common.badRequest("invalid release_id"); + const release_id = json_helpers.extractJsonString(request.body, "release_id"); + const print_only = json_helpers.extractJsonBool(request.body, "print") orelse false; + if (release_id) |id| { + if (!common.validateContainerId(id)) return common.badRequest("invalid release_id"); + } - const release = store.getDeploymentInDb(node.stateMachineDb(), alloc, release_id) catch |err| return switch (err) { + const release = store.getRollbackTargetDeploymentByAppInDb(node.stateMachineDb(), alloc, app_name, release_id) catch |err| return switch (err) { error.NotFound => common.notFound(), else => common.internalError(), }; defer release.deinit(alloc); - if (release.app_name == null or !std.mem.eql(u8, release.app_name.?, app_name)) { - return common.notFound(); + if (print_only) { + return .{ + .status = .ok, + .body = alloc.dupe(u8, release.config_snapshot) catch return common.internalError(), + .allocated = true, + }; } const apply_request = http.Request{ @@ -118,7 +124,7 @@ pub fn handleAppRollback( .body = release.config_snapshot, .content_length = release.config_snapshot.len, }; - return deploy_routes.handleAppRollbackApply(alloc, apply_request, ctx, release_id); + return deploy_routes.handleAppRollbackApply(alloc, apply_request, ctx, release.id); } fn formatAppsResponse( @@ -172,6 +178,9 @@ fn 
formatAppStatusResponseFromDeployments( } fn formatAppHistoryResponse(alloc: std.mem.Allocator, deployments: []const store.DeploymentRecord) ![]u8 { + const current_release_id = if (deployments.len > 0) deployments[0].id else null; + const previous_successful_release_id = findPreviousSuccessfulReleaseId(deployments); + var json_buf: std.ArrayList(u8) = .empty; errdefer json_buf.deinit(alloc); const writer = json_buf.writer(alloc); @@ -180,6 +189,8 @@ fn formatAppHistoryResponse(alloc: std.mem.Allocator, deployments: []const store for (deployments, 0..) |dep, i| { const report = apply_release.reportFromDeployment(dep); const summary = app_snapshot.summarize(dep.config_snapshot); + const is_current = current_release_id != null and std.mem.eql(u8, dep.id, current_release_id.?); + const is_previous_successful = previous_successful_release_id != null and std.mem.eql(u8, dep.id, previous_successful_release_id.?); if (i > 0) try writer.writeByte(','); try writer.writeByte('{'); try json_helpers.writeJsonStringField(writer, "id", report.release_id orelse ""); @@ -207,6 +218,34 @@ fn formatAppHistoryResponse(alloc: std.mem.Allocator, deployments: []const store try json_helpers.writeNullableJsonStringField(writer, "source_release_id", report.source_release_id); try writer.writeByte(','); try json_helpers.writeNullableJsonStringField(writer, "message", report.message); + try writer.print(",\"is_current\":{},\"is_previous_successful\":{}", .{ is_current, is_previous_successful }); + try writer.writeAll(",\"release\":{"); + try json_helpers.writeJsonStringField(writer, "id", report.release_id orelse ""); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "trigger", report.trigger.toString()); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "status", report.status.toString()); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "manifest_hash", report.manifest_hash); + try 
writer.print(",\"created_at\":{d},\"completed_targets\":{d},\"failed_targets\":{d},\"remaining_targets\":{d},\"current\":{},\"previous_successful\":{}", .{ + report.created_at, + report.completed_targets, + report.failed_targets, + report.remainingTargets(), + is_current, + is_previous_successful, + }); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "source_release_id", report.source_release_id); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "message", report.message); + try writer.writeByte('}'); + try writer.print(",\"workloads\":{{\"services\":{d},\"workers\":{d},\"crons\":{d},\"training_jobs\":{d}}}", .{ + summary.service_count, + summary.worker_count, + summary.cron_count, + summary.training_job_count, + }); try writer.writeByte('}'); } try writer.writeByte(']'); @@ -252,18 +291,94 @@ fn formatAppStatusResponse( try writer.writeByte(','); try json_helpers.writeNullableJsonStringField(writer, "previous_successful_release_id", if (previous_successful) |prev| prev.release_id else null); try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "previous_successful_trigger", if (previous_successful) |prev| prev.trigger.toString() else null); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "previous_successful_status", if (previous_successful) |prev| prev.status.toString() else null); + try writer.writeByte(','); try json_helpers.writeNullableJsonStringField(writer, "previous_successful_manifest_hash", if (previous_successful) |prev| prev.manifest_hash else null); if (previous_successful) |prev| { try writer.print(",\"previous_successful_created_at\":{d}", .{prev.created_at}); + try writer.print(",\"previous_successful_completed_targets\":{d},\"previous_successful_failed_targets\":{d},\"previous_successful_remaining_targets\":{d}", .{ + prev.completed_targets, + prev.failed_targets, + prev.remainingTargets(), + }); } else { try 
writer.writeAll(",\"previous_successful_created_at\":null"); + try writer.writeAll(",\"previous_successful_completed_targets\":0,\"previous_successful_failed_targets\":0,\"previous_successful_remaining_targets\":0"); } try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "previous_successful_source_release_id", if (previous_successful) |prev| prev.source_release_id else null); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "previous_successful_message", if (previous_successful) |prev| prev.message else null); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "message", report.message); + try writer.writeAll(",\"current_release\":{"); + try json_helpers.writeJsonStringField(writer, "id", report.release_id orelse ""); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "trigger", report.trigger.toString()); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "status", report.status.toString()); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "manifest_hash", report.manifest_hash); + try writer.print(",\"created_at\":{d},\"completed_targets\":{d},\"failed_targets\":{d},\"remaining_targets\":{d}", .{ + report.created_at, + report.completed_targets, + report.failed_targets, + report.remainingTargets(), + }); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "source_release_id", report.source_release_id); + try writer.writeByte(','); try json_helpers.writeNullableJsonStringField(writer, "message", report.message); try writer.writeByte('}'); + try writer.writeAll(",\"previous_successful_release\":"); + if (previous_successful) |prev| { + try writer.writeByte('{'); + try json_helpers.writeJsonStringField(writer, "id", prev.release_id orelse ""); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "trigger", prev.trigger.toString()); + 
try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "status", prev.status.toString()); + try writer.writeByte(','); + try json_helpers.writeJsonStringField(writer, "manifest_hash", prev.manifest_hash); + try writer.print(",\"created_at\":{d},\"completed_targets\":{d},\"failed_targets\":{d},\"remaining_targets\":{d}", .{ + prev.created_at, + prev.completed_targets, + prev.failed_targets, + prev.remainingTargets(), + }); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "source_release_id", prev.source_release_id); + try writer.writeByte(','); + try json_helpers.writeNullableJsonStringField(writer, "message", prev.message); + try writer.writeByte('}'); + } else { + try writer.writeAll("null"); + } + try writer.print(",\"workloads\":{{\"services\":{d},\"workers\":{d},\"crons\":{d},\"training_jobs\":{d}}}", .{ + summary.service_count, + summary.worker_count, + summary.cron_count, + summary.training_job_count, + }); + try writer.print(",\"training_runtime\":{{\"active\":{d},\"paused\":{d},\"failed\":{d}}}", .{ + training_summary.active, + training_summary.paused, + training_summary.failed, + }); + try writer.writeByte('}'); return json_buf.toOwnedSlice(alloc); } +fn findPreviousSuccessfulReleaseId(deployments: []const store.DeploymentRecord) ?[]const u8 { + if (deployments.len == 0) return null; + for (deployments[1..]) |dep| { + if (std.mem.eql(u8, dep.status, "completed")) return dep.id; + } + return null; +} + const RouteFlowHarness = struct { alloc: std.mem.Allocator, tmp: std.testing.TmpDir, @@ -325,6 +440,18 @@ const RouteFlowHarness = struct { return handleAppRollback(self.alloc, app_name, makeRequest(.POST, path, body), self.ctx()); } + fn rollbackDefault(self: *RouteFlowHarness, app_name: []const u8) !Response { + const path = try std.fmt.allocPrint(self.alloc, "/apps/{s}/rollback", .{app_name}); + defer self.alloc.free(path); + return handleAppRollback(self.alloc, app_name, makeRequest(.POST, path, 
"{\"print\":false}"), self.ctx()); + } + + fn rollbackPrint(self: *RouteFlowHarness, app_name: []const u8) !Response { + const path = try std.fmt.allocPrint(self.alloc, "/apps/{s}/rollback", .{app_name}); + defer self.alloc.free(path); + return handleAppRollback(self.alloc, app_name, makeRequest(.POST, path, "{\"print\":true}"), self.ctx()); + } + fn status(self: *RouteFlowHarness, app_name: []const u8) Response { return handleAppStatus(self.alloc, app_name, self.ctx()); } @@ -392,6 +519,8 @@ test "formatAppHistoryResponse emits release records" { try std.testing.expect(std.mem.indexOf(u8, json, "\"source_release_id\":null") != null); try std.testing.expect(std.mem.indexOf(u8, json, "\"message\":\"placement failed\"") != null); try std.testing.expect(std.mem.indexOf(u8, json, "\"message\":null") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"release\":{\"id\":\"dep-2\"") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"workloads\":{\"services\":0,\"workers\":0,\"crons\":0,\"training_jobs\":0}") != null); } test "formatAppStatusResponse summarizes latest release" { @@ -419,6 +548,9 @@ test "formatAppStatusResponse summarizes latest release" { try std.testing.expect(std.mem.indexOf(u8, json, "\"remaining_targets\":2") != null); try std.testing.expect(std.mem.indexOf(u8, json, "\"source_release_id\":null") != null); try std.testing.expect(std.mem.indexOf(u8, json, "\"previous_successful_release_id\":null") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"current_release\":{\"id\":\"dep-2\"") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"workloads\":{\"services\":2,\"workers\":0,\"crons\":0,\"training_jobs\":0}") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"training_runtime\":{\"active\":0,\"paused\":0,\"failed\":0}") != null); } test "formatAppsResponse emits one latest summary per app" { @@ -769,6 +901,78 @@ test "app apply then rollback routes preserve release transition metadata" { 
try expectJsonContains(history_response.body, source_release_id); } +test "app rollback defaults to the previous successful release when release id is omitted" { + const alloc = std.testing.allocator; + const first_apply_body = + \\{"app_name":"demo-app","services":[{"name":"web","image":"nginx:1","command":["echo","first"]}]} + ; + const second_apply_body = + \\{"app_name":"demo-app","services":[{"name":"web","image":"nginx:2","command":["echo","second"]}]} + ; + + var harness = try RouteFlowHarness.init(alloc); + defer harness.deinit(); + + const first_apply_response = harness.appApply(first_apply_body); + defer freeResponse(alloc, first_apply_response); + try expectResponseOk(first_apply_response); + const source_release_id = json_helpers.extractJsonString(first_apply_response.body, "release_id").?; + + const second_apply_response = harness.appApply(second_apply_body); + defer freeResponse(alloc, second_apply_response); + try expectResponseOk(second_apply_response); + + const rollback_response = try harness.rollbackDefault("demo-app"); + defer freeResponse(alloc, rollback_response); + try expectResponseOk(rollback_response); + try expectJsonContains(rollback_response.body, "\"source_release_id\":\""); + try expectJsonContains(rollback_response.body, source_release_id); + + const latest = try store.getLatestDeploymentByAppInDb(harness.node.stateMachineDb(), alloc, "demo-app"); + defer latest.deinit(alloc); + try std.testing.expectEqualStrings("rollback", latest.trigger.?); + try std.testing.expectEqualStrings(source_release_id, latest.source_release_id.?); +} + +test "app rollback print returns the selected snapshot without creating a new release" { + const alloc = std.testing.allocator; + const first_apply_body = + \\{"app_name":"demo-app","services":[{"name":"web","image":"nginx:1","command":["echo","first"]}]} + ; + const second_apply_body = + \\{"app_name":"demo-app","services":[{"name":"web","image":"nginx:2","command":["echo","second"]}]} + ; + + var 
harness = try RouteFlowHarness.init(alloc); + defer harness.deinit(); + + const first_apply_response = harness.appApply(first_apply_body); + defer freeResponse(alloc, first_apply_response); + try expectResponseOk(first_apply_response); + + const second_apply_response = harness.appApply(second_apply_body); + defer freeResponse(alloc, second_apply_response); + try expectResponseOk(second_apply_response); + + var before = try store.listDeploymentsByAppInDb(harness.node.stateMachineDb(), alloc, "demo-app"); + defer { + for (before.items) |dep| dep.deinit(alloc); + before.deinit(alloc); + } + + const rollback_response = try harness.rollbackPrint("demo-app"); + defer freeResponse(alloc, rollback_response); + try expectResponseOk(rollback_response); + try std.testing.expect(std.mem.indexOf(u8, rollback_response.body, "\"image\":\"nginx:1\"") != null); + + var after = try store.listDeploymentsByAppInDb(harness.node.stateMachineDb(), alloc, "demo-app"); + defer { + for (after.items) |dep| dep.deinit(alloc); + after.deinit(alloc); + } + try std.testing.expectEqual(before.items.len, after.items.len); +} + test "app apply registers cluster cron schedules from snapshot" { const alloc = std.testing.allocator; const apply_body = diff --git a/src/lib/command_registry.zig b/src/lib/command_registry.zig index 154a71e..10b9f58 100644 --- a/src/lib/command_registry.zig +++ b/src/lib/command_registry.zig @@ -44,7 +44,7 @@ pub const command_specs = [_]CommandSpec{ .{ .name = "restart", .group = .runtime, .usage = "restart ", .description = "restart a container", .handler = container_cmds.restart }, .{ .name = "exec", .group = .runtime, .usage = "exec [args...]", .description = "run a command in a running container", .handler = container_cmds.exec_cmd }, .{ .name = "status", .group = .runtime, .usage = "status [--app [name]] [--verbose] [--server h:p]", .description = "show service or app status", .handler = runtime_cmds.status }, - .{ .name = "apps", .group = .runtime, .usage = "apps 
[--server h:p] [--json]", .description = "list app release summaries", .handler = runtime_cmds.apps }, + .{ .name = "apps", .group = .runtime, .usage = "apps [--server h:p] [--json] [--status s|--failed|--in-progress]", .description = "list app release summaries", .handler = runtime_cmds.apps }, .{ .name = "metrics", .group = .runtime, .usage = "metrics [service] [--server h:p]", .description = "show per-service network metrics", .handler = runtime_cmds.metrics }, .{ .name = "gpu", .group = .runtime, .usage = "gpu [--json]", .description = "GPU topology, diagnostics, and benchmarking", .handler = gpu_cmds.gpu }, @@ -61,7 +61,7 @@ pub const command_specs = [_]CommandSpec{ .{ .name = "up", .group = .build_manifest, .usage = "up [-f manifest.toml] [--dev] [--server host:port] [service...]", .description = "start services from a manifest", .handler = manifest_cmds.up }, .{ .name = "down", .group = .build_manifest, .usage = "down [-f manifest.toml]", .description = "stop all services from manifest", .handler = manifest_cmds.down }, .{ .name = "run-worker", .group = .build_manifest, .usage = "run-worker [-f manifest.toml] [--server host:port] ", .description = "run a one-shot worker task", .handler = manifest_cmds.runWorker }, - .{ .name = "rollback", .group = .build_manifest, .usage = "rollback | --app [name] [--server h:p --release id]", .description = "rollback a service or app release", .handler = manifest_cmds.rollback }, + .{ .name = "rollback", .group = .build_manifest, .usage = "rollback | --app [name] [--server h:p] [--release id] [--print]", .description = "rollback a service or app release", .handler = manifest_cmds.rollback }, .{ .name = "history", .group = .build_manifest, .usage = "history | --app [name] [--server h:p] [--json]", .description = "show service or app release history", .handler = manifest_cmds.history }, .{ .name = "train", .group = .build_manifest, .usage = "train [--server host:port] ", .description = "manage training jobs", .handler = 
manifest_cmds.train }, diff --git a/src/lib/completion.zig b/src/lib/completion.zig index bfd70f0..7ddcd02 100644 --- a/src/lib/completion.zig +++ b/src/lib/completion.zig @@ -42,7 +42,7 @@ const command_meta = [_]CommandMeta{ .{ .name = "restart" }, .{ .name = "exec" }, .{ .name = "status", .flags = &.{ "--app", "--verbose", "-v", "--server" } }, - .{ .name = "apps", .flags = &.{ "--server", "--json" } }, + .{ .name = "apps", .flags = &.{ "--server", "--json", "--status", "--failed", "--in-progress" } }, .{ .name = "metrics", .flags = &.{ "--server", "--pairs" } }, .{ .name = "gpu", .subcommands = &.{ .{ .name = "topo", .flags = &.{"--json"} }, @@ -63,7 +63,7 @@ const command_meta = [_]CommandMeta{ .{ .name = "up", .flags = &.{ "-f", "--dev", "--server" } }, .{ .name = "down", .flags = &.{"-f"} }, .{ .name = "run-worker", .flags = &.{ "-f", "--server" } }, - .{ .name = "rollback", .flags = &.{ "--app", "--server", "--release" } }, + .{ .name = "rollback", .flags = &.{ "--app", "--server", "--release", "--print" } }, .{ .name = "history", .flags = &.{ "--app", "--server", "--json" } }, .{ .name = "train", .flags = &.{ "-f", "--server", "--rank" }, .subcommands = &.{ .{ .name = "start", .flags = &.{ "-f", "--server" } }, diff --git a/src/lib/json_helpers.zig b/src/lib/json_helpers.zig index b87535e..a312538 100644 --- a/src/lib/json_helpers.zig +++ b/src/lib/json_helpers.zig @@ -121,6 +121,20 @@ pub fn extractJsonFloat(json: []const u8, key: []const u8) ?f64 { return std.fmt.parseFloat(f64, json[pos..end]) catch null; } +/// extract a boolean value from a JSON object: {"key":true} +pub fn extractJsonBool(json: []const u8, key: []const u8) ?bool { + var search_buf: [128]u8 = undefined; + const needle = std.fmt.bufPrint(&search_buf, "\"{s}\":", .{key}) catch return null; + + const start_pos = std.mem.indexOf(u8, json, needle) orelse return null; + var pos = start_pos + needle.len; + while (pos < json.len and json[pos] == ' ') : (pos += 1) {} + + if 
(std.mem.startsWith(u8, json[pos..], "true")) return true; + if (std.mem.startsWith(u8, json[pos..], "false")) return false; + return null; +} + /// extract a top-level array value from a JSON object: {"key":[...]} pub fn extractJsonArray(json: []const u8, key: []const u8) ?[]const u8 { var search_buf: [128]u8 = undefined; diff --git a/src/manifest/cli/ops.zig b/src/manifest/cli/ops.zig index 9b12784..a4c91b0 100644 --- a/src/manifest/cli/ops.zig +++ b/src/manifest/cli/ops.zig @@ -4,6 +4,8 @@ const json_helpers = @import("../../lib/json_helpers.zig"); const json_out = @import("../../lib/json_output.zig"); const apply_release = @import("../apply_release.zig"); const app_snapshot = @import("../app_snapshot.zig"); +const rollback_snapshot = @import("../rollback_snapshot.zig"); +const local_apply_backend = @import("../local_apply_backend.zig"); const manifest_loader = @import("../loader.zig"); const orchestrator = @import("../orchestrator.zig"); const release_history = @import("../release_history.zig"); @@ -29,6 +31,7 @@ pub fn rollback(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void var app_mode = false; var server_addr: ?[]const u8 = null; var release_id: ?[]const u8 = null; + var print_only = false; while (args.next()) |arg| { if (std.mem.eql(u8, arg, "--app")) { @@ -43,6 +46,8 @@ pub fn rollback(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void writeErr("--release requires a release id\n", .{}); return OpsError.InvalidArgument; }; + } else if (std.mem.eql(u8, arg, "--print")) { + print_only = true; } else { target_name = arg; } @@ -56,27 +61,23 @@ pub fn rollback(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void const owned_app_name = if (target_name == null) try currentAppNameAlloc(alloc) else null; defer if (owned_app_name) |name| alloc.free(name); const app_name = target_name orelse owned_app_name.?; - const id = release_id orelse { - writeErr("remote rollback requires --release \n", .{}); - return 
OpsError.InvalidArgument; - }; - try rollbackRemoteApp(alloc, server_addr.?, app_name, id); + try rollbackRemoteApp(alloc, server_addr.?, app_name, release_id, print_only); return; } - const config = if (app_mode) blk: { + if (app_mode) { const owned_app_name = if (target_name == null) try currentAppNameAlloc(alloc) else null; defer if (owned_app_name) |name| alloc.free(name); const app_name = target_name orelse owned_app_name.?; - break :blk release_history.rollbackApp(alloc, app_name) catch { - writeErr("no previous deployment found for app {s}\n", .{app_name}); - return OpsError.StoreError; - }; - } else blk: { + try rollbackLocalApp(alloc, app_name, release_id, print_only); + return; + } + + const config = blk: { const service_name = target_name orelse { writeErr("usage: yoq rollback \n", .{}); - writeErr(" or: yoq rollback --app [name]\n", .{}); - writeErr(" or: yoq rollback --app [name] --server host:port --release \n", .{}); + writeErr(" or: yoq rollback --app [name] [--print] [--release ]\n", .{}); + writeErr(" or: yoq rollback --app [name] [--server host:port] [--release ] [--print]\n", .{}); return OpsError.InvalidArgument; }; @@ -97,17 +98,86 @@ pub fn rollback(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void }; defer alloc.free(config); - if (app_mode) { - const owned_app_name = if (target_name == null) try currentAppNameAlloc(alloc) else null; - defer if (owned_app_name) |name| alloc.free(name); - const app_name = target_name orelse owned_app_name.?; - write("rollback config for app {s}:\n{s}\n", .{ app_name, config }); - } else { - write("rollback config for {s}:\n{s}\n", .{ target_name.?, config }); - } + write("rollback config for {s}:\n{s}\n", .{ target_name.?, config }); write("\nto apply this rollback, redeploy with this config using 'yoq up'\n", .{}); } +const RollbackSummary = struct { + app_name: []const u8, + release_id: []const u8, + trigger: []const u8, + status: []const u8, + completed_targets: usize, + failed_targets: 
usize, + remaining_targets: usize, + source_release_id: ?[]const u8, + message: ?[]const u8, + is_current: bool = false, + is_previous_successful: bool = false, +}; + +fn rollbackLocalApp( + alloc: std.mem.Allocator, + app_name: []const u8, + release_id: ?[]const u8, + print_only: bool, +) !void { + const target = store.getRollbackTargetDeploymentByApp(alloc, app_name, release_id) catch { + writeErr("no previous deployment found for app {s}\n", .{app_name}); + return OpsError.StoreError; + }; + defer target.deinit(alloc); + + if (print_only) { + write("rollback snapshot for app {s}:\n{s}\n", .{ app_name, target.config_snapshot }); + return; + } + + var loaded = rollback_snapshot.loadLocalRollbackSnapshot(alloc, target.config_snapshot) catch |err| { + writeErr("failed to load rollback snapshot: {}\n", .{err}); + return OpsError.StoreError; + }; + defer loaded.deinit(); + + var prepared = local_apply_backend.PreparedLocalApply.init(alloc, &loaded.manifest, &loaded.release, false) catch |err| { + writeErr("failed to initialize rollback runtime: {}\n", .{err}); + return OpsError.DeploymentFailed; + }; + defer prepared.deinit(); + prepared.beginRuntime(); + + const apply_report = prepared.startRelease(.{ + .trigger = .rollback, + .source_release_id = target.id, + }) catch |err| { + writeErr("rollback failed: {}\n", .{err}); + return OpsError.DeploymentFailed; + }; + defer apply_report.deinit(alloc); + + printRollbackSummary(.{ + .app_name = app_name, + .release_id = apply_report.release_id orelse "?", + .trigger = apply_report.trigger.toString(), + .status = apply_report.status.toString(), + .completed_targets = apply_report.completed_targets, + .failed_targets = apply_report.failed_targets, + .remaining_targets = apply_report.remainingTargets(), + .source_release_id = apply_report.source_release_id, + .message = apply_report.message, + }); + + if (loaded.release.resolvedServiceCount() == 0) { + return; + } + + writeErr("rollback applied. services running. 
press ctrl-c to stop.\n", .{}); + prepared.orch.waitForShutdown(); + writeErr("\nshutting down...\n", .{}); + prepared.orch.stopAll(); + writeErr("stopped\n", .{}); +} + pub fn history(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { var target_name: ?[]const u8 = null; var app_mode = false; @@ -168,8 +238,12 @@ pub fn history(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { if (cli.output_mode == .json) { var w = json_out.JsonWriter{}; w.beginArray(); - for (deployments.items) |dep| { - writeHistoryJsonObject(&w, historyEntryFromDeployment(dep)); + const previous_successful_id = previousSuccessfulReleaseId(deployments.items); + for (deployments.items, 0..) |dep, i| { + var entry = historyEntryFromDeployment(dep); + entry.is_current = i == 0; + entry.is_previous_successful = previous_successful_id != null and std.mem.eql(u8, entry.id, previous_successful_id.?); + writeHistoryJsonObject(&w, entry); } w.endArray(); w.flush(); @@ -187,8 +261,12 @@ pub fn history(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { writeHistoryHeader(); - for (deployments.items) |dep| { - writeHistoryRow(historyEntryFromDeployment(dep)); + const previous_successful_id = previousSuccessfulReleaseId(deployments.items); + for (deployments.items, 0..) 
|dep, i| { + var entry = historyEntryFromDeployment(dep); + entry.is_current = i == 0; + entry.is_previous_successful = previous_successful_id != null and std.mem.eql(u8, entry.id, previous_successful_id.?); + writeHistoryRow(entry); } } @@ -223,17 +301,31 @@ fn printRemoteAppHistory(alloc: std.mem.Allocator, addr_str: []const u8, app_nam return; } + var entries: std.ArrayList(HistoryEntryView) = .empty; + defer entries.deinit(alloc); + var iter = json_helpers.extractJsonObjects(resp.body); - const first = iter.next() orelse { + while (iter.next()) |obj| { + entries.append(alloc, parseHistoryObject(obj)) catch return OpsError.StoreError; + } + + if (entries.items.len == 0) { write("no releases found for app {s}\n", .{app_name}); return; - }; + } writeHistoryHeader(); - writeHistoryRow(parseHistoryObject(first)); - while (iter.next()) |obj| { - writeHistoryRow(parseHistoryObject(obj)); + for (entries.items) |entry| { + writeHistoryRow(entry); + } +} + +fn previousSuccessfulReleaseId(deployments: []const store.DeploymentRecord) ?[]const u8 { + if (deployments.len == 0) return null; + for (deployments[1..]) |dep| { + if (std.mem.eql(u8, dep.status, "completed")) return dep.id; } + return null; } const HistoryEntryView = struct { @@ -253,6 +345,8 @@ const HistoryEntryView = struct { remaining_targets: usize, source_release_id: ?[]const u8, message: ?[]const u8, + is_current: bool = false, + is_previous_successful: bool = false, }; fn historyEntryFromDeployment(dep: store.DeploymentRecord) HistoryEntryView { @@ -275,6 +369,8 @@ fn historyEntryFromDeployment(dep: store.DeploymentRecord) HistoryEntryView { .remaining_targets = report.remainingTargets(), .source_release_id = report.source_release_id, .message = report.message, + .is_current = false, + .is_previous_successful = false, }; } @@ -296,25 +392,36 @@ fn parseHistoryObject(obj: []const u8) HistoryEntryView { .remaining_targets = @intCast(@max(0, json_helpers.extractJsonInt(obj, "remaining_targets") orelse 0)), 
.source_release_id = json_helpers.extractJsonString(obj, "source_release_id"), .message = json_helpers.extractJsonString(obj, "message"), + .is_current = json_helpers.extractJsonBool(obj, "is_current") orelse false, + .is_previous_successful = json_helpers.extractJsonBool(obj, "is_previous_successful") orelse false, }; } fn writeHistoryHeader() void { - write("{s:<14} {s:<14} {s:<14} {s:<20} {s}\n", .{ "ID", "STATUS", "HASH", "TIMESTAMP", "MESSAGE" }); + write("{s:<8} {s:<14} {s:<14} {s:<10} {s:<14} {s:<16} {s}\n", .{ + "MARK", "ID", "STATUS", "TRIGGER", "HASH", "TARGETS", "MESSAGE", + }); } fn writeHistoryRow(entry: HistoryEntryView) void { const message = entry.message orelse ""; + const mark = if (entry.is_current) + "current" + else if (entry.is_previous_successful) + "prev-ok" + else + ""; + var progress_buf: [64]u8 = undefined; + const progress = formatProgressCounts(&progress_buf, entry.completed_targets, entry.failed_targets, entry.remaining_targets); - var ts_buf: [20]u8 = undefined; - const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{entry.created_at}) catch "?"; - - write("{s:<14} {s:<14} {s:<14} {s:<20} {s}\n", .{ + write("{s:<8} {s:<14} {s:<14} {s:<10} {s:<14} {s:<16} {s}\n", .{ + mark, truncate(entry.id, 12), entry.status, + entry.trigger, truncate(entry.manifest_hash, 12), - ts_str, - truncate(message, 40), + progress, + truncate(message, 36), }); } @@ -336,19 +443,45 @@ fn writeHistoryJsonObject(w: *json_out.JsonWriter, entry: HistoryEntryView) void w.uintField("remaining_targets", entry.remaining_targets); if (entry.source_release_id) |source_release_id| w.stringField("source_release_id", source_release_id) else w.nullField("source_release_id"); if (entry.message) |message| w.stringField("message", message) else w.nullField("message"); + w.boolField("is_current", entry.is_current); + w.boolField("is_previous_successful", entry.is_previous_successful); + w.beginObjectField("release"); + w.stringField("id", entry.id); + w.stringField("trigger", 
entry.trigger); + w.stringField("status", entry.status); + w.stringField("manifest_hash", entry.manifest_hash); + w.intField("created_at", entry.created_at); + w.uintField("completed_targets", entry.completed_targets); + w.uintField("failed_targets", entry.failed_targets); + w.uintField("remaining_targets", entry.remaining_targets); + if (entry.source_release_id) |source_release_id| w.stringField("source_release_id", source_release_id) else w.nullField("source_release_id"); + if (entry.message) |message| w.stringField("message", message) else w.nullField("message"); + w.boolField("current", entry.is_current); + w.boolField("previous_successful", entry.is_previous_successful); + w.endObject(); + w.beginObjectField("workloads"); + w.uintField("services", entry.service_count); + w.uintField("workers", entry.worker_count); + w.uintField("crons", entry.cron_count); + w.uintField("training_jobs", entry.training_job_count); + w.endObject(); w.endObject(); } -fn rollbackRemoteApp(alloc: std.mem.Allocator, addr_str: []const u8, app_name: []const u8, release_id: []const u8) !void { - if (release_id.len == 0) { - writeErr("remote rollback requires a release id\n", .{}); - return OpsError.InvalidArgument; - } - +fn rollbackRemoteApp( + alloc: std.mem.Allocator, + addr_str: []const u8, + app_name: []const u8, + release_id: ?[]const u8, + print_only: bool, +) !void { const server = cli.parseServerAddr(addr_str); const path = std.fmt.allocPrint(alloc, "/apps/{s}/rollback", .{app_name}) catch return OpsError.StoreError; defer alloc.free(path); - const body = std.fmt.allocPrint(alloc, "{{\"release_id\":\"{s}\"}}", .{release_id}) catch return OpsError.StoreError; + const body = if (release_id) |id| + std.fmt.allocPrint(alloc, "{{\"release_id\":\"{s}\",\"print\":{}}}", .{ id, print_only }) catch return OpsError.StoreError + else + std.fmt.allocPrint(alloc, "{{\"print\":{}}}", .{print_only}) catch return OpsError.StoreError; defer alloc.free(body); var token_buf: [64]u8 = undefined; 
@@ -366,7 +499,56 @@ fn rollbackRemoteApp(alloc: std.mem.Allocator, addr_str: []const u8, app_name: [ return OpsError.StoreError; } - write("{s}\n", .{resp.body}); + if (print_only) { + write("rollback snapshot for app {s}:\n{s}\n", .{ app_name, resp.body }); + return; + } + + printRollbackSummary(parseRollbackSummary(resp.body)); +} + +fn parseRollbackSummary(json: []const u8) RollbackSummary { + return .{ + .app_name = json_helpers.extractJsonString(json, "app_name") orelse "?", + .release_id = json_helpers.extractJsonString(json, "release_id") orelse "?", + .trigger = json_helpers.extractJsonString(json, "trigger") orelse "rollback", + .status = json_helpers.extractJsonString(json, "status") orelse "unknown", + .completed_targets = @intCast(@max(0, json_helpers.extractJsonInt(json, "completed_targets") orelse 0)), + .failed_targets = @intCast(@max(0, json_helpers.extractJsonInt(json, "failed_targets") orelse 0)), + .remaining_targets = @intCast(@max(0, json_helpers.extractJsonInt(json, "remaining_targets") orelse 0)), + .source_release_id = json_helpers.extractJsonString(json, "source_release_id"), + .message = json_helpers.extractJsonString(json, "message"), + }; +} + +fn printRollbackSummary(summary: RollbackSummary) void { + write("app: {s}\n", .{summary.app_name}); + write("release: {s}\n", .{summary.release_id}); + write("trigger: {s}\n", .{summary.trigger}); + write("source_release_id: {s}\n", .{summary.source_release_id orelse "-"}); + write("status: {s}\n", .{summary.status}); + + var progress_buf: [64]u8 = undefined; + const progress = formatProgressCounts(&progress_buf, summary.completed_targets, summary.failed_targets, summary.remaining_targets); + write("targets: {s}\n", .{progress}); + + if (summary.message) |message| { + write("message: {s}\n", .{message}); + } +} + +fn formatProgressCounts(buf: []u8, completed_targets: usize, failed_targets: usize, remaining_targets: usize) []const u8 { + if (failed_targets == 0 and remaining_targets == 0) { + 
return std.fmt.bufPrint(buf, "{d} ok", .{completed_targets}) catch "?"; + } + if (remaining_targets == 0) { + return std.fmt.bufPrint(buf, "{d} ok, {d} fail", .{ completed_targets, failed_targets }) catch "?"; + } + return std.fmt.bufPrint(buf, "{d} ok, {d} fail, {d} left", .{ + completed_targets, + failed_targets, + remaining_targets, + }) catch "?"; } test "parseHistoryObject extracts app release fields" { @@ -463,6 +645,38 @@ test "writeHistoryJsonObject round-trips through remote parser" { try std.testing.expectEqualStrings(entry.message.?, parsed.message.?); } +test "writeHistoryJsonObject includes nested release markers" { + const entry = HistoryEntryView{ + .id = "dep-1", + .app = "demo-app", + .service = "demo-app", + .trigger = "rollback", + .status = "completed", + .manifest_hash = "sha256:123", + .created_at = 42, + .service_count = 1, + .worker_count = 2, + .cron_count = 3, + .training_job_count = 4, + .completed_targets = 1, + .failed_targets = 0, + .remaining_targets = 0, + .source_release_id = "dep-0", + .message = "healthy", + .is_current = true, + .is_previous_successful = false, + }; + + var w = json_out.JsonWriter{}; + writeHistoryJsonObject(&w, entry); + const json = w.getWritten(); + + try std.testing.expect(std.mem.indexOf(u8, json, "\"is_current\":true") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"release\":{\"id\":\"dep-1\"") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"current\":true") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"workloads\":{\"services\":1,\"workers\":2,\"crons\":3,\"training_jobs\":4}") != null); +} + test "historyEntryFromDeployment preserves partially failed local release state" { const dep = store.DeploymentRecord{ .id = "dep-3", diff --git a/src/manifest/release_history.zig b/src/manifest/release_history.zig index 3112a1e..daf504c 100644 --- a/src/manifest/release_history.zig +++ b/src/manifest/release_history.zig @@ -38,13 +38,14 @@ pub fn 
markAppReleaseFailed(id: []const u8, message: ?[]const u8) !void { } pub fn rollbackApp(alloc: std.mem.Allocator, app_name: []const u8) ![]const u8 { - const latest = try store.getLatestDeploymentByApp(alloc, app_name); - defer latest.deinit(alloc); + return rollbackAppToRelease(alloc, app_name, null); +} - const previous_successful = try store.getPreviousSuccessfulDeploymentByApp(alloc, app_name, latest.id); - defer previous_successful.deinit(alloc); +pub fn rollbackAppToRelease(alloc: std.mem.Allocator, app_name: []const u8, explicit_release_id: ?[]const u8) ![]const u8 { + const target = try store.getRollbackTargetDeploymentByApp(alloc, app_name, explicit_release_id); + defer target.deinit(alloc); - return alloc.dupe(u8, previous_successful.config_snapshot); + return alloc.dupe(u8, target.config_snapshot); } pub fn listAppReleases(alloc: std.mem.Allocator, app_name: []const u8) !std.ArrayList(store.DeploymentRecord) { diff --git a/src/manifest/release_plan.zig b/src/manifest/release_plan.zig index f8970ad..8ae8504 100644 --- a/src/manifest/release_plan.zig +++ b/src/manifest/release_plan.zig @@ -22,6 +22,17 @@ pub const ReleasePlan = struct { alloc: std.mem.Allocator, app: *const app_spec.ApplicationSpec, targets: []const []const u8, + ) !ReleasePlan { + const config_snapshot = try makeConfigSnapshot(alloc, app, targets); + defer alloc.free(config_snapshot); + return fromAppSpecWithSnapshot(alloc, app, targets, config_snapshot); + } + + pub fn fromAppSpecWithSnapshot( + alloc: std.mem.Allocator, + app: *const app_spec.ApplicationSpec, + targets: []const []const u8, + config_snapshot: []const u8, ) !ReleasePlan { var planned_app = if (targets.len == 0) try app.clone(alloc) @@ -39,16 +50,16 @@ pub const ReleasePlan = struct { service_filter = filter; } - const config_snapshot = try planned_app.toApplyJson(alloc); - errdefer alloc.free(config_snapshot); - const manifest_hash = try deployment_store.computeManifestHash(alloc, config_snapshot); + const owned_snapshot 
= try alloc.dupe(u8, config_snapshot); + errdefer alloc.free(owned_snapshot); + const manifest_hash = try deployment_store.computeManifestHash(alloc, owned_snapshot); errdefer alloc.free(manifest_hash); return .{ .app = planned_app, .service_filter = service_filter, .manifest_hash = manifest_hash, - .config_snapshot = config_snapshot, + .config_snapshot = owned_snapshot, .requested_target_count = targets.len, .alloc = alloc, }; @@ -68,6 +79,15 @@ pub const ReleasePlan = struct { } }; +fn makeConfigSnapshot(alloc: std.mem.Allocator, app: *const app_spec.ApplicationSpec, targets: []const []const u8) ![]u8 { + var planned_app = if (targets.len == 0) + try app.clone(alloc) + else + try app.selectServices(alloc, targets); + defer planned_app.deinit(); + return planned_app.toApplyJson(alloc); +} + test "full release plan clones full app without a service filter" { const alloc = std.testing.allocator; diff --git a/src/manifest/rollback_snapshot.zig b/src/manifest/rollback_snapshot.zig new file mode 100644 index 0000000..3278475 --- /dev/null +++ b/src/manifest/rollback_snapshot.zig @@ -0,0 +1,337 @@ +const std = @import("std"); +const spec = @import("spec.zig"); +const app_spec = @import("app_spec.zig"); +const release_plan = @import("release_plan.zig"); + +const JsonPort = struct { + host_port: u16, + container_port: u16, +}; + +const JsonVolume = struct { + source: []const u8, + target: []const u8, + kind: []const u8 = "bind", +}; + +const JsonHealthCheck = struct { + kind: []const u8, + path: ?[]const u8 = null, + port: ?u16 = null, + service: ?[]const u8 = null, + command: []const []const u8 = &.{}, + interval: u32 = 10, + timeout: u32 = 5, + retries: u32 = 3, + start_period: u32 = 0, +}; + +const JsonTls = struct { + domain: []const u8, + acme: bool = false, + email: ?[]const u8 = null, +}; + +const JsonMethodMatch = struct { + method: []const u8, +}; + +const JsonHeaderMatch = struct { + name: []const u8, + value: []const u8, +}; + +const JsonBackendRoute = struct { 
+ service_name: []const u8, + weight: u8, +}; + +const JsonHttpRoute = struct { + name: []const u8, + host: []const u8, + path_prefix: []const u8 = "/", + rewrite_prefix: ?[]const u8 = null, + match_methods: []const JsonMethodMatch = &.{}, + match_headers: []const JsonHeaderMatch = &.{}, + backend_services: []const JsonBackendRoute = &.{}, + mirror_service: ?[]const u8 = null, + retries: u8 = 0, + connect_timeout_ms: u32 = 1000, + request_timeout_ms: u32 = 5000, + http2_idle_timeout_ms: u32 = 30000, + preserve_host: bool = true, + retry_on_5xx: bool = true, + circuit_breaker_threshold: u8 = 3, + circuit_breaker_timeout_ms: u32 = 30000, +}; + +const JsonGpu = struct { + count: u32 = 0, + model: ?[]const u8 = null, + vram_min_mb: ?u64 = null, +}; + +const JsonGpuMesh = struct { + world_size: u32, + gpus_per_rank: u32 = 1, + master_port: u16 = 29500, +}; + +const JsonService = struct { + name: []const u8, + image: []const u8, + command: []const []const u8 = &.{}, + ports: []const JsonPort = &.{}, + env: []const []const u8 = &.{}, + depends_on: []const []const u8 = &.{}, + working_dir: ?[]const u8 = null, + volumes: []const JsonVolume = &.{}, + health_check: ?JsonHealthCheck = null, + restart: []const u8 = "none", + tls: ?JsonTls = null, + http_routes: []const JsonHttpRoute = &.{}, + gpu: ?JsonGpu = null, + gpu_mesh: ?JsonGpuMesh = null, +}; + +const JsonApp = struct { + app_name: []const u8, + services: []const JsonService = &.{}, +}; + +pub const LoadedRollbackSnapshot = struct { + manifest: spec.Manifest, + release: release_plan.ReleasePlan, + + pub fn deinit(self: *LoadedRollbackSnapshot) void { + self.release.deinit(); + self.manifest.deinit(); + } +}; + +pub fn loadLocalRollbackSnapshot(alloc: std.mem.Allocator, snapshot_json: []const u8) !LoadedRollbackSnapshot { + const parsed = try std.json.parseFromSlice(JsonApp, alloc, snapshot_json, .{ + .ignore_unknown_fields = true, + .allocate = .alloc_always, + }); + defer parsed.deinit(); + + var manifest = try 
manifestFromSnapshot(alloc, parsed.value); + errdefer manifest.deinit(); + + var app = try app_spec.fromManifest(alloc, parsed.value.app_name, &manifest); + defer app.deinit(); + + var release = try release_plan.ReleasePlan.fromAppSpecWithSnapshot(alloc, &app, &.{}, snapshot_json); + errdefer release.deinit(); + + return .{ + .manifest = manifest, + .release = release, + }; +} + +fn manifestFromSnapshot(alloc: std.mem.Allocator, parsed: JsonApp) !spec.Manifest { + const services = try alloc.alloc(spec.Service, parsed.services.len); + errdefer alloc.free(services); + + for (parsed.services, 0..) |svc, i| { + services[i] = try serviceFromSnapshot(alloc, svc); + } + + return .{ + .services = services, + .workers = try alloc.alloc(spec.Worker, 0), + .crons = try alloc.alloc(spec.Cron, 0), + .training_jobs = try alloc.alloc(spec.TrainingJob, 0), + .volumes = try alloc.alloc(spec.Volume, 0), + .alloc = alloc, + }; +} + +fn serviceFromSnapshot(alloc: std.mem.Allocator, svc: JsonService) !spec.Service { + return .{ + .name = try alloc.dupe(u8, svc.name), + .image = try alloc.dupe(u8, svc.image), + .command = try dupeStringArray(alloc, svc.command), + .ports = try dupPorts(alloc, svc.ports), + .env = try dupeStringArray(alloc, svc.env), + .depends_on = try dupeStringArray(alloc, svc.depends_on), + .working_dir = if (svc.working_dir) |working_dir| try alloc.dupe(u8, working_dir) else null, + .volumes = try dupVolumes(alloc, svc.volumes), + .health_check = if (svc.health_check) |health_check| try dupHealthCheck(alloc, health_check) else null, + .restart = parseRestartPolicy(svc.restart), + .tls = if (svc.tls) |tls| try dupTls(alloc, tls) else null, + .http_routes = try dupHttpRoutes(alloc, svc.http_routes), + .gpu = if (svc.gpu) |gpu| try dupGpu(alloc, gpu) else null, + .gpu_mesh = if (svc.gpu_mesh) |mesh| .{ + .world_size = mesh.world_size, + .gpus_per_rank = mesh.gpus_per_rank, + .master_port = mesh.master_port, + } else null, + }; +} + +fn dupeStringArray(alloc: 
std.mem.Allocator, items: []const []const u8) ![]const []const u8 { + const out = try alloc.alloc([]const u8, items.len); + errdefer alloc.free(out); + for (items, 0..) |item, i| { + out[i] = try alloc.dupe(u8, item); + } + return out; +} + +fn dupPorts(alloc: std.mem.Allocator, ports: []const JsonPort) ![]const spec.PortMapping { + const out = try alloc.alloc(spec.PortMapping, ports.len); + for (ports, 0..) |port, i| { + out[i] = .{ + .host_port = port.host_port, + .container_port = port.container_port, + }; + } + return out; +} + +fn dupVolumes(alloc: std.mem.Allocator, volumes: []const JsonVolume) ![]const spec.VolumeMount { + const out = try alloc.alloc(spec.VolumeMount, volumes.len); + errdefer alloc.free(out); + for (volumes, 0..) |vol, i| { + out[i] = .{ + .source = try alloc.dupe(u8, vol.source), + .target = try alloc.dupe(u8, vol.target), + .kind = if (std.mem.eql(u8, vol.kind, "named")) .named else .bind, + }; + } + return out; +} + +fn dupHealthCheck(alloc: std.mem.Allocator, health_check: JsonHealthCheck) !spec.HealthCheck { + const check_type: spec.CheckType = if (std.mem.eql(u8, health_check.kind, "http")) + .{ .http = .{ + .path = try alloc.dupe(u8, health_check.path orelse "/"), + .port = health_check.port orelse 0, + } } + else if (std.mem.eql(u8, health_check.kind, "tcp")) + .{ .tcp = .{ + .port = health_check.port orelse 0, + } } + else if (std.mem.eql(u8, health_check.kind, "grpc")) + .{ .grpc = .{ + .port = health_check.port orelse 0, + .service = if (health_check.service) |service| try alloc.dupe(u8, service) else null, + } } + else + .{ .exec = .{ + .command = try dupeStringArray(alloc, health_check.command), + } }; + + return .{ + .check_type = check_type, + .interval = health_check.interval, + .timeout = health_check.timeout, + .retries = health_check.retries, + .start_period = health_check.start_period, + }; +} + +fn dupTls(alloc: std.mem.Allocator, tls: JsonTls) !spec.TlsConfig { + return .{ + .domain = try alloc.dupe(u8, tls.domain), + 
.acme = tls.acme, + .email = if (tls.email) |email| try alloc.dupe(u8, email) else null, + }; +} + +fn dupHttpRoutes(alloc: std.mem.Allocator, routes: []const JsonHttpRoute) ![]const spec.HttpProxyRoute { + const out = try alloc.alloc(spec.HttpProxyRoute, routes.len); + errdefer alloc.free(out); + for (routes, 0..) |route, i| { + out[i] = .{ + .name = try alloc.dupe(u8, route.name), + .host = try alloc.dupe(u8, route.host), + .path_prefix = try alloc.dupe(u8, route.path_prefix), + .rewrite_prefix = if (route.rewrite_prefix) |rewrite_prefix| try alloc.dupe(u8, rewrite_prefix) else null, + .match_methods = try dupMethodMatches(alloc, route.match_methods), + .match_headers = try dupHeaderMatches(alloc, route.match_headers), + .backend_services = try dupBackendRoutes(alloc, route.backend_services), + .mirror_service = if (route.mirror_service) |mirror_service| try alloc.dupe(u8, mirror_service) else null, + .retries = route.retries, + .connect_timeout_ms = route.connect_timeout_ms, + .request_timeout_ms = route.request_timeout_ms, + .http2_idle_timeout_ms = route.http2_idle_timeout_ms, + .preserve_host = route.preserve_host, + .retry_on_5xx = route.retry_on_5xx, + .circuit_breaker_threshold = route.circuit_breaker_threshold, + .circuit_breaker_timeout_ms = route.circuit_breaker_timeout_ms, + }; + } + return out; +} + +fn dupMethodMatches(alloc: std.mem.Allocator, methods: []const JsonMethodMatch) ![]const spec.HttpMethodMatch { + const out = try alloc.alloc(spec.HttpMethodMatch, methods.len); + errdefer alloc.free(out); + for (methods, 0..) |method, i| { + out[i] = .{ .method = try alloc.dupe(u8, method.method) }; + } + return out; +} + +fn dupHeaderMatches(alloc: std.mem.Allocator, headers: []const JsonHeaderMatch) ![]const spec.HttpHeaderMatch { + const out = try alloc.alloc(spec.HttpHeaderMatch, headers.len); + errdefer alloc.free(out); + for (headers, 0..) 
|header, i| { + out[i] = .{ + .name = try alloc.dupe(u8, header.name), + .value = try alloc.dupe(u8, header.value), + }; + } + return out; +} + +fn dupBackendRoutes(alloc: std.mem.Allocator, backends: []const JsonBackendRoute) ![]const spec.HttpRouteBackend { + const out = try alloc.alloc(spec.HttpRouteBackend, backends.len); + errdefer alloc.free(out); + for (backends, 0..) |backend, i| { + out[i] = .{ + .service_name = try alloc.dupe(u8, backend.service_name), + .weight = backend.weight, + }; + } + return out; +} + +fn dupGpu(alloc: std.mem.Allocator, gpu: JsonGpu) !spec.GpuSpec { + return .{ + .count = gpu.count, + .model = if (gpu.model) |model| try alloc.dupe(u8, model) else null, + .vram_min_mb = gpu.vram_min_mb, + }; +} + +fn parseRestartPolicy(text: []const u8) spec.RestartPolicy { + if (std.mem.eql(u8, text, "always")) return .always; + if (std.mem.eql(u8, text, "on_failure")) return .on_failure; + return .none; +} + +test "loadLocalRollbackSnapshot preserves service runtime fields while keeping original snapshot" { + const alloc = std.testing.allocator; + const snapshot = + \\{"app_name":"demo-app","services":[{"name":"web","image":"nginx:1","command":["nginx","-g","daemon 
off"],"ports":[{"host_port":8080,"container_port":80}],"env":["MODE=prod"],"depends_on":["db"],"working_dir":"/srv/app","volumes":[{"source":"./src","target":"/app","kind":"bind"}],"health_check":{"kind":"http","path":"/health","port":8080,"interval":11,"timeout":6,"retries":4,"start_period":2},"restart":"always","tls":{"domain":"demo.internal","acme":true,"email":"ops@example.com"},"http_routes":[{"name":"default","host":"demo.internal","path_prefix":"/","retries":2,"connect_timeout_ms":1500,"request_timeout_ms":6000,"http2_idle_timeout_ms":30000,"preserve_host":false,"retry_on_5xx":true,"circuit_breaker_threshold":3,"circuit_breaker_timeout_ms":30000,"match_methods":[{"method":"GET"}],"match_headers":[{"name":"x-env","value":"prod"}],"backend_services":[{"service_name":"web","weight":100}]}],"gpu":{"count":1,"model":"L4","vram_min_mb":24576},"gpu_mesh":{"world_size":2,"gpus_per_rank":1,"master_port":29501}}],"workers":[{"name":"migrate"}],"crons":[{"name":"nightly"}],"training_jobs":[{"name":"finetune"}]} + ; + + var loaded = try loadLocalRollbackSnapshot(alloc, snapshot); + defer loaded.deinit(); + + try std.testing.expectEqualStrings("demo-app", loaded.release.app.app_name); + try std.testing.expectEqualStrings(snapshot, loaded.release.config_snapshot); + try std.testing.expectEqual(@as(usize, 1), loaded.manifest.services.len); + try std.testing.expectEqualStrings("web", loaded.manifest.services[0].name); + try std.testing.expectEqualStrings("nginx", loaded.manifest.services[0].command[0]); + try std.testing.expectEqual(@as(u16, 8080), loaded.manifest.services[0].ports[0].host_port); + try std.testing.expectEqual(spec.RestartPolicy.always, loaded.manifest.services[0].restart); + try std.testing.expectEqualStrings("demo.internal", loaded.manifest.services[0].tls.?.domain); + try std.testing.expectEqual(@as(usize, 1), loaded.manifest.services[0].http_routes.len); + try std.testing.expectEqual(@as(u32, 2), loaded.manifest.services[0].gpu_mesh.?.world_size); +} 
diff --git a/src/runtime/cli/status_command.zig b/src/runtime/cli/status_command.zig index cef4997..e276c0a 100644 --- a/src/runtime/cli/status_command.zig +++ b/src/runtime/cli/status_command.zig @@ -77,6 +77,7 @@ pub fn status(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { pub fn apps(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { var server: ?cli.ServerAddr = null; + var filters = AppListFilters{}; while (args.next()) |arg| { if (std.mem.eql(u8, arg, "--json")) { @@ -87,19 +88,34 @@ pub fn apps(args: *std.process.ArgIterator, alloc: std.mem.Allocator) !void { return StatusError.InvalidArgument; }; server = cli.parseServerAddr(addr_str); + } else if (std.mem.eql(u8, arg, "--status")) { + filters.status = args.next() orelse { + writeErr("--status requires a rollout status\n", .{}); + return StatusError.InvalidArgument; + }; + } else if (std.mem.eql(u8, arg, "--failed")) { + filters.failed_only = true; + } else if (std.mem.eql(u8, arg, "--in-progress")) { + filters.in_progress_only = true; } else { - writeErr("usage: yoq apps [--server host:port] [--json]\n", .{}); + writeErr("usage: yoq apps [--server host:port] [--json] [--status ] [--failed] [--in-progress]\n", .{}); return StatusError.InvalidArgument; } } if (server) |s| { - try appsRemote(alloc, s.ip, s.port); + try appsRemote(alloc, s.ip, s.port, filters); } else { - try appsLocal(alloc); + try appsLocal(alloc, filters); } } +const AppListFilters = struct { + status: ?[]const u8 = null, + failed_only: bool = false, + in_progress_only: bool = false, +}; + fn statusLocal(alloc: std.mem.Allocator, verbose: bool) StatusError!void { var records = store.listAll(alloc) catch { writeErr("failed to list containers\n", .{}); @@ -141,11 +157,18 @@ const AppStatusSnapshot = struct { completed_targets: usize, failed_targets: usize, remaining_targets: usize, - source_release_id: ?[]const u8, - previous_successful_release_id: ?[]const u8, - previous_successful_manifest_hash: ?[]const 
u8, - previous_successful_created_at: ?i64, - message: ?[]const u8, + source_release_id: ?[]const u8 = null, + previous_successful_release_id: ?[]const u8 = null, + previous_successful_trigger: ?[]const u8 = null, + previous_successful_status: ?[]const u8 = null, + previous_successful_manifest_hash: ?[]const u8 = null, + previous_successful_created_at: ?i64 = null, + previous_successful_completed_targets: usize = 0, + previous_successful_failed_targets: usize = 0, + previous_successful_remaining_targets: usize = 0, + previous_successful_source_release_id: ?[]const u8 = null, + previous_successful_message: ?[]const u8 = null, + message: ?[]const u8 = null, }; fn statusLocalApp(alloc: std.mem.Allocator, app_name: []const u8) StatusError!void { @@ -273,7 +296,7 @@ fn statusRemoteApp(alloc: std.mem.Allocator, addr: [4]u8, port: u16, app_name: [ printAppStatus(snapshot); } -fn appsLocal(alloc: std.mem.Allocator) StatusError!void { +fn appsLocal(alloc: std.mem.Allocator, filters: AppListFilters) StatusError!void { var latest = store.listLatestDeploymentsByApp(alloc) catch { writeErr("failed to read app list\n", .{}); return StatusError.StoreError; @@ -293,7 +316,10 @@ fn appsLocal(alloc: std.mem.Allocator) StatusError!void { }; defer if (previous_successful) |prev| prev.deinit(alloc); - snapshots.append(alloc, snapshotFromDeployments(dep, previous_successful)) catch return StatusError.OutOfMemory; + const snapshot = snapshotFromDeployments(dep, previous_successful); + if (appMatchesFilters(snapshot, filters)) { + snapshots.append(alloc, snapshot) catch return StatusError.OutOfMemory; + } } printAppStatuses(snapshots.items); @@ -310,7 +336,7 @@ fn loadPreviousSuccessfulDeployment( }; } -fn appsRemote(alloc: std.mem.Allocator, addr: [4]u8, port: u16) StatusError!void { +fn appsRemote(alloc: std.mem.Allocator, addr: [4]u8, port: u16, filters: AppListFilters) StatusError!void { var token_buf: [64]u8 = undefined; const token = cli.readApiToken(&token_buf); @@ -330,7 +356,10 
@@ fn appsRemote(alloc: std.mem.Allocator, addr: [4]u8, port: u16) StatusError!void var iter = json_helpers.extractJsonObjects(resp.body); while (iter.next()) |obj| { - snapshots.append(alloc, parseAppStatusResponse(obj)) catch return StatusError.OutOfMemory; + const snapshot = parseAppStatusResponse(obj); + if (appMatchesFilters(snapshot, filters)) { + snapshots.append(alloc, snapshot) catch return StatusError.OutOfMemory; + } } printAppStatuses(snapshots.items); @@ -374,14 +403,12 @@ fn printAppStatuses(snapshots: []const AppStatusSnapshot) void { } fn printAppStatusHeader() void { - write("{s:<14} {s:<14} {s:<14} {s:<11} {s:<20} {s:<22} {s:<18} {s:<14} {s}\n", .{ - "APP", "RELEASE", "STATUS", "KINDS", "TIMESTAMP", "TARGETS", "TRAINING", "PREV OK", "MESSAGE", + write("{s:<14} {s:<14} {s:<14} {s:<10} {s:<11} {s:<22} {s:<18} {s:<14} {s}\n", .{ + "APP", "RELEASE", "STATUS", "TRIGGER", "WORKLOADS", "TARGETS", "TRAINING", "PREV OK", "MESSAGE", }); } fn printAppStatusRow(snapshot: AppStatusSnapshot) void { - var ts_buf: [20]u8 = undefined; - const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{snapshot.created_at}) catch "?"; const msg = snapshot.message orelse ""; var progress_buf: [64]u8 = undefined; @@ -401,16 +428,16 @@ fn printAppStatusRow(snapshot: AppStatusSnapshot) void { else "-"; - write("{s:<14} {s:<14} {s:<14} {s:<11} {s:<20} {s:<22} {s:<18} {s:<14} {s}\n", .{ + write("{s:<14} {s:<14} {s:<14} {s:<10} {s:<11} {s:<22} {s:<18} {s:<14} {s}\n", .{ snapshot.app_name, cli.truncate(snapshot.release_id, 12), snapshot.status, + snapshot.trigger, kinds_str, - ts_str, progress_str, training_str, previous_successful, - cli.truncate(msg, 40), + cli.truncate(msg, 48), }); } @@ -462,8 +489,15 @@ fn parseAppStatusResponse(json: []const u8) AppStatusSnapshot { .remaining_targets = @intCast(@max(0, extractJsonInt(json, "remaining_targets") orelse 0)), .source_release_id = extractJsonString(json, "source_release_id"), .previous_successful_release_id = extractJsonString(json, 
"previous_successful_release_id"), + .previous_successful_trigger = extractJsonString(json, "previous_successful_trigger"), + .previous_successful_status = extractJsonString(json, "previous_successful_status"), .previous_successful_manifest_hash = extractJsonString(json, "previous_successful_manifest_hash"), .previous_successful_created_at = extractJsonInt(json, "previous_successful_created_at"), + .previous_successful_completed_targets = @intCast(@max(0, extractJsonInt(json, "previous_successful_completed_targets") orelse 0)), + .previous_successful_failed_targets = @intCast(@max(0, extractJsonInt(json, "previous_successful_failed_targets") orelse 0)), + .previous_successful_remaining_targets = @intCast(@max(0, extractJsonInt(json, "previous_successful_remaining_targets") orelse 0)), + .previous_successful_source_release_id = extractJsonString(json, "previous_successful_source_release_id"), + .previous_successful_message = extractJsonString(json, "previous_successful_message"), .message = extractJsonString(json, "message"), }; } @@ -488,9 +522,55 @@ fn writeAppStatusJsonObject(w: *json_out.JsonWriter, snapshot: AppStatusSnapshot w.uintField("remaining_targets", snapshot.remaining_targets); if (snapshot.source_release_id) |source_release_id| w.stringField("source_release_id", source_release_id) else w.nullField("source_release_id"); if (snapshot.previous_successful_release_id) |release_id| w.stringField("previous_successful_release_id", release_id) else w.nullField("previous_successful_release_id"); + if (snapshot.previous_successful_trigger) |trigger| w.stringField("previous_successful_trigger", trigger) else w.nullField("previous_successful_trigger"); + if (snapshot.previous_successful_status) |status_text| w.stringField("previous_successful_status", status_text) else w.nullField("previous_successful_status"); if (snapshot.previous_successful_manifest_hash) |manifest_hash| w.stringField("previous_successful_manifest_hash", manifest_hash) else 
w.nullField("previous_successful_manifest_hash"); if (snapshot.previous_successful_created_at) |created_at| w.intField("previous_successful_created_at", created_at) else w.nullField("previous_successful_created_at"); + w.uintField("previous_successful_completed_targets", snapshot.previous_successful_completed_targets); + w.uintField("previous_successful_failed_targets", snapshot.previous_successful_failed_targets); + w.uintField("previous_successful_remaining_targets", snapshot.previous_successful_remaining_targets); + if (snapshot.previous_successful_source_release_id) |source_release_id| w.stringField("previous_successful_source_release_id", source_release_id) else w.nullField("previous_successful_source_release_id"); + if (snapshot.previous_successful_message) |message| w.stringField("previous_successful_message", message) else w.nullField("previous_successful_message"); + if (snapshot.message) |message| w.stringField("message", message) else w.nullField("message"); + w.beginObjectField("current_release"); + w.stringField("id", snapshot.release_id); + w.stringField("trigger", snapshot.trigger); + w.stringField("status", snapshot.status); + w.stringField("manifest_hash", snapshot.manifest_hash); + w.intField("created_at", snapshot.created_at); + w.uintField("completed_targets", snapshot.completed_targets); + w.uintField("failed_targets", snapshot.failed_targets); + w.uintField("remaining_targets", snapshot.remaining_targets); + if (snapshot.source_release_id) |source_release_id| w.stringField("source_release_id", source_release_id) else w.nullField("source_release_id"); if (snapshot.message) |message| w.stringField("message", message) else w.nullField("message"); + w.endObject(); + if (snapshot.previous_successful_release_id) |release_id| { + w.beginObjectField("previous_successful_release"); + w.stringField("id", release_id); + w.stringField("trigger", snapshot.previous_successful_trigger orelse "apply"); + w.stringField("status", 
snapshot.previous_successful_status orelse "completed"); + if (snapshot.previous_successful_manifest_hash) |manifest_hash| w.stringField("manifest_hash", manifest_hash) else w.nullField("manifest_hash"); + if (snapshot.previous_successful_created_at) |created_at| w.intField("created_at", created_at) else w.nullField("created_at"); + w.uintField("completed_targets", snapshot.previous_successful_completed_targets); + w.uintField("failed_targets", snapshot.previous_successful_failed_targets); + w.uintField("remaining_targets", snapshot.previous_successful_remaining_targets); + if (snapshot.previous_successful_source_release_id) |source_release_id| w.stringField("source_release_id", source_release_id) else w.nullField("source_release_id"); + if (snapshot.previous_successful_message) |message| w.stringField("message", message) else w.nullField("message"); + w.endObject(); + } else { + w.nullField("previous_successful_release"); + } + w.beginObjectField("workloads"); + w.uintField("services", snapshot.service_count); + w.uintField("workers", snapshot.worker_count); + w.uintField("crons", snapshot.cron_count); + w.uintField("training_jobs", snapshot.training_job_count); + w.endObject(); + w.beginObjectField("training_runtime"); + w.uintField("active", snapshot.active_training_jobs); + w.uintField("paused", snapshot.paused_training_jobs); + w.uintField("failed", snapshot.failed_training_jobs); + w.endObject(); } fn appStatusFromReports( @@ -518,8 +598,15 @@ fn appStatusFromReports( .remaining_targets = report.remainingTargets(), .source_release_id = report.source_release_id, .previous_successful_release_id = if (previous_successful) |prev| prev.release_id else null, + .previous_successful_trigger = if (previous_successful) |prev| prev.trigger.toString() else null, + .previous_successful_status = if (previous_successful) |prev| prev.status.toString() else null, .previous_successful_manifest_hash = if (previous_successful) |prev| prev.manifest_hash else null, 
.previous_successful_created_at = if (previous_successful) |prev| prev.created_at else null, + .previous_successful_completed_targets = if (previous_successful) |prev| prev.completed_targets else 0, + .previous_successful_failed_targets = if (previous_successful) |prev| prev.failed_targets else 0, + .previous_successful_remaining_targets = if (previous_successful) |prev| prev.remainingTargets() else 0, + .previous_successful_source_release_id = if (previous_successful) |prev| prev.source_release_id else null, + .previous_successful_message = if (previous_successful) |prev| prev.message else null, .message = report.message, }; } @@ -542,6 +629,23 @@ fn currentAppNameAlloc(alloc: std.mem.Allocator) ![]u8 { return alloc.dupe(u8, std.fs.path.basename(cwd)) catch return StatusError.OutOfMemory; } +fn appMatchesFilters(snapshot: AppStatusSnapshot, filters: AppListFilters) bool { + if (filters.status) |status_filter| { + if (!std.mem.eql(u8, snapshot.status, status_filter)) return false; + } + if (filters.failed_only and !isFailedLikeRollout(snapshot.status)) return false; + if (filters.in_progress_only and !isInProgressRollout(snapshot.status)) return false; + return true; +} + +fn isFailedLikeRollout(status_text: []const u8) bool { + return std.mem.eql(u8, status_text, "failed") or std.mem.eql(u8, status_text, "partially_failed"); +} + +fn isInProgressRollout(status_text: []const u8) bool { + return std.mem.eql(u8, status_text, "pending") or std.mem.eql(u8, status_text, "in_progress"); +} + fn printStatusTable(snapshots: []const monitor.ServiceSnapshot, verbose: bool) void { if (cli.output_mode == .json) { statusJson(snapshots); @@ -756,6 +860,81 @@ test "writeAppStatusJsonObject round-trips through remote parser" { try std.testing.expectEqualStrings(snapshot.message.?, parsed.message.?); } +test "writeAppStatusJsonObject includes nested release and workload views" { + const snapshot = AppStatusSnapshot{ + .app_name = "demo-app", + .trigger = "rollback", + .release_id = 
"dep-2", + .status = "completed", + .manifest_hash = "sha256:222", + .created_at = 200, + .service_count = 2, + .worker_count = 1, + .cron_count = 2, + .training_job_count = 3, + .active_training_jobs = 1, + .paused_training_jobs = 1, + .failed_training_jobs = 1, + .completed_targets = 1, + .failed_targets = 1, + .remaining_targets = 0, + .source_release_id = "dep-1", + .previous_successful_release_id = "dep-0", + .previous_successful_manifest_hash = "sha256:111", + .previous_successful_created_at = 100, + .message = "all placements healthy", + }; + + var w = json_out.JsonWriter{}; + writeAppStatusJsonObject(&w, snapshot); + const json = w.getWritten(); + + try std.testing.expect(std.mem.indexOf(u8, json, "\"current_release\":{\"id\":\"dep-2\"") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"previous_successful_release\":{\"id\":\"dep-0\"") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"workloads\":{\"services\":2,\"workers\":1,\"crons\":2,\"training_jobs\":3}") != null); + try std.testing.expect(std.mem.indexOf(u8, json, "\"training_runtime\":{\"active\":1,\"paused\":1,\"failed\":1}") != null); +} + +test "appMatchesFilters applies failed and in-progress filters" { + const failed_snapshot = AppStatusSnapshot{ + .app_name = "demo-app", + .trigger = "apply", + .release_id = "dep-1", + .status = "partially_failed", + .manifest_hash = "sha256:111", + .created_at = 100, + .completed_targets = 1, + .failed_targets = 1, + .remaining_targets = 0, + .source_release_id = null, + .previous_successful_release_id = null, + .previous_successful_manifest_hash = null, + .previous_successful_created_at = null, + .message = null, + }; + const pending_snapshot = AppStatusSnapshot{ + .app_name = "demo-app", + .trigger = "apply", + .release_id = "dep-2", + .status = "in_progress", + .manifest_hash = "sha256:222", + .created_at = 200, + .completed_targets = 1, + .failed_targets = 0, + .remaining_targets = 1, + .source_release_id = null, + 
.previous_successful_release_id = null, + .previous_successful_manifest_hash = null, + .previous_successful_created_at = null, + .message = null, + }; + + try std.testing.expect(appMatchesFilters(failed_snapshot, .{ .failed_only = true })); + try std.testing.expect(!appMatchesFilters(failed_snapshot, .{ .in_progress_only = true })); + try std.testing.expect(appMatchesFilters(pending_snapshot, .{ .in_progress_only = true })); + try std.testing.expect(!appMatchesFilters(pending_snapshot, .{ .status = "completed" })); +} + test "appStatusFromReport preserves partially failed local release state" { const dep = store.DeploymentRecord{ .id = "dep-3", diff --git a/src/state/store.zig b/src/state/store.zig index 181f83f..8e34fcd 100644 --- a/src/state/store.zig +++ b/src/state/store.zig @@ -99,6 +99,8 @@ pub const getLastSuccessfulDeployment = @import("store/deployments.zig").getLast pub const getLastSuccessfulDeploymentByApp = @import("store/deployments.zig").getLastSuccessfulDeploymentByApp; pub const getPreviousSuccessfulDeploymentByApp = @import("store/deployments.zig").getPreviousSuccessfulDeploymentByApp; pub const getPreviousSuccessfulDeploymentByAppInDb = @import("store/deployments.zig").getPreviousSuccessfulDeploymentByAppInDb; +pub const getRollbackTargetDeploymentByApp = @import("store/deployments.zig").getRollbackTargetDeploymentByApp; +pub const getRollbackTargetDeploymentByAppInDb = @import("store/deployments.zig").getRollbackTargetDeploymentByAppInDb; pub const replaceCronSchedulesForApp = @import("store/crons.zig").replaceCronSchedulesForApp; pub const replaceCronSchedulesForAppInDb = @import("store/crons.zig").replaceCronSchedulesForAppInDb; diff --git a/src/state/store/deployments.zig b/src/state/store/deployments.zig index 0382360..b764966 100644 --- a/src/state/store/deployments.zig +++ b/src/state/store/deployments.zig @@ -314,6 +314,35 @@ pub fn getPreviousSuccessfulDeploymentByAppInDb( ); } +pub fn getRollbackTargetDeploymentByApp( + alloc: 
Allocator, + app_name: []const u8, + explicit_release_id: ?[]const u8, +) StoreError!DeploymentRecord { + const db = try common.getDb(); + return getRollbackTargetDeploymentByAppInDb(db, alloc, app_name, explicit_release_id); +} + +pub fn getRollbackTargetDeploymentByAppInDb( + db: *sqlite.Db, + alloc: Allocator, + app_name: []const u8, + explicit_release_id: ?[]const u8, +) StoreError!DeploymentRecord { + if (explicit_release_id) |release_id| { + const dep = try getDeploymentInDb(db, alloc, release_id); + errdefer dep.deinit(alloc); + if (dep.app_name == null or !std.mem.eql(u8, dep.app_name.?, app_name)) { + return StoreError.NotFound; + } + return dep; + } + + const latest = try getLatestDeploymentByAppInDb(db, alloc, app_name); + defer latest.deinit(alloc); + return getPreviousSuccessfulDeploymentByAppInDb(db, alloc, app_name, latest.id); +} + test "deployment record round-trip via sqlite" { var db = try sqlite.Db.init(.{ .mode = .Memory, .open_flags = .{ .write = true } }); defer db.deinit(); @@ -434,6 +463,75 @@ test "getPreviousSuccessfulDeploymentByAppInDb excludes current release" { try std.testing.expectEqualStrings("completed", previous.status); } +test "getRollbackTargetDeploymentByAppInDb defaults to the previous successful release" { + var db = try sqlite.Db.init(.{ .mode = .Memory, .open_flags = .{ .write = true } }); + defer db.deinit(); + try schema.init(&db); + + try saveDeploymentInDb(&db, .{ + .id = "dep-1", + .app_name = "demo-app", + .service_name = "demo-app", + .trigger = "apply", + .manifest_hash = "sha256:111", + .config_snapshot = "{\"app_name\":\"demo-app\",\"services\":[{\"name\":\"web\",\"image\":\"nginx:1\"}]}", + .status = "completed", + .message = "apply completed", + .created_at = 100, + }); + try saveDeploymentInDb(&db, .{ + .id = "dep-2", + .app_name = "demo-app", + .service_name = "demo-app", + .trigger = "apply", + .manifest_hash = "sha256:222", + .config_snapshot = 
"{\"app_name\":\"demo-app\",\"services\":[{\"name\":\"web\",\"image\":\"nginx:2\"}]}", + .status = "completed", + .message = "apply completed", + .created_at = 200, + }); + + const target = try getRollbackTargetDeploymentByAppInDb(&db, std.testing.allocator, "demo-app", null); + defer target.deinit(std.testing.allocator); + + try std.testing.expectEqualStrings("dep-1", target.id); +} + +test "getRollbackTargetDeploymentByAppInDb honors an explicit release id" { + var db = try sqlite.Db.init(.{ .mode = .Memory, .open_flags = .{ .write = true } }); + defer db.deinit(); + try schema.init(&db); + + try saveDeploymentInDb(&db, .{ + .id = "dep-1", + .app_name = "demo-app", + .service_name = "demo-app", + .trigger = "apply", + .manifest_hash = "sha256:111", + .config_snapshot = "{}", + .status = "completed", + .message = "apply completed", + .created_at = 100, + }); + try saveDeploymentInDb(&db, .{ + .id = "dep-2", + .app_name = "demo-app", + .service_name = "demo-app", + .trigger = "rollback", + .source_release_id = "dep-1", + .manifest_hash = "sha256:222", + .config_snapshot = "{}", + .status = "completed", + .message = "rollback completed", + .created_at = 200, + }); + + const target = try getRollbackTargetDeploymentByAppInDb(&db, std.testing.allocator, "demo-app", "dep-2"); + defer target.deinit(std.testing.allocator); + + try std.testing.expectEqualStrings("dep-2", target.id); +} + test "listLatestDeploymentsByAppInDb returns one latest row per app" { var db = try sqlite.Db.init(.{ .mode = .Memory, .open_flags = .{ .write = true } }); defer db.deinit(); diff --git a/src/test_root.zig b/src/test_root.zig index 6071f58..0332654 100644 --- a/src/test_root.zig +++ b/src/test_root.zig @@ -81,6 +81,7 @@ comptime { _ = @import("manifest/app_snapshot.zig"); _ = @import("manifest/apply_release.zig"); _ = @import("manifest/local_apply_backend.zig"); + _ = @import("manifest/rollback_snapshot.zig"); _ = @import("manifest/release_plan.zig"); _ = 
@import("manifest/release_history.zig"); _ = @import("manifest/cli/ops.zig");