From a376644b90626aed53f9feb05df792a22297a87f Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Tue, 23 Jun 2026 15:25:37 -0400 Subject: [PATCH 01/32] feat: add reusable thread lifecycle response pipeline --- docs/architecture.md | 89 +++ docs/development.md | 12 +- .../architecture/model_resolution_test.go | 95 +++ internal/cmd/reviewcmd/reviewcmd.go | 310 +++++++- internal/cmd/reviewcmd/reviewcmd_test.go | 197 ++++- internal/ledger/ledger.go | 264 ++++++- internal/ledger/ledger_test.go | 116 ++- internal/llm/adapter.go | 22 +- internal/llmrun/llmrun.go | 195 +++++ internal/llmrun/llmrun_test.go | 352 +++++++++ internal/plannedactions/plannedactions.go | 110 +++ .../plannedactions/plannedactions_test.go | 50 ++ internal/review/review.go | 51 ++ internal/review/review_test.go | 60 ++ internal/reviewplan/reviewplan.go | 76 ++ internal/reviewplan/reviewplan_test.go | 88 +++ internal/stagemodel/resolver.go | 137 ++++ internal/stagemodel/resolver_test.go | 205 ++++++ internal/threadanalysis/threadanalysis.go | 397 ++++++++++ .../threadanalysis/threadanalysis_test.go | 455 ++++++++++++ internal/threadcontext/threadcontext.go | 348 +++++++++ internal/threadcontext/threadcontext_test.go | 422 +++++++++++ internal/threadrespond/threadrespond.go | 680 ++++++++++++++++++ internal/threadrespond/threadrespond_test.go | 604 ++++++++++++++++ 24 files changed, 5306 insertions(+), 29 deletions(-) create mode 100644 docs/architecture.md create mode 100644 internal/architecture/model_resolution_test.go create mode 100644 internal/llmrun/llmrun.go create mode 100644 internal/llmrun/llmrun_test.go create mode 100644 internal/plannedactions/plannedactions.go create mode 100644 internal/plannedactions/plannedactions_test.go create mode 100644 internal/stagemodel/resolver.go create mode 100644 internal/stagemodel/resolver_test.go create mode 100644 internal/threadanalysis/threadanalysis.go create mode 100644 internal/threadanalysis/threadanalysis_test.go create mode 100644 internal/threadcontext/threadcontext.go create mode 100644 internal/threadcontext/threadcontext_test.go create mode 100644 internal/threadrespond/threadrespond.go create mode 100644 internal/threadrespond/threadrespond_test.go diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..f7c9a11c --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,89 @@ +# Architecture Decisions + +This document records review-pipeline boundaries that are intended to stay +stable as the implementation evolves. + +## LLM Execution Boundary + +Every LLM action must flow through one durable execution boundary. Callers +should describe the stage, scope, prompt input, structured output contract, and +model requirements; the runner owns provider invocation, retries, resume checks, +telemetry, and persistence. + +The durable runner must do a pre-flight lookup before calling the provider. If +a completed matching step already exists, it should reuse the stored structured +result. After a provider call succeeds, the runner must persist the structured +result and metadata before returning it to downstream code. Metadata includes at +least provider, adapter, model, effort, started/completed timestamps, duration, +token usage, cost, and provider session identifiers when available. + +New LLM-backed pipeline components should not call provider adapters directly. +They should receive a fakeable runner interface in unit tests and should return +domain results rather than posting comments or mutating provider state. + +## Stage Model Resolution + +Runtime model choice must be resolved through `internal/stagemodel`. Code that +executes an LLM stage must not hard-code model IDs and must not call +`config.ResolveModelTier` directly. + +`stagemodel.ResolveStageModel` is the single runtime path from profile +preferences and command overrides to a concrete model and effort. The request +must include the named stage, requested tier, default effort, and any explicit +operator override. The resolver applies user profile `llm.model_map` values, +built-in provider defaults, and configured tier floors before returning the +concrete runtime choice. + +This boundary exists so model catalog data, provider capabilities, token costs, +and profile-level tier floors can be added without touching individual review +stages. Runtime hard-coding bypasses user preference and is a bug. + +The direct `config.ResolveModelTier` exception is config inspection and the +resolver implementation itself. The architecture guardrail is enforced by +`internal/architecture/model_resolution_test.go`. + +## Git Provider Writes + +Provider writes have one durable path: planned actions in the ledger followed +by outbox execution. Commands and domain analyzers should not post comments, +reply to review threads, resolve threads, submit reviews, or mutate provider +state directly. + +This keeps markers, retries, reconciliation, idempotency, and resume behavior in +one place. New commands such as `cr respond` should produce planned thread +actions and let the reviewplan/ledger/outbox flow perform provider writes. + +## Inline Thread Lifecycle + +Inline PR discussion threads are domain input, not provider-specific prompt +data. The intended decomposition is: + +- `internal/threadcontext` normalizes `gitprovider.InlineThread`, detects + codereview-authored finding threads, detects latest human replies, strips + shared markers, collapses resolved threads to the latest durable summary, and + produces file-scoped reviewer context. +- `internal/threadanalysis` accepts normalized thread context and returns + reusable domain decisions: thread ID, decision, reply body, summary, resolve + flag, and rationale. + +Resolved inline threads should not be reprocessed as full conversations on +every review. Their durable context is the latest summary comment. Reviewer +prompts should receive compact file-scoped summaries so agents avoid re-raising +issues that have already been discussed and resolved. + +`cr review` and `cr respond` should share the same normalization, filtering, +model resolution, LLM execution, and action-planning components. `cr respond` +is a command-shaped reuse of the thread-response portion of the review +pipeline, not a separate posting system. + +## Retention And Cleanup + +Durable LLM steps, thread-analysis results, and artifacts must be owned by a +run and must be safe to delete through the existing data lifecycle commands. +Database rows should reference `runs(run_id)` with cascade delete semantics, and +large artifacts should live under the run artifact directory. + +When the retention window elapses, normal prune/GC commands should remove these +results along with the rest of the run. If a user deletes retained state, a +future review may need to spend time and tokens recreating it; that is the +expected tradeoff for user-controlled local data retention. diff --git a/docs/development.md b/docs/development.md index 2bbf9a2a..d186461c 100644 --- a/docs/development.md +++ b/docs/development.md @@ -13,8 +13,13 @@ session management, and local data lifecycle commands. The current Go code is a Cobra command tree in `internal/cmd/*` with a thin `cmd/cr` entrypoint, shared exit-code mapping in `internal/cmd/exitcode`, and version plumbing in `internal/version`. Review orchestration is split across -`internal/pipeline`, `internal/reviewrun`, `internal/reviewplan`, -`internal/outbox`, `internal/gate`, and `internal/gateio`. +`internal/pipeline`, `internal/reviewrun`, `internal/threadrespond`, +`internal/reviewplan`, `internal/outbox`, `internal/gate`, and +`internal/gateio`. + +Architecture guardrails for LLM execution, model resolution, Git provider +writes, inline thread lifecycle, and retention live in +[`docs/architecture.md`](architecture.md). Within `internal/pipeline`, the public entry points are `DryRun`, `Live`, and `SelectionOnly`. `DryRun` and `Live` execute the full review pipeline, while @@ -80,7 +85,8 @@ make clean # remove build artifacts state/config adapters in `internal/config`, `internal/ledger`, and `internal/statepaths`, provider/LLM adapters in their owning packages, and review posting/gating in `internal/outbox`, `internal/gate`, and - `internal/gateio`. + `internal/gateio`, and response-only inline discussion handling in + `internal/threadrespond`. ## Interactive Init Notes diff --git a/internal/architecture/model_resolution_test.go b/internal/architecture/model_resolution_test.go new file mode 100644 index 00000000..17314b8c --- /dev/null +++ b/internal/architecture/model_resolution_test.go @@ -0,0 +1,95 @@ +package architecture_test + +import ( + "go/ast" + "go/parser" + "go/token" + "io/fs" + "path/filepath" + "strings" + "testing" +) + +func TestRuntimeModelResolutionGoesThroughStageResolver(t *testing.T) { + repoRoot := repoRootFromTest(t) + allowedDirs := []string{ + "internal/config", + "internal/stagemodel", + "internal/cmd/configcmd", + } + + fset := token.NewFileSet() + err := filepath.WalkDir(repoRoot, func(path string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + switch entry.Name() { + case ".git", "dist", "vendor": + return filepath.SkipDir + default: + return nil + } + } + if filepath.Ext(path) != ".go" || strings.HasSuffix(path, "_test.go") { + return nil + } + rel := filepath.ToSlash(mustRel(t, repoRoot, path)) + if pathInAllowedDirs(rel, allowedDirs) { + return nil + } + + parsed, err := parser.ParseFile(fset, path, nil, 0) + if err != nil { + return err + } + ast.Inspect(parsed, func(node ast.Node) bool { + call, ok := node.(*ast.CallExpr) + if !ok { + return true + } + selector, ok := call.Fun.(*ast.SelectorExpr) + if !ok || selector.Sel.Name != "ResolveModelTier" { + return true + } + ident, ok := selector.X.(*ast.Ident) + if !ok || ident.Name != "config" { + return true + } + pos := fset.Position(selector.Pos()) + t.Fatalf("%s calls config.ResolveModelTier directly; runtime model selection must use internal/stagemodel", pos) + return false + }) + return nil + }) + if err != nil { + t.Fatalf("WalkDir(%s): %v", repoRoot, err) + } +} + +func repoRootFromTest(t *testing.T) string { + t.Helper() + dir, err := filepath.Abs(".") + if err != nil { + t.Fatalf("Abs(.): %v", err) + } + return filepath.Clean(filepath.Join(dir, "..", "..")) +} + +func mustRel(t *testing.T, base, target string) string { + t.Helper() + rel, err := filepath.Rel(base, target) + if err != nil { + t.Fatalf("Rel(%s, %s): %v", base, target, err) + } + return rel +} + +func pathInAllowedDirs(path string, allowedDirs []string) bool { + for _, dir := range allowedDirs { + if path == dir || strings.HasPrefix(path, dir+"/") { + return true + } + } + return false +} diff --git a/internal/cmd/reviewcmd/reviewcmd.go b/internal/cmd/reviewcmd/reviewcmd.go index 38cbb12e..1819dc80 100644 --- a/internal/cmd/reviewcmd/reviewcmd.go +++ b/internal/cmd/reviewcmd/reviewcmd.go @@ -37,7 +37,9 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/review" "github.com/open-cli-collective/codereview-cli/internal/reviewplan" "github.com/open-cli-collective/codereview-cli/internal/reviewrun" + "github.com/open-cli-collective/codereview-cli/internal/stagemodel" "github.com/open-cli-collective/codereview-cli/internal/statepaths" + "github.com/open-cli-collective/codereview-cli/internal/threadrespond" "github.com/open-cli-collective/codereview-cli/internal/version" "github.com/open-cli-collective/codereview-cli/internal/view" ) @@ -69,9 +71,15 @@ type Runner interface { Live(context.Context, pipeline.Request, reviewrun.Flags) (reviewrun.Result, error) } +// ResponseRunner executes response-only thread lifecycle runs. +type ResponseRunner interface { + Respond(context.Context, threadrespond.Request) (threadrespond.Result, error) +} + // Runtime contains per-command dependencies that need cleanup after a run. type Runtime struct { Runner Runner + Responder ResponseRunner PostingIdentity gitprovider.Identity Cleanup func() } @@ -124,6 +132,14 @@ type commandFlags struct { noResolveThreads bool } +type respondFlags struct { + dryRun bool + noPost bool + retryPosts string + jsonOutput bool + noResolveThreads bool +} + // Register attaches the review command to rootCmd. func Register(rootCmd *cobra.Command, opts *root.Options) { RegisterWithFactory(rootCmd, opts, newRuntime) @@ -169,6 +185,30 @@ func RegisterWithFactory(rootCmd *cobra.Command, opts *root.Options, factory Run cmd.Flags().BoolVar(&flags.allowSelfApprove, "allow-self-approve", false, "Allow approval when posting identity is the PR author") cmd.Flags().BoolVar(&flags.noResolveThreads, "no-resolve-threads", false, "Do not plan thread-resolution actions") rootCmd.AddCommand(cmd) + rootCmd.AddCommand(newRespondCommand(opts, factory)) +} + +func newRespondCommand(opts *root.Options, factory RuntimeFactory) *cobra.Command { + var flags respondFlags + cmd := &cobra.Command{ + Use: "respond ", + Short: "Respond to unresolved codereview inline discussion threads", + Args: func(_ *cobra.Command, args []string) error { + if len(args) != 1 { + return exitcode.Usage(fmt.Errorf("respond requires one PR URL")) + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + return runRespond(cmd.Context(), cmd, opts, factory, flags, args[0]) + }, + } + cmd.Flags().BoolVar(&flags.dryRun, "dry-run", false, "Plan thread responses without posting") + cmd.Flags().BoolVar(&flags.noPost, "no-post", false, "Alias for --dry-run") + cmd.Flags().StringVar(&flags.retryPosts, "retry-posts", "", "Retry missing or failed required posts for the given response run ID") + cmd.Flags().BoolVar(&flags.jsonOutput, "json", false, "Emit JSON") + cmd.Flags().BoolVar(&flags.noResolveThreads, "no-resolve-threads", false, "Do not plan thread-resolution actions") + return cmd } func runReview(ctx context.Context, cmd *cobra.Command, opts *root.Options, factory RuntimeFactory, flags commandFlags, prArg string) error { @@ -377,6 +417,84 @@ func runReview(ctx context.Context, cmd *cobra.Command, opts *root.Options, fact return nil } +func runRespond(ctx context.Context, cmd *cobra.Command, opts *root.Options, factory RuntimeFactory, flags respondFlags, prArg string) error { + if flags.noPost { + flags.dryRun = true + } + retryRunID := strings.TrimSpace(flags.retryPosts) + if cmd.Flags().Changed("retry-posts") && retryRunID == "" { + return exitcode.Usage(fmt.Errorf("--retry-posts must be a non-empty run ID")) + } + if retryRunID != "" && flags.dryRun { + return exitcode.Usage(fmt.Errorf("--retry-posts cannot be used with --dry-run or --no-post")) + } + path, err := configPath(opts) + if err != nil { + return exitcode.AuthConfig(err) + } + cfg, err := config.Load(path) + if err != nil { + return cmderr.Config(err) + } + ref, err := prref.ParseGitHubPullURL(prArg) + if err != nil { + return exitcode.Usage(err) + } + profileName, profile, err := config.ResolveProfileForRepository(cfg, opts.Profile, root.ProfileFlagChanged(cmd), config.RepositoryTarget{ + Host: ref.Host, + Namespace: ref.Owner, + Repo: ref.Repo, + }) + if err != nil { + return cmderr.Config(err) + } + if !prref.SameHost(ref.Host, profile.Git.Host) { + return exitcode.Usage(fmt.Errorf("PR host %q must match configured git host %q", ref.Host, profile.Git.Host)) + } + runtime, err := factory(cmd, opts, cfg, profile, RuntimeOptions{ + PRRef: ref, + Retention: retentionPolicyFromConfig(cfg.Data.Retention), + RetentionManualOnly: cfg.Data.Retention.Enforcement == config.RetentionManualOnly, + }) + if err != nil { + return err + } + if runtime.Cleanup != nil { + defer runtime.Cleanup() + } + if runtime.Responder == nil { + return fmt.Errorf("respond: runtime responder is required") + } + noResolve := flags.noResolveThreads || profile.ReviewPolicy.ResolveThreads == config.ResolveThreadsNever + result, err := runtime.Responder.Respond(ctx, threadrespond.Request{ + PRRef: ref, + PRURL: prArg, + ProfileName: profileName, + Profile: profile, + PostingIdentity: runtime.PostingIdentity, + DryRun: flags.dryRun, + NoResolveThreads: noResolve, + RetryRunID: retryRunID, + }) + if err != nil { + return mapRunError(err) + } + rendered := newRespondResult(result) + if flags.jsonOutput { + if err := renderRespondJSON(opts.Stdout, rendered); err != nil { + return err + } + } else { + if err := renderRespondText(opts.Stdout, rendered); err != nil { + return err + } + } + if result.ExitCode != exitcode.Success { + return exitcode.With(result.ExitCode, respondResultError(result)) + } + return nil +} + func validModelEffort(value string) bool { return modelprefs.Effort(value).Valid() } @@ -614,6 +732,144 @@ func newReviewLive(result reviewrun.Result) view.ReviewLive { return rendered } +type respondRendered struct { + Run view.ReviewRun `json:"run"` + Counts respondCounts `json:"counts"` + Outbox view.ReviewOutbox `json:"outbox"` + Message string `json:"message,omitempty"` +} + +type respondCounts struct { + Considered int `json:"considered"` + Responded int `json:"responded"` + Resolved int `json:"resolved"` + Planned int `json:"planned"` +} + +func newRespondResult(result threadrespond.Result) respondRendered { + outcome := result.Outbox.Outcome.String() + if outcome == "" && result.Run.Outcome != nil { + outcome = result.Run.Outcome.String() + } + outboxExitCode := result.Outbox.ExitCode + if outboxExitCode == 0 && result.ExitCode != 0 { + outboxExitCode = result.ExitCode + } + responded := countThreadReplies(result.Plan.Actions) + resolved := countThreadResolves(result.Plan.Actions) + if responded == 0 && resolved == 0 && len(result.PlannedActions) > 0 { + responded = countPlannedThreadReplies(result.PlannedActions) + resolved = countPlannedThreadResolves(result.PlannedActions) + } + return respondRendered{ + Run: view.ReviewRun{ + RunID: result.Run.RunID, + PRURL: result.PR.URL, + PRKey: result.PRKey, + PostMode: result.Run.PostMode.String(), + Outcome: outcome, + ArtifactPath: result.Run.ArtifactPath, + BaseSHA: result.Run.BaseSHA, + HeadSHA: result.Run.SHA, + }, + Counts: respondCounts{ + Considered: len(result.EligibleThreads), + Responded: responded, + Resolved: resolved, + Planned: len(result.PlannedActions), + }, + Outbox: view.ReviewOutbox{ + Outcome: outcome, + ExitCode: outboxExitCode, + Posted: result.Outbox.Posted, + Pending: result.Outbox.Pending, + FailedTerminal: result.Outbox.FailedTerminal, + Aborted: result.Outbox.Aborted, + }, + Message: result.Message, + } +} + +func renderRespondJSON(w io.Writer, rendered respondRendered) error { + encoder := json.NewEncoder(w) + encoder.SetIndent("", " ") + return encoder.Encode(rendered) +} + +func renderRespondText(w io.Writer, rendered respondRendered) error { + if _, err := fmt.Fprintf(w, "Run: %s\n", rendered.Run.RunID); err != nil { + return err + } + if rendered.Run.PRURL != "" { + if _, err := fmt.Fprintf(w, "PR: %s\n", rendered.Run.PRURL); err != nil { + return err + } + } + if _, err := fmt.Fprintf(w, "Outcome: %s\n", rendered.Run.Outcome); err != nil { + return err + } + if _, err := fmt.Fprintf(w, "Threads: considered %d, responded %d, resolved %d\n", rendered.Counts.Considered, rendered.Counts.Responded, rendered.Counts.Resolved); err != nil { + return err + } + if _, err := fmt.Fprintf(w, "Planned actions: %d\n", rendered.Counts.Planned); err != nil { + return err + } + if _, err := fmt.Fprintf(w, "Outbox: posted %d, pending %d, failed %d\n", rendered.Outbox.Posted, rendered.Outbox.Pending, rendered.Outbox.FailedTerminal); err != nil { + return err + } + if rendered.Run.ArtifactPath != "" { + if _, err := fmt.Fprintf(w, "Artifacts: %s\n", rendered.Run.ArtifactPath); err != nil { + return err + } + } + if strings.TrimSpace(rendered.Message) != "" { + if _, err := fmt.Fprintf(w, "Message: %s\n", rendered.Message); err != nil { + return err + } + } + return nil +} + +func countThreadReplies(actions []reviewplan.Action) int { + var count int + for _, action := range actions { + if action.Kind == reviewplan.ActionKindThreadReply { + count++ + } + } + return count +} + +func countThreadResolves(actions []reviewplan.Action) int { + var count int + for _, action := range actions { + if action.Kind == reviewplan.ActionKindResolveThread { + count++ + } + } + return count +} + +func countPlannedThreadReplies(actions []ledger.PlannedAction) int { + var count int + for _, action := range actions { + if action.Kind == ledger.PlannedActionThreadReply { + count++ + } + } + return count +} + +func countPlannedThreadResolves(actions []ledger.PlannedAction) int { + var count int + for _, action := range actions { + if action.Kind == ledger.PlannedActionResolveThread { + count++ + } + } + return count +} + func liveResultError(result reviewrun.Result) error { if strings.TrimSpace(result.Message) != "" { return errors.New(result.Message) @@ -624,6 +880,16 @@ func liveResultError(result reviewrun.Result) error { return fmt.Errorf("live review did not complete") } +func respondResultError(result threadrespond.Result) error { + if strings.TrimSpace(result.Message) != "" { + return errors.New(result.Message) + } + if result.Outbox.Outcome != "" { + return fmt.Errorf("respond completed with outcome %s", result.Outbox.Outcome) + } + return fmt.Errorf("respond did not complete") +} + func viewFinding(finding reviewplan.AnchoredFinding) view.ReviewFinding { out := view.ReviewFinding{ ID: finding.FindingID.String(), @@ -773,6 +1039,7 @@ func newRuntime(cmd *cobra.Command, opts *root.Options, cfg config.File, profile runner := buildReviewRunner(ledgerStore, provider, adapter, profile, limiter, layout, opts.Stderr, logger, runtimeOpts) return Runtime{ Runner: runner, + Responder: runner, PostingIdentity: postingIdentity, Cleanup: cleanup, }, nil @@ -902,6 +1169,15 @@ func buildReviewRunner(ledgerStore *ledger.Store, provider gitprovider.GitProvid Retention: runtimeOpts.Retention, RetentionManualOnly: runtimeOpts.RetentionManualOnly, }, + respond: threadrespond.Options{ + Store: ledgerStore, + Provider: provider, + Adapter: adapter, + Limiter: limiter, + Layout: layout, + NewActionID: pipelineOpts.NewActionID, + NewStepID: pipelineOpts.NewLLMStepID, + }, } } @@ -944,15 +1220,18 @@ func (c *lazyApprovalOverrideClassifier) get() (approvaloverride.Classifier, boo c.disabled = true return nil, false } - if resolved, ok := config.ResolveModelTier(c.profile.LLM, config.ModelTierSmall); ok { - c.classifier = approvaloverride.NewLLMClassifier(c.adapter, resolved.Model, "low") - return c.classifier, true - } - if resolved, ok := config.ResolveModelTier(c.profile.LLM, config.ModelTierMedium); ok { + resolved, ok := stagemodel.ResolveFirstAvailable(stagemodel.Request{ + Profile: c.profile, + Stage: stagemodel.StageApprovalOverride, + DefaultEffort: "low", + }, config.ModelTierSmall, config.ModelTierMedium) + if ok { if c.warnings != nil { - _, _ = fmt.Fprintf(c.warnings, "warning: approval override classifier small model is not configured; falling back to medium tier model %s\n", resolved.Model) + if resolved.Tier == config.ModelTierMedium { + _, _ = fmt.Fprintf(c.warnings, "warning: approval override classifier small model is not configured; falling back to medium tier model %s\n", resolved.Model) + } } - c.classifier = approvaloverride.NewLLMClassifier(c.adapter, resolved.Model, "low") + c.classifier = approvaloverride.NewLLMClassifier(c.adapter, resolved.Model, resolved.Effort) return c.classifier, true } if c.warnings != nil { @@ -1062,6 +1341,7 @@ func retentionPolicyFromConfig(retention config.RetentionConfig) datalifecycle.R type reviewRunner struct { pipeline pipeline.Options live reviewrun.Options + respond threadrespond.Options } func (r reviewRunner) DryRun(ctx context.Context, req pipeline.Request) (pipeline.Result, error) { @@ -1072,6 +1352,22 @@ func (r reviewRunner) Live(ctx context.Context, req pipeline.Request, flags revi return reviewrun.Run(ctx, r.live, reviewrun.Request{Pipeline: req, Flags: flags}) } +func (r reviewRunner) Respond(ctx context.Context, req threadrespond.Request) (threadrespond.Result, error) { + opts := r.respond + if opts.Acquire == nil && r.live.Acquire != nil { + opts.Acquire = func(path string) (threadrespond.Lock, error) { + return r.live.Acquire(path) + } + } + if opts.Now == nil { + opts.Now = r.live.Now + } + if opts.NewRunID == nil { + opts.NewRunID = r.live.NewRunID + } + return threadrespond.Run(ctx, opts, req) +} + type livePlanner struct { opts pipeline.Options } diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index 0a80c5d1..9b983b75 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -39,6 +39,8 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/reviewplan" "github.com/open-cli-collective/codereview-cli/internal/reviewrun" "github.com/open-cli-collective/codereview-cli/internal/statepaths" + "github.com/open-cli-collective/codereview-cli/internal/threadcontext" + "github.com/open-cli-collective/codereview-cli/internal/threadrespond" "github.com/open-cli-collective/codereview-cli/internal/view" ) @@ -99,6 +101,117 @@ func TestReviewDryRunCallsRunnerAndRendersText(t *testing.T) { } } +func TestRespondDryRunCallsResponderAndRendersText(t *testing.T) { + runner := &fakeRunner{respondResult: testThreadRespondResult(ledger.OutcomeDryRun)} + var cleanupCalled bool + cmd, out := newTestCommand(t, testConfig(), func(_ *cobra.Command, _ *root.Options, _ config.File, _ config.Profile, _ RuntimeOptions) (Runtime, error) { + return Runtime{ + Runner: runner, + Responder: runner, + PostingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}, + Cleanup: func() { cleanupCalled = true }, + }, nil + }) + + err := root.Execute(cmd, []string{ + "respond", "https://github.com/open-cli-collective/codereview-cli/pull/29", + "--dry-run", + "--no-resolve-threads", + }) + if err != nil { + t.Fatalf("Execute respond: %v", err) + } + if len(runner.respondRequests) != 1 { + t.Fatalf("respond calls = %d, want 1", len(runner.respondRequests)) + } + req := runner.respondRequests[0] + if req.PRRef.Number != 29 || req.ProfileName != "home" || req.PostingIdentity.Login != "review-bot" { + t.Fatalf("respond request identity/ref = %#v", req) + } + if !req.DryRun || !req.NoResolveThreads { + t.Fatalf("respond request flags = %#v, want dry-run no-resolve", req) + } + if req.RetryRunID != "" { + t.Fatalf("RetryRunID = %q, want empty", req.RetryRunID) + } + if !cleanupCalled { + t.Fatal("runtime cleanup was not called") + } + text := out.String() + if !strings.Contains(text, "Threads: considered 1, responded 1, resolved 1") || !strings.Contains(text, "Planned actions: 2") { + t.Fatalf("stdout = %q, want respond summary", text) + } +} + +func TestRespondRetryPostsFlagCallsResponder(t *testing.T) { + result := testThreadRespondResult(ledger.OutcomeComment) + result.Plan = reviewplan.Plan{} + runner := &fakeRunner{respondResult: result} + cmd, out := newTestCommand(t, testConfig(), fakeFactory(runner)) + + err := root.Execute(cmd, []string{ + "respond", "https://github.com/open-cli-collective/codereview-cli/pull/29", + "--retry-posts", "run-123", + }) + if err != nil { + t.Fatalf("Execute respond retry: %v", err) + } + if len(runner.respondRequests) != 1 { + t.Fatalf("respond calls = %d, want 1", len(runner.respondRequests)) + } + if got := runner.respondRequests[0].RetryRunID; got != "run-123" { + t.Fatalf("RetryRunID = %q, want run-123", got) + } + if runner.respondRequests[0].DryRun { + t.Fatalf("respond retry request = %#v, want live retry", runner.respondRequests[0]) + } + if text := out.String(); !strings.Contains(text, "responded 1, resolved 1") || !strings.Contains(text, "Planned actions: 2") { + t.Fatalf("stdout = %q, want retry counts from planned actions", text) + } +} + +func TestRespondJSONRendersStableShape(t *testing.T) { + runner := &fakeRunner{respondResult: testThreadRespondResult(ledger.OutcomeComment)} + cmd, out := newTestCommand(t, testConfig(), fakeFactory(runner)) + + if err := root.Execute(cmd, []string{"respond", "https://github.com/open-cli-collective/codereview-cli/pull/29", "--json"}); err != nil { + t.Fatalf("Execute respond json: %v", err) + } + var decoded struct { + Run struct { + RunID string `json:"run_id"` + Outcome string `json:"outcome"` + } `json:"run"` + Counts respondCounts `json:"counts"` + Outbox struct { + Posted int `json:"posted"` + } `json:"outbox"` + } + if err := json.Unmarshal(out.Bytes(), &decoded); err != nil { + t.Fatalf("Unmarshal stdout: %v\n%s", err, out.String()) + } + if decoded.Run.RunID != "respond-run-1" || decoded.Run.Outcome != "comment" || decoded.Counts.Responded != 1 || decoded.Outbox.Posted != 2 { + t.Fatalf("decoded json = %#v, want response summary", decoded) + } +} + +func TestRespondRejectsRetryPostsWithDryRun(t *testing.T) { + runner := &fakeRunner{respondResult: testThreadRespondResult(ledger.OutcomeComment)} + cmd, _ := newTestCommand(t, testConfig(), fakeFactory(runner)) + + err := root.Execute(cmd, []string{ + "respond", "https://github.com/open-cli-collective/codereview-cli/pull/29", + "--retry-posts", "run-123", + "--dry-run", + }) + if err == nil || !strings.Contains(err.Error(), "--retry-posts cannot be used") { + t.Fatalf("Execute error = %v, want retry/dry-run usage", err) + } + if len(runner.respondRequests) != 0 { + t.Fatalf("respond calls = %d, want none", len(runner.respondRequests)) + } +} + func TestReviewUsesRepositoryProfileRoute(t *testing.T) { cfg := testConfig() work := cfg.Profiles["home"] @@ -2387,13 +2500,16 @@ func TestReviewMapsUnsafeAgentSourceError(t *testing.T) { } type fakeRunner struct { - result pipeline.Result - err error - requests []pipeline.Request - liveResult reviewrun.Result - liveErr error - liveRequests []pipeline.Request - liveFlags []reviewrun.Flags + result pipeline.Result + err error + requests []pipeline.Request + liveResult reviewrun.Result + liveErr error + liveRequests []pipeline.Request + liveFlags []reviewrun.Flags + respondResult threadrespond.Result + respondErr error + respondRequests []threadrespond.Request } type noopLimiter struct{} @@ -2417,9 +2533,17 @@ func (r *fakeRunner) Live(_ context.Context, req pipeline.Request, flags reviewr return r.liveResult, nil } +func (r *fakeRunner) Respond(_ context.Context, req threadrespond.Request) (threadrespond.Result, error) { + r.respondRequests = append(r.respondRequests, req) + if r.respondErr != nil { + return threadrespond.Result{}, r.respondErr + } + return r.respondResult, nil +} + func fakeFactory(runner *fakeRunner) RuntimeFactory { return func(*cobra.Command, *root.Options, config.File, config.Profile, RuntimeOptions) (Runtime, error) { - return Runtime{Runner: runner, PostingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}}, nil + return Runtime{Runner: runner, Responder: runner, PostingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}}, nil } } @@ -2589,6 +2713,63 @@ func testPipelineResult(failOnTriggered bool) pipeline.Result { } } +func testThreadRespondResult(outcome ledger.Outcome) threadrespond.Result { + ref := gitprovider.PRRef{Host: "github.com", Owner: "open-cli-collective", Repo: "codereview-cli", Number: 29} + now := time.Date(2026, 6, 1, 12, 0, 0, 0, time.UTC) + return threadrespond.Result{ + Run: ledger.Run{ + RunID: "respond-run-1", + PRKey: "github.com_open-cli-collective_codereview-cli_29", + SHA: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + BaseSHA: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + PostMode: ledger.PostModeLive, + PostingIdentity: "review-bot", + ArtifactPath: "/tmp/respond-run-1", + Outcome: &outcome, + }, + PR: gitprovider.PR{ + Ref: ref, + Title: "CR respond", + URL: "https://github.com/open-cli-collective/codereview-cli/pull/29", + }, + PRKey: "github.com_open-cli-collective_codereview-cli_29", + EligibleThreads: []threadcontext.Thread{{ + ID: "thread-1", + }}, + Plan: reviewplan.Plan{ + Outcome: reviewplan.OutcomeComment, + Actions: []reviewplan.Action{ + { + ActionID: "thread_reply-1", + Kind: reviewplan.ActionKindThreadReply, + ThreadID: "thread-1", + PlannedAt: now, + Status: reviewplan.ActionStatusPending, + Required: true, + ThreadReply: &reviewplan.ThreadReplyPayload{ + Body: "Summary", + Summary: true, + }, + }, + { + ActionID: "resolve_thread-1", + Kind: reviewplan.ActionKindResolveThread, + ThreadID: "thread-1", + PlannedAt: now, + Status: reviewplan.ActionStatusPending, + Required: true, + ResolveThread: &reviewplan.ResolveThreadPayload{}, + }, + }, + }, + PlannedActions: []ledger.PlannedAction{ + {ActionID: "thread_reply-1", RunID: "respond-run-1", Kind: ledger.PlannedActionThreadReply, Status: ledger.PlannedActionPending, Required: true}, + {ActionID: "resolve_thread-1", RunID: "respond-run-1", Kind: ledger.PlannedActionResolveThread, Status: ledger.PlannedActionPending, Required: true}, + }, + Outbox: outbox.Result{Outcome: outcome, ExitCode: 0, Posted: 2}, + } +} + func testLiveResult(failOnTriggered bool) reviewrun.Result { ref := gitprovider.PRRef{Host: "github.com", Owner: "open-cli-collective", Repo: "codereview-cli", Number: 29} return reviewrun.Result{ diff --git a/internal/ledger/ledger.go b/internal/ledger/ledger.go index c8df57cb..eb519998 100644 --- a/internal/ledger/ledger.go +++ b/internal/ledger/ledger.go @@ -21,7 +21,7 @@ import ( const ( // SchemaVersion is the current ledger schema version. - SchemaVersion = 2 + SchemaVersion = 3 // DefaultBusyTimeout is the SQLite busy timeout configured at open. DefaultBusyTimeout = 5 * time.Second writeQueueSize = 64 @@ -292,6 +292,66 @@ type Session struct { CostUSD *float64 } +// LLMStepStatus records durable LLM step completion state. +type LLMStepStatus string + +// LLM step statuses. +const ( + LLMStepStatusCompleted LLMStepStatus = "completed" + LLMStepStatusFailed LLMStepStatus = "failed" +) + +func (s LLMStepStatus) String() string { return string(s) } + +// Valid reports whether s is a known LLM step status. +func (s LLMStepStatus) Valid() bool { + switch s { + case LLMStepStatusCompleted, LLMStepStatusFailed: + return true + default: + return false + } +} + +// LLMStep records one durable structured LLM execution step. +type LLMStep struct { + StepID string + RunID string + Stage string + ScopeKey string + InputHash string + PromptHash string + Provider string + Adapter string + Model string + Effort string + ProviderSessionID string + Status LLMStepStatus + StructuredOutputJSON string + StartedAt time.Time + CompletedAt *time.Time + DurationMS *int64 + TokensIn *int64 + TokensOut *int64 + CacheRead *int64 + CacheCreate *int64 + CostUSD *float64 + Error *string +} + +// LLMStepLookup identifies a reusable completed LLM step. +type LLMStepLookup struct { + RunID string + Stage string + ScopeKey string + InputHash string + PromptHash string + Provider string + Adapter string + Model string + Effort string +} + // Finding records one harness-assigned review finding. type Finding struct { FindingID review.FindingID @@ -465,6 +525,18 @@ func migrations() []dbmig.Migration { return err }, }, + { + Version: 3, + Name: "durable llm steps", + Up: func(ctx context.Context, tx *sql.Tx) error { + for _, statement := range llmStepSchemaStatements { + if _, err := tx.ExecContext(ctx, statement); err != nil { + return err + } + } + return nil + }, + }, } } @@ -817,6 +889,69 @@ FROM sessions WHERE run_id = ? ORDER BY session_row_id`, runID) return sessions, nil } +// InsertLLMStep inserts a durable LLM execution step row. +func (s *Store) InsertLLMStep(ctx context.Context, step LLMStep) error { + if err := validateLLMStep(step); err != nil { + return err + } + return s.write(ctx, func(ctx context.Context, db *sql.DB) error { + _, err := db.ExecContext(ctx, ` +INSERT INTO llm_steps ( + step_id, run_id, stage, scope_key, input_hash, prompt_hash, provider, adapter, model, effort, + provider_session_id, status, structured_output_json, started_at, completed_at, duration_ms, + tokens_in, tokens_out, cache_read, cache_create, cost_usd, error +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + step.StepID, step.RunID, step.Stage, step.ScopeKey, step.InputHash, step.PromptHash, + step.Provider, step.Adapter, step.Model, step.Effort, step.ProviderSessionID, + step.Status.String(), step.StructuredOutputJSON, encodeTime(step.StartedAt), + encodeOptionalTime(step.CompletedAt), step.DurationMS, step.TokensIn, step.TokensOut, + step.CacheRead, step.CacheCreate, step.CostUSD, step.Error, + ) + if err != nil { + return fmt.Errorf("ledger: insert llm step: %w", err) + } + return nil + }) +} + +// FindCompletedLLMStep returns a matching completed LLM step. +func (s *Store) FindCompletedLLMStep(ctx context.Context, lookup LLMStepLookup) (LLMStep, error) { + if err := validateLLMStepLookup(lookup); err != nil { + return LLMStep{}, err + } + if err := s.checkOpen(); err != nil { + return LLMStep{}, err + } + row := s.db.QueryRowContext(ctx, ` +SELECT step_id, run_id, stage, scope_key, input_hash, prompt_hash, provider, adapter, model, effort, + provider_session_id, status, structured_output_json, started_at, completed_at, duration_ms, + tokens_in, tokens_out, cache_read, cache_create, cost_usd, error +FROM llm_steps +WHERE run_id = ? + AND stage = ? + AND scope_key = ? + AND input_hash = ? + AND prompt_hash = ? + AND provider = ? + AND adapter = ? + AND model = ? + AND effort = ? + AND status = ? +ORDER BY completed_at DESC, step_id DESC +LIMIT 1`, + lookup.RunID, lookup.Stage, lookup.ScopeKey, lookup.InputHash, lookup.PromptHash, + lookup.Provider, lookup.Adapter, lookup.Model, lookup.Effort, LLMStepStatusCompleted.String(), + ) + step, err := scanLLMStep(row) + if errors.Is(err, sql.ErrNoRows) { + return LLMStep{}, ErrNotFound + } + if err != nil { + return LLMStep{}, fmt.Errorf("ledger: find completed llm step: %w", err) + } + return step, nil +} + // InsertFinding inserts a validated harness finding row. func (s *Store) InsertFinding(ctx context.Context, finding Finding) error { if err := validateFinding(finding); err != nil { @@ -1168,6 +1303,59 @@ func validateSession(session Session) error { return nil } +func validateLLMStep(step LLMStep) error { + for field, value := range map[string]string{ + "step_id": step.StepID, + "run_id": step.RunID, + "stage": step.Stage, + "scope_key": step.ScopeKey, + "input_hash": step.InputHash, + "prompt_hash": step.PromptHash, + "provider": step.Provider, + "adapter": step.Adapter, + "model": step.Model, + "provider_session_id": step.ProviderSessionID, + } { + if strings.TrimSpace(value) == "" { + return invalidInput(field, value) + } + } + if !step.Status.Valid() { + return invalidInput("status", step.Status.String()) + } + if step.Status == LLMStepStatusCompleted && strings.TrimSpace(step.StructuredOutputJSON) == "" { + return invalidInput("structured_output_json", step.StructuredOutputJSON) + } + if step.Status == LLMStepStatusCompleted && step.CompletedAt == nil { + return invalidInput("completed_at", "") + } + if step.Status == LLMStepStatusFailed && (step.Error == nil || strings.TrimSpace(*step.Error) == "") { + return invalidInput("error", "") + } + if step.StartedAt.IsZero() { + return invalidInput("started_at", "") + } + return nil +} + +func validateLLMStepLookup(lookup LLMStepLookup) error { + for field, value := range map[string]string{ + "run_id": lookup.RunID, + "stage": lookup.Stage, + "scope_key": lookup.ScopeKey, + "input_hash": lookup.InputHash, + "prompt_hash": lookup.PromptHash, + "provider": lookup.Provider, + "adapter": lookup.Adapter, + "model": lookup.Model, + } { + if strings.TrimSpace(value) == "" { + return invalidInput(field, value) + } + } + return nil +} + func validateFinding(finding Finding) error { for field, value := range map[string]string{ "finding_id": finding.FindingID.String(), @@ -1353,6 +1541,51 @@ func scanSession(row interface{ Scan(...any) error }) (Session, error) { return session, nil } +func scanLLMStep(row interface{ Scan(...any) error }) (LLMStep, error) { + var ( + step LLMStep + status string + startedAt string + completedAt sql.NullString + durationMS sql.NullInt64 + tokensIn sql.NullInt64 + tokensOut sql.NullInt64 + cacheRead sql.NullInt64 + cacheCreate sql.NullInt64 + costUSD sql.NullFloat64 + errorText sql.NullString + ) + if err := row.Scan( + &step.StepID, &step.RunID, &step.Stage, &step.ScopeKey, &step.InputHash, &step.PromptHash, + &step.Provider, &step.Adapter, &step.Model, &step.Effort, &step.ProviderSessionID, &status, + &step.StructuredOutputJSON, &startedAt, &completedAt, &durationMS, &tokensIn, &tokensOut, + &cacheRead, &cacheCreate, &costUSD, &errorText, + ); err != nil { + return LLMStep{}, err + } + step.Status = LLMStepStatus(normalizeStorageValue(status)) + if !step.Status.Valid() { + return LLMStep{}, invalidInput("status", status) + } + var err error + step.StartedAt, err = parseTime(startedAt) + if err != nil { + return LLMStep{}, err + } + step.CompletedAt, err = timePtrFromNull(completedAt) + if err != nil { + return LLMStep{}, err + } + step.DurationMS = int64PtrFromNull(durationMS) + step.TokensIn = int64PtrFromNull(tokensIn) + step.TokensOut = int64PtrFromNull(tokensOut) + step.CacheRead = int64PtrFromNull(cacheRead) + step.CacheCreate = int64PtrFromNull(cacheCreate) + step.CostUSD = float64PtrFromNull(costUSD) + step.Error = stringPtrFromNull(errorText) + return step, nil +} + func scanFinding(row interface{ Scan(...any) error }) (Finding, error) { var ( finding Finding @@ -1648,3 +1881,32 @@ var schemaStatements = []string{ last_used_at TEXT NOT NULL )`, } + +var llmStepSchemaStatements = []string{ + `CREATE TABLE llm_steps ( + step_id TEXT PRIMARY KEY, + run_id TEXT NOT NULL REFERENCES runs(run_id) ON DELETE CASCADE, + stage TEXT NOT NULL, + scope_key TEXT NOT NULL, + input_hash TEXT NOT NULL, + prompt_hash TEXT NOT NULL, + provider TEXT NOT NULL, + adapter TEXT NOT NULL, + model TEXT NOT NULL, + effort TEXT NOT NULL DEFAULT '', + provider_session_id TEXT NOT NULL, + status TEXT NOT NULL, + structured_output_json TEXT NOT NULL DEFAULT '', + started_at TEXT NOT NULL, + completed_at TEXT, + duration_ms INTEGER, + tokens_in INTEGER, + tokens_out INTEGER, + cache_read INTEGER, + cache_create INTEGER, + cost_usd REAL, + error TEXT +)`, + `CREATE INDEX llm_steps_run ON llm_steps(run_id)`, + `CREATE INDEX llm_steps_completed_lookup ON llm_steps(run_id, stage, scope_key, input_hash, prompt_hash, provider, adapter, model, effort, status)`, +} diff --git a/internal/ledger/ledger_test.go b/internal/ledger/ledger_test.go index 9509f91b..afc62f3d 100644 --- a/internal/ledger/ledger_test.go +++ b/internal/ledger/ledger_test.go @@ -32,12 +32,12 @@ func TestOpenMigratesFreshDatabaseAndAppliesStartupContract(t *testing.T) { t.Fatalf("PRAGMA busy_timeout = %d, want %d", got, DefaultBusyTimeout.Milliseconds()) } - for _, table := range []string{"prs", "runs", "sessions", "findings", "planned_actions", "named_sessions"} { + for _, table := range []string{"prs", "runs", "sessions", "findings", "planned_actions", "named_sessions", "llm_steps"} { if !tableExists(t, store.db, table) { t.Fatalf("table %s does not exist", table) } } - for _, index := range []string{"runs_pr_sha", "runs_resume", "runs_started_at", "sessions_run", "sessions_provider", "findings_run", "planned_actions_run", "planned_actions_status"} { + for _, index := range []string{"runs_pr_sha", "runs_resume", "runs_started_at", "sessions_run", "sessions_provider", "findings_run", "planned_actions_run", "planned_actions_status", "llm_steps_run", "llm_steps_completed_lookup"} { if !indexExists(t, store.db, index) { t.Fatalf("index %s does not exist", index) } @@ -45,6 +45,9 @@ func TestOpenMigratesFreshDatabaseAndAppliesStartupContract(t *testing.T) { if !columnExists(t, store.db, "planned_actions", "failure_class") { t.Fatal("planned_actions.failure_class column does not exist") } + if !tableExists(t, store.db, "llm_steps") { + t.Fatal("llm_steps table does not exist") + } wantResumeIndex := []string{"pr_key", "sha", "base_sha", "profile", "posting_identity", "post_mode", "outcome"} if got := indexColumns(t, store.db, "runs_resume"); !reflect.DeepEqual(got, wantResumeIndex) { t.Fatalf("runs_resume columns = %#v, want %#v", got, wantResumeIndex) @@ -102,6 +105,7 @@ func TestForeignKeyCascadeDeletesRunChildren(t *testing.T) { run := allocateRun(t, store, validAllocateRunParams()) session := validSession(run.RunID) insertSession(t, store, session) + insertLLMStep(t, store, validLLMStep(run.RunID)) insertFinding(t, store, validFinding(run.RunID, session.SessionRowID)) insertPlannedAction(t, store, validPlannedAction(run.RunID)) @@ -115,6 +119,9 @@ func TestForeignKeyCascadeDeletesRunChildren(t *testing.T) { if count := queryInt(t, store.db, "SELECT COUNT(*) FROM sessions"); count != 0 { t.Fatalf("sessions count = %d, want 0", count) } + if count := queryInt(t, store.db, "SELECT COUNT(*) FROM llm_steps"); count != 0 { + t.Fatalf("llm_steps count = %d, want 0", count) + } if count := queryInt(t, store.db, "SELECT COUNT(*) FROM findings"); count != 0 { t.Fatalf("findings count = %d, want 0", count) } @@ -123,6 +130,70 @@ func TestForeignKeyCascadeDeletesRunChildren(t *testing.T) { } } +func TestInsertAndFindCompletedLLMStep(t *testing.T) { + store := openStore(t) + run := allocateRun(t, store, validAllocateRunParams()) + step := validLLMStep(run.RunID) + insertLLMStep(t, store, step) + + got, err := store.FindCompletedLLMStep(context.Background(), LLMStepLookup{ + RunID: run.RunID, + Stage: step.Stage, + ScopeKey: step.ScopeKey, + InputHash: step.InputHash, + PromptHash: step.PromptHash, + Provider: step.Provider, + Adapter: step.Adapter, + Model: step.Model, + Effort: step.Effort, + }) + if err != nil { + t.Fatalf("FindCompletedLLMStep: %v", err) + } + if !reflect.DeepEqual(got, step) { + t.Fatalf("FindCompletedLLMStep = %#v, want %#v", got, step) + } + + failed := step + failed.StepID = "llm-step-failed" + failed.Status = LLMStepStatusFailed + failed.Error = strPtr("provider failed") + insertLLMStep(t, store, failed) + if _, err := store.FindCompletedLLMStep(context.Background(), LLMStepLookup{ + RunID: run.RunID, + Stage: failed.Stage, + ScopeKey: failed.ScopeKey, + InputHash: failed.InputHash, + PromptHash: failed.PromptHash, + Provider: failed.Provider, + Adapter: failed.Adapter, + Model: failed.Model, + Effort: failed.Effort, + }); err != nil { + t.Fatalf("FindCompletedLLMStep should ignore failed rows and find completed row: %v", err) + } +} + +func TestInsertLLMStepValidatesAuditFields(t *testing.T) { + store := openStore(t) + run := allocateRun(t, store, validAllocateRunParams()) + + completedMissingCompletedAt := validLLMStep(run.RunID) + completedMissingCompletedAt.CompletedAt = nil + if err := store.InsertLLMStep(context.Background(), completedMissingCompletedAt); !errors.Is(err, ErrInvalidInput) { + t.Fatalf("InsertLLMStep completed without completed_at error = %v, want ErrInvalidInput", err) + } + + failedMissingError := validLLMStep(run.RunID) + failedMissingError.StepID = "failed-missing-error" + failedMissingError.Status = LLMStepStatusFailed + failedMissingError.StructuredOutputJSON = "" + failedMissingError.Error = nil + if err := store.InsertLLMStep(context.Background(), failedMissingError); !errors.Is(err, ErrInvalidInput) { + t.Fatalf("InsertLLMStep failed without error text error = %v, want ErrInvalidInput", err) + } +} + func TestForeignKeyCascadeDeletesSessionChildren(t *testing.T) { store := openStore(t) run := allocateRun(t, store, validAllocateRunParams()) @@ -1318,6 +1389,47 @@ func insertSession(t *testing.T, store *Store, session Session) { } } +func validLLMStep(runID string) LLMStep { + completed := time.Date(2026, 5, 30, 12, 3, 0, 0, time.UTC) + duration := int64(1200) + tokensIn := int64(100) + tokensOut := int64(20) + cacheRead := int64(5) + cacheCreate := int64(7) + cost := 0.42 + + return LLMStep{ + StepID: "llm-step-1", + RunID: runID, + Stage: "thread_analysis", + ScopeKey: "thread:thread-1", + InputHash: "input-hash", + PromptHash: "prompt-hash", + Provider: "openai", + Adapter: "codex_cli", + Model: "gpt-5.5", + Effort: "medium", + ProviderSessionID: "provider-session-1", + Status: LLMStepStatusCompleted, + StructuredOutputJSON: `{"ok":true}`, + StartedAt: time.Date(2026, 5, 30, 12, 1, 0, 0, time.UTC), + CompletedAt: &completed, + DurationMS: &duration, + TokensIn: &tokensIn, + TokensOut: &tokensOut, + CacheRead: &cacheRead, + CacheCreate: &cacheCreate, + CostUSD: &cost, + } +} + +func insertLLMStep(t *testing.T, store *Store, step LLMStep) { + t.Helper() + if err := store.InsertLLMStep(context.Background(), step); err != nil { + t.Fatalf("InsertLLMStep: %v", err) + } +} + func validFinding(runID, sessionRowID string) Finding { side := review.DiffSideRight line := int64(42) diff --git a/internal/llm/adapter.go b/internal/llm/adapter.go index 4e250539..df68df2d 100644 --- a/internal/llm/adapter.go +++ b/internal/llm/adapter.go @@ -120,6 +120,7 @@ type StructuredResult[T any] struct { Response Response SessionID string ValidationAttempts []StructuredValidationAttempt + AcceptedOutput []byte } // StructuredValidationAttempt records one failed schema-validation attempt. @@ -179,9 +180,9 @@ func RunStructuredWithSessionResume[T any](ctx context.Context, adapter Adapter, if err != nil { return StructuredResult[T]{Response: response, SessionID: sessionID}, err } - value, decodeErr := decodeStructured(decode, response.StructuredOutput) + value, acceptedOutput, decodeErr := decodeStructuredAccepted(decode, response.StructuredOutput) if decodeErr == nil { - return StructuredResult[T]{Value: value, Response: response, SessionID: sessionID}, nil + return StructuredResult[T]{Value: value, Response: response, SessionID: sessionID, AcceptedOutput: acceptedOutput}, nil } attempts := []StructuredValidationAttempt{{ Label: "initial", @@ -200,7 +201,7 @@ func RunStructuredWithSessionResume[T any](ctx context.Context, adapter Adapter, if err != nil { return StructuredResult[T]{Response: retryResponse, SessionID: retrySessionID, ValidationAttempts: attempts}, err } - retryValue, retryErr := decodeStructured(decode, retryResponse.StructuredOutput) + retryValue, retryAcceptedOutput, retryErr := decodeStructuredAccepted(decode, retryResponse.StructuredOutput) if retryErr != nil { attempts = append(attempts, StructuredValidationAttempt{ Label: "retry", @@ -210,7 +211,7 @@ func RunStructuredWithSessionResume[T any](ctx context.Context, adapter Adapter, }) return StructuredResult[T]{Value: zero, Response: retryResponse, SessionID: retrySessionID, ValidationAttempts: attempts}, &StructuredValidationError{Attempts: attempts} } - return StructuredResult[T]{Value: retryValue, Response: retryResponse, SessionID: retrySessionID, ValidationAttempts: attempts}, nil + return StructuredResult[T]{Value: retryValue, Response: retryResponse, SessionID: retrySessionID, ValidationAttempts: attempts, AcceptedOutput: retryAcceptedOutput}, nil } // decodeStructured strict-decodes data, then on failure recovers a response @@ -219,20 +220,25 @@ func RunStructuredWithSessionResume[T any](ctx context.Context, adapter Adapter, // extracted object also fails the schema, that error is returned because it // describes the real schema violation; otherwise the strict error stands. func decodeStructured[T any](decode Decoder[T], data []byte) (T, error) { + value, _, err := decodeStructuredAccepted(decode, data) + return value, err +} + +func decodeStructuredAccepted[T any](decode Decoder[T], data []byte) (T, []byte, error) { value, err := decode(data) if err == nil { - return value, nil + return value, data, nil } var zero T extracted, ok := extractSingleJSONObject(data) if !ok || bytes.Equal(extracted, data) { - return zero, err + return zero, nil, err } extractedValue, extractedErr := decode(extracted) if extractedErr != nil { - return zero, extractedErr + return zero, nil, extractedErr } - return extractedValue, nil + return extractedValue, extracted, nil } // runOnceWithSession runs a single attempt and retries transient provider diff --git a/internal/llmrun/llmrun.go b/internal/llmrun/llmrun.go new file mode 100644 index 00000000..1b5a7eff --- /dev/null +++ b/internal/llmrun/llmrun.go @@ -0,0 +1,195 @@ +// Package llmrun provides durable execution for structured LLM steps. +package llmrun + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "strings" + "time" + + "github.com/open-cli-collective/codereview-cli/internal/ledger" + "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/stagemodel" +) + +// Store is the durable step storage used by RunStructuredStep. +type Store interface { + FindCompletedLLMStep(context.Context, ledger.LLMStepLookup) (ledger.LLMStep, error) + InsertLLMStep(context.Context, ledger.LLMStep) error +} + +// Request describes one durable structured LLM step. +type Request struct { + RunID string + Stage stagemodel.Stage + ScopeKey string + InputHash string + Provider string + Adapter llm.Adapter + Model string + Effort string + Prompt string + LogPath string + ResumeSessionID string + Now func() time.Time + NewStepID func() string +} + +// Result is the durable structured LLM step result. +type Result[T any] struct { + Value T + Step ledger.LLMStep + Cached bool +} + +// HashPrompt returns the SHA-256 hash used for prompt identity. +func HashPrompt(prompt string) string { + sum := sha256.Sum256([]byte(prompt)) + return hex.EncodeToString(sum[:]) +} + +// RunStructuredStep runs or reuses one durable structured LLM step. +func RunStructuredStep[T any](ctx context.Context, store Store, req Request, decode llm.Decoder[T]) (Result[T], error) { + var zero Result[T] + if err := validateRequest(store, req, decode); err != nil { + return zero, err + } + now := req.Now + if now == nil { + now = func() time.Time { return time.Now().UTC() } + } + promptHash := HashPrompt(req.Prompt) + inputHash := strings.TrimSpace(req.InputHash) + if inputHash == "" { + inputHash = promptHash + } + adapterName := strings.TrimSpace(req.Adapter.Name()) + if adapterName == "" { + return zero, fmt.Errorf("llmrun: adapter name is required") + } + lookup := ledger.LLMStepLookup{ + RunID: req.RunID, + Stage: string(req.Stage), + ScopeKey: req.ScopeKey, + InputHash: inputHash, + PromptHash: promptHash, + Provider: req.Provider, + Adapter: adapterName, + Model: req.Model, + Effort: req.Effort, + } + cached, err := store.FindCompletedLLMStep(ctx, lookup) + if err == nil { + value, decodeErr := decode([]byte(cached.StructuredOutputJSON)) + if decodeErr != nil { + return zero, fmt.Errorf("llmrun: decode cached step %s: %w", cached.StepID, decodeErr) + } + return Result[T]{Value: value, Step: cached, Cached: true}, nil + } + if !errors.Is(err, ledger.ErrNotFound) { + return zero, fmt.Errorf("llmrun: find completed step: %w", err) + } + + stepID := strings.TrimSpace(req.NewStepID()) + if stepID == "" { + return zero, fmt.Errorf("llmrun: step ID generator returned blank ID") + } + started := now() + structured, runErr := llm.RunStructuredWithSessionResume(ctx, req.Adapter, req.ResumeSessionID, llm.Request{ + Model: req.Model, + Effort: req.Effort, + Prompt: req.Prompt, + LogPath: req.LogPath, + }, decode) + completed := now() + step := stepFromResult(req, adapterName, stepID, inputHash, promptHash, started, completed, structured) + if runErr != nil { + step.Status = ledger.LLMStepStatusFailed + message := runErr.Error() + step.Error = &message + if insertErr := store.InsertLLMStep(ctx, step); insertErr != nil { + return zero, fmt.Errorf("llmrun: persist failed step: %w; original error: %v", insertErr, runErr) + } + return zero, runErr + } + + step.Status = ledger.LLMStepStatusCompleted + step.StructuredOutputJSON = string(structured.AcceptedOutput) + if strings.TrimSpace(step.StructuredOutputJSON) == "" { + return zero, fmt.Errorf("llmrun: accepted structured output is empty") + } + if insertErr := store.InsertLLMStep(ctx, step); insertErr != nil { + return zero, fmt.Errorf("llmrun: persist completed step: %w", insertErr) + } + return Result[T]{Value: structured.Value, Step: step}, nil +} + +func validateRequest[T any](store Store, req Request, decode llm.Decoder[T]) error { + if store == nil { + return fmt.Errorf("llmrun: store is required") + } + if req.Adapter == nil { + return fmt.Errorf("llmrun: adapter is required") + } + if decode == nil { + return fmt.Errorf("llmrun: decoder is required") + } + for field, value := range map[string]string{ + "run_id": req.RunID, + "stage": string(req.Stage), + "scope_key": req.ScopeKey, + "provider": req.Provider, + "model": req.Model, + "prompt": req.Prompt, + } { + if strings.TrimSpace(value) == "" { + return fmt.Errorf("llmrun: %s is required", field) + } + } + if !req.Stage.Valid() { + return fmt.Errorf("llmrun: stage %q is invalid", req.Stage) + } + if req.NewStepID == nil { + return fmt.Errorf("llmrun: step ID generator is required") + } + return nil +} + +func stepFromResult[T any](req Request, adapterName, stepID, inputHash, promptHash string, started, completed time.Time, result llm.StructuredResult[T]) ledger.LLMStep { + providerSessionID := strings.TrimSpace(result.SessionID) + if providerSessionID == "" { + providerSessionID = stepID + } + return ledger.LLMStep{ + StepID: stepID, + RunID: req.RunID, + Stage: string(req.Stage), + ScopeKey: req.ScopeKey, + InputHash: inputHash, + PromptHash: promptHash, + Provider: req.Provider, + Adapter: adapterName, + Model: req.Model, + Effort: req.Effort, + ProviderSessionID: providerSessionID, + StartedAt: started, + CompletedAt: &completed, + DurationMS: &result.Response.DurationMS, + TokensIn: intPtrToInt64(result.Response.Usage.TokensIn), + TokensOut: intPtrToInt64(result.Response.Usage.TokensOut), + CacheRead: intPtrToInt64(result.Response.Usage.CacheRead), + CacheCreate: intPtrToInt64(result.Response.Usage.CacheCreate), + CostUSD: result.Response.Usage.CostUSD, + } +} + +func intPtrToInt64(value *int) *int64 { + if value == nil { + return nil + } + converted := int64(*value) + return &converted +} diff --git a/internal/llmrun/llmrun_test.go b/internal/llmrun/llmrun_test.go new file mode 100644 index 00000000..0dfd70dd --- /dev/null +++ b/internal/llmrun/llmrun_test.go @@ -0,0 +1,352 @@ +package llmrun + +import ( + "context" + "encoding/json" + "errors" + "strings" + "testing" + "time" + + "github.com/open-cli-collective/codereview-cli/internal/ledger" + "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/stagemodel" +) + +func TestRunStructuredStepReturnsCachedCompletedStep(t *testing.T) { + store := &fakeStore{completed: ledger.LLMStep{ + StepID: "step-cached", + RunID: "run-1", + Stage: string(stagemodel.StageThreadAnalysis), + ScopeKey: "thread:thread-1", + InputHash: "input-hash", + PromptHash: HashPrompt("prompt"), + Provider: "openai", + Adapter: "fake", + Model: "gpt-5.4", + Effort: "medium", + ProviderSessionID: "provider-session", + Status: ledger.LLMStepStatusCompleted, + StructuredOutputJSON: `{"ok":true}`, + StartedAt: testNow, + }} + adapter := &llm.FakeAdapter{NameValue: "fake"} + + got, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + InputHash: "input-hash", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-new" }, + }, decodeOK) + if err != nil { + t.Fatalf("RunStructuredStep: %v", err) + } + if !got.Cached || !got.Value.OK || got.Step.StepID != "step-cached" { + t.Fatalf("result = %#v, want cached completed step", got) + } + if len(adapter.Requests()) != 0 { + t.Fatalf("adapter requests = %#v, want cache hit to skip adapter", adapter.Requests()) + } + if len(store.inserted) != 0 { + t.Fatalf("inserted steps = %#v, want none on cache hit", store.inserted) + } +} + +func TestRunStructuredStepCachedDecodeFailureFailsClosed(t *testing.T) { + store := &fakeStore{completed: ledger.LLMStep{ + StepID: "step-corrupt", + RunID: "run-1", + Stage: string(stagemodel.StageThreadAnalysis), + ScopeKey: "thread:thread-1", + InputHash: HashPrompt("prompt"), + PromptHash: HashPrompt("prompt"), + Provider: "openai", + Adapter: "fake", + Model: "gpt-5.4", + Effort: "medium", + ProviderSessionID: "provider-session", + Status: ledger.LLMStepStatusCompleted, + StructuredOutputJSON: `not json`, + StartedAt: testNow, + }} + adapter := &llm.FakeAdapter{NameValue: "fake"} + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-new" }, + }, decodeOK) + if err == nil || !strings.Contains(err.Error(), "cached") { + t.Fatalf("RunStructuredStep error = %v, want cached decode error", err) + } + if len(adapter.Requests()) != 0 { + t.Fatalf("adapter requests = %#v, want corrupt cache to fail closed", adapter.Requests()) + } +} + +func TestRunStructuredStepRejectsBlankAdapterName(t *testing.T) { + store := &fakeStore{findErr: ledger.ErrNotFound} + adapter := blankNameAdapter{Adapter: &llm.FakeAdapter{}} + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, decodeOK) + if err == nil || !strings.Contains(err.Error(), "adapter name") { + t.Fatalf("RunStructuredStep error = %v, want adapter name error", err) + } + if len(store.lookups) != 0 { + t.Fatalf("lookups = %#v, want validation before lookup", store.lookups) + } +} + +func TestRunStructuredStepMissPersistsAcceptedOutputBeforeReturning(t *testing.T) { + store := &fakeStore{findErr: ledger.ErrNotFound} + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{ + SessionID: "provider-session", + Response: llm.Response{ + StructuredOutput: []byte(`Here is the JSON: {"ok":true}`), + Usage: llm.Usage{ + TokensIn: intPtr(10), + TokensOut: intPtr(2), + CostUSD: floatPtr(0.25), + }, + DurationMS: 1200, + }, + }) + + got, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, decodeOK) + if err != nil { + t.Fatalf("RunStructuredStep: %v", err) + } + if got.Cached || !got.Value.OK { + t.Fatalf("result = %#v, want uncached ok value", got) + } + if len(store.inserted) != 1 { + t.Fatalf("inserted steps = %#v, want one", store.inserted) + } + if len(store.lookups) != 1 { + t.Fatalf("lookups = %#v, want one", store.lookups) + } + lookup := store.lookups[0] + if lookup.Provider != "openai" || lookup.Adapter != "fake" || lookup.Model != "gpt-5.4" || lookup.Effort != "medium" { + t.Fatalf("lookup runtime identity = %#v", lookup) + } + step := store.inserted[0] + if step.Status != ledger.LLMStepStatusCompleted || step.StructuredOutputJSON != `{"ok":true}` { + t.Fatalf("persisted step = %#v, want completed accepted JSON", step) + } + if step.Provider != "openai" || step.Adapter != "fake" || step.ProviderSessionID != "provider-session" { + t.Fatalf("persisted runtime metadata = %#v", step) + } +} + +func TestRunStructuredStepPersistenceFailureAfterSuccessReturnsError(t *testing.T) { + insertErr := errors.New("store down") + store := &fakeStore{findErr: ledger.ErrNotFound, insertErr: insertErr} + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{SessionID: "provider-session", Response: llm.Response{StructuredOutput: []byte(`{"ok":true}`)}}) + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, decodeOK) + if !errors.Is(err, insertErr) { + t.Fatalf("RunStructuredStep error = %v, want insert error", err) + } +} + +func TestRunStructuredStepRejectsBlankAcceptedOutputOnSuccess(t *testing.T) { + store := &fakeStore{findErr: ledger.ErrNotFound} + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{SessionID: "provider-session", Response: llm.Response{StructuredOutput: nil}}) + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, func([]byte) (okResponse, error) { + return okResponse{OK: true}, nil + }) + if err == nil || !strings.Contains(err.Error(), "accepted structured output") { + t.Fatalf("RunStructuredStep error = %v, want accepted output invariant error", err) + } + if len(store.inserted) != 0 { + t.Fatalf("inserted steps = %#v, want no completed row with blank output", store.inserted) + } +} + +func TestRunStructuredStepPersistsFailedStep(t *testing.T) { + store := &fakeStore{findErr: ledger.ErrNotFound} + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{SessionID: "provider-session-1", Response: llm.Response{StructuredOutput: []byte(`bad1`)}}) + adapter.Queue(llm.FakeResult{SessionID: "provider-session-2", Response: llm.Response{StructuredOutput: []byte(`bad2`)}}) + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, decodeOK) + if err == nil { + t.Fatal("RunStructuredStep error = nil, want decode failure") + } + if len(store.inserted) != 1 { + t.Fatalf("inserted steps = %#v, want failed step", store.inserted) + } + step := store.inserted[0] + if step.Status != ledger.LLMStepStatusFailed { + t.Fatalf("step.Status = %q, want failed", step.Status) + } + if step.Error == nil || *step.Error == "" { + t.Fatalf("step.Error = %#v, want error text", step.Error) + } +} + +func TestRunStructuredStepPersistsProviderStartFailure(t *testing.T) { + providerErr := errors.New("provider unavailable") + store := &fakeStore{findErr: ledger.ErrNotFound} + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{StartErr: providerErr}) + + _, err := RunStructuredStep(context.Background(), store, Request{ + RunID: "run-1", + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:thread-1", + Provider: "openai", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + Prompt: "prompt", + Now: fixedClock(), + NewStepID: func() string { return "step-1" }, + }, decodeOK) + if !errors.Is(err, providerErr) { + t.Fatalf("RunStructuredStep error = %v, want provider error", err) + } + if len(store.inserted) != 1 { + t.Fatalf("inserted steps = %#v, want failed step", store.inserted) + } + step := store.inserted[0] + if step.Status != ledger.LLMStepStatusFailed || step.ProviderSessionID != "step-1" { + t.Fatalf("failed step = %#v, want failed with step ID fallback session", step) + } +} + +type okResponse struct { + OK bool `json:"ok"` +} + +func decodeOK(data []byte) (okResponse, error) { + var out okResponse + if err := json.Unmarshal(data, &out); err != nil { + return okResponse{}, err + } + if !out.OK { + return okResponse{}, errors.New("ok must be true") + } + return out, nil +} + +type fakeStore struct { + completed ledger.LLMStep + findErr error + insertErr error + lookups []ledger.LLMStepLookup + inserted []ledger.LLMStep +} + +type blankNameAdapter struct { + llm.Adapter +} + +func (blankNameAdapter) Name() string { return "" } + +func (s *fakeStore) FindCompletedLLMStep(_ context.Context, lookup ledger.LLMStepLookup) (ledger.LLMStep, error) { + s.lookups = append(s.lookups, lookup) + if s.findErr != nil { + return ledger.LLMStep{}, s.findErr + } + return s.completed, nil +} + +func (s *fakeStore) InsertLLMStep(_ context.Context, step ledger.LLMStep) error { + if s.insertErr != nil { + return s.insertErr + } + s.inserted = append(s.inserted, step) + return nil +} + +var testNow = time.Date(2026, 6, 22, 12, 0, 0, 0, time.UTC) + +func fixedClock() func() time.Time { + var calls int + return func() time.Time { + calls++ + return testNow.Add(time.Duration(calls) * time.Second) + } +} + +func intPtr(value int) *int { + return &value +} + +func floatPtr(value float64) *float64 { + return &value +} diff --git a/internal/plannedactions/plannedactions.go b/internal/plannedactions/plannedactions.go new file mode 100644 index 00000000..1a5ce568 --- /dev/null +++ b/internal/plannedactions/plannedactions.go @@ -0,0 +1,110 @@ +// Package plannedactions converts review plans into durable ledger actions. +package plannedactions + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/open-cli-collective/codereview-cli/internal/ledger" + "github.com/open-cli-collective/codereview-cli/internal/outbox" + "github.com/open-cli-collective/codereview-cli/internal/reviewplan" +) + +// FromReviewPlan converts one planner-local action into a ledger planned action. +func FromReviewPlan(runID string, action reviewplan.Action) (ledger.PlannedAction, error) { + payload, err := Payload(action) + if err != nil { + return ledger.PlannedAction{}, err + } + planned := ledger.PlannedAction{ + ActionID: action.ActionID, + RunID: runID, + Kind: LedgerKind(action.Kind), + PlannedAt: action.PlannedAt, + PayloadJSON: string(payload), + Status: LedgerStatus(action.Status), + Required: action.Required, + } + if action.FindingID.Assigned() { + id := action.FindingID.String() + planned.FindingID = &id + } + if strings.TrimSpace(action.ThreadID) != "" { + planned.ThreadID = &action.ThreadID + } + return planned, nil +} + +// Payload returns the outbox payload JSON for action. +func Payload(action reviewplan.Action) ([]byte, error) { + switch action.Kind { + case reviewplan.ActionKindInlineComment: + if action.InlineComment == nil { + return nil, fmt.Errorf("plannedactions: inline payload missing") + } + return json.Marshal(outbox.InlineCommentPayload{ + Body: action.InlineComment.Body, + Path: action.InlineComment.Path, + Side: action.InlineComment.Side, + Line: action.InlineComment.Line, + SubjectType: action.InlineComment.SubjectType, + DiffPosition: action.InlineComment.DiffPosition, + }) + case reviewplan.ActionKindThreadReply: + if action.ThreadReply == nil { + return nil, fmt.Errorf("plannedactions: thread reply payload missing") + } + return json.Marshal(outbox.ThreadReplyPayload{ + Body: action.ThreadReply.Body, + Summary: action.ThreadReply.Summary, + }) + case reviewplan.ActionKindResolveThread: + return json.Marshal(outbox.ResolveThreadPayload{}) + case reviewplan.ActionKindRollupComment: + if action.RollupComment == nil { + return nil, fmt.Errorf("plannedactions: rollup payload missing") + } + return json.Marshal(outbox.RollupCommentPayload{Body: action.RollupComment.Body}) + case reviewplan.ActionKindSubmitReview: + if action.SubmitReview == nil { + return nil, fmt.Errorf("plannedactions: submit review payload missing") + } + return json.Marshal(outbox.SubmitReviewPayload{ + Body: action.SubmitReview.Body, + Event: action.SubmitReview.Event, + }) + default: + return nil, fmt.Errorf("plannedactions: unknown action kind %q", action.Kind) + } +} + +// LedgerKind maps reviewplan action kinds into ledger kinds. +func LedgerKind(kind reviewplan.ActionKind) ledger.PlannedActionKind { + switch kind { + case reviewplan.ActionKindInlineComment: + return ledger.PlannedActionInlineComment + case reviewplan.ActionKindThreadReply: + return ledger.PlannedActionThreadReply + case reviewplan.ActionKindResolveThread: + return ledger.PlannedActionResolveThread + case reviewplan.ActionKindRollupComment: + return ledger.PlannedActionRollupComment + case reviewplan.ActionKindSubmitReview: + return ledger.PlannedActionSubmitReview + default: + return ledger.PlannedActionKind(kind) + } +} + +// LedgerStatus maps reviewplan statuses into ledger statuses. +func LedgerStatus(status reviewplan.ActionStatus) ledger.PlannedActionStatus { + switch status { + case reviewplan.ActionStatusPending: + return ledger.PlannedActionPending + case reviewplan.ActionStatusPlannedOnly: + return ledger.PlannedActionPlannedOnly + default: + return ledger.PlannedActionStatus(status) + } +} diff --git a/internal/plannedactions/plannedactions_test.go b/internal/plannedactions/plannedactions_test.go new file mode 100644 index 00000000..6fa84eb2 --- /dev/null +++ b/internal/plannedactions/plannedactions_test.go @@ -0,0 +1,50 @@ +package plannedactions + +import ( + "encoding/json" + "testing" + "time" + + "github.com/open-cli-collective/codereview-cli/internal/ledger" + "github.com/open-cli-collective/codereview-cli/internal/outbox" + "github.com/open-cli-collective/codereview-cli/internal/reviewplan" +) + +func TestFromReviewPlanThreadReply(t *testing.T) { + action := reviewplan.Action{ + ActionID: "reply-1", + Kind: reviewplan.ActionKindThreadReply, + ThreadID: "thread-1", + PlannedAt: time.Date(2026, 6, 1, 12, 0, 0, 0, time.UTC), + Status: reviewplan.ActionStatusPending, + Required: true, + ThreadReply: &reviewplan.ThreadReplyPayload{ + Body: "summary body", + Summary: true, + }, + } + got, err := FromReviewPlan("run-1", action) + if err != nil { + t.Fatalf("FromReviewPlan: %v", err) + } + if got.Kind != ledger.PlannedActionThreadReply || got.Status != ledger.PlannedActionPending || !got.Required { + t.Fatalf("planned action = %#v, want required pending thread reply", got) + } + if got.ThreadID == nil || *got.ThreadID != "thread-1" { + t.Fatalf("ThreadID = %#v, want thread-1", got.ThreadID) + } + var payload outbox.ThreadReplyPayload + if err := json.Unmarshal([]byte(got.PayloadJSON), &payload); err != nil { + t.Fatalf("payload JSON: %v", err) + } + if payload.Body != "summary body" || !payload.Summary { + t.Fatalf("payload = %#v, want summary body", payload) + } +} + +func TestPayloadRejectsMissingThreadReplyPayload(t *testing.T) { + _, err := Payload(reviewplan.Action{Kind: reviewplan.ActionKindThreadReply}) + if err == nil { + t.Fatal("Payload error = nil, want missing payload error") + } +} diff --git a/internal/review/review.go b/internal/review/review.go index 9fdf28dd..4522b4e8 100644 --- a/internal/review/review.go +++ b/internal/review/review.go @@ -139,6 +139,57 @@ func (d ThreadDecision) Valid() bool { } } +// ThreadResponseKind identifies the type of response to post to an existing +// inline discussion thread. +type ThreadResponseKind string + +// ThreadResponseKind values. +const ( + ThreadResponseReply ThreadResponseKind = "reply" + ThreadResponseSummaryReply ThreadResponseKind = "summary_reply" +) + +// String returns the wire/domain representation. +func (k ThreadResponseKind) String() string { + return string(k) +} + +// Valid reports whether k is one of the known thread response kind values. +func (k ThreadResponseKind) Valid() bool { + switch k { + case ThreadResponseReply, ThreadResponseSummaryReply: + return true + default: + return false + } +} + +// ThreadResponseAction is a concrete response to an existing PR thread. +type ThreadResponseAction struct { + Kind ThreadResponseKind `json:"kind"` + ThreadID string `json:"thread_id"` + Body string `json:"body"` + Resolve bool `json:"resolve,omitempty"` + Rationale string `json:"rationale,omitempty"` +} + +// Validate checks the response action can be planned safely. +func (a ThreadResponseAction) Validate() error { + if !a.Kind.Valid() { + return invalidValue("thread response kind", a.Kind.String()) + } + if strings.TrimSpace(a.ThreadID) == "" { + return fmt.Errorf("thread response ID is required") + } + if strings.TrimSpace(a.Body) == "" { + return fmt.Errorf("thread response body is required") + } + if a.Resolve && a.Kind != ThreadResponseSummaryReply { + return fmt.Errorf("thread response resolve requires summary_reply") + } + return nil +} + // AnchorKind identifies whether a finding is line- or file-scoped. type AnchorKind string diff --git a/internal/review/review_test.go b/internal/review/review_test.go index 45b333f5..3c524473 100644 --- a/internal/review/review_test.go +++ b/internal/review/review_test.go @@ -209,6 +209,15 @@ func TestAllEnumConstantsParseAndValidate(t *testing.T) { }, valid: func(s string) bool { return ThreadDecision(s).Valid() }, }, + { + name: "thread response kind", + values: []interface{ String() string }{ThreadResponseReply, ThreadResponseSummaryReply}, + parse: func(s string) (string, bool) { + kind := ThreadResponseKind(normalizeLower(s)) + return kind.String(), kind.Valid() + }, + valid: func(s string) bool { return ThreadResponseKind(s).Valid() }, + }, { name: "anchor kind", values: []interface{ String() string }{AnchorKindLine, AnchorKindFile}, @@ -259,6 +268,57 @@ func TestAllEnumConstantsParseAndValidate(t *testing.T) { } } +func TestThreadResponseActionValidate(t *testing.T) { + tests := []struct { + name string + in ThreadResponseAction + err string + }{ + { + name: "reply", + in: ThreadResponseAction{Kind: ThreadResponseReply, ThreadID: "thread-1", Body: "Thanks."}, + }, + { + name: "summary reply resolves", + in: ThreadResponseAction{Kind: ThreadResponseSummaryReply, ThreadID: "thread-1", Body: "Summary.", Resolve: true}, + }, + { + name: "invalid kind", + in: ThreadResponseAction{Kind: ThreadResponseKind("other"), ThreadID: "thread-1", Body: "Body."}, + err: "invalid", + }, + { + name: "missing thread", + in: ThreadResponseAction{Kind: ThreadResponseReply, Body: "Body."}, + err: "ID", + }, + { + name: "missing body", + in: ThreadResponseAction{Kind: ThreadResponseReply, ThreadID: "thread-1"}, + err: "body", + }, + { + name: "resolve requires summary reply", + in: ThreadResponseAction{Kind: ThreadResponseReply, ThreadID: "thread-1", Body: "Body.", Resolve: true}, + err: "summary_reply", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.in.Validate() + if tt.err == "" { + if err != nil { + t.Fatalf("Validate: %v", err) + } + return + } + if err == nil || !strings.Contains(err.Error(), tt.err) { + t.Fatalf("Validate error = %v, want containing %q", err, tt.err) + } + }) + } +} + func TestAnchorValidate(t *testing.T) { tests := []struct { name string diff --git a/internal/reviewplan/reviewplan.go b/internal/reviewplan/reviewplan.go index 20974429..8cc2f81e 100644 --- a/internal/reviewplan/reviewplan.go +++ b/internal/reviewplan/reviewplan.go @@ -126,6 +126,16 @@ type Request struct { NewActionID ActionIDGenerator } +// ThreadResponseRequest is the pure input to BuildThreadResponses. +type ThreadResponseRequest struct { + PostMode PostMode + ProviderCaps ProviderCaps + Responses []review.ThreadResponseAction + + Now func() time.Time + NewActionID ActionIDGenerator +} + // Diff is the planner-owned diff metadata needed for anchoring. type Diff struct { Files []DiffFile @@ -254,6 +264,32 @@ func Build(req Request) (Plan, error) { return b.buildReview() } +// BuildThreadResponses turns thread response-domain values into a response-only +// action plan. It never creates rollup comments or submit-review actions. +func BuildThreadResponses(req ThreadResponseRequest) (Plan, error) { + b, err := newBuilder(Request{ + PostMode: req.PostMode, + ProviderCaps: req.ProviderCaps, + Now: req.Now, + NewActionID: req.NewActionID, + }) + if err != nil { + return Plan{}, err + } + actions, err := b.threadResponseActions(req.Responses) + if err != nil { + return Plan{}, err + } + outcome := OutcomeNothingToReview + if len(actions) > 0 { + outcome = OutcomeComment + } + return Plan{ + Outcome: outcome, + Actions: actions, + }, nil +} + // OutcomeFromReviewEvent maps a review-domain event into a planner outcome. func OutcomeFromReviewEvent(event review.ReviewEvent) (Outcome, error) { switch event { @@ -641,6 +677,46 @@ func (b *builder) threadActions() ([]Action, []Action, error) { return replies, resolves, nil } +func (b *builder) threadResponseActions(responses []review.ThreadResponseAction) ([]Action, error) { + var replies []Action + var resolves []Action + for _, response := range responses { + if err := response.Validate(); err != nil { + return nil, fmt.Errorf("reviewplan: invalid thread response: %w", err) + } + reply, err := b.newAction(ActionKindThreadReply) + if err != nil { + return nil, err + } + reply.Required = true + reply.ThreadID = response.ThreadID + reply.Marker = MarkerPlacement{ + BodyBearing: true, + ThreadSummary: response.Kind == review.ThreadResponseSummaryReply, + } + reply.ThreadReply = &ThreadReplyPayload{ + Body: sanitize(response.Body), + Summary: response.Kind == review.ThreadResponseSummaryReply, + } + replies = append(replies, reply) + if !response.Resolve || !b.req.ProviderCaps.ThreadResolution { + continue + } + resolve, err := b.newAction(ActionKindResolveThread) + if err != nil { + return nil, err + } + resolve.Required = true + resolve.ThreadID = response.ThreadID + resolve.ResolveThread = &ResolveThreadPayload{} + resolves = append(resolves, resolve) + } + actions := make([]Action, 0, len(replies)+len(resolves)) + actions = append(actions, replies...) + actions = append(actions, resolves...) + return actions, nil +} + func (b *builder) commentActions(ordered []review.Finding) ([]Action, error) { var actions []Action for _, finding := range ordered { diff --git a/internal/reviewplan/reviewplan_test.go b/internal/reviewplan/reviewplan_test.go index 4aed6f2b..d2f78871 100644 --- a/internal/reviewplan/reviewplan_test.go +++ b/internal/reviewplan/reviewplan_test.go @@ -177,6 +177,94 @@ func TestThreadDecisionRejectsInvalidValue(t *testing.T) { } } +func TestBuildThreadResponsesEmitsOnlyRequiredThreadActions(t *testing.T) { + plan, err := BuildThreadResponses(ThreadResponseRequest{ + PostMode: PostModeLive, + ProviderCaps: ProviderCaps{ThreadResolution: true}, + Responses: []review.ThreadResponseAction{ + {Kind: review.ThreadResponseReply, ThreadID: "thread-1", Body: "Please clarify."}, + {Kind: review.ThreadResponseSummaryReply, ThreadID: "thread-2", Body: "Resolved summary.", Resolve: true}, + }, + Now: func() time.Time { return testTime }, + NewActionID: newIDGenerator(), + }) + if err != nil { + t.Fatalf("BuildThreadResponses: %v", err) + } + if plan.Outcome != OutcomeComment { + t.Fatalf("Outcome = %q, want comment", plan.Outcome) + } + if got := actionKinds(plan.Actions); !reflect.DeepEqual(got, []ActionKind{ActionKindThreadReply, ActionKindThreadReply, ActionKindResolveThread}) { + t.Fatalf("action kinds = %#v, want reply/reply/resolve only", got) + } + for _, action := range plan.Actions { + if !action.Required { + t.Fatalf("action %#v Required = false, want true", action) + } + if action.Kind == ActionKindRollupComment || action.Kind == ActionKindSubmitReview { + t.Fatalf("response-only plan emitted %s", action.Kind) + } + if action.Status != ActionStatusPending { + t.Fatalf("action status = %q, want pending", action.Status) + } + } + if plan.Actions[0].ThreadReply == nil || plan.Actions[0].ThreadReply.Summary { + t.Fatalf("normal reply payload = %#v, want Summary=false", plan.Actions[0].ThreadReply) + } + if plan.Actions[1].ThreadReply == nil || !plan.Actions[1].ThreadReply.Summary || !plan.Actions[1].Marker.ThreadSummary { + t.Fatalf("summary reply = payload %#v marker %#v, want summary marker", plan.Actions[1].ThreadReply, plan.Actions[1].Marker) + } +} + +func TestBuildThreadResponsesDryRunAndResolutionDisabled(t *testing.T) { + plan, err := BuildThreadResponses(ThreadResponseRequest{ + PostMode: PostModeDryRun, + ProviderCaps: ProviderCaps{ThreadResolution: false}, + Responses: []review.ThreadResponseAction{ + {Kind: review.ThreadResponseSummaryReply, ThreadID: "thread-1", Body: "Summary.", Resolve: true}, + }, + Now: func() time.Time { return testTime }, + NewActionID: newIDGenerator(), + }) + if err != nil { + t.Fatalf("BuildThreadResponses: %v", err) + } + if got := actionKinds(plan.Actions); !reflect.DeepEqual(got, []ActionKind{ActionKindThreadReply}) { + t.Fatalf("action kinds = %#v, want reply only when resolution disabled", got) + } + if plan.Actions[0].Status != ActionStatusPlannedOnly { + t.Fatalf("status = %q, want planned_only", plan.Actions[0].Status) + } +} + +func TestBuildThreadResponsesNoActions(t *testing.T) { + plan, err := BuildThreadResponses(ThreadResponseRequest{ + PostMode: PostModeLive, + Now: func() time.Time { return testTime }, + NewActionID: newIDGenerator(), + }) + if err != nil { + t.Fatalf("BuildThreadResponses: %v", err) + } + if plan.Outcome != OutcomeNothingToReview || len(plan.Actions) != 0 { + t.Fatalf("plan = %#v, want nothing_to_review with no actions", plan) + } +} + +func TestBuildThreadResponsesRejectsInvalidResponse(t *testing.T) { + _, err := BuildThreadResponses(ThreadResponseRequest{ + PostMode: PostModeLive, + Responses: []review.ThreadResponseAction{ + {Kind: review.ThreadResponseReply, ThreadID: "thread-1", Resolve: true}, + }, + Now: func() time.Time { return testTime }, + NewActionID: newIDGenerator(), + }) + if err == nil || !strings.Contains(err.Error(), "thread response") { + t.Fatalf("BuildThreadResponses error = %v, want invalid response", err) + } +} + func TestMultipleInlineActionsFollowRollupOrder(t *testing.T) { req := baseRequest() req.Findings = []review.Finding{ diff --git a/internal/stagemodel/resolver.go b/internal/stagemodel/resolver.go new file mode 100644 index 00000000..20be933d --- /dev/null +++ b/internal/stagemodel/resolver.go @@ -0,0 +1,137 @@ +// Package stagemodel resolves stage-specific LLM model choices from profile +// preferences and explicit runtime overrides. +package stagemodel + +import ( + "fmt" + "strings" + + "github.com/open-cli-collective/codereview-cli/internal/config" +) + +// Stage identifies a durable LLM interaction point in the review system. +type Stage string + +// Known LLM stages. +const ( + StageSelection Stage = "selection" + StageSynthesis Stage = "synthesis" + StageReviewer Stage = "reviewer" + StageThreadAnalysis Stage = "thread_analysis" + StageApprovalOverride Stage = "approval_override" +) + +// Valid reports whether s is a known model-resolution stage. +func (s Stage) Valid() bool { + switch s { + case StageSelection, StageSynthesis, StageReviewer, StageThreadAnalysis, StageApprovalOverride: + return true + default: + return false + } +} + +// Request describes one stage model-resolution request. +type Request struct { + Profile config.Profile + Stage Stage + Tier config.ModelTier + FloorTier config.ModelTier + ModelOverride string + EffortOverride string + DefaultEffort string +} + +// Result is the concrete model/effort selected for one LLM stage. +type Result struct { + Stage Stage + Tier config.ModelTier + Model string + Effort string + Source config.ModelMapSource + Override bool +} + +// ResolveStageModel resolves one concrete model and effort for a review stage. +func ResolveStageModel(req Request) (Result, error) { + stage := Stage(strings.TrimSpace(string(req.Stage))) + if !stage.Valid() { + return Result{}, fmt.Errorf("stagemodel: stage %q is invalid", req.Stage) + } + tier := config.ModelTier(strings.TrimSpace(string(req.Tier))) + effort := strings.TrimSpace(req.EffortOverride) + if effort == "" { + effort = strings.TrimSpace(req.DefaultEffort) + } + if model := strings.TrimSpace(req.ModelOverride); model != "" { + return Result{ + Stage: stage, + Tier: tier, + Model: model, + Effort: effort, + Override: true, + }, nil + } + if tier != "" && !tier.Valid() { + return Result{}, fmt.Errorf("stagemodel: stage %s: model_tier %q is invalid; must be one of small, medium, large", stage, tier) + } + floorTier := config.ModelTier(strings.TrimSpace(string(req.FloorTier))) + if floorTier != "" { + if !floorTier.Valid() { + return Result{}, fmt.Errorf("stagemodel: stage %s: model_tier floor %q is invalid; must be one of small, medium, large", stage, floorTier) + } + if tier == "" { + tier = floorTier + } + tier = maxModelTier(tier, floorTier) + } + + resolved, ok := config.ResolveModelTier(req.Profile.LLM, tier) + if !ok { + llmConfig := req.Profile.LLM + return Result{}, fmt.Errorf("stagemodel: stage %s: model_tier %q is not mapped for provider %q adapter %q", stage, tier, llmConfig.Provider, llmConfig.Adapter) + } + return Result{ + Stage: stage, + Tier: resolved.Tier, + Model: resolved.Model, + Effort: effort, + Source: resolved.Source, + }, nil +} + +// ResolveFirstAvailable resolves the first mapped tier from tiers for req. +func ResolveFirstAvailable(req Request, tiers ...config.ModelTier) (Result, bool) { + if len(tiers) == 0 { + resolved, err := ResolveStageModel(req) + return resolved, err == nil + } + for _, tier := range tiers { + req.Tier = tier + resolved, err := ResolveStageModel(req) + if err == nil { + return resolved, true + } + } + return Result{}, false +} + +func maxModelTier(left, right config.ModelTier) config.ModelTier { + if modelTierRank(left) >= modelTierRank(right) { + return left + } + return right +} + +func modelTierRank(tier config.ModelTier) int { + switch tier { + case config.ModelTierSmall: + return 1 + case config.ModelTierMedium: + return 2 + case config.ModelTierLarge: + return 3 + default: + return 0 + } +} diff --git a/internal/stagemodel/resolver_test.go b/internal/stagemodel/resolver_test.go new file mode 100644 index 00000000..0d730c66 --- /dev/null +++ b/internal/stagemodel/resolver_test.go @@ -0,0 +1,205 @@ +package stagemodel + +import ( + "strings" + "testing" + + "github.com/open-cli-collective/codereview-cli/internal/config" +) + +func TestResolveStageModelUsesConfiguredTierMapping(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderOpenAI, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterCodexCLI, + ModelMap: config.ModelMap{"medium": "profile-medium-model"}, + }} + + got, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageThreadAnalysis, + Tier: config.ModelTierMedium, + DefaultEffort: "medium", + }) + if err != nil { + t.Fatalf("ResolveStageModel: %v", err) + } + if got.Stage != StageThreadAnalysis { + t.Fatalf("Stage = %q, want %q", got.Stage, StageThreadAnalysis) + } + if got.Tier != config.ModelTierMedium { + t.Fatalf("Tier = %q, want %q", got.Tier, config.ModelTierMedium) + } + if got.Model != "profile-medium-model" { + t.Fatalf("Model = %q, want profile-medium-model", got.Model) + } + if got.Effort != "medium" { + t.Fatalf("Effort = %q, want medium", got.Effort) + } + if got.Source != config.ModelMapSourceConfig { + t.Fatalf("Source = %q, want %q", got.Source, config.ModelMapSourceConfig) + } + if got.Override { + t.Fatalf("Override = true, want false") + } +} + +func TestResolveStageModelAppliesEffortOverrideWithoutBypassingTier(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderOpenAI, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterCodexCLI, + }} + + got, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageSelection, + Tier: config.ModelTierMedium, + EffortOverride: "high", + DefaultEffort: "medium", + }) + if err != nil { + t.Fatalf("ResolveStageModel: %v", err) + } + if got.Model != "gpt-5.4" { + t.Fatalf("Model = %q, want gpt-5.4", got.Model) + } + if got.Effort != "high" { + t.Fatalf("Effort = %q, want high", got.Effort) + } + if got.Source != config.ModelMapSourceBuiltIn { + t.Fatalf("Source = %q, want %q", got.Source, config.ModelMapSourceBuiltIn) + } +} + +func TestResolveStageModelAppliesTierFloor(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderOpenAI, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterCodexCLI, + ModelMap: config.ModelMap{ + "small": "profile-small-model", + "large": "profile-large-model", + }, + }} + + got, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageReviewer, + Tier: config.ModelTierSmall, + FloorTier: config.ModelTierLarge, + DefaultEffort: "medium", + }) + if err != nil { + t.Fatalf("ResolveStageModel: %v", err) + } + if got.Tier != config.ModelTierLarge { + t.Fatalf("Tier = %q, want %q", got.Tier, config.ModelTierLarge) + } + if got.Model != "profile-large-model" { + t.Fatalf("Model = %q, want profile-large-model", got.Model) + } + if got.Source != config.ModelMapSourceConfig { + t.Fatalf("Source = %q, want %q", got.Source, config.ModelMapSourceConfig) + } +} + +func TestResolveStageModelBypassesTierForExplicitOverride(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderPi, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterPiRPC, + }} + + got, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageThreadAnalysis, + Tier: config.ModelTierLarge, + ModelOverride: "operator-chosen-model", + DefaultEffort: "low", + }) + if err != nil { + t.Fatalf("ResolveStageModel: %v", err) + } + if got.Model != "operator-chosen-model" { + t.Fatalf("Model = %q, want operator-chosen-model", got.Model) + } + if got.Effort != "low" { + t.Fatalf("Effort = %q, want low", got.Effort) + } + if !got.Override { + t.Fatalf("Override = false, want true") + } + if got.Source != "" { + t.Fatalf("Source = %q, want empty source for explicit override", got.Source) + } +} + +func TestResolveStageModelErrorsForUnmappedTier(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderPi, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterPiRPC, + }} + + _, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageThreadAnalysis, + Tier: config.ModelTierSmall, + DefaultEffort: "low", + }) + if err == nil { + t.Fatal("ResolveStageModel returned nil error, want unmapped tier error") + } + if !strings.Contains(err.Error(), "thread_analysis") || !strings.Contains(err.Error(), "model_tier") { + t.Fatalf("error = %q, want stage and model_tier context", err) + } +} + +func TestResolveStageModelErrorsForInvalidTierBeforeApplyingFloor(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderOpenAI, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterCodexCLI, + }} + + _, err := ResolveStageModel(Request{ + Profile: profile, + Stage: StageReviewer, + Tier: config.ModelTier("flagship"), + FloorTier: config.ModelTierSmall, + DefaultEffort: "medium", + }) + if err == nil { + t.Fatal("ResolveStageModel returned nil error, want invalid tier error") + } + if !strings.Contains(err.Error(), `model_tier "flagship" is invalid`) { + t.Fatalf("error = %q, want invalid tier context", err) + } +} + +func TestResolveFirstAvailableUsesFirstConfiguredTier(t *testing.T) { + profile := config.Profile{LLM: config.LLMConfig{ + Provider: config.LLMProviderAnthropic, + Auth: config.LLMAuthSubscription, + Adapter: config.LLMAdapterClaudeCLI, + }} + + got, ok := ResolveFirstAvailable(Request{ + Profile: profile, + Stage: StageApprovalOverride, + DefaultEffort: "low", + }, config.ModelTierSmall, config.ModelTierMedium) + if !ok { + t.Fatal("ResolveFirstAvailable ok = false, want medium fallback") + } + if got.Tier != config.ModelTierMedium { + t.Fatalf("Tier = %q, want %q", got.Tier, config.ModelTierMedium) + } + if got.Model != "claude-sonnet-4-6" { + t.Fatalf("Model = %q, want claude-sonnet-4-6", got.Model) + } + if got.Effort != "low" { + t.Fatalf("Effort = %q, want low", got.Effort) + } +} diff --git a/internal/threadanalysis/threadanalysis.go b/internal/threadanalysis/threadanalysis.go new file mode 100644 index 00000000..e079537e --- /dev/null +++ b/internal/threadanalysis/threadanalysis.go @@ -0,0 +1,397 @@ +// Package threadanalysis turns normalized inline thread context into reusable +// lifecycle decisions through the durable LLM execution boundary. +package threadanalysis + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "strings" + "time" + + "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/llmrun" + "github.com/open-cli-collective/codereview-cli/internal/stagemodel" + "github.com/open-cli-collective/codereview-cli/internal/threadcontext" +) + +const ( + inputSchemaVersion = 1 + outputSchemaVersion = 1 +) + +// Decision is the reusable lifecycle action selected for one thread. +type Decision string + +// Thread lifecycle decisions. +const ( + DecisionSkip Decision = "skip" + DecisionReplyOnly Decision = "reply_only" + DecisionAcknowledge Decision = "acknowledge" + DecisionClarify Decision = "clarify" + DecisionConcede Decision = "concede" + DecisionSummarize Decision = "summarize" +) + +// Valid reports whether d is a known thread analysis decision. +func (d Decision) Valid() bool { + switch d { + case DecisionSkip, DecisionReplyOnly, DecisionAcknowledge, DecisionClarify, DecisionConcede, DecisionSummarize: + return true + default: + return false + } +} + +// Result is the domain result returned by thread analysis. +type Result struct { + ThreadID string `json:"thread_id"` + Decision Decision `json:"decision"` + ReplyBody string `json:"reply_body,omitempty"` + Summary string `json:"summary,omitempty"` + Resolve bool `json:"resolve"` + Rationale string `json:"rationale,omitempty"` +} + +// Options are explicit call-site supplied dependencies for one analysis run. +type Options struct { + Store llmrun.Store + RunID string + Provider string + Adapter llm.Adapter + Model string + Effort string + LogPath string + ResumeSessionID string + Now func() time.Time + NewStepID func() string +} + +// AnalyzeThread analyzes one normalized inline thread through llmrun. +func AnalyzeThread(ctx context.Context, opts Options, thread threadcontext.Thread) (Result, error) { + var zero Result + if err := validateOptions(opts); err != nil { + return zero, err + } + threadID := strings.TrimSpace(string(thread.ID)) + if threadID == "" { + return zero, fmt.Errorf("threadanalysis: thread ID is required") + } + input := analysisInputForThread(threadID, thread) + inputHash, err := hashInput(input) + if err != nil { + return zero, err + } + prompt, err := promptForInput(input) + if err != nil { + return zero, err + } + result, err := llmrun.RunStructuredStep(ctx, opts.Store, llmrun.Request{ + RunID: opts.RunID, + Stage: stagemodel.StageThreadAnalysis, + ScopeKey: "thread:" + threadID, + InputHash: inputHash, + Provider: opts.Provider, + Adapter: opts.Adapter, + Model: opts.Model, + Effort: opts.Effort, + Prompt: prompt, + LogPath: opts.LogPath, + ResumeSessionID: opts.ResumeSessionID, + Now: opts.Now, + NewStepID: opts.NewStepID, + }, decodeResultForThread(threadID)) + if err != nil { + return zero, err + } + return result.Value, nil +} + +// AnalyzeThreads analyzes threads serially, preserving input order. +func AnalyzeThreads(ctx context.Context, opts Options, threads []threadcontext.Thread) ([]Result, error) { + results := make([]Result, 0, len(threads)) + for _, thread := range threads { + result, err := AnalyzeThread(ctx, opts, thread) + if err != nil { + return nil, fmt.Errorf("threadanalysis: analyze thread %s: %w", string(thread.ID), err) + } + results = append(results, result) + } + return results, nil +} + +func validateOptions(opts Options) error { + if opts.Store == nil { + return fmt.Errorf("threadanalysis: store is required") + } + if strings.TrimSpace(opts.RunID) == "" { + return fmt.Errorf("threadanalysis: run ID is required") + } + if strings.TrimSpace(opts.Provider) == "" { + return fmt.Errorf("threadanalysis: provider is required") + } + if opts.Adapter == nil { + return fmt.Errorf("threadanalysis: adapter is required") + } + if strings.TrimSpace(opts.Model) == "" { + return fmt.Errorf("threadanalysis: model is required") + } + if opts.NewStepID == nil { + return fmt.Errorf("threadanalysis: step ID generator is required") + } + return nil +} + +func decodeResultForThread(threadID string) llm.Decoder[Result] { + return func(data []byte) (Result, error) { + var raw resultOutput + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&raw); err != nil { + return Result{}, fmt.Errorf("threadanalysis: decode result: %w", err) + } + var extra struct{} + if err := decoder.Decode(&extra); err != io.EOF { + return Result{}, fmt.Errorf("threadanalysis: decode result: trailing data is not allowed") + } + if raw.SchemaVersion != outputSchemaVersion { + return Result{}, fmt.Errorf("threadanalysis: schema_version = %d, want %d", raw.SchemaVersion, outputSchemaVersion) + } + result := Result{ + ThreadID: strings.TrimSpace(raw.ThreadID), + Decision: Decision(strings.ToLower(strings.TrimSpace(string(raw.Decision)))), + ReplyBody: sanitize(raw.ReplyBody), + Summary: sanitize(raw.Summary), + Resolve: raw.Resolve, + Rationale: sanitize(raw.Rationale), + } + if result.ThreadID != threadID { + return Result{}, fmt.Errorf("threadanalysis: thread_id = %q, want %q", result.ThreadID, threadID) + } + if !result.Decision.Valid() { + return Result{}, fmt.Errorf("threadanalysis: decision %q is invalid", result.Decision) + } + if err := validateResult(result); err != nil { + return Result{}, err + } + return result, nil + } +} + +func validateResult(result Result) error { + if result.Decision != DecisionSkip && strings.TrimSpace(result.Rationale) == "" { + return fmt.Errorf("threadanalysis: rationale is required for decision %q", result.Decision) + } + switch result.Decision { + case DecisionSkip: + if result.ReplyBody != "" { + return fmt.Errorf("threadanalysis: reply_body is not allowed for decision %q", result.Decision) + } + if result.Summary != "" { + return fmt.Errorf("threadanalysis: summary is not allowed for decision %q", result.Decision) + } + if result.Resolve { + return fmt.Errorf("threadanalysis: resolve must be false for decision %q", result.Decision) + } + case DecisionReplyOnly: + if result.ReplyBody == "" { + return fmt.Errorf("threadanalysis: reply_body is required for decision %q", result.Decision) + } + if result.Summary != "" { + return fmt.Errorf("threadanalysis: summary is not allowed for decision %q", result.Decision) + } + if result.Resolve { + return fmt.Errorf("threadanalysis: resolve must be false for decision %q", result.Decision) + } + case DecisionAcknowledge, DecisionConcede: + if result.ReplyBody == "" { + return fmt.Errorf("threadanalysis: reply_body is required for decision %q", result.Decision) + } + if result.Resolve && result.Summary == "" { + return fmt.Errorf("threadanalysis: summary is required when resolve is true") + } + case DecisionClarify: + if result.ReplyBody == "" { + return fmt.Errorf("threadanalysis: reply_body is required for decision %q", result.Decision) + } + if result.Summary != "" { + return fmt.Errorf("threadanalysis: summary is not allowed for decision %q", result.Decision) + } + if result.Resolve { + return fmt.Errorf("threadanalysis: resolve must be false for decision %q", result.Decision) + } + case DecisionSummarize: + if result.ReplyBody != "" { + return fmt.Errorf("threadanalysis: reply_body is not allowed for decision %q", result.Decision) + } + if result.Summary == "" { + return fmt.Errorf("threadanalysis: summary is required for decision %q", result.Decision) + } + if !result.Resolve { + return fmt.Errorf("threadanalysis: resolve must be true for decision %q", result.Decision) + } + } + return nil +} + +func analysisInputForThread(threadID string, thread threadcontext.Thread) analysisInput { + comments := make([]analysisComment, 0, len(thread.Comments)) + for _, comment := range thread.Comments { + comments = append(comments, analysisComment{ + ID: sanitize(string(comment.ID)), + ThreadID: sanitize(string(comment.ThreadID)), + Body: sanitize(comment.Body), + Author: analysisIdentityFor(comment.Author.Login, comment.Author.ID, comment.Author.DisplayName), + Anchor: analysisAnchorFor(comment.Anchor), + URL: sanitize(comment.URL), + CreatedAt: formatTime(comment.CreatedAt), + UpdatedAt: formatTime(comment.UpdatedAt), + AuthoredByPostingIdentity: comment.AuthoredByPostingIdentity, + HasFindingMarker: comment.HasFindingMarker, + HasThreadReplyMarker: comment.HasThreadReplyMarker, + HasThreadSummaryMarker: comment.HasThreadSummaryMarker, + }) + } + return analysisInput{ + SchemaVersion: inputSchemaVersion, + ThreadID: sanitize(threadID), + Resolved: thread.Resolved, + Anchor: analysisAnchorFor(thread.Anchor), + Status: analysisStatus{ + CRAuthoredFinding: thread.Status.CRAuthoredFinding, + HasCRSummary: thread.Status.HasCRSummary, + LatestCRCommentID: commentID(thread.Status.LatestCRComment), + LatestHumanReplyAfterCR: commentID(thread.Status.LatestHumanReplyAfterCR), + PendingHumanReply: thread.Status.PendingHumanReply, + }, + Comments: comments, + } +} + +func promptForInput(input analysisInput) (string, error) { + contextJSON, err := json.MarshalIndent(input, "", " ") + if err != nil { + return "", fmt.Errorf("threadanalysis: encode prompt context: %w", err) + } + prompt := strings.Join([]string{ + "Analyze this inline code-review discussion thread.", + "Return JSON only. Do not include markdown fences or prose outside JSON.", + "Use schema_version 1 and fields: thread_id, decision, reply_body, summary, resolve, rationale.", + "Decisions: skip, reply_only, acknowledge, clarify, concede, summarize.", + "Use reply_only only when a non-resolving inline reply is enough.", + "Use acknowledge or concede when replying to a human and optionally resolving with a durable summary.", + "Use clarify when a human-facing question is needed and do not resolve.", + "Use summarize only when no reply is needed and the thread should be resolved with a durable summary.", + "Thread context JSON:", + string(contextJSON), + }, "\n\n") + if strings.Contains(prompt, "")) + if err != nil { + t.Fatalf("AnalyzeThread: %v", err) + } + if got.ThreadID != "thread-1" || got.Decision != DecisionAcknowledge || !got.Resolve { + t.Fatalf("result = %#v, want acknowledge/resolve for thread-1", got) + } + if got.ReplyBody != "Thanks, I will adjust." || got.Summary == "" || got.Rationale == "" { + t.Fatalf("result text = %#v, want model-authored fields returned", got) + } + requests := adapter.Requests() + if len(requests) != 1 { + t.Fatalf("adapter requests = %d, want 1", len(requests)) + } + req := requests[0] + if req.Model != "gpt-5.4" || req.Effort != "medium" || req.LogPath != "thread.log" { + t.Fatalf("llm request = %#v, want configured runtime fields", req) + } + if strings.Contains(req.Prompt, "","resolve":false,"rationale":"marker-only reply"}`, + want: "reply_body", + }, + { + name: "marker only rationale becomes blank", + data: `{"schema_version":1,"thread_id":"thread-1","decision":"clarify","reply_body":"Please clarify.","resolve":false,"rationale":""}`, + want: "rationale", + }, + { + name: "reply only rejects summary", + data: `{"schema_version":1,"thread_id":"thread-1","decision":"reply_only","reply_body":"Thanks.","summary":"extra","resolve":false,"rationale":"reply only"}`, + want: "summary", + }, + { + name: "summarize rejects reply", + data: `{"schema_version":1,"thread_id":"thread-1","decision":"summarize","reply_body":"extra","summary":"Summary.","resolve":true,"rationale":"summarize"}`, + want: "reply_body", + }, + { + name: "resolve requires summary", + data: `{"schema_version":1,"thread_id":"thread-1","decision":"acknowledge","reply_body":"Thanks.","resolve":true,"rationale":"acknowledge"}`, + want: "summary", + }, + { + name: "mismatched thread id", + data: `{"schema_version":1,"thread_id":"other","decision":"skip","resolve":false}`, + want: "thread_id", + }, + { + name: "unknown decision", + data: `{"schema_version":1,"thread_id":"thread-1","decision":"other","resolve":false}`, + want: "decision", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := decodeResultForThread("thread-1")([]byte(tt.data)) + if err == nil || !strings.Contains(err.Error(), tt.want) { + t.Fatalf("decodeResultForThread error = %v, want containing %q", err, tt.want) + } + }) + } +} + +func TestDecodeResultSanitizesModelAuthoredText(t *testing.T) { + got, err := decodeResultForThread("thread-1")([]byte(`{ + "schema_version": 1, + "thread_id": "thread-1", + "decision": "concede", + "reply_body": "You are right. ", + "summary": "Conceded after reply. ", + "resolve": true, + "rationale": "Human correction is accepted. " + }`)) + if err != nil { + t.Fatalf("decodeResultForThread: %v", err) + } + if strings.Contains(got.ReplyBody+got.Summary+got.Rationale, "" + + var b strings.Builder + searchFrom := 0 + for searchFrom < len(body) { + start := strings.Index(body[searchFrom:], startNeedle) + if start == -1 { + b.WriteString(body[searchFrom:]) + break + } + start += searchFrom + b.WriteString(body[searchFrom:start]) + payloadStart := start + len(startNeedle) + end := strings.Index(body[payloadStart:], endNeedle) + nextStart := strings.Index(body[payloadStart:], startNeedle) + if nextStart != -1 && (end == -1 || nextStart < end) { + searchFrom = payloadStart + nextStart + continue + } + if end == -1 { + b.WriteString("<!-- codereview:") + searchFrom = payloadStart + continue + } + searchFrom = payloadStart + end + len(endNeedle) + } + return strings.TrimSpace(marker.SanitizeModelContent(b.String())) +} + +func normalizeComments(thread gitprovider.InlineThread, posting gitprovider.Identity, threadAnchor Anchor) []Comment { + comments := make([]Comment, 0, len(thread.Comments)) + for i, raw := range thread.Comments { + authoredByPosting := sameIdentity(raw.Author, posting) + hasFindingMarker, hasThreadReplyMarker := false, false + if authoredByPosting { + for _, found := range marker.FindActions(raw.Body) { + switch found.Kind { + case marker.ActionKindInlineComment: + hasFindingMarker = true + case marker.ActionKindThreadReply: + hasThreadReplyMarker = true + } + } + } + hasThreadSummaryMarker := authoredByPosting && len(marker.FindThreadSummaries(raw.Body)) > 0 + commentAnchor := Anchor{ + Path: raw.Path, + Side: raw.Side, + Line: raw.Line, + SubjectType: raw.SubjectType, + CommitSHA: raw.CommitSHA, + } + comments = append(comments, Comment{ + ID: raw.ID, + ThreadID: commentThreadID(raw.ThreadID, thread.ID), + Body: SanitizeBody(raw.Body), + Author: raw.Author, + Anchor: firstNonZeroAnchor(commentAnchor, threadAnchor), + URL: raw.URL, + CreatedAt: raw.CreatedAt, + UpdatedAt: raw.UpdatedAt, + AuthoredByPostingIdentity: authoredByPosting, + HasFindingMarker: hasFindingMarker, + HasThreadReplyMarker: hasThreadReplyMarker, + HasThreadSummaryMarker: hasThreadSummaryMarker, + providerOrder: i, + }) + } + sort.SliceStable(comments, func(i, j int) bool { + return commentLess(comments[i], comments[j]) + }) + return comments +} + +func statusForComments(comments []Comment) Status { + status := Status{} + latestCRIndex := -1 + for i := range comments { + comment := &comments[i] + if comment.AuthoredByPostingIdentity && (comment.HasFindingMarker || comment.HasThreadReplyMarker) { + status.CRAuthoredFinding = true + } + if comment.AuthoredByPostingIdentity && comment.HasThreadSummaryMarker { + status.HasCRSummary = true + } + if comment.AuthoredByPostingIdentity && (comment.HasFindingMarker || comment.HasThreadReplyMarker || comment.HasThreadSummaryMarker) { + status.LatestCRComment = comment + latestCRIndex = i + } + } + if latestCRIndex == -1 { + return status + } + for i := latestCRIndex + 1; i < len(comments); i++ { + comment := &comments[i] + if !comment.AuthoredByPostingIdentity { + status.LatestHumanReplyAfterCR = comment + status.PendingHumanReply = true + } + } + return status +} + +func firstNonZeroAnchor(primary, fallback Anchor) Anchor { + if !anchorPresent(primary) { + return fallback + } + if strings.TrimSpace(primary.Path) == "" { + primary.Path = fallback.Path + } + if primary.SubjectType == review.AnchorKindFile { + if strings.TrimSpace(primary.CommitSHA) == "" { + primary.CommitSHA = fallback.CommitSHA + } + return primary + } + if primary.Side == "" { + primary.Side = fallback.Side + } + if primary.Line == 0 { + primary.Line = fallback.Line + } + if primary.SubjectType == "" { + primary.SubjectType = fallback.SubjectType + } + if strings.TrimSpace(primary.CommitSHA) == "" { + primary.CommitSHA = fallback.CommitSHA + } + return primary +} + +func anchorPresent(anchor Anchor) bool { + return strings.TrimSpace(anchor.Path) != "" || + anchor.Side != "" || + anchor.Line != 0 || + anchor.SubjectType != "" || + strings.TrimSpace(anchor.CommitSHA) != "" +} + +func commentLess(left, right Comment) bool { + leftTime, rightTime := effectiveTime(left), effectiveTime(right) + if !leftTime.Equal(rightTime) { + return leftTime.Before(rightTime) + } + if left.providerOrder != right.providerOrder { + return left.providerOrder < right.providerOrder + } + return left.ID < right.ID +} + +func threadLess(left, right Thread) bool { + if left.Anchor.Path != right.Anchor.Path { + return left.Anchor.Path < right.Anchor.Path + } + if left.Anchor.Line != right.Anchor.Line { + return left.Anchor.Line < right.Anchor.Line + } + return left.ID < right.ID +} + +func summaryLess(left, right ThreadSummary) bool { + if left.Anchor.Line != right.Anchor.Line { + return left.Anchor.Line < right.Anchor.Line + } + return left.ThreadID < right.ThreadID +} + +func effectiveTime(comment Comment) time.Time { + if !comment.CreatedAt.IsZero() { + return comment.CreatedAt + } + return comment.UpdatedAt +} + +func commentThreadID(commentID gitprovider.ThreadID, threadID gitprovider.ThreadID) gitprovider.ThreadID { + if strings.TrimSpace(string(commentID)) != "" { + return commentID + } + return threadID +} + +func sameIdentity(author gitprovider.Identity, target gitprovider.Identity) bool { + if strings.TrimSpace(author.ID) != "" && strings.TrimSpace(target.ID) != "" { + return author.ID == target.ID + } + return strings.TrimSpace(author.Login) != "" && author.Login == target.Login +} diff --git a/internal/threadcontext/threadcontext_test.go b/internal/threadcontext/threadcontext_test.go new file mode 100644 index 00000000..a663e1a0 --- /dev/null +++ b/internal/threadcontext/threadcontext_test.go @@ -0,0 +1,422 @@ +package threadcontext + +import ( + "reflect" + "strings" + "testing" + "time" + + "github.com/open-cli-collective/codereview-cli/internal/gitprovider" + "github.com/open-cli-collective/codereview-cli/internal/marker" + "github.com/open-cli-collective/codereview-cli/internal/review" +) + +func TestNormalizeRejectsEmptyPostingIdentity(t *testing.T) { + _, err := Normalize(nil, Options{}) + if err == nil { + t.Fatal("Normalize error = nil, want empty posting identity error") + } + _, err = Normalize(nil, Options{PostingIdentity: gitprovider.Identity{DisplayName: "Review Bot"}}) + if err == nil { + t.Fatal("Normalize display-name-only identity error = nil, want empty posting identity error") + } +} + +func TestNormalizeSortsCommentsDeterministically(t *testing.T) { + threads, err := Normalize([]gitprovider.InlineThread{{ + ID: "thread-1", + Path: "main.go", + Comments: []gitprovider.ThreadComment{ + comment("c-late", human(), "late", at(3)), + comment("c-zero-b", human(), "zero b", time.Time{}), + comment("c-early", human(), "early", at(1)), + comment("c-zero-a", human(), "zero a", time.Time{}), + }, + }}, Options{PostingIdentity: bot()}) + if err != nil { + t.Fatalf("Normalize: %v", err) + } + + got := commentIDs(threads[0].Comments) + want := []gitprovider.CommentID{"c-zero-b", "c-zero-a", "c-early", "c-late"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("comment order = %#v, want %#v", got, want) + } +} + +func TestNormalizeDetectsCRAuthoredFindingThread(t *testing.T) { + body := "finding body\n\n" + actionMarker(t, marker.ActionKindInlineComment) + threads, err := Normalize([]gitprovider.InlineThread{{ + ID: "thread-1", + Path: "main.go", + Resolved: false, + Comments: []gitprovider.ThreadComment{ + comment("c-1", bot(), body, at(1)), + }, + }}, Options{PostingIdentity: bot()}) + if err != nil { + t.Fatalf("Normalize: %v", err) + } + + status := threads[0].Status + if !status.CRAuthoredFinding { + t.Fatalf("CRAuthoredFinding = false, want true") + } + if status.LatestCRComment == nil || status.LatestCRComment.ID != "c-1" { + t.Fatalf("LatestCRComment = %#v, want c-1", status.LatestCRComment) + } + if threads[0].Comments[0].Body != "finding body" { + t.Fatalf("sanitized body = %q, want finding body", threads[0].Comments[0].Body) + } + if !threads[0].Comments[0].HasFindingMarker { + t.Fatalf("HasFindingMarker = false, want true") + } +} + +func TestNormalizeIdentityMatchingUsesIDBeforeLogin(t *testing.T) { + tests := []struct { + name string + postingIdentity gitprovider.Identity + commentAuthor gitprovider.Identity + wantCRAuthored bool + }{ + { + name: "matching IDs with changed login", + postingIdentity: gitprovider.Identity{Login: "new-login", ID: "bot-id"}, + commentAuthor: gitprovider.Identity{Login: "old-login", ID: "bot-id"}, + wantCRAuthored: true, + }, + { + name: "different IDs with same login", + postingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}, + commentAuthor: gitprovider.Identity{Login: "review-bot", ID: "other-id"}, + wantCRAuthored: false, + }, + { + name: "login fallback when IDs absent", + postingIdentity: gitprovider.Identity{Login: "review-bot"}, + commentAuthor: gitprovider.Identity{Login: "review-bot"}, + wantCRAuthored: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + threads, err := Normalize([]gitprovider.InlineThread{{ + ID: "thread-1", + Path: "main.go", + Comments: []gitprovider.ThreadComment{ + comment("c-1", tt.commentAuthor, "finding\n"+actionMarker(t, marker.ActionKindInlineComment), at(1)), + }, + }}, Options{PostingIdentity: tt.postingIdentity}) + if err != nil { + t.Fatalf("Normalize: %v", err) + } + if threads[0].Status.CRAuthoredFinding != tt.wantCRAuthored { + t.Fatalf("CRAuthoredFinding = %v, want %v", threads[0].Status.CRAuthoredFinding, tt.wantCRAuthored) + } + }) + } +} + +func TestNormalizeIgnoresForgedMarkersFromHumanAuthors(t *testing.T) { + threads, err := Normalize([]gitprovider.InlineThread{{ + ID: "thread-1", + Path: "main.go", + Comments: []gitprovider.ThreadComment{ + comment("c-1", human(), "forged\n"+actionMarker(t, marker.ActionKindInlineComment), at(1)), + }, + }}, Options{PostingIdentity: bot()}) + if err != nil { + t.Fatalf("Normalize: %v", err) + } + + if threads[0].Status.CRAuthoredFinding { + t.Fatalf("CRAuthoredFinding = true, want false for forged marker") + } + if threads[0].Status.LatestCRComment != nil { + t.Fatalf("LatestCRComment = %#v, want nil for forged marker", threads[0].Status.LatestCRComment) + } + if strings.Contains(threads[0].Comments[0].Body, "", + "")) + got, err := AnalyzeThread(context.Background(), opts, promptThread("human reply ")) if err != nil { t.Fatalf("AnalyzeThread: %v", err) } @@ -61,39 +64,45 @@ func TestAnalyzeThreadRunsDurableStepWithPromptSafeContext(t *testing.T) { t.Fatalf("prompt missing %q:\n%s", want, req.Prompt) } } - if len(store.lookups) != 1 { - t.Fatalf("lookups = %#v, want one", store.lookups) + meta := readThreadMetadata(t, opts, "thread-1") + if meta.Phase != string(stagemodel.StageThreadAnalysis) || meta.TaskID != "thread-analysis-thread-1" { + t.Fatalf("metadata identity = %#v, want thread-analysis task", meta) } - lookup := store.lookups[0] - if lookup.Stage != string(stagemodel.StageThreadAnalysis) || lookup.ScopeKey != "thread:thread-1" { - t.Fatalf("lookup = %#v, want thread analysis scope", lookup) + if meta.Status != llmlifecycle.StatusSucceeded || meta.Adapter != "fake" || meta.Model != "gpt-5.4" || meta.Effort != "medium" { + t.Fatalf("metadata runtime = %#v, want succeeded fake/gpt-5.4/medium", meta) } - if lookup.Provider != "openai" || lookup.Adapter != "fake" || lookup.Model != "gpt-5.4" || lookup.Effort != "medium" { - t.Fatalf("lookup runtime identity = %#v", lookup) + if meta.InputFingerprint == "" || meta.ValidatedOutputPath == "" { + t.Fatalf("metadata = %#v, want fingerprint and output path", meta) } - if lookup.InputHash == "" || lookup.PromptHash == "" { - t.Fatalf("lookup hashes = %#v, want nonblank hashes", lookup) - } - if len(store.inserted) != 1 || store.inserted[0].Status != ledger.LLMStepStatusCompleted { - t.Fatalf("inserted = %#v, want completed step", store.inserted) - } - if strings.Contains(store.inserted[0].StructuredOutputJSON, "Here is the JSON") { - t.Fatalf("persisted output = %q, want accepted JSON only", store.inserted[0].StructuredOutputJSON) + if len(store.inserted) != 1 || store.inserted[0].SessionRowID != meta.SessionRowID { + t.Fatalf("inserted sessions = %#v, want one matching metadata session", store.inserted) } + assertFileOmits(t, meta.ValidatedOutputPath, "Here is the JSON") } func TestAnalyzeThreadCacheHitSkipsAdapter(t *testing.T) { - store := &fakeStore{completed: completedStep(`{ - "schema_version": 1, - "thread_id": "thread-1", - "decision": "summarize", - "summary": "Resolved summary.", - "resolve": true, - "rationale": "Already resolved." - }`)} + store := newFakeStore() + seedAdapter := &llm.FakeAdapter{NameValue: "fake"} + seedAdapter.Queue(llm.FakeResult{ + SessionID: "provider-session", + Response: llm.Response{StructuredOutput: []byte(`{ + "schema_version": 1, + "thread_id": "thread-1", + "decision": "summarize", + "summary": "Resolved summary.", + "resolve": true, + "rationale": "Already resolved." + }`)}, + }) + opts := testOptions(t, store, seedAdapter) + if _, err := AnalyzeThread(context.Background(), opts, promptThread("reply")); err != nil { + t.Fatalf("AnalyzeThread seed: %v", err) + } + adapter := &llm.FakeAdapter{NameValue: "fake"} + opts.Adapter = adapter - got, err := AnalyzeThread(context.Background(), testOptions(store, adapter), promptThread("reply")) + got, err := AnalyzeThread(context.Background(), opts, promptThread("reply")) if err != nil { t.Fatalf("AnalyzeThread: %v", err) } @@ -103,46 +112,39 @@ func TestAnalyzeThreadCacheHitSkipsAdapter(t *testing.T) { if len(adapter.Requests()) != 0 { t.Fatalf("adapter requests = %#v, want cache hit to skip adapter", adapter.Requests()) } - if len(store.inserted) != 0 { - t.Fatalf("inserted = %#v, want none on cache hit", store.inserted) + if len(store.inserted) != 1 { + t.Fatalf("inserted sessions = %#v, want no new session on cache hit", store.inserted) } } -func TestAnalyzeThreadInputHashChangesWhenLifecycleContextChanges(t *testing.T) { - store := &fakeStore{findErr: ledger.ErrNotFound} +func TestAnalyzeThreadRejectsStaleLifecycleContextBeforeProviderCall(t *testing.T) { + store := newFakeStore() adapter := &llm.FakeAdapter{NameValue: "fake"} adapter.Queue(llm.FakeResult{SessionID: "session-1", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-1"))}}) - adapter.Queue(llm.FakeResult{SessionID: "session-2", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-1"))}}) - opts := testOptions(store, adapter) - ids := []string{"step-1", "step-2"} - opts.NewStepID = func() string { - id := ids[0] - ids = ids[1:] - return id - } + opts := testOptions(t, store, adapter) if _, err := AnalyzeThread(context.Background(), opts, promptThread("first reply")); err != nil { t.Fatalf("AnalyzeThread first: %v", err) } - if _, err := AnalyzeThread(context.Background(), opts, promptThread("changed reply")); err != nil { - t.Fatalf("AnalyzeThread second: %v", err) + staleAdapter := &llm.FakeAdapter{NameValue: "fake"} + opts.Adapter = staleAdapter + _, err := AnalyzeThread(context.Background(), opts, promptThread("changed reply")) + if err == nil || !strings.Contains(err.Error(), "input fingerprint changed") { + t.Fatalf("AnalyzeThread stale context error = %v, want fingerprint error", err) } - if len(store.lookups) != 2 { - t.Fatalf("lookups = %#v, want two", store.lookups) - } - if store.lookups[0].InputHash == store.lookups[1].InputHash { - t.Fatalf("input hashes both %q, want lifecycle context change to invalidate cache", store.lookups[0].InputHash) + if len(staleAdapter.Requests()) != 0 { + t.Fatalf("stale adapter requests = %#v, want no provider call", staleAdapter.Requests()) } } func TestAnalyzeThreadsPreservesOrderAndFailsFast(t *testing.T) { - store := &fakeStore{findErr: ledger.ErrNotFound} + store := newFakeStore() adapter := &llm.FakeAdapter{NameValue: "fake"} adapter.Queue(llm.FakeResult{SessionID: "session-1", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-1"))}}) adapter.Queue(llm.FakeResult{SessionID: "session-2", Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-2","decision":"bogus","resolve":false}`)}}) adapter.Queue(llm.FakeResult{SessionID: "session-2-retry", Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-2","decision":"bogus","resolve":false}`)}}) adapter.Queue(llm.FakeResult{SessionID: "session-3", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-3"))}}) - opts := testOptions(store, adapter) + opts := testOptions(t, store, adapter) ids := []string{"step-1", "step-2"} opts.NewStepID = func() string { id := ids[0] @@ -161,20 +163,17 @@ func TestAnalyzeThreadsPreservesOrderAndFailsFast(t *testing.T) { if results != nil { t.Fatalf("results = %#v, want nil on fail-fast error", results) } - if len(store.lookups) != 2 { - t.Fatalf("lookups = %#v, want fail fast before third thread", store.lookups) - } - if store.lookups[0].ScopeKey != "thread:thread-1" || store.lookups[1].ScopeKey != "thread:thread-2" { - t.Fatalf("lookup order = %#v, want input order", store.lookups) + if len(adapter.Requests()) != 3 { + t.Fatalf("adapter requests = %d, want thread-1 plus thread-2 initial/retry before fail-fast", len(adapter.Requests())) } } func TestAnalyzeThreadsReturnsResultsInInputOrder(t *testing.T) { - store := &fakeStore{findErr: ledger.ErrNotFound} + store := newFakeStore() adapter := &llm.FakeAdapter{NameValue: "fake"} adapter.Queue(llm.FakeResult{SessionID: "session-2", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-2"))}}) adapter.Queue(llm.FakeResult{SessionID: "session-1", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-1"))}}) - opts := testOptions(store, adapter) + opts := testOptions(t, store, adapter) ids := []string{"step-1", "step-2"} opts.NewStepID = func() string { id := ids[0] @@ -269,7 +268,7 @@ func TestDecodeResultSanitizesModelAuthoredText(t *testing.T) { func TestAnalyzeThreadRejectsMissingRequiredOptionsBeforeProviderCall(t *testing.T) { adapter := &llm.FakeAdapter{NameValue: "fake"} - opts := testOptions(&fakeStore{}, adapter) + opts := testOptions(t, newFakeStore(), adapter) opts.NewStepID = nil _, err := AnalyzeThread(context.Background(), opts, promptThread("reply")) @@ -284,7 +283,7 @@ func TestAnalyzeThreadRejectsMissingRequiredOptionsBeforeProviderCall(t *testing func TestProductionImportsStayDomainOnly(t *testing.T) { allowedInternal := map[string]bool{ "github.com/open-cli-collective/codereview-cli/internal/llm": true, - "github.com/open-cli-collective/codereview-cli/internal/llmrun": true, + "github.com/open-cli-collective/codereview-cli/internal/llmlifecycle": true, "github.com/open-cli-collective/codereview-cli/internal/stagemodel": true, "github.com/open-cli-collective/codereview-cli/internal/threadcontext": true, } @@ -325,7 +324,7 @@ func TestProductionImportsStayDomainOnly(t *testing.T) { for _, fragment := range rejectedFragments { if strings.Contains(importPath, fragment) { pos := fset.Position(spec.Pos()) - t.Fatalf("%s imports %q; threadanalysis production code must stay on the domain/llmrun boundary", pos, importPath) + t.Fatalf("%s imports %q; threadanalysis production code must stay on the domain/lifecycle boundary", pos, importPath) } } pos := fset.Position(spec.Pos()) @@ -338,17 +337,18 @@ func TestProductionImportsStayDomainOnly(t *testing.T) { } } -func testOptions(store llmrun.Store, adapter llm.Adapter) Options { +func testOptions(t *testing.T, store llmlifecycle.Store, adapter llm.Adapter) Options { + t.Helper() return Options{ - Store: store, - RunID: "run-1", - Provider: "openai", - Adapter: adapter, - Model: "gpt-5.4", - Effort: "medium", - LogPath: "thread.log", - Now: fixedClock(), - NewStepID: func() string { return "step-1" }, + Store: store, + RunID: "run-1", + Adapter: adapter, + Model: "gpt-5.4", + Effort: "medium", + LogPath: "thread.log", + LifecyclePaths: llmlifecycle.Paths{LLMTasksDir: filepath.Join(t.TempDir(), "llm-tasks")}, + Now: fixedClock(), + NewStepID: sequence("step"), } } @@ -401,49 +401,68 @@ func validSkipOutput(threadID string) string { return `{"schema_version":1,"thread_id":"` + threadID + `","decision":"skip","resolve":false}` } -func completedStep(output string) ledger.LLMStep { - return ledger.LLMStep{ - StepID: "step-cached", - RunID: "run-1", - Stage: string(stagemodel.StageThreadAnalysis), - ScopeKey: "thread:thread-1", - InputHash: "input-hash", - PromptHash: "prompt-hash", - Provider: "openai", - Adapter: "fake", - Model: "gpt-5.4", - Effort: "medium", - ProviderSessionID: "provider-session", - Status: ledger.LLMStepStatusCompleted, - StructuredOutputJSON: output, - StartedAt: testNow, - } -} - type fakeStore struct { - completed ledger.LLMStep - findErr error insertErr error - lookups []ledger.LLMStepLookup - inserted []ledger.LLMStep + getErr error + sessions map[string]ledger.Session + inserted []ledger.Session } -func (s *fakeStore) FindCompletedLLMStep(_ context.Context, lookup ledger.LLMStepLookup) (ledger.LLMStep, error) { - s.lookups = append(s.lookups, lookup) - if s.findErr != nil { - return ledger.LLMStep{}, s.findErr - } - return s.completed, nil +func newFakeStore() *fakeStore { + return &fakeStore{sessions: map[string]ledger.Session{}} } -func (s *fakeStore) InsertLLMStep(_ context.Context, step ledger.LLMStep) error { +func (s *fakeStore) InsertSession(_ context.Context, session ledger.Session) error { if s.insertErr != nil { return s.insertErr } - s.inserted = append(s.inserted, step) + s.sessions[session.SessionRowID] = session + s.inserted = append(s.inserted, session) return nil } +func (s *fakeStore) GetSession(_ context.Context, rowID string) (ledger.Session, error) { + if s.getErr != nil { + return ledger.Session{}, s.getErr + } + session, ok := s.sessions[rowID] + if !ok { + return ledger.Session{}, ledger.ErrNotFound + } + return session, nil +} + +func readThreadMetadata(t *testing.T, opts Options, threadID string) llmlifecycle.Metadata { + t.Helper() + meta, ok, err := llmlifecycle.ReadMetadata(opts.LifecyclePaths, "thread-analysis-"+threadID) + if err != nil { + t.Fatalf("ReadMetadata: %v", err) + } + if !ok { + t.Fatalf("thread metadata for %s missing", threadID) + } + return meta +} + +func assertFileOmits(t *testing.T, path, unwanted string) { + t.Helper() + data, err := os.ReadFile(path) // #nosec G304 -- test helper reads artifact path generated under t.TempDir. + if err != nil { + t.Fatalf("ReadFile(%s): %v", path, err) + } + if strings.Contains(string(data), unwanted) { + t.Fatalf("%s = %q, want it to omit %q", path, string(data), unwanted) + } +} + +func sequence(prefix string) func() string { + next := 0 + return func() string { + next++ + return fmt.Sprintf("%s-%d", prefix, next) + } +} + var testNow = time.Date(2026, 6, 22, 12, 0, 0, 0, time.UTC) func fixedClock() func() time.Time { diff --git a/internal/threadrespond/threadrespond.go b/internal/threadrespond/threadrespond.go index 31a48c52..199108a9 100644 --- a/internal/threadrespond/threadrespond.go +++ b/internal/threadrespond/threadrespond.go @@ -16,7 +16,7 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/gitprovider" "github.com/open-cli-collective/codereview-cli/internal/ledger" "github.com/open-cli-collective/codereview-cli/internal/llm" - "github.com/open-cli-collective/codereview-cli/internal/llmrun" + "github.com/open-cli-collective/codereview-cli/internal/llmlifecycle" "github.com/open-cli-collective/codereview-cli/internal/modelprefs" "github.com/open-cli-collective/codereview-cli/internal/outbox" "github.com/open-cli-collective/codereview-cli/internal/pipeline" @@ -41,7 +41,7 @@ const ( // Store is the durable state required by response planning and posting. type Store interface { - llmrun.Store + llmlifecycle.Store GetRun(context.Context, string) (ledger.Run, error) AllocateRun(context.Context, ledger.AllocateRunParams) (ledger.Run, error) InsertPlannedAction(context.Context, ledger.PlannedAction) error @@ -171,7 +171,7 @@ func fresh(ctx context.Context, opts Options, req Request) (res Result, err erro if err != nil { return result, err } - analyses, err := analyzeThreads(ctx, opts, req, run, artifacts, runtime, eligible) + analyses, err := analyzeThreads(ctx, opts, run, artifacts, runtime, eligible) if err != nil { return result, err } @@ -394,19 +394,19 @@ func abortIfMoved(ctx context.Context, opts Options, req Request, run ledger.Run return true, fmt.Sprintf("threadrespond premises moved: head %s -> %s, base %s -> %s", run.SHA, pr.Head.SHA, run.BaseSHA, pr.Base.SHA), nil } -func analyzeThreads(ctx context.Context, opts Options, req Request, run ledger.Run, artifacts pipeline.ArtifactPaths, runtime llmRuntime, threads []threadcontext.Thread) ([]threadanalysis.Result, error) { +func analyzeThreads(ctx context.Context, opts Options, run ledger.Run, artifacts pipeline.ArtifactPaths, runtime llmRuntime, threads []threadcontext.Thread) ([]threadanalysis.Result, error) { results := make([]threadanalysis.Result, 0, len(threads)) for _, thread := range threads { result, err := threadanalysis.AnalyzeThread(ctx, threadanalysis.Options{ - Store: opts.Store, - RunID: run.RunID, - Provider: string(req.Profile.LLM.Provider), - Adapter: opts.Adapter, - Model: runtime.model, - Effort: runtime.effort, - LogPath: threadLogPath(artifacts, thread.ID), - Now: opts.now, - NewStepID: opts.newStepID, + Store: opts.Store, + RunID: run.RunID, + Adapter: opts.Adapter, + Model: runtime.model, + Effort: runtime.effort, + LogPath: threadLogPath(artifacts, thread.ID), + LifecyclePaths: llmlifecycle.Paths{LLMTasksDir: artifacts.LLMTasksDir}, + Now: opts.now, + NewStepID: opts.newStepID, }, thread) if err != nil { return nil, err From c63e75bee31acf1a3ebc94b616d1e55348a1969d Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:02:54 -0400 Subject: [PATCH 06/32] fix: resume thread response attempts --- docs/llm-task-artifacts.md | 12 +- internal/cmd/reviewcmd/reviewcmd.go | 6 + internal/cmd/reviewcmd/reviewcmd_test.go | 22 +- internal/llmlifecycle/lifecycle_test.go | 39 +++ internal/threadrespond/threadrespond.go | 182 +++++++++++--- internal/threadrespond/threadrespond_test.go | 235 +++++++++++++++++++ 6 files changed, 454 insertions(+), 42 deletions(-) diff --git a/docs/llm-task-artifacts.md b/docs/llm-task-artifacts.md index 9e7fa23d..0907c983 100644 --- a/docs/llm-task-artifacts.md +++ b/docs/llm-task-artifacts.md @@ -137,9 +137,15 @@ thread and return a reusable decision, reply body, summary, resolve flag, and rationale. For `cr respond`, these tasks are run-owned. Successful analyses persist normal -ledger-backed sessions and are reused on retry. If the normalized thread input -changes under the same task directory, the lifecycle runner fails closed with -rerun guidance instead of overwriting the prior task. +ledger-backed sessions and are reused on retry. A normal `cr respond` invocation +resumes the latest incomplete response run for the same PR head, base, profile, +posting identity, and post mode. If analysis completed but planning or posting +was interrupted, rerun loads the persisted thread-analysis task instead of +calling the LLM again; if planned actions already exist, rerun continues through +the ledger/outbox post phase instead of replanning. Use `cr respond --rerun` to +start a fresh response attempt and leave the incomplete attempt untouched. If +the normalized thread input changes under the same task directory, the lifecycle +runner fails closed with rerun guidance instead of overwriting the prior task. ## Approval Override Task diff --git a/internal/cmd/reviewcmd/reviewcmd.go b/internal/cmd/reviewcmd/reviewcmd.go index 242cb7ca..e37b232a 100644 --- a/internal/cmd/reviewcmd/reviewcmd.go +++ b/internal/cmd/reviewcmd/reviewcmd.go @@ -135,6 +135,7 @@ type commandFlags struct { type respondFlags struct { dryRun bool noPost bool + rerun bool retryPosts string jsonOutput bool noResolveThreads bool @@ -205,6 +206,7 @@ func newRespondCommand(opts *root.Options, factory RuntimeFactory) *cobra.Comman } cmd.Flags().BoolVar(&flags.dryRun, "dry-run", false, "Plan thread responses without posting") cmd.Flags().BoolVar(&flags.noPost, "no-post", false, "Alias for --dry-run") + cmd.Flags().BoolVar(&flags.rerun, "rerun", false, "Bypass incomplete response-run resume and start a fresh response attempt") cmd.Flags().StringVar(&flags.retryPosts, "retry-posts", "", "Retry missing or failed required posts for the given response run ID") cmd.Flags().BoolVar(&flags.jsonOutput, "json", false, "Emit JSON") cmd.Flags().BoolVar(&flags.noResolveThreads, "no-resolve-threads", false, "Do not plan thread-resolution actions") @@ -429,6 +431,9 @@ func runRespond(ctx context.Context, cmd *cobra.Command, opts *root.Options, fac if retryRunID != "" && flags.dryRun { return exitcode.Usage(fmt.Errorf("--retry-posts cannot be used with --dry-run or --no-post")) } + if retryRunID != "" && flags.rerun { + return exitcode.Usage(fmt.Errorf("--retry-posts cannot be used with --rerun")) + } path, err := configPath(opts) if err != nil { return exitcode.AuthConfig(err) @@ -475,6 +480,7 @@ func runRespond(ctx context.Context, cmd *cobra.Command, opts *root.Options, fac PostingIdentity: runtime.PostingIdentity, DryRun: flags.dryRun, NoResolveThreads: noResolve, + Rerun: flags.rerun, RetryRunID: retryRunID, }) if err != nil { diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index ff057943..9c823c83 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -128,7 +128,7 @@ func TestRespondDryRunCallsResponderAndRendersText(t *testing.T) { if req.PRRef.Number != 29 || req.ProfileName != "home" || req.PostingIdentity.Login != "review-bot" { t.Fatalf("respond request identity/ref = %#v", req) } - if !req.DryRun || !req.NoResolveThreads { + if !req.DryRun || !req.NoResolveThreads || req.Rerun { t.Fatalf("respond request flags = %#v, want dry-run no-resolve", req) } if req.RetryRunID != "" { @@ -143,6 +143,26 @@ func TestRespondDryRunCallsResponderAndRendersText(t *testing.T) { } } +func TestRespondRerunFlagCallsResponder(t *testing.T) { + runner := &fakeRunner{respondResult: testThreadRespondResult(ledger.OutcomeDryRun)} + cmd, _ := newTestCommand(t, testConfig(), fakeFactory(runner)) + + err := root.Execute(cmd, []string{ + "respond", "https://github.com/open-cli-collective/codereview-cli/pull/29", + "--dry-run", + "--rerun", + }) + if err != nil { + t.Fatalf("Execute respond rerun: %v", err) + } + if len(runner.respondRequests) != 1 { + t.Fatalf("respond calls = %d, want 1", len(runner.respondRequests)) + } + if !runner.respondRequests[0].Rerun || !runner.respondRequests[0].DryRun { + t.Fatalf("respond request = %#v, want rerun dry-run", runner.respondRequests[0]) + } +} + func TestRespondRetryPostsFlagCallsResponder(t *testing.T) { result := testThreadRespondResult(ledger.OutcomeComment) result.Plan = reviewplan.Plan{} diff --git a/internal/llmlifecycle/lifecycle_test.go b/internal/llmlifecycle/lifecycle_test.go index 64ce8b6c..e3e80279 100644 --- a/internal/llmlifecycle/lifecycle_test.go +++ b/internal/llmlifecycle/lifecycle_test.go @@ -241,6 +241,45 @@ func TestRunStructuredRerunsFailedBlockingTaskWithStoredProviderSession(t *testi } } +func TestRunStructuredRerunsFailedBlockingTaskWithLatestAttemptSession(t *testing.T) { + ctx := context.Background() + store := newLifecycleStore() + adapter := &llm.FakeAdapter{NameValue: "fake-llm", SupportsResumeValue: true} + adapter.Queue(llm.FakeResult{ + SessionID: "provider-session-3", + Response: llm.Response{StructuredOutput: []byte(`{"ok":true}`)}, + }) + req := lifecycleRequest(t, store, adapter) + failedMeta := BaseMetadata(req, SessionDraft{ + RowID: "failed-row", + Adapter: "fake-llm", + Model: req.Model, + Effort: req.Effort, + }) + failedMeta.Status = StatusFailedBlocking + failedMeta.Error = "previous failure" + failedMeta.ProviderSessionID = "" + failedMeta.Attempts = []AttemptMetadata{ + {Attempt: "initial", ProviderSessionID: "provider-session-1", DecodeError: "invalid"}, + {Attempt: "retry", ProviderSessionID: "provider-session-2", DecodeError: "still invalid"}, + } + if err := WriteMetadata(req.Paths, failedMeta); err != nil { + t.Fatalf("WriteMetadata: %v", err) + } + + got, err := RunStructured(ctx, req, decodeLifecyclePayload) + if err != nil { + t.Fatalf("RunStructured retry: %v", err) + } + if !got.Value.OK { + t.Fatalf("retry value = %#v, want ok", got.Value) + } + resumes := adapter.Resumes() + if len(resumes) != 1 || resumes[0].SessionID != "provider-session-2" { + t.Fatalf("resumes = %#v, want resume from latest failed attempt session", resumes) + } +} + func TestRunStructuredLoadsIsolatedFailureWithoutRerun(t *testing.T) { ctx := context.Background() store := newLifecycleStore() diff --git a/internal/threadrespond/threadrespond.go b/internal/threadrespond/threadrespond.go index 199108a9..936f7094 100644 --- a/internal/threadrespond/threadrespond.go +++ b/internal/threadrespond/threadrespond.go @@ -4,8 +4,10 @@ package threadrespond import ( "context" + "encoding/json" "errors" "fmt" + "os" "path/filepath" "strings" "time" @@ -37,12 +39,15 @@ const ( freshRunIDAttempts = 3 lockPRAttempts = 3 + + responseRunMarkerSchema = 1 ) // Store is the durable state required by response planning and posting. type Store interface { llmlifecycle.Store GetRun(context.Context, string) (ledger.Run, error) + ListRunsForHeadScope(context.Context, ledger.ListRunsForHeadScopeParams) ([]ledger.Run, error) AllocateRun(context.Context, ledger.AllocateRunParams) (ledger.Run, error) InsertPlannedAction(context.Context, ledger.PlannedAction) error InsertPlannedActions(context.Context, []ledger.PlannedAction) error @@ -86,6 +91,7 @@ type Request struct { PostingIdentity gitprovider.Identity DryRun bool NoResolveThreads bool + Rerun bool RetryRunID string } @@ -143,12 +149,8 @@ func fresh(ctx context.Context, opts Options, req Request) (res Result, err erro } eligible := eligibleThreads(threads) - runID := opts.newRunID() - artifacts, err := pipeline.ArtifactPathsForRun(opts.Layout, req.PRRef, pr, req.ProfileName, postingKey(req.PostingIdentity), runID) - if err != nil { - return Result{PR: pr, PRKey: prKey, Threads: threads, EligibleThreads: eligible, ExitCode: exitFailed}, err - } - run, err := allocateRun(ctx, opts, req, pr, prKey, runID, artifacts.Dir, postMode(req)) + mode := postMode(req) + run, artifacts, err := allocateOrResumeRun(ctx, opts, req, pr, prKey, mode) if err != nil { return Result{PR: pr, PRKey: prKey, Artifacts: artifacts, Threads: threads, EligibleThreads: eligible, ExitCode: exitFailed}, err } @@ -163,45 +165,56 @@ func fresh(ctx context.Context, opts Options, req Request) (res Result, err erro } defer completeFailedOnError(ctx, opts, &result, &err) - if len(eligible) > 0 { - if opts.Adapter == nil { - return result, fmt.Errorf("threadrespond: adapter is required") - } - runtime, err := resolveRuntime(opts, req) - if err != nil { + existingActions, err := opts.Store.ListPlannedActions(ctx, run.RunID) + if err != nil { + return result, err + } + if len(existingActions) > 0 { + if err := validateResponseActions(existingActions); err != nil { return result, err } - analyses, err := analyzeThreads(ctx, opts, run, artifacts, runtime, eligible) - if err != nil { - return result, err + result.PlannedActions = existingActions + } else { + if len(eligible) > 0 { + if opts.Adapter == nil { + return result, fmt.Errorf("threadrespond: adapter is required") + } + runtime, err := resolveRuntime(opts, req) + if err != nil { + return result, err + } + analyses, err := analyzeThreads(ctx, opts, run, artifacts, runtime, eligible) + if err != nil { + return result, err + } + result.Analyses = analyses + result.Responses = responsesFromAnalyses(analyses) } - result.Analyses = analyses - result.Responses = responsesFromAnalyses(analyses) - } - plan, err := reviewplan.BuildThreadResponses(reviewplan.ThreadResponseRequest{ - PostMode: planPostMode(req), - ProviderCaps: effectiveCaps(opts.Provider.Capabilities(), req), - Responses: result.Responses, - Now: opts.now, - NewActionID: opts.newActionID, - }) - if err != nil { - return result, err - } - result.Plan = plan - plannedActions := make([]ledger.PlannedAction, 0, len(plan.Actions)) - for _, action := range plan.Actions { - planned, err := plannedactions.FromReviewPlan(run.RunID, action) + plan, err := reviewplan.BuildThreadResponses(reviewplan.ThreadResponseRequest{ + PostMode: planPostMode(req), + ProviderCaps: effectiveCaps(opts.Provider.Capabilities(), req), + Responses: result.Responses, + Now: opts.now, + NewActionID: opts.newActionID, + }) if err != nil { return result, err } - plannedActions = append(plannedActions, planned) - } - if err := opts.Store.InsertPlannedActions(ctx, plannedActions); err != nil { - return result, err + result.Plan = plan + plannedActions := make([]ledger.PlannedAction, 0, len(plan.Actions)) + for _, action := range plan.Actions { + planned, err := plannedactions.FromReviewPlan(run.RunID, action) + if err != nil { + return result, err + } + plannedActions = append(plannedActions, planned) + } + if err := opts.Store.InsertPlannedActions(ctx, plannedActions); err != nil { + return result, err + } + result.PlannedActions = plannedActions } - result.PlannedActions = plannedActions if req.DryRun { if err := opts.Store.CompleteRun(ctx, run.RunID, ledger.OutcomeDryRun, opts.now()); err != nil { @@ -253,7 +266,7 @@ func fresh(ctx context.Context, opts Options, req Request) (res Result, err erro Run: run, PRRef: req.PRRef, PostingIdentity: req.PostingIdentity, - DesiredOutcome: ledger.OutcomeComment, + DesiredOutcome: desiredOutcomeForRetry(result.PlannedActions), }) result.Outbox = postResult result.ExitCode = postResult.ExitCode @@ -494,6 +507,99 @@ func combineReplyAndSummary(reply, summary string) string { return reply + "\n\nSummary:\n" + summary } +func allocateOrResumeRun(ctx context.Context, opts Options, req Request, pr gitprovider.PR, prKey string, mode ledger.PostMode) (ledger.Run, pipeline.ArtifactPaths, error) { + if !req.Rerun { + run, ok, err := findIncompleteRun(ctx, opts.Store, req, prKey, pr, mode) + if err != nil { + return ledger.Run{}, pipeline.ArtifactPaths{}, err + } + if ok { + return run, pipeline.ArtifactPathsFromDir(run.ArtifactPath), nil + } + } + + var lastErr error + for attempt := 0; attempt < freshRunIDAttempts; attempt++ { + runID := opts.newRunID() + artifacts, err := pipeline.ArtifactPathsForRun(opts.Layout, req.PRRef, pr, req.ProfileName, postingKey(req.PostingIdentity), runID) + if err != nil { + return ledger.Run{}, artifacts, err + } + run, err := allocateRun(ctx, opts, req, pr, prKey, runID, artifacts.Dir, mode) + if err == nil { + if err := writeResponseRunMarker(artifacts.Dir, run.RunID); err != nil { + return ledger.Run{}, artifacts, err + } + return run, artifacts, nil + } + if !errors.Is(err, ledger.ErrRunExists) { + return ledger.Run{}, artifacts, err + } + lastErr = err + } + return ledger.Run{}, pipeline.ArtifactPaths{}, fmt.Errorf("threadrespond: allocate fresh run ID after %d attempts: %w", freshRunIDAttempts, lastErr) +} + +func findIncompleteRun(ctx context.Context, store Store, req Request, prKey string, pr gitprovider.PR, mode ledger.PostMode) (ledger.Run, bool, error) { + runs, err := store.ListRunsForHeadScope(ctx, ledger.ListRunsForHeadScopeParams{ + PRKey: prKey, + SHA: pr.Head.SHA, + Profile: req.ProfileName, + PostingIdentity: postingKey(req.PostingIdentity), + }) + if err != nil { + return ledger.Run{}, false, err + } + var best ledger.Run + found := false + for _, run := range runs { + if run.BaseSHA != pr.Base.SHA || run.PostMode != mode { + continue + } + if run.Outcome != nil && *run.Outcome != ledger.OutcomeIncomplete { + continue + } + if strings.TrimSpace(run.ArtifactPath) == "" { + continue + } + if !hasResponseRunMarker(run.ArtifactPath) { + continue + } + if !found || run.Attempt > best.Attempt || (run.Attempt == best.Attempt && run.StartedAt.After(best.StartedAt)) { + best = run + found = true + } + } + return best, found, nil +} + +type responseRunMarker struct { + SchemaVersion int `json:"schema_version"` + Kind string `json:"kind"` + RunID string `json:"run_id"` +} + +func writeResponseRunMarker(artifactPath, runID string) error { + data, err := json.MarshalIndent(responseRunMarker{ + SchemaVersion: responseRunMarkerSchema, + Kind: "thread_response", + RunID: runID, + }, "", " ") + if err != nil { + return err + } + return llmlifecycle.WriteFileAtomic(responseRunMarkerPath(artifactPath), append(data, '\n')) +} + +func hasResponseRunMarker(artifactPath string) bool { + info, err := os.Stat(responseRunMarkerPath(artifactPath)) + return err == nil && !info.IsDir() +} + +func responseRunMarkerPath(artifactPath string) string { + return filepath.Join(artifactPath, "thread-response-run.json") +} + func allocateRun(ctx context.Context, opts Options, req Request, pr gitprovider.PR, prKey, runID, artifactPath string, mode ledger.PostMode) (ledger.Run, error) { return opts.Store.AllocateRun(ctx, ledger.AllocateRunParams{ PRKey: prKey, diff --git a/internal/threadrespond/threadrespond_test.go b/internal/threadrespond/threadrespond_test.go index ef756897..61ed58a1 100644 --- a/internal/threadrespond/threadrespond_test.go +++ b/internal/threadrespond/threadrespond_test.go @@ -3,6 +3,7 @@ package threadrespond import ( "context" "encoding/json" + "errors" "fmt" "reflect" "strings" @@ -199,6 +200,236 @@ func TestRunNoMatchingThreadsCompletesWithoutLLM(t *testing.T) { } } +func TestRunResumesIncompleteAnalysisWithoutRecomputing(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionClarify, "Please clarify the intended behavior.", "", false)) + setInlineThreads(t, fixture, []gitprovider.InlineThread{ + markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human), + }) + firstOpts := fixture.options() + firstOpts.NewActionID = func(reviewplan.ActionKind) (string, error) { + return "", context.Canceled + } + + first, err := Run(ctx, firstOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Run interrupted error = %v, want context.Canceled", err) + } + if len(fixture.adapter.Requests()) != 1 { + t.Fatalf("first adapter requests = %d, want one analysis call", len(fixture.adapter.Requests())) + } + storedFirst, getErr := fixture.store.GetRun(ctx, first.Run.RunID) + if getErr != nil { + t.Fatalf("GetRun first: %v", getErr) + } + if storedFirst.Outcome != nil { + t.Fatalf("first run outcome = %v, want incomplete after cancellation", storedFirst.Outcome) + } + if actions, listErr := fixture.store.ListPlannedActions(ctx, first.Run.RunID); listErr != nil || len(actions) != 0 { + t.Fatalf("first planned actions = %#v err=%v, want none before planning", actions, listErr) + } + + fixture.adapter = &llm.FakeAdapter{NameValue: "fake-llm"} + secondOpts := fixture.options() + secondOpts.NewRunID = sequence("fresh") + second, err := Run(ctx, secondOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if err != nil { + t.Fatalf("Run resumed: %v", err) + } + if second.Run.RunID != first.Run.RunID { + t.Fatalf("resumed run = %q, want original %q", second.Run.RunID, first.Run.RunID) + } + if second.Artifacts.Dir != first.Artifacts.Dir { + t.Fatalf("resumed artifact dir = %q, want %q", second.Artifacts.Dir, first.Artifacts.Dir) + } + if len(fixture.adapter.Requests()) != 0 { + t.Fatalf("resumed adapter requests = %d, want cached analysis", len(fixture.adapter.Requests())) + } + if len(second.PlannedActions) != 1 { + t.Fatalf("resumed planned actions = %d, want one reply", len(second.PlannedActions)) + } + if _, err := fixture.store.GetRun(ctx, "fresh-1"); !errors.Is(err, ledger.ErrNotFound) { + t.Fatalf("GetRun fresh-1 error = %v, want ErrNotFound", err) + } + storedSecond, err := fixture.store.GetRun(ctx, first.Run.RunID) + if err != nil { + t.Fatalf("GetRun second: %v", err) + } + if storedSecond.Outcome == nil || *storedSecond.Outcome != ledger.OutcomeDryRun { + t.Fatalf("resumed outcome = %v, want dry_run", storedSecond.Outcome) + } +} + +func TestRunResumesExistingActionsWithoutReplanning(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionSummarize, "", "Summary for future context.", true)) + setInlineThreads(t, fixture, []gitprovider.InlineThread{ + markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human), + }) + firstOpts := fixture.options() + firstOpts.Limiter = cancelLimiter{} + + first, err := Run(ctx, firstOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Run interrupted post error = %v, want context.Canceled", err) + } + if len(fixture.adapter.Requests()) != 1 { + t.Fatalf("first adapter requests = %d, want one analysis call", len(fixture.adapter.Requests())) + } + storedFirst, getErr := fixture.store.GetRun(ctx, first.Run.RunID) + if getErr != nil { + t.Fatalf("GetRun first: %v", getErr) + } + if storedFirst.Outcome != nil { + t.Fatalf("first run outcome = %v, want incomplete after post cancellation", storedFirst.Outcome) + } + if len(first.PlannedActions) != 2 { + t.Fatalf("first planned actions = %d, want reply and resolve", len(first.PlannedActions)) + } + + fixture.adapter = &llm.FakeAdapter{NameValue: "fake-llm"} + secondOpts := fixture.options() + secondOpts.NewRunID = sequence("fresh") + second, err := Run(ctx, secondOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + }) + if err != nil { + t.Fatalf("Run resumed post: %v", err) + } + if second.Run.RunID != first.Run.RunID { + t.Fatalf("resumed run = %q, want original %q", second.Run.RunID, first.Run.RunID) + } + if len(fixture.adapter.Requests()) != 0 { + t.Fatalf("resumed adapter requests = %d, want no replanning LLM call", len(fixture.adapter.Requests())) + } + if _, err := fixture.store.GetRun(ctx, "fresh-1"); !errors.Is(err, ledger.ErrNotFound) { + t.Fatalf("GetRun fresh-1 error = %v, want ErrNotFound", err) + } + if second.Outbox.Outcome != ledger.OutcomeComment || second.Outbox.Posted != 2 { + t.Fatalf("resumed outbox = %#v, want comment with two posted actions", second.Outbox) + } + if replies := fixture.provider.RecordedThreadReplies(fixture.ref); len(replies) != 1 || replies[0].ThreadID != "thread-1" { + t.Fatalf("thread replies = %#v, want resumed thread reply", replies) + } + if resolved := fixture.provider.RecordedResolvedThreads(fixture.ref); !reflect.DeepEqual(resolved, []gitprovider.ThreadID{"thread-1"}) { + t.Fatalf("resolved threads = %#v, want thread-1", resolved) + } +} + +func TestRunRerunBypassesIncompleteResponseRun(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionClarify, "Please clarify.", "", false)) + setInlineThreads(t, fixture, []gitprovider.InlineThread{ + markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human), + }) + firstOpts := fixture.options() + firstOpts.NewActionID = func(reviewplan.ActionKind) (string, error) { + return "", context.Canceled + } + first, err := Run(ctx, firstOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Run interrupted error = %v, want context.Canceled", err) + } + + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionReplyOnly, "Fresh reply.", "", false)) + secondOpts := fixture.options() + secondOpts.NewStepID = sequence("rerun-step") + second, err := Run(ctx, secondOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + Rerun: true, + }) + if err != nil { + t.Fatalf("Run rerun: %v", err) + } + if second.Run.RunID == first.Run.RunID { + t.Fatalf("rerun reused %q, want fresh run", second.Run.RunID) + } + storedFirst, err := fixture.store.GetRun(ctx, first.Run.RunID) + if err != nil { + t.Fatalf("GetRun first: %v", err) + } + if storedFirst.Outcome != nil { + t.Fatalf("bypassed run outcome = %v, want still incomplete", storedFirst.Outcome) + } + if len(fixture.adapter.Requests()) != 2 { + t.Fatalf("adapter requests = %d, want original plus rerun analysis", len(fixture.adapter.Requests())) + } +} + +func TestRunDoesNotResumeIncompleteUnmarkedRun(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + unmarked := fixture.allocateRun(t, "unmarked-review-run", ledger.PostModeDryRun) + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionReplyOnly, "Fresh reply.", "", false)) + setInlineThreads(t, fixture, []gitprovider.InlineThread{ + markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human), + }) + + result, err := Run(ctx, fixture.options(), Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if err != nil { + t.Fatalf("Run: %v", err) + } + if result.Run.RunID == unmarked.RunID { + t.Fatalf("result run = %q, want fresh response run instead of unmarked run", result.Run.RunID) + } + if len(fixture.adapter.Requests()) != 1 { + t.Fatalf("adapter requests = %d, want fresh analysis for response run", len(fixture.adapter.Requests())) + } + storedUnmarked, err := fixture.store.GetRun(ctx, unmarked.RunID) + if err != nil { + t.Fatalf("GetRun unmarked: %v", err) + } + if storedUnmarked.Outcome != nil { + t.Fatalf("unmarked run outcome = %v, want untouched incomplete", storedUnmarked.Outcome) + } +} + func TestRetryPostsExistingActionsWithoutLLM(t *testing.T) { ctx := context.Background() fixture := newFixture(t) @@ -526,6 +757,10 @@ type noopLimiter struct{} func (noopLimiter) Wait(context.Context, string) error { return nil } +type cancelLimiter struct{} + +func (cancelLimiter) Wait(context.Context, string) error { return context.Canceled } + type noopLock struct{} func (noopLock) Release() error { return nil } From cd78dd8c9dbcfba43a070141f650e3a1f9a2e133 Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:12:38 -0400 Subject: [PATCH 07/32] test: cover thread response resume boundaries --- internal/cmd/reviewcmd/reviewcmd_test.go | 154 +++++++++++++++++++++++ internal/llmlifecycle/lifecycle_test.go | 25 ++++ 2 files changed, 179 insertions(+) diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index 9c823c83..eb9d20ef 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -32,6 +32,7 @@ import ( githubprovider "github.com/open-cli-collective/codereview-cli/internal/gitprovider/github" "github.com/open-cli-collective/codereview-cli/internal/ledger" "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/marker" "github.com/open-cli-collective/codereview-cli/internal/outbox" "github.com/open-cli-collective/codereview-cli/internal/pipeline" "github.com/open-cli-collective/codereview-cli/internal/progress" @@ -190,6 +191,98 @@ func TestRespondRetryPostsFlagCallsResponder(t *testing.T) { } } +func TestRespondRealRunnerResumesInterruptedRunThroughCLI(t *testing.T) { + ctx := context.Background() + cfg := testConfig() + ref, pr := reviewCommandPR(t) + provider := &gitprovider.Fake{} + provider.SetCapabilities(gitprovider.ProviderCaps{ThreadResolution: true}) + if err := provider.SetPR(ref, pr); err != nil { + t.Fatalf("SetPR: %v", err) + } + if err := provider.SetInlineThreads(ref, []gitprovider.InlineThread{ + reviewCommandMarkedThread(t, pr, "thread-1", "main.go", 2), + }); err != nil { + t.Fatalf("SetInlineThreads: %v", err) + } + store, err := ledger.Open(ctx, filepath.Join(t.TempDir(), "ledger.db")) + if err != nil { + t.Fatalf("ledger.Open: %v", err) + } + t.Cleanup(func() { + if err := store.Close(); err != nil { + t.Fatalf("store.Close: %v", err) + } + }) + layout := statepaths.NewLayout(t.TempDir(), t.TempDir()) + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + adapter.Queue(reviewCommandThreadAnalysisResult("thread-1", "thread-session-1")) + + factory := func(limiter outbox.Limiter) RuntimeFactory { + return func(_ *cobra.Command, opts *root.Options, _ config.File, profile config.Profile, runtimeOpts RuntimeOptions) (Runtime, error) { + runner := buildReviewRunner( + store, + provider, + adapter, + profile, + limiter, + layout, + opts.Stderr, + nil, + runtimeOptsWithWorkbench(t, runtimeOpts), + ) + return Runtime{Responder: runner, PostingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}}, nil + } + } + + firstCmd, _ := newTestCommand(t, cfg, factory(cancelLimiter{})) + firstErr := root.Execute(firstCmd, []string{"respond", pr.URL}) + if !errors.Is(firstErr, context.Canceled) { + t.Fatalf("first Execute error = %v, want context.Canceled", firstErr) + } + if len(adapter.Requests()) != 1 { + t.Fatalf("first adapter requests = %d, want one thread-analysis call", len(adapter.Requests())) + } + runs, err := store.ListRuns(ctx) + if err != nil { + t.Fatalf("ListRuns first: %v", err) + } + if len(runs) != 1 || runs[0].Outcome != nil { + t.Fatalf("first runs = %#v, want one incomplete response run", runs) + } + actions, err := store.ListPlannedActions(ctx, runs[0].RunID) + if err != nil { + t.Fatalf("ListPlannedActions first: %v", err) + } + if len(actions) != 2 { + t.Fatalf("first planned actions = %d, want reply and resolve", len(actions)) + } + + secondCmd, out := newTestCommand(t, cfg, factory(noopLimiter{})) + if err := root.Execute(secondCmd, []string{"respond", pr.URL}); err != nil { + t.Fatalf("second Execute: %v", err) + } + if len(adapter.Requests()) != 1 { + t.Fatalf("second adapter requests = %d, want persisted analysis reused", len(adapter.Requests())) + } + runs, err = store.ListRuns(ctx) + if err != nil { + t.Fatalf("ListRuns second: %v", err) + } + if len(runs) != 1 || runs[0].RunID == "" || runs[0].Outcome == nil || *runs[0].Outcome != ledger.OutcomeComment { + t.Fatalf("second runs = %#v, want same run completed as comment", runs) + } + if replies := provider.RecordedThreadReplies(ref); len(replies) != 1 || replies[0].ThreadID != "thread-1" { + t.Fatalf("thread replies = %#v, want resumed reply post", replies) + } + if resolved := provider.RecordedResolvedThreads(ref); len(resolved) != 1 || resolved[0] != "thread-1" { + t.Fatalf("resolved threads = %#v, want resumed resolve post", resolved) + } + if text := out.String(); !strings.Contains(text, "responded 1, resolved 1") { + t.Fatalf("stdout = %q, want resumed response summary", text) + } +} + func TestRespondJSONRendersStableShape(t *testing.T) { runner := &fakeRunner{respondResult: testThreadRespondResult(ledger.OutcomeComment)} cmd, out := newTestCommand(t, testConfig(), fakeFactory(runner)) @@ -2536,6 +2629,10 @@ type noopLimiter struct{} func (noopLimiter) Wait(context.Context, string) error { return nil } +type cancelLimiter struct{} + +func (cancelLimiter) Wait(context.Context, string) error { return context.Canceled } + func (r *fakeRunner) DryRun(_ context.Context, req pipeline.Request) (pipeline.Result, error) { r.requests = append(r.requests, req) if r.err != nil { @@ -2881,6 +2978,63 @@ func reviewCommandPR(t *testing.T) (gitprovider.PRRef, gitprovider.PR) { return fixture.ref, fixture.pr } +func reviewCommandMarkedThread(t *testing.T, pr gitprovider.PR, id, path string, line int) gitprovider.InlineThread { + t.Helper() + action, err := marker.RenderAction(marker.ActionMarker{ + RunID: "old-run", + ActionID: "old-action", + Kind: marker.ActionKindInlineComment, + SHA: pr.Head.SHA, + BaseSHA: pr.Base.SHA, + }) + if err != nil { + t.Fatalf("RenderAction: %v", err) + } + threadID := gitprovider.ThreadID(id) + created := time.Date(2026, 6, 1, 12, 0, 0, 0, time.UTC) + return gitprovider.InlineThread{ + ID: threadID, + Path: path, + Side: review.DiffSideRight, + Line: line, + SubjectType: review.AnchorKindLine, + CommitSHA: pr.Head.SHA, + Comments: []gitprovider.ThreadComment{ + { + ID: gitprovider.CommentID(id + "-cr"), + ThreadID: threadID, + Body: action + "\nOriginal finding.", + Author: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}, + CommitSHA: pr.Head.SHA, + Path: path, + Side: review.DiffSideRight, + Line: line, + SubjectType: review.AnchorKindLine, + CreatedAt: created, + UpdatedAt: created, + }, + { + ID: gitprovider.CommentID(id + "-human"), + ThreadID: threadID, + Body: "Human confirmed this should be summarized.", + Author: gitprovider.Identity{Login: "human", ID: "human-id"}, + CommitSHA: pr.Head.SHA, + Path: path, + Side: review.DiffSideRight, + Line: line, + SubjectType: review.AnchorKindLine, + CreatedAt: created.Add(time.Minute), + UpdatedAt: created.Add(time.Minute), + }, + }, + } +} + +func reviewCommandThreadAnalysisResult(threadID, sessionID string) llm.FakeResult { + structured := fmt.Sprintf(`{"schema_version":1,"thread_id":%q,"decision":"summarize","summary":"Summary for future context.","resolve":true,"rationale":"human reply resolved the discussion"}`, threadID) + return fakeReviewLLMResult(sessionID, structured) +} + func reviewCommandFixtureKey(ref gitprovider.PRRef) string { return fmt.Sprintf("%s/%s/%s/%d", ref.Host, ref.Owner, ref.Repo, ref.Number) } diff --git a/internal/llmlifecycle/lifecycle_test.go b/internal/llmlifecycle/lifecycle_test.go index e3e80279..d1c9e0ab 100644 --- a/internal/llmlifecycle/lifecycle_test.go +++ b/internal/llmlifecycle/lifecycle_test.go @@ -104,6 +104,31 @@ func TestRunStructuredFailsClosedWhenCachedSessionIsMissing(t *testing.T) { } } +func TestRunStructuredRejectsCachedSessionForDifferentRun(t *testing.T) { + ctx := context.Background() + store := newLifecycleStore() + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + adapter.Queue(llm.FakeResult{ + SessionID: "provider-session-1", + Response: llm.Response{StructuredOutput: []byte(`{"ok":true}`)}, + }) + req := lifecycleRequest(t, store, adapter) + if _, err := RunStructured(ctx, req, decodeLifecyclePayload); err != nil { + t.Fatalf("RunStructured seed: %v", err) + } + + cachedAdapter := &llm.FakeAdapter{NameValue: "fake-llm"} + req.Adapter = cachedAdapter + req.RunID = "other-run" + _, err := RunStructured(ctx, req, decodeLifecyclePayload) + if err == nil || !strings.Contains(err.Error(), "belongs to run") { + t.Fatalf("RunStructured mismatched run error = %v, want session run mismatch", err) + } + if len(cachedAdapter.Requests()) != 0 { + t.Fatalf("adapter requests = %d, want no provider call for mismatched cache", len(cachedAdapter.Requests())) + } +} + func TestRunStructuredIgnoresPayloadWithoutMetadata(t *testing.T) { ctx := context.Background() store := newLifecycleStore() From b82231436a422db887474ac10d32207939b331e3 Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:19:27 -0400 Subject: [PATCH 08/32] test: assert resume persistence artifacts --- internal/llmlifecycle/lifecycle_test.go | 24 ++++++++++++++++++++ internal/pipeline/pipeline_test.go | 3 +++ internal/threadrespond/threadrespond_test.go | 3 +++ 3 files changed, 30 insertions(+) diff --git a/internal/llmlifecycle/lifecycle_test.go b/internal/llmlifecycle/lifecycle_test.go index d1c9e0ab..04b44b4e 100644 --- a/internal/llmlifecycle/lifecycle_test.go +++ b/internal/llmlifecycle/lifecycle_test.go @@ -264,6 +264,18 @@ func TestRunStructuredRerunsFailedBlockingTaskWithStoredProviderSession(t *testi if len(resumes) != 1 || resumes[0].SessionID != "provider-session-1" { t.Fatalf("resumes = %#v, want resume from failed metadata session", resumes) } + session := store.session(t, "session-row-1") + if session.RunID != req.RunID || session.ProviderSessionID != "provider-session-2" { + t.Fatalf("persisted session = %#v, want resumed success session for run", session) + } + meta, ok, err := ReadMetadata(req.Paths, req.TaskID) + if err != nil || !ok { + t.Fatalf("ReadMetadata = %#v ok=%t err=%v, want metadata", meta, ok, err) + } + if meta.Status != StatusSucceeded || meta.SessionRowID != "session-row-1" || meta.ProviderSessionID != "provider-session-2" { + t.Fatalf("metadata after resume = %#v, want succeeded resumed session", meta) + } + assertFileContains(t, meta.ValidatedOutputPath, `"ok":true`) } func TestRunStructuredRerunsFailedBlockingTaskWithLatestAttemptSession(t *testing.T) { @@ -303,6 +315,18 @@ func TestRunStructuredRerunsFailedBlockingTaskWithLatestAttemptSession(t *testin if len(resumes) != 1 || resumes[0].SessionID != "provider-session-2" { t.Fatalf("resumes = %#v, want resume from latest failed attempt session", resumes) } + session := store.session(t, "session-row-1") + if session.RunID != req.RunID || session.ProviderSessionID != "provider-session-3" { + t.Fatalf("persisted session = %#v, want resumed success session for run", session) + } + meta, ok, err := ReadMetadata(req.Paths, req.TaskID) + if err != nil || !ok { + t.Fatalf("ReadMetadata = %#v ok=%t err=%v, want metadata", meta, ok, err) + } + if meta.Status != StatusSucceeded || meta.SessionRowID != "session-row-1" || meta.ProviderSessionID != "provider-session-3" { + t.Fatalf("metadata after attempt resume = %#v, want succeeded resumed session", meta) + } + assertFileContains(t, meta.ValidatedOutputPath, `"ok":true`) } func TestRunStructuredLoadsIsolatedFailureWithoutRerun(t *testing.T) { diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go index 724172a1..81ff573c 100644 --- a/internal/pipeline/pipeline_test.go +++ b/internal/pipeline/pipeline_test.go @@ -278,6 +278,9 @@ func TestDryRunRerunBypassesIncompleteRunAttempt(t *testing.T) { if result.Run.RunID != "run-fresh" { t.Fatalf("result run = %q, want fresh run", result.Run.RunID) } + if result.Artifacts.Dir == resume.ArtifactPath { + t.Fatalf("result artifacts dir = %q, want fresh artifact root", result.Artifacts.Dir) + } storedResume, err := store.GetRun(ctx, resume.RunID) if err != nil { t.Fatalf("GetRun resume: %v", err) diff --git a/internal/threadrespond/threadrespond_test.go b/internal/threadrespond/threadrespond_test.go index 61ed58a1..893ec846 100644 --- a/internal/threadrespond/threadrespond_test.go +++ b/internal/threadrespond/threadrespond_test.go @@ -383,6 +383,9 @@ func TestRunRerunBypassesIncompleteResponseRun(t *testing.T) { if second.Run.RunID == first.Run.RunID { t.Fatalf("rerun reused %q, want fresh run", second.Run.RunID) } + if second.Artifacts.Dir == first.Artifacts.Dir { + t.Fatalf("rerun artifacts dir = %q, want fresh artifact root", second.Artifacts.Dir) + } storedFirst, err := fixture.store.GetRun(ctx, first.Run.RunID) if err != nil { t.Fatalf("GetRun first: %v", err) From 3fb82546403ca6e9a7910d697fcc2f1591f57dd6 Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:28:22 -0400 Subject: [PATCH 09/32] fix: keep failed thread analysis resumable --- internal/cmd/reviewcmd/reviewcmd_test.go | 5 + internal/threadrespond/threadrespond.go | 7 +- internal/threadrespond/threadrespond_test.go | 100 ++++++++++++++++++- 3 files changed, 108 insertions(+), 4 deletions(-) diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index eb9d20ef..a192060a 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -250,6 +250,8 @@ func TestRespondRealRunnerResumesInterruptedRunThroughCLI(t *testing.T) { if len(runs) != 1 || runs[0].Outcome != nil { t.Fatalf("first runs = %#v, want one incomplete response run", runs) } + firstRunID := runs[0].RunID + firstArtifactPath := runs[0].ArtifactPath actions, err := store.ListPlannedActions(ctx, runs[0].RunID) if err != nil { t.Fatalf("ListPlannedActions first: %v", err) @@ -272,6 +274,9 @@ func TestRespondRealRunnerResumesInterruptedRunThroughCLI(t *testing.T) { if len(runs) != 1 || runs[0].RunID == "" || runs[0].Outcome == nil || *runs[0].Outcome != ledger.OutcomeComment { t.Fatalf("second runs = %#v, want same run completed as comment", runs) } + if runs[0].RunID != firstRunID || runs[0].ArtifactPath != firstArtifactPath { + t.Fatalf("second run = %q artifact %q, want resumed %q artifact %q", runs[0].RunID, runs[0].ArtifactPath, firstRunID, firstArtifactPath) + } if replies := provider.RecordedThreadReplies(ref); len(replies) != 1 || replies[0].ThreadID != "thread-1" { t.Fatalf("thread replies = %#v, want resumed reply post", replies) } diff --git a/internal/threadrespond/threadrespond.go b/internal/threadrespond/threadrespond.go index 936f7094..9390e46f 100644 --- a/internal/threadrespond/threadrespond.go +++ b/internal/threadrespond/threadrespond.go @@ -622,7 +622,12 @@ func completeFailedOnError(ctx context.Context, opts Options, result *Result, er if errors.Is(*errp, context.Canceled) || errors.Is(*errp, context.DeadlineExceeded) { return } - _ = opts.Store.CompleteRun(ctx, result.Run.RunID, ledger.OutcomeFailed, opts.now()) + outcome := ledger.OutcomeFailed + var taskErr *llmlifecycle.TaskError + if errors.As(*errp, &taskErr) && taskErr.Status() == llmlifecycle.StatusFailedBlocking { + outcome = ledger.OutcomeIncomplete + } + _ = opts.Store.CompleteRun(ctx, result.Run.RunID, outcome, opts.now()) if run, err := opts.Store.GetRun(ctx, result.Run.RunID); err == nil { result.Run = run } diff --git a/internal/threadrespond/threadrespond_test.go b/internal/threadrespond/threadrespond_test.go index 893ec846..ff91a479 100644 --- a/internal/threadrespond/threadrespond_test.go +++ b/internal/threadrespond/threadrespond_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "os" "reflect" "strings" "sync" @@ -15,6 +16,7 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/gitprovider" "github.com/open-cli-collective/codereview-cli/internal/ledger" "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/llmlifecycle" "github.com/open-cli-collective/codereview-cli/internal/marker" "github.com/open-cli-collective/codereview-cli/internal/outbox" "github.com/open-cli-collective/codereview-cli/internal/review" @@ -569,7 +571,7 @@ func TestRetryRejectsProfileMismatch(t *testing.T) { } } -func TestRunFailedAnalysisCompletesRunFailed(t *testing.T) { +func TestRunFailedAnalysisCompletesRunIncomplete(t *testing.T) { ctx := context.Background() fixture := newFixture(t) setInlineThreads(t, fixture, []gitprovider.InlineThread{ @@ -591,8 +593,100 @@ func TestRunFailedAnalysisCompletesRunFailed(t *testing.T) { if getErr != nil { t.Fatalf("GetRun: %v", getErr) } - if stored.Outcome == nil || *stored.Outcome != ledger.OutcomeFailed { - t.Fatalf("stored outcome = %v, want failed", stored.Outcome) + if stored.Outcome == nil || *stored.Outcome != ledger.OutcomeIncomplete { + t.Fatalf("stored outcome = %v, want incomplete", stored.Outcome) + } +} + +func TestRunFailedStructuredAnalysisPersistsAttemptsAndResumes(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + fixture.adapter = &llm.FakeAdapter{NameValue: "fake-llm", SupportsResumeValue: true} + fixture.adapter.Queue(llm.FakeResult{ + SessionID: "failed-initial", + Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-1","decision":"bogus","resolve":false}`)}, + }) + fixture.adapter.Queue(llm.FakeResult{ + SessionID: "failed-retry", + Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-1","decision":"still_bogus","resolve":false}`)}, + }) + setInlineThreads(t, fixture, []gitprovider.InlineThread{ + markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human), + }) + + first, err := Run(ctx, fixture.options(), Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if err == nil { + t.Fatal("Run invalid structured output error = nil, want validation failure") + } + storedFirst, getErr := fixture.store.GetRun(ctx, first.Run.RunID) + if getErr != nil { + t.Fatalf("GetRun first: %v", getErr) + } + if storedFirst.Outcome == nil || *storedFirst.Outcome != ledger.OutcomeIncomplete { + t.Fatalf("first outcome = %v, want incomplete", storedFirst.Outcome) + } + paths := llmlifecycle.Paths{LLMTasksDir: first.Artifacts.LLMTasksDir} + meta, ok, err := llmlifecycle.ReadMetadata(paths, "thread-analysis-thread-1") + if err != nil || !ok { + t.Fatalf("ReadMetadata failed task = %#v ok=%t err=%v, want metadata", meta, ok, err) + } + if meta.Status != llmlifecycle.StatusFailedBlocking || meta.ProviderSessionID != "failed-retry" || len(meta.Attempts) != 2 { + t.Fatalf("failed metadata = %#v, want blocking failure with attempts", meta) + } + for _, attempt := range meta.Attempts { + if attempt.RawOutputPath == "" { + t.Fatalf("attempt = %#v, want raw output path", attempt) + } + if _, err := os.Stat(attempt.RawOutputPath); err != nil { + t.Fatalf("raw attempt %s stat: %v", attempt.RawOutputPath, err) + } + } + + fixture.adapter = &llm.FakeAdapter{NameValue: "fake-llm", SupportsResumeValue: true} + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionSummarize, "", "Summary for future context.", true)) + secondOpts := fixture.options() + secondOpts.NewStepID = sequence("resume-step") + second, err := Run(ctx, secondOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if err != nil { + t.Fatalf("Run resumed analysis: %v", err) + } + if second.Run.RunID != first.Run.RunID || second.Artifacts.Dir != first.Artifacts.Dir { + t.Fatalf("resumed run/artifacts = %q %q, want %q %q", second.Run.RunID, second.Artifacts.Dir, first.Run.RunID, first.Artifacts.Dir) + } + if len(fixture.adapter.Requests()) != 0 { + t.Fatalf("resumed adapter starts = %d, want resume call only", len(fixture.adapter.Requests())) + } + resumes := fixture.adapter.Resumes() + if len(resumes) != 1 || resumes[0].SessionID != "failed-retry" { + t.Fatalf("resumes = %#v, want failed-retry", resumes) + } + storedSecond, err := fixture.store.GetRun(ctx, first.Run.RunID) + if err != nil { + t.Fatalf("GetRun second: %v", err) + } + if storedSecond.Outcome == nil || *storedSecond.Outcome != ledger.OutcomeDryRun { + t.Fatalf("second outcome = %v, want dry_run", storedSecond.Outcome) + } + meta, ok, err = llmlifecycle.ReadMetadata(paths, "thread-analysis-thread-1") + if err != nil || !ok { + t.Fatalf("ReadMetadata resumed task = %#v ok=%t err=%v, want metadata", meta, ok, err) + } + if meta.Status != llmlifecycle.StatusSucceeded || meta.ProviderSessionID != "session-thread-1" || meta.ValidatedOutputPath == "" { + t.Fatalf("resumed metadata = %#v, want succeeded resumed task", meta) } } From e6399bfa3b4b37cb7d47883e26d180bf02bd590b Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:35:47 -0400 Subject: [PATCH 10/32] test: cover stale thread resume inputs --- .../threadanalysis/threadanalysis_test.go | 38 ++++++++++++++ internal/threadrespond/threadrespond_test.go | 51 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/internal/threadanalysis/threadanalysis_test.go b/internal/threadanalysis/threadanalysis_test.go index d284fe74..070ae51d 100644 --- a/internal/threadanalysis/threadanalysis_test.go +++ b/internal/threadanalysis/threadanalysis_test.go @@ -168,6 +168,44 @@ func TestAnalyzeThreadsPreservesOrderAndFailsFast(t *testing.T) { } } +func TestAnalyzeThreadsPersistsCompletedResultsBeforeLaterFailure(t *testing.T) { + store := newFakeStore() + adapter := &llm.FakeAdapter{NameValue: "fake"} + adapter.Queue(llm.FakeResult{SessionID: "session-1", Response: llm.Response{StructuredOutput: []byte(validSkipOutput("thread-1"))}}) + adapter.Queue(llm.FakeResult{SessionID: "session-2", Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-2","decision":"bogus","resolve":false}`)}}) + adapter.Queue(llm.FakeResult{SessionID: "session-2-retry", Response: llm.Response{StructuredOutput: []byte(`{"schema_version":1,"thread_id":"thread-2","decision":"bogus","resolve":false}`)}}) + opts := testOptions(t, store, adapter) + ids := []string{"step-1", "step-2"} + opts.NewStepID = func() string { + id := ids[0] + ids = ids[1:] + return id + } + + results, err := AnalyzeThreads(context.Background(), opts, []threadcontext.Thread{ + promptThreadWithID("thread-1", "first"), + promptThreadWithID("thread-2", "second"), + }) + if err == nil { + t.Fatal("AnalyzeThreads error = nil, want second thread validation failure") + } + if results != nil { + t.Fatalf("results = %#v, want nil on fail-fast error", results) + } + thread1 := readThreadMetadata(t, opts, "thread-1") + if thread1.Status != llmlifecycle.StatusSucceeded || thread1.ValidatedOutputPath == "" || thread1.ProviderSessionID != "session-1" { + t.Fatalf("thread-1 metadata = %#v, want persisted success before later failure", thread1) + } + assertFileOmits(t, thread1.ValidatedOutputPath, "Here is JSON") + thread2 := readThreadMetadata(t, opts, "thread-2") + if thread2.Status != llmlifecycle.StatusFailedBlocking || len(thread2.Attempts) != 2 || thread2.ProviderSessionID != "session-2-retry" { + t.Fatalf("thread-2 metadata = %#v, want persisted blocking failure attempts", thread2) + } + if len(store.inserted) != 2 { + t.Fatalf("inserted sessions = %#v, want persisted sessions for success and failed thread", store.inserted) + } +} + func TestAnalyzeThreadsReturnsResultsInInputOrder(t *testing.T) { store := newFakeStore() adapter := &llm.FakeAdapter{NameValue: "fake"} diff --git a/internal/threadrespond/threadrespond_test.go b/internal/threadrespond/threadrespond_test.go index ff91a479..0f0422cc 100644 --- a/internal/threadrespond/threadrespond_test.go +++ b/internal/threadrespond/threadrespond_test.go @@ -277,6 +277,57 @@ func TestRunResumesIncompleteAnalysisWithoutRecomputing(t *testing.T) { } } +func TestRunResumeRejectsChangedThreadInputBeforeLLM(t *testing.T) { + ctx := context.Background() + fixture := newFixture(t) + fixture.adapter.Queue(threadAnalysisResult("thread-1", threadanalysis.DecisionClarify, "Please clarify the intended behavior.", "", false)) + original := markedThread(t, "thread-1", "main.go", 10, false, fixture.bot, fixture.human) + setInlineThreads(t, fixture, []gitprovider.InlineThread{original}) + firstOpts := fixture.options() + firstOpts.NewActionID = func(reviewplan.ActionKind) (string, error) { + return "", context.Canceled + } + + first, err := Run(ctx, firstOpts, Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Run interrupted error = %v, want context.Canceled", err) + } + + changed := original + changed.Comments = append([]gitprovider.ThreadComment(nil), original.Comments...) + changed.Comments[1].Body = "Human reply changed after the interrupted run" + setInlineThreads(t, fixture, []gitprovider.InlineThread{changed}) + fixture.adapter = &llm.FakeAdapter{NameValue: "fake-llm"} + _, err = Run(ctx, fixture.options(), Request{ + PRRef: fixture.ref, + PRURL: fixture.pr.URL, + ProfileName: "default", + Profile: testProfile(), + PostingIdentity: fixture.bot, + DryRun: true, + }) + if err == nil || !strings.Contains(err.Error(), "input fingerprint changed") { + t.Fatalf("Run changed thread error = %v, want stale lifecycle input error", err) + } + if len(fixture.adapter.Requests()) != 0 || len(fixture.adapter.Resumes()) != 0 { + t.Fatalf("adapter starts=%#v resumes=%#v, want no provider call for stale cached analysis", fixture.adapter.Requests(), fixture.adapter.Resumes()) + } + stored, getErr := fixture.store.GetRun(ctx, first.Run.RunID) + if getErr != nil { + t.Fatalf("GetRun first: %v", getErr) + } + if stored.Outcome == nil || *stored.Outcome != ledger.OutcomeFailed { + t.Fatalf("stale resume outcome = %v, want failed", stored.Outcome) + } +} + func TestRunResumesExistingActionsWithoutReplanning(t *testing.T) { ctx := context.Background() fixture := newFixture(t) From bac48c1e97e769202d4182ba82236233b1b1579b Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 09:46:52 -0400 Subject: [PATCH 11/32] test: cover dry-run rerun recovery --- internal/cmd/reviewcmd/reviewcmd_test.go | 18 +++++ internal/pipeline/pipeline_test.go | 90 ++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index a192060a..82f6bf92 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -1051,6 +1051,24 @@ func TestReviewNoPostIsDryRunAlias(t *testing.T) { } } +func TestReviewDryRunRerunFlagCallsDryRunner(t *testing.T) { + runner := &fakeRunner{result: testPipelineResult(false)} + cmd, _ := newTestCommand(t, testConfig(), fakeFactory(runner)) + + if err := root.Execute(cmd, []string{"review", "https://github.com/open-cli-collective/codereview-cli/pull/29", "--dry-run", "--rerun"}); err != nil { + t.Fatalf("Execute: %v", err) + } + if len(runner.requests) != 1 { + t.Fatalf("dry runner calls = %d, want 1", len(runner.requests)) + } + if len(runner.liveRequests) != 0 { + t.Fatalf("live runner calls = %d, want 0", len(runner.liveRequests)) + } + if !runner.requests[0].Rerun { + t.Fatalf("dry-run request = %#v, want rerun propagated", runner.requests[0]) + } +} + func TestReviewDryRunPassesStageOverrides(t *testing.T) { runner := &fakeRunner{result: testPipelineResult(false)} cmd, _ := newTestCommand(t, testConfig(), fakeFactory(runner)) diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go index 81ff573c..304f4df7 100644 --- a/internal/pipeline/pipeline_test.go +++ b/internal/pipeline/pipeline_test.go @@ -250,6 +250,96 @@ func TestDryRunResumesIncompleteRunAttempt(t *testing.T) { } } +func TestDryRunResumeLoadsCompletedTasksAndRerunsFailedTaskOnly(t *testing.T) { + ctx := context.Background() + store := openPipelineStore(t) + defer closeStore(t, store) + provider, req := dryRunHarness(t) + layout := statepaths.NewLayout(t.TempDir(), t.TempDir()) + run := allocateDryRunForProvider(t, store, layout, provider, req, "run-task-resume", fixedNow().Add(-time.Minute)) + + firstAdapter := &llm.FakeAdapter{NameValue: "fake-llm", SupportsResumeValue: true} + firstAdapter.Queue(fakeLLMResult("dossier-summary-session", discussionSummaryJSON(nil, nil), 8, 2)) + firstAdapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 10, 2)) + firstAdapter.Queue(fakeLLMResult("reviewer-session", findingsJSON("harness:reviewer", "main.go", "major", 2, "Fix this"), 20, 4)) + firstAdapter.Queue(fakeLLMResult("rollup-session", rollupJSON("comment", []string{"missing-finding"}), 30, 6)) + firstAdapter.Queue(fakeLLMResult("rollup-retry-session", rollupJSON("comment", []string{"missing-finding"}), 30, 6)) + _, err := dryRunForTest(ctx, Options{ + Provider: provider, + Adapter: firstAdapter, + Store: store, + Layout: layout, + Now: fixedNow, + NewRunID: func() string { return "unexpected-fresh-run" }, + NewSessionRowID: sequence("session"), + NewFindingID: findingSequence("finding"), + NewActionID: actionSequence(), + MaxConcurrency: 1, + }, req) + if err == nil || !errors.Is(err, ErrStructuredOutputInvalidAfterRetry) { + t.Fatalf("first DryRun error = %v, want invalid rollup after retry", err) + } + stored, err := store.GetRun(ctx, run.RunID) + if err != nil { + t.Fatalf("GetRun first: %v", err) + } + if stored.Outcome == nil || *stored.Outcome != ledger.OutcomeIncomplete { + t.Fatalf("first run outcome = %#v, want incomplete", stored.Outcome) + } + artifacts := ArtifactPathsFromDir(run.ArtifactPath) + for _, taskID := range []string{orchestratorSelectionStage, reviewerTaskID("harness:reviewer")} { + meta, ok, err := readLLMTaskMetadata(artifacts, taskID) + if err != nil || !ok || meta.Status != llmTaskStatusSucceeded { + t.Fatalf("task %s metadata = %#v ok %v err %v, want succeeded", taskID, meta, ok, err) + } + } + rollupMeta, ok, err := readLLMTaskMetadata(artifacts, orchestratorRollupStage) + if err != nil || !ok || rollupMeta.Status != llmTaskStatusFailedBlocking { + t.Fatalf("rollup metadata = %#v ok %v err %v, want failed_blocking", rollupMeta, ok, err) + } + + secondAdapter := &llm.FakeAdapter{NameValue: "fake-llm", SupportsResumeValue: true} + secondAdapter.Queue(fakeLLMResult("rollup-fixed-session", rollupJSON("comment", []string{"finding-1"}), 30, 6)) + result, err := dryRunForTest(ctx, Options{ + Provider: provider, + Adapter: secondAdapter, + Store: store, + Layout: layout, + Now: fixedNow, + NewRunID: func() string { + t.Fatal("NewRunID called despite resumable dry-run") + return "unexpected" + }, + NewSessionRowID: sequence("resume-session"), + NewFindingID: findingSequence("finding"), + NewActionID: actionSequence(), + MaxConcurrency: 1, + }, req) + if err != nil { + t.Fatalf("second DryRun: %v", err) + } + if result.Run.RunID != run.RunID || result.Artifacts.Dir != run.ArtifactPath { + t.Fatalf("result run/artifacts = %q %q, want %q %q", result.Run.RunID, result.Artifacts.Dir, run.RunID, run.ArtifactPath) + } + if len(secondAdapter.Requests()) != 0 { + t.Fatalf("second adapter starts = %#v, want cached completed tasks and resumed rollup only", secondAdapter.Requests()) + } + resumes := secondAdapter.Resumes() + if len(resumes) != 1 || resumes[0].SessionID != "rollup-retry-session" { + t.Fatalf("second adapter resumes = %#v, want only failed rollup retry session", resumes) + } + if len(result.Findings) != 1 || result.Findings[0].ID != "finding-1" { + t.Fatalf("result findings = %#v, want cached reviewer finding", result.Findings) + } + stored, err = store.GetRun(ctx, run.RunID) + if err != nil { + t.Fatalf("GetRun second: %v", err) + } + if stored.Outcome == nil || *stored.Outcome != ledger.OutcomeDryRun { + t.Fatalf("second run outcome = %#v, want dry_run", stored.Outcome) + } +} + func TestDryRunRerunBypassesIncompleteRunAttempt(t *testing.T) { ctx := context.Background() store := openPipelineStore(t) From b006629a48f0460b01b2d84368f300061b31ed33 Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 10:00:08 -0400 Subject: [PATCH 12/32] test: cover real lifecycle resume boundaries --- internal/cmd/reviewcmd/reviewcmd_test.go | 92 ++++++++++++++++++++++++ internal/llmlifecycle/lifecycle_test.go | 69 ++++++++++++++++++ 2 files changed, 161 insertions(+) diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index 82f6bf92..f1fcb856 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -1779,6 +1779,98 @@ func TestReviewLiveSessionThroughRealRunnerPersistsNamedSession(t *testing.T) { } } +func TestReviewRealRunnerResumesIncompleteRunThroughCLI(t *testing.T) { + cfg := testConfig() + agentDir := t.TempDir() + writeReviewAgent(t, agentDir) + profile := cfg.Profiles["home"] + profile.AgentSources = []string{agentDir} + cfg.Profiles["home"] = profile + + fixture := newReviewCommandWorkbenchFixture(t) + reviewCommandFixtures.Store(reviewCommandFixtureKey(fixture.ref), fixture) + ref, pr := fixture.ref, fixture.pr + provider := &gitprovider.Fake{} + if err := provider.SetPR(ref, pr); err != nil { + t.Fatalf("SetPR: %v", err) + } + if err := provider.SetDiff(ref, gitprovider.UnifiedDiff{Raw: reviewSmallDiff("main.go")}); err != nil { + t.Fatalf("SetDiff: %v", err) + } + store, err := ledger.Open(context.Background(), filepath.Join(t.TempDir(), "ledger.db")) + if err != nil { + t.Fatalf("ledger.Open: %v", err) + } + t.Cleanup(func() { + if err := store.Close(); err != nil { + t.Fatalf("store.Close: %v", err) + } + }) + layout := statepaths.NewLayout(t.TempDir(), t.TempDir()) + artifacts, err := pipeline.ArtifactPathsForRun(layout, ref, pr, "home", "review-bot", "resume-live") + if err != nil { + t.Fatalf("ArtifactPathsForRun: %v", err) + } + prKey, err := statepaths.PRKey(ref.Host, ref.Owner, ref.Repo, ref.Number) + if err != nil { + t.Fatalf("PRKey: %v", err) + } + run, err := store.AllocateRun(context.Background(), ledger.AllocateRunParams{ + PRKey: prKey, + PRURL: pr.URL, + RunID: "resume-live", + SHA: pr.Head.SHA, + BaseSHA: pr.Base.SHA, + Profile: "home", + PostingIdentity: "review-bot", + PostMode: ledger.PostModeLive, + StartedAt: time.Now().Add(-time.Minute), + ArtifactPath: artifacts.Dir, + }) + if err != nil { + t.Fatalf("AllocateRun: %v", err) + } + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + adapter.Queue(fakeReviewLLMResult("selection-session", `{ + "schema_version": 1, + "selected_agents": [], + "thread_actions": [], + "reasoning": "no specialist needed" + }`)) + adapter.Queue(fakeReviewLLMResult("rollup-session", reviewRollupJSON("approve", nil))) + cmd, _ := newTestCommand(t, cfg, func(_ *cobra.Command, opts *root.Options, _ config.File, profile config.Profile, runtimeOpts RuntimeOptions) (Runtime, error) { + runner := buildReviewRunner( + store, + provider, + adapter, + profile, + noopLimiter{}, + layout, + opts.Stderr, + nil, + runtimeOptsWithWorkbench(t, runtimeOpts), + ) + return Runtime{Runner: runner, PostingIdentity: gitprovider.Identity{Login: "review-bot", ID: "bot-id"}}, nil + }) + + if err := root.Execute(cmd, []string{"review", pr.URL}); err != nil { + t.Fatalf("Execute: %v", err) + } + runs, err := store.ListRuns(context.Background()) + if err != nil { + t.Fatalf("ListRuns: %v", err) + } + if len(runs) != 1 || runs[0].RunID != run.RunID || runs[0].ArtifactPath != run.ArtifactPath { + t.Fatalf("runs = %#v, want resumed run %q artifact %q", runs, run.RunID, run.ArtifactPath) + } + if runs[0].Outcome == nil || *runs[0].Outcome != ledger.OutcomeApproved { + t.Fatalf("resumed run outcome = %v, want approved", runs[0].Outcome) + } + if len(adapter.Requests()) != 2 { + t.Fatalf("adapter requests = %d, want selection/rollup planning", len(adapter.Requests())) + } +} + func TestReviewDryRunRealRunnerHonorsConfiguredRetention(t *testing.T) { cfg := testConfig() agentDir := t.TempDir() diff --git a/internal/llmlifecycle/lifecycle_test.go b/internal/llmlifecycle/lifecycle_test.go index 04b44b4e..7ed4660c 100644 --- a/internal/llmlifecycle/lifecycle_test.go +++ b/internal/llmlifecycle/lifecycle_test.go @@ -78,6 +78,75 @@ func TestRunStructuredPersistsAndLoadsSucceededTask(t *testing.T) { } } +func TestRunStructuredReloadsSucceededTaskWithRealLedgerAfterRestart(t *testing.T) { + ctx := context.Background() + dbPath := filepath.Join(t.TempDir(), "ledger.db") + store, err := ledger.Open(ctx, dbPath) + if err != nil { + t.Fatalf("ledger.Open: %v", err) + } + runID := "run-real-ledger-cache" + if _, err := store.AllocateRun(ctx, ledger.AllocateRunParams{ + PRKey: "github_open-cli-collective_codereview-cli_29", + PRURL: "https://github.com/open-cli-collective/codereview-cli/pull/29", + RunID: runID, + SHA: strings.Repeat("a", 40), + BaseSHA: strings.Repeat("b", 40), + Profile: "home", + PostingIdentity: "review-bot", + PostMode: ledger.PostModeDryRun, + StartedAt: lifecycleTestNow, + ArtifactPath: filepath.Join(t.TempDir(), "run"), + }); err != nil { + t.Fatalf("AllocateRun: %v", err) + } + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + adapter.Queue(llm.FakeResult{ + SessionID: "provider-session-1", + Response: llm.Response{StructuredOutput: []byte(`{"ok":true}`)}, + }) + req := lifecycleRequest(t, store, adapter) + req.RunID = runID + req.Paths = Paths{LLMTasksDir: filepath.Join(t.TempDir(), "llm-tasks")} + if _, err := RunStructured(ctx, req, decodeLifecyclePayload); err != nil { + t.Fatalf("RunStructured first: %v", err) + } + meta, ok, err := ReadMetadata(req.Paths, req.TaskID) + if err != nil || !ok { + t.Fatalf("ReadMetadata first = %#v ok=%t err=%v, want metadata", meta, ok, err) + } + assertFileContains(t, meta.ValidatedOutputPath, `"ok":true`) + if err := store.Close(); err != nil { + t.Fatalf("store.Close first: %v", err) + } + + reopened, err := ledger.Open(ctx, dbPath) + if err != nil { + t.Fatalf("ledger.Open reopened: %v", err) + } + defer func() { + if err := reopened.Close(); err != nil { + t.Fatalf("store.Close reopened: %v", err) + } + }() + cachedAdapter := &llm.FakeAdapter{NameValue: "fake-llm"} + req.Store = reopened + req.Adapter = cachedAdapter + cached, err := RunStructured(ctx, req, decodeLifecyclePayload) + if err != nil { + t.Fatalf("RunStructured cached after reopen: %v", err) + } + if !cached.Cached || !cached.Value.OK { + t.Fatalf("cached result = %#v, want cached ok", cached) + } + if len(cachedAdapter.Requests()) != 0 || len(cachedAdapter.Resumes()) != 0 { + t.Fatalf("cached adapter starts=%#v resumes=%#v, want no provider call", cachedAdapter.Requests(), cachedAdapter.Resumes()) + } + if cached.Session.RunID != runID || cached.Session.ProviderSessionID != "provider-session-1" { + t.Fatalf("cached session = %#v, want reloaded ledger session", cached.Session) + } +} + func TestRunStructuredFailsClosedWhenCachedSessionIsMissing(t *testing.T) { ctx := context.Background() store := newLifecycleStore() From 66e98b390dd8abf1cad0c055aa3738a6674811b3 Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Fri, 26 Jun 2026 10:25:53 -0400 Subject: [PATCH 13/32] fix: route review thread lifecycle through analysis --- internal/pipeline/pipeline.go | 179 ++++++++++++++++-- internal/pipeline/pipeline_test.go | 100 ++++++++-- internal/reviewplan/reviewplan.go | 27 ++- internal/reviewplan/summary.go | 12 +- internal/threadanalysis/threadanalysis.go | 113 ++++++++--- .../threadanalysis/threadanalysis_test.go | 23 +++ internal/threadcontext/threadcontext.go | 16 ++ internal/threadrespond/threadrespond.go | 74 +------- 8 files changed, 408 insertions(+), 136 deletions(-) diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 675d21a0..78643c5f 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -39,6 +39,8 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/sessionreuse" "github.com/open-cli-collective/codereview-cli/internal/stagemodel" "github.com/open-cli-collective/codereview-cli/internal/statepaths" + "github.com/open-cli-collective/codereview-cli/internal/threadanalysis" + "github.com/open-cli-collective/codereview-cli/internal/threadcontext" ) const ( @@ -166,13 +168,14 @@ type Request struct { // SelectionRequest runs only the selection phase without review persistence or posting. type SelectionRequest struct { - PRRef gitprovider.PRRef - ProfileName string - Profile config.Profile - AgentDirs []string - ArtifactDir string - ReviewBaseSHA string - ReviewHeadSHA string + PRRef gitprovider.PRRef + ProfileName string + Profile config.Profile + PostingIdentity gitprovider.Identity + AgentDirs []string + ArtifactDir string + ReviewBaseSHA string + ReviewHeadSHA string SelectionModelOverride string SelectionEffortOverride string @@ -443,6 +446,7 @@ type namedSessionState struct { type selectionSetupRequest struct { PRRef gitprovider.PRRef Profile config.Profile + PostingIdentity gitprovider.Identity AgentDirs []string ReviewBaseSHA string ReviewHeadSHA string @@ -463,6 +467,7 @@ type preparedSelectionContext struct { parsed ParsedDiff changedFiles []string threads []gitprovider.InlineThread + threadContext []threadcontext.Thread reviews []gitprovider.Review issueComments []gitprovider.IssueComment catalog agents.Catalog @@ -484,6 +489,7 @@ type selectionPhaseRequest struct { Catalog agents.Catalog ParsedDiff ParsedDiff Threads []gitprovider.InlineThread + ThreadContext []threadcontext.Thread Artifacts ArtifactPaths ResumeSessionID string MaxAgents int @@ -547,6 +553,7 @@ func SelectionOnly(ctx context.Context, opts Options, req SelectionRequest) (Sel prepared, err := prepareSelectionContext(ctx, opts, selectionSetupRequest{ PRRef: req.PRRef, Profile: req.Profile, + PostingIdentity: req.PostingIdentity, AgentDirs: req.AgentDirs, ReviewBaseSHA: req.ReviewBaseSHA, ReviewHeadSHA: req.ReviewHeadSHA, @@ -594,6 +601,7 @@ func SelectionOnly(ctx context.Context, opts Options, req SelectionRequest) (Sel Catalog: prepared.catalog, ParsedDiff: prepared.parsed, Threads: prepared.threads, + ThreadContext: prepared.threadContext, Artifacts: prepared.artifacts, MaxAgents: opts.maxAgents(), }) @@ -652,6 +660,7 @@ func execute(ctx context.Context, opts Options, req Request, mode executionMode) prepared, err := prepareSelectionContext(ctx, opts, selectionSetupRequest{ PRRef: req.PRRef, Profile: req.Profile, + PostingIdentity: req.PostingIdentity, AgentDirs: req.AgentDirs, ReviewBaseSHA: req.ReviewBaseSHA, ReviewHeadSHA: req.ReviewHeadSHA, @@ -759,6 +768,7 @@ func execute(ctx context.Context, opts Options, req Request, mode executionMode) Catalog: prepared.catalog, ParsedDiff: prepared.parsed, Threads: prepared.threads, + ThreadContext: prepared.threadContext, Artifacts: prepared.artifacts, ResumeSessionID: namedSession.resumeID(), MaxAgents: maxAgents, @@ -774,6 +784,13 @@ func execute(ctx context.Context, opts Options, req Request, mode executionMode) namedSession.recordSessionID(selectionSession) selectionTaskIDs := []string{orchestratorSelectionStage} + threadResponses, err := analyzeReviewThreads(ctx, opts, req, run, prepared.artifacts, prepared.threadContext) + if err != nil { + if errors.Is(err, errLLMTaskFailedBlocking) { + failureOutcome = ledger.OutcomeIncomplete + } + return Result{}, err + } findings, reviewerResults, reviewerSessions, reviewerLedgerSessions, reviewerFindingSessions, reviewerFailures, err := runReviewers(ctx, opts, req, run.RunID, prepared.reviewPR, prepared.catalog, prepared.parsed, prepared.artifacts, selection, selectionTaskIDs, maxConcurrency) if err != nil { if errors.Is(err, errLLMTaskFailedBlocking) { @@ -843,6 +860,7 @@ func execute(ctx context.Context, opts Options, req Request, mode executionMode) } plan, err := opts.buildPlan(req, prepared.reviewPR, mode.planPostMode, result.EffectiveCaps, prepared.parsed.PlanDiff, findings, rollup, selection.ThreadActions, false, result.AgentDefsChanged, planRunInputs{ + threadResponses: threadResponses, hasRun: true, selection: selectionSession, reviewers: reviewerSessions, @@ -1023,6 +1041,7 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet return preparedSelectionContext{}, err } var threads []gitprovider.InlineThread + var threadContext []threadcontext.Thread var reviews []gitprovider.Review var issueComments []gitprovider.IssueComment if !reviewCtx.pinnedReview { @@ -1030,6 +1049,12 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet if err != nil { return preparedSelectionContext{}, err } + if strings.TrimSpace(req.PostingIdentity.ID) != "" || strings.TrimSpace(req.PostingIdentity.Login) != "" { + threadContext, err = threadcontext.Normalize(threads, threadcontext.Options{PostingIdentity: req.PostingIdentity}) + if err != nil { + return preparedSelectionContext{}, err + } + } reviews, err = opts.Provider.ListReviews(ctx, req.PRRef) if err != nil { return preparedSelectionContext{}, err @@ -1061,6 +1086,7 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet parsed: parsed, changedFiles: patchPaths(parsed.Patches), threads: threads, + threadContext: threadContext, reviews: reviews, issueComments: issueComments, catalog: catalog, @@ -1124,6 +1150,11 @@ func runSelectionPhase(ctx context.Context, opts Options, req selectionPhaseRequ model, effort := runtimeConfig.model, runtimeConfig.effort promptInput, promptDeps, err := selectionPromptInputFromArtifacts(req.Artifacts, req.Threads) + knownThreadIDs := knownThreads(req.Threads) + if len(req.ThreadContext) > 0 { + promptInput, promptDeps, err = selectionPromptInputFromThreadContext(req.Artifacts, req.ThreadContext) + knownThreadIDs = map[string]bool{} + } if err != nil { return llm.Selection{}, sessionDraft{}, ledger.Session{}, err } @@ -1144,7 +1175,7 @@ func runSelectionPhase(ctx context.Context, opts Options, req selectionPhaseRequ return llm.DecodeSelection(data, llm.SelectionOptions{ KnownAgents: knownAgents(req.Catalog), ChangedFiles: changedFiles(req.ParsedDiff.Patches), - KnownThreads: knownThreads(req.Threads), + KnownThreads: knownThreadIDs, }) } if strings.TrimSpace(req.RunID) != "" { @@ -1193,6 +1224,41 @@ func runSelectionPhase(ctx context.Context, opts Options, req selectionPhaseRequ return selection, selectionSession, ledger.Session{}, nil } +func analyzeReviewThreads(ctx context.Context, opts Options, req Request, run ledger.Run, artifacts ArtifactPaths, threads []threadcontext.Thread) ([]review.ThreadResponseAction, error) { + eligible := threadcontext.PendingCRAuthoredFindingThreads(threads) + if len(eligible) == 0 { + return nil, nil + } + runtimeConfig, err := resolveThreadAnalysisRuntimeConfig(req.Profile) + if err != nil { + return nil, err + } + results := make([]threadanalysis.Result, 0, len(eligible)) + for _, thread := range eligible { + logPath, err := artifacts.AgentLog("thread-analysis-" + string(thread.ID)) + if err != nil { + return nil, err + } + result, err := threadanalysis.AnalyzeThread(ctx, threadanalysis.Options{ + Store: opts.Store, + RunID: run.RunID, + Adapter: opts.Adapter, + Model: runtimeConfig.model, + Effort: runtimeConfig.effort, + LogPath: logPath, + LifecyclePaths: lifecyclePaths(artifacts), + Progress: opts.TaskProgress, + Now: opts.now, + NewStepID: opts.newSessionRowID, + }, thread) + if err != nil { + return nil, pipelineTaskError(err) + } + results = append(results, result) + } + return threadanalysis.ResponseActions(results), nil +} + func (opts Options) capSelectionAgents(selection llm.Selection, maxAgents int) llm.Selection { if maxAgents <= 0 || len(selection.SelectedAgents) <= maxAgents { return selection @@ -1937,6 +2003,7 @@ type planRunInputs struct { reviewers []sessionDraft rollup sessionDraft selectedAgents []llm.SelectedAgent + threadResponses []review.ThreadResponseAction findingSessions map[review.FindingID]string reviewerFailures []ReviewerFailure reviewerCoverage []reviewplan.ReviewerCoverageSummary @@ -2213,12 +2280,13 @@ func workstreamUsage(name string, draft sessionDraft) reviewplan.WorkstreamUsage func (opts Options) buildPlan(req Request, pr gitprovider.PR, postMode reviewplan.PostMode, caps reviewplan.ProviderCaps, diff reviewplan.Diff, findings []review.Finding, rollup review.Rollup, threadActions []review.ThreadAction, noDiff bool, agentDefsChanged bool, runInputs planRunInputs) (reviewplan.Plan, error) { runSummary, findingReviewers := opts.buildRunSummary(req, runInputs) return reviewplan.Build(reviewplan.Request{ - PostMode: postMode, - ProviderCaps: caps, - Diff: diff, - Findings: findings, - Rollup: rollup, - ThreadActions: threadActions, + PostMode: postMode, + ProviderCaps: caps, + Diff: diff, + Findings: findings, + Rollup: rollup, + ThreadActions: threadActions, + ThreadResponses: append([]review.ThreadResponseAction(nil), runInputs.threadResponses...), EventOptions: reviewplan.EventOptions{ MajorEventRequestsChanges: req.MajorRequestChanges, PostingIdentityIsPRAuthor: sameIdentity(pr.Author, req.PostingIdentity), @@ -2415,7 +2483,7 @@ type reviewerPromptInput struct { Assignment reviewerPromptAssignment `json:"assignment"` } -const defaultSelectionTask = "select reviewer agents and thread actions from dossier/workbench context; return selection JSON only" +const defaultSelectionTask = "select reviewer agents from dossier/workbench context; return selection JSON only" func buildSelectionPrompt(catalog agents.Catalog, input selectionPromptInput, maxAgents int, selectionInstructions string) (string, error) { threadIDs := make([]string, 0, len(input.Threads)) @@ -2522,6 +2590,23 @@ func selectionPromptInputFromArtifacts(paths ArtifactPaths, threads []gitprovide return input, deps, nil } +func selectionPromptInputFromThreadContext(paths ArtifactPaths, threads []threadcontext.Thread) (selectionPromptInput, []string, error) { + input, deps, err := selectionPromptInputFromArtifacts(paths, nil) + if err != nil { + return selectionPromptInput{}, nil, err + } + summaryPath, err := paths.DossierSummaryPath("discussion.json") + if err != nil { + return selectionPromptInput{}, nil, err + } + var summary dossierDiscussionSummaryArtifact + if err := readJSONFile(summaryPath, &summary); err != nil { + return selectionPromptInput{}, nil, err + } + input.Threads = selectionThreadPromptsFromContext(threads, summary) + return input, deps, nil +} + func selectionPromptContentFromPath(path string) (string, error) { data, err := os.ReadFile(path) // #nosec G304 -- artifact path is pipeline-owned under the selected run/workbench root. if err != nil { @@ -2631,6 +2716,52 @@ func selectionThreadPrompts(threads []gitprovider.InlineThread, summary dossierD return out } +func selectionThreadPromptsFromContext(threads []threadcontext.Thread, summary dossierDiscussionSummaryArtifact) []selectionThreadPrompt { + summaryByAnchor := make(map[string]dossierInlineThreadSummaryArtifact, len(summary.InlineThreads)) + for _, thread := range summary.InlineThreads { + key := dossierInlineThreadAnchorKey(thread.Path, thread.Side, thread.Line, thread.AnchorKind) + summaryByAnchor[key] = thread + } + out := make([]selectionThreadPrompt, 0, len(threads)) + for _, thread := range threads { + promptThread := selectionThreadPrompt{ + ThreadID: string(thread.ID), + Path: thread.Anchor.Path, + Line: thread.Anchor.Line, + Side: string(thread.Anchor.Side), + AnchorKind: string(thread.Anchor.SubjectType), + Resolved: thread.Resolved, + } + if thread.ResolvedSummary != nil { + promptThread.Status = "settled" + promptThread.Summary = singleLineExcerpt(thread.ResolvedSummary.Body, dossierFinalExcerptRunes) + } else if summarized, ok := summaryByAnchor[dossierInlineThreadAnchorKey(thread.Anchor.Path, string(thread.Anchor.Side), thread.Anchor.Line, string(thread.Anchor.SubjectType))]; ok { + promptThread.Status = summarized.Status + promptThread.Summary = summarized.Summary + } else { + promptThread.Status = selectionThreadStatus(thread) + if len(thread.Comments) > 0 { + promptThread.Summary = singleLineExcerpt(thread.Comments[0].Body, dossierFinalExcerptRunes) + } + } + out = append(out, promptThread) + } + return out +} + +func selectionThreadStatus(thread threadcontext.Thread) string { + switch { + case thread.Resolved: + return "settled" + case thread.Status.PendingHumanReply: + return "pending_human_reply" + case thread.Status.CRAuthoredFinding: + return "cr_authored" + default: + return "unresolved" + } +} + func buildRollupPrompt(pr gitprovider.PR, findings []review.Finding, reviewerFailures []ReviewerFailure, reviewerCoverage []reviewplan.ReviewerCoverageSummary) (string, error) { payload := map[string]any{ "task": "dedupe findings and return rollup JSON only", @@ -2758,12 +2889,12 @@ func selectionOutputContract(agents []agents.Agent, changedFiles []string, threa "selected_agents[].files must contain only paths from changed_files.", "selected_agents[].allowed_files must contain only paths from changed_files when present.", "selected_agents must contain at most max_selected_agents entries, ordered from highest to lowest review value.", - "thread_actions must be an empty array when there are no threads.", + "thread_actions must always be an empty array. Thread lifecycle replies and resolution are handled by the thread_analysis stage.", }, ResponseSchema: map[string]any{ "schema_version": "number, required, must be 1", "selected_agents": "array of {agent_id: string, rationale: string, files: string[], allowed_files?: string[]}", - "thread_actions": "array of {thread_id: string, decision: string, summary: string, safe_to_resolve_rationale: string}", + "thread_actions": "empty array, required", "reasoning": "string", }, AllowedValues: map[string]any{ @@ -2771,7 +2902,6 @@ func selectionOutputContract(agents []agents.Agent, changedFiles []string, threa "changed_files": changedFiles, "known_thread_ids": threadIDs, "max_selected_agents": maxAgents, - "thread_decisions": []string{"skip", "summarize_only", "summarize_and_resolve"}, }, Example: example, } @@ -5013,6 +5143,19 @@ func resolveSelectionRuntimeConfig(profile config.Profile, modelOverride, effort return llmRuntimeConfig{model: resolved.Model, effort: resolved.Effort}, nil } +func resolveThreadAnalysisRuntimeConfig(profile config.Profile) (llmRuntimeConfig, error) { + resolved, err := stagemodel.ResolveStageModel(stagemodel.Request{ + Profile: profile, + Stage: stagemodel.StageThreadAnalysis, + Tier: config.ModelTierMedium, + DefaultEffort: string(modelprefs.EffortMedium), + }) + if err != nil { + return llmRuntimeConfig{}, err + } + return llmRuntimeConfig{model: resolved.Model, effort: resolved.Effort}, nil +} + func resolveSynthesisRuntimeConfig(req Request) (llmRuntimeConfig, error) { resolved, err := stagemodel.ResolveStageModel(stagemodel.Request{ Profile: req.Profile, diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go index 304f4df7..f358f02d 100644 --- a/internal/pipeline/pipeline_test.go +++ b/internal/pipeline/pipeline_test.go @@ -24,8 +24,10 @@ import ( "github.com/open-cli-collective/codereview-cli/internal/gitprovider" "github.com/open-cli-collective/codereview-cli/internal/ledger" "github.com/open-cli-collective/codereview-cli/internal/llm" + "github.com/open-cli-collective/codereview-cli/internal/marker" "github.com/open-cli-collective/codereview-cli/internal/review" "github.com/open-cli-collective/codereview-cli/internal/reviewplan" + "github.com/open-cli-collective/codereview-cli/internal/stagemodel" "github.com/open-cli-collective/codereview-cli/internal/statepaths" ) @@ -3922,30 +3924,30 @@ func TestDryRunNoResolveThreadsKeepsSummaryReplyOnly(t *testing.T) { store := openPipelineStore(t) defer closeStore(t, store) provider, req := dryRunHarness(t) - provider.threads = []gitprovider.InlineThread{{ - ID: "thread-1", - Resolved: false, - Path: "main.go", - Side: review.DiffSideRight, - Line: 2, - SubjectType: review.AnchorKindLine, - }} + human := gitprovider.Identity{Login: "human", ID: "human-id"} + provider.threads = []gitprovider.InlineThread{ + markedReviewThread(t, "thread-1", "main.go", 2, req.PostingIdentity, human), + } provider.caps.ThreadResolution = true req.NoResolveThreads = true req.Profile.AgentSources = nil adapter := &llm.FakeAdapter{NameValue: "fake-llm"} - adapter.Queue(fakeLLMResult("dossier-summary-session", discussionSummaryJSON(nil, []threadSummary{{path: "main.go", line: 2, status: "noted", summary: "Thread summary"}}), 1, 1)) + adapter.Queue(fakeLLMResult("dossier-summary-session", discussionSummaryJSON(nil, nil), 1, 1)) adapter.Queue(fakeLLMResult("selection-session", `{ "schema_version": 1, "selected_agents": [], - "thread_actions": [{ - "thread_id": "thread-1", - "decision": "summarize_and_resolve", - "summary": "Summary only", - "safe_to_resolve_rationale": "safe" - }], + "thread_actions": [], "reasoning": "thread cleanup" }`, 1, 1)) + adapter.Queue(fakeLLMResult("thread-analysis-session", `{ + "schema_version": 1, + "thread_id": "thread-1", + "decision": "summarize", + "reply_body": "", + "summary": "Summary only", + "resolve": true, + "rationale": "safe" + }`, 1, 1)) adapter.Queue(fakeLLMResult("rollup-session", rollupJSON("approve", nil), 1, 1)) result, err := dryRunForTest(ctx, Options{ @@ -3966,6 +3968,20 @@ func TestDryRunNoResolveThreadsKeepsSummaryReplyOnly(t *testing.T) { if result.EffectiveCaps.ThreadResolution { t.Fatal("EffectiveCaps.ThreadResolution = true, want disabled by request") } + requests := adapter.Requests() + if len(requests) != 4 { + t.Fatalf("adapter requests = %d, want dossier/selection/thread-analysis/rollup", len(requests)) + } + if !strings.Contains(requests[1].Prompt, `"status": "pending_human_reply"`) || strings.Contains(requests[1].Prompt, "