From 34fca481af0c16492ae92ce32fc50580ea3ef20e Mon Sep 17 00:00:00 2001 From: anchen Date: Mon, 8 Jun 2026 17:14:55 +0800 Subject: [PATCH 1/2] feat: add severity classification for review comments Introduce a 4-level severity system (critical/warning/suggestion/nitpick) for LLM-generated code review comments, enabling CI pipelines to gate on issue severity and users to filter noise from review output. Co-Authored-By: Claude Opus 4.6 --- cmd/opencodereview/flags.go | 12 +++++ cmd/opencodereview/output.go | 35 ++++++++++++++- cmd/opencodereview/output_test.go | 50 +++++++++++++++++++++ cmd/opencodereview/review_cmd.go | 3 ++ internal/config/template/task_template.json | 2 +- internal/config/toolsconfig/tools.json | 8 +++- internal/model/review.go | 50 ++++++++++++++++++--- internal/model/review_test.go | 49 ++++++++++++++++++++ internal/tool/code_comment.go | 3 ++ 9 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 cmd/opencodereview/output_test.go create mode 100644 internal/model/review_test.go diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go index a15e3fa..f54dd32 100644 --- a/cmd/opencodereview/flags.go +++ b/cmd/opencodereview/flags.go @@ -4,6 +4,8 @@ import ( "flag" "fmt" "time" + + "github.com/open-code-review/open-code-review/internal/model" ) // --- custom flag set that supports short flags (-c, -f etc.) --- @@ -103,6 +105,7 @@ type reviewOptions struct { outputFormat string audience string // --audience: "human" (default) or "agent" background string // --background: optional requirement context + minSeverity string // --min-severity: filter comments below this severity concurrency int perFileTimeout int maxTools int @@ -129,6 +132,7 @@ func parseReviewFlags(args []string) (reviewOptions, error) { a.StringVarP(&opts.background, "background", "b", "", "optional requirement/business context for the review") a.IntVar(&opts.maxTools, "max-tools", 0, "max tool call rounds per file; only takes effect when greater than template default") a.IntVar(&opts.maxGitProcs, "max-git-procs", 16, "max concurrent git subprocesses") + a.StringVar(&opts.minSeverity, "min-severity", "", "minimum severity to include in output: critical, warning, suggestion, nitpick") a.BoolVarP(&opts.preview, "preview", "p", false, "preview which files will be reviewed without running the LLM") if err := a.Parse(args); err != nil { @@ -161,6 +165,10 @@ func parseReviewFlags(args []string) (reviewOptions, error) { return opts, fmt.Errorf("invalid --audience value %q: must be 'human' or 'agent'", opts.audience) } + if opts.minSeverity != "" && !model.ValidSeverity(opts.minSeverity) { + return opts, fmt.Errorf("invalid --min-severity value %q: must be one of critical, warning, suggestion, nitpick", opts.minSeverity) + } + if opts.maxTools < 0 { return opts, fmt.Errorf("--max-tools must be a non-negative integer (0 means use template default)") } @@ -197,6 +205,9 @@ Examples: # Agent mode (summary only, no progress lines) ocr review --audience agent + # Only show warnings and above (skip suggestion/nitpick) + ocr review --min-severity warning + # Preview which files will be reviewed ocr review --preview ocr review -c abc123 -p @@ -208,6 +219,7 @@ Flags: -f, --format string output format: text or json (default "text") --concurrency int max concurrent file reviews (default 8) --max-git-procs int max concurrent git subprocesses (default 16) + --min-severity string minimum severity to include: critical, warning, suggestion, nitpick --from string source ref to start diff from (e.g., 'main') --max-tools int max tool call rounds per file; only takes effect when greater than template default -p, --preview preview which files will be reviewed without running the LLM diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go index 9bbd980..ddb88b1 100644 --- a/cmd/opencodereview/output.go +++ b/cmd/opencodereview/output.go @@ -12,6 +12,35 @@ import ( "github.com/open-code-review/open-code-review/internal/suggestdiff" ) +func severityBadge(s model.Severity) string { + switch s { + case model.SeverityCritical: + return "\033[91m[critical]\033[0m" + case model.SeverityWarning: + return "\033[93m[warning]\033[0m" + case model.SeveritySuggestion: + return "\033[96m[suggestion]\033[0m" + case model.SeverityNitpick: + return "\033[2m[nitpick]\033[0m" + default: + return "" + } +} + +func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.LlmComment { + if minSeverity == "" { + return comments + } + minRank := model.Severity(minSeverity).Rank() + var filtered []model.LlmComment + for _, c := range comments { + if c.Severity == "" || c.Severity.Rank() >= minRank { + filtered = append(filtered, c) + } + } + return filtered +} + func outputText(comments []model.LlmComment) { if len(comments) == 0 { fmt.Println("No comments generated. Looks good to me.") @@ -57,7 +86,11 @@ func renderComment(comment model.LlmComment) { return } - fmt.Printf("\n\033[2m─── %s:%d-%d ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine) + severityTag := "" + if comment.Severity != "" { + severityTag = " " + severityBadge(comment.Severity) + } + fmt.Printf("\n\033[2m─── %s:%d-%d%s ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine, severityTag) if comment.Content != "" { for _, ln := range wrapByRunes(comment.Content, 100) { diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go new file mode 100644 index 0000000..b02269f --- /dev/null +++ b/cmd/opencodereview/output_test.go @@ -0,0 +1,50 @@ +package main + +import ( + "testing" + + "github.com/open-code-review/open-code-review/internal/model" +) + +func TestFilterBySeverity(t *testing.T) { + comments := []model.LlmComment{ + {Path: "a.go", Content: "critical bug", Severity: model.SeverityCritical}, + {Path: "b.go", Content: "warning issue", Severity: model.SeverityWarning}, + {Path: "c.go", Content: "suggestion", Severity: model.SeveritySuggestion}, + {Path: "d.go", Content: "nitpick", Severity: model.SeverityNitpick}, + {Path: "e.go", Content: "no severity"}, + } + + tests := []struct { + minSeverity string + wantCount int + wantPaths []string + }{ + {"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}}, + {"nitpick", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}}, + {"suggestion", 4, []string{"a.go", "b.go", "c.go", "e.go"}}, + {"warning", 3, []string{"a.go", "b.go", "e.go"}}, + {"critical", 2, []string{"a.go", "e.go"}}, + } + + for _, tt := range tests { + t.Run("min="+tt.minSeverity, func(t *testing.T) { + got := filterBySeverity(comments, tt.minSeverity) + if len(got) != tt.wantCount { + t.Fatalf("got %d comments, want %d", len(got), tt.wantCount) + } + for i, want := range tt.wantPaths { + if got[i].Path != want { + t.Errorf("got[%d].Path = %q, want %q", i, got[i].Path, want) + } + } + }) + } +} + +func TestFilterBySeverity_Empty(t *testing.T) { + got := filterBySeverity(nil, "warning") + if got != nil { + t.Errorf("expected nil, got %v", got) + } +} diff --git a/cmd/opencodereview/review_cmd.go b/cmd/opencodereview/review_cmd.go index 3a8987f..6d1827b 100644 --- a/cmd/opencodereview/review_cmd.go +++ b/cmd/opencodereview/review_cmd.go @@ -150,6 +150,9 @@ func runReview(args []string) error { // Resolve line numbers by matching existing_code against diff hunks. comments = diff.ResolveLineNumbers(comments, ag.Diffs()) + // Filter by minimum severity if requested. + comments = filterBySeverity(comments, opts.minSeverity) + // Record summary metrics (files_reviewed is refined by agent.Run). duration := time.Since(startTime) telemetry.RecordReviewDuration(ctx, duration) diff --git a/internal/config/template/task_template.json b/internal/config/template/task_template.json index 0e3e813..d6822a7 100644 --- a/internal/config/template/task_template.json +++ b/internal/config/template/task_template.json @@ -3,7 +3,7 @@ "messages": [ { "role": "system", - "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in , and you will use to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback.\n- If additional context is needed to confirm the issue, call the appropriate context tool." + "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in , and you will use to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Severity Classification\nWhen reporting issues via `code_comment`, you MUST assign a severity level to each comment:\n- `critical`: Security vulnerabilities, data loss, system crashes, or critical functional failures.\n- `warning`: Potential bugs, performance issues, race conditions, or edge-case problems.\n- `suggestion`: Improvements to maintainability, readability, or best practice adoption.\n- `nitpick`: Minor style or formatting issues with no functional impact.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback with an appropriate severity level.\n- If additional context is needed to confirm the issue, call the appropriate context tool." }, { "role": "user", diff --git a/internal/config/toolsconfig/tools.json b/internal/config/toolsconfig/tools.json index 1784c23..1c97a7e 100644 --- a/internal/config/toolsconfig/tools.json +++ b/internal/config/toolsconfig/tools.json @@ -51,11 +51,17 @@ "suggestion_code": { "type": "string", "description": "Corresponding suggested code snippet, maintaining consistent code style." + }, + "severity": { + "type": "string", + "enum": ["critical", "warning", "suggestion", "nitpick"], + "description": "Severity level of the issue. critical: security vulnerabilities, data loss, system crashes. warning: potential bugs, performance issues, edge cases. suggestion: improvements to maintainability, readability, best practices. nitpick: minor style or formatting issues." } }, "required": [ "content", - "existing_code" + "existing_code", + "severity" ] } } diff --git a/internal/model/review.go b/internal/model/review.go index 914d51c..52f4178 100644 --- a/internal/model/review.go +++ b/internal/model/review.go @@ -1,14 +1,50 @@ package model +// Severity represents the severity level of a review comment. +type Severity string + +const ( + SeverityCritical Severity = "critical" + SeverityWarning Severity = "warning" + SeveritySuggestion Severity = "suggestion" + SeverityNitpick Severity = "nitpick" +) + +// SeverityRank returns an integer rank for ordering (higher = more severe). +func (s Severity) Rank() int { + switch s { + case SeverityCritical: + return 4 + case SeverityWarning: + return 3 + case SeveritySuggestion: + return 2 + case SeverityNitpick: + return 1 + default: + return 0 + } +} + +// ValidSeverity reports whether s is a recognized severity level. +func ValidSeverity(s string) bool { + switch Severity(s) { + case SeverityCritical, SeverityWarning, SeveritySuggestion, SeverityNitpick: + return true + } + return false +} + // LlmComment represents a code review comment generated by the LLM. type LlmComment struct { - Path string `json:"path"` - Content string `json:"content"` - SuggestionCode string `json:"suggestion_code,omitempty"` - ExistingCode string `json:"existing_code,omitempty"` - StartLine int `json:"start_line"` - EndLine int `json:"end_line"` - Thinking string `json:"thinking,omitempty"` + Path string `json:"path"` + Content string `json:"content"` + Severity Severity `json:"severity,omitempty"` + SuggestionCode string `json:"suggestion_code,omitempty"` + ExistingCode string `json:"existing_code,omitempty"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + Thinking string `json:"thinking,omitempty"` } // CodeReviewResult holds raw LLM-generated review suggestion for a code segment. diff --git a/internal/model/review_test.go b/internal/model/review_test.go new file mode 100644 index 0000000..1f2c817 --- /dev/null +++ b/internal/model/review_test.go @@ -0,0 +1,49 @@ +package model + +import "testing" + +func TestSeverity_Rank(t *testing.T) { + tests := []struct { + severity Severity + wantRank int + }{ + {SeverityCritical, 4}, + {SeverityWarning, 3}, + {SeveritySuggestion, 2}, + {SeverityNitpick, 1}, + {Severity("unknown"), 0}, + {Severity(""), 0}, + } + for _, tt := range tests { + if got := tt.severity.Rank(); got != tt.wantRank { + t.Errorf("Severity(%q).Rank() = %d, want %d", tt.severity, got, tt.wantRank) + } + } +} + +func TestSeverity_Ordering(t *testing.T) { + if SeverityCritical.Rank() <= SeverityWarning.Rank() { + t.Error("critical should outrank warning") + } + if SeverityWarning.Rank() <= SeveritySuggestion.Rank() { + t.Error("warning should outrank suggestion") + } + if SeveritySuggestion.Rank() <= SeverityNitpick.Rank() { + t.Error("suggestion should outrank nitpick") + } +} + +func TestValidSeverity(t *testing.T) { + valid := []string{"critical", "warning", "suggestion", "nitpick"} + for _, s := range valid { + if !ValidSeverity(s) { + t.Errorf("ValidSeverity(%q) = false, want true", s) + } + } + invalid := []string{"", "high", "low", "error", "Critical"} + for _, s := range invalid { + if ValidSeverity(s) { + t.Errorf("ValidSeverity(%q) = true, want false", s) + } + } +} diff --git a/internal/tool/code_comment.go b/internal/tool/code_comment.go index c114a1b..d9aac1a 100644 --- a/internal/tool/code_comment.go +++ b/internal/tool/code_comment.go @@ -63,6 +63,9 @@ func ParseComments(args map[string]any) ([]model.LlmComment, string) { if existing, ok := obj["existing_code"].(string); ok { cm.ExistingCode = existing } + if severity, ok := obj["severity"].(string); ok && model.ValidSeverity(severity) { + cm.Severity = model.Severity(severity) + } if thinking, ok := obj["thinking"].(string); ok { cm.Thinking = thinking } From e20ebabe75434dcde67b10b0ac14d054de62346c Mon Sep 17 00:00:00 2001 From: anchen Date: Tue, 9 Jun 2026 10:45:08 +0800 Subject: [PATCH 2/2] fix: filter unset-severity comments and avoid nil JSON slice - Treat comments without severity (rank 0) as lowest priority, filtered out when --min-severity is specified - Use make([]LlmComment, 0) to ensure JSON output produces [] not null Co-Authored-By: Claude Opus 4.6 --- cmd/opencodereview/output.go | 6 ++++-- cmd/opencodereview/output_test.go | 31 +++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go index ddb88b1..3d1a26e 100644 --- a/cmd/opencodereview/output.go +++ b/cmd/opencodereview/output.go @@ -32,9 +32,11 @@ func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.L return comments } minRank := model.Severity(minSeverity).Rank() - var filtered []model.LlmComment + filtered := make([]model.LlmComment, 0) for _, c := range comments { - if c.Severity == "" || c.Severity.Rank() >= minRank { + // Unset severity (Rank 0) is treated as lowest priority and filtered out + // when --min-severity is specified. + if c.Severity.Rank() >= minRank { filtered = append(filtered, c) } } diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go index b02269f..bea324b 100644 --- a/cmd/opencodereview/output_test.go +++ b/cmd/opencodereview/output_test.go @@ -20,11 +20,16 @@ func TestFilterBySeverity(t *testing.T) { wantCount int wantPaths []string }{ + // No filter: all comments returned. {"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}}, - {"nitpick", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}}, - {"suggestion", 4, []string{"a.go", "b.go", "c.go", "e.go"}}, - {"warning", 3, []string{"a.go", "b.go", "e.go"}}, - {"critical", 2, []string{"a.go", "e.go"}}, + // nitpick: keeps all with severity, filters out unset (rank 0 < 1). + {"nitpick", 4, []string{"a.go", "b.go", "c.go", "d.go"}}, + // suggestion: keeps critical + warning + suggestion. + {"suggestion", 3, []string{"a.go", "b.go", "c.go"}}, + // warning: keeps critical + warning. + {"warning", 2, []string{"a.go", "b.go"}}, + // critical: keeps only critical. + {"critical", 1, []string{"a.go"}}, } for _, tt := range tests { @@ -44,7 +49,21 @@ func TestFilterBySeverity(t *testing.T) { func TestFilterBySeverity_Empty(t *testing.T) { got := filterBySeverity(nil, "warning") - if got != nil { - t.Errorf("expected nil, got %v", got) + if len(got) != 0 { + t.Errorf("expected empty slice, got %v", got) + } +} + +func TestFilterBySeverity_ReturnsNonNilSlice(t *testing.T) { + // When minSeverity is set, result should always be non-nil (for JSON: [] not null). + comments := []model.LlmComment{ + {Path: "a.go", Content: "nitpick", Severity: model.SeverityNitpick}, + } + got := filterBySeverity(comments, "critical") + if got == nil { + t.Fatal("expected non-nil empty slice, got nil") + } + if len(got) != 0 { + t.Errorf("expected 0 comments, got %d", len(got)) } }