diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go index a15e3fa..f54dd32 100644 --- a/cmd/opencodereview/flags.go +++ b/cmd/opencodereview/flags.go @@ -4,6 +4,8 @@ import ( "flag" "fmt" "time" + + "github.com/open-code-review/open-code-review/internal/model" ) // --- custom flag set that supports short flags (-c, -f etc.) --- @@ -103,6 +105,7 @@ type reviewOptions struct { outputFormat string audience string // --audience: "human" (default) or "agent" background string // --background: optional requirement context + minSeverity string // --min-severity: filter comments below this severity concurrency int perFileTimeout int maxTools int @@ -129,6 +132,7 @@ func parseReviewFlags(args []string) (reviewOptions, error) { a.StringVarP(&opts.background, "background", "b", "", "optional requirement/business context for the review") a.IntVar(&opts.maxTools, "max-tools", 0, "max tool call rounds per file; only takes effect when greater than template default") a.IntVar(&opts.maxGitProcs, "max-git-procs", 16, "max concurrent git subprocesses") + a.StringVar(&opts.minSeverity, "min-severity", "", "minimum severity to include in output: critical, warning, suggestion, nitpick") a.BoolVarP(&opts.preview, "preview", "p", false, "preview which files will be reviewed without running the LLM") if err := a.Parse(args); err != nil { @@ -161,6 +165,10 @@ func parseReviewFlags(args []string) (reviewOptions, error) { return opts, fmt.Errorf("invalid --audience value %q: must be 'human' or 'agent'", opts.audience) } + if opts.minSeverity != "" && !model.ValidSeverity(opts.minSeverity) { + return opts, fmt.Errorf("invalid --min-severity value %q: must be one of critical, warning, suggestion, nitpick", opts.minSeverity) + } + if opts.maxTools < 0 { return opts, fmt.Errorf("--max-tools must be a non-negative integer (0 means use template default)") } @@ -197,6 +205,9 @@ Examples: # Agent mode (summary only, no progress lines) ocr review --audience agent + # Only show warnings and above (skip suggestion/nitpick) + ocr review --min-severity warning + # Preview which files will be reviewed ocr review --preview ocr review -c abc123 -p @@ -208,6 +219,7 @@ Flags: -f, --format string output format: text or json (default "text") --concurrency int max concurrent file reviews (default 8) --max-git-procs int max concurrent git subprocesses (default 16) + --min-severity string minimum severity to include: critical, warning, suggestion, nitpick --from string source ref to start diff from (e.g., 'main') --max-tools int max tool call rounds per file; only takes effect when greater than template default -p, --preview preview which files will be reviewed without running the LLM diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go index 9bbd980..3d1a26e 100644 --- a/cmd/opencodereview/output.go +++ b/cmd/opencodereview/output.go @@ -12,6 +12,37 @@ import ( "github.com/open-code-review/open-code-review/internal/suggestdiff" ) +func severityBadge(s model.Severity) string { + switch s { + case model.SeverityCritical: + return "\033[91m[critical]\033[0m" + case model.SeverityWarning: + return "\033[93m[warning]\033[0m" + case model.SeveritySuggestion: + return "\033[96m[suggestion]\033[0m" + case model.SeverityNitpick: + return "\033[2m[nitpick]\033[0m" + default: + return "" + } +} + +func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.LlmComment { + if minSeverity == "" { + return comments + } + minRank := model.Severity(minSeverity).Rank() + filtered := make([]model.LlmComment, 0) + for _, c := range comments { + // Unset severity (Rank 0) is treated as lowest priority and filtered out + // when --min-severity is specified. + if c.Severity.Rank() >= minRank { + filtered = append(filtered, c) + } + } + return filtered +} + func outputText(comments []model.LlmComment) { if len(comments) == 0 { fmt.Println("No comments generated. Looks good to me.") @@ -57,7 +88,11 @@ func renderComment(comment model.LlmComment) { return } - fmt.Printf("\n\033[2m─── %s:%d-%d ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine) + severityTag := "" + if comment.Severity != "" { + severityTag = " " + severityBadge(comment.Severity) + } + fmt.Printf("\n\033[2m─── %s:%d-%d%s ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine, severityTag) if comment.Content != "" { for _, ln := range wrapByRunes(comment.Content, 100) { diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go new file mode 100644 index 0000000..bea324b --- /dev/null +++ b/cmd/opencodereview/output_test.go @@ -0,0 +1,69 @@ +package main + +import ( + "testing" + + "github.com/open-code-review/open-code-review/internal/model" +) + +func TestFilterBySeverity(t *testing.T) { + comments := []model.LlmComment{ + {Path: "a.go", Content: "critical bug", Severity: model.SeverityCritical}, + {Path: "b.go", Content: "warning issue", Severity: model.SeverityWarning}, + {Path: "c.go", Content: "suggestion", Severity: model.SeveritySuggestion}, + {Path: "d.go", Content: "nitpick", Severity: model.SeverityNitpick}, + {Path: "e.go", Content: "no severity"}, + } + + tests := []struct { + minSeverity string + wantCount int + wantPaths []string + }{ + // No filter: all comments returned. + {"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}}, + // nitpick: keeps all with severity, filters out unset (rank 0 < 1). + {"nitpick", 4, []string{"a.go", "b.go", "c.go", "d.go"}}, + // suggestion: keeps critical + warning + suggestion. + {"suggestion", 3, []string{"a.go", "b.go", "c.go"}}, + // warning: keeps critical + warning. + {"warning", 2, []string{"a.go", "b.go"}}, + // critical: keeps only critical. + {"critical", 1, []string{"a.go"}}, + } + + for _, tt := range tests { + t.Run("min="+tt.minSeverity, func(t *testing.T) { + got := filterBySeverity(comments, tt.minSeverity) + if len(got) != tt.wantCount { + t.Fatalf("got %d comments, want %d", len(got), tt.wantCount) + } + for i, want := range tt.wantPaths { + if got[i].Path != want { + t.Errorf("got[%d].Path = %q, want %q", i, got[i].Path, want) + } + } + }) + } +} + +func TestFilterBySeverity_Empty(t *testing.T) { + got := filterBySeverity(nil, "warning") + if len(got) != 0 { + t.Errorf("expected empty slice, got %v", got) + } +} + +func TestFilterBySeverity_ReturnsNonNilSlice(t *testing.T) { + // When minSeverity is set, result should always be non-nil (for JSON: [] not null). + comments := []model.LlmComment{ + {Path: "a.go", Content: "nitpick", Severity: model.SeverityNitpick}, + } + got := filterBySeverity(comments, "critical") + if got == nil { + t.Fatal("expected non-nil empty slice, got nil") + } + if len(got) != 0 { + t.Errorf("expected 0 comments, got %d", len(got)) + } +} diff --git a/cmd/opencodereview/review_cmd.go b/cmd/opencodereview/review_cmd.go index 3a8987f..6d1827b 100644 --- a/cmd/opencodereview/review_cmd.go +++ b/cmd/opencodereview/review_cmd.go @@ -150,6 +150,9 @@ func runReview(args []string) error { // Resolve line numbers by matching existing_code against diff hunks. comments = diff.ResolveLineNumbers(comments, ag.Diffs()) + // Filter by minimum severity if requested. + comments = filterBySeverity(comments, opts.minSeverity) + // Record summary metrics (files_reviewed is refined by agent.Run). duration := time.Since(startTime) telemetry.RecordReviewDuration(ctx, duration) diff --git a/internal/config/template/task_template.json b/internal/config/template/task_template.json index 0e3e813..d6822a7 100644 --- a/internal/config/template/task_template.json +++ b/internal/config/template/task_template.json @@ -3,7 +3,7 @@ "messages": [ { "role": "system", - "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in , and you will use to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback.\n- If additional context is needed to confirm the issue, call the appropriate context tool." + "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in , and you will use to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Severity Classification\nWhen reporting issues via `code_comment`, you MUST assign a severity level to each comment:\n- `critical`: Security vulnerabilities, data loss, system crashes, or critical functional failures.\n- `warning`: Potential bugs, performance issues, race conditions, or edge-case problems.\n- `suggestion`: Improvements to maintainability, readability, or best practice adoption.\n- `nitpick`: Minor style or formatting issues with no functional impact.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback with an appropriate severity level.\n- If additional context is needed to confirm the issue, call the appropriate context tool." }, { "role": "user", diff --git a/internal/config/toolsconfig/tools.json b/internal/config/toolsconfig/tools.json index 1784c23..1c97a7e 100644 --- a/internal/config/toolsconfig/tools.json +++ b/internal/config/toolsconfig/tools.json @@ -51,11 +51,17 @@ "suggestion_code": { "type": "string", "description": "Corresponding suggested code snippet, maintaining consistent code style." + }, + "severity": { + "type": "string", + "enum": ["critical", "warning", "suggestion", "nitpick"], + "description": "Severity level of the issue. critical: security vulnerabilities, data loss, system crashes. warning: potential bugs, performance issues, edge cases. suggestion: improvements to maintainability, readability, best practices. nitpick: minor style or formatting issues." } }, "required": [ "content", - "existing_code" + "existing_code", + "severity" ] } } diff --git a/internal/model/review.go b/internal/model/review.go index 914d51c..52f4178 100644 --- a/internal/model/review.go +++ b/internal/model/review.go @@ -1,14 +1,50 @@ package model +// Severity represents the severity level of a review comment. +type Severity string + +const ( + SeverityCritical Severity = "critical" + SeverityWarning Severity = "warning" + SeveritySuggestion Severity = "suggestion" + SeverityNitpick Severity = "nitpick" +) + +// SeverityRank returns an integer rank for ordering (higher = more severe). +func (s Severity) Rank() int { + switch s { + case SeverityCritical: + return 4 + case SeverityWarning: + return 3 + case SeveritySuggestion: + return 2 + case SeverityNitpick: + return 1 + default: + return 0 + } +} + +// ValidSeverity reports whether s is a recognized severity level. +func ValidSeverity(s string) bool { + switch Severity(s) { + case SeverityCritical, SeverityWarning, SeveritySuggestion, SeverityNitpick: + return true + } + return false +} + // LlmComment represents a code review comment generated by the LLM. type LlmComment struct { - Path string `json:"path"` - Content string `json:"content"` - SuggestionCode string `json:"suggestion_code,omitempty"` - ExistingCode string `json:"existing_code,omitempty"` - StartLine int `json:"start_line"` - EndLine int `json:"end_line"` - Thinking string `json:"thinking,omitempty"` + Path string `json:"path"` + Content string `json:"content"` + Severity Severity `json:"severity,omitempty"` + SuggestionCode string `json:"suggestion_code,omitempty"` + ExistingCode string `json:"existing_code,omitempty"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + Thinking string `json:"thinking,omitempty"` } // CodeReviewResult holds raw LLM-generated review suggestion for a code segment. diff --git a/internal/model/review_test.go b/internal/model/review_test.go new file mode 100644 index 0000000..1f2c817 --- /dev/null +++ b/internal/model/review_test.go @@ -0,0 +1,49 @@ +package model + +import "testing" + +func TestSeverity_Rank(t *testing.T) { + tests := []struct { + severity Severity + wantRank int + }{ + {SeverityCritical, 4}, + {SeverityWarning, 3}, + {SeveritySuggestion, 2}, + {SeverityNitpick, 1}, + {Severity("unknown"), 0}, + {Severity(""), 0}, + } + for _, tt := range tests { + if got := tt.severity.Rank(); got != tt.wantRank { + t.Errorf("Severity(%q).Rank() = %d, want %d", tt.severity, got, tt.wantRank) + } + } +} + +func TestSeverity_Ordering(t *testing.T) { + if SeverityCritical.Rank() <= SeverityWarning.Rank() { + t.Error("critical should outrank warning") + } + if SeverityWarning.Rank() <= SeveritySuggestion.Rank() { + t.Error("warning should outrank suggestion") + } + if SeveritySuggestion.Rank() <= SeverityNitpick.Rank() { + t.Error("suggestion should outrank nitpick") + } +} + +func TestValidSeverity(t *testing.T) { + valid := []string{"critical", "warning", "suggestion", "nitpick"} + for _, s := range valid { + if !ValidSeverity(s) { + t.Errorf("ValidSeverity(%q) = false, want true", s) + } + } + invalid := []string{"", "high", "low", "error", "Critical"} + for _, s := range invalid { + if ValidSeverity(s) { + t.Errorf("ValidSeverity(%q) = true, want false", s) + } + } +} diff --git a/internal/tool/code_comment.go b/internal/tool/code_comment.go index c114a1b..d9aac1a 100644 --- a/internal/tool/code_comment.go +++ b/internal/tool/code_comment.go @@ -63,6 +63,9 @@ func ParseComments(args map[string]any) ([]model.LlmComment, string) { if existing, ok := obj["existing_code"].(string); ok { cm.ExistingCode = existing } + if severity, ok := obj["severity"].(string); ok && model.ValidSeverity(severity) { + cm.Severity = model.Severity(severity) + } if thinking, ok := obj["thinking"].(string); ok { cm.Thinking = thinking }