From 34fca481af0c16492ae92ce32fc50580ea3ef20e Mon Sep 17 00:00:00 2001
From: anchen <anchen.spp@alibaba-inc.com>
Date: Mon, 8 Jun 2026 17:14:55 +0800
Subject: [PATCH 1/2] feat: add severity classification for review comments

Introduce a 4-level severity system (critical/warning/suggestion/nitpick)
for LLM-generated code review comments, enabling CI pipelines to gate on
issue severity and users to filter noise from review output.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 cmd/opencodereview/flags.go                 | 12 +++++
 cmd/opencodereview/output.go                | 35 ++++++++++++++-
 cmd/opencodereview/output_test.go           | 50 +++++++++++++++++++++
 cmd/opencodereview/review_cmd.go            |  3 ++
 internal/config/template/task_template.json |  2 +-
 internal/config/toolsconfig/tools.json      |  8 +++-
 internal/model/review.go                    | 50 ++++++++++++++++++---
 internal/model/review_test.go               | 49 ++++++++++++++++++++
 internal/tool/code_comment.go               |  3 ++
 9 files changed, 202 insertions(+), 10 deletions(-)
 create mode 100644 cmd/opencodereview/output_test.go
 create mode 100644 internal/model/review_test.go

diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go
index a15e3fa..f54dd32 100644
--- a/cmd/opencodereview/flags.go
+++ b/cmd/opencodereview/flags.go
@@ -4,6 +4,8 @@ import (
 	"flag"
 	"fmt"
 	"time"
+
+	"github.com/open-code-review/open-code-review/internal/model"
 )
 
 // --- custom flag set that supports short flags (-c, -f etc.) ---
@@ -103,6 +105,7 @@ type reviewOptions struct {
 	outputFormat   string
 	audience       string // --audience: "human" (default) or "agent"
 	background     string // --background: optional requirement context
+	minSeverity    string // --min-severity: filter comments below this severity
 	concurrency    int
 	perFileTimeout int
 	maxTools       int
@@ -129,6 +132,7 @@ func parseReviewFlags(args []string) (reviewOptions, error) {
 	a.StringVarP(&opts.background, "background", "b", "", "optional requirement/business context for the review")
 	a.IntVar(&opts.maxTools, "max-tools", 0, "max tool call rounds per file; only takes effect when greater than template default")
 	a.IntVar(&opts.maxGitProcs, "max-git-procs", 16, "max concurrent git subprocesses")
+	a.StringVar(&opts.minSeverity, "min-severity", "", "minimum severity to include in output: critical, warning, suggestion, nitpick")
 	a.BoolVarP(&opts.preview, "preview", "p", false, "preview which files will be reviewed without running the LLM")
 
 	if err := a.Parse(args); err != nil {
@@ -161,6 +165,10 @@ func parseReviewFlags(args []string) (reviewOptions, error) {
 		return opts, fmt.Errorf("invalid --audience value %q: must be 'human' or 'agent'", opts.audience)
 	}
 
+	if opts.minSeverity != "" && !model.ValidSeverity(opts.minSeverity) {
+		return opts, fmt.Errorf("invalid --min-severity value %q: must be one of critical, warning, suggestion, nitpick", opts.minSeverity)
+	}
+
 	if opts.maxTools < 0 {
 		return opts, fmt.Errorf("--max-tools must be a non-negative integer (0 means use template default)")
 	}
@@ -197,6 +205,9 @@ Examples:
   # Agent mode (summary only, no progress lines)
   ocr review --audience agent
 
+  # Only show warnings and above (skip suggestion/nitpick)
+  ocr review --min-severity warning
+
   # Preview which files will be reviewed
   ocr review --preview
   ocr review -c abc123 -p
@@ -208,6 +219,7 @@ Flags:
   -f, --format string     output format: text or json (default "text")
   --concurrency int       max concurrent file reviews (default 8)
   --max-git-procs int     max concurrent git subprocesses (default 16)
+  --min-severity string   minimum severity to include: critical, warning, suggestion, nitpick
   --from string           source ref to start diff from (e.g., 'main')
   --max-tools int         max tool call rounds per file; only takes effect when greater than template default
   -p, --preview           preview which files will be reviewed without running the LLM
diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go
index 9bbd980..ddb88b1 100644
--- a/cmd/opencodereview/output.go
+++ b/cmd/opencodereview/output.go
@@ -12,6 +12,35 @@ import (
 	"github.com/open-code-review/open-code-review/internal/suggestdiff"
 )
 
+func severityBadge(s model.Severity) string {
+	switch s {
+	case model.SeverityCritical:
+		return "\033[91m[critical]\033[0m"
+	case model.SeverityWarning:
+		return "\033[93m[warning]\033[0m"
+	case model.SeveritySuggestion:
+		return "\033[96m[suggestion]\033[0m"
+	case model.SeverityNitpick:
+		return "\033[2m[nitpick]\033[0m"
+	default:
+		return ""
+	}
+}
+
+func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.LlmComment {
+	if minSeverity == "" {
+		return comments
+	}
+	minRank := model.Severity(minSeverity).Rank()
+	var filtered []model.LlmComment
+	for _, c := range comments {
+		if c.Severity == "" || c.Severity.Rank() >= minRank {
+			filtered = append(filtered, c)
+		}
+	}
+	return filtered
+}
+
 func outputText(comments []model.LlmComment) {
 	if len(comments) == 0 {
 		fmt.Println("No comments generated. Looks good to me.")
@@ -57,7 +86,11 @@ func renderComment(comment model.LlmComment) {
 		return
 	}
 
-	fmt.Printf("\n\033[2m─── %s:%d-%d ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine)
+	severityTag := ""
+	if comment.Severity != "" {
+		severityTag = " " + severityBadge(comment.Severity)
+	}
+	fmt.Printf("\n\033[2m─── %s:%d-%d%s ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine, severityTag)
 
 	if comment.Content != "" {
 		for _, ln := range wrapByRunes(comment.Content, 100) {
diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go
new file mode 100644
index 0000000..b02269f
--- /dev/null
+++ b/cmd/opencodereview/output_test.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+	"testing"
+
+	"github.com/open-code-review/open-code-review/internal/model"
+)
+
+func TestFilterBySeverity(t *testing.T) {
+	comments := []model.LlmComment{
+		{Path: "a.go", Content: "critical bug", Severity: model.SeverityCritical},
+		{Path: "b.go", Content: "warning issue", Severity: model.SeverityWarning},
+		{Path: "c.go", Content: "suggestion", Severity: model.SeveritySuggestion},
+		{Path: "d.go", Content: "nitpick", Severity: model.SeverityNitpick},
+		{Path: "e.go", Content: "no severity"},
+	}
+
+	tests := []struct {
+		minSeverity string
+		wantCount   int
+		wantPaths   []string
+	}{
+		{"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}},
+		{"nitpick", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}},
+		{"suggestion", 4, []string{"a.go", "b.go", "c.go", "e.go"}},
+		{"warning", 3, []string{"a.go", "b.go", "e.go"}},
+		{"critical", 2, []string{"a.go", "e.go"}},
+	}
+
+	for _, tt := range tests {
+		t.Run("min="+tt.minSeverity, func(t *testing.T) {
+			got := filterBySeverity(comments, tt.minSeverity)
+			if len(got) != tt.wantCount {
+				t.Fatalf("got %d comments, want %d", len(got), tt.wantCount)
+			}
+			for i, want := range tt.wantPaths {
+				if got[i].Path != want {
+					t.Errorf("got[%d].Path = %q, want %q", i, got[i].Path, want)
+				}
+			}
+		})
+	}
+}
+
+func TestFilterBySeverity_Empty(t *testing.T) {
+	got := filterBySeverity(nil, "warning")
+	if got != nil {
+		t.Errorf("expected nil, got %v", got)
+	}
+}
diff --git a/cmd/opencodereview/review_cmd.go b/cmd/opencodereview/review_cmd.go
index 3a8987f..6d1827b 100644
--- a/cmd/opencodereview/review_cmd.go
+++ b/cmd/opencodereview/review_cmd.go
@@ -150,6 +150,9 @@ func runReview(args []string) error {
 	// Resolve line numbers by matching existing_code against diff hunks.
 	comments = diff.ResolveLineNumbers(comments, ag.Diffs())
 
+	// Filter by minimum severity if requested.
+	comments = filterBySeverity(comments, opts.minSeverity)
+
 	// Record summary metrics (files_reviewed is refined by agent.Run).
 	duration := time.Since(startTime)
 	telemetry.RecordReviewDuration(ctx, duration)
diff --git a/internal/config/template/task_template.json b/internal/config/template/task_template.json
index 0e3e813..d6822a7 100644
--- a/internal/config/template/task_template.json
+++ b/internal/config/template/task_template.json
@@ -3,7 +3,7 @@
     "messages": [
       {
         "role": "system",
-        "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in <user_task>, and you will use <tool> to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback.\n- If additional context is needed to confirm the issue, call the appropriate context tool."
+        "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in <user_task>, and you will use <tool> to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Severity Classification\nWhen reporting issues via `code_comment`, you MUST assign a severity level to each comment:\n- `critical`: Security vulnerabilities, data loss, system crashes, or critical functional failures.\n- `warning`: Potential bugs, performance issues, race conditions, or edge-case problems.\n- `suggestion`: Improvements to maintainability, readability, or best practice adoption.\n- `nitpick`: Minor style or formatting issues with no functional impact.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback with an appropriate severity level.\n- If additional context is needed to confirm the issue, call the appropriate context tool."
       },
       {
         "role": "user",
diff --git a/internal/config/toolsconfig/tools.json b/internal/config/toolsconfig/tools.json
index 1784c23..1c97a7e 100644
--- a/internal/config/toolsconfig/tools.json
+++ b/internal/config/toolsconfig/tools.json
@@ -51,11 +51,17 @@
                 "suggestion_code": {
                   "type": "string",
                   "description": "Corresponding suggested code snippet, maintaining consistent code style."
+                },
+                "severity": {
+                  "type": "string",
+                  "enum": ["critical", "warning", "suggestion", "nitpick"],
+                  "description": "Severity level of the issue. critical: security vulnerabilities, data loss, system crashes. warning: potential bugs, performance issues, edge cases. suggestion: improvements to maintainability, readability, best practices. nitpick: minor style or formatting issues."
                 }
               },
               "required": [
                 "content",
-                "existing_code"
+                "existing_code",
+                "severity"
               ]
             }
           }
diff --git a/internal/model/review.go b/internal/model/review.go
index 914d51c..52f4178 100644
--- a/internal/model/review.go
+++ b/internal/model/review.go
@@ -1,14 +1,50 @@
 package model
 
+// Severity represents the severity level of a review comment.
+type Severity string
+
+const (
+	SeverityCritical   Severity = "critical"
+	SeverityWarning    Severity = "warning"
+	SeveritySuggestion Severity = "suggestion"
+	SeverityNitpick    Severity = "nitpick"
+)
+
+// SeverityRank returns an integer rank for ordering (higher = more severe).
+func (s Severity) Rank() int {
+	switch s {
+	case SeverityCritical:
+		return 4
+	case SeverityWarning:
+		return 3
+	case SeveritySuggestion:
+		return 2
+	case SeverityNitpick:
+		return 1
+	default:
+		return 0
+	}
+}
+
+// ValidSeverity reports whether s is a recognized severity level.
+func ValidSeverity(s string) bool {
+	switch Severity(s) {
+	case SeverityCritical, SeverityWarning, SeveritySuggestion, SeverityNitpick:
+		return true
+	}
+	return false
+}
+
 // LlmComment represents a code review comment generated by the LLM.
 type LlmComment struct {
-	Path           string `json:"path"`
-	Content        string `json:"content"`
-	SuggestionCode string `json:"suggestion_code,omitempty"`
-	ExistingCode   string `json:"existing_code,omitempty"`
-	StartLine      int    `json:"start_line"`
-	EndLine        int    `json:"end_line"`
-	Thinking       string `json:"thinking,omitempty"`
+	Path           string   `json:"path"`
+	Content        string   `json:"content"`
+	Severity       Severity `json:"severity,omitempty"`
+	SuggestionCode string   `json:"suggestion_code,omitempty"`
+	ExistingCode   string   `json:"existing_code,omitempty"`
+	StartLine      int      `json:"start_line"`
+	EndLine        int      `json:"end_line"`
+	Thinking       string   `json:"thinking,omitempty"`
 }
 
 // CodeReviewResult holds raw LLM-generated review suggestion for a code segment.
diff --git a/internal/model/review_test.go b/internal/model/review_test.go
new file mode 100644
index 0000000..1f2c817
--- /dev/null
+++ b/internal/model/review_test.go
@@ -0,0 +1,49 @@
+package model
+
+import "testing"
+
+func TestSeverity_Rank(t *testing.T) {
+	tests := []struct {
+		severity Severity
+		wantRank int
+	}{
+		{SeverityCritical, 4},
+		{SeverityWarning, 3},
+		{SeveritySuggestion, 2},
+		{SeverityNitpick, 1},
+		{Severity("unknown"), 0},
+		{Severity(""), 0},
+	}
+	for _, tt := range tests {
+		if got := tt.severity.Rank(); got != tt.wantRank {
+			t.Errorf("Severity(%q).Rank() = %d, want %d", tt.severity, got, tt.wantRank)
+		}
+	}
+}
+
+func TestSeverity_Ordering(t *testing.T) {
+	if SeverityCritical.Rank() <= SeverityWarning.Rank() {
+		t.Error("critical should outrank warning")
+	}
+	if SeverityWarning.Rank() <= SeveritySuggestion.Rank() {
+		t.Error("warning should outrank suggestion")
+	}
+	if SeveritySuggestion.Rank() <= SeverityNitpick.Rank() {
+		t.Error("suggestion should outrank nitpick")
+	}
+}
+
+func TestValidSeverity(t *testing.T) {
+	valid := []string{"critical", "warning", "suggestion", "nitpick"}
+	for _, s := range valid {
+		if !ValidSeverity(s) {
+			t.Errorf("ValidSeverity(%q) = false, want true", s)
+		}
+	}
+	invalid := []string{"", "high", "low", "error", "Critical"}
+	for _, s := range invalid {
+		if ValidSeverity(s) {
+			t.Errorf("ValidSeverity(%q) = true, want false", s)
+		}
+	}
+}
diff --git a/internal/tool/code_comment.go b/internal/tool/code_comment.go
index c114a1b..d9aac1a 100644
--- a/internal/tool/code_comment.go
+++ b/internal/tool/code_comment.go
@@ -63,6 +63,9 @@ func ParseComments(args map[string]any) ([]model.LlmComment, string) {
 		if existing, ok := obj["existing_code"].(string); ok {
 			cm.ExistingCode = existing
 		}
+		if severity, ok := obj["severity"].(string); ok && model.ValidSeverity(severity) {
+			cm.Severity = model.Severity(severity)
+		}
 		if thinking, ok := obj["thinking"].(string); ok {
 			cm.Thinking = thinking
 		}

From e20ebabe75434dcde67b10b0ac14d054de62346c Mon Sep 17 00:00:00 2001
From: anchen <anchen.spp@alibaba-inc.com>
Date: Tue, 9 Jun 2026 10:45:08 +0800
Subject: [PATCH 2/2] fix: filter unset-severity comments and avoid nil JSON
 slice

- Treat comments without severity (rank 0) as lowest priority, filtered
  out when --min-severity is specified
- Use make([]LlmComment, 0) to ensure JSON output produces [] not null

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 cmd/opencodereview/output.go      |  6 ++++--
 cmd/opencodereview/output_test.go | 31 +++++++++++++++++++++++++------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go
index ddb88b1..3d1a26e 100644
--- a/cmd/opencodereview/output.go
+++ b/cmd/opencodereview/output.go
@@ -32,9 +32,11 @@ func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.L
 		return comments
 	}
 	minRank := model.Severity(minSeverity).Rank()
-	var filtered []model.LlmComment
+	filtered := make([]model.LlmComment, 0)
 	for _, c := range comments {
-		if c.Severity == "" || c.Severity.Rank() >= minRank {
+		// Unset severity (Rank 0) is treated as lowest priority and filtered out
+		// when --min-severity is specified.
+		if c.Severity.Rank() >= minRank {
 			filtered = append(filtered, c)
 		}
 	}
diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go
index b02269f..bea324b 100644
--- a/cmd/opencodereview/output_test.go
+++ b/cmd/opencodereview/output_test.go
@@ -20,11 +20,16 @@ func TestFilterBySeverity(t *testing.T) {
 		wantCount   int
 		wantPaths   []string
 	}{
+		// No filter: all comments returned.
 		{"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}},
-		{"nitpick", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}},
-		{"suggestion", 4, []string{"a.go", "b.go", "c.go", "e.go"}},
-		{"warning", 3, []string{"a.go", "b.go", "e.go"}},
-		{"critical", 2, []string{"a.go", "e.go"}},
+		// nitpick: keeps all with severity, filters out unset (rank 0 < 1).
+		{"nitpick", 4, []string{"a.go", "b.go", "c.go", "d.go"}},
+		// suggestion: keeps critical + warning + suggestion.
+		{"suggestion", 3, []string{"a.go", "b.go", "c.go"}},
+		// warning: keeps critical + warning.
+		{"warning", 2, []string{"a.go", "b.go"}},
+		// critical: keeps only critical.
+		{"critical", 1, []string{"a.go"}},
 	}
 
 	for _, tt := range tests {
@@ -44,7 +49,21 @@ func TestFilterBySeverity(t *testing.T) {
 
 func TestFilterBySeverity_Empty(t *testing.T) {
 	got := filterBySeverity(nil, "warning")
-	if got != nil {
-		t.Errorf("expected nil, got %v", got)
+	if len(got) != 0 {
+		t.Errorf("expected empty slice, got %v", got)
+	}
+}
+
+func TestFilterBySeverity_ReturnsNonNilSlice(t *testing.T) {
+	// When minSeverity is set, result should always be non-nil (for JSON: [] not null).
+	comments := []model.LlmComment{
+		{Path: "a.go", Content: "nitpick", Severity: model.SeverityNitpick},
+	}
+	got := filterBySeverity(comments, "critical")
+	if got == nil {
+		t.Fatal("expected non-nil empty slice, got nil")
+	}
+	if len(got) != 0 {
+		t.Errorf("expected 0 comments, got %d", len(got))
 	}
 }