alibaba · hellomypastor · Jun 8, 2026 · Jun 9, 2026
diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go
@@ -4,6 +4,8 @@ import (
 	"flag"
 	"fmt"
 	"time"
+
+	"github.com/open-code-review/open-code-review/internal/model"
 )
 
 // --- custom flag set that supports short flags (-c, -f etc.) ---
@@ -103,6 +105,7 @@ type reviewOptions struct {
 	outputFormat   string
 	audience       string // --audience: "human" (default) or "agent"
 	background     string // --background: optional requirement context
+	minSeverity    string // --min-severity: filter comments below this severity
 	concurrency    int
 	perFileTimeout int
 	maxTools       int
@@ -129,6 +132,7 @@ func parseReviewFlags(args []string) (reviewOptions, error) {
 	a.StringVarP(&opts.background, "background", "b", "", "optional requirement/business context for the review")
 	a.IntVar(&opts.maxTools, "max-tools", 0, "max tool call rounds per file; only takes effect when greater than template default")
 	a.IntVar(&opts.maxGitProcs, "max-git-procs", 16, "max concurrent git subprocesses")
+	a.StringVar(&opts.minSeverity, "min-severity", "", "minimum severity to include in output: critical, warning, suggestion, nitpick")
 	a.BoolVarP(&opts.preview, "preview", "p", false, "preview which files will be reviewed without running the LLM")
 
 	if err := a.Parse(args); err != nil {
@@ -161,6 +165,10 @@ func parseReviewFlags(args []string) (reviewOptions, error) {
 		return opts, fmt.Errorf("invalid --audience value %q: must be 'human' or 'agent'", opts.audience)
 	}
 
+	if opts.minSeverity != "" && !model.ValidSeverity(opts.minSeverity) {
+		return opts, fmt.Errorf("invalid --min-severity value %q: must be one of critical, warning, suggestion, nitpick", opts.minSeverity)
+	}
+
 	if opts.maxTools < 0 {
 		return opts, fmt.Errorf("--max-tools must be a non-negative integer (0 means use template default)")
 	}
@@ -197,6 +205,9 @@ Examples:
   # Agent mode (summary only, no progress lines)
   ocr review --audience agent
 
+  # Only show warnings and above (skip suggestion/nitpick)
+  ocr review --min-severity warning
+
   # Preview which files will be reviewed
   ocr review --preview
   ocr review -c abc123 -p
@@ -208,6 +219,7 @@ Flags:
   -f, --format string     output format: text or json (default "text")
   --concurrency int       max concurrent file reviews (default 8)
   --max-git-procs int     max concurrent git subprocesses (default 16)
+  --min-severity string   minimum severity to include: critical, warning, suggestion, nitpick
   --from string           source ref to start diff from (e.g., 'main')
   --max-tools int         max tool call rounds per file; only takes effect when greater than template default
   -p, --preview           preview which files will be reviewed without running the LLM

diff --git a/cmd/opencodereview/output.go b/cmd/opencodereview/output.go
@@ -12,6 +12,37 @@ import (
 	"github.com/open-code-review/open-code-review/internal/suggestdiff"
 )
 
+func severityBadge(s model.Severity) string {
+	switch s {
+	case model.SeverityCritical:
+		return "\033[91m[critical]\033[0m"
+	case model.SeverityWarning:
+		return "\033[93m[warning]\033[0m"
+	case model.SeveritySuggestion:
+		return "\033[96m[suggestion]\033[0m"
+	case model.SeverityNitpick:
+		return "\033[2m[nitpick]\033[0m"
+	default:
+		return ""
+	}
+}
+
+func filterBySeverity(comments []model.LlmComment, minSeverity string) []model.LlmComment {
+	if minSeverity == "" {
+		return comments
+	}
+	minRank := model.Severity(minSeverity).Rank()
+	filtered := make([]model.LlmComment, 0)
+	for _, c := range comments {
+		// Unset severity (Rank 0) is treated as lowest priority and filtered out
+		// when --min-severity is specified.
+		if c.Severity.Rank() >= minRank {
+			filtered = append(filtered, c)
+		}
+	}
+	return filtered
+}
+
 func outputText(comments []model.LlmComment) {
 	if len(comments) == 0 {
 		fmt.Println("No comments generated. Looks good to me.")
@@ -57,7 +88,11 @@ func renderComment(comment model.LlmComment) {
 		return
 	}
 
-	fmt.Printf("\n\033[2m─── %s:%d-%d ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine)
+	severityTag := ""
+	if comment.Severity != "" {
+		severityTag = " " + severityBadge(comment.Severity)
+	}
+	fmt.Printf("\n\033[2m─── %s:%d-%d%s ───\033[0m\n", comment.Path, comment.StartLine, comment.EndLine, severityTag)
 
 	if comment.Content != "" {
 		for _, ln := range wrapByRunes(comment.Content, 100) {

diff --git a/cmd/opencodereview/output_test.go b/cmd/opencodereview/output_test.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"testing"
+
+	"github.com/open-code-review/open-code-review/internal/model"
+)
+
+func TestFilterBySeverity(t *testing.T) {
+	comments := []model.LlmComment{
+		{Path: "a.go", Content: "critical bug", Severity: model.SeverityCritical},
+		{Path: "b.go", Content: "warning issue", Severity: model.SeverityWarning},
+		{Path: "c.go", Content: "suggestion", Severity: model.SeveritySuggestion},
+		{Path: "d.go", Content: "nitpick", Severity: model.SeverityNitpick},
+		{Path: "e.go", Content: "no severity"},
+	}
+
+	tests := []struct {
+		minSeverity string
+		wantCount   int
+		wantPaths   []string
+	}{
+		// No filter: all comments returned.
+		{"", 5, []string{"a.go", "b.go", "c.go", "d.go", "e.go"}},
+		// nitpick: keeps all with severity, filters out unset (rank 0 < 1).
+		{"nitpick", 4, []string{"a.go", "b.go", "c.go", "d.go"}},
+		// suggestion: keeps critical + warning + suggestion.
+		{"suggestion", 3, []string{"a.go", "b.go", "c.go"}},
+		// warning: keeps critical + warning.
+		{"warning", 2, []string{"a.go", "b.go"}},
+		// critical: keeps only critical.
+		{"critical", 1, []string{"a.go"}},
+	}
+
+	for _, tt := range tests {
+		t.Run("min="+tt.minSeverity, func(t *testing.T) {
+			got := filterBySeverity(comments, tt.minSeverity)
+			if len(got) != tt.wantCount {
+				t.Fatalf("got %d comments, want %d", len(got), tt.wantCount)
+			}
+			for i, want := range tt.wantPaths {
+				if got[i].Path != want {
+					t.Errorf("got[%d].Path = %q, want %q", i, got[i].Path, want)
+				}
+			}
+		})
+	}
+}
+
+func TestFilterBySeverity_Empty(t *testing.T) {
+	got := filterBySeverity(nil, "warning")
+	if len(got) != 0 {
+		t.Errorf("expected empty slice, got %v", got)
+	}
+}
+
+func TestFilterBySeverity_ReturnsNonNilSlice(t *testing.T) {
+	// When minSeverity is set, result should always be non-nil (for JSON: [] not null).
+	comments := []model.LlmComment{
+		{Path: "a.go", Content: "nitpick", Severity: model.SeverityNitpick},
+	}
+	got := filterBySeverity(comments, "critical")
+	if got == nil {
+		t.Fatal("expected non-nil empty slice, got nil")
+	}
+	if len(got) != 0 {
+		t.Errorf("expected 0 comments, got %d", len(got))
+	}
+}
diff --git a/cmd/opencodereview/review_cmd.go b/cmd/opencodereview/review_cmd.go
@@ -150,6 +150,9 @@ func runReview(args []string) error {
 	// Resolve line numbers by matching existing_code against diff hunks.
 	comments = diff.ResolveLineNumbers(comments, ag.Diffs())
 
+	// Filter by minimum severity if requested.
+	comments = filterBySeverity(comments, opts.minSeverity)
+
 	// Record summary metrics (files_reviewed is refined by agent.Run).
 	duration := time.Since(startTime)
 	telemetry.RecordReviewDuration(ctx, duration)

diff --git a/internal/config/template/task_template.json b/internal/config/template/task_template.json
@@ -3,7 +3,7 @@
     "messages": [
       {
         "role": "system",
-        "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in <user_task>, and you will use <tool> to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback.\n- If additional context is needed to confirm the issue, call the appropriate context tool."
+        "content": "## Role\nYou are a code review assistant developed by Alibaba. You are skilled at code review in the software development process and are responsible for providing professional review feedback for code changes that are about to be submitted. Your feedback perfectly combines detailed analysis with contextual explanations.\nYou are working in an IDE with editor concepts for open files and an integrated terminal. The user's developed code is stored in the IDE's staging area.\nBefore users commit staged code to remote repositories, they will send you tasks to help them complete the process successfully. Each time a user sends a task, it will be placed in <user_task>, and you will use <tool> to interact with the real world when executing tasks.\nPlease keep your responses concise and objective.\n\n## Capabilities\n- Think step by step progressively.\n- First understand the code changes to be reviewed. Code changes are provided in Unified Diff format, where lines starting with `-` indicate deleted code, lines starting with `+` indicate added code, consecutive `-` and `+` lines represent modified code, and other lines represent unchanged code.\n- Be objective and neutral, make judgments based on facts and logic, avoid subjective assumptions. When the context is unclear, use tools to obtain contextual information rather than judging based on assumptions.\n- For the current code changes, provide feedback opinions, pointing out areas for improvement or potential issues. Focus on issues in newly added code.\n- Avoid commenting on correct code or unchanged code.\n- Avoid commenting on deleted code; deleted code serves only as reference context.\n- Focus on clarity, practicality, and comprehensiveness.\n- Use developer-friendly terminology and analogies in explanations.\n- Focus primarily on the actual code logic and functionality. Avoid commenting on or providing feedback about non-functional elements such as code comments, tool-generated indicators (like @Generated annotations), or other metadata, unless the user explicitly requests you to review these elements.\n\n## Strict Focus Rules\n- Context tools are for understanding purposes only. Findings from other files must NOT become the subject of your comments.\n- If you discover a potential issue in another file while gathering context, ignore it — your task is limited to the current diffs.\n\n## Severity Classification\nWhen reporting issues via `code_comment`, you MUST assign a severity level to each comment:\n- `critical`: Security vulnerabilities, data loss, system crashes, or critical functional failures.\n- `warning`: Potential bugs, performance issues, race conditions, or edge-case problems.\n- `suggestion`: Improvements to maintainability, readability, or best practice adoption.\n- `nitpick`: Minor style or formatting issues with no functional impact.\n\n## Reply limit\n- If the current code review task is complete, call `task_done` to end the task.\n- If a code issue has been identified and confirmed, call the `code_comment` tool to provide feedback with an appropriate severity level.\n- If additional context is needed to confirm the issue, call the appropriate context tool."
       },
       {
         "role": "user",

diff --git a/internal/config/toolsconfig/tools.json b/internal/config/toolsconfig/tools.json
@@ -51,11 +51,17 @@
                 "suggestion_code": {
                   "type": "string",
                   "description": "Corresponding suggested code snippet, maintaining consistent code style."
+                },
+                "severity": {
+                  "type": "string",
+                  "enum": ["critical", "warning", "suggestion", "nitpick"],
+                  "description": "Severity level of the issue. critical: security vulnerabilities, data loss, system crashes. warning: potential bugs, performance issues, edge cases. suggestion: improvements to maintainability, readability, best practices. nitpick: minor style or formatting issues."
                 }
               },
               "required": [
                 "content",
-                "existing_code"
+                "existing_code",
+                "severity"
               ]
             }
           }

diff --git a/internal/model/review.go b/internal/model/review.go
@@ -1,14 +1,50 @@
 package model
 
+// Severity represents the severity level of a review comment.
+type Severity string
+
+const (
+	SeverityCritical   Severity = "critical"
+	SeverityWarning    Severity = "warning"
+	SeveritySuggestion Severity = "suggestion"
+	SeverityNitpick    Severity = "nitpick"
+)
+
+// SeverityRank returns an integer rank for ordering (higher = more severe).
+func (s Severity) Rank() int {
+	switch s {
+	case SeverityCritical:
+		return 4
+	case SeverityWarning:
+		return 3
+	case SeveritySuggestion:
+		return 2
+	case SeverityNitpick:
+		return 1
+	default:
+		return 0
+	}
+}
+
+// ValidSeverity reports whether s is a recognized severity level.
+func ValidSeverity(s string) bool {
+	switch Severity(s) {
+	case SeverityCritical, SeverityWarning, SeveritySuggestion, SeverityNitpick:
+		return true
+	}
+	return false
+}
+
 // LlmComment represents a code review comment generated by the LLM.
 type LlmComment struct {
-	Path           string `json:"path"`
-	Content        string `json:"content"`
-	SuggestionCode string `json:"suggestion_code,omitempty"`
-	ExistingCode   string `json:"existing_code,omitempty"`
-	StartLine      int    `json:"start_line"`
-	EndLine        int    `json:"end_line"`
-	Thinking       string `json:"thinking,omitempty"`
+	Path           string   `json:"path"`
+	Content        string   `json:"content"`
+	Severity       Severity `json:"severity,omitempty"`
+	SuggestionCode string   `json:"suggestion_code,omitempty"`
+	ExistingCode   string   `json:"existing_code,omitempty"`
+	StartLine      int      `json:"start_line"`
+	EndLine        int      `json:"end_line"`
+	Thinking       string   `json:"thinking,omitempty"`
 }
 
 // CodeReviewResult holds raw LLM-generated review suggestion for a code segment.

diff --git a/internal/model/review_test.go b/internal/model/review_test.go
@@ -0,0 +1,49 @@
+package model
+
+import "testing"
+
+func TestSeverity_Rank(t *testing.T) {
+	tests := []struct {
+		severity Severity
+		wantRank int
+	}{
+		{SeverityCritical, 4},
+		{SeverityWarning, 3},
+		{SeveritySuggestion, 2},
+		{SeverityNitpick, 1},
+		{Severity("unknown"), 0},
+		{Severity(""), 0},
+	}
+	for _, tt := range tests {
+		if got := tt.severity.Rank(); got != tt.wantRank {
+			t.Errorf("Severity(%q).Rank() = %d, want %d", tt.severity, got, tt.wantRank)
+		}
+	}
+}
+
+func TestSeverity_Ordering(t *testing.T) {
+	if SeverityCritical.Rank() <= SeverityWarning.Rank() {
+		t.Error("critical should outrank warning")
+	}
+	if SeverityWarning.Rank() <= SeveritySuggestion.Rank() {
+		t.Error("warning should outrank suggestion")
+	}
+	if SeveritySuggestion.Rank() <= SeverityNitpick.Rank() {
+		t.Error("suggestion should outrank nitpick")
+	}
+}
+
+func TestValidSeverity(t *testing.T) {
+	valid := []string{"critical", "warning", "suggestion", "nitpick"}
+	for _, s := range valid {
+		if !ValidSeverity(s) {
+			t.Errorf("ValidSeverity(%q) = false, want true", s)
+		}
+	}
+	invalid := []string{"", "high", "low", "error", "Critical"}
+	for _, s := range invalid {
+		if ValidSeverity(s) {
+			t.Errorf("ValidSeverity(%q) = true, want false", s)
+		}
+	}
+}
diff --git a/internal/tool/code_comment.go b/internal/tool/code_comment.go
@@ -63,6 +63,9 @@ func ParseComments(args map[string]any) ([]model.LlmComment, string) {
 		if existing, ok := obj["existing_code"].(string); ok {
 			cm.ExistingCode = existing
 		}
+		if severity, ok := obj["severity"].(string); ok && model.ValidSeverity(severity) {
+			cm.Severity = model.Severity(severity)
+		}
 		if thinking, ok := obj["thinking"].(string); ok {
 			cm.Thinking = thinking
 		}