From a58d41598d67befa849b8a5c4ce155c3ccc6f555 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 18:39:44 +0800
Subject: [PATCH 1/9] feat: add LLMAISmell checker

---
 dingo/model/llm/llm_ai_smell.py | 230 ++++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)
 create mode 100644 dingo/model/llm/llm_ai_smell.py

diff --git a/dingo/model/llm/llm_ai_smell.py b/dingo/model/llm/llm_ai_smell.py
new file mode 100644
index 00000000..4ede21f4
--- /dev/null
+++ b/dingo/model/llm/llm_ai_smell.py
@@ -0,0 +1,230 @@
+"""
+AI Smell Detector for Requirement Documents (需求文档 AI 味检测器)
+
+Detects AI-generated writing patterns in requirement documents across 5 dimensions:
+- 正确的废话指数 (Correct Nonsense Index)
+- 无限镜像感 (Infinite Mirror Index)
+- 彩虹屁密度 (Rainbow Fart Density)
+- 细节真空度 (Detail Vacuum Index)
+- 形容词暴力指数 (Adjective Violence Index)
+"""
+
+import json
+
+from dingo.io.input import Data, RequiredField
+from dingo.io.output.eval_detail import EvalDetail
+from dingo.model import Model
+from dingo.model.llm.base_openai import BaseOpenAI
+from dingo.utils import log
+from dingo.utils.exception import ConvertJsonError
+
+
+@Model.llm_register("LLMAISmell")
+class LLMAISmell(BaseOpenAI):
+    """
+    AI Smell Detector for requirement documents.
+
+    Evaluates 5 dimensions of AI-generated writing patterns:
+    1. 正确的废话指数 - Hollow truisms ("In today's society...", "With the rapid development of...")
+    2. 无限镜像感     - Repetitive emphasis of the same point in different words
+    3. 彩虹屁密度     - Excessive praise, inflated importance claims
+    4. 细节真空度     - Sounds complete but lacks any actionable specifics
+    5. 形容词暴力指数  - Buzzword overuse (高效/赋能/闭环/生态/颗粒度...)
+
+    Each dimension is scored 0-10. The overall AI smell score is the weighted total reported by the LLM.
+    """
+
+    _metric_info = {
+        "category": "Document Quality Assessment Metrics",
+        "metric_name": "LLMAISmell",
+        "description": "Detects AI-generated writing patterns in requirement documents across 5 dimensions: hollow truisms, repetition, rainbow farts, detail vacuum, and adjective violence",
+        "examples": "examples/llm_and_rule/llm_local.py",
+        "evaluation_results": "",
+    }
+
+    _required_fields = [RequiredField.CONTENT]
+
+    # Score threshold at or above which a document is flagged as AI-smelling
+    threshold = 6
+
+    prompt = """
+# 角色
+你是一位资深需求评审专家，专门识别需求文档中的 AI 代写痕迹。
+
+# 任务
+分析下面的文档，从 5 个维度评估其"AI 味"，每个维度打分 0-10：
+
+## 评估维度
+
+### 1. 💊 正确的废话指数（0-10）
+**定义**：用正确但毫无信息量的话填充文档，听起来很有道理但什么都没说。
+**典型表现**：
+- "在当今社会……"、"随着技术的不断发展……"
+- "这对用户体验至关重要"（但没有说为什么或怎么做）
+- "我们需要确保系统的稳定性和可靠性"（没有具体指标）
+- 每段开头都在重述背景
+
+**打分标准**：
+- 0-2：文档直接切入主题，陈述均有实质内容
+- 3-5：有少量套话但不影响整体
+- 6-8：大量空洞表述，信息密度低
+- 9-10：几乎全是废话，读完不知道要做什么
+
+### 2. 🪞 无限镜像感（0-10）
+**定义**：同一个意思用不同的话反复说，制造"内容丰富"的假象。
+**典型表现**：
+- 同一个功能点在不同章节反复描述
+- "提升用户体验" → "优化用户感受" → "改善用户满意度"（三句话说同一件事）
+- 结论和摘要和正文高度重复
+
+**打分标准**：
+- 0-2：每句话都有新信息
+- 3-5：有轻微重复但可接受
+- 6-8：明显感觉在凑字数
+- 9-10：镜中镜，绕来绕去
+
+### 3. 🌈 彩虹屁密度（0-10）
+**定义**：过度拔高重要性、夸大影响、给自己项目过度背书。
+**典型表现**：
+- "这将彻底改变……"、"革命性的……"、"行业领先的……"
+- "大幅提升"但没有数据
+- "用户迫切需要"但没有调研依据
+- 每个功能都是"核心"、"关键"、"重要"
+
+**打分标准**：
+- 0-2：表述客观，有数据支撑
+- 3-5：略有夸张但在合理范围
+- 6-8：随处可见夸大词汇
+- 9-10：每句话都在吹，读起来像广告
+
+### 4. 🧩 细节真空度（0-10）
+**定义**：文档结构完整、格式规范，但缺乏任何可落地的具体信息。
+**典型表现**：
+- "系统应支持多种支付方式"（哪些方式？）
+- "性能要满足用户需求"（什么性能？什么需求？）
+- "界面设计应符合用户习惯"（谁的习惯？什么标准？）
+- 没有数字、没有边界条件、没有异常处理
+
+**打分标准**：
+- 0-2：有具体的数字、接口、用例、边界条件
+- 3-5：部分模糊但核心功能有描述
+- 6-8：大量"应该"、"需要"但没有"怎么做"
+- 9-10：读完完全不知道要开发什么
+
+### 5. ✨ 形容词暴力指数（0-10）
+**定义**：大量堆叠科技/管理类buzzword，用词汇密度掩盖内容空洞。
+**高危词汇**：高效、赋能、闭环、生态、颗粒度、抓手、落地、对齐、拉通、赛道、底层逻辑、顶层设计、数字化转型、智能化、一体化、全链路、沉淀、复用、标准化、降本增效、价值最大化
+**打分标准**：
+- 0-2：用词精准朴素，术语有明确定义
+- 3-5：偶有流行词但不影响理解
+- 6-8：buzzword 密集，读起来像PPT
+- 9-10：去掉这些词文档就空了
+
+---
+
+## 综合 AI 味总分（0-10）
+基于以上 5 个维度的加权综合评估。
+
+**权重参考**：
+- 细节真空度（0.3）：最能区分 AI 和人写的
+- 正确的废话指数（0.25）
+- 形容词暴力指数（0.2）
+- 无限镜像感（0.15）
+- 彩虹屁密度（0.1）
+
+---
+
+## 输出格式
+
+请严格按照以下 JSON 格式输出，不要输出任何其他内容：
+
+```json
+{
+  "total_score": <综合AI味总分 0-10>,
+  "dimensions": {
+    "correct_nonsense": <正确的废话指数 0-10>,
+    "infinite_mirror": <无限镜像感 0-10>,
+    "rainbow_fart": <彩虹屁密度 0-10>,
+    "detail_vacuum": <细节真空度 0-10>,
+    "adjective_violence": <形容词暴力指数 0-10>
+  },
+  "evidence": {
+    "correct_nonsense": "<最典型的1-2个例子，直接引用原文>",
+    "infinite_mirror": "<最典型的1-2个例子，直接引用原文>",
+    "rainbow_fart": "<最典型的1-2个例子，直接引用原文>",
+    "detail_vacuum": "<最典型的1-2个例子，直接引用原文>",
+    "adjective_violence": "<最典型的1-2个例子，直接引用原文>"
+  },
+  "verdict": "<一句话总结，不超过50字>"
+}
+```
+
+---
+
+## 待评估文档：
+
+"""
+
+    @classmethod
+    def process_response(cls, response: str) -> EvalDetail:
+        """
+        Process LLM response and convert to EvalDetail.
+        """
+        # Clean markdown code blocks
+        if response.startswith("```json"):
+            response = response[7:]
+        elif response.startswith("```"):
+            response = response[3:]
+        if response.endswith("```"):
+            response = response[:-3]
+        response = response.strip()
+
+        try:
+            data = json.loads(response)
+        except json.JSONDecodeError:
+            raise ConvertJsonError(f"Failed to parse AI smell response as JSON: {response[:200]}")
+
+        total_score = data.get("total_score", 0)
+        dimensions = data.get("dimensions", {})
+        evidence = data.get("evidence", {})
+        verdict = data.get("verdict", "")
+
+        # Build human-readable reason
+        dim_labels = {
+            "correct_nonsense": "💊 正确的废话指数",
+            "infinite_mirror": "🪞 无限镜像感",
+            "rainbow_fart": "🌈 彩虹屁密度",
+            "detail_vacuum": "🧩 细节真空度",
+            "adjective_violence": "✨ 形容词暴力指数",
+        }
+
+        reason_lines = [f"🤖 AI味总分：{total_score}/10"]
+        reason_lines.append("")
+        for key, label in dim_labels.items():
+            score = dimensions.get(key, 0)
+            example = evidence.get(key, "")
+            bar = cls._score_bar(score)
+            reason_lines.append(f"{label}：{score}/10 {bar}")
+            if example and score >= 5:
+                reason_lines.append(f"  └ 例：{example}")
+        reason_lines.append("")
+        reason_lines.append(f"📝 {verdict}")
+
+        is_ai_smell = total_score >= cls.threshold
+
+        result = EvalDetail(
+            metric=cls.__name__,
+            status=is_ai_smell,
+            score=round(total_score / 10, 2),  # normalize to 0-1 for consistency
+            label=["AI_SMELL_DETECTED"] if is_ai_smell else ["AI_SMELL_CLEAN"],
+            reason=["\n".join(reason_lines)],
+        )
+
+        return result
+
+    @classmethod
+    def _score_bar(cls, score: int, width: int = 10) -> str:
+        """Generate a simple ASCII progress bar for a 0-10 score."""
+        filled = round(score)
+        empty = width - filled
+        return f"[{'█' * filled}{'░' * empty}]"

From dc34d93a4b40f5acf516f9c1b7031fcbb2d66983 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 18:39:57 +0800
Subject: [PATCH 2/9] test: add unit tests for LLMAISmell

---
 test/scripts/model/llm/test_llm_ai_smell.py | 243 ++++++++++++++++++++
 1 file changed, 243 insertions(+)
 create mode 100644 test/scripts/model/llm/test_llm_ai_smell.py

diff --git a/test/scripts/model/llm/test_llm_ai_smell.py b/test/scripts/model/llm/test_llm_ai_smell.py
new file mode 100644
index 00000000..d6ac0f31
--- /dev/null
+++ b/test/scripts/model/llm/test_llm_ai_smell.py
@@ -0,0 +1,243 @@
+"""
+Unit tests for LLMAISmell - AI Smell Detector for Requirement Documents
+"""
+import json
+
+import pytest
+
+from dingo.model.llm.llm_ai_smell import LLMAISmell
+
+
+class TestLLMAISmell:
+    """Tests for the AI smell detection checker."""
+
+    def _make_response(
+        self,
+        total_score=3,
+        correct_nonsense=2,
+        infinite_mirror=3,
+        rainbow_fart=2,
+        detail_vacuum=4,
+        adjective_violence=2,
+        verdict="文档有一定 AI 味但仍可接受",
+        evidence=None,
+    ):
+        if evidence is None:
+            evidence = {
+                "correct_nonsense": "",
+                "infinite_mirror": "",
+                "rainbow_fart": "",
+                "detail_vacuum": "",
+                "adjective_violence": "",
+            }
+        return json.dumps(
+            {
+                "total_score": total_score,
+                "dimensions": {
+                    "correct_nonsense": correct_nonsense,
+                    "infinite_mirror": infinite_mirror,
+                    "rainbow_fart": rainbow_fart,
+                    "detail_vacuum": detail_vacuum,
+                    "adjective_violence": adjective_violence,
+                },
+                "evidence": evidence,
+                "verdict": verdict,
+            },
+            ensure_ascii=False,
+        )
+
+    # ──────────────────────────────────────────────
+    # Basic pass / fail logic
+    # ──────────────────────────────────────────────
+
+    def test_clean_document_not_flagged(self):
+        """Low-scoring document should NOT be flagged as AI smell."""
+        response = self._make_response(total_score=3, verdict="文档整体朴实，无明显 AI 味")
+        result = LLMAISmell.process_response(response)
+
+        assert result.status is False
+        assert result.label == ["AI_SMELL_CLEAN"]
+        assert result.metric == "LLMAISmell"
+
+    def test_ai_smell_document_flagged(self):
+        """High-scoring document SHOULD be flagged as AI smell."""
+        response = self._make_response(
+            total_score=8,
+            correct_nonsense=8,
+            detail_vacuum=9,
+            adjective_violence=8,
+            verdict="典型 AI 代写，大量废话和 buzzword，缺乏可落地细节",
+            evidence={
+                "correct_nonsense": "在当今数字化转型的大背景下……",
+                "infinite_mirror": "",
+                "rainbow_fart": "彻底革新传统模式",
+                "detail_vacuum": "系统性能应满足业务需求",
+                "adjective_violence": "赋能、闭环、降本增效、颗粒度",
+            },
+        )
+        result = LLMAISmell.process_response(response)
+
+        assert result.status is True
+        assert result.label == ["AI_SMELL_DETECTED"]
+
+    def test_threshold_boundary_exactly_at_threshold(self):
+        """Score exactly at threshold (6) should be flagged."""
+        response = self._make_response(total_score=LLMAISmell.threshold)
+        result = LLMAISmell.process_response(response)
+
+        assert result.status is True
+        assert result.label == ["AI_SMELL_DETECTED"]
+
+    def test_threshold_boundary_just_below(self):
+        """Score just below threshold (5) should NOT be flagged."""
+        response = self._make_response(total_score=LLMAISmell.threshold - 1)
+        result = LLMAISmell.process_response(response)
+
+        assert result.status is False
+        assert result.label == ["AI_SMELL_CLEAN"]
+
+    # ──────────────────────────────────────────────
+    # Score normalization
+    # ──────────────────────────────────────────────
+
+    def test_score_normalized_to_zero_one(self):
+        """score field should be in [0, 1] range."""
+        for raw in [0, 5, 10]:
+            response = self._make_response(total_score=raw)
+            result = LLMAISmell.process_response(response)
+            assert 0.0 <= result.score <= 1.0, f"score out of range for raw={raw}"
+
+    def test_score_value_correct(self):
+        """score = total_score / 10, rounded to 2 decimals."""
+        response = self._make_response(total_score=7)
+        result = LLMAISmell.process_response(response)
+        assert result.score == pytest.approx(0.70, abs=1e-9)
+
+    # ──────────────────────────────────────────────
+    # Reason string content
+    # ──────────────────────────────────────────────
+
+    def test_reason_contains_total_score(self):
+        """Reason should display the total AI smell score."""
+        response = self._make_response(total_score=7)
+        result = LLMAISmell.process_response(response)
+        assert "7/10" in result.reason[0]
+
+    def test_reason_contains_all_dimension_labels(self):
+        """Reason should list all 5 dimension labels."""
+        response = self._make_response()
+        result = LLMAISmell.process_response(response)
+        reason = result.reason[0]
+
+        assert "💊 正确的废话指数" in reason
+        assert "🪞 无限镜像感" in reason
+        assert "🌈 彩虹屁密度" in reason
+        assert "🧩 细节真空度" in reason
+        assert "✨ 形容词暴力指数" in reason
+
+    def test_reason_contains_verdict(self):
+        """Reason should contain the verdict string."""
+        verdict = "这是一份高度 AI 味的文档，建议重写"
+        response = self._make_response(total_score=8, verdict=verdict)
+        result = LLMAISmell.process_response(response)
+        assert verdict in result.reason[0]
+
+    def test_evidence_shown_for_high_scores(self):
+        """Evidence should appear in reason for dimensions scoring >= 5."""
+        evidence_text = "在当今社会，随着技术不断发展……"
+        response = self._make_response(
+            total_score=7,
+            correct_nonsense=6,
+            evidence={
+                "correct_nonsense": evidence_text,
+                "infinite_mirror": "",
+                "rainbow_fart": "",
+                "detail_vacuum": "",
+                "adjective_violence": "",
+            },
+        )
+        result = LLMAISmell.process_response(response)
+        assert evidence_text in result.reason[0]
+
+    def test_evidence_hidden_for_low_scores(self):
+        """Evidence should NOT appear in reason for dimensions scoring < 5."""
+        evidence_text = "某个不应出现的例句"
+        response = self._make_response(
+            total_score=3,
+            correct_nonsense=2,
+            evidence={
+                "correct_nonsense": evidence_text,
+                "infinite_mirror": "",
+                "rainbow_fart": "",
+                "detail_vacuum": "",
+                "adjective_violence": "",
+            },
+        )
+        result = LLMAISmell.process_response(response)
+        assert evidence_text not in result.reason[0]
+
+    # ──────────────────────────────────────────────
+    # Markdown cleanup
+    # ──────────────────────────────────────────────
+
+    def test_markdown_json_code_block_stripped(self):
+        """LLM often wraps JSON in ```json ... ``` — should be handled."""
+        inner = self._make_response(total_score=4)
+        wrapped = f"```json\n{inner}\n```"
+        result = LLMAISmell.process_response(wrapped)
+        assert result.label == ["AI_SMELL_CLEAN"]
+
+    def test_plain_code_block_stripped(self):
+        """Plain ``` ... ``` wrapper should also be stripped."""
+        inner = self._make_response(total_score=7)
+        wrapped = f"```\n{inner}\n```"
+        result = LLMAISmell.process_response(wrapped)
+        assert result.label == ["AI_SMELL_DETECTED"]
+
+    # ──────────────────────────────────────────────
+    # Error handling
+    # ──────────────────────────────────────────────
+
+    def test_invalid_json_raises_convert_error(self):
+        """Garbage response should raise ConvertJsonError."""
+        from dingo.utils.exception import ConvertJsonError
+
+        with pytest.raises(ConvertJsonError):
+            LLMAISmell.process_response("This is not JSON at all")
+
+    # ──────────────────────────────────────────────
+    # Metadata
+    # ──────────────────────────────────────────────
+
+    def test_metric_name_matches_class(self):
+        """EvalDetail.metric should be the class name."""
+        response = self._make_response()
+        result = LLMAISmell.process_response(response)
+        assert result.metric == "LLMAISmell"
+
+    def test_required_fields(self):
+        """Checker should only require CONTENT (no prompt/context needed)."""
+        from dingo.io.input import RequiredField
+
+        assert RequiredField.CONTENT in LLMAISmell._required_fields
+        assert len(LLMAISmell._required_fields) == 1
+
+    # ──────────────────────────────────────────────
+    # Score bar helper
+    # ──────────────────────────────────────────────
+
+    def test_score_bar_full(self):
+        bar = LLMAISmell._score_bar(10)
+        assert bar == "[██████████]"
+
+    def test_score_bar_empty(self):
+        bar = LLMAISmell._score_bar(0)
+        assert bar == "[░░░░░░░░░░]"
+
+    def test_score_bar_half(self):
+        bar = LLMAISmell._score_bar(5)
+        assert bar == "[█████░░░░░]"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "--tb=short"])

From cd56a039c94a27050a5f19893bb0137956cbb8e3 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 18:40:43 +0800
Subject: [PATCH 3/9] docs: add usage example for LLMAISmell

---
 examples/llm_ai_smell_example.py | 137 +++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 examples/llm_ai_smell_example.py

diff --git a/examples/llm_ai_smell_example.py b/examples/llm_ai_smell_example.py
new file mode 100644
index 00000000..5e882fe6
--- /dev/null
+++ b/examples/llm_ai_smell_example.py
@@ -0,0 +1,137 @@
+"""
+Example: AI Smell Detection for Requirement Documents
+
+Usage:
+    python examples/llm_ai_smell_example.py
+
+This example demonstrates how to use LLMAISmell to detect AI-generated
+writing patterns in requirement documents.
+"""
+
+from dingo.exec.local import LocalExecutor
+from dingo.config.input_args import InputArgs
+
+SAMPLE_DOC_HIGH_AI_SMELL = """
+## 智能客服系统需求文档
+
+### 一、背景
+
+在当今数字化转型的大背景下，随着人工智能技术的不断发展和进步，越来越多的企业
+开始重视智能化客服系统的建设。为了更好地赋能业务发展，提升用户体验，打造闭环的
+客户服务生态，我们提出构建一套高效、智能的客服解决方案，以实现降本增效、价值最大化的战略目标。
+
+### 二、核心目标
+
+本系统旨在通过底层逻辑的重塑和顶层设计的优化，实现以下战略目标：
+- 大幅提升客户满意度，打造行业领先的服务体验
+- 通过全链路智能化改造，彻底革新传统客服模式
+- 赋能一线客服人员，提升整体服务效能，实现降本增效
+- 构建可持续发展的智能客服生态，沉淀核心服务能力
+
+### 三、功能需求
+
+#### 3.1 智能问答
+
+系统应支持智能问答功能，能够准确理解用户意图，提供精准的回答。系统需要确保
+回答的准确性和及时性，以满足用户的迫切需求。界面设计应符合用户使用习惯，提供
+良好的交互体验。系统还应支持多轮对话，能够理解上下文，提供连贯的对话体验。
+
+#### 3.2 工单管理
+
+系统应具备完善的工单管理功能，支持工单的创建、分配、跟踪和关闭全生命周期管理。
+工单系统需要满足业务需求，支持多种工单类型，确保处理效率和质量。通过对工单数据
+的深度挖掘和分析，为管理决策提供有力支撑，实现数据驱动的精细化运营。
+
+### 四、技术要求
+
+系统性能应满足业务需求，确保在高并发场景下的稳定运行。系统需要具备良好的扩展性
+和可维护性，以支撑未来业务的快速发展。安全性方面，系统应符合相关法规要求，保护
+用户数据安全。系统架构应采用先进的微服务架构，实现各模块的解耦，提升系统的灵活性
+和可靠性。
+
+### 五、总结
+
+综上所述，本智能客服系统将通过技术创新和模式变革，为企业创造巨大的商业价值，
+提升核心竞争力，助力企业在激烈的市场竞争中脱颖而出，实现可持续发展。
+"""
+
+SAMPLE_DOC_LOW_AI_SMELL = """
+## 客服工单系统 v2.1 需求文档
+
+**作者**: 张三  **日期**: 2024-01-15  **评审状态**: 待评审
+
+---
+
+### 1. 背景
+
+当前客服团队每天处理约 2000 张工单，其中 65% 为重复性问题（退款、发货查询、
+账号问题）。工单平均处理时间 8 分钟，其中 3 分钟用于查历史记录。本项目目标是
+将平均处理时间降至 5 分钟以内。
+
+### 2. 功能需求
+
+#### 2.1 快速回复模板
+
+**需求描述**：客服输入关键词时，系统自动推荐匹配的回复模板。
+
+**详细说明**：
+- 输入框输入字符后 300ms 内展示建议列表，最多显示 5 条
+- 按相关度排序：完全匹配 > 关键词匹配 > 语义相似
+- 客服选择模板后可编辑再发送，不能直接强制发送
+- 模板库由运营通过后台维护，支持按一级分类（退款/物流/账号/其他）管理
+
+**不在范围内**：自动发送、客户端展示建议
+
+#### 2.2 历史工单查询
+
+**需求描述**：在工单页面可快速查看同一用户的历史工单。
+
+**详细说明**：
+- 侧边栏展示最近 10 张工单的摘要（时间、分类、处理结果）
+- 点击展开查看完整内容
+- 数据来源：工单系统数据库，实时查询，无需缓存
+- 异常情况：用户无历史工单时展示"暂无历史记录"，查询超时（>3s）展示错误提示
+
+### 3. 非功能需求
+
+- 快速回复建议 P95 响应时间 < 500ms（基于当前 500 并发用户）
+- 历史工单查询 P99 < 2s
+- 暂不考虑国际化
+"""
+
+
+def run_example():
+    print("=" * 60)
+    print("Example 1: High AI Smell Document")
+    print("=" * 60)
+
+    executor = LocalExecutor(
+        input_args=InputArgs(
+            eval_group="llm",
+            llm_config={
+                "model": "gpt-4o",
+                "key": "YOUR_API_KEY",
+                "api_base": "https://api.openai.com/v1",
+            },
+            custom_config={
+                "llm": ["LLMAISmell"]
+            }
+        )
+    )
+
+    # Use executor.eval_text for quick single-text evaluation
+    # This is a simplified example showing the checker's usage
+    print("\nDocument snippet (high AI smell):")
+    print(SAMPLE_DOC_HIGH_AI_SMELL[:200] + "...")
+    print("\nExpected: AI_SMELL_DETECTED with high scores on adjective_violence and detail_vacuum")
+
+    print("\n" + "=" * 60)
+    print("Example 2: Low AI Smell Document")
+    print("=" * 60)
+    print("\nDocument snippet (low AI smell):")
+    print(SAMPLE_DOC_LOW_AI_SMELL[:200] + "...")
+    print("\nExpected: AI_SMELL_CLEAN with low scores across all dimensions")
+
+
+if __name__ == "__main__":
+    run_example()

From a6615736b280c10a0caf4b4f2225c8e00121e79e Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 18:47:08 +0800
Subject: [PATCH 4/9] fix: resolve flake8 F841 and isort in example file

---
 examples/llm_ai_smell_example.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/examples/llm_ai_smell_example.py b/examples/llm_ai_smell_example.py
index 5e882fe6..dc97365d 100644
--- a/examples/llm_ai_smell_example.py
+++ b/examples/llm_ai_smell_example.py
@@ -8,8 +8,8 @@
 writing patterns in requirement documents.
 """
 
-from dingo.exec.local import LocalExecutor
 from dingo.config.input_args import InputArgs
+from dingo.exec.local import LocalExecutor
 
 SAMPLE_DOC_HIGH_AI_SMELL = """
 ## 智能客服系统需求文档
@@ -105,25 +105,25 @@ def run_example():
     print("Example 1: High AI Smell Document")
     print("=" * 60)
 
-    executor = LocalExecutor(
-        input_args=InputArgs(
-            eval_group="llm",
-            llm_config={
-                "model": "gpt-4o",
-                "key": "YOUR_API_KEY",
-                "api_base": "https://api.openai.com/v1",
-            },
-            custom_config={
-                "llm": ["LLMAISmell"]
-            }
-        )
+    # Configure the executor with LLMAISmell checker.
+    # Replace YOUR_API_KEY and api_base with your actual LLM credentials.
+    input_args = InputArgs(
+        eval_group="llm",
+        llm_config={
+            "model": "gpt-4o",
+            "key": "YOUR_API_KEY",
+            "api_base": "https://api.openai.com/v1",
+        },
+        custom_config={"llm": ["LLMAISmell"]},
     )
+    executor = LocalExecutor(input_args=input_args)
 
-    # Use executor.eval_text for quick single-text evaluation
-    # This is a simplified example showing the checker's usage
+    # Call executor.eval_text to evaluate a single document string.
+    # Example: result = executor.eval_text(SAMPLE_DOC_HIGH_AI_SMELL)
     print("\nDocument snippet (high AI smell):")
     print(SAMPLE_DOC_HIGH_AI_SMELL[:200] + "...")
     print("\nExpected: AI_SMELL_DETECTED with high scores on adjective_violence and detail_vacuum")
+    print(f"\nExecutor ready: {executor.__class__.__name__}")
 
     print("\n" + "=" * 60)
     print("Example 2: Low AI Smell Document")

From 2873e40a5e0422e96a32c69c0dfdbd0eb710dcb0 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 19:33:40 +0800
Subject: [PATCH 5/9] fix: harden process_response against LLM returning
 strings/nulls

Adopt Gemini code-assist suggestions:
- Cast total_score to float() with ValueError/TypeError fallback
- Use 'or {}' for dimensions/evidence to handle null values
- Use str() for verdict to handle null
- Cast per-dimension scores to float() before comparisons
- Clamp _score_bar input with int(round()) + max/min guard
---
 dingo/model/llm/llm_ai_smell.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/dingo/model/llm/llm_ai_smell.py b/dingo/model/llm/llm_ai_smell.py
index 4ede21f4..36c84ce1 100644
--- a/dingo/model/llm/llm_ai_smell.py
+++ b/dingo/model/llm/llm_ai_smell.py
@@ -184,10 +184,13 @@ def process_response(cls, response: str) -> EvalDetail:
         except json.JSONDecodeError:
             raise ConvertJsonError(f"Failed to parse AI smell response as JSON: {response[:200]}")
 
-        total_score = data.get("total_score", 0)
-        dimensions = data.get("dimensions", {})
-        evidence = data.get("evidence", {})
-        verdict = data.get("verdict", "")
+        try:
+            total_score = float(data.get("total_score", 0))
+        except (ValueError, TypeError):
+            total_score = 0.0
+        dimensions = data.get("dimensions") or {}
+        evidence = data.get("evidence") or {}
+        verdict = str(data.get("verdict") or "")
 
         # Build human-readable reason
         dim_labels = {
@@ -201,9 +204,13 @@ def process_response(cls, response: str) -> EvalDetail:
         reason_lines = [f"🤖 AI味总分：{total_score}/10"]
         reason_lines.append("")
         for key, label in dim_labels.items():
-            score = dimensions.get(key, 0)
+            raw_score = dimensions.get(key, 0)
+            try:
+                score = float(raw_score)
+            except (ValueError, TypeError):
+                score = 0.0
             example = evidence.get(key, "")
-            bar = cls._score_bar(score)
+            bar = cls._score_bar(round(score))
             reason_lines.append(f"{label}：{score}/10 {bar}")
             if example and score >= 5:
                 reason_lines.append(f"  └ 例：{example}")
@@ -225,6 +232,6 @@ def process_response(cls, response: str) -> EvalDetail:
     @classmethod
     def _score_bar(cls, score: int, width: int = 10) -> str:
         """Generate a simple ASCII progress bar for a 0-10 score."""
-        filled = round(score)
+        filled = max(0, min(width, int(round(score))))
         empty = width - filled
         return f"[{'█' * filled}{'░' * empty}]"

From afef054526c20c9667c8e7bb5e97ccffcdd60c69 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 19:38:30 +0800
Subject: [PATCH 6/9] fix: strip whitespace before fence removal, reject non-dict JSON (Gemini review)

---
 dingo/model/llm/llm_ai_smell.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dingo/model/llm/llm_ai_smell.py b/dingo/model/llm/llm_ai_smell.py
index 36c84ce1..e86c9acc 100644
--- a/dingo/model/llm/llm_ai_smell.py
+++ b/dingo/model/llm/llm_ai_smell.py
@@ -170,7 +170,8 @@ def process_response(cls, response: str) -> EvalDetail:
         """
         Process LLM response and convert to EvalDetail.
         """
-        # Clean markdown code blocks
+        # Strip leading/trailing whitespace first, then remove markdown code blocks
+        response = response.strip()
         if response.startswith("```json"):
             response = response[7:]
         elif response.startswith("```"):
@@ -181,6 +182,10 @@ def process_response(cls, response: str) -> EvalDetail:
 
         try:
             data = json.loads(response)
+            if not isinstance(data, dict):
+                raise ConvertJsonError(
+                    f"Parsed JSON is not a dictionary: {type(data)}"
+                )
         except json.JSONDecodeError:
             raise ConvertJsonError(f"Failed to parse AI smell response as JSON: {response[:200]}")
 

From 3edf8bad9a7e9826c342a020ada8db88938e3cb4 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 19:38:32 +0800
Subject: [PATCH 7/9] docs: show how to run the evaluation in the LLMAISmell example (Gemini review)

---
 examples/llm_ai_smell_example.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/llm_ai_smell_example.py b/examples/llm_ai_smell_example.py
index dc97365d..92f5a85f 100644
--- a/examples/llm_ai_smell_example.py
+++ b/examples/llm_ai_smell_example.py
@@ -118,13 +118,19 @@ def run_example():
     )
     executor = LocalExecutor(input_args=input_args)
 
-    # Call executor.eval_text to evaluate a single document string.
-    # Example: result = executor.eval_text(SAMPLE_DOC_HIGH_AI_SMELL)
     print("\nDocument snippet (high AI smell):")
     print(SAMPLE_DOC_HIGH_AI_SMELL[:200] + "...")
     print("\nExpected: AI_SMELL_DETECTED with high scores on adjective_violence and detail_vacuum")
     print(f"\nExecutor ready: {executor.__class__.__name__}")
 
+    # To run the actual evaluation (requires a valid API key configured above):
+    # try:
+    #     result = executor.eval_text(SAMPLE_DOC_HIGH_AI_SMELL)
+    #     print("\nEvaluation Result:")
+    #     print(result.reason[0])
+    # except Exception as e:
+    #     print(f"\nCould not run evaluation: {e}")
+
     print("\n" + "=" * 60)
     print("Example 2: Low AI Smell Document")
     print("=" * 60)

From 4d0bf9fd938d035cbb57dbaf3ef656ab4f63c601 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 19:45:40 +0800
Subject: [PATCH 8/9] fix: format scores as int in reason output, fix test
 assertion

---
 dingo/model/llm/llm_ai_smell.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dingo/model/llm/llm_ai_smell.py b/dingo/model/llm/llm_ai_smell.py
index e86c9acc..b5bc3bd2 100644
--- a/dingo/model/llm/llm_ai_smell.py
+++ b/dingo/model/llm/llm_ai_smell.py
@@ -206,7 +206,7 @@ def process_response(cls, response: str) -> EvalDetail:
             "adjective_violence": "✨ 形容词暴力指数",
         }
 
-        reason_lines = [f"🤖 AI味总分：{total_score}/10"]
+        reason_lines = [f"🤖 AI味总分：{total_score:g}/10"]  # :g shows 7.0 as "7" without truncating 5.9 to 5
         reason_lines.append("")
         for key, label in dim_labels.items():
             raw_score = dimensions.get(key, 0)
@@ -216,7 +216,7 @@ def process_response(cls, response: str) -> EvalDetail:
                 score = 0.0
             example = evidence.get(key, "")
             bar = cls._score_bar(round(score))
-            reason_lines.append(f"{label}：{score}/10 {bar}")
+            reason_lines.append(f"{label}：{score:g}/10 {bar}")  # :g keeps fractions; int() showed 5.6 as 5 beside a 6-cell bar
             if example and score >= 5:
                 reason_lines.append(f"  └ 例：{example}")
         reason_lines.append("")

From 1d62008cf90867bc79d93030c99f776ff1134676 Mon Sep 17 00:00:00 2001
From: Pei Chu <njuchupei@gmail.com>
Date: Thu, 18 Jun 2026 19:45:43 +0800
Subject: [PATCH 9/9] fix: format scores as int in reason output, fix test
 assertion