MigoXLab · e06084 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/dingo/model/llm/llm_ai_smell.py b/dingo/model/llm/llm_ai_smell.py
@@ -0,0 +1,242 @@
+"""
+AI Smell Detector for Requirement Documents (需求文档 AI 味检测器)
+
+Detects AI-generated writing patterns in requirement documents across 5 dimensions:
+- 正确的废话指数 (Correct Nonsense Index)
+- 无限镜像感 (Infinite Mirror Index)
+- 彩虹屁密度 (Rainbow Fart Density)
+- 细节真空度 (Detail Vacuum Index)
+- 形容词暴力指数 (Adjective Violence Index)
+"""
+
+import json
+
+from dingo.io.input import Data, RequiredField
+from dingo.io.output.eval_detail import EvalDetail
+from dingo.model import Model
+from dingo.model.llm.base_openai import BaseOpenAI
+from dingo.utils import log
+from dingo.utils.exception import ConvertJsonError
+
+
+@Model.llm_register("LLMAISmell")
+class LLMAISmell(BaseOpenAI):
+    """
+    AI Smell Detector for requirement documents.
+
+    Asks an LLM to rate a document on 5 dimensions of AI-generated writing
+    patterns (the names in backticks are the JSON keys requested in the prompt):
+    1. Correct Nonsense Index (`correct_nonsense`) - Hollow truisms
+       ("In today's society...", "With the rapid development of...")
+    2. Infinite Mirror Index (`infinite_mirror`) - Repetitive emphasis of the
+       same point in different words
+    3. Rainbow Fart Density (`rainbow_fart`) - Excessive praise, inflated
+       importance claims
+    4. Detail Vacuum Index (`detail_vacuum`) - Sounds complete but lacks any
+       actionable specifics
+    5. Adjective Violence Index (`adjective_violence`) - Buzzword overuse
+       (e.g. "efficient", "empower", "closed loop", "ecosystem", "granularity")
+
+    Each dimension is scored 0-10. The overall score is not computed here: it is
+    read from the `total_score` field of the LLM reply, and the prompt asks the
+    model to weight the five dimensions when producing it.
+    """
+
+    # Descriptive metadata for this metric.
+    # NOTE(review): "examples" points at llm_local.py, not at a dedicated
+    # example for this metric - confirm this is the intended path.
+    _metric_info = {
+        "category": "Document Quality Assessment Metrics",
+        "metric_name": "LLMAISmell",
+        "description": "Detects AI-generated writing patterns in requirement documents across 5 dimensions: hollow truisms, repetition, rainbow farts, detail vacuum, and adjective violence",
+        "examples": "examples/llm_and_rule/llm_local.py",
+        "evaluation_results": "",
+    }
+
+    # Only the document content is declared as a required input field.
+    _required_fields = [RequiredField.CONTENT]
+
+    # A document is flagged as AI-smelling when its total score (0-10) is at or
+    # above this value; process_response compares with `>=`.
+    threshold = 6
+
+    prompt = """
+# 角色
+你是一位资深需求评审专家，专门识别需求文档中的 AI 代写痕迹。
+
+# 任务
+分析下面的文档，从 5 个维度评估其"AI 味"，每个维度打分 0-10：
+
+## 评估维度
+
+### 1. 💊 正确的废话指数（0-10）
+**定义**：用正确但毫无信息量的话填充文档，听起来很有道理但什么都没说。
+**典型表现**：
+- "在当今社会……"、"随着技术的不断发展……"
+- "这对用户体验至关重要"（但没有说为什么或怎么做）
+- "我们需要确保系统的稳定性和可靠性"（没有具体指标）
+- 每段开头都在重述背景
+
+**打分标准**：
+- 0-2：文档直接切入主题，陈述均有实质内容
+- 3-5：有少量套话但不影响整体
+- 6-8：大量空洞表述，信息密度低
+- 9-10：几乎全是废话，读完不知道要做什么
+
+### 2. 🪞 无限镜像感（0-10）
+**定义**：同一个意思用不同的话反复说，制造"内容丰富"的假象。
+**典型表现**：
+- 同一个功能点在不同章节反复描述
+- "提升用户体验" → "优化用户感受" → "改善用户满意度"（三句话说同一件事）
+- 结论和摘要和正文高度重复
+
+**打分标准**：
+- 0-2：每句话都有新信息
+- 3-5：有轻微重复但可接受
+- 6-8：明显感觉在凑字数
+- 9-10：镜中镜，绕来绕去
+
+### 3. 🌈 彩虹屁密度（0-10）
+**定义**：过度拔高重要性、夸大影响、给自己项目过度背书。
+**典型表现**：
+- "这将彻底改变……"、"革命性的……"、"行业领先的……"
+- "大幅提升"但没有数据
+- "用户迫切需要"但没有调研依据
+- 每个功能都是"核心"、"关键"、"重要"
+
+**打分标准**：
+- 0-2：表述客观，有数据支撑
+- 3-5：略有夸张但在合理范围
+- 6-8：随处可见夸大词汇
+- 9-10：每句话都在吹，读起来像广告
+
+### 4. 🧩 细节真空度（0-10）
+**定义**：文档结构完整、格式规范，但缺乏任何可落地的具体信息。
+**典型表现**：
+- "系统应支持多种支付方式"（哪些方式？）
+- "性能要满足用户需求"（什么性能？什么需求？）
+- "界面设计应符合用户习惯"（谁的习惯？什么标准？）
+- 没有数字、没有边界条件、没有异常处理
+
+**打分标准**：
+- 0-2：有具体的数字、接口、用例、边界条件
+- 3-5：部分模糊但核心功能有描述
+- 6-8：大量"应该"、"需要"但没有"怎么做"
+- 9-10：读完完全不知道要开发什么
+
+### 5. ✨ 形容词暴力指数（0-10）
+**定义**：大量堆叠科技/管理类buzzword，用词汇密度掩盖内容空洞。
+**高危词汇**：高效、赋能、闭环、生态、颗粒度、抓手、落地、对齐、拉通、赛道、底层逻辑、顶层设计、数字化转型、智能化、一体化、全链路、沉淀、复用、标准化、降本增效、价值最大化
+**打分标准**：
+- 0-2：用词精准朴素，术语有明确定义
+- 3-5：偶有流行词但不影响理解
+- 6-8：buzzword 密集，读起来像PPT
+- 9-10：去掉这些词文档就空了
+
+---
+
+## 综合 AI 味总分（0-10）
+基于以上 5 个维度的加权综合评估。
+
+**权重参考**：
+- 细节真空度（0.3）：最能区分 AI 和人写的
+- 正确的废话指数（0.25）
+- 形容词暴力指数（0.2）
+- 无限镜像感（0.15）
+- 彩虹屁密度（0.1）
+
+---
+
+## 输出格式
+
+请严格按照以下 JSON 格式输出，不要输出任何其他内容：
+
+```json
+{
+  "total_score": <综合AI味总分 0-10>,
+  "dimensions": {
+    "correct_nonsense": <正确的废话指数 0-10>,
+    "infinite_mirror": <无限镜像感 0-10>,
+    "rainbow_fart": <彩虹屁密度 0-10>,
+    "detail_vacuum": <细节真空度 0-10>,
+    "adjective_violence": <形容词暴力指数 0-10>
+  },
+  "evidence": {
+    "correct_nonsense": "<最典型的1-2个例子，直接引用原文>",
+    "infinite_mirror": "<最典型的1-2个例子，直接引用原文>",
+    "rainbow_fart": "<最典型的1-2个例子，直接引用原文>",
+    "detail_vacuum": "<最典型的1-2个例子，直接引用原文>",
+    "adjective_violence": "<最典型的1-2个例子，直接引用原文>"
+  },
+  "verdict": "<一句话总结，不超过50字>"
+}
+```
+
+---
+
+## 待评估文档：
+
+"""
+
+    @staticmethod
+    def _coerce_score(value) -> float:
+        """
+        Convert a raw score from the LLM reply into a float within [0, 10].
+
+        Missing, non-numeric, NaN or infinite values are treated as 0.0 so that a
+        malformed reply never crashes formatting or yields an out-of-range score.
+        """
+        import math  # local import: only this helper needs it
+
+        try:
+            score = float(value)
+        except (ValueError, TypeError):
+            return 0.0
+        # json.loads accepts NaN/Infinity literals; neither is a usable score.
+        if not math.isfinite(score):
+            return 0.0
+        return max(0.0, min(10.0, score))
+
+    @classmethod
+    def process_response(cls, response: str) -> EvalDetail:
+        """
+        Convert the raw LLM reply into an ``EvalDetail``.
+
+        The reply is expected to be the JSON object requested by ``cls.prompt``,
+        optionally wrapped in a Markdown code fence.
+
+        Args:
+            response: Raw text returned by the LLM.
+
+        Returns:
+            An ``EvalDetail`` whose ``score`` is the total score normalized to
+            0-1, whose ``status`` is True when the total score is at or above
+            ``cls.threshold``, and whose ``reason`` holds one multi-line string
+            with the per-dimension breakdown and the verdict.
+
+        Raises:
+            ConvertJsonError: If the reply is not valid JSON, or the parsed value
+                is not a JSON object.
+        """
+        # Strip an optional Markdown code fence, with or without a "json" tag.
+        fence = "`" * 3
+        opening = fence + "json"
+        text = response.strip()
+        if text.startswith(opening):
+            text = text[len(opening):]
+        elif text.startswith(fence):
+            text = text[len(fence):]
+        if text.endswith(fence):
+            text = text[:-len(fence)]
+        text = text.strip()
+
+        try:
+            data = json.loads(text)
+        except json.JSONDecodeError as err:
+            raise ConvertJsonError(
+                f"Failed to parse AI smell response as JSON: {text[:200]}"
+            ) from err
+        if not isinstance(data, dict):
+            raise ConvertJsonError(f"Parsed JSON is not a dictionary: {type(data)}")
+
+        total_score = cls._coerce_score(data.get("total_score", 0))
+
+        # The model may return a wrong shape (list, string, null) for these
+        # fields; fall back to empty mappings instead of failing on `.get`.
+        dimensions = data.get("dimensions")
+        if not isinstance(dimensions, dict):
+            dimensions = {}
+        evidence = data.get("evidence")
+        if not isinstance(evidence, dict):
+            evidence = {}
+        verdict = str(data.get("verdict") or "")
+
+        # Build human-readable reason
+        dim_labels = {
+            "correct_nonsense": "💊 正确的废话指数",
+            "infinite_mirror": "🪞 无限镜像感",
+            "rainbow_fart": "🌈 彩虹屁密度",
+            "detail_vacuum": "🧩 细节真空度",
+            "adjective_violence": "✨ 形容词暴力指数",
+        }
+
+        # `:g` prints whole scores as "7" and fractional ones as "7.6", so the
+        # number shown always agrees with the rounded bar next to it.
+        reason_lines = [f"🤖 AI味总分：{total_score:g}/10", ""]
+        for key, label in dim_labels.items():
+            score = cls._coerce_score(dimensions.get(key, 0))
+            example = evidence.get(key, "")
+            bar = cls._score_bar(round(score))
+            reason_lines.append(f"{label}：{score:g}/10 {bar}")
+            # Only quote evidence for dimensions that scored noticeably high.
+            if example and score >= 5:
+                reason_lines.append(f"  └ 例：{example}")
+        reason_lines.append("")
+        reason_lines.append(f"📝 {verdict}")
+
+        is_ai_smell = total_score >= cls.threshold
+
+        return EvalDetail(
+            metric=cls.__name__,
+            status=is_ai_smell,
+            score=round(total_score / 10, 2),  # normalize to 0-1 for consistency
+            label=["AI_SMELL_DETECTED"] if is_ai_smell else ["AI_SMELL_CLEAN"],
+            reason=["\n".join(reason_lines)],
+        )
+
+    @classmethod
+    def _score_bar(cls, score: float, width: int = 10) -> str:
+        """
+        Render a fixed-width text progress bar for a score.
+
+        Args:
+            score: Score to display. It is rounded to the nearest integer and
+                clamped to the range ``[0, width]``.
+            width: Total number of cells in the bar.
+
+        Returns:
+            The bar wrapped in square brackets, using the Unicode block
+            characters "█" for filled cells and "░" for empty cells.
+        """
+        # Clamp so out-of-range scores can never produce a bar of the wrong length.
+        filled = max(0, min(width, round(score)))
+        empty = width - filled
+        return f"[{'█' * filled}{'░' * empty}]"
diff --git a/examples/llm_ai_smell_example.py b/examples/llm_ai_smell_example.py
@@ -0,0 +1,143 @@
+"""
+Example: AI Smell Detection for Requirement Documents
+
+Usage:
+    python examples/llm_ai_smell_example.py
+
+This example demonstrates how to use LLMAISmell to detect AI-generated
+writing patterns in requirement documents.
+"""
+
+from dingo.config.input_args import InputArgs
+from dingo.exec.local import LocalExecutor
+
+SAMPLE_DOC_HIGH_AI_SMELL = """
+## 智能客服系统需求文档
+
+### 一、背景
+
+在当今数字化转型的大背景下，随着人工智能技术的不断发展和进步，越来越多的企业
+开始重视智能化客服系统的建设。为了更好地赋能业务发展，提升用户体验，打造闭环的
+客户服务生态，我们提出构建一套高效、智能的客服解决方案，以实现降本增效、价值最大化的战略目标。
+
+### 二、核心目标
+
+本系统旨在通过底层逻辑的重塑和顶层设计的优化，实现以下战略目标：
+- 大幅提升客户满意度，打造行业领先的服务体验
+- 通过全链路智能化改造，彻底革新传统客服模式
+- 赋能一线客服人员，提升整体服务效能，实现降本增效
+- 构建可持续发展的智能客服生态，沉淀核心服务能力
+
+### 三、功能需求
+
+#### 3.1 智能问答
+
+系统应支持智能问答功能，能够准确理解用户意图，提供精准的回答。系统需要确保
+回答的准确性和及时性，以满足用户的迫切需求。界面设计应符合用户使用习惯，提供
+良好的交互体验。系统还应支持多轮对话，能够理解上下文，提供连贯的对话体验。
+
+#### 3.2 工单管理
+
+系统应具备完善的工单管理功能，支持工单的创建、分配、跟踪和关闭全生命周期管理。
+工单系统需要满足业务需求，支持多种工单类型，确保处理效率和质量。通过对工单数据
+的深度挖掘和分析，为管理决策提供有力支撑，实现数据驱动的精细化运营。
+
+### 四、技术要求
+
+系统性能应满足业务需求，确保在高并发场景下的稳定运行。系统需要具备良好的扩展性
+和可维护性，以支撑未来业务的快速发展。安全性方面，系统应符合相关法规要求，保护
+用户数据安全。系统架构应采用先进的微服务架构，实现各模块的解耦，提升系统的灵活性
+和可靠性。
+
+### 五、总结
+
+综上所述，本智能客服系统将通过技术创新和模式变革，为企业创造巨大的商业价值，
+提升核心竞争力，助力企业在激烈的市场竞争中脱颖而出，实现可持续发展。
+"""
+
+SAMPLE_DOC_LOW_AI_SMELL = """
+## 客服工单系统 v2.1 需求文档
+
+**作者**: 张三  **日期**: 2024-01-15  **评审状态**: 待评审
+
+---
+
+### 1. 背景
+
+当前客服团队每天处理约 2000 张工单，其中 65% 为重复性问题（退款、发货查询、
+账号问题）。工单平均处理时间 8 分钟，其中 3 分钟用于查历史记录。本项目目标是
+将平均处理时间降至 5 分钟以内。
+
+### 2. 功能需求
+
+#### 2.1 快速回复模板
+
+**需求描述**：客服输入关键词时，系统自动推荐匹配的回复模板。
+
+**详细说明**：
+- 输入框输入字符后 300ms 内展示建议列表，最多显示 5 条
+- 按相关度排序：完全匹配 > 关键词匹配 > 语义相似
+- 客服选择模板后可编辑再发送，不能直接强制发送
+- 模板库由运营通过后台维护，支持按一级分类（退款/物流/账号/其他）管理
+
+**不在范围内**：自动发送、客户端展示建议
+
+#### 2.2 历史工单查询
+
+**需求描述**：在工单页面可快速查看同一用户的历史工单。
+
+**详细说明**：
+- 侧边栏展示最近 10 张工单的摘要（时间、分类、处理结果）
+- 点击展开查看完整内容
+- 数据来源：工单系统数据库，实时查询，无需缓存
+- 异常情况：用户无历史工单时展示"暂无历史记录"，查询超时（>3s）展示错误提示
+
+### 3. 非功能需求
+
+- 快速回复建议 P95 响应时间 < 500ms（基于当前 500 并发用户）
+- 历史工单查询 P99 < 2s
+- 暂不考虑国际化
+"""
+
+
+def run_example():
+    """
+    Walk through both sample documents on the console.
+
+    Builds a ``LocalExecutor`` configured for the ``LLMAISmell`` checker and
+    prints a snippet of each sample document together with the expected
+    outcome. No evaluation request is sent; the snippet that would do so is
+    left commented out below.
+    """
+    divider = "=" * 60
+
+    print(divider)
+    print("Example 1: High AI Smell Document")
+    print(divider)
+
+    # Placeholder credentials: swap in a real API key and endpoint before
+    # enabling the evaluation call further down.
+    llm_settings = {
+        "model": "gpt-4o",
+        "key": "YOUR_API_KEY",
+        "api_base": "https://api.openai.com/v1",
+    }
+    input_args = InputArgs(
+        eval_group="llm",
+        llm_config=llm_settings,
+        custom_config={"llm": ["LLMAISmell"]},
+    )
+    executor = LocalExecutor(input_args=input_args)
+
+    high_snippet = SAMPLE_DOC_HIGH_AI_SMELL[:200] + "..."
+    print("\nDocument snippet (high AI smell):")
+    print(high_snippet)
+    print("\nExpected: AI_SMELL_DETECTED with high scores on adjective_violence and detail_vacuum")
+    print(f"\nExecutor ready: {executor.__class__.__name__}")
+
+    # With valid credentials configured above, the evaluation itself would be:
+    # try:
+    #     result = executor.eval_text(SAMPLE_DOC_HIGH_AI_SMELL)
+    #     print("\nEvaluation Result:")
+    #     print(result.reason[0])
+    # except Exception as e:
+    #     print(f"\nCould not run evaluation: {e}")
+
+    print("\n" + divider)
+    print("Example 2: Low AI Smell Document")
+    print(divider)
+
+    low_snippet = SAMPLE_DOC_LOW_AI_SMELL[:200] + "..."
+    print("\nDocument snippet (low AI smell):")
+    print(low_snippet)
+    print("\nExpected: AI_SMELL_CLEAN with low scores across all dimensions")
+
+
+if __name__ == "__main__":
+    run_example()