sopaco · lethanhson9901 · Mar 5, 2026
diff --git a/src/generator/compose/agents/architecture_editor.rs b/src/generator/compose/agents/architecture_editor.rs
@@ -32,14 +32,28 @@ impl StepForwardAgent for ArchitectureEditor {
                 DataSource::ResearchResult(ResearchAgentType::WorkflowResearcher.to_string()),
             ],
             // Use architecture, deployment, database and ADR docs
-            optional_sources: vec![DataSource::knowledge_categories(vec!["architecture", "deployment", "database", "adr"])],
+            optional_sources: vec![DataSource::knowledge_categories(vec![
+                "architecture",
+                "deployment",
+                "database",
+                "adr",
+            ])],
         }
     }
 
     fn prompt_template(&self) -> PromptTemplate {
         PromptTemplate {
             system_prompt: r#"You are a professional software architecture documentation expert, focused on generating complete, in-depth, and detailed C4 architecture model documentation. Your task is to write an architecture documentation titled `Architecture Overview` based on the provided research reports.
 
+## Mermaid Diagram Safety Rules (MUST follow):
+- Always output Mermaid that compiles in strict Mermaid parsers.
+- Use ASCII-only node IDs: `[A-Za-z0-9_]` (example: `WebDemo`, `InferenceService`, `FabricAPI`).
+- Keep business/localized text in labels only, e.g. `WebDemo["Web Demo hội thoại"]`.
+- Define all nodes first, then declare edges between existing IDs.
+- Use only supported headers (`graph TD`, `graph LR`, `flowchart TD`, `sequenceDiagram`, `erDiagram`).
+- Do not include hidden characters, smart quotes, or non-standard symbols in Mermaid source.
+- Keep edge labels short plain text; avoid markdown and overly complex punctuation.
+
 ## Your Professional Capabilities:
 1. **Architecture Analysis Capability**: Deep understanding of system architecture patterns, design principles, and technology selection
 2. **Documentation Writing Capability**: Proficient in C4 model, UML diagrams, and architecture visualization, with rich and detailed language descriptions

diff --git a/src/generator/compose/agents/overview_editor.rs b/src/generator/compose/agents/overview_editor.rs
@@ -43,6 +43,15 @@ impl StepForwardAgent for OverviewEditor {
 
 Your task is to write a complete, in-depth, detailed, and easy-to-read C4 SystemContext document titled `Project Overview` based on the provided system context research report and domain module analysis results.
 
+## Mermaid Diagram Safety Rules (MUST follow):
+- Always generate Mermaid that is syntactically valid in strict parsers.
+- Use ASCII-only node IDs: `[A-Za-z0-9_]` (e.g. `ClientApp`, `BackendAPI`).
+- Put localized/human-readable text only inside node labels, e.g. `ClientApp["Ứng dụng khách hàng"]`.
+- Define every node ID before using it in edges.
+- Use only standard diagram headers like `graph TD`, `graph LR`, `flowchart TD`, `sequenceDiagram`, `erDiagram`.
+- Do not use hidden/zero-width characters, smart quotes, or unusual Unicode symbols in Mermaid code.
+- Keep edge labels simple plain text without markdown formatting.
+
 ## External Knowledge Integration:
 You may have access to existing product description, requirements and architecture documentation from external sources.
 If available:

diff --git a/src/generator/compose/agents/workflow_editor.rs b/src/generator/compose/agents/workflow_editor.rs
@@ -32,7 +32,10 @@ impl StepForwardAgent for WorkflowEditor {
                 DataSource::CODE_INSIGHTS,
             ],
             // Use workflow docs for workflow documentation
-            optional_sources: vec![DataSource::knowledge_categories(vec!["workflow", "architecture"])],
+            optional_sources: vec![DataSource::knowledge_categories(vec![
+                "workflow",
+                "architecture",
+            ])],
         }
     }
 
@@ -42,6 +45,15 @@ impl StepForwardAgent for WorkflowEditor {
 
 Your task is to write a complete, in-depth, and detailed workflow document titled `Core Workflows` based on the provided multi-dimensional research analysis results.
 
+## Mermaid Diagram Safety Rules (MUST follow):
+- Always produce Mermaid syntax that is valid for strict Mermaid parsers.
+- Use ASCII-only node IDs: `[A-Za-z0-9_]` (e.g. `StartNode`, `ValidateInput`, `CallBackend`).
+- Put localized text in labels only, e.g. `StartNode["Người dùng bắt đầu quy trình"]`.
+- Declare all node IDs before referencing them in edges.
+- Use standard diagram headers only (`flowchart TD`, `graph TD`, `graph LR`, `sequenceDiagram`).
+- Avoid hidden characters, smart quotes, markdown formatting, and unusual Unicode symbols inside Mermaid source.
+- Keep edge labels concise plain text.
+
 ## Your Professional Capabilities:
 1. **Workflow Analysis Skills**: Deep understanding of system core workflows, business processes, and technical processes
 2. **Process Visualization Skills**: Proficient in flowchart design, sequence diagrams, and workflow diagram design

diff --git a/src/generator/preprocess/agents/code_analyze.rs b/src/generator/preprocess/agents/code_analyze.rs
@@ -42,15 +42,24 @@ impl CodeAnalyze {
 
                 Box::pin(async move {
                     let code_analyze = CodeAnalyze { language_processor };
-                    let agent_params = code_analyze
+                    let (agent_params, mut static_insight) = code_analyze
                         .prepare_single_code_agent_params(&project_structure_clone, &code_clone)
                         .await?;
-                    let mut code_insight =
-                        extract::<CodeInsight>(&context_clone, agent_params).await?;
-
-                    // LLM will rewrite source_summary, so exclude it and override here
+                    static_insight.code_dossier.source_summary = code_clone.source_summary.to_owned();
+
+                    let mut code_insight = match extract::<CodeInsight>(&context_clone, agent_params).await {
+                        Ok(insight) => insight,
+                        Err(e) => {
+                            eprintln!(
+                                "⚠️ AI code insight failed for {}: {}. Falling back to static analysis.",
+                                code_clone.name, e
+                            );
+                            return Result::<CodeInsight>::Ok(static_insight);
+                        }
+                    };
+
+                    // LLM may rewrite source_summary, so exclude it and override here
                     code_insight.code_dossier.source_summary = code_clone.source_summary.to_owned();
-
                     Result::<CodeInsight>::Ok(code_insight)
                 })
             })
@@ -73,7 +82,10 @@ impl CodeAnalyze {
             }
         }
 
-        println!("✓ Concurrent code analysis completed, successfully analyzed {} files", code_insights.len());
+        println!(
+            "✓ Concurrent code analysis completed, successfully analyzed {} files",
+            code_insights.len()
+        );
         Ok(code_insights)
     }
 }
@@ -83,20 +95,23 @@ impl CodeAnalyze {
         &self,
         project_structure: &ProjectStructure,
         codes: &CodeDossier,
-    ) -> Result<AgentExecuteParams> {
+    ) -> Result<(AgentExecuteParams, CodeInsight)> {
         // First perform static analysis
         let code_analyse = self.analyze_code_by_rules(codes, project_structure).await?;
 
         // Then use AI for enhanced analysis
         let prompt_user = self.build_code_analysis_prompt(project_structure, &code_analyse);
         let prompt_sys = include_str!("prompts/code_analyze_sys.tpl").to_string();
 
-        Ok(AgentExecuteParams {
-            prompt_sys,
-            prompt_user,
-            cache_scope: "ai_code_insight".to_string(),
-            log_tag: codes.name.to_string(),
-        })
+        Ok((
+            AgentExecuteParams {
+                prompt_sys,
+                prompt_user,
+                cache_scope: "ai_code_insight".to_string(),
+                log_tag: codes.name.to_string(),
+            },
+            code_analyse,
+        ))
     }
 }
 

diff --git a/src/generator/preprocess/agents/code_purpose_analyze.rs b/src/generator/preprocess/agents/code_purpose_analyze.rs
@@ -1,21 +1,75 @@
 use anyhow::Result;
 use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize};
 use std::path::Path;
 
-use crate::{
-    types::code::{CodePurpose, CodePurposeMapper},
-};
 use crate::generator::agent_executor::{AgentExecuteParams, extract};
 use crate::generator::context::GeneratorContext;
+use crate::types::code::{CodePurpose, CodePurposeMapper};
+
+/// Deserializes a `CodePurpose` from whatever JSON value was supplied.
+///
+/// The value is first flattened to text by `deserialize_string_lenient` (strings are
+/// kept verbatim, `null` becomes an empty string, any other value is rendered as JSON
+/// text) and that text is then handed to `CodePurposeMapper::map_from_raw`, so a
+/// free-form answer does not fail deserialization.
+fn deserialize_code_purpose_from_any<'de, D>(deserializer: D) -> Result<CodePurpose, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    // Reuse the shared lenient string conversion rather than repeating its per-variant
+    // match here, so the two deserializers cannot drift apart.
+    let raw = deserialize_string_lenient(deserializer)?;
+    Ok(CodePurposeMapper::map_from_raw(&raw))
+}
+
+/// Leniently deserializes an `f64`, falling back to `0.0` instead of failing.
+///
+/// Numbers are taken as-is, strings are trimmed and parsed, and booleans become
+/// `1.0`/`0.0`. Any other value, unparseable text, or a non-finite result (such as
+/// `"NaN"` or `"inf"`) yields `0.0`.
+fn deserialize_f64_lenient<'de, D>(deserializer: D) -> Result<f64, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let parsed = match serde_json::Value::deserialize(deserializer)? {
+        serde_json::Value::Number(n) => n.as_f64().unwrap_or(0.0),
+        // `str::parse::<f64>` rejects surrounding whitespace, so trim padded text first.
+        serde_json::Value::String(s) => s.trim().parse::<f64>().unwrap_or(0.0),
+        serde_json::Value::Bool(true) => 1.0,
+        _ => 0.0,
+    };
+    // `str::parse::<f64>` accepts "NaN"/"inf"/"infinity"; treat those like unparseable text.
+    Ok(if parsed.is_finite() { parsed } else { 0.0 })
+}
+
+/// Leniently deserializes a `String`: JSON strings pass through verbatim, `null` becomes
+/// an empty string, and every other value is rendered as its compact JSON text.
+fn deserialize_string_lenient<'de, D>(deserializer: D) -> Result<String, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    use serde_json::Value;
+
+    Ok(match Value::deserialize(deserializer)? {
+        Value::Null => String::new(),
+        Value::String(inner) => inner,
+        // Booleans and numbers serialize to the same text their `to_string()` produces.
+        other => serde_json::to_string(&other).unwrap_or_default(),
+    })
+}
 
 /// AI component type analysis result
-#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)]
+#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)]
+#[serde(default)]
 pub struct AICodePurposeAnalysis {
     // Inferred code functionality classification
+    #[serde(default, deserialize_with = "deserialize_code_purpose_from_any")]
     pub code_purpose: CodePurpose,
     // Confidence of the inference result (min 0.0, max 1.0), confidence is high when > 0.7.
+    #[serde(default, deserialize_with = "deserialize_f64_lenient")]
     pub confidence: f64,
+    #[serde(default, deserialize_with = "deserialize_string_lenient")]
     pub reasoning: String,
 }
 
@@ -32,8 +86,8 @@ impl CodePurposeEnhancer {
         context: &GeneratorContext,
         file_path: &Path,
         file_name: &str,
-        file_content: &str) -> Result<CodePurpose>
-    {
+        file_content: &str,
+    ) -> Result<CodePurpose> {
         // First use rule mapping
         let rule_based_type =
             CodePurposeMapper::map_by_path_and_name(&file_path.to_string_lossy(), file_name);
@@ -45,14 +99,19 @@ impl CodePurposeEnhancer {
 
         // If there's AI analyzer and file content, use AI enhanced analysis
         let prompt_sys = "You are a professional code architecture analyst specializing in analyzing component types of code files.".to_string();
-        let prompt_user = self.build_code_purpose_analysis_prompt(file_path, file_content, file_name);
+        let prompt_user =
+            self.build_code_purpose_analysis_prompt(file_path, file_content, file_name);
 
-        let analyze_result = extract::<AICodePurposeAnalysis>(context, AgentExecuteParams {
-            prompt_sys,
-            prompt_user,
-            cache_scope: "ai_code_purpose".to_string(),
-            log_tag: file_name.to_string(),
-        }).await;
+        let analyze_result = extract::<AICodePurposeAnalysis>(
+            context,
+            AgentExecuteParams {
+                prompt_sys,
+                prompt_user,
+                cache_scope: "ai_code_purpose".to_string(),
+                log_tag: file_name.to_string(),
+            },
+        )
+        .await;
 
         return match analyze_result {
             Ok(ai_analysis) => {
@@ -71,7 +130,7 @@ impl CodePurposeEnhancer {
                 // AI analysis failed, use rule result
                 Ok(rule_based_type)
             }
-        }
+        };
     }
 
     /// Build component type analysis prompt
@@ -97,3 +156,61 @@ impl CodePurposeEnhancer {
         )
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::AICodePurposeAnalysis;
+    use crate::types::code::CodePurpose;
+
+    /// Free-form purpose text, a quoted confidence and an object-valued reasoning must
+    /// all deserialize leniently rather than fail.
+    #[test]
+    fn test_ai_code_purpose_analysis_deserialize_unknown_variant_text() {
+        let payload = serde_json::json!({
+            "code_purpose": "Migration configuration script (Alembic env file)",
+            "confidence": "0.91",
+            "reasoning": {"summary":"matched migration config"}
+        });
+
+        let parsed: AICodePurposeAnalysis = serde_json::from_value(payload)
+            .expect("AICodePurposeAnalysis should deserialize loose purpose variant");
+
+        assert_eq!(parsed.code_purpose, CodePurpose::Config);
+        assert_eq!(parsed.confidence, 0.91);
+        // A non-string `reasoning` is kept as its compact JSON text.
+        assert_eq!(parsed.reasoning, r#"{"summary":"matched migration config"}"#);
+    }
+
+    /// A descriptive purpose sentence is mapped, while well-typed fields pass through as-is.
+    #[test]
+    fn test_ai_code_purpose_analysis_deserialize_short_service_api_text() {
+        let payload = serde_json::json!({
+            "code_purpose": "Service API for external calls",
+            "confidence": 0.8,
+            "reasoning": "api classification"
+        });
+
+        let parsed: AICodePurposeAnalysis = serde_json::from_value(payload)
+            .expect("AICodePurposeAnalysis should deserialize shortened API variant");
+
+        assert_eq!(parsed.code_purpose, CodePurpose::Api);
+        assert_eq!(parsed.confidence, 0.8);
+        assert_eq!(parsed.reasoning, "api classification");
+    }
+
+    /// `null` confidence and reasoning fall back to `0.0` and an empty string.
+    #[test]
+    fn test_ai_code_purpose_analysis_deserialize_null_fields() {
+        let payload = serde_json::json!({
+            "code_purpose": "Service API for external calls",
+            "confidence": null,
+            "reasoning": null
+        });
+
+        let parsed: AICodePurposeAnalysis = serde_json::from_value(payload)
+            .expect("AICodePurposeAnalysis should deserialize null fields");
+
+        assert_eq!(parsed.confidence, 0.0);
+        assert_eq!(parsed.reasoning, "");
+    }
+}
diff --git a/src/generator/research/types.rs b/src/generator/research/types.rs
@@ -182,7 +182,8 @@ pub struct BusinessFlow {
 }
 
 /// Core component analysis result
-#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Serialize, Deserialize, Default, JsonSchema)]
+#[serde(default)]
 pub struct KeyModuleReport {
     /// Domain name
     pub domain_name: String,
@@ -498,3 +499,22 @@ impl Default for DatabaseOverviewReport {
 
 // https://c4model.com/abstractions/software-system
 // System name, project's role and value, system type, who is using it, how to use, which external systems it interacts with, diagram
+
+#[cfg(test)]
+mod tests {
+    use super::KeyModuleReport;
+
+    /// A payload without `module_name` must still deserialize, leaving that field empty.
+    #[test]
+    fn test_key_module_report_deserialize_with_missing_module_name() {
+        let parsed: KeyModuleReport = serde_json::from_value(serde_json::json!({
+            "domain_name": "Tài liệu & IaC",
+            "module_description": "Infrastructure and documentation module"
+        }))
+        .expect("KeyModuleReport should deserialize when module_name is missing");
+
+        // The absent key takes the struct-level serde default; present keys are kept.
+        assert_eq!(parsed.module_name, "");
+        assert_eq!(parsed.domain_name, "Tài liệu & IaC");
+    }
+}