From a1bb5c96eee3cbefdb5e55fde503e80990474d53 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 16 Apr 2026 14:53:32 +0000
Subject: [PATCH 1/4] test(ocr): Add comprehensive unit tests for CCCD parsing
 logic

- Added tests covering happy paths for both Vietnamese (next-line values) and English (same-line values) CCCD formats.
- Tested edge cases including empty/missing data and malformed/random text inputs.
- Validated default value assignments (e.g., gender, nationality).

Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com>
---
 mhm/src-tauri/src/ocr.rs | 86 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
diff --git a/mhm/src-tauri/src/ocr.rs b/mhm/src-tauri/src/ocr.rs
index f1d4b43..2d0bff1 100644
--- a/mhm/src-tauri/src/ocr.rs
+++ b/mhm/src-tauri/src/ocr.rs
@@ -159,3 +159,89 @@ fn extract_field_value(lines: &[String], labels: &[&str]) -> Option<String> {
 /// Thread-safe wrapper for OcrEngine
 #[allow(dead_code)]
 pub struct OcrEngineWrapper(pub Mutex<OcrEngine>);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_cccd_happy_path_vietnamese_next_line() {
+        let lines = vec![
+            "CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM".to_string(),
+            "Độc lập - Tự do - Hạnh phúc".to_string(),
+            "CĂN CƯỚC CÔNG DÂN".to_string(),
+            "Số/No: 012345678901".to_string(),
+            "Họ và tên".to_string(),
+            "NGUYỄN VĂN A".to_string(),
+            "Ngày sinh: 01/01/1990".to_string(),
+            "Giới tính: Nam".to_string(),
+            "Nơi thường trú".to_string(),
+            "123 Đường ABC, Quận XYZ, TP HCM".to_string(),
+        ];
+
+        let info = parse_cccd(&lines);
+
+        assert_eq!(info.doc_number, "012345678901");
+        assert_eq!(info.full_name, "NGUYỄN VĂN A");
+        assert_eq!(info.dob, "01/01/1990");
+        assert_eq!(info.gender, "Nam");
+        assert_eq!(info.nationality, "Việt Nam"); // default since it's not in the text
+        assert_eq!(info.address, "123 Đường ABC, Quận XYZ, TP HCM");
+    }
+
+    #[test]
+    fn test_parse_cccd_english_labels_same_line() {
+        let lines = vec![
+            "CITIZEN IDENTITY CARD".to_string(),
+            "098765432109".to_string(),
+            "Full name: Jane Doe".to_string(),
+            "Date of birth: 12/12/1985".to_string(),
+            "Sex: Female".to_string(),
+            "Nationality: United States".to_string(),
+            "Place of residence: 456 Elm St, Anytown".to_string(),
+        ];
+
+        let info = parse_cccd(&lines);
+
+        assert_eq!(info.doc_number, "098765432109");
+        assert_eq!(info.full_name, "Jane Doe");
+        assert_eq!(info.dob, "12/12/1985");
+        assert_eq!(info.gender, "Nữ"); // It maps Female to Nữ
+        assert_eq!(info.nationality, "United States");
+        assert_eq!(info.address, "456 Elm St, Anytown");
+    }
+
+    #[test]
+    fn test_parse_cccd_empty_and_missing() {
+        let lines: Vec<String> = vec![];
+        let info = parse_cccd(&lines);
+
+        assert_eq!(info.doc_number, "");
+        assert_eq!(info.full_name, "");
+        assert_eq!(info.dob, "");
+        assert_eq!(info.gender, "");
+        assert_eq!(info.nationality, "Việt Nam");
+        assert_eq!(info.address, "");
+    }
+
+    #[test]
+    fn test_parse_cccd_random_text() {
+        let lines = vec![
+            "Just some random text".to_string(),
+            "123456789".to_string(), // only 9 digits, should not be doc number
+            "Ho va ten".to_string(),
+            "  ".to_string(), // next line is empty, shouldn't panic
+            "Maleish".to_string(), // contains Male, should parse as Nam
+            "11-22-3333".to_string(), // not matching dob regex
+        ];
+
+        let info = parse_cccd(&lines);
+
+        assert_eq!(info.doc_number, "");
+        assert_eq!(info.full_name, "");
+        assert_eq!(info.dob, "");
+        assert_eq!(info.gender, "Nam");
+        assert_eq!(info.nationality, "Việt Nam");
+        assert_eq!(info.address, "");
+    }
+}

From cf3dbaa720009c9ebaa505a0a5d5596a0b6f0738 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 16 Apr 2026 15:44:44 +0000
Subject: [PATCH 2/4] fix(groups): use `sort_by_key` instead of `sort_by`

Replaced the `sort_by` closure that calls `.cmp()` with the more concise `sort_by_key` using `std::cmp::Reverse`, as recommended by clippy. This resolves the lint errors reported by the GitHub Actions CI workflow check.

Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com>
---
 mhm/src-tauri/src/commands/groups.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs
index 8efa2f1..93bd111 100644
--- a/mhm/src-tauri/src/commands/groups.rs
+++ b/mhm/src-tauri/src/commands/groups.rs
@@ -297,7 +297,7 @@ pub async fn auto_assign_rooms(
     }
 
     let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect();
-    floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+    floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len()));
 
     let mut assignments = Vec::new();
     let needed = req.room_count as usize;

From c3d1561013b499b10321050095048a5ea6c17b2a Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:41:15 +0000
Subject: [PATCH 3/4] test(ocr): Add unit tests for CCCD parsing logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added comprehensive unit tests for `parse_cccd` in `mhm/src-tauri/src/ocr.rs`.
- Fixed a false positive in the gender parser where `Maleish` resulted in `Nam`. The gender parser now matches whole words only, using the word-boundary patterns `\b(Nam|Male)\b` and `\b(Nữ|Female)\b`.
- Ensured the tests account for English/Vietnamese label differences and for field values that appear either on the same line as the label or on the following line.
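- Ensured that empty input yields empty strings for every field except nationality, which defaults to `Việt Nam`.
- Note: this patch also reverts the `floors_sorted` sort in `commands/groups.rs` from `sort_by_key` back to `sort_by`; PATCH 4/4 reapplies the `sort_by_key` change.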

Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com>
---
 mhm/src-tauri/src/commands/groups.rs | 2 +-
 mhm/src-tauri/src/ocr.rs             | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs
index 93bd111..8efa2f1 100644
--- a/mhm/src-tauri/src/commands/groups.rs
+++ b/mhm/src-tauri/src/commands/groups.rs
@@ -297,7 +297,7 @@ pub async fn auto_assign_rooms(
     }
 
     let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect();
-    floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len()));
+    floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
 
     let mut assignments = Vec::new();
     let needed = req.room_count as usize;
diff --git a/mhm/src-tauri/src/ocr.rs b/mhm/src-tauri/src/ocr.rs
index 2d0bff1..8cae8dc 100644
--- a/mhm/src-tauri/src/ocr.rs
+++ b/mhm/src-tauri/src/ocr.rs
@@ -105,9 +105,9 @@ pub fn parse_cccd(lines: &[String]) -> CccdInfo {
         .and_then(|re| re.find(&full_text).map(|m| m.as_str().to_string()))
         .unwrap_or_default();
 
-    let gender = if full_text.contains("Nam") || full_text.contains("Male") {
+    let gender = if Regex::new(r"\b(Nam|Male)\b").unwrap().is_match(&full_text) {
         "Nam".to_string()
-    } else if full_text.contains("Nữ") || full_text.contains("Female") {
+    } else if Regex::new(r"\b(Nữ|Female)\b").unwrap().is_match(&full_text) {
         "Nữ".to_string()
     } else {
         String::new()
@@ -231,7 +231,7 @@ mod tests {
             "123456789".to_string(), // only 9 digits, should not be doc number
             "Ho va ten".to_string(),
             "  ".to_string(), // next line is empty, shouldn't panic
-            "Maleish".to_string(), // contains Male, should parse as Nam
+            "Maleish".to_string(), // contains Male but not as a word boundary, should not parse as Nam
             "11-22-3333".to_string(), // not matching dob regex
         ];
 
@@ -240,7 +240,7 @@ mod tests {
         assert_eq!(info.doc_number, "");
         assert_eq!(info.full_name, "");
         assert_eq!(info.dob, "");
-        assert_eq!(info.gender, "Nam");
+        assert_eq!(info.gender, "");
         assert_eq!(info.nationality, "Việt Nam");
         assert_eq!(info.address, "");
     }

From 197eb4260eb7275c4a6c63f51699b93773068712 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:55:04 +0000
Subject: [PATCH 4/4] fix(groups): use `sort_by_key` instead of `sort_by`

Replaced the `sort_by` closure that calls `.cmp()` with the more concise `sort_by_key` using `std::cmp::Reverse`, as recommended by clippy. This resolves the lint errors reported by the GitHub Actions CI workflow check.

Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com>
---
 mhm/src-tauri/src/commands/groups.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs
index 8efa2f1..93bd111 100644
--- a/mhm/src-tauri/src/commands/groups.rs
+++ b/mhm/src-tauri/src/commands/groups.rs
@@ -297,7 +297,7 @@ pub async fn auto_assign_rooms(
     }
 
     let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect();
-    floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+    floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len()));
 
     let mut assignments = Vec::new();
     let needed = req.room_count as usize;