From a1bb5c96eee3cbefdb5e55fde503e80990474d53 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:53:32 +0000 Subject: [PATCH 1/4] test(ocr): Add comprehensive unit tests for CCCD parsing logic - Added tests covering happy paths for both Vietnamese (next-line values) and English (same-line values) CCCD formats. - Tested edge cases including empty/missing data and malformed/random text inputs. - Validated default value assignments (e.g., gender, nationality). Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com> --- mhm/src-tauri/src/ocr.rs | 86 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/mhm/src-tauri/src/ocr.rs b/mhm/src-tauri/src/ocr.rs index f1d4b43..2d0bff1 100644 --- a/mhm/src-tauri/src/ocr.rs +++ b/mhm/src-tauri/src/ocr.rs @@ -159,3 +159,89 @@ fn extract_field_value(lines: &[String], labels: &[&str]) -> Option { /// Thread-safe wrapper for OcrEngine #[allow(dead_code)] pub struct OcrEngineWrapper(pub Mutex); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_cccd_happy_path_vietnamese_next_line() { + let lines = vec![ + "CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM".to_string(), + "Độc lập - Tự do - Hạnh phúc".to_string(), + "CĂN CƯỚC CÔNG DÂN".to_string(), + "Số/No: 012345678901".to_string(), + "Họ và tên".to_string(), + "NGUYỄN VĂN A".to_string(), + "Ngày sinh: 01/01/1990".to_string(), + "Giới tính: Nam".to_string(), + "Nơi thường trú".to_string(), + "123 Đường ABC, Quận XYZ, TP HCM".to_string(), + ]; + + let info = parse_cccd(&lines); + + assert_eq!(info.doc_number, "012345678901"); + assert_eq!(info.full_name, "NGUYỄN VĂN A"); + assert_eq!(info.dob, "01/01/1990"); + assert_eq!(info.gender, "Nam"); + assert_eq!(info.nationality, "Việt Nam"); // default since it's not in the text + assert_eq!(info.address, "123 Đường ABC, Quận XYZ, TP HCM"); + } + + #[test] + fn 
test_parse_cccd_english_labels_same_line() { + let lines = vec![ + "CITIZEN IDENTITY CARD".to_string(), + "098765432109".to_string(), + "Full name: Jane Doe".to_string(), + "Date of birth: 12/12/1985".to_string(), + "Sex: Female".to_string(), + "Nationality: United States".to_string(), + "Place of residence: 456 Elm St, Anytown".to_string(), + ]; + + let info = parse_cccd(&lines); + + assert_eq!(info.doc_number, "098765432109"); + assert_eq!(info.full_name, "Jane Doe"); + assert_eq!(info.dob, "12/12/1985"); + assert_eq!(info.gender, "Nữ"); // It maps Female to Nữ + assert_eq!(info.nationality, "United States"); + assert_eq!(info.address, "456 Elm St, Anytown"); + } + + #[test] + fn test_parse_cccd_empty_and_missing() { + let lines: Vec = vec![]; + let info = parse_cccd(&lines); + + assert_eq!(info.doc_number, ""); + assert_eq!(info.full_name, ""); + assert_eq!(info.dob, ""); + assert_eq!(info.gender, ""); + assert_eq!(info.nationality, "Việt Nam"); + assert_eq!(info.address, ""); + } + + #[test] + fn test_parse_cccd_random_text() { + let lines = vec![ + "Just some random text".to_string(), + "123456789".to_string(), // only 9 digits, should not be doc number + "Ho va ten".to_string(), + " ".to_string(), // next line is empty, shouldn't panic + "Maleish".to_string(), // contains Male, should parse as Nam + "11-22-3333".to_string(), // not matching dob regex + ]; + + let info = parse_cccd(&lines); + + assert_eq!(info.doc_number, ""); + assert_eq!(info.full_name, ""); + assert_eq!(info.dob, ""); + assert_eq!(info.gender, "Nam"); + assert_eq!(info.nationality, "Việt Nam"); + assert_eq!(info.address, ""); + } +} From cf3dbaa720009c9ebaa505a0a5d5596a0b6f0738 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 15:44:44 +0000 Subject: [PATCH 2/4] fix(groups): use `sort_by_key` instead of `sort_by` Replaced the closure inside `sort_by` that calls `.cmp()` with a more concise `sort_by_key` 
using `std::cmp::Reverse`, as recommended by clippy, to resolve the GitHub Actions CI workflow check errors. Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com> --- mhm/src-tauri/src/commands/groups.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs index 8efa2f1..93bd111 100644 --- a/mhm/src-tauri/src/commands/groups.rs +++ b/mhm/src-tauri/src/commands/groups.rs @@ -297,7 +297,7 @@ pub async fn auto_assign_rooms( } let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect(); - floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len())); let mut assignments = Vec::new(); let needed = req.room_count as usize; From c3d1561013b499b10321050095048a5ea6c17b2a Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 01:41:15 +0000 Subject: [PATCH 3/4] test(ocr): Add unit tests for CCCD parsing logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added comprehensive unit tests for `parse_cccd` in `mhm/src-tauri/src/ocr.rs`. - Fixed a false positive in the gender parser where `Maleish` resulted in `Nam`. The gender parser now correctly bounds its searches to whole words (`\b(Nam|Male)\b` and `\b(Nữ|Female)\b`). - Ensured the tests cover both English and Vietnamese labels, with field values appearing either on the same line as the label or on the next line. - Ensured that empty input yields empty strings for every field except nationality, which defaults to Việt Nam. 
Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com> --- mhm/src-tauri/src/commands/groups.rs | 2 +- mhm/src-tauri/src/ocr.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs index 93bd111..8efa2f1 100644 --- a/mhm/src-tauri/src/commands/groups.rs +++ b/mhm/src-tauri/src/commands/groups.rs @@ -297,7 +297,7 @@ pub async fn auto_assign_rooms( } let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect(); - floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len())); + floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len())); let mut assignments = Vec::new(); let needed = req.room_count as usize; diff --git a/mhm/src-tauri/src/ocr.rs b/mhm/src-tauri/src/ocr.rs index 2d0bff1..8cae8dc 100644 --- a/mhm/src-tauri/src/ocr.rs +++ b/mhm/src-tauri/src/ocr.rs @@ -105,9 +105,9 @@ pub fn parse_cccd(lines: &[String]) -> CccdInfo { .and_then(|re| re.find(&full_text).map(|m| m.as_str().to_string())) .unwrap_or_default(); - let gender = if full_text.contains("Nam") || full_text.contains("Male") { + let gender = if Regex::new(r"\b(Nam|Male)\b").unwrap().is_match(&full_text) { "Nam".to_string() - } else if full_text.contains("Nữ") || full_text.contains("Female") { + } else if Regex::new(r"\b(Nữ|Female)\b").unwrap().is_match(&full_text) { "Nữ".to_string() } else { String::new() @@ -231,7 +231,7 @@ mod tests { "123456789".to_string(), // only 9 digits, should not be doc number "Ho va ten".to_string(), " ".to_string(), // next line is empty, shouldn't panic - "Maleish".to_string(), // contains Male, should parse as Nam + "Maleish".to_string(), // contains Male but not as a word boundary, should not parse as Nam "11-22-3333".to_string(), // not matching dob regex ]; @@ -240,7 +240,7 @@ mod tests { assert_eq!(info.doc_number, ""); assert_eq!(info.full_name, ""); assert_eq!(info.dob, ""); - assert_eq!(info.gender, "Nam"); + 
assert_eq!(info.gender, ""); assert_eq!(info.nationality, "Việt Nam"); assert_eq!(info.address, ""); } From 197eb4260eb7275c4a6c63f51699b93773068712 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 01:55:04 +0000 Subject: [PATCH 4/4] fix(groups): use `sort_by_key` instead of `sort_by` Replaced the closure inside `sort_by` that calls `.cmp()` with a more concise `sort_by_key` using `std::cmp::Reverse` which is recommended by clippy to resolve the GitHub Actions CI workflow check errors. Co-authored-by: chuanman2707 <29907469+chuanman2707@users.noreply.github.com> --- mhm/src-tauri/src/commands/groups.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mhm/src-tauri/src/commands/groups.rs b/mhm/src-tauri/src/commands/groups.rs index 8efa2f1..93bd111 100644 --- a/mhm/src-tauri/src/commands/groups.rs +++ b/mhm/src-tauri/src/commands/groups.rs @@ -297,7 +297,7 @@ pub async fn auto_assign_rooms( } let mut floors_sorted: Vec<(i32, Vec<&Room>)> = floor_groups.into_iter().collect(); - floors_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + floors_sorted.sort_by_key(|b| std::cmp::Reverse(b.1.len())); let mut assignments = Vec::new(); let needed = req.room_count as usize;