From d324be5cbe3361e41d90bbe1ba12cba5078abd73 Mon Sep 17 00:00:00 2001 From: Timo Naroska Date: Fri, 22 May 2026 11:03:27 -0700 Subject: [PATCH 1/2] fix: relax try_csv to match libmagic semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardcoded CSV detector required exactly 10 records before reporting text/csv, so typical small CSVs (configs, fixtures, short exports) were silently classified as plain ASCII text. Upstream libmagic's is_csv.c treats CSV_LINES as an early-exit cap, not a minimum, and accepts any input with `tf > 1 && nl >= 2` — file(1) itself loosened this in 2023 (PR/463 "CSV can be also only 2 lines"). Drop the 10-record floor: read records until EOF, require >=2 records with consistent column count. Disable csv::Reader's header inference since libmagic counts newlines (not data rows), so a 2-line "a,b\n1,2\n" must qualify. Add five regression tests covering: 2-row positive, 5-row positive, 12-row positive (the previously-passing case), single-field reject, ragged-columns reject. Co-Authored-By: Claude Opus 4.7 --- pure-magic/src/lib.rs | 61 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/pure-magic/src/lib.rs b/pure-magic/src/lib.rs index 7dba487..5ec4b2b 100644 --- a/pure-magic/src/lib.rs +++ b/pure-magic/src/lib.rs @@ -3357,7 +3357,9 @@ impl MagicDb { }; let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?; - let mut reader = csv::Reader::from_reader(io::Cursor::new(buf)); + let mut reader = csv::ReaderBuilder::new() + .has_headers(false) + .from_reader(io::Cursor::new(buf)); let mut records = reader.records(); let Some(Ok(first)) = records.next() else { @@ -3371,21 +3373,18 @@ impl MagicDb { return Ok(false); } - // we already parsed first line let mut n = 1; - for i in records.take(9) { - if let Ok(rec) = i { - if first.len() != rec.len() { - return Ok(false); - } - } else { + for i in records { + let Ok(rec) = i else { + return Ok(false); + }; + if first.len() != rec.len() { return Ok(false); } n += 1; } - // we need at least 10 lines - if n != 10 { + if n < 2 { return Ok(false); } @@ -4887,4 +4886,46 @@ HelloWorld db.load_bulk(rules.into_iter()); assert!(matches!(db.verify(), Err(Error::Verify(_, _, _)))); } + + // try_csv runs before any rule; pass a never-matching rule so the + // harness only exercises the hardcoded CSV detector. + fn csv_magic(content: &[u8]) -> Magic<'static> { + first_magic( + "0\tstring\t__NEVER_MATCH__\tnope\n", + content, + StreamKind::Text(TextEncoding::Utf8), + ) + .unwrap() + } + + #[test] + fn test_csv_two_rows_two_cols() { + let m = csv_magic(b"a,b\n1,2\n"); + assert_eq!(m.mime_type(), "text/csv"); + } + + #[test] + fn test_csv_short_consistent_rows() { + let m = csv_magic(b"a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,12\n"); + assert_eq!(m.mime_type(), "text/csv"); + } + + #[test] + fn test_csv_many_rows_still_detected() { + let body: &[u8] = b"a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,12\n13,14,15\n16,17,18\n19,20,21\n22,23,24\n25,26,27\n28,29,30\n31,32,33\n"; + let m = csv_magic(body); + assert_eq!(m.mime_type(), "text/csv"); + } + + #[test] + fn test_csv_single_field_rejected() { + let m = csv_magic(b"hello\nworld\nfoo\n"); + assert_ne!(m.mime_type(), "text/csv"); + } + + #[test] + fn test_csv_ragged_columns_rejected() { + let m = csv_magic(b"a,b,c\n1,2\n3,4,5\n"); + assert_ne!(m.mime_type(), "text/csv"); + } } From 19f11288e739ddc50bc8d874417ac6ad64ff44db Mon Sep 17 00:00:00 2001 From: Timo Naroska Date: Sun, 31 May 2026 02:07:20 -0700 Subject: [PATCH 2/2] fix review comments; make the change less intrusive --- pure-magic/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pure-magic/src/lib.rs b/pure-magic/src/lib.rs index 5ec4b2b..f1d36bd 100644 --- a/pure-magic/src/lib.rs +++ b/pure-magic/src/lib.rs @@ -3373,17 +3373,20 @@ impl MagicDb { return Ok(false); } + // we already parsed first line let mut n = 1; - for i in records { - let Ok(rec) = i else { - return Ok(false); - }; - if first.len() != rec.len() { + for i in records.take(9) { + if let Ok(rec) = i { + if first.len() != rec.len() { + return Ok(false); + } + } else { return Ok(false); } n += 1; } + // we need at least 2 lines (matches file command https://github.com/file/file/commit/b4e621d1d5b3e9d142dd23030cca09f6f198e18b) if n < 2 { return Ok(false); }