diff --git a/.rustfmt.toml b/.rustfmt.toml index 83e6e61..26a4ca8 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,7 +1,7 @@ version = "Two" use_field_init_shorthand = true -imports_granularity = "Crate" -wrap_comments = true +imports_granularity = "Module" +wrap_comments = false use_try_shorthand = true format_code_in_doc_comments = true format_strings = true diff --git a/CHANGELOG.org b/CHANGELOG.org index 6a84334..a250c04 100644 --- a/CHANGELOG.org +++ b/CHANGELOG.org @@ -7,6 +7,8 @@ binary application not a library with a contract. * [Unreleased] +** Added +- Refactored encoding to use iterators generating chunks [[https://github.com/OverkillGuy/qrxfil/pull/21][#21]] * [0.1.0] - 2021-03-27 diff --git a/Cargo.lock b/Cargo.lock index e8317eb..5ae1cec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -147,7 +147,6 @@ dependencies = [ "textwrap", "unicode-width", "vec_map", - "yaml-rust", ] [[package]] @@ -642,6 +641,7 @@ dependencies = [ "rand", "rqrr", "test-case", + "thiserror", ] [[package]] @@ -898,6 +898,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.9", + "syn 1.0.63", +] + [[package]] name = "thread_local" version = "1.1.3" @@ -1016,9 +1036,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "yaml-rust" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992" diff --git a/Cargo.toml b/Cargo.toml index 49ba366..d37c71a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,9 @@ edition = "2018" qrcode = "0.12" image = "^0.23" base64 = "0.13.0" -clap = {version = "2.33.3", features = ["yaml"]} +clap = "2.33.3" itertools = "^0.10" - +thiserror = "1.0.24" [dev-dependencies] rand = "0.8.3" diff --git a/src/chunk_iterator.rs b/src/chunk_iterator.rs new file mode 100644 index 0000000..bd5a4b1 --- /dev/null +++ b/src/chunk_iterator.rs @@ -0,0 +1,253 @@ +use std::io; +use std::io::{BufRead, Read}; + +use crate::parser::EncodedChunk; + +/// An iterator for reading `chunk_size` bytes off the given `reader` +pub struct BufferedIterator +where + T: BufRead, +{ + reader: T, + chunk_size: u64, +} + +impl BufferedIterator +where + T: BufRead, +{ + /// Get a new iterator ready to read `chunk_size` bytes from `reader` + pub fn new(reader: T, chunk_size: u64) -> Self { + Self { reader, chunk_size } + } +} + +impl Iterator for BufferedIterator +where + T: BufRead, +{ + type Item = io::Result; + + fn next(&mut self) -> Option { + let mut buf = String::new(); + match self + .reader + .by_ref() + .take(self.chunk_size) + .read_to_string(&mut buf) + { + Ok(0) => None, + Ok(_n) => Some(Ok(buf)), + Err(e) => Some(Err(e)), + } + } +} + +/// An iterator for generating chunk strings +/// Taking total payload size and keeping count of chunk ID/total +pub struct ChunkIterator +where + T: BufRead, +{ + reader: BufferedIterator, + /// How many chunks are there in total + pub chunk_total: u16, + /// Chunk counter incremented on each next() + pub current_chunk_id: u16, +} + +impl ChunkIterator +where + T: BufRead, +{ + #[allow(dead_code)] // Temporary while no consumer of this API + /// Get a new iterator ready to read `chunk_size` bytes from `reader`, representing + pub fn new(reader: T, total_size: u64, chunk_size: u64) -> Self { + // When given a stream to read, calculate number of chunks as + // "how many read operations", nevermind the overhead. This + // will force output size to be `chunk_size` + OVERHEAD (8). + // As opposed to using payload_size::number_chunks_overhead() + // which would cause a 30 bytes stream read 10 bytes at a time + // to give 4 chunks (one for overhead) + let chunk_total = (total_size as f64 / chunk_size as f64).ceil() as u16; + Self { + reader: BufferedIterator::new(reader, chunk_size), + chunk_total, + current_chunk_id: 1, + } + } +} + +impl Iterator for ChunkIterator +where + T: BufRead, +{ + type Item = io::Result; + + fn next(&mut self) -> Option { + match self.reader.next() { + None => None, + Some(Ok(buf)) => { + let chunk = EncodedChunk { + id: self.current_chunk_id, + total: self.chunk_total, + payload: buf, + }; + self.current_chunk_id += 1; + Some(Ok(chunk)) + } + Some(Err(e)) => Some(Err(e)), + } + } +} + +#[cfg(test)] +mod iterator_tests { + use super::*; + + #[test] + fn read_ok_twice_noleftover_test() { + // Given a 10 byte payload + let payload: String = String::from("foobarbaz!"); + let cursor = io::Cursor::new(payload.clone()); + // And an iterator taking 5 bytes + let mut iter = BufferedIterator::new(cursor, 5); + + // When I call iterator.next() three times + let res = match iter.next() { + None => panic!("iterator returned no data first time"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + // Then the first two yield Some payload + assert_eq!(res, payload[..5]); + let res2 = match iter.next() { + None => panic!("iterator returned no data"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + assert_eq!(res2, payload[5..]); + // But the third returns None + let res3 = iter.next(); + assert!(res3.is_none()); + } + + #[test] + fn read_ok_twice_leftover_test() { + // Given a 11 byte payload + let payload: String = String::from("foobarmore!"); + let cursor = io::Cursor::new(payload.clone()); + // And an iterator taking 6 bytes + let mut iter = BufferedIterator::new(cursor, 6); + + // When I call iterator.next() thrice + let res = match iter.next() { + None => panic!("iterator returned no data first time"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + // Then the first yields Some payload + assert_eq!(res, payload[..6]); + let res2 = match iter.next() { + None => panic!("iterator returned no data"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + // And the second returns leftover payload + assert_eq!(res2, payload[6..]); + // But the third returns None + let res3 = iter.next(); + assert!(res3.is_none()); + } +} + +#[cfg(test)] +mod chunk_iterator_tests { + use super::*; + use std::convert::TryInto; + + #[test] + // Scenario: Reading 2 chunks with no leftover data + fn read_ok_thrice_noleftover_test() { + // Given a 50 byte payload + let payload = String::from("Pellentesque condimentum ut suscipit hendrerit est"); + let cursor = io::Cursor::new(payload.clone()); + // And an iterator taking 25 bytes + let mut chunk_iter = ChunkIterator::new(cursor, payload.len().try_into().unwrap(), 25); + + // When I call iterator.next() three times + let res = match chunk_iter.next() { + None => panic!("iterator returned no data first time"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(i)) => i, + }; + // Then the first two yield Some payload + assert_eq!( + res, + EncodedChunk { + id: 1, + total: 2, + payload: payload[..25].to_string() + } + ); + let res2 = match chunk_iter.next() { + None => panic!("iterator returned no data"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + assert_eq!( + res2, + EncodedChunk { + id: 2, + total: 2, + payload: payload[25..].to_string() + } + ); + // But the third returns None for empty iterator + let res3 = chunk_iter.next(); + assert!(res3.is_none()); + } + + #[test] + // Scenario: Reading a chunk with leftover data on second read + fn read_ok_twice_leftover_test() { + // Given a 40 byte payload + let payload = String::from("Nullam ante vel est convallis dignissim."); + let cursor = io::Cursor::new(payload.clone()); + // And an iterator taking 23 bytes + let mut chunk_iter = ChunkIterator::new(cursor, payload.len().try_into().unwrap(), 23); + + // When I call iterator.next() three times + let res = match chunk_iter.next() { + None => panic!("iterator returned no data first time"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + // Then the first yields Some payload + assert_eq!( + res, + EncodedChunk { + id: 1, + total: 2, + payload: payload[..23].to_string() + } + ); + let res2 = match chunk_iter.next() { + None => panic!("iterator returned no data"), + Some(Err(e)) => panic!("Iteration failed on known buffer. Got {}", e), + Some(Ok(buf)) => buf, + }; + // And the second returns the leftover payload + assert_eq!( + res2, + EncodedChunk { + id: 2, + total: 2, + payload: payload[23..].to_string() + } + ); + // But the third returns None + let res3 = chunk_iter.next(); + assert!(res3.is_none()); + } +} diff --git a/src/main.rs b/src/main.rs index 3dbc1ac..5a29c14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,6 +42,7 @@ extern crate clap; extern crate image; extern crate qrcode; +mod chunk_iterator; mod parser; mod payload_size; @@ -49,8 +50,8 @@ mod payload_size; /// /// `output_folder` (and parent directories) will be created if /// doesn't exist -fn encode(input_file: &Path, output_folder: &Path) { - let mut input_file = match fs::File::open(input_file) { +fn encode(input_filename: &Path, output_folder: &Path) { + let mut input_file = match fs::File::open(input_filename) { Ok(f) => f, Err(err) => panic!("File error: {}", err), }; @@ -88,61 +89,44 @@ fn encode(input_file: &Path, output_folder: &Path) { .sync_data() .expect("Error syncing base64 file to disk"); - let mut base64_file = fs::File::open(output_folder.join("input_b64.txt")) + let base64_file = fs::File::open(output_folder.join("input_b64.txt")) .expect("Error reopening the base64 file to chunk"); + let base64_reader = BufReader::new(base64_file); - let chunk_size: usize = 1024; // 1 KB + let chunk_size: u64 = 1024; // 1 KB - let chunk_totals = - payload_size::number_chunks_overhead(base64_filesize_bytes, chunk_size as u16); + let chunk_iter = chunk_iterator::ChunkIterator::new( + base64_reader, + base64_filesize_bytes, + chunk_size - payload_size::HEADER_SIZE_BYTES, + ); + let chunk_total = chunk_iter.chunk_total; println!( "File {:?}. base64 size: {} bytes = {} chunks of 1KB", - input_file, base64_filesize_bytes, chunk_totals + input_filename.to_str(), + base64_filesize_bytes, + chunk_total ); - let mut chunk_count = 1; - let header_size = format!("{:03}OF{:03}", 1, 10).len(); - let expected_chunk_bytes_read = chunk_size - header_size; - loop { - let mut chunk_header: Vec = - format!("{:03}OF{:03}", chunk_count, chunk_totals).into_bytes(); - - let mut chunk = Vec::::with_capacity(chunk_size); - chunk.append(&mut chunk_header); // FIXME write prefix to buffer - - let bytes_read_chunk = std::io::Read::by_ref(&mut base64_file) - .take(expected_chunk_bytes_read as u64) - .read_to_end(&mut chunk) - .expect("Error reading chunk off file"); - if bytes_read_chunk == 0 { - break; - } + for c in chunk_iter { + let chunk = c.expect("Problem reading reader"); // Encode some data into bits. - let code = QrCode::new(&chunk).expect("Error encoding chunk into QR code"); + let code = QrCode::new(format!("{}", chunk).as_bytes()) + .expect("Error encoding chunk into QR code"); // Render the bits into an image. let image = code.render::>().build(); // Save the image. image - .save(output_folder.join(format!("{:03}.png", chunk_count))) + .save(output_folder.join(format!("{:03}.png", chunk.id))) .expect("Error saving chunk's QR code file"); - println!("Saved QR {:03}/{}", chunk_count, chunk_totals); - // let mut out_file = fs::File::create()) - - // out_file - // .write(chunk_header.as_bytes()) - // .expect("Error writing out file chunk header"); - // out_file - // .write_all(&chunk) - // .expect("Error writing out file chunk"); - chunk_count += 1; + println!("Saving QR {:03}/{}", chunk.id, chunk.total); } println!( "Split file in {} QR chunks, in folder {:?}", - chunk_count - 1, - output_folder + chunk_total, output_folder ); } diff --git a/src/parser.rs b/src/parser.rs index 25f5dff..b992ac0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -31,10 +31,20 @@ pub struct EncodedChunk { pub payload: String, } -#[derive(Debug, PartialEq, Eq)] +impl fmt::Display for EncodedChunk { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:03}OF{:03}{}", self.id, self.total, self.payload) + } +} + /// Things that can go wrong when restoring a chunked file +#[derive(Debug, PartialEq, Eq, thiserror::Error)] pub enum RestoreError { /// Not enough chunks for the expected total + #[error( + "Missing some chunks! Expected to see {expected_total} chunks, but missing chunks: \ + {missing_chunk_ids:?}" + )] MissingChunk { /// How many we thought we'd find expected_total: u16, @@ -42,6 +52,10 @@ pub enum RestoreError { missing_chunk_ids: Vec, }, /// Unexpectedly too many chunks ("52 of 51") + #[error( + "Too many chunks were found! Expected {expected_total} chunks, found extra chunks \ + {unexpected_chunk_ids:?}" + )] TooManyChunks { /// How many we thought we'd find expected_total: u16, @@ -50,6 +64,10 @@ pub enum RestoreError { }, /// A chunk's total doesn't match the expected total /// "Expected" total is set from first decoded chunks as reference + #[error( + "A chunk reported a total that didn't match the expected total. Reference chunk \ + said {} chunks total, clashing chunk said {}", reference_chunk.total, clashing_chunk.total + )] TotalMismatch { /// The original chunk we used as reference for total reference_chunk: EncodedChunk, @@ -57,12 +75,16 @@ pub enum RestoreError { clashing_chunk: EncodedChunk, }, /// Chunks were found twice but with mismatching payload + #[error( + "Chunks were found multiple times with clashing payload! Clashing: {mismatching_chunks:?}" + )] MismatchingDuplicateChunk { /// Chunks we found multiple times with mismatch, as an array /// of (reference chunk, payload that mismatched) mismatching_chunks: Vec<(EncodedChunk, String)>, }, /// A chunk could not be parsed + #[error("Error decoding chunk! Error was: {error} for chunk '{raw_chunk}'")] ChunkDecodeError { /// What went wrong error: ChunkParseError, @@ -71,73 +93,25 @@ pub enum RestoreError { }, } -impl fmt::Display for RestoreError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - RestoreError::ChunkDecodeError { raw_chunk, error } => write!( - f, - "Error decoding chunk! Error was: {:?} for chunk '{}'", - error, raw_chunk - ), - RestoreError::MissingChunk { - expected_total, - missing_chunk_ids, - } => write!( - f, - "Missing some chunks! Expected to see {} chunks, \ - but missing chunks: {:?}", - expected_total, missing_chunk_ids - ), - RestoreError::MismatchingDuplicateChunk { mismatching_chunks } => write!( - f, - "Chunks were found multiple times with clashing payload! Clashing: {:?}", - mismatching_chunks - ), - RestoreError::TotalMismatch { - reference_chunk, - clashing_chunk, - } => write!( - f, - "A chunk reported a total that didn't match the expected total. \ - Reference chunk said {} chunks total, clashing chunk said {}", - reference_chunk.total, clashing_chunk.total - ), - RestoreError::TooManyChunks { - expected_total, - unexpected_chunk_ids, - } => write!( - f, - "Too many chunks were found! Expected {} chunks, \ - found extra chunks {:?}", - expected_total, unexpected_chunk_ids - ), - } - } -} - -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, thiserror::Error)] /// Result type for errors of Chunk parsing pub enum ChunkParseError { + #[error("Chunk identifier not found")] IdMissing, + + #[error("Total number of chunks not found")] TotalMissing, + + #[error("Chunk has no payload")] PayloadMissing, - BadSeparator, -} -impl fmt::Display for ChunkParseError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - ChunkParseError::IdMissing => write!(f, "Chunk identifier not found"), - ChunkParseError::TotalMissing => write!(f, "Total number of chunks not found"), - ChunkParseError::PayloadMissing => write!(f, "Chunk has no payload"), - ChunkParseError::BadSeparator => write!(f, "Chunk id/total separator incorrect"), - } - } + #[error("Chunk id/total separator incorrect")] + BadSeparator, } /// Checks that chunks with the same ID are identical in payload /// Returns the chunk contents if all are identical, or -/// RestoreError::MismatchingDuplicateChunk otherwise. +/// `RestoreError::MismatchingDuplicateChunk` otherwise. /// Assumes the chunks are already sorted by ID (groupable) fn collapse_chunks(chunks: &[EncodedChunk]) -> Result, RestoreError> { let _expected_total: u16 = chunks[0].total; @@ -148,26 +122,24 @@ fn collapse_chunks(chunks: &[EncodedChunk]) -> Result, Restore let mut mismatching = Vec::<(EncodedChunk, String)>::new(); // Group by chunk id - for (_chunk_id, duplicate_chunks) in &chunks.into_iter().group_by(|chunk| chunk.id) { - let mut duplicate_chunks_iter = duplicate_chunks.into_iter(); - let reference_chunk = match duplicate_chunks_iter.next() { + for (_chunk_id, mut duplicate_chunks) in &chunks.iter().group_by(|chunk| chunk.id) { + let reference_chunk = match duplicate_chunks.next() { Some(v) => v, None => panic!("Couldn't get 1 item per groupby"), }; - for duplicate_chunk in duplicate_chunks_iter { + for duplicate_chunk in duplicate_chunks { if duplicate_chunk.payload != reference_chunk.payload { mismatching.push((reference_chunk.clone(), duplicate_chunk.payload.clone())); } } chunks_out.push(reference_chunk.clone()); } - match mismatching.is_empty() { - true => return Ok(chunks_out.to_vec()), - false => { - return Err(RestoreError::MismatchingDuplicateChunk { - mismatching_chunks: mismatching, - }) - } + if mismatching.is_empty() { + Ok(chunks_out) + } else { + Err(RestoreError::MismatchingDuplicateChunk { + mismatching_chunks: mismatching, + }) } } diff --git a/src/payload_size.rs b/src/payload_size.rs index 07a125f..22e1d80 100644 --- a/src/payload_size.rs +++ b/src/payload_size.rs @@ -20,12 +20,14 @@ /// Size of a header in bytes /// Three digits twice (id / total) plus delimiter string "OF" e.g. /// 013OF078 -const HEADER_SIZE_BYTES: u64 = 8; +pub const HEADER_SIZE_BYTES: u64 = 8; -/// How many chunks of `chunk_size_bytes` to send for a given payload of `payload_size_bytes` -/// Taking into account the overhead of HEADER_SIZE_BYTES per chunk +#[allow(dead_code)] // TODO No need for this func anymore? +/// How many chunks of `chunk_size_bytes` to send for a given payload +/// of `payload_size_bytes` Taking into account the overhead of +/// `HEADER_SIZE_BYTES` per chunk pub fn number_chunks_overhead(payload_size_bytes: u64, chunk_size_bytes: u16) -> u64 { - let chunk_payload_size_bytes: u64 = (chunk_size_bytes as u64) - HEADER_SIZE_BYTES; + let chunk_payload_size_bytes: u64 = (u64::from(chunk_size_bytes)) - HEADER_SIZE_BYTES; ((payload_size_bytes as f64) / (chunk_payload_size_bytes as f64)).ceil() as u64 } diff --git a/tests/end_to_end_test.rs b/tests/end_to_end_test.rs index a848e9b..76ab7da 100644 --- a/tests/end_to_end_test.rs +++ b/tests/end_to_end_test.rs @@ -240,8 +240,6 @@ fn missing_chunk_error() { temp.close().expect("Error deleting temporary folder"); } -// TODO trigger parser::RestoreError's other enum cases (TooManyChunks, ChunkDecodeError, TotalMismatch) as unittest - #[test] // Scenario: Restoring with a duplicate chunk succeeds fn duplicate_chunk_skips() {