Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions src/uu/sort/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::{
GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo,
};

const MAYBE_L1_CACHE_SIZE: usize = 64 * 1024;
const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024;
const MAX_TOKEN_BUFFER_ELEMS: usize = MAX_TOKEN_BUFFER_BYTES / size_of::<Range<usize>>();

Expand Down Expand Up @@ -180,7 +181,13 @@ pub fn read<T: Read>(
mut buffer,
} = recycled_chunk;
if buffer.len() < carry_over.len() {
buffer.resize(carry_over.len() + 10 * 1024, 0);
// Zero-fill only the bytes needed to hold the carry-over, keeping the fill cost minimal,
// but reserve extra capacity up front so that later growth is less likely to reallocate.
buffer.resize(carry_over.len(), 0);
let new_len = (carry_over.len() * 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a comment explaining why

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

.max(MAYBE_L1_CACHE_SIZE)
.min(carry_over.len() + 16 * 1024 * 1024);
buffer.reserve(new_len - buffer.len());
}
buffer[..carry_over.len()].copy_from_slice(carry_over);
let (read, should_continue) = read_to_buffer(
Expand Down Expand Up @@ -252,9 +259,6 @@ fn parse_lines<'a>(
assert!(line_data.parsed_floats.is_empty());
assert!(line_data.line_num_floats.is_empty());
token_buffer.clear();
if token_buffer.capacity() > MAX_TOKEN_BUFFER_ELEMS {
token_buffer.shrink_to(MAX_TOKEN_BUFFER_ELEMS);
}
const SMALL_CHUNK_BYTES: usize = 64 * 1024;
let mut estimated = (*line_count_hint).max(1);
let mut exact_line_count = None;
Expand All @@ -267,8 +271,8 @@ fn parse_lines<'a>(
exact_line_count = Some(count);
estimated = count;
} else if estimated == 1 {
const LINE_LEN_HINT: usize = 32;
estimated = (read.len() / LINE_LEN_HINT).max(1);
const LINE_LEN_HINT: usize = 128;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattsu2020 how did you decide to use 32 for LINE_LEN_HINT in the existing code?

This comment was marked as off-topic.

estimated = (read.len() / LINE_LEN_HINT).clamp(1, 1024);
}
lines.reserve(estimated);
if settings.precomputed.selections_per_line > 0 {
Expand Down Expand Up @@ -349,12 +353,9 @@ fn read_to_buffer<T: Read>(
if max_buffer_size > buffer.len() {
// we can grow the buffer
let prev_len = buffer.len();
let target = if buffer.len() < max_buffer_size / 2 {
buffer.len().saturating_mul(2)
} else {
max_buffer_size
};
buffer.resize(target.min(max_buffer_size), 0);
let grow_by = (max_buffer_size - prev_len).min(MAYBE_L1_CACHE_SIZE);
buffer.reserve(grow_by);
buffer.resize(prev_len + MAYBE_L1_CACHE_SIZE, 0);
read_target = &mut buffer[prev_len..];
continue;
}
Expand All @@ -374,8 +375,9 @@ fn read_to_buffer<T: Read>(

// We need to read more lines
let len = buffer.len();
let grow_by = (len / 2).max(1024 * 1024);
buffer.resize(len + grow_by, 0);
let grow_by = len.clamp(MAYBE_L1_CACHE_SIZE, 16 * 1024 * 1024);
buffer.reserve(grow_by);
buffer.resize(len + MAYBE_L1_CACHE_SIZE, 0);
read_target = &mut buffer[len..];
} else {
// This file has been fully read.
Expand Down
Loading