Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions kmod/src/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,15 @@ static inline u64 ext_last(struct scoutfs_extent *ext)
* This can waste a lot of space for small or sparse files but is
* reasonable when a file population is known to be large and dense but
* known to be written with non-streaming write patterns.
*
* In either strategy, a minimum block count threshold can be configured
* to suppress preallocation for small files and then ramp up
* preallocation proportionally with the file's online block count.
* Files below the threshold get single block allocations. Files at or
* above the threshold have preallocation capped at their current online
* block count, regardless of write offset. This avoids overallocation for small
* files in mixed-size workloads while still allowing large files to
* benefit from full preallocation.
*/
static int alloc_block(struct super_block *sb, struct inode *inode,
struct scoutfs_extent *ext, u64 iblock,
Expand All @@ -400,6 +409,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
struct scoutfs_extent found;
struct scoutfs_extent pre = {0,};
bool undo_pre = false;
bool have_onoff = false;
u64 blkno = 0;
u64 online;
u64 offline;
Expand Down Expand Up @@ -445,6 +455,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
have_onoff = true;
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
Expand Down Expand Up @@ -491,6 +502,23 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
/* overall prealloc limit */
count = min_t(u64, count, opts.data_prealloc_blocks);

/*
* Limit preallocation based on the number of blocks already
* allocated in the file. Files with fewer online blocks than
* the configured minimum get no preallocation. Once past the
* minimum, preallocation ramps up proportionally with the
* file's online block count rather than jumping to the full
* prealloc size.
*/
if (opts.data_prealloc_blocks_min > 0 && !ext->len) {
if (!have_onoff)
scoutfs_inode_get_onoff(inode, &online, &offline);
if (online < opts.data_prealloc_blocks_min)
count = 1;
else
count = min(count, online);
}

ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
&datinf->dalloc, count, &blkno, &count);
if (ret < 0)
Expand Down
60 changes: 60 additions & 0 deletions kmod/src/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
enum {
Opt_acl,
Opt_data_prealloc_blocks,
Opt_data_prealloc_blocks_min,
Opt_data_prealloc_contig_only,
Opt_ino_alloc_per_lock,
Opt_lock_idle_count,
Expand All @@ -48,6 +49,7 @@ enum {
static const match_table_t tokens = {
{Opt_acl, "acl"},
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
{Opt_data_prealloc_blocks_min, "data_prealloc_blocks_min=%s"},
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
{Opt_lock_idle_count, "lock_idle_count=%s"},
Expand Down Expand Up @@ -252,6 +254,18 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
opts->data_prealloc_blocks = nr64;
break;

case Opt_data_prealloc_blocks_min:
ret = match_u64(args, &nr64);
if (ret < 0 || nr64 > MAX_DATA_PREALLOC_BLOCKS) {
scoutfs_err(sb, "invalid data_prealloc_blocks_min option, must be between 0 and %llu",
MAX_DATA_PREALLOC_BLOCKS);
if (ret == 0)
ret = -EINVAL;
return ret;
}
opts->data_prealloc_blocks_min = nr64;
break;

case Opt_data_prealloc_contig_only:
ret = match_int(args, &nr);
if (ret < 0 || nr < 0 || nr > 1) {
Expand Down Expand Up @@ -366,6 +380,12 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
return -EINVAL;
}

if (opts->data_prealloc_blocks_min > opts->data_prealloc_blocks) {
scoutfs_err(sb, "data_prealloc_blocks_min %llu must not exceed data_prealloc_blocks %llu",
opts->data_prealloc_blocks_min, opts->data_prealloc_blocks);
return -EINVAL;
}

return 0;
}

Expand Down Expand Up @@ -437,6 +457,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
if (is_acl)
seq_puts(seq, ",acl");
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
seq_printf(seq, ",data_prealloc_blocks_min=%llu", opts.data_prealloc_blocks_min);
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
seq_printf(seq, ",ino_alloc_per_lock=%u", opts.ino_alloc_per_lock);
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
Expand Down Expand Up @@ -480,6 +501,8 @@ static ssize_t data_prealloc_blocks_store(struct kobject *kobj, struct kobj_attr
MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS);
return -EINVAL;
}
if (val < optinf->opts.data_prealloc_blocks_min)
return -EINVAL;

write_seqlock(&optinf->seqlock);
optinf->opts.data_prealloc_blocks = val;
Expand All @@ -489,6 +512,42 @@ static ssize_t data_prealloc_blocks_store(struct kobject *kobj, struct kobj_attr
}
SCOUTFS_ATTR_RW(data_prealloc_blocks);

static ssize_t data_prealloc_blocks_min_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
struct scoutfs_mount_options opts;

scoutfs_options_read(sb, &opts);

return snprintf(buf, PAGE_SIZE, "%llu", opts.data_prealloc_blocks_min);
}
/*
 * sysfs store handler for the data_prealloc_blocks_min mount option.
 *
 * The written bytes are copied into a bounded, null terminated stack
 * buffer and parsed as an unsigned 64bit integer (base auto-detected).
 * The new minimum is rejected if it can't be parsed or if it exceeds
 * the current data_prealloc_blocks value.
 *
 * The value is unsigned so it is parsed with kstrtoull() into an
 * unsigned long long rather than handing a u64 pointer to the signed
 * kstrtoll() parser.
 *
 * The upper bound is compared and the new value is stored inside a
 * single seqlock write section so that the comparison is made against
 * the data_prealloc_blocks value that is current when the minimum is
 * committed, rather than against an unlocked read taken earlier.
 *
 * Returns count on success or -EINVAL if the value was rejected.
 */
static ssize_t data_prealloc_blocks_min_store(struct kobject *kobj, struct kobj_attribute *attr,
					      const char *buf, size_t count)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	DECLARE_OPTIONS_INFO(sb, optinf);
	char nullterm[30]; /* more than enough for octal -U64_MAX */
	unsigned long long val;
	size_t len;
	int ret;

	/* sysfs input isn't guaranteed to fit our buffer, truncate and terminate */
	len = min(count, sizeof(nullterm) - 1);
	memcpy(nullterm, buf, len);
	nullterm[len] = '\0';

	/* every parse failure, including overflow, is reported as -EINVAL */
	ret = kstrtoull(nullterm, 0, &val);
	if (ret < 0)
		return -EINVAL;

	write_seqlock(&optinf->seqlock);
	if (val > optinf->opts.data_prealloc_blocks)
		ret = -EINVAL;
	else
		optinf->opts.data_prealloc_blocks_min = val;
	write_sequnlock(&optinf->seqlock);

	if (ret < 0)
		return ret;

	return count;
}
SCOUTFS_ATTR_RW(data_prealloc_blocks_min);

static ssize_t data_prealloc_contig_only_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
Expand Down Expand Up @@ -742,6 +801,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr);

static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_blocks_min),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
SCOUTFS_ATTR_PTR(lock_idle_count),
Expand Down
1 change: 1 addition & 0 deletions kmod/src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

struct scoutfs_mount_options {
u64 data_prealloc_blocks;
u64 data_prealloc_blocks_min;
bool data_prealloc_contig_only;
unsigned int ino_alloc_per_lock;
int lock_idle_count;
Expand Down
4 changes: 3 additions & 1 deletion tests/funcs/filter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@ t_filter_dmesg()
re="$re|device-mapper:.*uevent:.*version"
re="$re|device-mapper:.*ioctl:.*initialised"

# some tests try invalid devices
# some tests try invalid devices and options
re="$re|scoutfs .* error reading super block"
re="$re|scoutfs .* error.*invalid data_prealloc_blocks"
re="$re|scoutfs .* error.*data_prealloc_blocks_min .* must not exceed"
re="$re| EXT4-fs (.*): get root inode failed"
re="$re| EXT4-fs (.*): mount failed"
re="$re| EXT4-fs (.*): no journal found"
Expand Down
3 changes: 3 additions & 0 deletions tests/golden/basic-bad-mounts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
== both meta devices
== both data devices
== good volume, bad option and good options
== blocks_min greater than blocks
== blocks_min greater than default blocks
== blocks_min greater than blocks, reversed order
15 changes: 15 additions & 0 deletions tests/golden/data-prealloc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 3
/mnt/test/test/data-prealloc/file-2: extents: 3
== blocks_min rejects values greater than blocks
blocks_min after rejected write: 0
== blocks rejects values less than blocks_min
blocks after rejected write: 32
== blocks_min accepts value equal to blocks
blocks_min after accepted write: 32
== blocks_min suppresses prealloc for small files
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
== blocks_min prealloc ramps up past threshold
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
== blocks_min with region prealloc
/mnt/test/test/data-prealloc/file-1: extents: 12
/mnt/test/test/data-prealloc/file-2: extents: 12
== block writes into region allocs hole
wrote blk 24
wrote blk 32
Expand Down
9 changes: 9 additions & 0 deletions tests/tests/basic-bad-mounts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,13 @@ mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_
echo "== good volume, bad option and good options"
mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"

echo "== blocks_min greater than blocks"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0,data_prealloc_blocks=32,data_prealloc_blocks_min=33 "$T_EX_DATA_DEV" "$T_MSCR"

echo "== blocks_min greater than default blocks"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0,data_prealloc_blocks_min=2049 "$T_EX_DATA_DEV" "$T_MSCR"

echo "== blocks_min greater than blocks, reversed order"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0,data_prealloc_blocks_min=33,data_prealloc_blocks=32 "$T_EX_DATA_DEV" "$T_MSCR"

t_pass
54 changes: 53 additions & 1 deletion tests/tests/data-prealloc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,17 @@ print_extents_found()
done
}

# Print the current value of the named mount option ($1) as exposed in
# the mount_options directory under the sysfs path of mount 0.
read_opt()
{
	local opt_name="$1"
	local sysfs_dir

	sysfs_dir="$(t_sysfs_path 0)"
	cat "$sysfs_dir/mount_options/$opt_name"
}

t_save_all_sysfs_mount_options data_prealloc_blocks
t_save_all_sysfs_mount_options data_prealloc_blocks_min
t_save_all_sysfs_mount_options data_prealloc_contig_only
# Restore the sysfs mount options that this test modifies, in a fixed
# order: data_prealloc_blocks_min, then data_prealloc_blocks, then
# data_prealloc_contig_only.
# NOTE(review): the order is kept exactly as written; presumably it
# matters because the two block options are validated against each
# other when written -- confirm before reordering.
restore_options()
{
	local opt

	for opt in data_prealloc_blocks_min \
		   data_prealloc_blocks \
		   data_prealloc_contig_only; do
		t_restore_all_sysfs_mount_options "$opt"
	done
}
Expand Down Expand Up @@ -153,6 +160,50 @@ t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 3
print_extents_found $prefix

echo "== blocks_min rejects values greater than blocks"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 33
echo "blocks_min after rejected write: $(read_opt data_prealloc_blocks_min)"

echo "== blocks rejects values less than blocks_min"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 16
t_set_sysfs_mount_option 0 data_prealloc_blocks 8
echo "blocks after rejected write: $(read_opt data_prealloc_blocks)"

echo "== blocks_min accepts value equal to blocks"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 32
echo "blocks_min after accepted write: $(read_opt data_prealloc_blocks_min)"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0

echo "== blocks_min suppresses prealloc for small files"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 16
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 8
print_extents_found $prefix

echo "== blocks_min prealloc ramps up past threshold"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 4
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 64
print_extents_found $prefix

echo "== blocks_min with region prealloc"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 16
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 8
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 64
print_extents_found $prefix

#
# prepare aligned regions of 8 blocks that we'll write into.
# We'll write into the first, last, and middle block of each
Expand All @@ -163,7 +214,8 @@ print_extents_found $prefix
# through. The correct output is tied to preallocation strategy so it
# has to be verified each time we change preallocation.
#
echo "== block writes into region allocs hole"
echo "== block writes into region allocs hole"
t_set_sysfs_mount_option 0 data_prealloc_blocks_min 0
t_set_sysfs_mount_option 0 data_prealloc_blocks 8
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
touch "$prefix"
Expand Down