diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h index 2a8b03779f87..b84e7edeb22e 100644 --- a/drivers/md/dm-vdo/constants.h +++ b/drivers/md/dm-vdo/constants.h @@ -44,6 +44,9 @@ enum { /* The default size of each slab journal, in blocks */ DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224, + /* The recovery journal starting sequence number set at format time */ + RECOVERY_JOURNAL_STARTING_SEQUENCE_NUMBER = 1, + /* * The initial size of lbn_operations and pbn_operations, which is based upon the expected * maximum number of outstanding VIOs. This value was chosen to make it highly unlikely @@ -57,8 +60,14 @@ enum { /* The maximum number of physical zones */ MAX_VDO_PHYSICAL_ZONES = 16, - /* The base-2 logarithm of the maximum blocks in one slab */ - MAX_VDO_SLAB_BITS = 23, + /* The default blocks in one slab */ + DEFAULT_VDO_SLAB_BLOCKS = 1U << 19, + + /* The minimum blocks in one slab */ + MIN_VDO_SLAB_BLOCKS = 1U << 13, + + /* The maximum blocks in one slab */ + MAX_VDO_SLAB_BLOCKS = 1U << 23, /* The maximum number of slabs the slab depot supports */ MAX_VDO_SLABS = 8192, diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 7eb676e58ed5..0135a6f941fd 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,11 @@ enum admin_phases { LOAD_PHASE_DRAIN_JOURNAL, LOAD_PHASE_WAIT_FOR_READ_ONLY, PRE_LOAD_PHASE_START, + PRE_LOAD_PHASE_FORMAT_START, + PRE_LOAD_PHASE_FORMAT_SUPER, + PRE_LOAD_PHASE_FORMAT_GEOMETRY, + PRE_LOAD_PHASE_FORMAT_END, + PRE_LOAD_PHASE_LOAD_SUPER, PRE_LOAD_PHASE_LOAD_COMPONENTS, PRE_LOAD_PHASE_END, PREPARE_GROW_PHYSICAL_PHASE_START, @@ -109,6 +115,11 @@ static const char * const ADMIN_PHASE_NAMES[] = { "LOAD_PHASE_DRAIN_JOURNAL", "LOAD_PHASE_WAIT_FOR_READ_ONLY", "PRE_LOAD_PHASE_START", + "PRE_LOAD_PHASE_FORMAT_START", + "PRE_LOAD_PHASE_FORMAT_SUPER", + 
"PRE_LOAD_PHASE_FORMAT_GEOMETRY", + "PRE_LOAD_PHASE_FORMAT_END", + "PRE_LOAD_PHASE_LOAD_SUPER", "PRE_LOAD_PHASE_LOAD_COMPONENTS", "PRE_LOAD_PHASE_END", "PREPARE_GROW_PHYSICAL_PHASE_START", @@ -377,6 +388,75 @@ static inline int __must_check parse_bool(const char *bool_str, const char *true return VDO_SUCCESS; } +/** + * parse_memory() - Parse a string into an index memory value. + * @memory_str: The string value to convert to a memory value. + * @memory_ptr: A pointer to return the memory value in. + * + * Return: VDO_SUCCESS or an error + */ +static int __must_check parse_memory(const char *memory_str, + uds_memory_config_size_t *memory_ptr) +{ + uds_memory_config_size_t memory; + + if (strcmp(memory_str, "0.25") == 0) { + memory = UDS_MEMORY_CONFIG_256MB; + } else if ((strcmp(memory_str, "0.5") == 0) || (strcmp(memory_str, "0.50") == 0)) { + memory = UDS_MEMORY_CONFIG_512MB; + } else if (strcmp(memory_str, "0.75") == 0) { + memory = UDS_MEMORY_CONFIG_768MB; + } else { + unsigned int value; + int result; + + result = kstrtouint(memory_str, 10, &value); + if (result) { + vdo_log_error("optional parameter error: invalid memory size, must be a positive integer"); + return -EINVAL; + } + + if (value > UDS_MEMORY_CONFIG_MAX) { + vdo_log_error("optional parameter error: invalid memory size, must not be greater than %d", + UDS_MEMORY_CONFIG_MAX); + return -EINVAL; + } + + memory = value; + } + + *memory_ptr = memory; + return VDO_SUCCESS; +} + +/** + * parse_slab_size() - Parse a string option into a slab size value. + * @slab_str: The string value representing slab size. + * @slab_size_ptr: A pointer to return the slab size in. 
+ * + * Return: VDO_SUCCESS or an error + */ +static int __must_check parse_slab_size(const char *slab_str, block_count_t *slab_size_ptr) +{ + block_count_t value; + int result; + + result = kstrtoull(slab_str, 10, &value); + if (result) { + vdo_log_error("optional parameter error: invalid slab size, must be a positive integer"); + return -EINVAL; + } + + if (value < MIN_VDO_SLAB_BLOCKS || value > MAX_VDO_SLAB_BLOCKS || (!is_power_of_2(value))) { + vdo_log_error("optional parameter error: invalid slab size, must be a power of two between %u and %u", + MIN_VDO_SLAB_BLOCKS, MAX_VDO_SLAB_BLOCKS); + return -EINVAL; + } + + *slab_size_ptr = value; + return VDO_SUCCESS; +} + /** * process_one_thread_config_spec() - Process one component of a thread parameter configuration * string and update the configuration data structure. @@ -566,7 +646,7 @@ static int process_one_key_value_pair(const char *key, unsigned int value, } /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { - vdo_log_error("optional parameter error: at most %d max discard blocks are allowed", + vdo_log_error("optional parameter error: at most %d max discard blocks are allowed", UINT_MAX / VDO_BLOCK_SIZE); return -EINVAL; } @@ -598,7 +678,16 @@ static int parse_one_key_value_pair(const char *key, const char *value, if (strcmp(key, "compression") == 0) return parse_bool(value, "on", "off", &config->compression); - /* The remaining arguments must have integral values. */ + if (strcmp(key, "indexSparse") == 0) + return parse_bool(value, "on", "off", &config->index_sparse); + + if (strcmp(key, "indexMemory") == 0) + return parse_memory(value, &config->index_memory); + + if (strcmp(key, "slabSize") == 0) + return parse_slab_size(value, &config->slab_blocks); + + /* The remaining arguments must have non-negative integral values. 
*/ result = kstrtouint(value, 10, &count); if (result) { vdo_log_error("optional config string error: integer value needed, found \"%s\"", @@ -713,6 +802,12 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, struct device_config *config = NULL; int result; + if (logical_bytes > (MAXIMUM_VDO_LOGICAL_BLOCKS * VDO_BLOCK_SIZE)) { + handle_parse_error(config, error_ptr, + "Logical size exceeds the maximum"); + return VDO_BAD_CONFIGURATION; + } + if ((logical_bytes % VDO_BLOCK_SIZE) != 0) { handle_parse_error(config, error_ptr, "Logical size must be a multiple of 4096"); @@ -756,6 +851,9 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, config->max_discard_blocks = 1; config->deduplication = true; config->compression = false; + config->index_memory = UDS_MEMORY_CONFIG_256MB; + config->index_sparse = false; + config->slab_blocks = DEFAULT_VDO_SLAB_BLOCKS; arg_set.argc = argc; arg_set.argv = argv; @@ -781,7 +879,7 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, /* Get the physical blocks, if known. */ if (config->version >= 1) { result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks); - if (result != VDO_SUCCESS) { + if (result) { handle_parse_error(config, error_ptr, "Invalid physical block count"); return VDO_BAD_CONFIGURATION; @@ -802,7 +900,7 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, /* Get the page cache size. */ result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size); - if (result != VDO_SUCCESS) { + if (result) { handle_parse_error(config, error_ptr, "Invalid block map page cache size"); return VDO_BAD_CONFIGURATION; @@ -810,7 +908,7 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, /* Get the block map era length. 
*/ result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age); - if (result != VDO_SUCCESS) { + if (result) { handle_parse_error(config, error_ptr, "Invalid block map maximum age"); return VDO_BAD_CONFIGURATION; } @@ -1399,7 +1497,33 @@ static void pre_load_callback(struct vdo_completion *completion) vdo_continue_completion(completion, result); return; } + if (vdo->needs_formatting) + vdo->admin.phase = PRE_LOAD_PHASE_FORMAT_START; + else + vdo->admin.phase = PRE_LOAD_PHASE_LOAD_SUPER; + + vdo_continue_completion(completion, VDO_SUCCESS); + return; + + case PRE_LOAD_PHASE_FORMAT_START: + vdo_continue_completion(completion, vdo_clear_layout(vdo)); + return; + + case PRE_LOAD_PHASE_FORMAT_SUPER: + vdo_save_super_block(vdo, completion); + return; + + case PRE_LOAD_PHASE_FORMAT_GEOMETRY: + vdo_save_geometry_block(vdo, completion); + return; + + case PRE_LOAD_PHASE_FORMAT_END: + /* cleanup layout before load adds to it */ + vdo_uninitialize_layout(&vdo->states.layout); + vdo_continue_completion(completion, VDO_SUCCESS); + return; + case PRE_LOAD_PHASE_LOAD_SUPER: vdo_load_super_block(vdo, completion); return; @@ -1457,10 +1581,13 @@ static int vdo_initialize(struct dm_target *ti, unsigned int instance, vdo_log_debug("Logical blocks = %llu", logical_blocks); vdo_log_debug("Physical block size = %llu", (u64) block_size); vdo_log_debug("Physical blocks = %llu", config->physical_blocks); + vdo_log_debug("Slab size = %llu", config->slab_blocks); vdo_log_debug("Block map cache blocks = %u", config->cache_size); vdo_log_debug("Block map maximum age = %u", config->block_map_maximum_age); vdo_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off")); vdo_log_debug("Compression = %s", (config->compression ? "on" : "off")); + vdo_log_debug("Index memory = %u", config->index_memory); + vdo_log_debug("Index sparse = %s", (config->index_sparse ? 
"on" : "off")); vdo = vdo_find_matching(vdo_uses_device, config); if (vdo != NULL) { @@ -2856,7 +2983,7 @@ static void vdo_resume(struct dm_target *ti) static struct target_type vdo_target_bio = { .features = DM_TARGET_SINGLETON, .name = "vdo", - .version = { 9, 1, 0 }, + .version = { 9, 2, 0 }, .module = THIS_MODULE, .ctr = vdo_ctr, .dtr = vdo_dtr, diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c index ec98c539701e..d75e023df637 100644 --- a/drivers/md/dm-vdo/encodings.c +++ b/drivers/md/dm-vdo/encodings.c @@ -12,15 +12,10 @@ #include "permassert.h" #include "constants.h" +#include "indexer.h" #include "status-codes.h" #include "types.h" -/** The maximum logical space is 4 petabytes, which is 1 terablock. */ -static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024; - -/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */ -static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64; - struct geometry_block { char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE]; struct packed_header header; @@ -292,6 +287,62 @@ static void decode_volume_geometry(u8 *buffer, size_t *offset, }; } +/** + * vdo_encode_volume_geometry() - Encode the on-disk representation of a volume geometry into a buffer. + * @buffer: A buffer to store the encoding. + * @geometry: The geometry to encode. + * @version: The geometry block version to encode. + * + * Return: VDO_SUCCESS or an error. + */ +int vdo_encode_volume_geometry(u8 *buffer, const struct volume_geometry *geometry, + u32 version) +{ + int result; + enum volume_region_id id; + u32 checksum; + size_t offset = 0; + const struct header *header; + + memcpy(buffer, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE); + offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE; + + header = (version > 4) ? 
&GEOMETRY_BLOCK_HEADER_5_0 : &GEOMETRY_BLOCK_HEADER_4_0; + vdo_encode_header(buffer, &offset, header); + + /* This is for backwards compatibility */ + encode_u32_le(buffer, &offset, geometry->unused); + encode_u64_le(buffer, &offset, geometry->nonce); + memcpy(buffer + offset, (unsigned char *) &geometry->uuid, sizeof(uuid_t)); + offset += sizeof(uuid_t); + + if (version > 4) + encode_u64_le(buffer, &offset, geometry->bio_offset); + + for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) { + encode_u32_le(buffer, &offset, geometry->regions[id].id); + encode_u64_le(buffer, &offset, geometry->regions[id].start_block); + } + + encode_u32_le(buffer, &offset, geometry->index_config.mem); + encode_u32_le(buffer, &offset, 0); + + if (geometry->index_config.sparse) + buffer[offset++] = 1; + else + buffer[offset++] = 0; + + result = VDO_ASSERT(header->size == offset + sizeof(u32), + "should have encoded up to the geometry checksum"); + if (result != VDO_SUCCESS) + return result; + + checksum = vdo_crc32(buffer, offset); + encode_u32_le(buffer, &offset, checksum); + + return VDO_SUCCESS; +} + /** * vdo_parse_geometry_block() - Decode and validate an encoded geometry block. * @block: The encoded geometry block. @@ -1219,9 +1270,9 @@ int vdo_validate_config(const struct vdo_config *config, if (result != VDO_SUCCESS) return result; - result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS), - "slab size must be less than or equal to 2^%d", - MAX_VDO_SLAB_BITS); + result = VDO_ASSERT(config->slab_size <= MAX_VDO_SLAB_BLOCKS, + "slab size must be a power of two less than or equal to %d", + MAX_VDO_SLAB_BLOCKS); if (result != VDO_SUCCESS) return result; @@ -1486,3 +1537,153 @@ int vdo_decode_super_block(u8 *buffer) return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS); } + +/** + * vdo_initialize_component_states() - Initialize the components so they can be written out. + * @vdo_config: The config used for component state initialization. 
+ * @geometry: The volume geometry used to calculate the data region offset. + * @nonce: The nonce to use to identify the vdo. + * @states: The component states to initialize. + * + * Return: VDO_SUCCESS or an error code. + */ +int vdo_initialize_component_states(const struct vdo_config *vdo_config, + const struct volume_geometry *geometry, + nonce_t nonce, + struct vdo_component_states *states) +{ + int result; + struct slab_config slab_config; + struct partition *partition; + + states->vdo.config = *vdo_config; + states->vdo.nonce = nonce; + states->volume_version = VDO_VOLUME_VERSION_67_0; + + states->recovery_journal = (struct recovery_journal_state_7_0) { + .journal_start = RECOVERY_JOURNAL_STARTING_SEQUENCE_NUMBER, + .logical_blocks_used = 0, + .block_map_data_blocks = 0, + }; + + /* + * The layout starts 1 block past the beginning of the data region, as the + * data region contains the super block but the layout does not. + */ + result = vdo_initialize_layout(vdo_config->physical_blocks, + vdo_get_data_region_start(*geometry) + 1, + DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT, + vdo_config->recovery_journal_size, + VDO_SLAB_SUMMARY_BLOCKS, + &states->layout); + if (result != VDO_SUCCESS) + return result; + + result = vdo_configure_slab(vdo_config->slab_size, + vdo_config->slab_journal_blocks, + &slab_config); + if (result != VDO_SUCCESS) { + vdo_uninitialize_layout(&states->layout); + return result; + } + + result = vdo_get_partition(&states->layout, VDO_SLAB_DEPOT_PARTITION, + &partition); + if (result != VDO_SUCCESS) { + vdo_uninitialize_layout(&states->layout); + return result; + } + + result = vdo_configure_slab_depot(partition, slab_config, 0, + &states->slab_depot); + if (result != VDO_SUCCESS) { + vdo_uninitialize_layout(&states->layout); + return result; + } + + result = vdo_get_partition(&states->layout, VDO_BLOCK_MAP_PARTITION, + &partition); + if (result != VDO_SUCCESS) { + vdo_uninitialize_layout(&states->layout); + return result; + } + + 
states->block_map = (struct block_map_state_2_0) { + .flat_page_origin = VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN, + .flat_page_count = 0, + .root_origin = partition->offset, + .root_count = DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT, + }; + + states->vdo.state = VDO_NEW; + + return VDO_SUCCESS; +} + +/** + * vdo_compute_index_blocks() - Compute the number of blocks that the indexer will use. + * @config: The index config from which the blocks are calculated. + * @index_blocks_ptr: The number of blocks the index will use. + * + * Return: VDO_SUCCESS or an error code. + */ +static int vdo_compute_index_blocks(const struct index_config *config, + block_count_t *index_blocks_ptr) +{ + int result; + u64 index_bytes; + struct uds_parameters uds_parameters = { + .memory_size = config->mem, + .sparse = config->sparse, + }; + + result = uds_compute_index_size(&uds_parameters, &index_bytes); + if (result != UDS_SUCCESS) + return vdo_log_error_strerror(result, "error computing index size"); + + *index_blocks_ptr = index_bytes / VDO_BLOCK_SIZE; + return VDO_SUCCESS; +} + +/** + * vdo_initialize_volume_geometry() - Initialize the volume geometry so it can be written out. + * @nonce: The nonce to use to identify the vdo. + * @uuid: The uuid to use to identify the vdo. + * @index_config: The config used for structure initialization. + * @geometry: The volume geometry to initialize. + * + * Return: VDO_SUCCESS or an error code. + */ +int vdo_initialize_volume_geometry(nonce_t nonce, uuid_t *uuid, + const struct index_config *index_config, + struct volume_geometry *geometry) +{ + int result; + block_count_t index_blocks = 0; + + result = vdo_compute_index_blocks(index_config, &index_blocks); + if (result != VDO_SUCCESS) + return result; + + *geometry = (struct volume_geometry) { + /* This is for backwards compatibility. 
*/ + .unused = 0, + .nonce = nonce, + .bio_offset = 0, + .regions = { + [VDO_INDEX_REGION] = { + .id = VDO_INDEX_REGION, + .start_block = 1, + }, + [VDO_DATA_REGION] = { + .id = VDO_DATA_REGION, + .start_block = 1 + index_blocks, + } + } + }; + + memcpy(&(geometry->uuid), uuid, sizeof(uuid_t)); + memcpy(&geometry->index_config, index_config, sizeof(struct index_config)); + + return VDO_SUCCESS; +} diff --git a/drivers/md/dm-vdo/encodings.h b/drivers/md/dm-vdo/encodings.h index 87b7d2f3b545..67ff0ff2ffda 100644 --- a/drivers/md/dm-vdo/encodings.h +++ b/drivers/md/dm-vdo/encodings.h @@ -608,6 +608,12 @@ struct vdo_config { block_count_t slab_journal_blocks; /* number of slab journal blocks */ }; +/** The maximum logical space is 4 petabytes, which is 1 terablock. */ +#define MAXIMUM_VDO_LOGICAL_BLOCKS ((block_count_t)(1024ULL * 1024 * 1024 * 1024)) + +/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */ +#define MAXIMUM_VDO_PHYSICAL_BLOCKS ((block_count_t)(1024ULL * 1024 * 1024 * 64)) + /* This is the structure that captures the vdo fields saved as a super block component. 
*/ struct vdo_component { enum vdo_state state; @@ -803,6 +809,12 @@ vdo_get_index_region_size(struct volume_geometry geometry) vdo_get_index_region_start(geometry); } +int vdo_initialize_volume_geometry(nonce_t nonce, uuid_t *uuid, + const struct index_config *index_config, + struct volume_geometry *geometry); + +int vdo_encode_volume_geometry(u8 *buffer, const struct volume_geometry *geometry, + u32 version); int __must_check vdo_parse_geometry_block(unsigned char *block, struct volume_geometry *geometry); @@ -1264,6 +1276,11 @@ int __must_check vdo_validate_component_states(struct vdo_component_states *stat void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states); int __must_check vdo_decode_super_block(u8 *buffer); +int vdo_initialize_component_states(const struct vdo_config *vdo_config, + const struct volume_geometry *geometry, + nonce_t nonce, + struct vdo_component_states *states); + /* We start with 0L and postcondition with ~0L to match our historical usage in userspace. 
*/ static inline u32 vdo_crc32(const void *buf, unsigned long len) { diff --git a/drivers/md/dm-vdo/indexer/index-layout.c b/drivers/md/dm-vdo/indexer/index-layout.c index 798badcf80ea..74b7774b4be4 100644 --- a/drivers/md/dm-vdo/indexer/index-layout.c +++ b/drivers/md/dm-vdo/indexer/index-layout.c @@ -249,6 +249,32 @@ static int __must_check compute_sizes(const struct uds_configuration *config, return UDS_SUCCESS; } +int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size) +{ + int result; + struct uds_configuration *index_config; + struct save_layout_sizes sizes; + + if (index_size == NULL) { + vdo_log_error("Missing output size pointer"); + return -EINVAL; + } + + result = uds_make_configuration(parameters, &index_config); + if (result != UDS_SUCCESS) { + vdo_log_error_strerror(result, "cannot compute index size"); + return result; + } + + result = compute_sizes(index_config, &sizes); + uds_free_configuration(index_config); + if (result != UDS_SUCCESS) + return result; + + *index_size = sizes.total_size; + return UDS_SUCCESS; +} + /* Create unique data using the current time and a pseudorandom number. */ static void create_unique_nonce_data(u8 *buffer) { diff --git a/drivers/md/dm-vdo/indexer/indexer.h b/drivers/md/dm-vdo/indexer/indexer.h index 7c1fc4577f5b..d765f24328eb 100644 --- a/drivers/md/dm-vdo/indexer/indexer.h +++ b/drivers/md/dm-vdo/indexer/indexer.h @@ -282,6 +282,10 @@ struct uds_request { ); }; +/* Compute the number of bytes needed to store an index. */ +int __must_check uds_compute_index_size(const struct uds_parameters *parameters, + u64 *index_size); + /* A session is required for most index operations. 
*/ int __must_check uds_create_index_session(struct uds_index_session **session); diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c index 0e9932929fee..0916c8609543 100644 --- a/drivers/md/dm-vdo/io-submitter.c +++ b/drivers/md/dm-vdo/io-submitter.c @@ -364,6 +364,33 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, vdo_launch_completion_with_priority(completion, get_metadata_priority(vio)); } +/** + * vdo_submit_metadata_vio_wait() - Submit I/O for a metadata vio and wait for completion. + * @vio: the vio for which to issue I/O + * @physical: the physical block number to read or write + * @operation: the type of I/O to perform + * + * The function operates similarly to __submit_metadata_vio except that it will + * block until the work is done. It can be used to do i/o before work queues + * and thread completions are set up. + * + * Return: VDO_SUCCESS or an error. + */ +int vdo_submit_metadata_vio_wait(struct vio *vio, + physical_block_number_t physical, + blk_opf_t operation) +{ + int result; + + result = vio_reset_bio(vio, vio->data, NULL, operation | REQ_META, physical); + if (result != VDO_SUCCESS) + return result; + + bio_set_dev(vio->bio, vdo_get_backing_device(vio->completion.vdo)); + submit_bio_wait(vio->bio); + return blk_status_to_errno(vio->bio->bi_status); +} + /** * vdo_make_io_submitter() - Create an io_submitter structure. * @thread_count: Number of bio-submission threads to set up. 
diff --git a/drivers/md/dm-vdo/io-submitter.h b/drivers/md/dm-vdo/io-submitter.h index 3088f11055fd..0f320a60e9e8 100644 --- a/drivers/md/dm-vdo/io-submitter.h +++ b/drivers/md/dm-vdo/io-submitter.h @@ -56,4 +56,8 @@ static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback, REQ_OP_WRITE | REQ_PREFLUSH, NULL, 0); } +int vdo_submit_metadata_vio_wait(struct vio *vio, + physical_block_number_t physical, + blk_opf_t operation); + #endif /* VDO_IO_SUBMITTER_H */ diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c index dd252d660b6d..9df5e4d7f884 100644 --- a/drivers/md/dm-vdo/status-codes.c +++ b/drivers/md/dm-vdo/status-codes.c @@ -80,6 +80,8 @@ int vdo_status_to_errno(int error) /* VDO or UDS error */ switch (error) { + case VDO_BAD_CONFIGURATION: + return -EINVAL; case VDO_NO_SPACE: return -ENOSPC; case VDO_READ_ONLY: diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h index cdf36e7d7702..0d60a88aa086 100644 --- a/drivers/md/dm-vdo/types.h +++ b/drivers/md/dm-vdo/types.h @@ -227,6 +227,9 @@ struct device_config { bool compression; struct thread_count_config thread_counts; block_count_t max_discard_blocks; + block_count_t slab_blocks; + int index_memory; + bool index_sparse; }; enum vdo_completion_type { diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c index 167cf93a284a..7bec2418c121 100644 --- a/drivers/md/dm-vdo/vdo.c +++ b/drivers/md/dm-vdo/vdo.c @@ -34,7 +34,9 @@ #include #include #include +#include #include +#include #include "logger.h" #include "memory-alloc.h" @@ -55,6 +57,7 @@ #include "slab-depot.h" #include "statistics.h" #include "status-codes.h" +#include "time-utils.h" #include "vio.h" #define PARANOID_THREAD_CONSISTENCY_CHECKS 0 @@ -255,56 +258,35 @@ static int __must_check initialize_thread_config(struct thread_count_config coun return VDO_SUCCESS; } -/** - * read_geometry_block() - Synchronously read the geometry block from a vdo's underlying block - * device. 
- * @vdo: The vdo whose geometry is to be read. - * - * Return: VDO_SUCCESS or an error code. - */ -static int __must_check read_geometry_block(struct vdo *vdo) +static int initialize_geometry_block(struct vdo *vdo, + struct vdo_geometry_block *geometry_block) { - struct vio *vio; - char *block; int result; - result = vdo_allocate(VDO_BLOCK_SIZE, __func__, &block); + result = vdo_allocate(VDO_BLOCK_SIZE, "encoded geometry block", + (char **) &vdo->geometry_block.buffer); if (result != VDO_SUCCESS) return result; - result = create_metadata_vio(vdo, VIO_TYPE_GEOMETRY, VIO_PRIORITY_HIGH, NULL, - block, &vio); - if (result != VDO_SUCCESS) { - vdo_free(block); - return result; - } + return allocate_vio_components(vdo, VIO_TYPE_GEOMETRY, + VIO_PRIORITY_METADATA, NULL, 1, + (char *) geometry_block->buffer, + &vdo->geometry_block.vio); +} - /* - * This is only safe because, having not already loaded the geometry, the vdo's geometry's - * bio_offset field is 0, so the fact that vio_reset_bio() will subtract that offset from - * the supplied pbn is not a problem. 
- */ - result = vio_reset_bio(vio, block, NULL, REQ_OP_READ, - VDO_GEOMETRY_BLOCK_LOCATION); - if (result != VDO_SUCCESS) { - free_vio(vdo_forget(vio)); - vdo_free(block); - return result; - } +static int initialize_super_block(struct vdo *vdo, struct vdo_super_block *super_block) +{ + int result; - bio_set_dev(vio->bio, vdo_get_backing_device(vdo)); - submit_bio_wait(vio->bio); - result = blk_status_to_errno(vio->bio->bi_status); - free_vio(vdo_forget(vio)); - if (result != 0) { - vdo_log_error_strerror(result, "synchronous read failed"); - vdo_free(block); - return -EIO; - } + result = vdo_allocate(VDO_BLOCK_SIZE, "encoded super block", + (char **) &vdo->super_block.buffer); + if (result != VDO_SUCCESS) + return result; - result = vdo_parse_geometry_block((u8 *) block, &vdo->geometry); - vdo_free(block); - return result; + return allocate_vio_components(vdo, VIO_TYPE_SUPER_BLOCK, + VIO_PRIORITY_METADATA, NULL, 1, + (char *) super_block->buffer, + &vdo->super_block.vio); } static bool get_zone_thread_name(const thread_id_t thread_ids[], zone_count_t count, @@ -451,6 +433,69 @@ static int register_vdo(struct vdo *vdo) return result; } +/** + * vdo_format() - Format a block device to function as a new VDO. + * @vdo: The vdo to format. + * @error_ptr: The reason for any failure during this call. + * + * This function must be called on a device before a VDO can be loaded for the first time. + * Once a device has been formatted, the VDO can be loaded and shut down repeatedly. + * If a new VDO is desired, this function should be called again. 
+ * + * Return: VDO_SUCCESS or an error + **/ +static int __must_check vdo_format(struct vdo *vdo, char **error_ptr) +{ + int result; + uuid_t uuid; + nonce_t nonce = current_time_us(); + struct device_config *config = vdo->device_config; + + struct index_config index_config = { + .mem = config->index_memory, + .sparse = config->index_sparse, + }; + + struct vdo_config vdo_config = { + .logical_blocks = config->logical_blocks, + .physical_blocks = config->physical_blocks, + .slab_size = config->slab_blocks, + .slab_journal_blocks = DEFAULT_VDO_SLAB_JOURNAL_SIZE, + .recovery_journal_size = DEFAULT_VDO_RECOVERY_JOURNAL_SIZE, + }; + + uuid_gen(&uuid); + result = vdo_initialize_volume_geometry(nonce, &uuid, &index_config, &vdo->geometry); + if (result != VDO_SUCCESS) { + *error_ptr = "Could not initialize volume geometry during format"; + return result; + } + + result = vdo_initialize_component_states(&vdo_config, &vdo->geometry, nonce, &vdo->states); + if (result == VDO_NO_SPACE) { + block_count_t slab_blocks = config->slab_blocks; + /* 1 is counting geometry block */ + block_count_t fixed_layout_size = 1 + + vdo->geometry.regions[VDO_DATA_REGION].start_block + + DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT + + DEFAULT_VDO_RECOVERY_JOURNAL_SIZE + VDO_SLAB_SUMMARY_BLOCKS; + block_count_t necessary_size = fixed_layout_size + slab_blocks; + + vdo_log_error("Minimum required size for VDO volume: %llu bytes", + (unsigned long long) necessary_size * VDO_BLOCK_SIZE); + *error_ptr = "Could not allocate enough space for VDO during format"; + return result; + } + if (result != VDO_SUCCESS) { + *error_ptr = "Could not initialize data layout during format"; + return result; + } + + vdo->needs_formatting = true; + + return VDO_SUCCESS; +} + /** * initialize_vdo() - Do the portion of initializing a vdo which will clean up after itself on * error. 
@@ -474,12 +519,39 @@ static int initialize_vdo(struct vdo *vdo, struct device_config *config, vdo_initialize_completion(&vdo->admin.completion, vdo, VDO_ADMIN_COMPLETION); init_completion(&vdo->admin.callback_sync); mutex_init(&vdo->stats_mutex); - result = read_geometry_block(vdo); + + result = initialize_geometry_block(vdo, &vdo->geometry_block); + if (result != VDO_SUCCESS) { + *reason = "Could not initialize geometry block"; + return result; + } + + result = initialize_super_block(vdo, &vdo->super_block); + if (result != VDO_SUCCESS) { + *reason = "Could not initialize super block"; + return result; + } + + result = vdo_submit_metadata_vio_wait(&vdo->geometry_block.vio, + VDO_GEOMETRY_BLOCK_LOCATION, REQ_OP_READ); if (result != VDO_SUCCESS) { *reason = "Could not load geometry block"; return result; } + if (mem_is_zero(vdo->geometry_block.vio.data, VDO_BLOCK_SIZE)) { + result = vdo_format(vdo, reason); + if (result != VDO_SUCCESS) + return result; + } else { + result = vdo_parse_geometry_block(vdo->geometry_block.buffer, + &vdo->geometry); + if (result != VDO_SUCCESS) { + *reason = "Could not parse geometry block"; + return result; + } + } + result = initialize_thread_config(config->thread_counts, &vdo->thread_config); if (result != VDO_SUCCESS) { *reason = "Cannot create thread configuration"; @@ -646,6 +718,12 @@ static void free_listeners(struct vdo_thread *thread) } } +static void uninitialize_geometry_block(struct vdo_geometry_block *geometry_block) +{ + free_vio_components(&geometry_block->vio); + vdo_free(geometry_block->buffer); +} + static void uninitialize_super_block(struct vdo_super_block *super_block) { free_vio_components(&super_block->vio); @@ -693,6 +771,7 @@ void vdo_destroy(struct vdo *vdo) vdo_uninitialize_layout(&vdo->next_layout); if (vdo->partition_copier) dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier)); + uninitialize_geometry_block(&vdo->geometry_block); uninitialize_super_block(&vdo->super_block); 
vdo_free_block_map(vdo_forget(vdo->block_map)); vdo_free_hash_zones(vdo_forget(vdo->hash_zones)); @@ -718,20 +797,6 @@ void vdo_destroy(struct vdo *vdo) vdo_free(vdo); } -static int initialize_super_block(struct vdo *vdo, struct vdo_super_block *super_block) -{ - int result; - - result = vdo_allocate(VDO_BLOCK_SIZE, "encoded super block", &vdo->super_block.buffer); - if (result != VDO_SUCCESS) - return result; - - return allocate_vio_components(vdo, VIO_TYPE_SUPER_BLOCK, - VIO_PRIORITY_METADATA, NULL, 1, - (char *) super_block->buffer, - &vdo->super_block.vio); -} - /** * finish_reading_super_block() - Continue after loading the super block. * @completion: The super block vio. @@ -775,14 +840,6 @@ static void read_super_block_endio(struct bio *bio) */ void vdo_load_super_block(struct vdo *vdo, struct vdo_completion *parent) { - int result; - - result = initialize_super_block(vdo, &vdo->super_block); - if (result != VDO_SUCCESS) { - vdo_continue_completion(parent, result); - return; - } - vdo->super_block.vio.completion.parent = parent; vdo_submit_metadata_vio(&vdo->super_block.vio, vdo_get_data_region_start(vdo->geometry), @@ -896,24 +953,101 @@ static void record_vdo(struct vdo *vdo) vdo->states.layout = vdo->layout; } +static int __must_check clear_partition(struct vdo *vdo, enum partition_id id) +{ + struct partition *partition; + int result; + + result = vdo_get_partition(&vdo->states.layout, id, &partition); + if (result != VDO_SUCCESS) + return result; + + return blkdev_issue_zeroout(vdo_get_backing_device(vdo), + partition->offset * VDO_SECTORS_PER_BLOCK, + partition->count * VDO_SECTORS_PER_BLOCK, + GFP_NOWAIT, 0); +} + +int vdo_clear_layout(struct vdo *vdo) +{ + int result; + + /* Zero out the uds index's first block. 
*/ + result = blkdev_issue_zeroout(vdo_get_backing_device(vdo), + VDO_SECTORS_PER_BLOCK, + VDO_SECTORS_PER_BLOCK, + GFP_NOWAIT, 0); + if (result != VDO_SUCCESS) + return result; + + result = clear_partition(vdo, VDO_BLOCK_MAP_PARTITION); + if (result != VDO_SUCCESS) + return result; + + return clear_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION); +} + /** - * continue_super_block_parent() - Continue the parent of a super block save operation. - * @completion: The super block vio. + * continue_parent() - Continue the parent of a save operation. + * @completion: The completion to continue. * - * This callback is registered in vdo_save_components(). */ -static void continue_super_block_parent(struct vdo_completion *completion) +static void continue_parent(struct vdo_completion *completion) { vdo_continue_completion(vdo_forget(completion->parent), completion->result); } +static void handle_write_endio(struct bio *bio) +{ + struct vio *vio = bio->bi_private; + struct vdo_completion *parent = vio->completion.parent; + + continue_vio_after_io(vio, continue_parent, + parent->callback_thread_id); +} + /** - * handle_save_error() - Log a super block save error. + * handle_geometry_block_save_error() - Log a geometry block save error. + * @completion: The geometry block vio. + * + * This error handler is registered in vdo_save_geometry_block(). + */ +static void handle_geometry_block_save_error(struct vdo_completion *completion) +{ + struct vdo_geometry_block *geometry_block = + container_of(as_vio(completion), struct vdo_geometry_block, vio); + + vio_record_metadata_io_error(&geometry_block->vio); + vdo_log_error_strerror(completion->result, "geometry block save failed"); + completion->callback(completion); +} + +/** + * vdo_save_geometry_block() - Encode the vdo and save the geometry block asynchronously. + * @vdo: The vdo whose state is being saved. + * @parent: The completion to notify when the save is complete. 
+ */ +void vdo_save_geometry_block(struct vdo *vdo, struct vdo_completion *parent) +{ + struct vdo_geometry_block *geometry_block = &vdo->geometry_block; + + vdo_encode_volume_geometry(geometry_block->buffer, &vdo->geometry, + VDO_DEFAULT_GEOMETRY_BLOCK_VERSION); + geometry_block->vio.completion.parent = parent; + geometry_block->vio.completion.callback_thread_id = parent->callback_thread_id; + vdo_submit_metadata_vio(&geometry_block->vio, + VDO_GEOMETRY_BLOCK_LOCATION, + handle_write_endio, handle_geometry_block_save_error, + REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); +} + +/** + * handle_super_block_save_error() - Log a super block save error. * @completion: The super block vio. * * This error handler is registered in vdo_save_components(). */ -static void handle_save_error(struct vdo_completion *completion) +static void handle_super_block_save_error(struct vdo_completion *completion) { struct vdo_super_block *super_block = container_of(as_vio(completion), struct vdo_super_block, vio); @@ -932,17 +1066,27 @@ static void handle_save_error(struct vdo_completion *completion) completion->callback(completion); } -static void super_block_write_endio(struct bio *bio) +/** + * vdo_save_super_block() - Save the component states to the super block asynchronously. + * @vdo: The vdo whose state is being saved. + * @parent: The completion to notify when the save is complete. 
+ */ +void vdo_save_super_block(struct vdo *vdo, struct vdo_completion *parent) { - struct vio *vio = bio->bi_private; - struct vdo_completion *parent = vio->completion.parent; + struct vdo_super_block *super_block = &vdo->super_block; - continue_vio_after_io(vio, continue_super_block_parent, - parent->callback_thread_id); + vdo_encode_super_block(super_block->buffer, &vdo->states); + super_block->vio.completion.parent = parent; + super_block->vio.completion.callback_thread_id = parent->callback_thread_id; + vdo_submit_metadata_vio(&super_block->vio, + vdo_get_data_region_start(vdo->geometry), + handle_write_endio, handle_super_block_save_error, + REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); } /** - * vdo_save_components() - Encode the vdo and save the super block asynchronously. + * vdo_save_components() - Copy the current state of the VDO to the states struct and save + * it to the super block asynchronously. * @vdo: The vdo whose state is being saved. * @parent: The completion to notify when the save is complete. 
*/ @@ -961,14 +1105,7 @@ void vdo_save_components(struct vdo *vdo, struct vdo_completion *parent) } record_vdo(vdo); - - vdo_encode_super_block(super_block->buffer, &vdo->states); - super_block->vio.completion.parent = parent; - super_block->vio.completion.callback_thread_id = parent->callback_thread_id; - vdo_submit_metadata_vio(&super_block->vio, - vdo_get_data_region_start(vdo->geometry), - super_block_write_endio, handle_save_error, - REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); + vdo_save_super_block(vdo, parent); } /** diff --git a/drivers/md/dm-vdo/vdo.h b/drivers/md/dm-vdo/vdo.h index 1aaba73997b7..9a63f5d45ce3 100644 --- a/drivers/md/dm-vdo/vdo.h +++ b/drivers/md/dm-vdo/vdo.h @@ -144,6 +144,13 @@ struct thread_config { struct thread_count_config; +struct vdo_geometry_block { + /* The vio for reading and writing the geometry block to disk */ + struct vio vio; + /* A buffer to hold the geometry block */ + u8 *buffer; +}; + struct vdo_super_block { /* The vio for reading and writing the super block to disk */ struct vio vio; @@ -186,6 +193,9 @@ struct vdo { /* The thread mapping */ struct thread_config thread_config; + /* The geometry block */ + struct vdo_geometry_block geometry_block; + /* The super block */ struct vdo_super_block super_block; @@ -236,6 +246,7 @@ struct vdo { const struct admin_state_code *suspend_type; bool allocations_allowed; bool dump_on_shutdown; + bool needs_formatting; atomic_t processing_message; /* @@ -304,6 +315,10 @@ int __must_check vdo_make(unsigned int instance, struct device_config *config, void vdo_destroy(struct vdo *vdo); +int __must_check vdo_format_components(struct vdo *vdo); + +void vdo_format_super_block(struct vdo *vdo, struct vdo_completion *parent); + void vdo_load_super_block(struct vdo *vdo, struct vdo_completion *parent); struct block_device * __must_check vdo_get_backing_device(const struct vdo *vdo); @@ -326,6 +341,10 @@ enum vdo_state __must_check vdo_get_state(const struct vdo *vdo); void vdo_set_state(struct vdo 
*vdo, enum vdo_state state); +int vdo_clear_layout(struct vdo *vdo); +void vdo_save_geometry_block(struct vdo *vdo, struct vdo_completion *parent); +void vdo_save_super_block(struct vdo *vdo, struct vdo_completion *parent); + void vdo_save_components(struct vdo *vdo, struct vdo_completion *parent); int vdo_register_read_only_listener(struct vdo *vdo, void *listener,