From 4e2b79a14f95037420b908a86f593da7288b7d06 Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Fri, 15 May 2026 15:37:53 -0700 Subject: [PATCH 1/2] feat(consumer): per-storage max_insert_block_size override Add an optional per-storage knob, read from snuba.state at `clickhouse_max_insert_block_size:`, that appends `&max_insert_block_size=` to the rust consumer's INSERT URL. Absent or non-positive values leave the setting at ClickHouse's default. Lets us tune insert block size on a single storage at runtime without a deploy or restart, matching the per-storage pattern already used for `clickhouse_load_balancing:`. Co-Authored-By: Claude --- rust_snuba/src/runtime_config.rs | 11 +++++ .../src/strategies/clickhouse/writer_v2.rs | 42 ++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/rust_snuba/src/runtime_config.rs b/rust_snuba/src/runtime_config.rs index 33fc73c36e..7b016a2d71 100644 --- a/rust_snuba/src/runtime_config.rs +++ b/rust_snuba/src/runtime_config.rs @@ -71,6 +71,17 @@ pub fn get_load_balancing_config(storage_name: &str) -> LoadBalancingConfig { } } +/// Returns Some(n) if `clickhouse_max_insert_block_size:` is set +/// to a positive integer in snuba.state, otherwise None. Callers should append +/// `&max_insert_block_size=` to the INSERT URL when Some. +pub fn get_max_insert_block_size(storage_name: &str) -> Option { + get_str_config(&format!("clickhouse_max_insert_block_size:{storage_name}")) + .ok() + .flatten() + .and_then(|s| s.parse::().ok()) + .filter(|&n| n > 0) +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust_snuba/src/strategies/clickhouse/writer_v2.rs b/rust_snuba/src/strategies/clickhouse/writer_v2.rs index e4730cb869..08e2b2adbc 100644 --- a/rust_snuba/src/strategies/clickhouse/writer_v2.rs +++ b/rust_snuba/src/strategies/clickhouse/writer_v2.rs @@ -13,7 +13,7 @@ use sentry_arroyo::types::Message; use sentry_arroyo::{counter, timer}; use crate::config::ClickhouseConfig; -use crate::runtime_config::get_load_balancing_config; +use crate::runtime_config::{get_load_balancing_config, get_max_insert_block_size}; use crate::types::{BytesInsertBatch, RowData}; fn clickhouse_task_runner( @@ -196,6 +196,9 @@ impl ClickhouseClient { if let Some(offset) = lb_config.first_offset { url.push_str(&format!("&load_balancing_first_offset={offset}")); } + if let Some(block_size) = get_max_insert_block_size(&self.storage_name) { + url.push_str(&format!("&max_insert_block_size={block_size}")); + } url } @@ -356,6 +359,43 @@ mod tests { assert!(url.contains("load_balancing_first_offset=1")); } + #[test] + fn test_url_with_max_insert_block_size() { + crate::testutils::initialize_python(); + let config = make_test_config(); + let client = ClickhouseClient::new( + &config, + "test_table", + "writer_v2_block_size_test".to_string(), + ); + + // Default (key absent): no suffix. + let url = client.build_url(); + assert!(!url.contains("max_insert_block_size")); + + // Per-storage override sets the suffix. + crate::runtime_config::patch_str_config_for_test( + "clickhouse_max_insert_block_size:writer_v2_block_size_test", + Some("1000000"), + ); + let url = client.build_url(); + assert!(url.contains("&max_insert_block_size=1000000")); + + // A different storage isn't affected. + let other_client = + ClickhouseClient::new(&config, "test_table", "writer_v2_other_storage".to_string()); + let url = other_client.build_url(); + assert!(!url.contains("max_insert_block_size")); + + // 0 means "leave unset". + crate::runtime_config::patch_str_config_for_test( + "clickhouse_max_insert_block_size:writer_v2_block_size_test", + Some("0"), + ); + let url = client.build_url(); + assert!(!url.contains("max_insert_block_size")); + } + #[tokio::test] async fn test_retry_with_exponential_backoff() { crate::testutils::initialize_python(); From 223898c26be69e9880604b8033dbd7b0f53292cb Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Fri, 15 May 2026 15:47:46 -0700 Subject: [PATCH 2/2] feat(consumer): reject max_insert_block_size below ClickHouse default Treat the per-storage `clickhouse_max_insert_block_size:` override as a floor only: values below ClickHouse's compiled-in default (1_048_449) are silently dropped instead of being applied. Pulling block size below the server default would not raise it past what ClickHouse already produces on its own, so the URL suffix is now only appended for values that actually take effect. Co-Authored-By: Claude --- rust_snuba/src/runtime_config.rs | 11 +++++++++-- .../src/strategies/clickhouse/writer_v2.rs | 18 +++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/rust_snuba/src/runtime_config.rs b/rust_snuba/src/runtime_config.rs index 7b016a2d71..d5d7b68472 100644 --- a/rust_snuba/src/runtime_config.rs +++ b/rust_snuba/src/runtime_config.rs @@ -71,15 +71,22 @@ pub fn get_load_balancing_config(storage_name: &str) -> LoadBalancingConfig { } } +/// ClickHouse's compiled-in default for `max_insert_block_size`. We refuse to +/// apply any override below this to avoid silently shrinking blocks below what +/// the server would already produce on its own. +pub const CLICKHOUSE_DEFAULT_MAX_INSERT_BLOCK_SIZE: u64 = 1_048_449; + /// Returns Some(n) if `clickhouse_max_insert_block_size:` is set -/// to a positive integer in snuba.state, otherwise None. Callers should append +/// to an integer >= ClickHouse's default (1_048_449); otherwise None. Values +/// below the default are rejected, since they wouldn't increase the block size +/// past what ClickHouse already does by default. Callers should append /// `&max_insert_block_size=` to the INSERT URL when Some. pub fn get_max_insert_block_size(storage_name: &str) -> Option { get_str_config(&format!("clickhouse_max_insert_block_size:{storage_name}")) .ok() .flatten() .and_then(|s| s.parse::().ok()) - .filter(|&n| n > 0) + .filter(|&n| n >= CLICKHOUSE_DEFAULT_MAX_INSERT_BLOCK_SIZE) } #[cfg(test)] diff --git a/rust_snuba/src/strategies/clickhouse/writer_v2.rs b/rust_snuba/src/strategies/clickhouse/writer_v2.rs index 08e2b2adbc..3f48d4e97c 100644 --- a/rust_snuba/src/strategies/clickhouse/writer_v2.rs +++ b/rust_snuba/src/strategies/clickhouse/writer_v2.rs @@ -373,13 +373,13 @@ mod tests { let url = client.build_url(); assert!(!url.contains("max_insert_block_size")); - // Per-storage override sets the suffix. + // Per-storage override at or above the ClickHouse default sets the suffix. crate::runtime_config::patch_str_config_for_test( "clickhouse_max_insert_block_size:writer_v2_block_size_test", - Some("1000000"), + Some("2000000"), ); let url = client.build_url(); - assert!(url.contains("&max_insert_block_size=1000000")); + assert!(url.contains("&max_insert_block_size=2000000")); // A different storage isn't affected. let other_client = @@ -387,13 +387,21 @@ mod tests { let url = other_client.build_url(); assert!(!url.contains("max_insert_block_size")); - // 0 means "leave unset". + // Values below the ClickHouse default (1_048_449) are rejected. crate::runtime_config::patch_str_config_for_test( "clickhouse_max_insert_block_size:writer_v2_block_size_test", - Some("0"), + Some("1000000"), ); let url = client.build_url(); assert!(!url.contains("max_insert_block_size")); + + // Exactly the default is accepted. + crate::runtime_config::patch_str_config_for_test( + "clickhouse_max_insert_block_size:writer_v2_block_size_test", + Some("1048449"), + ); + let url = client.build_url(); + assert!(url.contains("&max_insert_block_size=1048449")); } #[tokio::test]