diff --git a/.gitignore b/.gitignore index 28786b5..21dffe0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,6 @@ node_modules .env www/.env www/config/database.json -.claude/settings.local.json +.claude/ /clients/solidb-laravel-eloquent/vendor /clients/solidb-laravel-eloquent/composer.lock \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 3de56f7..79735ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4738,6 +4738,7 @@ dependencies = [ "similar", "slug", "solidb-client", + "subtle", "sysinfo", "tar", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index ed6f3ee..47a9445 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,7 @@ sha2 = "0.10" hmac = "0.12" x25519-dalek = { version = "2.0", features = ["static_secrets"] } md5 = "0.8" +subtle = "2.6" # Temp directories (needed by benchmark) tempfile = "3.10" diff --git a/sdbql-core/src/ast.rs b/sdbql-core/src/ast.rs index 4dfd025..a9983a1 100644 --- a/sdbql-core/src/ast.rs +++ b/sdbql-core/src/ast.rs @@ -606,6 +606,7 @@ mod tests { #[test] fn test_query_default() { let query = Query { + with_clause: None, let_clauses: vec![], for_clauses: vec![], join_clauses: vec![], diff --git a/sdbql-core/src/executor/builtins/type_check.rs b/sdbql-core/src/executor/builtins/type_check.rs index c5737d1..075e42e 100644 --- a/sdbql-core/src/executor/builtins/type_check.rs +++ b/sdbql-core/src/executor/builtins/type_check.rs @@ -276,13 +276,14 @@ mod tests { .unwrap(), Some(json!(true)) ); + // Collection names are case-sensitive (ArangoDB-compatible) assert_eq!( call( "IS_SAME_COLLECTION", &[json!("users/123"), json!("Users/456")] ) .unwrap(), - Some(json!(true)) + Some(json!(false)) ); assert_eq!( call("IS_SAME_COLLECTION", &[json!(123), json!("users/456")]).unwrap(), diff --git a/src/bin/solidb-fuse.rs b/src/bin/solidb-fuse.rs index 6f97c91..9308e00 100644 --- a/src/bin/solidb-fuse.rs +++ b/src/bin/solidb-fuse.rs @@ -790,11 +790,22 @@ fn main() -> anyhow::Result<()> { match std::fs::read_to_string(&args.pid_file) { Ok(pid_str) => { if let Ok(pid) = pid_str.trim().parse::() { + let mut sys = System::new_all(); + sys.refresh_all(); + + let sys_pid = Pid::from(pid as usize); + if let Some(proc) = sys.process(sys_pid) { + let proc_name = proc.name().to_string_lossy(); + if proc_name != "solidb-fuse" && proc_name != "solidb" { + eprintln!("SECURITY ERROR: Process with PID {} is named '{}', not 'solidb-fuse'. Refusing to kill potential mismatch.", pid, proc_name); + return Ok(()); + } + } + eprintln!( "Found existing FUSE process with PID {}. Stopping it...", pid ); - // Send SIGTERM to gracefully stop the process unsafe { libc::kill(pid, libc::SIGTERM); } diff --git a/src/cluster/hlc.rs b/src/cluster/hlc.rs index c0d88f9..6f4aa44 100644 --- a/src/cluster/hlc.rs +++ b/src/cluster/hlc.rs @@ -2,6 +2,9 @@ use serde::{Deserialize, Serialize}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; +/// Maximum allowed clock skew between nodes (60 seconds in ms) +const MAX_CLOCK_SKEW_MS: u64 = 60_000; + /// Hybrid Logical Clock for distributed ordering of events. /// Combines physical time with a logical counter to ensure unique, ordered timestamps /// even when wall clocks are out of sync. @@ -58,8 +61,11 @@ impl HybridLogicalClock { /// Update this clock after receiving a message with another clock. /// Returns a new clock that is greater than both self and the received clock. + /// Remote times beyond MAX_CLOCK_SKEW_MS in the future are clamped to prevent + /// a malicious peer from permanently wedging the clock. pub fn receive(&self, other: &HybridLogicalClock, node_id: &str) -> Self { let now = current_time_ms(); + let max_allowed_time = now.saturating_add(MAX_CLOCK_SKEW_MS); let (physical, logical) = if now > self.physical_time && now > other.physical_time { // Wall clock is ahead of both, reset counter @@ -72,8 +78,9 @@ impl HybridLogicalClock { (self.physical_time, self.logical_counter + 1) } } else if other.physical_time > self.physical_time { - // Remote clock is ahead - (other.physical_time, other.logical_counter + 1) + // Remote clock is ahead - clamp to prevent unbounded skew attack + let clamped_remote = other.physical_time.min(max_allowed_time); + (clamped_remote, other.logical_counter + 1) } else { // Same physical time, take max logical and increment ( diff --git a/src/driver/handlers/mod.rs b/src/driver/handlers/mod.rs index 010944d..ff4cf68 100644 --- a/src/driver/handlers/mod.rs +++ b/src/driver/handlers/mod.rs @@ -129,6 +129,21 @@ impl DriverHandler { /// Execute a command and return a response async fn execute_command(&mut self, command: Command) -> Response { + // Gate every command behind authentication. Only Ping and Auth are + // allowed before the connection has authenticated. Batch is allowed + // through here so its inner commands are re-checked individually + // (an Auth inside a batch will set state for subsequent entries). + if self.authenticated_db.is_none() { + match &command { + Command::Ping | Command::Auth { .. } | Command::Batch { .. } => {} + _ => { + return Response::error(DriverError::AuthError( + "Authentication required".to_string(), + )); + } + } + } + match command { // ==================== Auth & Utility ==================== Command::Ping => Response::pong(), diff --git a/src/queue/jobs.rs b/src/queue/jobs.rs index be3a4cb..792cbc5 100644 --- a/src/queue/jobs.rs +++ b/src/queue/jobs.rs @@ -4,6 +4,26 @@ use crate::scripting::ScriptEngine; use crate::storage::StorageEngine; use std::sync::Arc; +fn validate_script_path(script_path: &str) -> Result<(), crate::error::DbError> { + if script_path.is_empty() { + return Err(crate::error::DbError::BadRequest( + "Script path cannot be empty".to_string(), + )); + } + if script_path.len() > 512 { + return Err(crate::error::DbError::BadRequest( + "Script path exceeds maximum length of 512 characters".to_string(), + )); + } + let re = regex::Regex::new(r"^[A-Za-z0-9_/\-.]+$").unwrap(); + if !re.is_match(script_path) { + return Err(crate::error::DbError::BadRequest( + "Script path contains invalid characters".to_string(), + )); + } + Ok(()) +} + impl QueueWorker { pub async fn check_jobs(&self) { let _lock = match self.claiming_lock.try_lock() { @@ -164,19 +184,27 @@ impl QueueWorker { job: &Job, db_name: &str, ) -> Result<(), crate::error::DbError> { + validate_script_path(&job.script_path)?; + tracing::info!("Executing job {} with script {}", job.id, job.script_path); let _db = storage.get_database(db_name)?; - // Find script by path - let query_str = format!( - "FOR s IN _scripts FILTER s.path == '{}' RETURN s", - job.script_path - ); - let query_ast = crate::sdbql::parse(&query_str) + let query_str = "FOR s IN _scripts FILTER s.path == @script_path RETURN s"; + let query_ast = crate::sdbql::parse(query_str) .map_err(|e| crate::error::DbError::BadRequest(e.to_string()))?; - let executor = crate::sdbql::QueryExecutor::with_database(storage, db_name.to_string()); + let mut bind_vars = crate::sdbql::BindVars::new(); + bind_vars.insert( + "script_path".to_string(), + serde_json::json!(job.script_path), + ); + + let executor = crate::sdbql::QueryExecutor::with_database_and_bind_vars( + storage, + db_name.to_string(), + bind_vars, + ); let result = executor.execute(&query_ast)?; let script_val = result.first().ok_or_else(|| { diff --git a/src/scripting/file_handling.rs b/src/scripting/file_handling.rs index 8984209..cab5697 100644 --- a/src/scripting/file_handling.rs +++ b/src/scripting/file_handling.rs @@ -19,6 +19,12 @@ const CHUNK_SIZE: usize = 1024 * 1024; /// Files collection name const FILES_COLLECTION: &str = "_files"; +/// Maximum image dimension (width or height) to prevent memory exhaustion +const MAX_IMAGE_DIMENSION: u32 = 8192; + +/// Maximum image buffer size (64 MiB) to prevent memory exhaustion +const MAX_IMAGE_BYTES: usize = 64 * 1024 * 1024; + /// Detect MIME type from file extension fn mime_from_extension(ext: &str) -> &'static str { match ext.to_lowercase().as_str() { @@ -206,10 +212,26 @@ pub fn create_upload_function( chunk_index += 1; } - // Build path (directory/filename or just filename) + // Build path (directory/filename or just filename). + // Reject anything that resolves to a parent-dir component or absolute + // path. Component-based check avoids substring false-positives + // (e.g. legitimate filename `v1..2.bin`) while still catching + // `..`, `foo/../bar`, `\..\bar`, and absolute paths. let path = if let Some(ref dir) = directory { - let safe_dir = dir.replace("..", "").replace("//", "/"); - format!("{}/{}", safe_dir.trim_matches('/'), safe_filename) + let normalized = dir.replace('\\', "/"); + let p = std::path::Path::new(&normalized); + let bad_component = p.components().any(|c| { + matches!( + c, + std::path::Component::ParentDir | std::path::Component::RootDir + ) + }); + if bad_component || p.is_absolute() { + return Err(mlua::Error::RuntimeError( + "upload: directory path contains invalid traversal patterns".to_string(), + )); + } + format!("{}/{}", normalized.trim_matches('/'), safe_filename) } else { safe_filename.clone() }; @@ -239,7 +261,7 @@ pub fn create_upload_function( // Add image dimensions if applicable if mime_type.starts_with("image/") { - if let Ok(img) = image::load_from_memory(&bytes) { + if let Ok(img) = load_image_with_limits(&bytes) { let (width, height) = img.dimensions(); metadata.insert("width".to_string(), JsonValue::Number(width.into())); metadata.insert("height".to_string(), JsonValue::Number(height.into())); @@ -562,12 +584,27 @@ pub fn create_image_process_function( }) } +/// Load image with decompression bomb protection +#[allow(deprecated)] +fn load_image_with_limits(bytes: &[u8]) -> Result { + use image::Limits; + let mut reader = image::ImageReader::new(Cursor::new(bytes)) + .with_guessed_format() + .map_err(|e| mlua::Error::RuntimeError(format!("failed to guess image format: {}", e)))?; + let mut limits = Limits::default(); + limits.max_image_width = Some(MAX_IMAGE_DIMENSION); + limits.max_image_height = Some(MAX_IMAGE_DIMENSION); + limits.max_alloc = Some(MAX_IMAGE_BYTES as u64); + reader.limits(limits); + let img = reader + .decode() + .map_err(|e| mlua::Error::RuntimeError(format!("failed to decode image: {}", e)))?; + Ok(img) +} + /// Process image with given operations fn process_image(lua: &Lua, bytes: Vec, operations: Table) -> LuaResult { - // Load image - let mut img = image::load_from_memory(&bytes).map_err(|e| { - mlua::Error::RuntimeError(format!("image_process: failed to load image: {}", e)) - })?; + let mut img = load_image_with_limits(&bytes)?; // Apply operations // Resize diff --git a/src/scripting/http_helpers.rs b/src/scripting/http_helpers.rs index 173b787..f857aa8 100644 --- a/src/scripting/http_helpers.rs +++ b/src/scripting/http_helpers.rs @@ -57,9 +57,86 @@ impl HttpCache { } } +/// Parse an origin entry from `SOLIDB_ALLOWED_REDIRECT_ORIGINS` into +/// (scheme, host, port). Accepts forms `host`, `scheme://host`, `scheme://host:port`. +/// `host`-only entries match either http or https. +fn parse_allowed_origin(entry: &str) -> Option<(Option, String, Option)> { + let entry = entry.trim(); + if entry.is_empty() { + return None; + } + if entry.contains("://") { + let parsed = url::Url::parse(entry).ok()?; + let host = parsed.host_str()?.to_lowercase(); + Some((Some(parsed.scheme().to_string()), host, parsed.port())) + } else { + // Bare host (and optional :port) + let (host, port) = match entry.rsplit_once(':') { + Some((h, p)) if p.chars().all(|c| c.is_ascii_digit()) => { + (h.to_lowercase(), p.parse::().ok()) + } + _ => (entry.to_lowercase(), None), + }; + Some((None, host, port)) + } +} + +/// Returns true iff `url` matches one of the configured allowed origins. +/// Match is by exact (scheme, host, port) — never substring. +fn redirect_url_allowed(url_str: &str, allowed: &[&str]) -> bool { + let parsed = match url::Url::parse(url_str) { + Ok(u) => u, + Err(_) => return false, + }; + let url_host = match parsed.host_str() { + Some(h) => h.to_lowercase(), + None => return false, + }; + let url_scheme = parsed.scheme(); + let url_port = parsed.port_or_known_default(); + + allowed.iter().any(|raw| { + let (allowed_scheme, allowed_host, allowed_port) = match parse_allowed_origin(raw) { + Some(t) => t, + None => return false, + }; + if allowed_host != url_host { + return false; + } + if let Some(scheme) = &allowed_scheme { + if scheme != url_scheme { + return false; + } + } + if let Some(port) = allowed_port { + if Some(port) != url_port { + return false; + } + } + true + }) +} + /// Create solidb.redirect(url) -> error with redirect status function pub fn create_redirect_function(lua: &Lua) -> LuaResult { lua.create_function(|_, url: String| { + let allowed_origins = std::env::var("SOLIDB_ALLOWED_REDIRECT_ORIGINS").unwrap_or_default(); + let allowed_list: Vec<&str> = allowed_origins + .split(',') + .map(str::trim) + .filter(|o| !o.is_empty()) + .collect(); + + // Absolute URLs are checked against the allowlist when one is configured. + // Relative paths and (when no allowlist is set) absolute URLs are passed through — + // SEC-095 made the allowlist opt-in. + let is_absolute = url.starts_with("http://") || url.starts_with("https://"); + if is_absolute && !allowed_list.is_empty() && !redirect_url_allowed(&url, &allowed_list) { + return Err(mlua::Error::RuntimeError( + "REDIRECT: Forbidden - redirect to untrusted domain".to_string(), + )); + } + Err::(mlua::Error::RuntimeError(format!("REDIRECT:{}", url))) }) } @@ -171,6 +248,23 @@ pub fn create_response_html_function(_lua: &Lua) -> LuaResult { pub fn create_response_file_function(_lua: &Lua) -> LuaResult { let lua_ref = _lua; lua_ref.create_function(move |lua, path: String| { + // Security: reject absolute paths and any ParentDir component. + // Component-based check avoids false positives on legit names like `v1.2..md` + // and false negatives on tricks substring-matching would miss. + let p = std::path::Path::new(&path); + let has_parent_dir = p + .components() + .any(|c| matches!(c, std::path::Component::ParentDir)); + if p.is_absolute() || has_parent_dir { + let file_info = lua.create_table()?; + file_info.set( + "error", + "Invalid path: absolute paths and parent-dir traversal are not allowed", + )?; + file_info.set("exists", false)?; + return Ok(LuaValue::Table(file_info)); + } + // Check if file exists and get its metadata match std::fs::metadata(&path) { Ok(metadata) => { diff --git a/src/scripting/lua_globals/http.rs b/src/scripting/lua_globals/http.rs index f2e127b..9617549 100644 --- a/src/scripting/lua_globals/http.rs +++ b/src/scripting/lua_globals/http.rs @@ -1,13 +1,157 @@ //! HTTP fetch function for Lua +//! Security: Includes SSRF protection to prevent access to internal services use crate::error::DbError; use mlua::{Lua, Value as LuaValue}; +use std::net::{IpAddr, SocketAddr}; +use std::str::FromStr; + +/// Hostnames that resolve to cloud-provider metadata services. We refuse them +/// even before DNS resolution so misconfigured DNS can't surprise us. +const BLOCKED_HOSTNAMES: &[&str] = &[ + "metadata.google.internal", + "metadata.internal", + "instance-data", +]; + +/// Reject any IP that is not safely routable from a server: loopback, +/// private (RFC1918), CGNAT, link-local, broadcast, multicast, unspecified, +/// IPv6 ULA / link-local / loopback / unspecified, and IPv4-mapped IPv6 +/// addresses (which we recursively check against the embedded v4 address). +fn validate_ip(ip: IpAddr) -> Result<(), String> { + match ip { + IpAddr::V4(v4) => { + let o = v4.octets(); + if v4.is_loopback() { + return Err("loopback IP not allowed".into()); + } + if v4.is_unspecified() { + return Err("unspecified IP not allowed".into()); + } + if v4.is_broadcast() { + return Err("broadcast IP not allowed".into()); + } + if v4.is_multicast() { + return Err("multicast IP not allowed".into()); + } + if v4.is_link_local() { + return Err("link-local IP not allowed".into()); + } + // RFC1918 private ranges + if o[0] == 10 + || (o[0] == 172 && (16..=31).contains(&o[1])) + || (o[0] == 192 && o[1] == 168) + { + return Err("private IP not allowed".into()); + } + // 100.64.0.0/10 CGNAT + if o[0] == 100 && (64..=127).contains(&o[1]) { + return Err("CGNAT IP not allowed".into()); + } + // 0.0.0.0/8 reserved + if o[0] == 0 { + return Err("reserved IP not allowed".into()); + } + Ok(()) + } + IpAddr::V6(v6) => { + if let Some(v4) = v6.to_ipv4_mapped() { + return validate_ip(IpAddr::V4(v4)); + } + if v6.is_loopback() || v6.is_unspecified() || v6.is_multicast() { + return Err("special-use IPv6 not allowed".into()); + } + let seg0 = v6.segments()[0]; + // fe80::/10 link-local + if (seg0 & 0xffc0) == 0xfe80 { + return Err("IPv6 link-local not allowed".into()); + } + // fc00::/7 unique-local + if (seg0 & 0xfe00) == 0xfc00 { + return Err("IPv6 ULA not allowed".into()); + } + Ok(()) + } + } +} + +/// Outcome of validating a fetch URL: the parsed URL, host, port to connect on, +/// and a fixed `SocketAddr` to bind for that hostname so reqwest can't redo DNS. +struct ValidatedTarget { + url: url::Url, + host: String, + addr: SocketAddr, +} + +/// Validate URL to prevent SSRF attacks. Performs DNS resolution and rejects +/// the request if any resolved address is non-public; pins the connection to +/// the first validated address to defeat DNS rebinding mid-flight. +async fn validate_url_for_ssrf(url: &str) -> Result { + if !url.starts_with("http://") && !url.starts_with("https://") { + return Err("only http and https schemes are allowed".into()); + } + let parsed = url::Url::parse(url).map_err(|e| format!("invalid URL: {}", e))?; + let host = parsed.host_str().ok_or("URL must have a host")?.to_string(); + let host_lower = host.to_lowercase(); + + if host_lower == "localhost" { + return Err("localhost not allowed".into()); + } + if BLOCKED_HOSTNAMES + .iter() + .any(|b| host_lower == *b || host_lower.ends_with(&format!(".{}", b))) + { + return Err(format!("blocked hostname: {}", host_lower)); + } + + let port = parsed.port_or_known_default().ok_or("missing port")?; + + // If host is a literal IP we still validate it. + if let Ok(ip) = IpAddr::from_str(&host) { + validate_ip(ip)?; + return Ok(ValidatedTarget { + url: parsed, + host, + addr: SocketAddr::new(ip, port), + }); + } + + // Resolve and validate every returned IP. We refuse the request if *any* + // resolved address is non-public — preventing both selection bias and a + // rebind that flips between safe and unsafe answers. + let lookup = tokio::net::lookup_host((host.as_str(), port)) + .await + .map_err(|e| format!("DNS resolution failed for {}: {}", host, e))?; + let addrs: Vec = lookup.collect(); + if addrs.is_empty() { + return Err(format!("no addresses resolved for {}", host)); + } + for sa in &addrs { + validate_ip(sa.ip())?; + } + Ok(ValidatedTarget { + url: parsed, + host, + addr: addrs[0], + }) +} /// Create the fetch function for HTTP requests pub fn create_fetch_function(lua: &Lua) -> Result { lua.create_async_function( |lua, (url, options): (String, Option)| async move { - let client = reqwest::Client::new(); + let target = validate_url_for_ssrf(&url) + .await + .map_err(|e| mlua::Error::RuntimeError(format!("SSRF protection: {}", e)))?; + + // Pin DNS to the validated address — defeats DNS rebinding by ensuring + // the connection goes to an address we already accepted. + let client = reqwest::Client::builder() + .resolve(&target.host, target.addr) + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|e| mlua::Error::RuntimeError(format!("HTTP client: {}", e)))?; + let url = target.url.as_str().to_string(); let mut req_builder = client.get(&url); // Default to GET if let Some(LuaValue::Table(t)) = options { diff --git a/src/sdbql/executor/builtins/misc.rs b/src/sdbql/executor/builtins/misc.rs index 9120cab..1871bf6 100644 --- a/src/sdbql/executor/builtins/misc.rs +++ b/src/sdbql/executor/builtins/misc.rs @@ -1,6 +1,6 @@ //! Miscellaneous utility functions for SDBQL. //! -//! UUID, SLEEP, TYPEOF, COALESCE, etc. +//! UUID, TYPEOF, COALESCE, etc. use crate::error::{DbError, DbResult}; use serde_json::Value; @@ -63,14 +63,6 @@ pub fn evaluate(name: &str, args: &[Value]) -> DbResult> { } Ok(Some(Value::Bool(true))) } - "SLEEP" => { - check_args(name, args, 1)?; - let ms = args[0].as_u64().ok_or_else(|| { - DbError::ExecutionError("SLEEP: argument must be a number".to_string()) - })?; - std::thread::sleep(std::time::Duration::from_millis(ms)); - Ok(Some(Value::Null)) - } "RANGE" => { if args.is_empty() || args.len() > 3 { return Err(DbError::ExecutionError( diff --git a/src/sdbql/executor/execution/entry.rs b/src/sdbql/executor/execution/entry.rs index d3e3aed..4125dcf 100644 --- a/src/sdbql/executor/execution/entry.rs +++ b/src/sdbql/executor/execution/entry.rs @@ -107,6 +107,17 @@ impl<'a> QueryExecutor<'a> { .map(|n| n as usize) .unwrap_or(0); + // Check for overflow in limit_offset + limit_count + let max_fetch = match limit_offset.checked_add(limit_count) { + Some(sum) => sum, + None => { + return Ok(QueryExecutionResult { + results: vec![], + mutations: MutationStats::new(), + }); + } + }; + // Check if the sort field is on the loop variable // Check if sort expression is a simple field access on the loop variable if let Expression::FieldAccess(base, field) = sort_expr { @@ -115,14 +126,16 @@ impl<'a> QueryExecutor<'a> { // Try to get collection and check for index if let Ok(collection) = self.get_collection(&for_clause.collection) { - if let Some(docs) = collection.index_sorted( - field, - *sort_asc, - Some(limit_offset + limit_count), - ) { + if let Some(docs) = + collection.index_sorted(field, *sort_asc, Some(max_fetch)) + { // Got sorted documents from index! Apply offset and build result let start = limit_offset.min(docs.len()); - let end = (start + limit_count).min(docs.len()); + // Check for overflow in start + limit_count + let end = match start.checked_add(limit_count) { + Some(sum) => sum.min(docs.len()), + None => docs.len(), + }; let docs = &docs[start..end]; let results = @@ -282,7 +295,7 @@ impl<'a> QueryExecutor<'a> { .count(); if for_count == 1 && filter_count == 0 { - query.limit_clause.as_ref().map(|l| { + query.limit_clause.as_ref().and_then(|l| { let offset = self .evaluate_expr_with_context(&l.offset, &initial_bindings) .ok() @@ -295,7 +308,7 @@ impl<'a> QueryExecutor<'a> { .and_then(|v| v.as_u64()) .map(|n| n as usize) .unwrap_or(0); - offset + count + offset.checked_add(count) }) } else { None diff --git a/src/sdbql/executor/expression.rs b/src/sdbql/executor/expression.rs index d6918bf..6f60478 100644 --- a/src/sdbql/executor/expression.rs +++ b/src/sdbql/executor/expression.rs @@ -154,6 +154,12 @@ impl<'a> QueryExecutor<'a> { f ))); } + if !f.is_finite() { + return Err(DbError::ExecutionError(format!( + "Array index must be finite, got: {}", + f + ))); + } f as usize } else { return Err(DbError::ExecutionError(format!( @@ -270,12 +276,21 @@ impl<'a> QueryExecutor<'a> { let start = match &start_val { Value::Number(n) => { - // Try integer first, then fall back to truncating float - n.as_i64() - .or_else(|| n.as_f64().map(|f| f as i64)) - .ok_or_else(|| { - DbError::ExecutionError("Range start must be a number".to_string()) - })? + if let Some(i) = n.as_i64() { + i + } else if let Some(f) = n.as_f64() { + if !f.is_finite() { + return Err(DbError::ExecutionError(format!( + "Range start must be finite, got: {}", + f + ))); + } + f as i64 + } else { + return Err(DbError::ExecutionError( + "Range start must be a number".to_string(), + )); + } } _ => { return Err(DbError::ExecutionError(format!( @@ -287,12 +302,21 @@ impl<'a> QueryExecutor<'a> { let end = match &end_val { Value::Number(n) => { - // Try integer first, then fall back to truncating float - n.as_i64() - .or_else(|| n.as_f64().map(|f| f as i64)) - .ok_or_else(|| { - DbError::ExecutionError("Range end must be a number".to_string()) - })? + if let Some(i) = n.as_i64() { + i + } else if let Some(f) = n.as_f64() { + if !f.is_finite() { + return Err(DbError::ExecutionError(format!( + "Range end must be finite, got: {}", + f + ))); + } + f as i64 + } else { + return Err(DbError::ExecutionError( + "Range end must be a number".to_string(), + )); + } } _ => { return Err(DbError::ExecutionError(format!( @@ -302,6 +326,25 @@ impl<'a> QueryExecutor<'a> { } }; + const MAX_RANGE_SIZE: i64 = 10_000_000; + // Use checked_sub so `start = i64::MIN` does not panic / wrap. + // Any subtraction overflow is itself proof the range exceeds MAX. + let range_size = end + .checked_sub(start) + .and_then(i64::checked_abs) + .ok_or_else(|| { + DbError::ExecutionError(format!( + "Range size overflow (start={}, end={})", + start, end + )) + })?; + if range_size > MAX_RANGE_SIZE { + return Err(DbError::ExecutionError(format!( + "Range size {} exceeds maximum allowed size of {}", + range_size, MAX_RANGE_SIZE + ))); + } + // Generate array from start to end (inclusive) let arr: Vec = (start..=end) .map(|i| Value::Number(serde_json::Number::from(i))) diff --git a/src/sdbql/parser/expressions/mod.rs b/src/sdbql/parser/expressions/mod.rs index aa2c82d..d09dd2b 100644 --- a/src/sdbql/parser/expressions/mod.rs +++ b/src/sdbql/parser/expressions/mod.rs @@ -19,7 +19,10 @@ use crate::sdbql::parser::Parser; impl Parser { /// Entry point for expression parsing pub(crate) fn parse_expression(&mut self) -> DbResult { - self.parse_ternary_expression() + self.check_depth()?; + let result = self.parse_ternary_expression(); + self.leave_depth(); + result } } diff --git a/src/sdbql/parser/mod.rs b/src/sdbql/parser/mod.rs index a44cabf..d12da6d 100644 --- a/src/sdbql/parser/mod.rs +++ b/src/sdbql/parser/mod.rs @@ -17,8 +17,11 @@ pub struct Parser { pub(crate) tokens: Vec, pub(crate) position: usize, pub(crate) allow_in_operator: bool, + depth: usize, } +const MAX_PARSE_DEPTH: usize = 64; + impl Parser { /// Create a new parser from an input string pub fn new(input: &str) -> DbResult { @@ -29,9 +32,26 @@ impl Parser { tokens, position: 0, allow_in_operator: true, + depth: 0, }) } + /// Check and increment depth, returning error if too deep + fn check_depth(&mut self) -> DbResult<()> { + if self.depth >= MAX_PARSE_DEPTH { + return Err(DbError::ParseError( + "Query nesting too deep (max 64)".to_string(), + )); + } + self.depth += 1; + Ok(()) + } + + /// Decrement depth when leaving a nested parse + fn leave_depth(&mut self) { + self.depth = self.depth.saturating_sub(1); + } + /// Get the current token pub(crate) fn current_token(&self) -> &Token { self.tokens.get(self.position).unwrap_or(&Token::Eof) @@ -72,6 +92,13 @@ impl Parser { /// Parse a query, optionally checking for trailing tokens (false for subqueries) pub(crate) fn parse_query(&mut self, check_trailing: bool) -> DbResult { + self.check_depth()?; + let result = self.parse_query_inner(check_trailing); + self.leave_depth(); + result + } + + pub(crate) fn parse_query_inner(&mut self, check_trailing: bool) -> DbResult { // Parse optional CREATE STREAM or CREATE MATERIALIZED VIEW let (create_stream_clause, create_mv_clause) = if matches!(self.current_token(), Token::Create) { diff --git a/src/server/auth.rs b/src/server/auth.rs index eddc8f7..0871667 100644 --- a/src/server/auth.rs +++ b/src/server/auth.rs @@ -23,6 +23,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::RwLock; use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use subtle::ConstantTimeEq; /// Rate limiting configuration const MAX_LOGIN_ATTEMPTS: usize = 20; @@ -202,7 +203,14 @@ pub struct AuthService; impl AuthService { /// Initialize authentication system /// Checks if admin user exists, if not creates default - pub fn init(storage: &StorageEngine, replication_log: Option<&SyncLog>) -> Result<(), DbError> { + /// Security: Admin passwords are never logged to stdout/stderr. + /// Instead, they are saved to a file with restricted permissions (600). + /// The file path is shown in the console message to the operator. + pub fn init( + storage: &StorageEngine, + replication_log: Option<&SyncLog>, + data_dir: &str, + ) -> Result<(), DbError> { // Force JWT_SECRET initialization to show warning at startup if not configured let _ = JWT_SECRET.len(); @@ -302,10 +310,28 @@ impl AuthService { } if is_override { - tracing::warn!( + tracing::info!( "Admin user created with password from SOLIDB_ADMIN_PASSWORD env var" ); } else { + let password_file = format!("{}/.admin_password", data_dir); + #[cfg(unix)] + { + use std::io::Write; + use std::os::unix::fs::OpenOptionsExt; + // Atomically create the file with 0600 perms so the password + // never lands in a world-readable inode (SEC-082 TOCTOU). + let mut file = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .mode(0o600) + .open(&password_file)?; + writeln!(file, "{}", password)?; + } + #[cfg(not(unix))] + { + std::fs::write(&password_file, format!("{}\n", password))?; + } tracing::warn!( "╔══════════════════════════════════════════════════════════════════╗" ); @@ -318,7 +344,10 @@ impl AuthService { tracing::warn!( "║ Username: admin ║" ); - tracing::warn!("║ Password: {} ║", password); + tracing::warn!( + "║ ║" + ); + tracing::warn!("║ ⚠️ PASSWORD SAVED TO: {}", password_file); tracing::warn!( "║ ║" ); @@ -588,9 +617,11 @@ impl AuthService { roles: Option>, scoped_databases: Option>, ) -> Result { + // Refuse to mint a token if the system clock predates UNIX_EPOCH — + // returning unwrap_or_default() would silently emit an already-expired token. let expiration = SystemTime::now() .duration_since(UNIX_EPOCH) - .unwrap() + .map_err(|_| DbError::InternalError("System clock before UNIX epoch".to_string()))? .as_secs() as usize + 24 * 3600; // 24 hours @@ -616,7 +647,7 @@ impl AuthService { pub fn create_livequery_jwt() -> Result { let expiration = SystemTime::now() .duration_since(UNIX_EPOCH) - .unwrap() + .map_err(|_| DbError::InternalError("System clock before UNIX epoch".to_string()))? .as_secs() as usize + 2; // 2 seconds - ultra short lived for file downloads! @@ -737,17 +768,6 @@ impl AuthService { } if roles.is_empty() { - // TEMPORARY: If there's only one admin user and no roles assigned, - // automatically grant admin role. This will be removed later. - if let Ok(admins_coll) = db.get_collection(ADMIN_COLL) { - if admins_coll.count() == 1 { - tracing::info!( - "Single admin user '{}' detected - auto-granting admin role", - username - ); - return Some(vec!["admin".to_string()]); - } - } None } else { Some(roles) @@ -756,11 +776,11 @@ impl AuthService { } /// Constant-time comparison to prevent timing attacks +/// Uses subtle::ConstantTimeEq for proper constant-time comparison pub(crate) fn constant_time_eq(a: &[u8], b: &[u8]) -> bool { - if a.len() != b.len() { - return false; - } - a.iter().zip(b.iter()).fold(0, |acc, (x, y)| acc | (x ^ y)) == 0 + // subtle::ConstantTimeEq::ct_eq returns a Choice, we convert to bool + // Note: ct_eq on slices short-circuits on length mismatch (but that's still constant-time) + a.ct_eq(b).unwrap_u8() == 1 } /// Axum Middleware for Authentication @@ -790,10 +810,16 @@ pub async fn auth_middleware( .and_then(|h| h.to_str().ok()) .unwrap_or(""); - // Only bypass if secrets match and secret is not empty - if !cluster_secret.is_empty() - && constant_time_eq(cluster_secret.as_bytes(), provided_secret.as_bytes()) - { + // Fail closed: if no keyfile configured, reject internal requests + if cluster_secret.is_empty() { + tracing::warn!( + "CLUSTER AUTH REJECTED: Internal request but no keyfile configured on this node." + ); + return Err(StatusCode::SERVICE_UNAVAILABLE); + } + + // Only bypass if secrets match + if constant_time_eq(cluster_secret.as_bytes(), provided_secret.as_bytes()) { let claims = Claims { sub: "cluster-internal".to_string(), exp: usize::MAX, @@ -843,6 +869,17 @@ pub async fn auth_middleware( if let Some(token) = header.strip_prefix("Bearer ") { match AuthService::validate_token(token) { Ok(claims) => { + if claims.livequery == Some(true) { + let path = req.uri().path(); + let allowed_livequery_paths = ["/_api/ws/changefeed", "/_api/livequery"]; + if !allowed_livequery_paths.iter().any(|p| path.starts_with(p)) { + tracing::warn!( + "livequery token used on non-whitelisted path: {}", + path + ); + return Err(StatusCode::FORBIDDEN); + } + } req.extensions_mut().insert(claims); return Ok(next.run(req).await); } @@ -1016,7 +1053,31 @@ pub async fn permissive_auth_middleware( } } - // No auth header present - proceed as anonymous (no claims injected) + // No auth header present - proceed as anonymous (no claims injected). + // Emit a structured audit event at WARN level so anonymous script access + // is captured by default log filters and not lost at DEBUG. + let method = req.method().clone(); + let path = req.uri().path().to_string(); + let peer = req + .headers() + .get("x-forwarded-for") + .and_then(|v| v.to_str().ok()) + .map(|s| s.split(',').next().unwrap_or(s).trim().to_string()) + .or_else(|| { + req.headers() + .get("x-real-ip") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()) + }) + .unwrap_or_else(|| "unknown".to_string()); + tracing::warn!( + target: "audit", + event = "anonymous_access", + method = %method, + path = %path, + peer = %peer, + "permissive_auth: anonymous request to script endpoint" + ); Ok(next.run(req).await) } diff --git a/src/server/authorization.rs b/src/server/authorization.rs index a8f0a17..93b15b6 100644 --- a/src/server/authorization.rs +++ b/src/server/authorization.rs @@ -279,11 +279,7 @@ impl AuthorizationService { // Get role names from claims let role_names = claims.roles.clone().unwrap_or_default(); if role_names.is_empty() { - // No roles assigned - grant admin to all authenticated users for backward compatibility - // This will only happen during migration period - let mut permissions = HashSet::new(); - permissions.insert(Permission::global_admin()); - return Ok(permissions); + return Ok(HashSet::new()); } // Load roles from DB or cache diff --git a/src/server/blob_handlers.rs b/src/server/blob_handlers.rs index 14b0b2a..935cdbe 100644 --- a/src/server/blob_handlers.rs +++ b/src/server/blob_handlers.rs @@ -382,6 +382,7 @@ pub(crate) async fn distribute_blob_chunks_across_cluster( // For each chunk, replicate to multiple nodes for redundancy // We'll use a simple round-robin distribution with replication factor of min(3, node_count) let replication_factor = std::cmp::min(3, node_addresses.len()); + let cluster_secret = coordinator.cluster_secret(); for (chunk_idx, chunk_data) in chunks { // Select target nodes for this chunk using round-robin @@ -405,7 +406,7 @@ pub(crate) async fn distribute_blob_chunks_across_cluster( blob_key, &[(*chunk_idx, chunk_data.clone())], None, // No metadata for individual chunks - "", // No auth needed for internal replication + &cluster_secret, ) .await { diff --git a/src/server/cluster_handlers.rs b/src/server/cluster_handlers.rs index abd6eae..895c224 100644 --- a/src/server/cluster_handlers.rs +++ b/src/server/cluster_handlers.rs @@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize}; use crate::cluster::stats::NodeBasicStats; use crate::error::DbError; +use crate::server::authorization::{AuthorizationService, PermissionAction}; use super::handlers::{AppState, AuthParams}; @@ -513,8 +514,10 @@ pub struct RemoveNodeResponse { /// Remove a node from the cluster and trigger rebalancing pub async fn cluster_remove_node( State(state): State, + axum::extract::Extension(claims): axum::extract::Extension, Json(req): Json, ) -> Result, DbError> { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; let node_addr = req.node_address; // Get the shard coordinator @@ -547,7 +550,9 @@ pub struct RebalanceResponse { /// Trigger cluster rebalancing pub async fn cluster_rebalance( State(state): State, + axum::extract::Extension(claims): axum::extract::Extension, ) -> Result, DbError> { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; let coordinator = state.shard_coordinator.as_ref().ok_or_else(|| { DbError::InternalError("Shard coordinator not available - not in cluster mode".to_string()) })?; @@ -574,7 +579,15 @@ pub async fn cluster_cleanup( .and_then(|v| v.to_str().ok()) .unwrap_or(""); - if !secret.is_empty() && request_secret != secret { + if secret.is_empty() { + return Err(DbError::InternalError( + "Cluster keyfile not configured".to_string(), + )); + } + if !crate::server::auth::constant_time_eq( + request_secret.as_bytes(), + secret.as_bytes(), + ) { return Err(DbError::BadRequest("Invalid cluster secret".to_string())); } @@ -625,7 +638,15 @@ pub async fn cluster_reshard( .and_then(|v| v.to_str().ok()) .unwrap_or(""); - if !secret.is_empty() && request_secret != secret { + if secret.is_empty() { + return Err(DbError::InternalError( + "Cluster keyfile not configured".to_string(), + )); + } + if !crate::server::auth::constant_time_eq( + request_secret.as_bytes(), + secret.as_bytes(), + ) { return Err(DbError::BadRequest("Invalid cluster secret".to_string())); } diff --git a/src/server/handlers/auth.rs b/src/server/handlers/auth.rs index 8c70cd8..4abb4e1 100644 --- a/src/server/handlers/auth.rs +++ b/src/server/handlers/auth.rs @@ -1,8 +1,10 @@ use super::system::AppState; use crate::error::DbError; +use crate::server::auth::Claims; +use crate::server::authorization::{AuthorizationService, PermissionAction}; use crate::sync::{LogEntry, Operation}; use axum::{ - extract::{Path, State}, + extract::{Extension, Path, State}, http::HeaderMap, response::Json, }; @@ -139,8 +141,10 @@ pub async fn change_password_handler( /// Handler for creating a new API key pub async fn create_api_key_handler( State(state): State, + Extension(claims): Extension, Json(req): Json, ) -> Result, DbError> { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; // Generate key let (raw_key, key_hash) = crate::server::auth::AuthService::generate_api_key(); @@ -214,7 +218,9 @@ pub async fn create_api_key_handler( /// Handler for listing API keys (without the actual keys) pub async fn list_api_keys_handler( State(state): State, + Extension(claims): Extension, ) -> Result, DbError> { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; let db = state.storage.get_database("_system")?; // Return empty if collection doesn't exist @@ -246,8 +252,10 @@ pub async fn list_api_keys_handler( /// Handler for deleting an API key pub async fn delete_api_key_handler( State(state): State, + Extension(claims): Extension, Path(key_id): Path, ) -> Result, DbError> { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; let db = state.storage.get_database("_system")?; let collection = db.get_collection(crate::server::auth::API_KEYS_COLL)?; @@ -307,6 +315,7 @@ pub async fn login_handler( crate::server::auth::AuthService::init( &state.storage, state.replication_log.as_deref(), + state.storage.data_dir(), )?; db.get_collection("_admins")? } @@ -316,7 +325,11 @@ pub async fn login_handler( // 3. Check if collection is empty (also create default admin) if collection.count() == 0 { tracing::warn!("_admins collection empty, creating default admin..."); - crate::server::auth::AuthService::init(&state.storage, state.replication_log.as_deref())?; + crate::server::auth::AuthService::init( + &state.storage, + state.replication_log.as_deref(), + state.storage.data_dir(), + )?; } // 4. Get user document @@ -342,8 +355,10 @@ pub async fn login_handler( return Err(DbError::BadRequest("Invalid credentials".to_string())); } - // 7. Generate Token - let token = crate::server::auth::AuthService::create_jwt(&user.username)?; + // 7. Generate Token with roles + let roles = crate::server::auth::AuthService::get_user_roles(&state.storage, &user.username); + let token = + crate::server::auth::AuthService::create_jwt_with_roles(&user.username, roles, None)?; Ok(Json(LoginResponse { token })) } diff --git a/src/server/handlers/blob_upload.rs b/src/server/handlers/blob_upload.rs index 4fa5e7b..44a6529 100644 --- a/src/server/handlers/blob_upload.rs +++ b/src/server/handlers/blob_upload.rs @@ -59,7 +59,7 @@ pub async fn create_upload_session( body.mime_type, body.total_size, body.chunk_size, - ); + )?; Ok(Json(serde_json::json!({ "upload_id": info.upload_id, diff --git a/src/server/handlers/blobs.rs b/src/server/handlers/blobs.rs index d9a19f1..97ea18d 100644 --- a/src/server/handlers/blobs.rs +++ b/src/server/handlers/blobs.rs @@ -1,4 +1,4 @@ -use super::system::AppState; +use super::system::{sanitize_filename, AppState}; use crate::{ error::DbError, storage::http_client::get_http_client, @@ -102,7 +102,10 @@ pub async fn upload_blob( let mut metadata = serde_json::Map::new(); metadata.insert("_key".to_string(), Value::String(blob_key.clone())); if let Some(fn_str) = file_name { - metadata.insert("name".to_string(), Value::String(fn_str)); + metadata.insert( + "name".to_string(), + Value::String(sanitize_filename(&fn_str)), + ); } if let Some(mt_str) = mime_type { metadata.insert("type".to_string(), Value::String(mt_str)); @@ -313,9 +316,10 @@ pub async fn download_blob( builder = builder.header("Content-Length", size); } if let Some(name) = file_name { + let safe_name = sanitize_filename(&name); builder = builder.header( "Content-Disposition", - format!("attachment; filename=\"{}\"", name), + format!("attachment; filename=\"{}\"", safe_name), ); } @@ -350,6 +354,7 @@ pub async fn distribute_blob_chunks_across_cluster( // For each chunk, replicate to multiple nodes for redundancy // We'll use a simple round-robin distribution with replication factor of min(3, node_count) let replication_factor = std::cmp::min(3, node_addresses.len()); + let cluster_secret = coordinator.cluster_secret(); for (chunk_idx, chunk_data) in chunks { // Select target nodes for this chunk using round-robin @@ -373,7 +378,7 @@ pub async fn distribute_blob_chunks_across_cluster( blob_key, &[(*chunk_idx, chunk_data.clone())], None, // No metadata for individual chunks - "", // No auth needed for internal replication + &cluster_secret, ) .await { diff --git a/src/server/handlers/cluster.rs b/src/server/handlers/cluster.rs index c734b80..4048ddb 100644 --- a/src/server/handlers/cluster.rs +++ b/src/server/handlers/cluster.rs @@ -468,7 +468,12 @@ pub async fn cluster_cleanup( .and_then(|v| v.to_str().ok()) .unwrap_or(""); - if !secret.is_empty() && request_secret != secret { + if secret.is_empty() { + return Err(DbError::InternalError( + "Cluster keyfile not configured".to_string(), + )); + } + if !crate::server::auth::constant_time_eq(request_secret.as_bytes(), secret.as_bytes()) { return Err(DbError::BadRequest("Invalid cluster secret".to_string())); } @@ -519,7 +524,12 @@ pub async fn cluster_reshard( .and_then(|v| v.to_str().ok()) .unwrap_or(""); - if !secret.is_empty() && request_secret != secret { + if secret.is_empty() { + return Err(DbError::InternalError( + "Cluster keyfile not configured".to_string(), + )); + } + if !crate::server::auth::constant_time_eq(request_secret.as_bytes(), secret.as_bytes()) { return Err(DbError::BadRequest("Invalid cluster secret".to_string())); } diff --git a/src/server/handlers/databases.rs b/src/server/handlers/databases.rs index a061316..831ae7b 100644 --- a/src/server/handlers/databases.rs +++ b/src/server/handlers/databases.rs @@ -1,8 +1,9 @@ use super::system::AppState; use crate::error::DbError; +use crate::server::authorization::{AuthorizationService, PermissionAction}; use crate::sync::{LogEntry, Operation}; use axum::{ - extract::{Path, State}, + extract::{Extension, Path, State}, http::StatusCode, response::Json, }; @@ -26,8 +27,15 @@ pub struct ListDatabasesResponse { pub async fn create_database( State(state): State, + Extension(claims): Extension, Json(req): Json, ) -> Result, DbError> { + if req.name.is_empty() { + return Err(DbError::BadRequest( + "Database name cannot be empty".to_string(), + )); + } + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; state.storage.create_database(req.name.clone())?; // Record to replication log @@ -89,8 +97,10 @@ pub async fn list_databases(State(state): State) -> Json, + Extension(claims): Extension, Path(name): Path, ) -> Result { + AuthorizationService::check_permission(&claims, &state, PermissionAction::Admin, None).await?; state.storage.delete_database(&name)?; // Record to replication log diff --git a/src/server/handlers/import_export.rs b/src/server/handlers/import_export.rs index 4c1ead1..9eb93f6 100644 --- a/src/server/handlers/import_export.rs +++ b/src/server/handlers/import_export.rs @@ -11,6 +11,8 @@ use base64::{engine::general_purpose, Engine as _}; use futures::stream::StreamExt; use serde_json::Value; +const MAX_BLOB_CHUNK_SIZE: usize = 16 * 1024 * 1024; // 16 MiB + pub async fn export_collection( State(state): State, Path((db_name, coll_name)): Path<(String, String)>, @@ -304,6 +306,13 @@ pub async fn import_collection( if let Some(data_len) = doc.get("_data_length").and_then(|v| v.as_u64()) { + if data_len > MAX_BLOB_CHUNK_SIZE as u64 { + return Err(DbError::BadRequest(format!( + "Blob chunk size {} exceeds maximum allowed size of {}", + data_len, + MAX_BLOB_CHUNK_SIZE + ))); + } let required_len = data_len as usize; let total_required = required_len + 1; // +1 for trailing newline diff --git a/src/server/handlers/query.rs b/src/server/handlers/query.rs index 5ec9c6a..8a18a5f 100644 --- a/src/server/handlers/query.rs +++ b/src/server/handlers/query.rs @@ -21,9 +21,11 @@ use std::sync::Arc; const QUERY_TIMEOUT_SECS: u64 = 30; /// Default slow query threshold in milliseconds (100ms) -/// Queries taking longer than this will be logged to _slow_queries collection const SLOW_QUERY_THRESHOLD_MS: f64 = 100.0; +/// Maximum batch size to prevent memory exhaustion +const MAX_BATCH_SIZE: usize = 10_000; + // ==================== Structs ==================== #[derive(Debug, Deserialize)] @@ -187,6 +189,10 @@ pub async fn execute_query( .query_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + // Auth is enforced by `auth_middleware` on the route layer (routes.rs). + // No per-handler token validation needed here — would only drift from the + // middleware (API keys, Basic auth, cluster-internal bypass). + // Check for transaction context if let Some(tx_id) = get_transaction_id(&headers) { // Execute transactional SDBQL query @@ -534,7 +540,7 @@ pub async fn execute_query( } } - let batch_size = req.batch_size; + let batch_size = req.batch_size.min(MAX_BATCH_SIZE); // Clone db_name and query text for slow query logging (before they're moved) let db_name_for_logging = db_name.clone(); @@ -685,23 +691,44 @@ pub async fn explain_query( let prepared = crate::sdbql::get_prepared_statement_cache() .parse_if_needed(&req.query) .await?; - let query = prepared.query.as_ref(); + let query = (*prepared.query).clone(); + let bind_vars = req.bind_vars.clone(); + let storage = state.storage.clone(); + let shard_coordinator = state.shard_coordinator.clone(); + let is_scatter_gather = headers.contains_key("X-Scatter-Gather"); + + let explain = { + let storage = storage.clone(); + match tokio::time::timeout( + std::time::Duration::from_secs(QUERY_TIMEOUT_SECS), + tokio::task::spawn_blocking(move || { + let mut executor = if bind_vars.is_empty() { + QueryExecutor::with_database(&storage, db_name) + } else { + QueryExecutor::with_database_and_bind_vars(&storage, db_name, bind_vars) + }; - // explain() is fast - no need for spawn_blocking - let mut executor = if req.bind_vars.is_empty() { - QueryExecutor::with_database(&state.storage, db_name) - } else { - QueryExecutor::with_database_and_bind_vars(&state.storage, db_name, req.bind_vars) - }; + if !is_scatter_gather { + if let Some(coordinator) = shard_coordinator { + executor = executor.with_shard_coordinator(coordinator); + } + } - // Inject shard coordinator for explain (if not already a sub-query) - if !headers.contains_key("X-Scatter-Gather") { - if let Some(coordinator) = state.shard_coordinator.clone() { - executor = executor.with_shard_coordinator(coordinator); + executor.explain(&query) + }), + ) + .await + { + Ok(join_result) => join_result + .map_err(|e| DbError::InternalError(format!("Task join error: {}", e)))??, + Err(_) => { + return Err(DbError::BadRequest(format!( + "Explain timeout: exceeded {} seconds", + QUERY_TIMEOUT_SECS + ))) + } } - } - - let explain = executor.explain(query)?; + }; Ok(Json(explain)) } diff --git a/src/server/handlers/websocket.rs b/src/server/handlers/websocket.rs index 2af88d8..210e762 100644 --- a/src/server/handlers/websocket.rs +++ b/src/server/handlers/websocket.rs @@ -15,13 +15,70 @@ use futures::{SinkExt, StreamExt}; use serde::Deserialize; use std::sync::Arc; +/// Maximum WebSocket message size (1 MB) - prevents OOM attacks +const MAX_WS_MESSAGE_SIZE: usize = 1024 * 1024; + +/// Validate the `Origin` header against `SOLIDB_CORS_ALLOWED_ORIGINS`. +/// Mirrors the HTTP CORS policy in `routes.rs`: empty allowlist = deny any +/// cross-origin request. Non-browser clients (no `Origin` header) are allowed. +/// Returns Ok(()) when the request may proceed, Err(()) to reject with 403. +fn validate_ws_origin(headers: &HeaderMap) -> Result<(), ()> { + let origin = match headers.get("origin").and_then(|o| o.to_str().ok()) { + Some(o) => o, + None => return Ok(()), // No Origin header — non-browser client. + }; + let allowed_raw = std::env::var("SOLIDB_CORS_ALLOWED_ORIGINS").unwrap_or_default(); + if allowed_raw == "*" { + return Ok(()); + } + if allowed_raw.is_empty() { + tracing::warn!( + "WebSocket: rejecting Origin '{}' — SOLIDB_CORS_ALLOWED_ORIGINS not set", + origin + ); + return Err(()); + } + let allowed = allowed_raw + .split(',') + .map(str::trim) + .any(|a| a == origin || a == "*"); + if allowed { + Ok(()) + } else { + tracing::warn!("WebSocket: rejecting disallowed Origin '{}'", origin); + Err(()) + } +} + +fn forbidden_response() -> Response { + Response::builder() + .status(StatusCode::FORBIDDEN) + .body(Body::empty()) + .expect("Valid status code should not fail") + .into_response() +} + // ==================== Cluster Status WebSocket ==================== /// WebSocket handler for real-time cluster status updates pub async fn cluster_status_ws( ws: WebSocketUpgrade, + AxumQuery(params): AxumQuery, State(state): State, -) -> impl IntoResponse { + headers: HeaderMap, +) -> Response { + if crate::server::auth::AuthService::validate_token(¶ms.token).is_err() { + return Response::builder() + .status(StatusCode::UNAUTHORIZED) + .body(Body::empty()) + .expect("Valid status code should not fail") + .into_response(); + } + + if validate_ws_origin(&headers).is_err() { + return forbidden_response(); + } + ws.on_upgrade(|socket| handle_cluster_ws(socket, state)) } @@ -76,6 +133,7 @@ pub async fn monitor_ws_handler( ws: WebSocketUpgrade, AxumQuery(params): AxumQuery, State(state): State, + headers: HeaderMap, ) -> Response { if crate::server::auth::AuthService::validate_token(¶ms.token).is_err() { return Response::builder() @@ -85,6 +143,10 @@ pub async fn monitor_ws_handler( .into_response(); } + if validate_ws_origin(&headers).is_err() { + return forbidden_response(); + } + ws.on_upgrade(|socket| handle_monitor_socket(socket, state)) } @@ -193,6 +255,10 @@ pub async fn ws_changefeed_handler( .into_response(); } + if validate_ws_origin(&headers).is_err() { + return forbidden_response(); + } + // Check if HTMX mode is requested let use_htmx = params.htmx.map(|s| s == "true").unwrap_or(false); @@ -235,6 +301,33 @@ async fn handle_socket(socket: WebSocket, state: AppState, use_htmx: bool) { // Main Receiver Loop while let Some(Ok(msg)) = receiver.next().await { + // Security: Check message size to prevent OOM attacks + let msg_len = match &msg { + Message::Text(text) => text.len(), + Message::Binary(data) => data.len(), + Message::Ping(data) => data.len(), + Message::Pong(data) => data.len(), + _ => 0, + }; + + if msg_len > MAX_WS_MESSAGE_SIZE { + tracing::warn!( + "[WS] Message size {} exceeds limit {}, closing connection", + msg_len, + MAX_WS_MESSAGE_SIZE + ); + let _ = tx + .send(Message::Text( + serde_json::json!({ + "error": "Message too large" + }) + .to_string() + .into(), + )) + .await; + break; + } + match msg { Message::Text(text) => { let req_result = serde_json::from_str::(&text); diff --git a/src/server/queue_handlers.rs b/src/server/queue_handlers.rs index b5ee78a..8ac0e5a 100644 --- a/src/server/queue_handlers.rs +++ b/src/server/queue_handlers.rs @@ -266,6 +266,19 @@ pub async fn create_cron_job_handler( .unwrap() .as_secs(); + let script_path = req.script; + if script_path.len() > 512 { + return Err(DbError::BadRequest( + "Script path exceeds maximum length of 512 characters".to_string(), + )); + } + let re = regex::Regex::new(r"^[A-Za-z0-9_/\-.]+$").unwrap(); + if !re.is_match(&script_path) { + return Err(DbError::BadRequest( + "Script path contains invalid characters".to_string(), + )); + } + let cron_job = crate::queue::CronJob { id: uuid::Uuid::new_v4().to_string(), revision: None, @@ -274,7 +287,7 @@ pub async fn create_cron_job_handler( queue: req.queue.unwrap_or_else(|| "default".to_string()), priority: req.priority.unwrap_or(0), max_retries: req.max_retries.unwrap_or(3), - script_path: req.script, + script_path, params: req.params.unwrap_or(JsonValue::Null), last_run: None, next_run: None, // Will be calculated by worker @@ -311,6 +324,17 @@ pub async fn update_cron_job_handler( cron_job.next_run = None; } if let Some(script) = req.script { + if script.len() > 512 { + return Err(DbError::BadRequest( + "Script path exceeds maximum length of 512 characters".to_string(), + )); + } + let re = regex::Regex::new(r"^[A-Za-z0-9_/\-.]+$").unwrap(); + if !re.is_match(&script) { + return Err(DbError::BadRequest( + "Script path contains invalid characters".to_string(), + )); + } cron_job.script_path = script; } if let Some(params) = req.params { @@ -355,13 +379,26 @@ pub async fn enqueue_job_handler( .unwrap() .as_secs(); + let script_path = req.script; + if script_path.len() > 512 { + return Err(DbError::BadRequest( + "Script path exceeds maximum length of 512 characters".to_string(), + )); + } + let re = regex::Regex::new(r"^[A-Za-z0-9_/\-.]+$").unwrap(); + if !re.is_match(&script_path) { + return Err(DbError::BadRequest( + "Script path contains invalid characters".to_string(), + )); + } + let job_id = uuid::Uuid::new_v4().to_string(); let job = Job { id: job_id.clone(), revision: None, queue: queue_name, priority: req.priority.unwrap_or(0), - script_path: req.script, + script_path, params: req.params.unwrap_or(JsonValue::Null), status: JobStatus::Pending, retry_count: 0, diff --git a/src/server/routes.rs b/src/server/routes.rs index 275e8f3..aefddda 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -11,9 +11,70 @@ use axum::{ use std::sync::Arc; use std::time::Duration; use tower_http::compression::CompressionLayer; -use tower_http::cors::{Any, CorsLayer}; +use tower_http::cors::{AllowHeaders, CorsLayer}; use tower_http::trace::TraceLayer; +/// Parse CORS allowed origins from environment variable. +/// Defaults to restrictive (no cross-origin) if not set. +/// Security: Empty string or wildcard in production is discouraged. +fn get_cors_allowed_origins() -> Vec { + let env_value = std::env::var("SOLIDB_CORS_ALLOWED_ORIGINS").unwrap_or_default(); + if env_value.is_empty() { + return vec![]; + } + if env_value == "*" || env_value == "*:*" { + tracing::warn!( + "CORS configured with wildcard '*'. This allows ANY origin. \ + Set SOLIDB_CORS_ALLOWED_ORIGINS to specific origins in production." + ); + return vec!["*".to_string()]; + } + env_value + .split(',') + .filter_map(|origin| { + let origin = origin.trim(); + if origin.is_empty() || !is_valid_origin(origin) { + if !origin.is_empty() { + tracing::warn!("Invalid CORS origin '{}' - skipping", origin); + } + return None; + } + Some(origin.to_string()) + }) + .collect() +} + +/// An origin must look like `scheme://host[:port]` with no path/query/fragment. +/// `axum::http::Uri::parse` accepts paths and bare strings, which CORS shouldn't. +fn is_valid_origin(s: &str) -> bool { + let parsed = match url::Url::parse(s) { + Ok(u) => u, + Err(_) => return false, + }; + if !matches!(parsed.scheme(), "http" | "https") { + return false; + } + if parsed.host_str().is_none() { + return false; + } + // Must be exactly an origin: empty path, no query, no fragment. + if parsed.path() != "/" && !parsed.path().is_empty() { + return false; + } + if parsed.query().is_some() || parsed.fragment().is_some() { + return false; + } + // url::Url stringifies origins with a trailing "/" — accept either form + // but reject inputs that contained extra structure beyond host[:port]. + let canonical_no_slash = format!( + "{}://{}{}", + parsed.scheme(), + parsed.host_str().unwrap(), + parsed.port().map(|p| format!(":{}", p)).unwrap_or_default() + ); + s == canonical_no_slash || s == format!("{}/", canonical_no_slash) +} + /// Middleware to count incoming requests async fn request_counter_middleware( axum::extract::State(state): axum::extract::State, @@ -75,7 +136,12 @@ pub fn create_router( // The previous logic checked cluster_config.peers. // New ClusterManager handles joining. // For now we pass replication_log to auth init. - if let Err(e) = crate::server::auth::AuthService::init(&storage, replication_log.as_deref()) { + // Security: data_dir is passed to save admin password to a file instead of logging it + if let Err(e) = crate::server::auth::AuthService::init( + &storage, + replication_log.as_deref(), + storage.data_dir(), + ) { tracing::error!("Failed to initialize authentication: {}", e); } else { tracing::info!("Authentication initialized successfully"); @@ -1036,9 +1102,20 @@ pub fn create_router( .no_br() .no_deflate(), ) - .layer( - CorsLayer::new() - .allow_origin(Any) + .layer({ + use axum::http::header; + use tower_http::cors::AllowOrigin; + + let allowed_origins = get_cors_allowed_origins(); + let is_wildcard = allowed_origins == ["*"]; + + // Browsers reject `Access-Control-Allow-Origin: *` combined with + // `Access-Control-Allow-Credentials: true`. Drop credentials in + // wildcard mode rather than emit a config that browsers ignore. + // tower-http likewise rejects `Access-Control-Allow-Headers: *` + // alongside credentials, so use an explicit allowlist when we + // need to emit credentials. + let mut cors = CorsLayer::new() .allow_methods([ Method::GET, Method::POST, @@ -1046,6 +1123,51 @@ pub fn create_router( Method::DELETE, Method::OPTIONS, ]) - .allow_headers(Any), - ) + .expose_headers([header::ACCEPT, header::CONTENT_TYPE]) + .max_age(Duration::from_secs(86400)); + if is_wildcard { + cors = cors.allow_headers(AllowHeaders::any()); + } else { + cors = cors + .allow_headers([ + header::AUTHORIZATION, + header::CONTENT_TYPE, + header::ACCEPT, + header::HeaderName::from_static("x-api-key"), + header::HeaderName::from_static("x-cluster-secret"), + header::HeaderName::from_static("x-shard-direct"), + header::HeaderName::from_static("x-scatter-gather"), + ]) + .allow_credentials(true); + } + + if allowed_origins.is_empty() { + // No explicit origins — deny any cross-origin request. + cors = cors.allow_origin(AllowOrigin::predicate(|_, _| false)); + } else if is_wildcard { + tracing::warn!("CORS wildcard mode - allowing any origin"); + cors = cors.allow_origin(AllowOrigin::any()); + } else { + let origins: Vec = allowed_origins + .iter() + .filter_map(|o| { + o.parse().ok().or_else(|| { + tracing::warn!("Failed to parse CORS origin '{}' - skipping", o); + None + }) + }) + .collect(); + if origins.is_empty() { + // Every configured origin was unparseable: deny rather than + // silently fall back to wildcard. + tracing::warn!( + "All configured CORS origins failed to parse - denying cross-origin" + ); + cors = cors.allow_origin(AllowOrigin::predicate(|_, _| false)); + } else { + cors = cors.allow_origin(AllowOrigin::list(origins)); + } + } + cors + }) } diff --git a/src/server/script_handlers.rs b/src/server/script_handlers.rs index ccdbb5d..f185b7f 100644 --- a/src/server/script_handlers.rs +++ b/src/server/script_handlers.rs @@ -107,21 +107,13 @@ pub async fn create_script_handler( let collection = db.get_collection(SCRIPTS_COLLECTION)?; // Generate unique ID based on db/service/collection/path + let path_key = sanitize_path_to_key(&req.path).ok_or_else(|| { + DbError::BadRequest("Script path may not contain parent-dir traversal".to_string()) + })?; let id = if let Some(col) = &req.collection { - format!( - "{}_{}_{}_{}", - db_name, - req.service, - col, - sanitize_path_to_key(&req.path) - ) + format!("{}_{}_{}_{}", db_name, req.service, col, path_key) } else { - format!( - "{}_{}_{}", - db_name, - req.service, - sanitize_path_to_key(&req.path) - ) + format!("{}_{}_{}", db_name, req.service, path_key) }; let now = chrono::Utc::now().to_rfc3339(); @@ -371,11 +363,21 @@ pub async fn get_script_stats_handler( // ==================== Helper Functions ==================== -/// Convert a URL path to a valid document key -fn sanitize_path_to_key(path: &str) -> String { - path.replace(['/', ':', '*'], "_") - .trim_matches('_') - .to_string() +/// Convert a URL path to a valid document key. +/// Returns `None` when the path contains parent-dir traversal so callers +/// can reject with a 400 instead of producing a colliding empty/degenerate key. +fn sanitize_path_to_key(path: &str) -> Option { + let p = std::path::Path::new(path); + if p.components() + .any(|c| matches!(c, std::path::Component::ParentDir)) + { + return None; + } + Some( + path.replace(['/', ':', '*'], "_") + .trim_matches('_') + .to_string(), + ) } /// Check if a script path pattern matches the actual path @@ -469,9 +471,25 @@ pub struct ReplError { pub async fn repl_eval_handler( State(state): State, Path(db_name): Path, - axum::Extension(_claims): axum::Extension, + axum::Extension(claims): axum::Extension, Json(req): Json, ) -> Result, DbError> { + // Reject livequery tokens - they have limited privileges + if claims.livequery == Some(true) { + return Err(DbError::Forbidden( + "REPL endpoint is not accessible with livequery tokens".to_string(), + )); + } + + // Require admin permission on the target database + crate::server::authorization::AuthorizationService::check_permission( + &claims, + &state, + crate::server::authorization::PermissionAction::Write, + Some(&db_name), + ) + .await?; + use std::time::Instant; let start = Instant::now(); @@ -1273,8 +1291,16 @@ mod tests { #[test] fn test_sanitize_path() { - assert_eq!(sanitize_path_to_key("hello"), "hello"); - assert_eq!(sanitize_path_to_key("users/:id"), "users__id"); - assert_eq!(sanitize_path_to_key("/api/test"), "api_test"); + assert_eq!(sanitize_path_to_key("hello").as_deref(), Some("hello")); + assert_eq!( + sanitize_path_to_key("users/:id").as_deref(), + Some("users__id") + ); + assert_eq!( + sanitize_path_to_key("/api/test").as_deref(), + Some("api_test") + ); + assert_eq!(sanitize_path_to_key("foo/../bar"), None); + assert_eq!(sanitize_path_to_key("../etc/passwd"), None); } } diff --git a/src/server/upload_session.rs b/src/server/upload_session.rs index 1dae3db..455d24f 100644 --- a/src/server/upload_session.rs +++ b/src/server/upload_session.rs @@ -6,6 +6,15 @@ use uuid::Uuid; /// Default chunk size: 1MB const DEFAULT_CHUNK_SIZE: u32 = 1024 * 1024; +/// Maximum total upload size: 10 GiB +const MAX_TOTAL_SIZE: u64 = 10 * 1024 * 1024 * 1024; + +/// Minimum chunk size: 64 KiB +const MIN_CHUNK_SIZE: u32 = 64 * 1024; + +/// Maximum chunk size: 16 MiB +const MAX_CHUNK_SIZE: u32 = 16 * 1024 * 1024; + /// Stores in-progress resumable blob upload sessions #[derive(Clone)] pub struct UploadSessionStore { @@ -46,10 +55,30 @@ impl UploadSessionStore { mime_type: Option, total_size: u64, chunk_size: Option, - ) -> UploadSessionInfo { + ) -> Result { + if total_size > MAX_TOTAL_SIZE { + return Err(crate::error::DbError::BadRequest(format!( + "total_size {} exceeds maximum allowed size of {}", + total_size, MAX_TOTAL_SIZE + ))); + } + + let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE); + if chunk_size < MIN_CHUNK_SIZE { + return Err(crate::error::DbError::BadRequest(format!( + "chunk_size {} is below minimum allowed size of {}", + chunk_size, MIN_CHUNK_SIZE + ))); + } + if chunk_size > MAX_CHUNK_SIZE { + return Err(crate::error::DbError::BadRequest(format!( + "chunk_size {} exceeds maximum allowed size of {}", + chunk_size, MAX_CHUNK_SIZE + ))); + } + let upload_id = Uuid::new_v7(uuid::Timestamp::now(uuid::NoContext)).to_string(); let blob_key = Uuid::new_v7(uuid::Timestamp::now(uuid::NoContext)).to_string(); - let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE); let total_chunks = if total_size == 0 { 0 } else { @@ -74,12 +103,12 @@ impl UploadSessionStore { self.sessions.insert(upload_id.clone(), session); - UploadSessionInfo { + Ok(UploadSessionInfo { upload_id, blob_key, chunk_size, total_chunks, - } + }) } /// Get a reference to a session for reading @@ -163,14 +192,16 @@ mod tests { #[test] fn test_create_session() { let store = UploadSessionStore::new(Duration::from_secs(300)); - let info = store.create( - "testdb".into(), - "blobs".into(), - Some("test.bin".into()), - Some("application/octet-stream".into()), - 1024 * 1024 * 5, // 5MB - None, - ); + let info = store + .create( + "testdb".into(), + "blobs".into(), + Some("test.bin".into()), + Some("application/octet-stream".into()), + 1024 * 1024 * 5, // 5MB + None, + ) + .unwrap(); assert_eq!(info.chunk_size, DEFAULT_CHUNK_SIZE); assert_eq!(info.total_chunks, 5); @@ -180,7 +211,9 @@ mod tests { #[test] fn test_session_expiration() { let store = UploadSessionStore::new(Duration::from_millis(50)); - let info = store.create("db".into(), "col".into(), None, None, 1024, None); + let info = store + .create("db".into(), "col".into(), None, None, 1024, None) + .unwrap(); std::thread::sleep(Duration::from_millis(100)); @@ -190,7 +223,9 @@ mod tests { #[test] fn test_remove_session() { let store = UploadSessionStore::new(Duration::from_secs(300)); - let info = store.create("db".into(), "col".into(), None, None, 1024, None); + let info = store + .create("db".into(), "col".into(), None, None, 1024, None) + .unwrap(); assert!(store.remove(&info.upload_id).is_some()); assert!(store.get(&info.upload_id).is_none()); @@ -201,22 +236,28 @@ mod tests { let store = UploadSessionStore::new(Duration::from_secs(300)); // Exact multiple - let info = store.create("db".into(), "c".into(), None, None, 3 * 1024 * 1024, None); + let info = store + .create("db".into(), "c".into(), None, None, 3 * 1024 * 1024, None) + .unwrap(); assert_eq!(info.total_chunks, 3); // Not exact - rounds up - let info = store.create( - "db".into(), - "c".into(), - None, - None, - 3 * 1024 * 1024 + 1, - None, - ); + let info = store + .create( + "db".into(), + "c".into(), + None, + None, + 3 * 1024 * 1024 + 1, + None, + ) + .unwrap(); assert_eq!(info.total_chunks, 4); // Zero size - let info = store.create("db".into(), "c".into(), None, None, 0, None); + let info = store + .create("db".into(), "c".into(), None, None, 0, None) + .unwrap(); assert_eq!(info.total_chunks, 0); } } diff --git a/src/sync/blob_replication.rs b/src/sync/blob_replication.rs index 6e06f7b..ab6a6fb 100644 --- a/src/sync/blob_replication.rs +++ b/src/sync/blob_replication.rs @@ -1,5 +1,6 @@ use axum::{ extract::{Multipart, Path, State}, + http::HeaderMap, response::Json, }; use serde_json::Value; @@ -7,12 +8,40 @@ use serde_json::Value; use crate::error::DbError; use crate::server::handlers::AppState; +/// Verify the X-Cluster-Secret header matches the configured keyfile. +/// +/// These `/_internal/blob/*` routes are mounted on the public router with no +/// user-auth middleware, so they MUST authenticate inter-node traffic via the +/// cluster secret. Fails closed when no keyfile is configured to prevent the +/// empty-secret bypass. +fn verify_cluster_secret(state: &AppState, headers: &HeaderMap) -> Result<(), DbError> { + let secret = state.cluster_secret(); + if secret.is_empty() { + return Err(DbError::InternalError( + "Cluster keyfile not configured".to_string(), + )); + } + let request_secret = headers + .get("X-Cluster-Secret") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + if !crate::server::auth::constant_time_eq(request_secret.as_bytes(), secret.as_bytes()) { + return Err(DbError::BadRequest("Invalid cluster secret".to_string())); + } + + Ok(()) +} + /// Fetch a specific blob chunk from this node /// GET /_internal/blob/replicate/:db/:collection/:key/chunk/:chunk_idx pub async fn get_blob_chunk( State(state): State, + headers: HeaderMap, Path((db_name, coll_name, blob_key, chunk_idx_str)): Path<(String, String, String, String)>, ) -> Result { + verify_cluster_secret(&state, &headers)?; + let chunk_idx = chunk_idx_str .parse::() .map_err(|_| DbError::BadRequest("Invalid chunk index".to_string()))?; @@ -48,7 +77,7 @@ pub async fn replicate_blob_to_node( blob_key: &str, chunks: &[(u32, Vec)], metadata: Option<&Value>, - _auth_token: &str, // Ignored for internal trusted cluster traffic for now + cluster_secret: &str, // sent as X-Cluster-Secret to authenticate inter-node traffic ) -> Result<(), DbError> { // Skip if no chunks to replicate AND no metadata if chunks.is_empty() && metadata.is_none() { @@ -95,14 +124,13 @@ pub async fn replicate_blob_to_node( form = form.part(format!("chunk_{}", index), part); } - let mut req_builder = client.post(&url); + let mut req_builder = client.post(&url).header("X-Cluster-Secret", cluster_secret); if let Some(trace_ctx) = crate::observability::get_current_trace_context() { req_builder = req_builder.header("traceparent", trace_ctx.to_header()); } let response = req_builder - // .bearer_auth(auth_token) // TODO: Internal auth .multipart(form) .send() .await @@ -124,9 +152,12 @@ pub async fn replicate_blob_to_node( /// POST /_internal/blob/replicate/:db/:collection/:key pub async fn receive_blob_replication( State(state): State, + headers: HeaderMap, Path((db_name, coll_name, blob_key)): Path<(String, String, String)>, mut multipart: Multipart, ) -> Result, DbError> { + verify_cluster_secret(&state, &headers)?; + let database = state.storage.get_database(&db_name)?; let collection = database.get_collection(&coll_name)?; @@ -162,6 +193,18 @@ pub async fn receive_blob_replication( .await .map_err(|e| DbError::BadRequest(e.to_string()))?; if let Ok(doc_value) = serde_json::from_str::(&text) { + // Reject metadata whose `_key` does not match the URL path: the + // URL is the authoritative key (mirrors how chunks are stored), + // and trusting the body lets a caller insert documents under a + // different key than the chunks they uploaded. + if let Some(meta_key) = doc_value.get("_key").and_then(|k| k.as_str()) { + if meta_key != blob_key { + return Err(DbError::BadRequest(format!( + "Metadata _key '{}' does not match URL path '{}'", + meta_key, blob_key + ))); + } + } tracing::info!("Inserting replicated metadata for blob {}", blob_key); // Insert metadata document // Note: This insert is local to this shard (Primary). @@ -225,9 +268,12 @@ pub async fn receive_blob_replication( /// POST /_internal/blob/upload/:db/:collection pub async fn receive_blob_upload( State(state): State, + headers: HeaderMap, Path((db_name, coll_name)): Path<(String, String)>, mut multipart: Multipart, ) -> Result, DbError> { + verify_cluster_secret(&state, &headers)?; + let database = state.storage.get_database(&db_name)?; // Auto-create the physical shard blob collection if it doesn't exist diff --git a/src/sync/log.rs b/src/sync/log.rs index f1412dd..3876534 100644 --- a/src/sync/log.rs +++ b/src/sync/log.rs @@ -89,6 +89,8 @@ impl SyncLog { opts.create_if_missing(true); opts.set_max_write_buffer_number(4); opts.set_write_buffer_size(64 * 1024 * 1024); // 64MB + opts.set_keep_log_file_num(5); + opts.set_recycle_log_file_num(3); let db = DB::open(&opts, &log_path).map_err(|e| e.to_string())?; let db = Arc::new(db); diff --git a/src/sync/protocol.rs b/src/sync/protocol.rs index c99de12..38b6bb5 100644 --- a/src/sync/protocol.rs +++ b/src/sync/protocol.rs @@ -115,9 +115,15 @@ pub struct NodeStats { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum SyncMessage { // === Authentication === - /// Server sends challenge - AuthChallenge { challenge: Vec }, - /// Client responds with HMAC + /// Server sends challenge with timestamp to prevent replay attacks + AuthChallenge { + challenge: Vec, + /// Unix timestamp when challenge was generated (milliseconds) + timestamp: u64, + /// Nonce to prevent replay (random bytes) + nonce: Vec, + }, + /// Client responds with HMAC (computes HMAC over challenge + timestamp + nonce) AuthResponse { hmac: Vec }, /// Server confirms auth result AuthResult { success: bool, message: String }, diff --git a/src/sync/transport.rs b/src/sync/transport.rs index 4c43dc3..3e1b737 100644 --- a/src/sync/transport.rs +++ b/src/sync/transport.rs @@ -185,10 +185,14 @@ impl ConnectionPool { } }; - let challenge = match msg { - SyncMessage::AuthChallenge { challenge } => { + let (challenge, timestamp, nonce) = match msg { + SyncMessage::AuthChallenge { + challenge, + timestamp, + nonce, + } => { debug!("authenticate_client: got challenge"); - challenge + (challenge, timestamp, nonce) } _ => { return Err(TransportError::AuthFailed( @@ -197,8 +201,8 @@ impl ConnectionPool { } }; - // Compute HMAC response - let hmac = self.compute_hmac(&challenge)?; + // Compute HMAC response including timestamp and nonce + let hmac = self.compute_hmac_with_timestamp(&challenge, timestamp, &nonce)?; // Send response debug!("authenticate_client: sending response"); @@ -226,8 +230,13 @@ impl ConnectionPool { } } - /// Compute HMAC of data using keyfile - fn compute_hmac(&self, data: &[u8]) -> Result, TransportError> { + /// Compute HMAC of data with timestamp and nonce using keyfile + fn compute_hmac_with_timestamp( + &self, + data: &[u8], + timestamp: u64, + nonce: &[u8], + ) -> Result, TransportError> { use hmac::{Hmac, Mac}; use sha2::Sha256; @@ -237,6 +246,8 @@ impl ConnectionPool { let mut mac = Hmac::::new_from_slice(&key) .map_err(|e| TransportError::AuthFailed(format!("Invalid key: {}", e)))?; mac.update(data); + mac.update(×tamp.to_be_bytes()); + mac.update(nonce); Ok(mac.finalize().into_bytes().to_vec()) } @@ -284,6 +295,7 @@ impl ConnectionPool { peer_addr: &str, max_attempts: u32, ) -> Result<(), TransportError> { + use rand::Rng; let mut delay = Duration::from_millis(100); for attempt in 1..=max_attempts { @@ -297,6 +309,11 @@ impl ConnectionPool { if attempt < max_attempts { tokio::time::sleep(delay).await; delay = std::cmp::min(delay * 2, Duration::from_secs(30)); + // Add up to ±25% jitter so reconnect storms don't synchronize + // (full jitter scales with current delay, not a fixed 25 ms). + let quarter = (delay.as_millis() as u64 / 4).max(1); + let jitter: u64 = rand::rngs::OsRng.gen_range(0..=quarter); + delay += Duration::from_millis(jitter); } } } @@ -461,7 +478,20 @@ impl SyncServer { // If no keyfile provided or it doesn't exist, skip authentication (for dev/test) if keyfile_path.is_empty() || !std::path::Path::new(keyfile_path).exists() { - debug!("authenticate_standalone: no keyfile found, skipping authentication"); + // Security: Check if keyfile is required via environment variable + let require_keyfile = std::env::var("SOLIDB_REQUIRE_KEYFILE") + .map(|v| v.to_lowercase() == "true") + .unwrap_or(false); + + if require_keyfile { + warn!("authenticate_standalone: SOLIDB_REQUIRE_KEYFILE=true but no keyfile found"); + return Err(TransportError::AuthFailed( + "Cluster keyfile required but not found (set SOLIDB_REQUIRE_KEYFILE=false to disable)".to_string(), + )); + } + + warn!("authenticate_standalone: no keyfile found, skipping authentication"); + warn!("WARNING: Inter-node communication is unauthenticated. Set SOLIDB_REQUIRE_KEYFILE=true to enforce authentication."); // Still need to handle magic header if not skipped if !skip_magic { @@ -502,14 +532,21 @@ impl SyncServer { debug!("authenticate_standalone: skip magic (multiplexed)"); } - // Generate random challenge + // Generate random challenge with timestamp and nonce to prevent replay attacks use rand::Rng; - let challenge: Vec = rand::thread_rng().gen::<[u8; 32]>().to_vec(); - - // Send challenge + let challenge: Vec = rand::rngs::OsRng.gen::<[u8; 32]>().to_vec(); + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let nonce: Vec = rand::rngs::OsRng.gen::<[u8; 16]>().to_vec(); + + // Send challenge with timestamp and nonce debug!("authenticate_standalone: sending challenge"); let challenge_msg = SyncMessage::AuthChallenge { challenge: challenge.clone(), + timestamp, + nonce: nonce.clone(), }; ConnectionPool::write_message(&mut stream, &challenge_msg).await?; debug!("authenticate_standalone: waiting for response"); @@ -535,10 +572,24 @@ impl SyncServer { } }; - // Verify HMAC - let expected_hmac = Self::compute_hmac_static(&challenge, keyfile_path)?; + // The 32-byte random challenge already prevents replay; timestamp here + // bounds how long the handshake may take (clients that delay past this + // window are rejected, limiting slow-loris on the auth path). + const HANDSHAKE_MAX_AGE_MS: u64 = 30_000; + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + if now_ms.saturating_sub(timestamp) > HANDSHAKE_MAX_AGE_MS { + return Err(TransportError::AuthFailed( + "Auth handshake timed out".to_string(), + )); + } + + let expected_hmac = + Self::compute_hmac_with_timestamp(&challenge, timestamp, &nonce, keyfile_path)?; - if client_hmac == expected_hmac { + if crate::server::auth::constant_time_eq(&client_hmac, &expected_hmac) { let _ = ConnectionPool::write_message( &mut stream, &SyncMessage::AuthResult { @@ -553,7 +604,12 @@ impl SyncServer { } } - fn compute_hmac_static(data: &[u8], keyfile_path: &str) -> Result, TransportError> { + fn compute_hmac_with_timestamp( + data: &[u8], + timestamp: u64, + nonce: &[u8], + keyfile_path: &str, + ) -> Result, TransportError> { use hmac::{Hmac, Mac}; use sha2::Sha256; @@ -564,6 +620,8 @@ impl SyncServer { let mut mac = Hmac::::new_from_slice(&key) .map_err(|e| TransportError::AuthFailed(format!("Invalid key: {}", e)))?; mac.update(data); + mac.update(×tamp.to_be_bytes()); + mac.update(nonce); Ok(mac.finalize().into_bytes().to_vec()) } diff --git a/src/sync/worker.rs b/src/sync/worker.rs index 7db744a..4761c2b 100644 --- a/src/sync/worker.rs +++ b/src/sync/worker.rs @@ -1183,7 +1183,13 @@ impl SyncWorker { } let payload = if compressed { - lz4_flex::decompress_size_prepended(&data).unwrap_or_default() + match lz4_flex::decompress_size_prepended(&data) { + Ok(p) => p, + Err(e) => { + tracing::error!("Decompression failed: {}, closing connection", e); + break; + } + } } else { data }; diff --git a/tasks/done/SEC-075-missing-auth-query-endpoint.md b/tasks/done/SEC-075-missing-auth-query-endpoint.md new file mode 100644 index 0000000..b13aef8 --- /dev/null +++ b/tasks/done/SEC-075-missing-auth-query-endpoint.md @@ -0,0 +1,25 @@ +# SEC-075: Missing Authentication on Query Execution Endpoint + +## Status +- **Severity**: CRITICAL +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/server/handlers/query.rs` +- **Lines**: 179-250 + +## Description +The `execute_query` endpoint at `/_api/database/{db}/cursor` does not validate authentication or authorization before executing queries. Any unauthenticated user can execute arbitrary SDBQL queries including mutations. + +## Exploit Scenario +```http +POST /_api/database/mydb/cursor HTTP/1.1 +Content-Type: application/json + +{"query": "FOR doc IN _users RETURN doc"} +``` + +## Recommendation +Add authentication middleware to validate JWT token or session before executing queries. + +## References +- Related: SEC-001 (auth bypass pattern) \ No newline at end of file diff --git a/tasks/done/SEC-076-sleep-blind-injection.md b/tasks/done/SEC-076-sleep-blind-injection.md new file mode 100644 index 0000000..aaf1f8a --- /dev/null +++ b/tasks/done/SEC-076-sleep-blind-injection.md @@ -0,0 +1,22 @@ +# SEC-076: Time-Based Blind Injection via SLEEP Function + +## Status +- **Severity**: HIGH +- **Category**: Injection +- **Project**: soli/db +- **File**: `src/sdbql/executor/builtins/misc.rs` +- **Lines**: 66-73 + +## Description +The `SLEEP(ms)` function allows blocking execution for arbitrary durations. Combined with conditional execution, this enables time-based blind injection attacks. + +## Exploit Scenario +```sql +FOR doc IN users FILTER SLEEP(doc.password == 'admin' ? 5000 : 0) RETURN doc +``` + +## Recommendation +Consider removing the SLEEP function entirely or restricting it to admin-only usage. + +## References +- Related: SEC-035 (transaction sdbql injection) \ No newline at end of file diff --git a/tasks/done/SEC-077-ssrf-solidb-fetch.md b/tasks/done/SEC-077-ssrf-solidb-fetch.md new file mode 100644 index 0000000..5106c45 --- /dev/null +++ b/tasks/done/SEC-077-ssrf-solidb-fetch.md @@ -0,0 +1,25 @@ +# SEC-077: SSRF via solidb.fetch() - No URL Validation + +## Status +- **Severity**: CRITICAL +- **Category**: SSRF +- **Project**: soli/db +- **File**: `src/scripting/lua_globals/http.rs` +- **Lines**: 1-65 + +## Description +The `solidb.fetch()` function allows Lua scripts to make HTTP requests to arbitrary URLs without any restrictions. This enables Server-Side Request Forgery (SSRF) attacks. + +## Exploit Scenario +```lua +-- Access internal services +local response = solidb.fetch("http://169.254.169.254/latest/meta-data/") +-- Access localhost services +local response = solidb.fetch("http://127.0.0.1:6379/") +``` + +## Recommendation +Add URL validation whitelist or restrict to allowed domains/ports. + +## References +- Related: SEC-007, SEC-015, SEC-016 (existing SSRF issues in lang) \ No newline at end of file diff --git a/tasks/done/SEC-078-file-traversal-response-file.md b/tasks/done/SEC-078-file-traversal-response-file.md new file mode 100644 index 0000000..dd31506 --- /dev/null +++ b/tasks/done/SEC-078-file-traversal-response-file.md @@ -0,0 +1,23 @@ +# SEC-078: Arbitrary File System Access via response.file() + +## Status +- **Severity**: HIGH +- **Category**: Path Traversal +- **Project**: soli/db +- **File**: `src/scripting/http_helpers.rs` +- **Lines**: 171-198 + +## Description +The `response.file(path)` function accepts a file path parameter and uses `std::fs::metadata()` to check file existence without path sanitization. This allows directory traversal attacks. + +## Exploit Scenario +```lua +local f = response.file("/etc/passwd") +local f = response.file("../../secrets/secrets.yaml") +``` + +## Recommendation +Implement strict path validation, restrict to a designated upload directory, and resolve paths before access. + +## References +- Related: SEC-006, SEC-010 (existing file traversal issues) \ No newline at end of file diff --git a/tasks/done/SEC-079-directory-traversal-upload.md b/tasks/done/SEC-079-directory-traversal-upload.md new file mode 100644 index 0000000..cacd770 --- /dev/null +++ b/tasks/done/SEC-079-directory-traversal-upload.md @@ -0,0 +1,23 @@ +# SEC-079: Directory Traversal in solidb.upload() + +## Status +- **Severity**: HIGH +- **Category**: Path Traversal +- **Project**: soli/db +- **File**: `src/scripting/file_handling.rs` +- **Lines**: 209-215 + +## Description +The upload function sanitizes directory paths with basic string replacement (`replace("..", "")`) which can be bypassed with encoding tricks like `....//` or `/../`. + +## Exploit Scenario +```lua +solidb.upload(data, { filename = "test.txt", directory = "....//....//etc/" }) +solidb.upload(data, { filename = "shell.jsp", directory = "%2e%2e%2f%2e%2e%2f" }) +``` + +## Recommendation +Use canonical path resolution and enforce directory allowlisting. + +## References +- Related: SEC-078, SEC-011 (zip slip) \ No newline at end of file diff --git a/tasks/done/SEC-080-no-tls-inter-node.md b/tasks/done/SEC-080-no-tls-inter-node.md new file mode 100644 index 0000000..2f92ac9 --- /dev/null +++ b/tasks/done/SEC-080-no-tls-inter-node.md @@ -0,0 +1,19 @@ +# SEC-080: No TLS/SSL Encryption for Inter-Node Communication + +## Status +- **Severity**: CRITICAL +- **Category**: Transport Security +- **Project**: soli/db +- **Files**: `src/sync/transport.rs`, `src/cluster/transport.rs` + +## Description +All inter-node replication traffic uses plaintext TCP connections. No TLS encryption is used for the binary sync protocol or cluster management messages. + +## Exploit Scenario +An attacker with network access between cluster nodes can eavesdrop on all replication traffic to capture document content or inject fake data. + +## Recommendation +Enable mutual TLS (mTLS) for both TCP sync protocol and HTTP API. + +## References +- Related: SEC-027 (db config forces http), SEC-042 (tls min version) \ No newline at end of file diff --git a/tasks/done/SEC-081-auth-bypass-no-keyfile.md b/tasks/done/SEC-081-auth-bypass-no-keyfile.md new file mode 100644 index 0000000..540598f --- /dev/null +++ b/tasks/done/SEC-081-auth-bypass-no-keyfile.md @@ -0,0 +1,20 @@ +# SEC-081: Authentication Bypass When Keyfile is Empty/Missing + +## Status +- **Severity**: CRITICAL +- **Category**: Authentication +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Lines**: 462-480 + +## Description +When no keyfile is configured or the keyfile is empty, the system skips authentication entirely. + +## Exploit Scenario +Any attacker can join the cluster and receive full replication stream without any credentials. + +## Recommendation +Require keyfile and fail startup if not present in production. + +## References +- Related: SEC-017 (app env test disables ssrf) \ No newline at end of file diff --git a/tasks/done/SEC-082-admin-password-logged.md b/tasks/done/SEC-082-admin-password-logged.md new file mode 100644 index 0000000..7bddb37 --- /dev/null +++ b/tasks/done/SEC-082-admin-password-logged.md @@ -0,0 +1,20 @@ +# SEC-082: Admin Password Logged in Plaintext + +## Status +- **Severity**: CRITICAL +- **Category**: Information Disclosure +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 309-333 + +## Description +When admin account is created with random password, the plaintext password is printed to stderr via `tracing::warn!()`. + +## Exploit Scenario +Anyone with access to server logs or terminal output can retrieve the admin password. + +## Recommendation +Remove password logging entirely or ensure it only appears in secure dev mode. + +## References +- Related: SEC-060 (http log userinfo secrets) \ No newline at end of file diff --git a/tasks/done/SEC-083-hmac-replay-attack.md b/tasks/done/SEC-083-hmac-replay-attack.md new file mode 100644 index 0000000..4c6175a --- /dev/null +++ b/tasks/done/SEC-083-hmac-replay-attack.md @@ -0,0 +1,20 @@ +# SEC-083: Weak HMAC Authentication - Replay Attack Vulnerability + +## Status +- **Severity**: HIGH +- **Category**: Authentication +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Lines**: 505-553 + +## Description +The inter-node authentication uses a simple random challenge with HMAC-SHA256 without sequence numbers or timestamps, making it vulnerable to replay attacks. + +## Exploit Scenario +Attacker captures challenge and response, then replays to authenticate as valid node. + +## Recommendation +Add timestamps and nonces to prevent replay attacks. + +## References +- Related: SEC-002 (predictable session ids) \ No newline at end of file diff --git a/tasks/done/SEC-084-cluster-secret-plaintext.md b/tasks/done/SEC-084-cluster-secret-plaintext.md new file mode 100644 index 0000000..6cd9371 --- /dev/null +++ b/tasks/done/SEC-084-cluster-secret-plaintext.md @@ -0,0 +1,19 @@ +# SEC-084: Cluster Secret Transmitted in Plaintext Headers + +## Status +- **Severity**: HIGH +- **Category**: Secret Management +- **Project**: soli/db +- **Files**: `src/cluster/websocket_client.rs`, `src/sharding/coordinator.rs` + +## Description +The cluster secret is sent in the `X-Cluster-Secret` HTTP header without TLS encryption. + +## Exploit Scenario +Network sniffing reveals the cluster authentication secret, enabling attacker to make direct API calls. + +## Recommendation +Use TLS and consider using certificate-based node authentication instead of secrets in headers. + +## References +- Related: SEC-080 (no TLS inter-node) \ No newline at end of file diff --git a/tasks/done/SEC-085-timing-attack-cluster-secret.md b/tasks/done/SEC-085-timing-attack-cluster-secret.md new file mode 100644 index 0000000..58fc9ee --- /dev/null +++ b/tasks/done/SEC-085-timing-attack-cluster-secret.md @@ -0,0 +1,20 @@ +# SEC-085: Timing Attack in Cluster Secret Comparison + +## Status +- **Severity**: HIGH +- **Category**: Cryptography +- **Project**: soli/db +- **Files**: `src/server/handlers/cluster.rs`, `src/server/cluster_handlers.rs` +- **Lines**: 471, 522, 577, 628 + +## Description +Uses regular `!=` operator instead of `constant_time_eq` for comparing cluster secrets. + +## Exploit Scenario +Attacker can determine the correct cluster secret byte-by-byte using timing analysis. + +## Recommendation +Replace `!=` with `constant_time_eq` in all cluster handlers. + +## References +- Related: SEC-029 (jwt decode shape confusion), SEC-053 (session id not validated) \ No newline at end of file diff --git a/tasks/done/SEC-086-websocket-no-origin-validation.md b/tasks/done/SEC-086-websocket-no-origin-validation.md new file mode 100644 index 0000000..800c5a5 --- /dev/null +++ b/tasks/done/SEC-086-websocket-no-origin-validation.md @@ -0,0 +1,20 @@ +# SEC-086: No WebSocket Origin Validation + +## Status +- **Severity**: HIGH +- **Category**: WebSocket Security +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs` +- **Lines**: 20-26, 75-88, 162-199 + +## Description +WebSocket handlers (`cluster_status_ws`, `monitor_ws_handler`, `ws_changefeed_handler`) do not validate the `Origin` header. + +## Exploit Scenario +A malicious website could establish WebSocket connections to access real-time changefeeds. + +## Recommendation +Add origin validation and use `validate_origin` function pattern. + +## References +- Related: SEC-032 (ws origin trusts xfh), SEC-044 (multivalue xfp xfh) \ No newline at end of file diff --git a/tasks/done/SEC-087-websocket-no-message-size-limit.md b/tasks/done/SEC-087-websocket-no-message-size-limit.md new file mode 100644 index 0000000..960d1d5 --- /dev/null +++ b/tasks/done/SEC-087-websocket-no-message-size-limit.md @@ -0,0 +1,20 @@ +# SEC-087: No WebSocket Message Size Limits + +## Status +- **Severity**: HIGH +- **Category**: DoS +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs` +- **Lines**: 237-281 + +## Description +Messages are processed without any size validation, enabling OOM attacks via massive messages. + +## Exploit Scenario +An attacker sends massive messages to exhaust server memory. + +## Recommendation +Add message size limits using `Message::max_size()` or similar. + +## References +- Related: SEC-047 (ws no max message size) in lang \ No newline at end of file diff --git a/tasks/done/SEC-088-hmac-no-constant-time.md b/tasks/done/SEC-088-hmac-no-constant-time.md new file mode 100644 index 0000000..1a5ba2f --- /dev/null +++ b/tasks/done/SEC-088-hmac-no-constant-time.md @@ -0,0 +1,20 @@ +# SEC-088: HMAC Comparison Not Constant-Time + +## Status +- **Severity**: HIGH +- **Category**: Cryptography +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Line**: 541 + +## Description +HMAC comparison uses regular `==` operator instead of constant-time comparison. + +## Exploit Scenario +Timing attacks could reveal the keyfile content used for HMAC authentication. + +## Recommendation +Use `subtle::ConstantTimeEq` for HMAC comparison. + +## References +- Related: SEC-085 (timing attack cluster secret) \ No newline at end of file diff --git a/tasks/done/SEC-089-cors-allow-origin-any.md b/tasks/done/SEC-089-cors-allow-origin-any.md new file mode 100644 index 0000000..08982a2 --- /dev/null +++ b/tasks/done/SEC-089-cors-allow-origin-any.md @@ -0,0 +1,20 @@ +# SEC-089: CORS Allow-Origin Any + +## Status +- **Severity**: CRITICAL +- **Category**: Configuration +- **Project**: soli/db +- **File**: `src/server/routes.rs` +- **Lines**: 1039-1050 + +## Description +The CORS layer is configured with `allow_origin(Any)`, allowing cross-origin requests from any domain. + +## Exploit Scenario +An attacker hosting a malicious page could steal auth tokens or perform actions on behalf of admin users. + +## Recommendation +Restrict CORS to specific trusted origins in production. + +## References +- Related: SEC-028 (cookie secure depends trust proxy) \ No newline at end of file diff --git a/tasks/done/SEC-090-no-tls-server.md b/tasks/done/SEC-090-no-tls-server.md new file mode 100644 index 0000000..28f70be --- /dev/null +++ b/tasks/done/SEC-090-no-tls-server.md @@ -0,0 +1,20 @@ +# SEC-090: No TLS for Server (Binds to 0.0.0.0 Without Encryption) + +## Status +- **Severity**: CRITICAL +- **Category**: Transport Security +- **Project**: soli/db +- **File**: `src/main.rs` +- **Lines**: 533, 676 + +## Description +Server binds to `0.0.0.0:6745` without any TLS encryption. All data including passwords and JWT tokens transmitted in plaintext. + +## Exploit Scenario +Network eavesdropping (MITM attack) can intercept all credentials and data in transit. + +## Recommendation +Enable TLS support or document that HTTP should be behind a TLS terminator. + +## References +- Related: SEC-027 (db config forces http), SEC-080 (no TLS inter-node) \ No newline at end of file diff --git a/tasks/done/SEC-091-permissive-auth-anonymous.md b/tasks/done/SEC-091-permissive-auth-anonymous.md new file mode 100644 index 0000000..fe1cd42 --- /dev/null +++ b/tasks/done/SEC-091-permissive-auth-anonymous.md @@ -0,0 +1,20 @@ +# SEC-091: Permissive Auth Middleware Allows Anonymous Access + +## Status +- **Severity**: HIGH +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 926-1021 + +## Description +The `permissive_auth_middleware` allows requests to proceed without authentication when no auth headers are present. + +## Exploit Scenario +Service scripts that rely on this middleware may be accessible to unauthenticated users. + +## Recommendation +Add explicit authentication checks to service scripts. + +## References +- Related: SEC-075 (missing auth query endpoint) \ No newline at end of file diff --git a/tasks/done/SEC-092-no-rate-limit-admin-endpoints.md b/tasks/done/SEC-092-no-rate-limit-admin-endpoints.md new file mode 100644 index 0000000..9dcf222 --- /dev/null +++ b/tasks/done/SEC-092-no-rate-limit-admin-endpoints.md @@ -0,0 +1,20 @@ +# SEC-092: No Rate Limiting on Admin Endpoints + +## Status +- **Severity**: MEDIUM +- **Category**: Rate Limiting +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 51-77 + +## Description +Rate limiting only applies to login attempts. Other sensitive endpoints like password change, API key management have no rate limiting. + +## Exploit Scenario +Brute force attacks against sensitive admin endpoints. + +## Recommendation +Add rate limiting to all sensitive endpoints. + +## References +- Related: SEC-030 (rate limit xff spoof) \ No newline at end of file diff --git a/tasks/done/SEC-093-constant-time-length-leak.md b/tasks/done/SEC-093-constant-time-length-leak.md new file mode 100644 index 0000000..41d6a2a --- /dev/null +++ b/tasks/done/SEC-093-constant-time-length-leak.md @@ -0,0 +1,20 @@ +# SEC-093: Constant-Time Comparison Length Leak + +## Status +- **Severity**: MEDIUM +- **Category**: Cryptography +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 759-764 + +## Description +The `constant_time_eq` function returns immediately when lengths differ, leaking the length of secrets via timing. + +## Exploit Scenario +Attacker can determine the length of secrets by measuring response time. + +## Recommendation +Use `subtle::ConstantTimeEq` from Rust's crypto libraries for proper constant-time comparison. + +## References +- Related: SEC-085, SEC-088 \ No newline at end of file diff --git a/tasks/done/SEC-094-unbounded-query-resources.md b/tasks/done/SEC-094-unbounded-query-resources.md new file mode 100644 index 0000000..107d944 --- /dev/null +++ b/tasks/done/SEC-094-unbounded-query-resources.md @@ -0,0 +1,23 @@ +# SEC-094: Unbounded Query Resource Exhaustion + +## Status +- **Severity**: HIGH +- **Category**: DoS +- **Project**: soli/db +- **File**: `src/server/handlers/query.rs` +- **Lines**: 20-21 + +## Description +While `QUERY_TIMEOUT_SECS` constant exists (30 seconds), it is not actively enforced during query execution. + +## Exploit Scenario +```sql +FOR i IN 1..1000000000000 INSERT {} INTO huge_collection +``` +Would attempt to create a trillion documents. + +## Recommendation +Implement timeout enforcement using `tokio::time::timeout`. + +## References +- Related: SEC-020 (unbounded thread fanout), SEC-068 (repl session unbounded) \ No newline at end of file diff --git a/tasks/done/SEC-095-open-redirect-solidb-redirect.md b/tasks/done/SEC-095-open-redirect-solidb-redirect.md new file mode 100644 index 0000000..df872db --- /dev/null +++ b/tasks/done/SEC-095-open-redirect-solidb-redirect.md @@ -0,0 +1,22 @@ +# SEC-095: Open Redirect via solidb.redirect() + +## Status +- **Severity**: MEDIUM +- **Category**: Open Redirect +- **Project**: soli/db +- **File**: `src/scripting/http_helpers.rs` +- **Lines**: 60-65 + +## Description +The redirect function accepts arbitrary URLs without validation, enabling phishing attacks via open redirect. + +## Exploit Scenario +```lua +solidb.redirect("https://evil-attacker.com/phishing-page") +``` + +## Recommendation +Validate redirect URLs against an allowlist of permitted domains. + +## References +- Related: SEC-012 (link to javascript xss) \ No newline at end of file diff --git a/tasks/done/SEC-096-transaction-toctou.md b/tasks/done/SEC-096-transaction-toctou.md new file mode 100644 index 0000000..9800c98 --- /dev/null +++ b/tasks/done/SEC-096-transaction-toctou.md @@ -0,0 +1,20 @@ +# SEC-096: Transaction Validation TOCTOU Race Condition + +## Status +- **Severity**: MEDIUM +- **Category**: Race Condition +- **Project**: soli/db +- **File**: `src/transaction/manager.rs` +- **Lines**: 79-151 + +## Description +The `validate` function collects errors without holding the transaction lock, then adds them while holding the lock. Transaction state could change between phases. + +## Exploit Scenario +Between releasing read lock and acquiring write lock, another thread could modify the transaction, potentially bypassing validation. + +## Recommendation +Keep validation under a single lock acquisition or use atomic compare-and-swap patterns. + +## References +- Related: SEC-039 (uniqueness toctou), SEC-008 (deadlock scenarios) \ No newline at end of file diff --git a/tasks/done/SEC-097-no-tls-verification-http-client.md b/tasks/done/SEC-097-no-tls-verification-http-client.md new file mode 100644 index 0000000..2341b0d --- /dev/null +++ b/tasks/done/SEC-097-no-tls-verification-http-client.md @@ -0,0 +1,19 @@ +# SEC-097: No TLS Verification for HTTP Client + +## Status +- **Severity**: HIGH +- **Category**: Transport Security +- **Project**: soli/db +- **File**: `src/storage/http_client.rs` + +## Description +The HTTP client used for inter-node HTTP communication does not verify TLS certificates. + +## Exploit Scenario +Man-in-the-middle attack during shard healing/copying operations allows attacker to intercept and modify data. + +## Recommendation +Configure the HTTP client to verify TLS certificates. + +## References +- Related: SEC-080 (no TLS inter-node), SEC-042a (tls min version cli) \ No newline at end of file diff --git a/tasks/done/SEC-098-path-sanitization-incomplete.md b/tasks/done/SEC-098-path-sanitization-incomplete.md new file mode 100644 index 0000000..7f386ee --- /dev/null +++ b/tasks/done/SEC-098-path-sanitization-incomplete.md @@ -0,0 +1,20 @@ +# SEC-098: Path Sanitization Only Replaces Characters (Incomplete) + +## Status +- **Severity**: MEDIUM +- **Category**: Path Traversal +- **Project**: soli/db +- **File**: `src/server/script_handlers.rs` +- **Lines**: 375-379 + +## Description +The `sanitize_path_to_key` function only replaces `/`, `:`, and `*` with underscores but doesn't validate for path traversal sequences like `../`. + +## Exploit Scenario +A path like `users/../../etc/passwd` sanitized to `users____etc_passwd` could still be exploited if used in file operations. + +## Recommendation +Add validation to reject paths containing `..` or ensure sanitized output cannot be used for file system access. + +## References +- Related: SEC-078, SEC-079 \ No newline at end of file diff --git a/tasks/done/SEC-099-lock-manager-deadlock.md b/tasks/done/SEC-099-lock-manager-deadlock.md new file mode 100644 index 0000000..fa6dc63 --- /dev/null +++ b/tasks/done/SEC-099-lock-manager-deadlock.md @@ -0,0 +1,20 @@ +# SEC-099: Lock Manager Potential Deadlock Scenario + +## Status +- **Severity**: MEDIUM +- **Category**: Race Condition +- **Project**: soli/db +- **File**: `src/transaction/lock_manager.rs` +- **Lines**: 44-85, 87-144, 146-182 + +## Description +The lock manager acquires shared locks by first checking exclusive locks, then acquiring shared locks. The upgrade function removes from shared before adding to exclusive without atomic upgrade. + +## Exploit Scenario +Transaction could hold a shared lock while another transaction tries to upgrade, leaving documents in inconsistent state. + +## Recommendation +Consider using a single lock acquisition point with proper lock escalation semantics. + +## References +- Related: SEC-096 (transaction toctou) \ No newline at end of file diff --git a/tasks/done/SEC-100-no-cluster-endpoint-authz.md b/tasks/done/SEC-100-no-cluster-endpoint-authz.md new file mode 100644 index 0000000..02f81af --- /dev/null +++ b/tasks/done/SEC-100-no-cluster-endpoint-authz.md @@ -0,0 +1,20 @@ +# SEC-100: No Authorization Checks on Internal Cluster Endpoints + +## Status +- **Severity**: HIGH +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/server/handlers/cluster.rs` +- **Lines**: 459-497, 510-600 + +## Description +Internal cluster management endpoints (`cluster_cleanup`, `cluster_reshard`) only check for cluster secret but don't verify requesting node is a legitimate cluster member. + +## Exploit Scenario +Attacker with knowledge of cluster secret can trigger cleanup or resharding operations. + +## Recommendation +Add verification that requesting node is actually a cluster member and request is part of legitimate operation. + +## References +- Related: SEC-081 (auth bypass no keyfile), SEC-084 (cluster secret plaintext) \ No newline at end of file diff --git a/tasks/done/SEC-101-error-message-disclosure.md b/tasks/done/SEC-101-error-message-disclosure.md new file mode 100644 index 0000000..0710f79 --- /dev/null +++ b/tasks/done/SEC-101-error-message-disclosure.md @@ -0,0 +1,20 @@ +# SEC-101: Error Messages Expose Internal Details + +## Status +- **Severity**: MEDIUM +- **Category**: Information Disclosure +- **Project**: soli/db +- **File**: `src/error.rs` +- **Lines**: 100-127 + +## Description +Error responses expose internal details like file paths, internal IPs, or implementation details via `DbError::InternalError`. + +## Exploit Scenario +Helps attackers understand system internals and craft targeted attacks. + +## Recommendation +Sanitize error messages before returning to clients. + +## References +- Related: SEC-009 (unauth dev source disclosure), SEC-059 (aws creds in error strings) \ No newline at end of file diff --git a/tasks/done/SEC-102-blob-chunk-ssrf.md b/tasks/done/SEC-102-blob-chunk-ssrf.md new file mode 100644 index 0000000..71b7a9b --- /dev/null +++ b/tasks/done/SEC-102-blob-chunk-ssrf.md @@ -0,0 +1,20 @@ +# SEC-102: Blob Chunk Fetch SSRF Risk in Cluster + +## Status +- **Severity**: MEDIUM +- **Category**: SSRF +- **Project**: soli/db +- **File**: `src/server/blob_handlers.rs` +- **Lines**: 442-521 + +## Description +The `fetch_blob_chunk_from_cluster` function iterates over `node_addresses` from shard coordinator and makes HTTP requests without validation. + +## Exploit Scenario +If attacker can manipulate cluster node addresses (via compromised coordinator), they could trigger HTTP requests to internal services. + +## Recommendation +Add URL validation and restrict to known cluster node addresses. + +## References +- Related: SEC-077 (ssrf solidb fetch), SEC-015 (dns rebound ssrf) \ No newline at end of file diff --git a/tasks/done/SEC-103-collection-name-validation.md b/tasks/done/SEC-103-collection-name-validation.md new file mode 100644 index 0000000..2bb6e8c --- /dev/null +++ b/tasks/done/SEC-103-collection-name-validation.md @@ -0,0 +1,23 @@ +# SEC-103: Collection Name Validation Missing + +## Status +- **Severity**: HIGH +- **Category**: Access Control +- **Project**: soli/db +- **Files**: `src/sdbql/parser/clauses.rs`, `src/sdbql/executor/data_source.rs` +- **Lines**: 194-231, 259-296, 629-637 + +## Description +Collection names parsed from queries are not validated or sanitized. Allows access to internal system collections like `_roles`, `_admins`, `_user_roles`. + +## Exploit Scenario +```sql +FOR doc IN _admins RETURN doc +``` +Could potentially return all admin users including password hashes. + +## Recommendation +Validate collection names don't start with `_` for non-admin users. + +## References +- Related: SEC-003 (mass assignment), SEC-076 (sleep blind injection) \ No newline at end of file diff --git a/tasks/done/SEC-104-template-string-injection.md b/tasks/done/SEC-104-template-string-injection.md new file mode 100644 index 0000000..8206cd2 --- /dev/null +++ b/tasks/done/SEC-104-template-string-injection.md @@ -0,0 +1,20 @@ +# SEC-104: Template String Injection in SDBQL + +## Status +- **Severity**: HIGH +- **Category**: Injection +- **Project**: soli/db +- **File**: `src/sdbql/lexer.rs` +- **Lines**: 253-311 + +## Description +Template strings (`$"..."` with `${expression}` interpolation) evaluate expressions directly. If user input flows into template strings without proper escaping, injection is possible. + +## Exploit Scenario +User input containing `${恶意代码}` could be evaluated as expression. + +## Recommendation +Validate and sanitize template string expressions before evaluation. + +## References +- Related: SEC-076 (sleep blind injection), SEC-058 (erb no context escape) \ No newline at end of file diff --git a/tasks/done/SEC-105-no-rate-limit-query-parsing.md b/tasks/done/SEC-105-no-rate-limit-query-parsing.md new file mode 100644 index 0000000..26c3c9d --- /dev/null +++ b/tasks/done/SEC-105-no-rate-limit-query-parsing.md @@ -0,0 +1,20 @@ +# SEC-105: No Rate Limiting on Query Parsing + +## Status +- **Severity**: MEDIUM +- **Category**: DoS +- **Project**: soli/db +- **File**: `src/sdbql/parser/mod.rs` +- **Lines**: 24-33, 37-48 + +## Description +The prepared statement cache doesn't rate-limit parsing requests. An attacker could exhaust CPU with rapid parse requests. + +## Exploit Scenario +Rapid malformed query submissions cause CPU exhaustion. + +## Recommendation +Add rate limiting on query parsing. + +## References +- Related: SEC-092 (no rate limit admin endpoints), SEC-030 (rate limit xff spoof) \ No newline at end of file diff --git a/tasks/done/SEC-106-jwt-secret-static-memory.md b/tasks/done/SEC-106-jwt-secret-static-memory.md new file mode 100644 index 0000000..78625ba --- /dev/null +++ b/tasks/done/SEC-106-jwt-secret-static-memory.md @@ -0,0 +1,20 @@ +# SEC-106: JWT Secret in Static Memory Without Protection + +## Status +- **Severity**: HIGH +- **Category**: Secret Management +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 114-142 + +## Description +JWT_SECRET is stored in a `Lazy` static without memory protection. Memory dumping attacks could extract the signing key. + +## Exploit Scenario +Process memory dump reveals JWT signing key, enabling attacker to forge tokens. + +## Recommendation +Consider using memory-protected secret storage in production. + +## References +- Related: SEC-054 (jwt min secret weak), SEC-029 (jwt decode shape confusion) \ No newline at end of file diff --git a/tasks/done/SEC-107-unsafe-pointer-cast.md b/tasks/done/SEC-107-unsafe-pointer-cast.md new file mode 100644 index 0000000..31b83e4 --- /dev/null +++ b/tasks/done/SEC-107-unsafe-pointer-cast.md @@ -0,0 +1,20 @@ +# SEC-107: Unsafe Pointer Cast in Collection Creation + +## Status +- **Severity**: MEDIUM +- **Category**: Memory Safety +- **Project**: soli/db +- **File**: `src/storage/engine.rs` +- **Lines**: 409-414 + +## Description +Uses unsafe pointer cast for RocksDB column family creation. + +## Exploit Scenario +Undefined behavior if pointer handling is incorrect. + +## Recommendation +Consider safer abstractions or thorough testing of the unsafe code path. + +## References +- Related: SEC-074 (system class arc non send sync) in lang \ No newline at end of file diff --git a/tasks/done/SEC-108-trust-on-first-connect.md b/tasks/done/SEC-108-trust-on-first-connect.md new file mode 100644 index 0000000..caea0f0 --- /dev/null +++ b/tasks/done/SEC-108-trust-on-first-connect.md @@ -0,0 +1,20 @@ +# SEC-108: Trust on First Connect - No Node Identity Verification + +## Status +- **Severity**: HIGH +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/cluster/manager.rs` +- **Lines**: 219-224, 226-241 + +## Description +When a node joins the cluster, it accepts the join request without verifying the node's identity beyond its claimed address. + +## Exploit Scenario +Attacker spins up malicious node, sends JoinRequest, receives full cluster topology and replication traffic. + +## Recommendation +Implement proper node identity verification using certificates or shared secrets. + +## References +- Related: SEC-081 (auth bypass no keyfile), SEC-080 (no TLS inter-node) \ No newline at end of file diff --git a/tasks/done/SEC-109-shard-key-injection.md b/tasks/done/SEC-109-shard-key-injection.md new file mode 100644 index 0000000..1e0abfe --- /dev/null +++ b/tasks/done/SEC-109-shard-key-injection.md @@ -0,0 +1,20 @@ +# SEC-109: Shard Key Injection in Routing + +## Status +- **Severity**: MEDIUM +- **Category**: Injection +- **Project**: soli/db +- **File**: `src/sharding/router.rs`, `src/sync/protocol.rs` +- **Lines**: 11-19, 418-426 + +## Description +The shard routing uses a simple hash of document key (`seahash::hash(key.as_bytes())`). Attacker can craft document keys that hash to specific shard IDs. + +## Exploit Scenario +Attacker creates documents with keys crafted to hash all data to a single shard, overwhelming specific nodes. + +## Recommendation +Add validation or salting to prevent shard key manipulation. + +## References +- Related: SEC-004c (qb chain field injection) \ No newline at end of file diff --git a/tasks/done/SEC-110-gossip-protocol-security.md b/tasks/done/SEC-110-gossip-protocol-security.md new file mode 100644 index 0000000..a978423 --- /dev/null +++ b/tasks/done/SEC-110-gossip-protocol-security.md @@ -0,0 +1,19 @@ +# SEC-110: Gossip Protocol Security Issues + +## Status +- **Severity**: MEDIUM +- **Category**: Protocol Security +- **Project**: soli/db +- **Files**: `src/cluster/manager.rs`, `src/cluster/health.rs` + +## Description +The cluster uses simple heartbeat-based gossip without authentication or integrity verification. Fake heartbeats can be injected. + +## Exploit Scenario +Mark healthy nodes as dead (unnecessary failover) or dead nodes as healthy (preventing proper failover). + +## Recommendation +Add authentication and integrity verification to gossip messages. + +## References +- Related: SEC-080 (no TLS inter-node), SEC-108 (trust on first connect) \ No newline at end of file diff --git a/tasks/done/SEC-111-shard-config-no-validation.md b/tasks/done/SEC-111-shard-config-no-validation.md new file mode 100644 index 0000000..99d3a39 --- /dev/null +++ b/tasks/done/SEC-111-shard-config-no-validation.md @@ -0,0 +1,20 @@ +# SEC-111: No Shard Configuration Validation + +## Status +- **Severity**: MEDIUM +- **Category**: Validation +- **Project**: soli/db +- **Files**: `src/sharding/coordinator.rs`, `src/cluster/manager.rs` +- **Lines**: 481-505 + +## Description +When receiving shard configurations via replication, there is no validation that configuration values are within acceptable bounds. + +## Exploit Scenario +Malicious `num_shards: u16::MAX` (65535) could cause infinite loops; `replication_factor: u16::MAX` tries to create excessive replicas. + +## Recommendation +Add bounds checking on num_shards, replication_factor. + +## References +- Related: SEC-062 (bind values untyped), SEC-109 (shard key injection) \ No newline at end of file diff --git a/tasks/done/SEC-112-weak-rate-limit-lua.md b/tasks/done/SEC-112-weak-rate-limit-lua.md new file mode 100644 index 0000000..e61ae7e --- /dev/null +++ b/tasks/done/SEC-112-weak-rate-limit-lua.md @@ -0,0 +1,24 @@ +# SEC-112: Weak Rate Limiting in Lua Scripting + +## Status +- **Severity**: MEDIUM +- **Category**: Rate Limiting +- **Project**: soli/db +- **File**: `src/scripting/error_handling.rs` +- **Lines**: 119-175 + +## Description +The rate limiter uses a static global `OnceLock` without per-database or per-user isolation. Attackers can bypass by using different identifiers. + +## Exploit Scenario +```lua +for i = 1, 1000 do + solidb.rate_limit("user" .. math.random(), 100, 60) +end +``` + +## Recommendation +Implement per-user/per-IP rate limiting with proper isolation. + +## References +- Related: SEC-092 (no rate limit admin endpoints), SEC-105 (no rate limit query parsing) \ No newline at end of file diff --git a/tasks/done/SEC-113-sql-translation-no-validation.md b/tasks/done/SEC-113-sql-translation-no-validation.md new file mode 100644 index 0000000..a5ff503 --- /dev/null +++ b/tasks/done/SEC-113-sql-translation-no-validation.md @@ -0,0 +1,20 @@ +# SEC-113: SQL Query Translation No Input Validation + +## Status +- **Severity**: MEDIUM +- **Category**: Injection +- **Project**: soli/db +- **File**: `src/server/sql_handlers.rs` +- **Lines**: 38-106 + +## Description +SQL queries are passed directly to `translate_sql_to_sdbql` without validation before translation. + +## Exploit Scenario +Translation vulnerabilities could be exploited via specially crafted SQL. + +## Recommendation +Add input validation before SQL-to-SDBQL translation. + +## References +- Related: SEC-035 (transaction sdbql injection), SEC-104 (template string injection) \ No newline at end of file diff --git a/tasks/done/SEC-114-ollama-url-scheme-ssrf.md b/tasks/done/SEC-114-ollama-url-scheme-ssrf.md new file mode 100644 index 0000000..cda882c --- /dev/null +++ b/tasks/done/SEC-114-ollama-url-scheme-ssrf.md @@ -0,0 +1,20 @@ +# SEC-114: Ollama URL Scheme Prepends HTTP Without Validation + +## Status +- **Severity**: MEDIUM +- **Category**: SSRF +- **Project**: soli/db +- **File**: `src/server/llm_client.rs` +- **Lines**: 174-180 + +## Description +If a user configures `OLLAMA_URL` with `localhost:11434/internal/evil`, it prepends `http://` making `http://localhost:11434/internal/evil`. + +## Exploit Scenario +Redirection to malicious internal endpoints. + +## Recommendation +Validate URLs against blocklist of internal addresses. + +## References +- Related: SEC-077 (ssrf solidb fetch), SEC-102 (blob chunk ssrf) \ No newline at end of file diff --git a/tasks/done/SEC-115-no-database-name-validation.md b/tasks/done/SEC-115-no-database-name-validation.md new file mode 100644 index 0000000..b17d754 --- /dev/null +++ b/tasks/done/SEC-115-no-database-name-validation.md @@ -0,0 +1,19 @@ +# SEC-115: No Database Name Validation + +## Status +- **Severity**: MEDIUM +- **Category**: Input Validation +- **Project**: soli/db +- **Files**: Throughout handlers (env_handlers.rs, script_handlers.rs, role_handlers.rs) + +## Description +Database names from URLs are used directly without validation. + +## Exploit Scenario +Malformed or malicious database names cause unexpected behavior. + +## Recommendation +Validate database names against expected patterns. + +## References +- Related: SEC-103 (collection name validation) \ No newline at end of file diff --git a/tasks/done/SEC-116-protected-collections-incomplete.md b/tasks/done/SEC-116-protected-collections-incomplete.md new file mode 100644 index 0000000..b811c96 --- /dev/null +++ b/tasks/done/SEC-116-protected-collections-incomplete.md @@ -0,0 +1,20 @@ +# SEC-116: Protected Collections List Incomplete + +## Status +- **Severity**: MEDIUM +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/server/handlers/system.rs` +- **Lines**: 11-18 + +## Description +Only `_admins` and `_api_keys` are protected. Collections like `_roles`, `_user_roles`, `_scripts`, `_services` are not explicitly protected. + +## Exploit Scenario +Access to security-sensitive collections without proper authorization. + +## Recommendation +Expand protected collections list to include all security-sensitive collections. + +## References +- Related: SEC-103 (collection name validation), SEC-091 (permissive auth) \ No newline at end of file diff --git a/tasks/done/SEC-117-live-query-token-expiry.md b/tasks/done/SEC-117-live-query-token-expiry.md new file mode 100644 index 0000000..3cd8c35 --- /dev/null +++ b/tasks/done/SEC-117-live-query-token-expiry.md @@ -0,0 +1,20 @@ +# SEC-117: Live Query Token 2-Second Expiry Causes Auth Failures + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 616-637 + +## Description +Live query token expires in 2 seconds. Legitimate clients may not have sufficient time to establish WebSocket connection. + +## Exploit Scenario +Repeated auth failures causing service disruption. + +## Recommendation +Consider increasing expiration to 30 seconds while maintaining security. + +## References +- Related: SEC-042 (tls min version) \ No newline at end of file diff --git a/tasks/done/SEC-118-bind-vars-not-validated.md b/tasks/done/SEC-118-bind-vars-not-validated.md new file mode 100644 index 0000000..46ac15a --- /dev/null +++ b/tasks/done/SEC-118-bind-vars-not-validated.md @@ -0,0 +1,20 @@ +# SEC-118: Bind Variables Not Validated + +## Status +- **Severity**: MEDIUM +- **Category**: Input Validation +- **Project**: soli/db +- **File**: `src/sdbql/executor/expression.rs` +- **Lines**: 91-103 + +## Description +Bind variables (`@name`) passed in requests are not validated before use. + +## Exploit Scenario +Malicious bind variable values could exploit logic vulnerabilities in query execution. + +## Recommendation +Validate bind variable contents against expected types/schemas. + +## References +- Related: SEC-062 (bind values untyped), SEC-113 (sql translation no validation) \ No newline at end of file diff --git a/tasks/done/SEC-119-regex-dos-potential.md b/tasks/done/SEC-119-regex-dos-potential.md new file mode 100644 index 0000000..6f10dbd --- /dev/null +++ b/tasks/done/SEC-119-regex-dos-potential.md @@ -0,0 +1,20 @@ +# SEC-119: Regex DoS Potential + +## Status +- **Severity**: MEDIUM +- **Category**: DoS +- **Project**: soli/db +- **Files**: `src/sdbql/executor/utils.rs`, `src/sdbql/executor/helpers.rs` +- **Lines**: 16-29, 135-151 + +## Description +While `safe_regex` limits pattern length (1024 bytes) and compiled size (1MB), certain regex patterns like `(a+)+$` can cause time complexity attacks. + +## Exploit Scenario +Carefully crafted regex causes exponential backtracking. + +## Recommendation +Add complexity limits beyond size limits, consider using `heuristics` feature of regex crate. + +## References +- Related: SEC-105 (no rate limit query parsing) \ No newline at end of file diff --git a/tasks/done/SEC-120-repl-arbitrary-code.md b/tasks/done/SEC-120-repl-arbitrary-code.md new file mode 100644 index 0000000..9c8ad01 --- /dev/null +++ b/tasks/done/SEC-120-repl-arbitrary-code.md @@ -0,0 +1,20 @@ +# SEC-120: REPL Endpoint - Arbitrary Lua Code Execution + +## Status +- **Severity**: CRITICAL +- **Category**: Access Control +- **Project**: soli/db +- **File**: `src/server/script_handlers.rs` +- **Lines**: 469-545 + +## Description +The REPL endpoint executes arbitrary Lua code provided by authenticated users with full database API access via `ScriptEngine`. + +## Exploit Scenario +Compromised or malicious authenticated user executes harmful Lua code to read/modify/delete all data or execute system commands. + +## Recommendation +Restrict REPL endpoint access, consider requiring separate elevated permissions. + +## References +- Related: SEC-091 (permissive auth anonymous), SEC-077 (ssrf solidb fetch) \ No newline at end of file diff --git a/tasks/done/SEC-121-driver-protocol-unauthenticated.md b/tasks/done/SEC-121-driver-protocol-unauthenticated.md new file mode 100644 index 0000000..4ab5a35 --- /dev/null +++ b/tasks/done/SEC-121-driver-protocol-unauthenticated.md @@ -0,0 +1,27 @@ +# SEC-121: Native driver protocol skips authentication + +## Status +- **Severity**: CRITICAL +- **Category**: Authentication Bypass +- **Project**: soli/db +- **File**: `src/driver/handlers/mod.rs` +- **Lines**: 130-260 (`execute_command`) + +## Description +The MessagePack-based binary driver protocol exposes commands such as `Insert`, `Delete`, `CreateDatabase`, `DeleteDatabase`, and `Query`. The handler tracks an `authenticated_db` field on the connection state but never checks it before dispatching commands — the `Auth` command sets the field but no other command verifies it. + +Any client that speaks the magic header `solidb-drv-v1\0` on the multiplexed sync port can read and write the entire database without credentials. + +## Exploit Scenario +```text +client TCP-connects to sync port +client sends: solidb-drv-v1\0 +client sends: Command::Insert { db, collection, doc } (no Auth first) +server inserts the document +``` + +## Recommendation +Reject every non-`Ping`/non-`Auth` command if `self.authenticated_db.is_none()`. Treat the binary driver path with the same auth posture as the HTTP API. + +## References +- Related: SEC-081 (keyfile required for sync), SEC-108 (TOFC pattern). diff --git a/tasks/done/SEC-122-internal-blob-endpoints-unauth.md b/tasks/done/SEC-122-internal-blob-endpoints-unauth.md new file mode 100644 index 0000000..e30e836 --- /dev/null +++ b/tasks/done/SEC-122-internal-blob-endpoints-unauth.md @@ -0,0 +1,25 @@ +# SEC-122: `/_internal/blob/*` endpoints accept unauthenticated requests + +## Status +- **Severity**: CRITICAL +- **Category**: Authentication Bypass +- **Project**: soli/db +- **File**: `src/server/routes.rs`, `src/sync/blob_replication.rs` +- **Lines**: routes.rs:1031-1046; blob_replication.rs:51, 105 + +## Description +Three "internal" routes are mounted in the public router with no auth middleware, no cluster-secret check, and no origin validation: +- `POST /_internal/blob/replicate/{db}/{collection}/{key}` +- `POST /_internal/blob/upload/{db}/{collection}` (auto-creates blob collections) +- `GET /_internal/blob/replicate/{db}/{collection}/{key}/chunk/{idx}` + +The handlers accept attacker-controlled JSON metadata (the `_key` from the body is trusted, not the URL path) and write blob chunks to any database/collection — up to the 500 MB body limit per request. + +## Exploit Scenario +A remote attacker reads or overwrites blobs in any collection, exfiltrates chunks via `get_blob_chunk`, or fills disk with large multipart uploads. Combined with the auto-create behavior, the attacker can also create new collections with arbitrary names. + +## Recommendation +Gate all `/_internal/*` routes behind a cluster-secret middleware that requires a valid `X-Cluster-Secret` matching the keyfile, and refuses when the keyfile is empty (see SEC-123). Validate that the metadata `_key` matches the URL path. Verify per-chunk integrity (e.g., chunk hash supplied by the coordinator). + +## References +- Related: SEC-081, SEC-102, SEC-123. diff --git a/tasks/done/SEC-123-empty-cluster-secret-bypass.md b/tasks/done/SEC-123-empty-cluster-secret-bypass.md new file mode 100644 index 0000000..36721b6 --- /dev/null +++ b/tasks/done/SEC-123-empty-cluster-secret-bypass.md @@ -0,0 +1,33 @@ +# SEC-123: Empty cluster secret bypasses cluster admin endpoints + +## Status +- **Severity**: CRITICAL +- **Category**: Authentication Bypass +- **Project**: soli/db +- **File**: `src/server/handlers/cluster.rs`, `src/server/cluster_handlers.rs`, `src/server/handlers/sync.rs`, `src/server/auth.rs` +- **Lines**: handlers/cluster.rs:471, 524, 692, 773; cluster_handlers.rs:574, 630; auth.rs:819 + +## Description +Multiple cluster admin handlers use the pattern: +```rust +if !secret.is_empty() && !constant_time_eq(request_secret, secret) { + return Err(...); +} +``` +When `cluster_secret()` returns the empty string (no keyfile loaded), the bad-secret branch is **skipped entirely** — any caller is accepted. Affected handlers include `cluster_cleanup`, `cluster_reshard`, `cluster_blob_rebalance`, several `sync` admin routes, and the cluster-internal X-Cluster-Secret check in `auth_middleware`. + +Distinct from SEC-081 which fixed the inter-node sync transport: these are the HTTP control-plane endpoints. + +## Exploit Scenario +```http +POST /_api/cluster/reshard +X-Cluster-Secret: anything +{...} +``` +On a node started without a keyfile, this triggers reshard migrations (deleting documents from the source shard) without any auth. + +## Recommendation +Fail closed: when `secret.is_empty()`, reject the request with 503 ("cluster keyfile not configured"). Tie this to a `SOLIDB_REQUIRE_KEYFILE` mode that mirrors SEC-081. + +## References +- Related: SEC-081, SEC-085, SEC-100. diff --git a/tasks/done/SEC-124-jwt-no-roles-admin-autograny.md b/tasks/done/SEC-124-jwt-no-roles-admin-autograny.md new file mode 100644 index 0000000..8497c4f --- /dev/null +++ b/tasks/done/SEC-124-jwt-no-roles-admin-autograny.md @@ -0,0 +1,29 @@ +# SEC-124: Login JWTs lack roles, triggering global-admin auto-grant + +## Status +- **Severity**: CRITICAL +- **Category**: Privilege Escalation +- **Project**: soli/db +- **File**: `src/server/handlers/auth.rs`, `src/server/authorization.rs`, `src/server/auth.rs` +- **Lines**: handlers/auth.rs:351; authorization.rs:280-287; auth.rs:765-775 + +## Description +`login_handler` calls `AuthService::create_jwt(&user.username)` without populating roles, so the issued JWT has `roles: None`. `AuthorizationService::get_effective_permissions` then sees an empty role set and **inserts `Permission::global_admin()`** "for backward compatibility". + +The same outcome occurs for Basic auth via `get_user_roles`, which auto-grants admin when the `_admins` collection has a single document. + +Net effect: every successfully authenticating user — including a deliberately demoted viewer — receives a 24-hour admin JWT. + +## Exploit Scenario +1. Admin creates a `viewer` API key for a user. +2. User logs in with their password. +3. The returned JWT carries `roles: None`. +4. The user reaches `DELETE /_api/database/{db}` — the authorization layer sees no roles → inserts `global_admin` → request succeeds. + +## Recommendation +- In `login_handler`, populate roles via `create_jwt_with_roles(username, get_user_roles(...), scoped_databases)`. +- In `get_effective_permissions`, treat empty/missing roles as **no permissions**, not admin. +- Remove the "single admin auto-grant" in `get_user_roles` once a deliberate first-run setup flow exists. + +## References +- Related: SEC-091, SEC-106. diff --git a/tasks/done/SEC-125-queue-script-path-injection.md b/tasks/done/SEC-125-queue-script-path-injection.md new file mode 100644 index 0000000..d13da69 --- /dev/null +++ b/tasks/done/SEC-125-queue-script-path-injection.md @@ -0,0 +1,29 @@ +# SEC-125: SDBQL injection in queue worker via `script_path` + +## Status +- **Severity**: CRITICAL +- **Category**: Injection / Privilege Escalation +- **Project**: soli/db +- **File**: `src/queue/jobs.rs` +- **Lines**: 172-175 (worker FILTER), 201-207 (admin context), plus `src/queue/cron.rs` + +## Description +`execute_job` interpolates `job.script_path` directly into a SDBQL query as a single-quoted literal. The field is fully attacker-controlled (`EnqueueRequest.script`, `CreateCronJobRequest.script`) and never validated. + +Compounding this, queue and cron jobs always run with `ScriptUser { username: "_system", roles: ["admin"] }` (see SEC-139), so a successful injection executes attacker-controlled Lua as admin. + +## Exploit Scenario +```http +POST /queues/default/enqueue +{ "script": "x' OR true RETURN s LIMIT 1; //" } +``` +The worker's FILTER becomes `FILTER s.path == 'x' OR true RETURN s LIMIT 1; //'` — picks up an arbitrary `_scripts` document and runs it as admin. + +## Recommendation +- Use bind variables (`@script_path`) rather than string interpolation. +- Reject `script_path` values that fail a strict allowlist regex (e.g., `^[A-Za-z0-9_/\-.]+$`). +- Cap length to a sane bound. +- Apply the same validation in cron CREATE/UPDATE (see SEC-173). + +## References +- Related: SEC-118, SEC-139. diff --git a/tasks/done/SEC-126-rbac-missing-on-privileged-endpoints.md b/tasks/done/SEC-126-rbac-missing-on-privileged-endpoints.md new file mode 100644 index 0000000..2b7c6d2 --- /dev/null +++ b/tasks/done/SEC-126-rbac-missing-on-privileged-endpoints.md @@ -0,0 +1,24 @@ +# SEC-126: RBAC checks not enforced on privileged endpoints + +## Status +- **Severity**: HIGH +- **Category**: Authorization +- **Project**: soli/db +- **File**: `src/server/handlers/databases.rs`, `src/server/handlers/auth.rs`, `src/server/script_handlers.rs`, `src/server/cluster_handlers.rs`, others +- **Lines**: handlers/databases.rs:90-110 (delete_database); handlers/auth.rs:140, 215, 247 (API key CRUD); cluster_handlers.rs:514-561 (remove_node, rebalance); script_handlers.rs (script/service/trigger CRUD); transaction_handlers.rs + +## Description +Only `role_handlers.rs` calls `AuthorizationService::check_permission`. Every other privileged handler accepts any authenticated principal regardless of role. A `viewer` API key (with global_read only) can call `DELETE /_api/database/{db}`, create new admin API keys, install service scripts, or invoke `cluster_remove_node` / `cluster_rebalance`. + +## Exploit Scenario +A read-only API key reaches `POST /_api/auth/api-keys` with `{role: "admin"}` and receives a new admin key — full privilege escalation, no special trick needed once SEC-124 is also in play (and even without it, since the handlers don't check roles at all). + +## Recommendation +Add `AuthorizationService::check_permission(&claims, &state, action, scope)` at the top of every mutating handler. Particularly: +- DB/collection lifecycle: `Admin` on the database. +- API key CRUD, role/user CRUD: `Admin` global. +- Cluster ops: `Admin` global. +- Script/service/trigger CRUD: `Write` on target database (or `Admin`). + +## References +- Depends on SEC-124 to make role checks meaningful. diff --git a/tasks/done/SEC-127-repl-no-admin-check.md b/tasks/done/SEC-127-repl-no-admin-check.md new file mode 100644 index 0000000..3df0430 --- /dev/null +++ b/tasks/done/SEC-127-repl-no-admin-check.md @@ -0,0 +1,22 @@ +# SEC-127: REPL endpoint accepts any authenticated user + +## Status +- **Severity**: HIGH +- **Category**: Authorization +- **Project**: soli/db +- **File**: `src/server/script_handlers.rs` +- **Lines**: 468-471 (`repl_eval_handler`) + +## Description +`/_api/database/{db}/repl` runs arbitrary Lua against `state.storage` and ignores `claims` (`_claims` parameter). Any token holder — including a viewer-role API key or short-lived `livequery` token — can execute arbitrary Lua, effectively bypassing all RBAC. + +## Exploit Scenario +Viewer API key holder POSTs `solidb.delete_database("production")` to the REPL. + +## Recommendation +- Require admin role (or at minimum `Write` on the target database). +- Explicitly reject claims with `livequery == Some(true)`. +- Consider gating the REPL behind a feature flag in production builds. + +## References +- Related: SEC-120, SEC-124, SEC-126, SEC-129. diff --git a/tasks/done/SEC-128-cluster-status-ws-no-auth.md b/tasks/done/SEC-128-cluster-status-ws-no-auth.md new file mode 100644 index 0000000..a1a8e64 --- /dev/null +++ b/tasks/done/SEC-128-cluster-status-ws-no-auth.md @@ -0,0 +1,20 @@ +# SEC-128: `/_api/cluster/status/ws` exposes cluster info without auth + +## Status +- **Severity**: HIGH +- **Category**: Information Disclosure / Authentication Bypass +- **Project**: soli/db +- **File**: `src/server/routes.rs`, `src/server/handlers/websocket.rs` +- **Lines**: routes.rs:1048; websocket.rs:24-74 (`cluster_status_ws`) + +## Description +`cluster_status_ws` is mounted in the public router and never validates a token nor an `Origin`, unlike `monitor_ws_handler` and `ws_changefeed_handler` which both require auth. The handler streams node ID, version, peer addresses, doc counts, and disk paths every second to anyone who can connect. + +## Exploit Scenario +An unauthenticated remote attacker connects to `wss://target/_api/cluster/status/ws` and obtains a continuous feed of cluster topology and resource metrics — useful for reconnaissance and DoS targeting. + +## Recommendation +Mirror the pattern from `monitor_ws_handler`: require a valid JWT/API-key and call `validate_ws_origin` before upgrading. + +## References +- Related: SEC-086, SEC-091. diff --git a/tasks/done/SEC-129-livequery-claim-not-enforced.md b/tasks/done/SEC-129-livequery-claim-not-enforced.md new file mode 100644 index 0000000..8386f8f --- /dev/null +++ b/tasks/done/SEC-129-livequery-claim-not-enforced.md @@ -0,0 +1,20 @@ +# SEC-129: `livequery` claim is never enforced + +## Status +- **Severity**: HIGH +- **Category**: Authorization +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 150 (claim definition), 651 (token issuance), 665-674 (`validate_token`) + +## Description +The `/_api/livequery/token` endpoint mints JWTs with `livequery: Some(true)` and a 2-second TTL. `validate_token` returns the claims unchanged and downstream handlers never inspect `claims.livequery`. The 2-second TTL is a soft mitigation rather than a security boundary — within that window the token is accepted as a fully privileged JWT (and gains admin via SEC-124). + +## Exploit Scenario +A user requests a livequery token, then within 2 seconds calls `DELETE /_api/database/{db}` using that same token — request is accepted. + +## Recommendation +In `auth_middleware`, when `claims.livequery == Some(true)`, allow the request only when the path is on a strict whitelist (currently `/_api/ws/changefeed` and possibly `/_api/livequery/*`). Reject everywhere else with 403. + +## References +- Related: SEC-117, SEC-124. diff --git a/tasks/done/SEC-130-sdbql-parser-recursion-depth.md b/tasks/done/SEC-130-sdbql-parser-recursion-depth.md new file mode 100644 index 0000000..6a1c78b --- /dev/null +++ b/tasks/done/SEC-130-sdbql-parser-recursion-depth.md @@ -0,0 +1,20 @@ +# SEC-130: SDBQL parser has no recursion-depth limit + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/sdbql/parser/mod.rs`, `src/sdbql/parser/expressions/precedence.rs`, `src/sdbql/parser/expressions/primary.rs` +- **Lines**: parser/mod.rs:69-273; precedence.rs (full); primary.rs:239-263 + +## Description +`parse_query`, `parse_parenthesized_expression`, `parse_unparenthesized_subquery`, and the precedence cascade recurse without bound on user input. + +## Exploit Scenario +A 100 KB query body of nested parentheses such as `((((((1))))))…` or nested `FOR`/subqueries crashes the server with stack overflow (Rust's default 8 MB thread stack triggers an abort, not a recoverable panic). + +## Recommendation +Add a `depth: usize` field on `Parser`, increment in each recursive descent call, and return `ParseError("Query nesting too deep")` once the depth exceeds e.g. 64. + +## References +- Related: SEC-094 (query timeout). diff --git a/tasks/done/SEC-131-range-eager-allocation.md b/tasks/done/SEC-131-range-eager-allocation.md new file mode 100644 index 0000000..4474b7b --- /dev/null +++ b/tasks/done/SEC-131-range-eager-allocation.md @@ -0,0 +1,24 @@ +# SEC-131: SDBQL range expansion eagerly allocates + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/sdbql/executor/expression.rs` +- **Lines**: 306-310 + +## Description +Range expressions evaluate via `(start..=end).collect::>()`, materializing the entire range in memory before iteration. The SDBQL query timeout fires from `tokio::time::timeout`, which cannot interrupt a synchronous allocation running inside `spawn_blocking`. + +## Exploit Scenario +```sdbql +FOR i IN 0..1000000000 RETURN 1 +``` +Allocates ~16 GB before any timeout can fire — process is killed by the OOM-killer. + +## Recommendation +- Cap range size (reject `end - start > 10_000_000`, configurable). +- Stream via an iterator-backed source so the executor and FOR loop can short-circuit on timeout/limit. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-132-import-data-length-trusted.md b/tasks/done/SEC-132-import-data-length-trusted.md new file mode 100644 index 0000000..dc16984 --- /dev/null +++ b/tasks/done/SEC-132-import-data-length-trusted.md @@ -0,0 +1,24 @@ +# SEC-132: Import/restore trust attacker-supplied `_data_length` + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service / Memory Exhaustion +- **Project**: soli/db +- **File**: `src/server/handlers/import_export.rs`, `src/bin/solidb-restore.rs` +- **Lines**: import_export.rs:304-328; solidb-restore.rs:301 + +## Description +The streaming import path reads a `_data_length: u64` field from the request body and casts it to `usize` to size buffers / drive a read loop. There is no upper bound. Within axum's 500 MB body limit, an attacker can declare `"_data_length": 9999999999999` and stall the connection while the server pins memory. The CLI restore path has the same pattern when reading malicious dump files. + +## Exploit Scenario +1. Authenticated user calls `POST /_api/import` with a JSONL header `{"_data_length": 9999999999999}`. +2. Server `Vec`s grow to that size or the read loop hangs awaiting bytes that never arrive. +3. Concurrent uploads multiply impact. + +## Recommendation +- Cap `_data_length` per chunk to a sane maximum (e.g. 16 MiB). +- Add a per-`stream.next()` timeout. +- Same fix in `solidb-restore` to harden against malicious dump files. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-133-upload-session-unbounded-chunks.md b/tasks/done/SEC-133-upload-session-unbounded-chunks.md new file mode 100644 index 0000000..1c320e3 --- /dev/null +++ b/tasks/done/SEC-133-upload-session-unbounded-chunks.md @@ -0,0 +1,26 @@ +# SEC-133: Upload session creation accepts unbounded `total_size` / `chunk_size` + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/server/upload_session.rs` +- **Lines**: 53-69 + +## Description +`UploadSession` computes `total_chunks = total_size.div_ceil(chunk_size as u64) as u32` and allocates `vec![false; total_chunks as usize]`. Both `total_size` and `chunk_size` come from the request without validation. With `total_size = u64::MAX, chunk_size = 1`, a single call allocates ~4 GiB per session. + +## Exploit Scenario +```http +POST /_api/blob/{db}/{coll}/upload +{ "total_size": 18446744073709551615, "chunk_size": 1 } +``` +Repeated calls allocate gigabytes per session and exhaust memory. + +## Recommendation +- Cap `total_size` (e.g. 10 GiB). +- Require `chunk_size >= 64 KiB` (and a sensible upper bound). +- Reject upfront with 400 when bounds are violated. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-134-image-process-dimensions.md b/tasks/done/SEC-134-image-process-dimensions.md new file mode 100644 index 0000000..c304535 --- /dev/null +++ b/tasks/done/SEC-134-image-process-dimensions.md @@ -0,0 +1,25 @@ +# SEC-134: `solidb.image_process` accepts unchecked image dimensions + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/scripting/file_handling.rs` +- **Lines**: 593, 599, 605 + +## Description +`img.resize_exact(w, h, Lanczos3)` is called with `width`/`height` taken directly from a Lua table. There is no clamp on the requested output size. + +## Exploit Scenario +```lua +solidb.image_process(small_blob, { resize = { width = 65535, height = 65535 } }) +``` +Allocates ~16 GiB for the output framebuffer, OOM-killing the server. + +## Recommendation +- Clamp dimensions (e.g. `≤ 8192` on each axis). +- Reject if `w * h * 4 > MAX_IMG_BYTES` (e.g. 64 MiB). +- Apply the same caps in any other `solidb.*` image entry point. + +## References +- Related: SEC-150 (decompression bomb). diff --git a/tasks/done/SEC-135-hmac-scope-handshake-only.md b/tasks/done/SEC-135-hmac-scope-handshake-only.md new file mode 100644 index 0000000..0b3eee1 --- /dev/null +++ b/tasks/done/SEC-135-hmac-scope-handshake-only.md @@ -0,0 +1,21 @@ +# SEC-135: HMAC authentication covers only the handshake + +## Status +- **Severity**: HIGH +- **Category**: Cryptographic / Network +- **Project**: soli/db +- **File**: `src/sync/transport.rs`, `src/sync/worker.rs` +- **Lines**: transport.rs:438-599; worker.rs (post-handshake message handling) + +## Description +`authenticate_standalone` validates an HMAC at handshake time. After auth succeeds, every subsequent `SyncMessage` (`SyncBatch`, `IncrementalSyncRequest`, `FullSync*`, `Heartbeat`) flows over **plain TCP with zero per-message authentication**. There is no AEAD wrap, no per-message HMAC, no sequence-number binding. + +## Exploit Scenario +A man-in-the-middle on the post-handshake stream (or any attacker with TCP access in the absence of TLS — see SEC-080) can inject arbitrary `SyncBatch`/`FullSync` payloads, poisoning the replication log. Pre-recorded sessions can also be replayed. + +## Recommendation +- Wrap the post-handshake stream in an AEAD channel (e.g. Noise XK) keyed from the HMAC handshake. +- Or apply per-message HMAC over `(seq_counter || msg_bytes)` with a session-level monotonic counter. + +## References +- Related: SEC-080, SEC-083, SEC-088. diff --git a/tasks/done/SEC-136-origin-node-trusted-blindly.md b/tasks/done/SEC-136-origin-node-trusted-blindly.md new file mode 100644 index 0000000..866ecaa --- /dev/null +++ b/tasks/done/SEC-136-origin-node-trusted-blindly.md @@ -0,0 +1,22 @@ +# SEC-136: `origin_node` / `origin_sequence` trusted from the wire + +## Status +- **Severity**: HIGH +- **Category**: Replication Integrity +- **Project**: soli/db +- **File**: `src/sync/worker.rs`, `src/cluster/manager.rs` +- **Lines**: worker.rs:582, 919; manager.rs:312-326 + +## Description +Replication entries arrive with `origin_node` and `origin_sequence` fields that the worker uses for de-duplication. The receiving node never checks that `origin_node` matches the authenticated peer identity — a connected peer can emit entries claiming to originate from any node, advancing that node's high-watermark. + +## Exploit Scenario +Peer A is authenticated via the cluster keyfile but emits `SyncEntry { origin_node: "B", origin_sequence: N+1, ... }`. Other nodes record N+1 as the high watermark for B. When B's real entry N+1 arrives, it is dropped as duplicate — silent data loss / hiding. + +## Recommendation +- Bind authenticated peer identity to the connection. +- Reject `SyncEntry` whose `origin_node` ≠ connection peer ID, unless the message is a fan-out within the origin's HLC chain (validate the chain). +- Log + ban-list peers that violate this invariant. + +## References +- Related: SEC-080, SEC-110, SEC-135. diff --git a/tasks/done/SEC-137-join-request-unverified-node-id.md b/tasks/done/SEC-137-join-request-unverified-node-id.md new file mode 100644 index 0000000..a9daefe --- /dev/null +++ b/tasks/done/SEC-137-join-request-unverified-node-id.md @@ -0,0 +1,25 @@ +# SEC-137: `JoinRequest` accepts unverified node identity + +## Status +- **Severity**: HIGH +- **Category**: Authentication / Cluster Integrity +- **Project**: soli/db +- **File**: `src/cluster/manager.rs` +- **Lines**: 228-242 + +## Description +`JoinRequest` is accepted with whatever `node.id` and `node.address` the caller asserts. Any TCP-reachable party can claim to be node `victim` at address `victim.example.com`, get registered into cluster metadata, and start receiving shard ops/heartbeats. + +## Exploit Scenario +1. Attacker connects to a cluster member. +2. Sends `JoinRequest { node: { id: "node-3", address: "evil.example.com" } }`. +3. Cluster updates membership; subsequent shard ops route through the attacker. +4. Combined with HLC manipulation (SEC-138), causes split-brain. + +## Recommendation +- Tie cluster transport to the keyfile-authenticated identity (per SEC-080 architecture). +- Require `node.id` to match the authenticated peer principal. +- Optionally require operator approval for new node IDs. + +## References +- Related: SEC-080, SEC-108, SEC-110. diff --git a/tasks/done/SEC-138-hlc-skew-unbounded.md b/tasks/done/SEC-138-hlc-skew-unbounded.md new file mode 100644 index 0000000..5f505a2 --- /dev/null +++ b/tasks/done/SEC-138-hlc-skew-unbounded.md @@ -0,0 +1,21 @@ +# SEC-138: HLC accepts unbounded clock skew from peers + +## Status +- **Severity**: HIGH +- **Category**: Cluster Integrity / DoS +- **Project**: soli/db +- **File**: `src/cluster/hlc.rs` +- **Lines**: 61-90, 195-228 (`HlcGenerator::receive`) + +## Description +`HlcGenerator::receive` adopts any remote `physical_time` larger than the local clock. A single hostile or buggy peer sending `physical_time = u64::MAX - 1` permanently wedges every node's clock, breaking ordering on all future writes. + +## Exploit Scenario +A compromised peer (or an attacker who exploits SEC-137 to forge a node identity) sends a message with `physical_time = u64::MAX - 1`. All receiving nodes update their clocks. Future writes appear to be from the year 5×10^8 — replication ordering is permanently broken cluster-wide. + +## Recommendation +- Reject (or clamp) remote HLC physical times exceeding `now_ms + MAX_SKEW` (e.g. 60 s). +- Log peers that send out-of-bounds timestamps and circuit-break the connection. + +## References +- Related: SEC-080, SEC-110, SEC-136. diff --git a/tasks/done/SEC-139-queue-cron-runs-as-admin.md b/tasks/done/SEC-139-queue-cron-runs-as-admin.md new file mode 100644 index 0000000..7cb82c6 --- /dev/null +++ b/tasks/done/SEC-139-queue-cron-runs-as-admin.md @@ -0,0 +1,22 @@ +# SEC-139: Queue and cron jobs always run as `_system` admin + +## Status +- **Severity**: HIGH +- **Category**: Privilege Escalation +- **Project**: soli/db +- **File**: `src/queue/jobs.rs`, `src/queue/cron.rs` +- **Lines**: jobs.rs:201-207; cron.rs (job dispatch) + +## Description +Every queue/cron job, regardless of who enqueued it, executes with `ScriptUser { username: "_system", roles: ["admin"] }`. There is no record of the principal that scheduled the job. + +## Exploit Scenario +Any user with enqueue permission targets a privileged service script. The job runs as admin, bypassing collection ACLs the script normally honors via `solidb.auth`. Combined with SEC-125, this becomes RCE-as-admin for any authenticated user. + +## Recommendation +- Persist `enqueued_by` claims at enqueue time (`Job.user`, `CronJob.user`). +- When dispatching, construct `ScriptUser` from those persisted claims (not a static `_system`). +- Require admin role to enqueue jobs targeting admin-only scripts. + +## References +- Related: SEC-125, SEC-127. diff --git a/tasks/done/SEC-140-transaction-cleanup-dead-code.md b/tasks/done/SEC-140-transaction-cleanup-dead-code.md new file mode 100644 index 0000000..a88a174 --- /dev/null +++ b/tasks/done/SEC-140-transaction-cleanup-dead-code.md @@ -0,0 +1,22 @@ +# SEC-140: `TransactionManager::cleanup_expired` is never called + +## Status +- **Severity**: HIGH +- **Category**: Denial of Service / Resource Leak +- **Project**: soli/db +- **File**: `src/transaction/manager.rs` +- **Lines**: 240-266 (`cleanup_expired`) + +## Description +No caller invokes `cleanup_expired()` anywhere in the binary. The default 300 s transaction timeout is documented but never enforced. Long-running transactions accumulate row locks via the lock manager and a `Transaction` Arc forever. + +## Exploit Scenario +An authenticated user repeatedly opens transactions without committing or rolling back. Each holds row locks indefinitely, blocking writers on the same keys. Memory grows unbounded. + +## Recommendation +- Spawn a tokio interval task at server boot calling `tx_manager.cleanup_expired()` every 30 s. +- Bound `active_transactions.len()` per principal (e.g., 16 concurrent). +- Emit metrics for active transactions and aged-out expirations. + +## References +- Related: SEC-096, SEC-099. diff --git a/tasks/done/SEC-141-transactionid-nanos-collision.md b/tasks/done/SEC-141-transactionid-nanos-collision.md new file mode 100644 index 0000000..1e7fd47 --- /dev/null +++ b/tasks/done/SEC-141-transactionid-nanos-collision.md @@ -0,0 +1,21 @@ +# SEC-141: `TransactionId` collisions under high concurrency + +## Status +- **Severity**: HIGH +- **Category**: Data Integrity +- **Project**: soli/db +- **File**: `src/transaction/mod.rs`, `src/transaction/manager.rs` +- **Lines**: mod.rs:17-24 (`TransactionId::new`); manager.rs:54 (`active_transactions` insert) + +## Description +`TransactionId::new()` truncates `SystemTime::now().duration_since(UNIX_EPOCH).as_nanos()` to `u64` and uses it both as the unique ID and the MVCC `read_timestamp`. Two concurrent BEGINs within the same nanosecond collide. The receiving `HashMap` then **silently overwrites** one transaction with the other. + +## Exploit Scenario +Under burst load, two concurrent BEGINs receive the same ID. The second insert clobbers the first. Commit/rollback of the surviving ID releases the wrong locks; the lost transaction's WAL `BEGIN` record is orphaned, breaking recovery invariants. + +## Recommendation +- Use a monotonic atomic counter (`AtomicU64::fetch_add`) for the ID, seeded once at startup from system time. +- Keep `read_timestamp` separate (e.g., HLC tick) for MVCC. + +## References +- Related: SEC-096, SEC-099. diff --git a/tasks/done/SEC-142-keyfile-reread-and-path-leak.md b/tasks/done/SEC-142-keyfile-reread-and-path-leak.md new file mode 100644 index 0000000..001186a --- /dev/null +++ b/tasks/done/SEC-142-keyfile-reread-and-path-leak.md @@ -0,0 +1,26 @@ +# SEC-142: Keyfile re-read on every HMAC and path leaked in error + +## Status +- **Severity**: HIGH +- **Category**: Cryptographic / Information Disclosure +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Lines**: 243, 601-616 (`compute_hmac_with_timestamp`) + +## Description +Two issues in one helper: +1. The keyfile is re-read from disk on every HMAC computation. This creates a TOCTOU window where an attacker who can swap the keyfile mid-handshake influences which key the server uses. +2. The error string `"Failed to read keyfile {path}: {err}"` is included in the wire response, leaking the absolute path of the keyfile to the peer. + +## Exploit Scenario +1. Operator rotates the keyfile via in-place write. +2. A handshake in flight reads partial contents and accepts a forged HMAC. +3. A failed handshake reveals `/etc/solidb/cluster.key` to the connecting peer. + +## Recommendation +- Load the keyfile once at startup into `Arc>>`. +- Return a generic `"keyfile read failed"` to peers; log the detailed message locally only. +- Verify file mode is `0600` and refuse to start otherwise. + +## References +- Related: SEC-081, SEC-088. diff --git a/tasks/done/SEC-143-basic-auth-cache-stale.md b/tasks/done/SEC-143-basic-auth-cache-stale.md new file mode 100644 index 0000000..25ade5c --- /dev/null +++ b/tasks/done/SEC-143-basic-auth-cache-stale.md @@ -0,0 +1,22 @@ +# SEC-143: Basic-auth cache not invalidated on password change / user delete + +## Status +- **Severity**: MEDIUM +- **Category**: Authentication +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 885-921 (Basic-auth cache lookup/insertion) + +## Description +Successful Basic auth is cached for 60 s keyed on `username:SipHash(credentials)`. After `change_password_handler` updates the hash, or `delete_user` removes the user, the old credentials still authenticate for up to 60 s. + +## Exploit Scenario +- A password is briefly leaked, then rotated. The attacker gets a 60 s grace window past the rotation. +- An admin compromise is detected and the account deleted; the deleted account remains usable for 60 s. + +## Recommendation +- On password change / user delete / role revoke, purge entries with the matching `username:` prefix from `BASIC_AUTH_CACHE`. +- Same purge for `permission_cache` if present. + +## References +- Related: SEC-091. diff --git a/tasks/done/SEC-144-username-enumeration-timing.md b/tasks/done/SEC-144-username-enumeration-timing.md new file mode 100644 index 0000000..cde2ac0 --- /dev/null +++ b/tasks/done/SEC-144-username-enumeration-timing.md @@ -0,0 +1,20 @@ +# SEC-144: Username enumeration via login-handler timing + +## Status +- **Severity**: MEDIUM +- **Category**: Information Disclosure +- **Project**: soli/db +- **File**: `src/server/handlers/auth.rs` +- **Lines**: 329-348 + +## Description +On unknown user, `login_handler` returns "Invalid credentials" without running Argon2 verification. On a valid username with the wrong password, Argon2 runs (~50–200 ms). The timing difference reliably reveals which usernames exist. + +## Exploit Scenario +An attacker scripts logins with a wordlist of usernames and any password, measuring response times. Usernames with high-latency responses are valid accounts → focused brute-forcing. + +## Recommendation +When the user lookup fails, run a dummy `verify_password(&req.password, DUMMY_HASH)` against a precomputed Argon2 hash to equalize timing. Use a constant dummy hash baked at startup. + +## References +- Related: SEC-093. diff --git a/tasks/done/SEC-145-change-password-no-strength-check.md b/tasks/done/SEC-145-change-password-no-strength-check.md new file mode 100644 index 0000000..c322e79 --- /dev/null +++ b/tasks/done/SEC-145-change-password-no-strength-check.md @@ -0,0 +1,19 @@ +# SEC-145: `change_password_handler` accepts arbitrarily weak passwords + +## Status +- **Severity**: MEDIUM +- **Category**: Authentication / Input Validation +- **Project**: soli/db +- **File**: `src/server/handlers/auth.rs` +- **Lines**: 70-137 + +## Description +The change-password endpoint stores any string supplied as the new password — no length, complexity, or dictionary check. An admin compromise (or, currently, any logged-in user thanks to SEC-124) can demote the account to a 1-character password and propagate it through the replication log. + +## Recommendation +- Enforce a minimum length (e.g., 12 characters). +- Reject obvious dictionary passwords (e.g., a small bundled blocklist). +- Optionally enforce zxcvbn-style scoring for higher-tier roles. + +## References +- Related: SEC-091, SEC-106. diff --git a/tasks/done/SEC-146-xff-trusted-unconditionally.md b/tasks/done/SEC-146-xff-trusted-unconditionally.md new file mode 100644 index 0000000..e8d83f4 --- /dev/null +++ b/tasks/done/SEC-146-xff-trusted-unconditionally.md @@ -0,0 +1,21 @@ +# SEC-146: `X-Forwarded-For` trusted unconditionally for rate limiting + +## Status +- **Severity**: MEDIUM +- **Category**: Rate Limiting Bypass +- **Project**: soli/db +- **File**: `src/server/handlers/auth.rs` +- **Lines**: 283-294 + +## Description +`login_handler` derives the rate-limit key from the `X-Forwarded-For` header without any proxy allowlist. Any direct caller can set `X-Forwarded-For: ` per request and reset the per-IP counter, defeating the 20-attempts/60-s login rate limit. + +## Exploit Scenario +A brute-forcing client iterates through random `X-Forwarded-For` values, never tripping the 20-attempt threshold for any single key. + +## Recommendation +- Only honor `X-Forwarded-For` when the connection arrives from a configured trusted-proxy CIDR (`SOLIDB_TRUSTED_PROXIES`). +- Otherwise, key the rate limit on the socket peer address. + +## References +- Related: SEC-105, SEC-092. diff --git a/tasks/done/SEC-147-batch-size-unbounded.md b/tasks/done/SEC-147-batch-size-unbounded.md new file mode 100644 index 0000000..3816524 --- /dev/null +++ b/tasks/done/SEC-147-batch-size-unbounded.md @@ -0,0 +1,17 @@ +# SEC-147: Cursor `batch_size` is unbounded + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/server/handlers/query.rs` +- **Lines**: 34-46, 541, 665 + +## Description +The `batch_size` field of `ExecuteQueryRequest` is deserialized as a plain `usize` with no upper cap. With `batch_size = usize::MAX`, `store_and_get_first_batch` eagerly drains the iterator into `first_batch: Vec` and skips cursor storage entirely, defeating pagination's memory amortization. + +## Recommendation +Clamp on the way in: `let batch_size = req.batch_size.unwrap_or(DEFAULT).min(MAX_BATCH);` with `MAX_BATCH` around 10 000. Document the default and ceiling. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-148-limit-offset-overflow.md b/tasks/done/SEC-148-limit-offset-overflow.md new file mode 100644 index 0000000..bc274b2 --- /dev/null +++ b/tasks/done/SEC-148-limit-offset-overflow.md @@ -0,0 +1,17 @@ +# SEC-148: `LIMIT offset + count` overflow in executor + +## Status +- **Severity**: MEDIUM +- **Category**: Logic / DoS +- **Project**: soli/db +- **File**: `src/sdbql/executor/execution/entry.rs` +- **Lines**: 121 + +## Description +`limit_offset + limit_count` can wrap silently in release builds when both come from large bind variables, producing an effective `Some(0)` and causing a full collection scan (or unexpected behavior). + +## Recommendation +Use `checked_add`; on overflow, return a query error rather than silently scanning everything. + +## References +- Related: SEC-094, SEC-118. diff --git a/tasks/done/SEC-149-explain-no-timeout.md b/tasks/done/SEC-149-explain-no-timeout.md new file mode 100644 index 0000000..36d7fea --- /dev/null +++ b/tasks/done/SEC-149-explain-no-timeout.md @@ -0,0 +1,17 @@ +# SEC-149: `explain_query` has no timeout / `spawn_blocking` + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/server/handlers/query.rs` +- **Lines**: 683-711 + +## Description +Unlike `execute_query`, `explain_query` does not wrap the planner call in `tokio::time::timeout` or `spawn_blocking`. A query designed to do deep planner analysis can pin an async runtime thread. + +## Recommendation +Mirror the pattern from `execute_query`: run the planner inside `spawn_blocking`, wrapped in a `tokio::time::timeout(QUERY_TIMEOUT_SECS, ...)`. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-150-image-decompression-bomb.md b/tasks/done/SEC-150-image-decompression-bomb.md new file mode 100644 index 0000000..d9de72e --- /dev/null +++ b/tasks/done/SEC-150-image-decompression-bomb.md @@ -0,0 +1,20 @@ +# SEC-150: `image::load_from_memory` runs without decompression-bomb limits + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/scripting/file_handling.rs` +- **Lines**: 255, 581 + +## Description +Image loading is performed via `image::load_from_memory(&bytes)` with no `Limits`. A small compressed input can describe a huge canvas (PNG, WebP, etc.), allocating gigabytes during decode. + +## Exploit Scenario +A few-KB PNG declares dimensions of `2^15 × 2^15`. Decoding allocates ~4 GiB before the script can even use the result. + +## Recommendation +Use `image::io::Reader::new(...).with_guessed_format()?.limits(Limits { max_alloc: Some(64 * 1024 * 1024), max_image_width: Some(8192), max_image_height: Some(8192) }).decode()`. + +## References +- Related: SEC-134. diff --git a/tasks/done/SEC-151-fuse-pid-kill-no-name-check.md b/tasks/done/SEC-151-fuse-pid-kill-no-name-check.md new file mode 100644 index 0000000..18e5084 --- /dev/null +++ b/tasks/done/SEC-151-fuse-pid-kill-no-name-check.md @@ -0,0 +1,20 @@ +# SEC-151: `solidb-fuse` PID-file kill skips process-name verification + +## Status +- **Severity**: MEDIUM +- **Category**: Local Privilege Escalation +- **Project**: soli/db +- **File**: `src/bin/solidb-fuse.rs` +- **Lines**: 789-803 + +## Description +`solidb-fuse` reads a PID from `--pid-file` (default `./solidb-fuse.pid`, attacker-writable when CWD is shared) and sends `SIGTERM`. Unlike `src/main.rs:107-117`, it does **not** verify the target process name matches `solidb-fuse` (or `solidb`). + +## Exploit Scenario +A local attacker writes a target PID into `./solidb-fuse.pid` before a legitimate restart. The next `solidb-fuse --stop` (or restart logic) kills the unrelated victim process. + +## Recommendation +Mirror the validation from `src/main.rs:107-117`: use `sysinfo` (or `/proc//comm`) to verify the process name before sending the signal. + +## References +- Related: SEC-098. diff --git a/tasks/done/SEC-152-ws-no-token-revalidation-no-idle-timeout.md b/tasks/done/SEC-152-ws-no-token-revalidation-no-idle-timeout.md new file mode 100644 index 0000000..a6dcbd1 --- /dev/null +++ b/tasks/done/SEC-152-ws-no-token-revalidation-no-idle-timeout.md @@ -0,0 +1,23 @@ +# SEC-152: WebSocket sessions never re-validate auth and have no idle timeout + +## Status +- **Severity**: MEDIUM +- **Category**: Authentication / Resource Exhaustion +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs` +- **Lines**: 254-365 (`handle_socket`); `monitor_ws_handler` + +## Description +Token validation runs once at upgrade. If the JWT later expires or the API key is revoked, the connection keeps streaming. There is also no max session length and no pong-timeout (only a 30-s server ping with no liveness enforcement). + +## Exploit Scenario +- A user is removed; their open WS keeps receiving live data until a process restart. +- An attacker holds many connections idle to exhaust file descriptors / memory. + +## Recommendation +- Re-validate the token periodically (every 5–15 min) and close on revocation or expiry. +- Track last-pong; close the connection if no pong arrives within `2 * ping_interval`. +- Enforce a maximum session age (e.g., 24 h). + +## References +- Related: SEC-117, SEC-129. diff --git a/tasks/done/SEC-153-no-timeout-or-header-limit.md b/tasks/done/SEC-153-no-timeout-or-header-limit.md new file mode 100644 index 0000000..7e78686 --- /dev/null +++ b/tasks/done/SEC-153-no-timeout-or-header-limit.md @@ -0,0 +1,19 @@ +# SEC-153: HTTP server lacks per-request timeout and header-size limit + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/server/routes.rs` +- **Lines**: 1098-1100 (router build, layer chain) + +## Description +The router has no `tower_http::timeout::TimeoutLayer`, no slow-loris protection, and no explicit max request-header size beyond axum/hyper defaults. Body limit is set, but a slow body or oversize headers can hold worker threads. + +## Recommendation +- Add `TimeoutLayer::new(Duration::from_secs(30))` (or per-route variants for long-poll endpoints). +- Add a header bytes cap via `axum::extract::DefaultBodyLimit::max(...)` companion or `tower_http::limit::RequestBodyLimitLayer` plus `hyper::server::conn::http1::Builder::max_buf_size`. +- Document the limits. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-154-sync-framing-inconsistencies.md b/tasks/done/SEC-154-sync-framing-inconsistencies.md new file mode 100644 index 0000000..dd29479 --- /dev/null +++ b/tasks/done/SEC-154-sync-framing-inconsistencies.md @@ -0,0 +1,21 @@ +# SEC-154: Sync transport mixes framing styles and silently drops on parse error + +## Status +- **Severity**: MEDIUM +- **Category**: Network Protocol Robustness +- **Project**: soli/db +- **File**: `src/sync/worker.rs` +- **Lines**: 1148-1389 (inbound connection handler), 1186 (`lz4_flex::decompress_size_prepended(...).unwrap_or_default()`) + +## Description +The inbound handler reframes messages itself with mixed framing rules: `IncrementalSyncRequest` uses `[compressed_byte][len: u32][bytes]`, while `FullSync*` responses use only `[len: u32][bytes]`. A peer that interleaves the two desynchronizes the parser. Parse errors and oversize messages cause a silent `break`, and `lz4` decompression failures fall back to an empty buffer with no log. + +## Exploit Scenario +A malicious or buggy peer sends frames that toggle desync, forcing other nodes to reconnect repeatedly. Each reconnect triggers a full-sync, amplifying load. + +## Recommendation +- Unify all framing through `ConnectionPool::write_message` / `read_message`. +- On parse / decompression failure, log the peer ID, increment a per-peer error counter, and apply exponential ban-listing. + +## References +- Related: SEC-110, SEC-135. diff --git a/tasks/done/SEC-155-scatter-gather-amplification.md b/tasks/done/SEC-155-scatter-gather-amplification.md new file mode 100644 index 0000000..d069839 --- /dev/null +++ b/tasks/done/SEC-155-scatter-gather-amplification.md @@ -0,0 +1,23 @@ +# SEC-155: Scatter-gather coordinator amplifies user requests across the cluster + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service / Authorization +- **Project**: soli/db +- **File**: `src/sharding/coordinator.rs` +- **Lines**: ~1885+ (`upsert_batch_to_shards`), 1182-1212 (`_copy_shard`) + +## Description +A single client write fans out to N shards × R replicas via internal HTTP using `X-Shard-Direct`. The coordinator never re-checks whether the originating user has rights on the *target physical* shard collection — only on the logical one. There is no per-request fan-out budget. + +## Exploit Scenario +- An authenticated user with read-only access on `users` triggers writes that fan out across the cluster, generating O(N×R) outbound HTTP requests. +- Combined with SEC-109 (still-incomplete shard-key validation), the user may even land writes on shards they shouldn't. + +## Recommendation +- Apply a per-request fan-out budget (configurable max). +- On the receiving node, recheck authorization against the **logical** collection name, not just the cluster secret. +- Surface fan-out metrics for capacity planning. + +## References +- Related: SEC-100, SEC-109, SEC-111. diff --git a/tasks/done/SEC-156-lua-sandbox-no-memory-or-interrupt.md b/tasks/done/SEC-156-lua-sandbox-no-memory-or-interrupt.md new file mode 100644 index 0000000..acda8ba --- /dev/null +++ b/tasks/done/SEC-156-lua-sandbox-no-memory-or-interrupt.md @@ -0,0 +1,28 @@ +# SEC-156: Lua sandbox has no memory limit and no interrupt + +## Status +- **Severity**: MEDIUM +- **Category**: Denial of Service +- **Project**: soli/db +- **File**: `src/scripting/engine/repl.rs`, `src/scripting/engine/websocket.rs`, `src/scripting/engine/mod.rs` +- **Lines**: repl.rs:36; websocket.rs:66; mod.rs:225 + +## Description +All entry points construct the Lua state via `Lua::new()` (full stdlib), then nil out specific globals (`os, io, debug, package, dofile, load, loadfile, require`). This is a substantive defense-in-depth gap because: +- Future mlua additions will silently land as accessible globals. +- `string.dump` remains usable. +- `collectgarbage("setpause", 0)` / `("setstepmul", huge)` is still callable and lets a script trash GC. +- There is **no `lua.set_memory_limit`** and **no `lua.set_interrupt`** for CPU bounds. + +## Exploit Scenario +- `local s = string.rep("a", 2^28)` allocates 256 MB per request; pool reset retains the state, compounding across requests. +- `while true do end` in a sync-evaluation path hangs a worker thread forever. + +## Recommendation +- Replace `Lua::new()` with `Lua::new_with(StdLib::MATH | STRING | TABLE | OS_DATETIME | ...)` containing only required libraries. +- Call `lua.set_memory_limit(64 * 1024 * 1024)` per state. +- Call `lua.set_interrupt(...)` with a deadline derived from per-script CPU budget. +- Restrict `collectgarbage` arguments via a wrapper. + +## References +- Related: SEC-120. diff --git a/tasks/done/SEC-157-lua-pool-metatable-leak.md b/tasks/done/SEC-157-lua-pool-metatable-leak.md new file mode 100644 index 0000000..f754905 --- /dev/null +++ b/tasks/done/SEC-157-lua-pool-metatable-leak.md @@ -0,0 +1,28 @@ +# SEC-157: Lua pool reset preserves user-set metatables across tenants + +## Status +- **Severity**: MEDIUM +- **Category**: Multi-tenant Isolation +- **Project**: soli/db +- **File**: `src/scripting/engine/pool.rs` +- **Lines**: 566-642 (`reset_state`) + +## Description +`reset_state` clears non-preserved keys from `globals()` but does not restore metatables on built-in primitive types. A previous request can do: +```lua +getmetatable("").__index = function(s, k) + return rawget(string, k) or some_logger(s) +end +``` +The next request reusing the same pool slot inherits this injected metatable — its `s:sub(...)` calls leak data to the previous tenant's logger. + +## Exploit Scenario +Tenant A monkey-patches `string` metatable to exfiltrate strings; tenant B's script (same pool slot) calls a string method and the contents flow to A's webhook. + +## Recommendation +- On reset, call `setmetatable(getmetatable(""), nil)` for each base type. +- Or recreate the Lua state when metatable mutation is detected. +- Best: switch to ephemeral states for cross-tenant requests. + +## References +- Related: SEC-156. diff --git a/tasks/done/SEC-158-jwt-secret-ephemeral-default.md b/tasks/done/SEC-158-jwt-secret-ephemeral-default.md new file mode 100644 index 0000000..4eea3c2 --- /dev/null +++ b/tasks/done/SEC-158-jwt-secret-ephemeral-default.md @@ -0,0 +1,18 @@ +# SEC-158: JWT secret defaults to an ephemeral random value + +## Status +- **Severity**: MEDIUM +- **Category**: Authentication / Configuration +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 115-143 (`JWT_SECRET` initialization) + +## Description +When `JWT_SECRET` is not set, the server generates a fresh random value at startup. This silently invalidates all tokens on every restart (operator surprise) and, more importantly, fails open in production deployments that forget the env var: the server keeps serving with an unaudited secret while only emitting a `tracing::warn!`. + +## Recommendation +- Refuse to start in production mode unless `JWT_SECRET` is configured. Detect production by either an explicit `SOLIDB_ENV=production` or by the presence of a release-build conditional. +- In dev mode, persist the generated secret to `data_dir/.jwt_secret` (mode 0600) so restarts don't break clients. + +## References +- Related: SEC-106. diff --git a/tasks/done/SEC-159-argon2-default-params.md b/tasks/done/SEC-159-argon2-default-params.md new file mode 100644 index 0000000..bfe1584 --- /dev/null +++ b/tasks/done/SEC-159-argon2-default-params.md @@ -0,0 +1,17 @@ +# SEC-159: Argon2 password hashes use library-default parameters + +## Status +- **Severity**: MEDIUM +- **Category**: Cryptographic +- **Project**: soli/db +- **File**: `src/server/auth.rs`, `src/scripting/lua_globals/crypto.rs` +- **Lines**: auth.rs:279, 590, 598; crypto.rs:224, 246 + +## Description +Calls use `Argon2::default()`, which selects the argon2 crate's defaults (m=19 MiB, t=2, p=1). OWASP's 2024 password-hash guidance treats these as a floor, recommending higher memory/time costs for high-value accounts. For an admin password store this is borderline; raising parameters now buys a year+ of margin against GPU/ASIC progress. + +## Recommendation +Use explicit `Params::new(65536, 3, 4, None).unwrap()` (m=64 MiB, t=3, p=4) at hash sites that handle `_admins`. Keep the `Argon2::default()` for low-value Lua use if needed. + +## References +- Related: SEC-093, SEC-106. diff --git a/tasks/done/SEC-160-thread-rng-postfork.md b/tasks/done/SEC-160-thread-rng-postfork.md new file mode 100644 index 0000000..bf413ed --- /dev/null +++ b/tasks/done/SEC-160-thread-rng-postfork.md @@ -0,0 +1,17 @@ +# SEC-160: Cluster auth uses `thread_rng` rather than `OsRng` + +## Status +- **Severity**: MEDIUM +- **Category**: Cryptographic +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Lines**: 531, 536 (challenge + nonce generation) + +## Description +`rand::thread_rng()` is ChaCha-based and currently cryptographically adequate, but it is seeded once and shares state across calls. If a daemon ever forks (see `daemon.rs`), child processes can share PRNG state with the parent, leading to nonce reuse. `OsRng` avoids this by going to the OS RNG every call. + +## Recommendation +Switch the 32-byte challenge and 16-byte nonce generation to `rand::rngs::OsRng`, matching the pattern already used in `auth.rs`. + +## References +- Related: SEC-083, SEC-088. diff --git a/tasks/done/SEC-161-apikey-expires-at-silent-ignore.md b/tasks/done/SEC-161-apikey-expires-at-silent-ignore.md new file mode 100644 index 0000000..cff6609 --- /dev/null +++ b/tasks/done/SEC-161-apikey-expires-at-silent-ignore.md @@ -0,0 +1,17 @@ +# SEC-161: `ApiKey.expires_at` parse failure silently treated as never-expiring + +## Status +- **Severity**: LOW +- **Category**: Authorization / Configuration +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 717-723 + +## Description +The `expires_at` field is parsed via `if let Ok(_) = OffsetDateTime::parse(...)`. When parsing fails (operator typo, schema drift), the inner block is simply skipped and the key is treated as never-expiring. + +## Recommendation +Treat unparseable `expires_at` as expired (fail closed) and log the corruption. Add a startup linter that scans `_api_keys` for malformed dates and refuses to serve them. + +## References +- Related: SEC-106. diff --git a/tasks/done/SEC-162-validate-token-required-claims.md b/tasks/done/SEC-162-validate-token-required-claims.md new file mode 100644 index 0000000..072137a --- /dev/null +++ b/tasks/done/SEC-162-validate-token-required-claims.md @@ -0,0 +1,17 @@ +# SEC-162: `validate_token` does not pin required spec claims + +## Status +- **Severity**: LOW +- **Category**: Cryptographic / Defense in Depth +- **Project**: soli/db +- **File**: `src/server/auth.rs` +- **Lines**: 665-674 + +## Description +`Validation::new(Algorithm::HS256)` is used as-is. The current `jsonwebtoken` default sets `validate_exp = true`, but the validation spec is otherwise loose: tokens with `exp == usize::MAX` (used internally for API keys / cluster claims) are accepted forever via the same path that serves user JWTs. + +## Recommendation +Pin `validation.required_spec_claims = HashSet::from(["exp", "sub"])` and reject `exp == usize::MAX` for `Bearer` JWTs. Internal cluster/API claims should never flow through the user JWT validation path. + +## References +- Related: SEC-106, SEC-129. diff --git a/tasks/done/SEC-163-float-to-int-truncation.md b/tasks/done/SEC-163-float-to-int-truncation.md new file mode 100644 index 0000000..e7de81b --- /dev/null +++ b/tasks/done/SEC-163-float-to-int-truncation.md @@ -0,0 +1,17 @@ +# SEC-163: Float-to-int truncation in array index / range expressions + +## Status +- **Severity**: LOW +- **Category**: Logic / Defensive +- **Project**: soli/db +- **File**: `src/sdbql/executor/expression.rs` +- **Lines**: 149-163, 273-302 + +## Description +`f64 as usize` / `f64 as i64` is saturating in modern Rust (no UB), but yields silently wrong indices for `NaN` (→ 0) and out-of-range floats. This is unlikely to be directly exploitable but produces hard-to-debug query results. + +## Recommendation +When converting a float to an index, reject non-finite values explicitly and return a query error. + +## References +- Related: SEC-118. diff --git a/tasks/done/SEC-164-cli-update-no-signature-verification.md b/tasks/done/SEC-164-cli-update-no-signature-verification.md new file mode 100644 index 0000000..6d3c416 --- /dev/null +++ b/tasks/done/SEC-164-cli-update-no-signature-verification.md @@ -0,0 +1,17 @@ +# SEC-164: Auto-update downloads tarballs without signature verification + +## Status +- **Severity**: LOW +- **Category**: Supply Chain +- **Project**: soli/db +- **File**: `src/cli/update.rs` +- **Lines**: 60-83 + +## Description +Auto-update fetches a tar.gz from a GitHub release URL and unpacks into the executable directory. Integrity relies solely on TLS + GitHub. There is no signature or checksum verification. + +## Recommendation +Publish minisign-signed releases (or at minimum a SHA-256 checksum file) and verify before `replace_binary`. Bake the public key into the binary. + +## References +- Related: SEC-097. diff --git a/tasks/done/SEC-165-log-no-rotation.md b/tasks/done/SEC-165-log-no-rotation.md new file mode 100644 index 0000000..09beb0d --- /dev/null +++ b/tasks/done/SEC-165-log-no-rotation.md @@ -0,0 +1,17 @@ +# SEC-165: Log files have no rotation or size cap + +## Status +- **Severity**: LOW +- **Category**: Operational / DoS +- **Project**: soli/db +- **File**: `src/main.rs`, `src/bin/solidb-fuse.rs` +- **Lines**: main.rs:157; solidb-fuse.rs:810 + +## Description +`File::create(&args.log_file)` writes to a single growing file. A long-running daemon eventually fills the disk. + +## Recommendation +Use `tracing-appender::rolling::daily` (or hourly) with retention. The dependency is already pulled in elsewhere. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-166-blob-filename-crlf.md b/tasks/done/SEC-166-blob-filename-crlf.md new file mode 100644 index 0000000..7893a63 --- /dev/null +++ b/tasks/done/SEC-166-blob-filename-crlf.md @@ -0,0 +1,21 @@ +# SEC-166: Blob filename used in `Content-Disposition` without sanitization + +## Status +- **Severity**: LOW +- **Category**: Header Injection +- **Project**: soli/db +- **File**: `src/server/handlers/blobs.rs` +- **Lines**: 69-90 (upload), 318 (download) + +## Description +`field.file_name()` and `field.content_type()` from multipart upload are stored verbatim. On download, the filename is interpolated into `Content-Disposition` without escaping. A filename containing CR/LF can inject extra response headers; a filename with `"` can break out of the quoted-string. + +## Exploit Scenario +A user uploads a file named `evil"\r\nSet-Cookie: x=y; Path=/`. When the file is downloaded, the response includes the injected header. + +## Recommendation +- Apply the existing `sanitize_filename` helper at upload time, before persisting metadata. +- Or sanitize at download time by RFC 6266 encoding (`filename*=UTF-8''…`). + +## References +- Related: SEC-098. diff --git a/tasks/done/SEC-167-reconnect-no-jitter.md b/tasks/done/SEC-167-reconnect-no-jitter.md new file mode 100644 index 0000000..756ddda --- /dev/null +++ b/tasks/done/SEC-167-reconnect-no-jitter.md @@ -0,0 +1,18 @@ +# SEC-167: Sync reconnect backoff has no jitter and no global cap + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/sync/transport.rs` +- **Lines**: 292-320 (`reconnect_with_backoff`) + +## Description +`reconnect_with_backoff` doubles backoff up to 30 s, but the caller in `sync_with_peers` retries every `sync_interval` (1 s) regardless. There is no jitter and no per-peer circuit breaker. A flapping peer triggers a thundering-herd reconnect storm. + +## Recommendation +- Add 0–25% random jitter to each backoff step. +- Add a per-peer circuit breaker (e.g., open after 10 consecutive failures, half-open probe every 60 s). + +## References +- Related: SEC-110. diff --git a/tasks/done/SEC-168-lz4-decompress-silent-failure.md b/tasks/done/SEC-168-lz4-decompress-silent-failure.md new file mode 100644 index 0000000..1da29f2 --- /dev/null +++ b/tasks/done/SEC-168-lz4-decompress-silent-failure.md @@ -0,0 +1,17 @@ +# SEC-168: lz4 decompression failures silently produce empty buffers + +## Status +- **Severity**: LOW +- **Category**: Network Protocol Robustness +- **Project**: soli/db +- **File**: `src/sync/worker.rs` +- **Lines**: 1186 (`lz4_flex::decompress_size_prepended(&data).unwrap_or_default()`) + +## Description +Decompression failures fall back to an empty `Vec`, which then gets fed to `bincode::deserialize`. Malformed compressed frames become indistinguishable from empty messages, hiding attacks in logs. + +## Recommendation +Treat decompression error as fatal for the connection. Log the peer ID, error kind, and bytes. Penalize repeat offenders via a per-peer error budget. + +## References +- Related: SEC-154. diff --git a/tasks/done/SEC-169-systemtime-unwrap-panic.md b/tasks/done/SEC-169-systemtime-unwrap-panic.md new file mode 100644 index 0000000..fa42c6e --- /dev/null +++ b/tasks/done/SEC-169-systemtime-unwrap-panic.md @@ -0,0 +1,17 @@ +# SEC-169: `SystemTime::duration_since(UNIX_EPOCH).unwrap()` panics on clock skew + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/server/auth.rs`, `src/sync/transport.rs` +- **Lines**: auth.rs:618, 644; transport.rs:534 + +## Description +Several timestamps are taken via `SystemTime::now().duration_since(UNIX_EPOCH).unwrap()`. If the system clock ever runs before 1970 (NTP failure, container misconfiguration), the unwrap panics — terminating the worker thread or, on auth, the entire process. + +## Recommendation +Use `.unwrap_or_default()` (zero duration) or propagate as a typed error. For HMAC timestamps, falling back to zero is preferable to panic. + +## References +- Related: SEC-083. diff --git a/tasks/done/SEC-170-spawn-blocking-unwrap-in-ws.md b/tasks/done/SEC-170-spawn-blocking-unwrap-in-ws.md new file mode 100644 index 0000000..b4e909b --- /dev/null +++ b/tasks/done/SEC-170-spawn-blocking-unwrap-in-ws.md @@ -0,0 +1,17 @@ +# SEC-170: `spawn_blocking` join unwrap in WebSocket request path + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs` +- **Lines**: 899 + +## Description +A `tokio::task::spawn_blocking(...).await.unwrap()` runs in the WebSocket request path. A panic inside the blocking worker becomes a `JoinError`, which the unwrap converts into a panic of the request task — taking the WS connection (or worse, the whole task) down. + +## Recommendation +Propagate the `JoinError` via `?` and convert to a `DbError::InternalError(...)` returning 500 to the client. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-171-mutex-lock-unwrap-poisoning.md b/tasks/done/SEC-171-mutex-lock-unwrap-poisoning.md new file mode 100644 index 0000000..7cde494 --- /dev/null +++ b/tasks/done/SEC-171-mutex-lock-unwrap-poisoning.md @@ -0,0 +1,17 @@ +# SEC-171: `state.system_monitor.lock().unwrap()` panics propagate via mutex poisoning + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs`, `src/server/handlers/cluster.rs`, `src/server/handlers/sharding.rs`, `src/server/metrics.rs` (~10 sites) + +## Description +The system monitor (and a few other shared resources) use `std::sync::Mutex` and call `.lock().unwrap()` everywhere. A panic while holding the lock poisons it permanently; subsequent requests panic on every lock attempt. + +## Recommendation +- Use `parking_lot::Mutex` (no poisoning). +- Or handle `PoisonError` by recovering the inner value (`.unwrap_or_else(|e| e.into_inner())`). + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-172-handler-unwraps.md b/tasks/done/SEC-172-handler-unwraps.md new file mode 100644 index 0000000..8c739ba --- /dev/null +++ b/tasks/done/SEC-172-handler-unwraps.md @@ -0,0 +1,17 @@ +# SEC-172: `unwrap()` on options/results reachable from request handlers + +## Status +- **Severity**: LOW +- **Category**: Reliability +- **Project**: soli/db +- **File**: `src/server/handlers/sharding.rs`, `src/server/handlers/collections/read.rs`, `src/server/handlers/blobs.rs` +- **Lines**: sharding.rs:58; collections/read.rs:383; blobs.rs:160, 322 + +## Description +Spot-checked handlers contain `unwrap()` calls reachable from untrusted request flow. A specially crafted request can panic the handler thread. + +## Recommendation +Replace each `unwrap()` with `?` against a typed `DbError`, returning a 4xx/5xx as appropriate. Add `clippy::unwrap_used` lint at module boundaries to prevent regressions. + +## References +- Related: SEC-094. diff --git a/tasks/done/SEC-173-cron-script-not-revalidated.md b/tasks/done/SEC-173-cron-script-not-revalidated.md new file mode 100644 index 0000000..c33ee0a --- /dev/null +++ b/tasks/done/SEC-173-cron-script-not-revalidated.md @@ -0,0 +1,17 @@ +# SEC-173: Cron-spawned jobs inherit unvalidated `script_path` + +## Status +- **Severity**: LOW +- **Category**: Injection (depends on SEC-125) +- **Project**: soli/db +- **File**: `src/queue/cron.rs` +- **Lines**: 96-112 (job spawn from cron entry) + +## Description +When a cron entry fires, the spawned `Job` inherits the cron entry's `script_path` and `params` with no re-validation. If the cron was created with a malicious value (see SEC-125), the SDBQL injection persists until cron deletion. + +## Recommendation +Validate `script_path` at cron CREATE/UPDATE time using the same allowlist regex applied at enqueue (SEC-125). Add a startup linter that rejects existing cron entries with invalid paths. + +## References +- Depends on SEC-125. diff --git a/tasks/done/SEC-174-storage-unsafe-blocks.md b/tasks/done/SEC-174-storage-unsafe-blocks.md new file mode 100644 index 0000000..c6b4841 --- /dev/null +++ b/tasks/done/SEC-174-storage-unsafe-blocks.md @@ -0,0 +1,19 @@ +# SEC-174: Additional `unsafe` blocks in storage layer rely on external locking + +## Status +- **Severity**: LOW (INFO-level — depends on lock discipline) +- **Category**: Memory Safety +- **Project**: soli/db +- **File**: `src/storage/database.rs`, `src/storage/engine.rs`, `src/storage/columnar.rs` +- **Lines**: database.rs:67, 101; engine.rs:410, 473; columnar.rs:1036 + +## Description +Five `unsafe { (*db_ptr).create_cf / drop_cf }` blocks cast `Arc` to `*mut DB` to call mutating column-family APIs. Soundness depends entirely on the engine-level `cf_lock` `RwLock` being held. `engine.rs` does hold it; `database.rs:67` only checks `cf_handle` and does **not** acquire `cf_lock` — racing `create_collection` against `delete_collection` from a different `Database` handle that shares the same `Arc` is UB per the rust-rocksdb contract. + +## Recommendation +- Require `cf_lock` for both code paths (move the lock acquisition into a shared helper). +- Or upgrade to a `rust-rocksdb` version that exposes safe `&self` CF mutation. +- Audit all 5 sites and document the locking invariant inline. + +## References +- Related: SEC-107 (acknowledged earlier `unsafe` use). diff --git a/tasks/done/SEC-175-blob-replication-no-integrity.md b/tasks/done/SEC-175-blob-replication-no-integrity.md new file mode 100644 index 0000000..7b948e9 --- /dev/null +++ b/tasks/done/SEC-175-blob-replication-no-integrity.md @@ -0,0 +1,18 @@ +# SEC-175: Blob replication has no per-chunk integrity check + +## Status +- **Severity**: LOW (escalates to MEDIUM once SEC-122 is fixed) +- **Category**: Data Integrity +- **Project**: soli/db +- **File**: `src/sync/blob_replication.rs` +- **Lines**: 51-105 (receive_blob_replication), 226-265 (receive_blob_upload) + +## Description +Even after SEC-122 closes the unauthenticated-endpoint hole, the receiving side has no per-chunk hash, no signature on metadata, and trusts the `_key` field from the body. A compromised peer (or any actor with the cluster secret) can overwrite arbitrary blob keys with corrupted data. + +## Recommendation +- The coordinator includes a SHA-256 of each chunk in the metadata; receivers verify before `put_blob_chunk`. +- Optionally sign metadata with the cluster keyfile and require signature verification on the receiver. + +## References +- Related: SEC-102, SEC-122, SEC-135. diff --git a/tasks/todo/COV-000-overview.md b/tasks/todo/COV-000-overview.md new file mode 100644 index 0000000..afc2d96 --- /dev/null +++ b/tasks/todo/COV-000-overview.md @@ -0,0 +1,56 @@ +# COV-000: Coverage baseline & roadmap + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **Scope**: workspace + +## Baseline (2026-05-08) +Generated via `cargo llvm-cov --release --no-fail-fast --summary-only --workspace`. + +- **Lines**: 42.89% (39,153 of 68,557 missed) +- **Functions**: 40.47% +- **Regions**: 43.75% + +### Top-level area breakdown (lines) +| % | Area | Lines | Missed | +|---:|---|---:|---:| +| 0% | `bin/` (solidb-dump/restore/repl) | 1,328 | 1,328 | +| 0% | `cli/tui/` | 2,231 | 2,231 | +| 0% | `driver/handlers/` | 2,280 | 2,280 | +| 11% | `cli/scripts/` | 2,018 | 1,797 | +| 22% | `server/handlers/` | 6,361 | 4,946 | +| 40% | `scripting/` | 3,530 | 2,095 | +| 42% | `sharding/` | 5,490 | 3,195 | +| 50% | `sync/` | 4,320 | 2,175 | +| 51% | `sdbql/executor/` | 8,885 | 4,378 | +| 60% | `storage/` | 4,416 | 1,740 | +| 76% | `sdbql/parser/` | 1,532 | 367 | +| 90% | `sdbql/` (top-level) | 903 | 94 | + +## Active coverage tasks (todo/) +- COV-001 — `server/role_handlers.rs` (641 lines @ 0%) +- COV-002 — `server/handlers/blobs.rs` (291 @ 0%) +- COV-003 — `server/handlers/sync.rs` (452 @ 0%) +- COV-004 — `server/handlers/websocket.rs` (632 @ 0%) +- COV-005 — `sdbql/executor/search.rs` (476 @ 0%) +- COV-006 — `server/columnar_handlers.rs` (284 @ 0%) +- COV-007 — `server/nl_handlers.rs` (298 @ 0%) +- COV-008 — `transaction/distributed.rs` (283 @ 0%) +- COV-009 — `sync/worker.rs` + `sync/transport.rs` (905 + 389 @ 0%) +- COV-010 — `server/llm_client.rs` (353 @ 0%) + +## Out-of-scope here (separate tracks) +- `bin/`, `cli/tui/`, `cli/scripts/`: CLI entry points. Cover via end-to-end shell-level tests rather than unit tests. +- `driver/handlers/`: binary protocol handlers. Cover via `clients/` SDK round-trip tests. + +## How to re-run the report +```bash +cargo llvm-cov --release --no-fail-fast --summary-only --workspace +# HTML report: +cargo llvm-cov --release --no-fail-fast --workspace --html +# open target/llvm-cov/html/index.html +``` + +## Target +After all COV-001..010 are merged, expect total line coverage ≥55% (rough estimate: ~3,500 newly-covered lines across these files). diff --git a/tasks/todo/COV-001-role-handlers.md b/tasks/todo/COV-001-role-handlers.md new file mode 100644 index 0000000..7a8ad22 --- /dev/null +++ b/tasks/todo/COV-001-role-handlers.md @@ -0,0 +1,30 @@ +# COV-001: Cover `server/role_handlers.rs` (0% → ≥70%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/role_handlers.rs` +- **Current coverage**: 0% (641 lines uncovered) + +## Description +RBAC role-administration endpoints (create/list/update/delete roles, assign/revoke role to user, list user roles, etc.) are not exercised by any test. `tests/rbac_admin_endpoints_tests.rs` only checks that admin-only endpoints reject viewers — it never hits the role handlers themselves. + +## Recommendation +Add `tests/role_handlers_tests.rs` using the existing axum-oneshot pattern (see `tests/handlers_tests.rs`). `create_test_app` must call `engine.initialize()` so the `_system._roles` collection exists. + +Endpoints to exercise (drive routes via `/_api/role*` URLs — see `src/server/routes.rs` for exact paths): +- POST create role (happy path + duplicate name) +- GET list roles (empty + populated) +- GET get role by name (found + not-found) +- PUT update role permissions (valid + invalid permission spec) +- DELETE role (existing + not-found, plus rejection if assigned to a user) +- POST assign role to user / revoke role from user +- GET list a user's roles +- AuthZ: each endpoint with viewer JWT → 403, missing JWT → 401 + +## Goal +Raise `src/server/role_handlers.rs` to ≥70% line coverage. + +## References +- Pattern: `tests/handlers_tests.rs`, `tests/rbac_admin_endpoints_tests.rs` +- Coverage tool: `cargo llvm-cov --release --workspace --summary-only` diff --git a/tasks/todo/COV-002-blob-handlers.md b/tasks/todo/COV-002-blob-handlers.md new file mode 100644 index 0000000..c3fd3a1 --- /dev/null +++ b/tasks/todo/COV-002-blob-handlers.md @@ -0,0 +1,29 @@ +# COV-002: Cover `server/handlers/blobs.rs` (0% → ≥60%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/handlers/blobs.rs` +- **Current coverage**: 0% (291 lines uncovered) + +## Description +Public blob CRUD endpoints (upload, download, list, delete, metadata) have no test coverage. `tests/blob_distribution_tests.rs` only exercises the `/_internal/blob/*` cluster-replication routes, not the user-facing handlers. + +## Recommendation +Add `tests/blob_handlers_tests.rs` using the axum-oneshot pattern. `create_test_app` must call `engine.initialize()` (see existing pattern after fix in `tests/blob_distribution_tests.rs`). + +Endpoints to exercise: +- POST upload blob (single-shot + multipart) — happy path + missing content-type + payload too large +- GET download blob — found + not-found + 404 on non-blob collection +- GET blob metadata +- DELETE blob — existing + not-found + idempotency +- GET list blobs in collection +- AuthZ: each endpoint without JWT → 401, with insufficient role → 403 +- Filename sanitization: SEC-166 already added CRLF stripping for `Content-Disposition` — assert it via a key containing `\r\n`. + +## Goal +Raise `src/server/handlers/blobs.rs` to ≥60% line coverage. + +## References +- Pattern: `tests/blob_distribution_tests.rs`, `tests/handlers_tests.rs` +- Related: SEC-166 (CRLF in blob filenames) diff --git a/tasks/todo/COV-003-sync-handlers.md b/tasks/todo/COV-003-sync-handlers.md new file mode 100644 index 0000000..05c2019 --- /dev/null +++ b/tasks/todo/COV-003-sync-handlers.md @@ -0,0 +1,31 @@ +# COV-003: Cover `server/handlers/sync.rs` (0% → ≥60%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/handlers/sync.rs` +- **Current coverage**: 0% (452 lines uncovered) + +## Description +Offline-first sync endpoints (`/_api/sync/session`, `/_api/sync/pull`, `/_api/sync/push`, `/_api/sync/ack`, `/_api/sync/conflicts`, `/_api/sync/resolve`) are unexercised. SEC-154 fixed framing inconsistencies in the sync protocol but the route handlers themselves have no integration test. + +## Recommendation +Add `tests/sync_handlers_tests.rs`. `create_test_app` must: +- Call `engine.initialize()`. +- Configure a cluster keyfile via `StorageEngine::with_cluster_config` so `SyncSession::verify_session_id` (which uses the cluster secret) doesn't no-op silently. + +Endpoints to exercise: +- POST `/_api/sync/session` — register a sync session, capture returned session id. +- POST `/_api/sync/pull` — empty change set (cold start) + after some inserts. +- POST `/_api/sync/push` — valid batch + invalid framing → 400 (regression for SEC-154). +- POST `/_api/sync/ack` — happy path + bad session id → 401/403. +- GET `/_api/sync/conflicts` — empty + with simulated conflicts. +- POST `/_api/sync/resolve` — accept-local / accept-remote / merge. +- AuthZ: each endpoint without JWT → 401. + +## Goal +Raise `src/server/handlers/sync.rs` to ≥60% line coverage. + +## References +- Pattern: `tests/handlers_tests.rs`, `tests/sync_protocol_tests.rs` +- Related: SEC-154 diff --git a/tasks/todo/COV-004-websocket-handlers.md b/tasks/todo/COV-004-websocket-handlers.md new file mode 100644 index 0000000..cf8b814 --- /dev/null +++ b/tasks/todo/COV-004-websocket-handlers.md @@ -0,0 +1,27 @@ +# COV-004: Cover `server/handlers/websocket.rs` (0% → ≥40%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/handlers/websocket.rs` +- **Current coverage**: 0% (632 lines uncovered) + +## Description +WebSocket changefeed and live-query endpoints have no automated tests. SEC-152 added re-validation and idle-timeout to WS connections; both pieces and the query-token gating live in this file. + +## Recommendation +Add `tests/websocket_handlers_tests.rs` driven by `tokio-tungstenite` against an axum server bound to an ephemeral port (see how integration suites that need real sockets work today — search for `tokio::net::TcpListener` in `tests/`). Where the harness is too heavy, extract pure helpers from `websocket.rs` (token-gate predicate, idle-timeout calculator, message validator) and unit-test those. + +Cases to exercise: +- WS connect with valid livequery JWT → upgrade succeeds; with missing/expired token → 401/403 (SEC-152). +- Idle-timeout disconnects after N seconds of silence (use a short timeout in tests). +- Re-validation: after the JWT expires mid-connection, the next message is rejected and the socket closes. +- Subscribe to a changefeed; insert via HTTP; assert WS receives the event. +- Malformed inbound frame → graceful error. + +## Goal +Raise `src/server/handlers/websocket.rs` to ≥40% line coverage. (Pure-helper unit tests can push this further without spinning up a full WS harness.) + +## References +- Related: SEC-152, SEC-153 +- Pattern: existing async + axum tests in `tests/` diff --git a/tasks/todo/COV-005-sdbql-search.md b/tasks/todo/COV-005-sdbql-search.md new file mode 100644 index 0000000..3bf87f7 --- /dev/null +++ b/tasks/todo/COV-005-sdbql-search.md @@ -0,0 +1,28 @@ +# COV-005: Cover `sdbql/executor/search.rs` (0% → ≥60%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/sdbql/executor/search.rs` +- **Current coverage**: 0% (476 lines uncovered) + +## Description +The full-text search executor module is not exercised by any test. SDBQL search functions (`FULLTEXT(...)`, `MATCH(...)`, fuzzy/phrase queries) flow through this code, but there is no targeted test file for it. + +## Recommendation +Add `tests/sdbql_search_tests.rs`. Build a small in-memory dataset via `StorageEngine::new` (no router needed), create a fulltext index, and exercise queries via the public SDBQL entry point used by other `sdbql_*_tests.rs` files. See `tests/sdbql_fuzzy_tests.rs` and `tests/sdbql_function_tests.rs` for the established setup. + +Cases to exercise: +- Single-term match, multi-term AND/OR +- Phrase queries (`"exact phrase"`) +- Fuzzy / edit-distance matching +- Stop-word handling +- Score ordering (higher relevance first) +- Empty / non-existent index → graceful error +- Combination with `FILTER` / `SORT` / `LIMIT` clauses + +## Goal +Raise `src/sdbql/executor/search.rs` to ≥60% line coverage. + +## References +- Pattern: `tests/sdbql_fuzzy_tests.rs`, `tests/sdbql_function_tests.rs` diff --git a/tasks/todo/COV-006-columnar-handlers.md b/tasks/todo/COV-006-columnar-handlers.md new file mode 100644 index 0000000..3eee861 --- /dev/null +++ b/tasks/todo/COV-006-columnar-handlers.md @@ -0,0 +1,27 @@ +# COV-006: Cover `server/columnar_handlers.rs` (0% → ≥60%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/columnar_handlers.rs` +- **Current coverage**: 0% (284 lines uncovered) + +## Description +Columnar (Parquet/Arrow) export/import endpoints have no test coverage. Pure-storage tests exist (`tests/columnar_index_tests.rs`, `tests/columnar_tests.rs`) but the HTTP handler layer is unexercised. + +## Recommendation +Add `tests/columnar_handlers_tests.rs` using the axum-oneshot pattern (`engine.initialize()` required). + +Endpoints to exercise (consult `src/server/routes.rs` for the exact paths): +- POST export collection to columnar — happy path round-trip (export then re-import). +- GET column statistics endpoint. +- POST query against columnar storage. +- Error cases: non-existent collection → 404, invalid format → 400. +- AuthZ: missing JWT → 401, viewer JWT on write endpoint → 403. + +## Goal +Raise `src/server/columnar_handlers.rs` to ≥60% line coverage. + +## References +- Pattern: `tests/handlers_tests.rs` +- Storage layer tests: `tests/columnar_tests.rs` diff --git a/tasks/todo/COV-007-nl-handlers.md b/tasks/todo/COV-007-nl-handlers.md new file mode 100644 index 0000000..ed6917f --- /dev/null +++ b/tasks/todo/COV-007-nl-handlers.md @@ -0,0 +1,29 @@ +# COV-007: Cover `server/nl_handlers.rs` (0% → ≥50%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/nl_handlers.rs` +- **Current coverage**: 0% (298 lines uncovered) + +## Description +Natural-language query handlers (NL → SDBQL translation, schema introspection) have no test coverage. The LLM client this depends on (`src/server/llm_client.rs`) is also at 0% — see COV-010. + +## Recommendation +Add `tests/nl_handlers_tests.rs`. Because NL handlers call out to an LLM, prefer one of: +1. Inject a fake LLM client (introduce a trait + test impl) and assert the handler glue (request validation, schema collection, prompt assembly, response parsing). +2. Use a stubbed HTTP server (e.g. `wiremock`) bound to an ephemeral port and point `llm_client` at it via configuration. + +Cases to exercise: +- Valid NL query → handler builds the expected prompt (assert via fake) → returns SDBQL. +- Schema collection for a non-existent DB → 404. +- LLM returns malformed JSON → handler responds 502/400 instead of panicking. +- AuthZ: missing JWT → 401. +- Rate-limit / size-cap on the NL prompt (if implemented). + +## Goal +Raise `src/server/nl_handlers.rs` to ≥50% line coverage. + +## References +- Companion: COV-010 (`server/llm_client.rs`) +- Pattern: `tests/handlers_tests.rs` diff --git a/tasks/todo/COV-008-distributed-transaction.md b/tasks/todo/COV-008-distributed-transaction.md new file mode 100644 index 0000000..a6d9684 --- /dev/null +++ b/tasks/todo/COV-008-distributed-transaction.md @@ -0,0 +1,29 @@ +# COV-008: Cover `transaction/distributed.rs` (0% → ≥50%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/transaction/distributed.rs` +- **Current coverage**: 0% (283 lines uncovered) + +## Description +Distributed (two-phase-commit) transaction logic is uncovered. `tests/transaction_handlers_tests.rs` only exercises the local single-node transaction manager. + +## Recommendation +Two complementary layers: + +1. **Unit tests**: extract pure-logic helpers (state transitions, vote tallying, prepare/commit/abort decisioning, timeout calculation) into testable functions and assert them directly without networking. + +2. **Integration tests**: spin up two `StorageEngine`s in the same process, wire them via the in-process trait used by sharding tests, and drive a 2PC across them. Cases: + - Happy path: prepare on both → commit on both. + - One participant votes abort → coordinator aborts on both. + - One participant times out during prepare → coordinator aborts. + - Coordinator crash after prepare (recovery: participants honor commit on replay). + - Idempotency: replayed commit/abort is a no-op. + +## Goal +Raise `src/transaction/distributed.rs` to ≥50% line coverage. + +## References +- Pattern: `tests/transaction_handlers_tests.rs` +- Local manager: `src/transaction/manager.rs` (currently 80%) diff --git a/tasks/todo/COV-009-sync-worker-and-transport.md b/tasks/todo/COV-009-sync-worker-and-transport.md new file mode 100644 index 0000000..b7fdded --- /dev/null +++ b/tasks/todo/COV-009-sync-worker-and-transport.md @@ -0,0 +1,40 @@ +# COV-009: Cover `sync/worker.rs` + `sync/transport.rs` (0% → ≥40%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **Files**: + - `src/sync/worker.rs` (905 lines uncovered) + - `src/sync/transport.rs` (389 lines uncovered) +- **Current coverage**: 0% / 0% + +## Description +The replication worker (master-master eventual-consistency loop) and its HTTP transport are entirely uncovered. Failures here cause silent data loss between nodes. + +## Recommendation +Layered approach: + +1. **Pure-logic unit tests on `worker.rs`** — extract decision functions and test them directly: + - Conflict-resolution policy (last-writer-wins via HLC, etc.). + - Backoff / retry timing. + - Per-peer queue draining order. + - "Caught-up" predicate. + +2. **In-process two-engine integration test** — `tests/replication_worker_tests.rs`: + - Two `StorageEngine` instances, each with cluster keyfile configured. + - Inject a stubbed transport (trait swap) so no real sockets are needed. + - Insert on node A → run the worker once → assert the doc lands on node B with correct HLC. + - Insert concurrently on both → assert convergence after both workers run. + - Peer offline → entries queue → peer back → drain. + +3. **`transport.rs` HTTP path** — exercise the HTTP send/receive helpers against a `wiremock` server (or a minimal axum app) verifying: + - `X-Cluster-Secret` header is set. + - Retry on 5xx, give up on 4xx. + - Decompression failure path is handled (regression for SEC-168). + +## Goal +Raise both files to ≥40% line coverage. + +## References +- Related: SEC-122, SEC-154, SEC-168 +- Pattern: `tests/sync_protocol_tests.rs` diff --git a/tasks/todo/COV-010-llm-client.md b/tasks/todo/COV-010-llm-client.md new file mode 100644 index 0000000..39addf6 --- /dev/null +++ b/tasks/todo/COV-010-llm-client.md @@ -0,0 +1,29 @@ +# COV-010: Cover `server/llm_client.rs` (0% → ≥60%) + +## Status +- **Category**: Test Coverage +- **Project**: soli/db +- **File**: `src/server/llm_client.rs` +- **Current coverage**: 0% (353 lines uncovered) + +## Description +The LLM client used by NL query handlers (COV-007) and any AI features has no test coverage. It does HTTP I/O, retries, and parses provider responses — all easily wrong, all silently. + +## Recommendation +Add `tests/llm_client_tests.rs` driven by `wiremock` (or a minimal axum stub) bound to an ephemeral port. Construct the client pointing at that URL and assert: + +- Successful completion call: stub returns canned JSON, client parses and returns the expected struct. +- Network error → returns error variant (no panic). +- 5xx response → retries up to the configured limit, then fails. +- 4xx response → fails immediately, no retry. +- Malformed JSON in 200 response → parse error surfaces as a typed error. +- API key / auth header is set on outgoing requests (assert via `wiremock` matcher). +- Request timeout fires (use a slow stub). + +If the client doesn't currently take an injectable base URL or `reqwest::Client`, add that knob — it's both easier to test and useful in production for proxying. + +## Goal +Raise `src/server/llm_client.rs` to ≥60% line coverage. + +## References +- Companion: COV-007 (`server/nl_handlers.rs`) diff --git a/tests/auth_tests.rs b/tests/auth_tests.rs index fb2c7d3..fc40ba8 100644 --- a/tests/auth_tests.rs +++ b/tests/auth_tests.rs @@ -162,7 +162,7 @@ fn test_validate_api_key_without_keys() { let (engine, _tmp) = create_test_engine(); // Initialize auth (creates _system database) - let _ = AuthService::init(&engine, None); + let _ = AuthService::init(&engine, None, engine.data_dir()); // Try to validate a non-existent key let result = AuthService::validate_api_key(&engine, "sdb_nonexistent_key"); @@ -181,7 +181,7 @@ fn test_auth_init() { engine.create_database("_system".to_string()).unwrap(); // Initialize auth - let result = AuthService::init(&engine, None); + let result = AuthService::init(&engine, None, engine.data_dir()); assert!( result.is_ok(), "Auth init should succeed: {:?}", @@ -197,9 +197,9 @@ fn test_auth_init_idempotent() { engine.create_database("_system".to_string()).unwrap(); // Initialize multiple times - let result1 = AuthService::init(&engine, None); - let result2 = AuthService::init(&engine, None); - let result3 = AuthService::init(&engine, None); + let result1 = AuthService::init(&engine, None, engine.data_dir()); + let result2 = AuthService::init(&engine, None, engine.data_dir()); + let result3 = AuthService::init(&engine, None, engine.data_dir()); assert!(result1.is_ok()); assert!(result2.is_ok()); diff --git a/tests/blob_distribution_tests.rs b/tests/blob_distribution_tests.rs index d90d3d9..03cf7b7 100644 --- a/tests/blob_distribution_tests.rs +++ b/tests/blob_distribution_tests.rs @@ -13,6 +13,7 @@ use axum::{ http::{Request, StatusCode}, }; use serde_json::{json, Value}; +use solidb::cluster::ClusterConfig; use solidb::scripting::ScriptStats; use solidb::server::auth::AuthService; use solidb::server::routes::create_router; @@ -21,10 +22,22 @@ use std::sync::Arc; use tempfile::TempDir; use tower::ServiceExt; +const TEST_CLUSTER_SECRET: &str = "test-cluster-secret"; + fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); - let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) - .expect("Failed to create storage engine"); + let cluster_config = ClusterConfig { + node_id: "test-node".to_string(), + peers: vec![], + replication_port: 6746, + keyfile: Some(TEST_CLUSTER_SECRET.to_string()), + }; + let engine = + StorageEngine::with_cluster_config(tmp_dir.path().to_str().unwrap(), cluster_config) + .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); @@ -121,6 +134,7 @@ async fn test_upload_and_retrieve_blob() { "Content-Type", format!("multipart/form-data; boundary={}", boundary), ) + .header("X-Cluster-Secret", TEST_CLUSTER_SECRET) .body(Body::from(body_bytes)) .unwrap(), ) @@ -139,6 +153,7 @@ async fn test_upload_and_retrieve_blob() { Request::builder() .method("GET") .uri("/_internal/blob/replicate/blob_db/images/test_image.png/chunk/0") + .header("X-Cluster-Secret", TEST_CLUSTER_SECRET) .body(Body::empty()) .unwrap(), ) @@ -209,6 +224,7 @@ async fn test_blob_replication_endpoint() { "Content-Type", format!("multipart/form-data; boundary={}", boundary), ) + .header("X-Cluster-Secret", TEST_CLUSTER_SECRET) .body(Body::from(body_bytes)) .unwrap(), ) @@ -319,6 +335,7 @@ async fn test_blob_distribution_with_data() { "Content-Type", format!("multipart/form-data; boundary={}", boundary), ) + .header("X-Cluster-Secret", TEST_CLUSTER_SECRET) .body(Body::from(body_bytes)) .unwrap(), ) diff --git a/tests/collection_properties_test.rs b/tests/collection_properties_test.rs index 8bf5f19..9372807 100644 --- a/tests/collection_properties_test.rs +++ b/tests/collection_properties_test.rs @@ -19,6 +19,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/handlers_tests.rs b/tests/handlers_tests.rs index 4088941..0f4d8cc 100644 --- a/tests/handlers_tests.rs +++ b/tests/handlers_tests.rs @@ -26,6 +26,9 @@ fn create_test_app() -> (TempDir, axum::Router, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/http_api_test.rs b/tests/http_api_test.rs index 785e821..4a66d8d 100644 --- a/tests/http_api_test.rs +++ b/tests/http_api_test.rs @@ -24,6 +24,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); // Create minimal dependencies let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/queue_handlers_tests.rs b/tests/queue_handlers_tests.rs index 77b010e..5344fba 100644 --- a/tests/queue_handlers_tests.rs +++ b/tests/queue_handlers_tests.rs @@ -17,6 +17,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); let router = create_router(engine, None, None, None, None, script_stats, None, None, 0); diff --git a/tests/rbac_admin_endpoints_tests.rs b/tests/rbac_admin_endpoints_tests.rs new file mode 100644 index 0000000..3481ad1 --- /dev/null +++ b/tests/rbac_admin_endpoints_tests.rs @@ -0,0 +1,125 @@ +//! RBAC enforcement tests for admin-only endpoints. +//! +//! Covers SEC-126 follow-up fixes: a non-admin (viewer) JWT must be +//! rejected by `DELETE /_api/database/{name}` and `GET /_api/auth/api_keys`. +//! Cluster admin endpoints are exercised in `cluster_tests.rs`. + +use axum::{ + body::Body, + http::{Request, StatusCode}, +}; +use serde_json::json; +use solidb::scripting::ScriptStats; +use solidb::server::auth::AuthService; +use solidb::server::routes::create_router; +use solidb::storage::StorageEngine; +use std::sync::Arc; +use tempfile::TempDir; +use tower::ServiceExt; + +fn create_app() -> (TempDir, axum::Router, String, String) { + let tmp_dir = TempDir::new().expect("temp dir"); + let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()).expect("engine"); + engine.initialize().expect("initialize _system"); + let script_stats = Arc::new(ScriptStats::default()); + let router = create_router(engine, None, None, None, None, script_stats, None, None, 0); + + let admin_token = + AuthService::create_jwt_with_roles("admin_user", Some(vec!["admin".to_string()]), None) + .expect("admin jwt"); + let viewer_token = + AuthService::create_jwt_with_roles("viewer_user", Some(vec!["viewer".to_string()]), None) + .expect("viewer jwt"); + + (tmp_dir, router, admin_token, viewer_token) +} + +fn bearer(token: &str) -> String { + format!("Bearer {}", token) +} + +#[tokio::test] +async fn delete_database_rejects_viewer() { + let (_tmp, app, admin_token, viewer_token) = create_app(); + + // Admin creates the database. + let resp = app + .clone() + .oneshot( + Request::builder() + .method("POST") + .uri("/_api/database") + .header("Content-Type", "application/json") + .header("Authorization", bearer(&admin_token)) + .body(Body::from(json!({"name": "victim_db"}).to_string())) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + + // Viewer attempts DELETE — must be forbidden. + let resp = app + .oneshot( + Request::builder() + .method("DELETE") + .uri("/_api/database/victim_db") + .header("Authorization", bearer(&viewer_token)) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +#[tokio::test] +async fn list_api_keys_rejects_viewer() { + let (_tmp, app, _admin_token, viewer_token) = create_app(); + + let resp = app + .oneshot( + Request::builder() + .method("GET") + .uri("/_api/auth/api-keys") + .header("Authorization", bearer(&viewer_token)) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +#[tokio::test] +async fn delete_database_allows_admin() { + let (_tmp, app, admin_token, _viewer_token) = create_app(); + + // Create the DB. + let _ = app + .clone() + .oneshot( + Request::builder() + .method("POST") + .uri("/_api/database") + .header("Content-Type", "application/json") + .header("Authorization", bearer(&admin_token)) + .body(Body::from(json!({"name": "ok_to_delete"}).to_string())) + .unwrap(), + ) + .await + .unwrap(); + + let resp = app + .oneshot( + Request::builder() + .method("DELETE") + .uri("/_api/database/ok_to_delete") + .header("Authorization", bearer(&admin_token)) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NO_CONTENT); +} diff --git a/tests/script_handlers_tests.rs b/tests/script_handlers_tests.rs index c8c6fef..b96b941 100644 --- a/tests/script_handlers_tests.rs +++ b/tests/script_handlers_tests.rs @@ -22,6 +22,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/sdbql_operator_tests.rs b/tests/sdbql_operator_tests.rs index dca34dd..36b6040 100644 --- a/tests/sdbql_operator_tests.rs +++ b/tests/sdbql_operator_tests.rs @@ -388,6 +388,21 @@ fn test_range_in_for() { assert_eq!(results[2], json!(6.0)); } +// SEC-131: oversize ranges must error rather than allocate / panic. +#[test] +fn test_range_rejects_oversize() { + use solidb::{parse, QueryExecutor}; + let (engine, _tmp) = create_test_engine(); + let exec = QueryExecutor::with_database(&engine, "test_db".to_string()); + let parsed = parse("RETURN 0..100000000").expect("parse"); + let err = exec.execute(&parsed).expect_err("should reject"); + assert!( + format!("{}", err).contains("exceeds maximum"), + "unexpected error: {}", + err + ); +} + // ============================================================================ // Null Handling // ============================================================================ diff --git a/tests/sharding_api_tests.rs b/tests/sharding_api_tests.rs index 6a3e3da..8de5e0b 100644 --- a/tests/sharding_api_tests.rs +++ b/tests/sharding_api_tests.rs @@ -19,6 +19,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/sync_protocol_tests.rs b/tests/sync_protocol_tests.rs index 2ad8a8e..012ce81 100644 --- a/tests/sync_protocol_tests.rs +++ b/tests/sync_protocol_tests.rs @@ -207,14 +207,22 @@ fn test_node_stats_with_values() { fn test_sync_message_auth_challenge() { let msg = SyncMessage::AuthChallenge { challenge: vec![1, 2, 3, 4, 5, 6, 7, 8], + timestamp: 1_700_000_000_000, + nonce: vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, 6], }; let encoded = msg.encode(); let decoded = SyncMessage::decode(&encoded[4..]).unwrap(); match decoded { - SyncMessage::AuthChallenge { challenge } => { + SyncMessage::AuthChallenge { + challenge, + timestamp, + nonce, + } => { assert_eq!(challenge.len(), 8); + assert_eq!(timestamp, 1_700_000_000_000); + assert_eq!(nonce.len(), 16); } _ => panic!("Wrong message type"), } diff --git a/tests/timeseries_tests.rs b/tests/timeseries_tests.rs index f6a0f60..bf72917 100644 --- a/tests/timeseries_tests.rs +++ b/tests/timeseries_tests.rs @@ -20,6 +20,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/tests/transaction_handlers_tests.rs b/tests/transaction_handlers_tests.rs index ab3866a..d4187e8 100644 --- a/tests/transaction_handlers_tests.rs +++ b/tests/transaction_handlers_tests.rs @@ -21,6 +21,9 @@ fn create_test_app() -> (axum::Router, TempDir, String) { let tmp_dir = TempDir::new().expect("Failed to create temp dir"); let engine = StorageEngine::new(tmp_dir.path().to_str().unwrap()) .expect("Failed to create storage engine"); + engine + .initialize() + .expect("Failed to initialize storage engine"); let script_stats = Arc::new(ScriptStats::default()); diff --git a/www/app/views/docs/security.etlua b/www/app/views/docs/security.etlua index 5daba68..42c953b 100644 --- a/www/app/views/docs/security.etlua +++ b/www/app/views/docs/security.etlua @@ -152,6 +152,7 @@
+
@@ -182,7 +183,16 @@ export JWT_SECRET="your-secure-random-secret-here" # OPTIONAL: Set admin password (otherwise randomly generated) # Useful for automated deployments where you can't read logs -export SOLIDB_ADMIN_PASSWORD="your-secure-password" +export SOLIDB_ADMIN_PASSWORD="your-secure-password" + +# RECOMMENDED: Restrict browser origins allowed by CORS and WebSocket upgrades. +# Comma-separated list of scheme://host[:port]. Default is deny-all when unset. +# Use "*" only in development; the wildcard disables credentialed CORS. +export SOLIDB_CORS_ALLOWED_ORIGINS="https://app.example.com,https://admin.example.com" + +# OPTIONAL: Allowlist for solidb.redirect() destinations from Lua scripts. +# Comma-separated list of scheme://host[:port]. When unset, all redirects are allowed. +export SOLIDB_ALLOWED_REDIRECT_ORIGINS="https://app.example.com" @@ -221,12 +231,15 @@ chmod 600 solidb-keyfile # 2. Copy the keyfile to all nodes # 3. Start with keyfile authentication -solidb --keyfile solidb-keyfile --peer node2:6746 --peer node3:6746 +solidb --keyfile solidb-keyfile --peer node2:6746 --peer node3:6746 + +# 4. Enforce keyfile in production: refuse to start if missing. +export SOLIDB_REQUIRE_KEYFILE=true

- The cluster communication protocol uses HMAC-SHA256 to sign all handshake messages, ensuring that only nodes possessing the shared keyfile can join the cluster. + The cluster communication protocol uses HMAC-SHA256 to sign all handshake messages, ensuring that only nodes possessing the shared keyfile can join the cluster. Set SOLIDB_REQUIRE_KEYFILE=true so that nodes started without a keyfile fail closed instead of silently accepting unauthenticated peers.

@@ -267,6 +280,27 @@ solidb --keyfile solidb-keyfile --peer node2:6746 --peer node3:6746 API key validation uses constant-time string comparison algorithms to prevent side-channel timing attacks that could reveal key contents.

+ +
+

RBAC on Privileged Endpoints

+

+ DELETE /_api/database/{name}, GET /_api/auth/api-keys, and the cluster remove-node / rebalance endpoints require the caller to hold the admin role. Viewer and editor tokens receive HTTP 403. +

+
+ +
+

Anonymous Script Audit Log

+

+ Anonymous calls into permissive script routes (/api/{db}/{service}/...) emit a WARN-level audit event with method, path, and peer fields under the audit tracing target. +

+
+ +
+

Range & Pagination Bounds

+

+ SDBQL a..b ranges are capped at 10M elements and overflow-safe. LIMIT offset + count is computed with checked arithmetic so 64-bit overflow yields an empty result instead of a panic. +

+