From 61f08c95712f9ac21d2e081bdecdcdd263842d43 Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Tue, 21 Apr 2026 19:12:31 -0400 Subject: [PATCH 1/3] fix(reader): honor absolute paths in duckdb:// URIs, fail fast on missing files Closes #345. - `duckdb:///abs/path` now opens `/abs/path` (SQLAlchemy convention) instead of stripping the leading slash and silently creating a relative phantom DB. - Reject `duckdb:////...` (4+ slashes) with a clear ambiguity message. - `DuckDBReader::from_connection_string` errors up-front when the target file does not exist, surfacing a helpful message instead of a confusing downstream "Table not found" error. - Apply the same `//` vs `///` parsing rules to `sqlite://` URIs. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reader/connection.rs | 88 +++++++++++++++++++++++++++++++--------- src/reader/duckdb.rs | 70 ++++++++++++++++++++++++++++++-- 2 files changed, 135 insertions(+), 23 deletions(-) diff --git a/src/reader/connection.rs b/src/reader/connection.rs index b97bd553..87203666 100644 --- a/src/reader/connection.rs +++ b/src/reader/connection.rs @@ -22,15 +22,43 @@ pub enum ConnectionInfo { ODBC(String), } +/// Parse a DuckDB/SQLite URI body into a filesystem path, following +/// SQLAlchemy conventions. +/// +/// After the `scheme://` prefix has been stripped, `body` is interpreted as: +/// - `memory` -> handled by the caller (only valid for `duckdb://memory`) +/// - `` -> relative path, returned verbatim +/// - `/` -> absolute path, returned with the leading `/` +/// - `//...` or empty -> error (ambiguous / missing path) +fn parse_uri_path(scheme: &str, body: &str) -> Result { + if body.is_empty() { + return Err(GgsqlError::ReaderError(format!( + "{} file path cannot be empty", + scheme + ))); + } + + if body.starts_with("//") { + return Err(GgsqlError::ReaderError(format!( + "Ambiguous {scheme} URI '{scheme}://{body}': use '{scheme}://relative/path' \ + for a relative path or '{scheme}:///absolute/path' for an absolute path", + scheme = scheme, + body = body, + ))); + } + + Ok(body.to_string()) +} + /// Parse a connection string into connection information /// /// # Supported Formats /// /// - `duckdb://memory` - DuckDB in-memory database -/// - `duckdb:///absolute/path/file.db` - DuckDB file (absolute path) +/// - `duckdb:///absolute/path/file.db` - DuckDB file (absolute path, SQLAlchemy convention) /// - `duckdb://relative/file.db` - DuckDB file (relative path) /// - `postgres://...` - PostgreSQL connection string -/// - `sqlite://...` - SQLite file path +/// - `sqlite://...` - SQLite file path (same `//` vs `///` rules as DuckDB) /// /// # Examples /// @@ -42,35 +70,27 @@ pub enum ConnectionInfo { /// /// let info = parse_connection_string("duckdb://data.db").unwrap(); /// assert_eq!(info, ConnectionInfo::DuckDBFile("data.db".to_string())); +/// +/// let info = parse_connection_string("duckdb:///tmp/data.db").unwrap(); +/// assert_eq!(info, ConnectionInfo::DuckDBFile("/tmp/data.db".to_string())); /// ``` pub fn parse_connection_string(uri: &str) -> Result { if uri == "duckdb://memory" { return Ok(ConnectionInfo::DuckDBMemory); } - if let Some(path) = uri.strip_prefix("duckdb://") { - // Remove leading slashes for file paths - let cleaned_path = path.trim_start_matches('/'); - if cleaned_path.is_empty() { - return Err(GgsqlError::ReaderError( - "DuckDB file path cannot be empty".to_string(), - )); - } - return Ok(ConnectionInfo::DuckDBFile(cleaned_path.to_string())); + if let Some(body) = uri.strip_prefix("duckdb://") { + let path = parse_uri_path("duckdb", body)?; + return Ok(ConnectionInfo::DuckDBFile(path)); } if uri.starts_with("postgres://") || uri.starts_with("postgresql://") { return Ok(ConnectionInfo::PostgreSQL(uri.to_string())); } - if let Some(path) = uri.strip_prefix("sqlite://") { - let cleaned_path = path.trim_start_matches('/'); - if cleaned_path.is_empty() { - return Err(GgsqlError::ReaderError( - "SQLite file path cannot be empty".to_string(), - )); - } - return Ok(ConnectionInfo::SQLite(cleaned_path.to_string())); + if let Some(body) = uri.strip_prefix("sqlite://") { + let path = parse_uri_path("sqlite", body)?; + return Ok(ConnectionInfo::SQLite(path)); } if let Some(conn_str) = uri.strip_prefix("odbc://") { @@ -106,8 +126,14 @@ mod tests { #[test] fn test_duckdb_file_absolute() { + // Three slashes -> absolute path (SQLAlchemy convention). The leading + // `/` must be preserved so DuckDB opens the intended file rather than + // silently creating a relative phantom DB. See issue #345. let info = parse_connection_string("duckdb:///tmp/data.db").unwrap(); - assert_eq!(info, ConnectionInfo::DuckDBFile("tmp/data.db".to_string())); + assert_eq!( + info, + ConnectionInfo::DuckDBFile("/tmp/data.db".to_string()) + ); } #[test] @@ -119,6 +145,28 @@ mod tests { ); } + #[test] + fn test_duckdb_file_four_slashes_rejected() { + // Four slashes is ambiguous — reject with a clear message instead of + // silently interpreting as an absolute path. + let result = parse_connection_string("duckdb:////tmp/data.db"); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Ambiguous"), "unexpected error: {}", err); + } + + #[test] + fn test_sqlite_absolute() { + let info = parse_connection_string("sqlite:///tmp/data.db").unwrap(); + assert_eq!(info, ConnectionInfo::SQLite("/tmp/data.db".to_string())); + } + + #[test] + fn test_sqlite_four_slashes_rejected() { + let result = parse_connection_string("sqlite:////tmp/data.db"); + assert!(result.is_err()); + } + #[test] fn test_postgres() { let uri = "postgres://user:pass@localhost/db"; diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 53dfb288..284985b0 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -112,9 +112,22 @@ impl DuckDBReader { ConnectionInfo::DuckDBMemory => Connection::open_in_memory().map_err(|e| { GgsqlError::ReaderError(format!("Failed to open in-memory DuckDB: {}", e)) })?, - ConnectionInfo::DuckDBFile(path) => Connection::open(&path).map_err(|e| { - GgsqlError::ReaderError(format!("Failed to open DuckDB file '{}': {}", path, e)) - })?, + ConnectionInfo::DuckDBFile(path) => { + // Fail fast if the path does not exist. DuckDB's default + // `open` creates the file if missing, which silently masks + // typos and produces confusing "Table not found" errors + // downstream. See issue #345. + if !std::path::Path::new(&path).exists() { + return Err(GgsqlError::ReaderError(format!( + "DuckDB file '{path}' does not exist. \ + Use 'duckdb://memory' for an in-memory database, \ + or create the file first (e.g. `duckdb {path}` to initialize an empty DB)." + ))); + } + Connection::open(&path).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to open DuckDB file '{}': {}", path, e)) + })? + } _ => { return Err(GgsqlError::ReaderError(format!( "Connection string '{}' is not supported by DuckDBReader", @@ -645,6 +658,57 @@ mod tests { assert!(reader.is_ok()); } + #[test] + fn test_missing_file_fails_fast() { + // Regression test for issue #345: opening a non-existent file must + // error instead of silently creating a phantom empty DB. + let missing = std::env::temp_dir().join("ggsql_does_not_exist_345.duckdb"); + // Ensure it really doesn't exist. + let _ = std::fs::remove_file(&missing); + + let uri = format!("duckdb://{}", missing.display()); + let result = DuckDBReader::from_connection_string(&uri); + let err = match result { + Ok(_) => panic!("expected error for missing file"), + Err(e) => e.to_string(), + }; + assert!( + err.contains("does not exist"), + "unexpected error message: {}", + err + ); + // And the reader must not have created the file as a side effect. + assert!( + !missing.exists(), + "reader created phantom DB at {}", + missing.display() + ); + } + + #[test] + fn test_absolute_path_opens_existing_file() { + // Regression test for issue #345: `duckdb:///abs/path` must open the + // absolute path, not strip the leading slash and treat it as relative. + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("abs_path_345.duckdb"); + + // Seed a DB with a known table. + { + let conn = Connection::open(&path).unwrap(); + conn.execute("CREATE TABLE t AS SELECT 1 AS x, 2 AS y", params![]) + .unwrap(); + } + + // Use the three-slash (absolute) URI form. + let uri = format!("duckdb://{}", path.display()); + let reader = DuckDBReader::from_connection_string(&uri) + .expect("should open existing absolute path"); + + // Table must be visible — confirming we opened the right file. + let df = reader.execute_sql("SELECT * FROM t").unwrap(); + assert_eq!(df.shape(), (1, 2)); + } + #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); From a410e502ae81ea05ace702e067a8bb48f75e1350 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 22 Apr 2026 09:23:46 +0100 Subject: [PATCH 2/3] Suggestions from code review --- src/reader/connection.rs | 89 +++++++++++----------------------------- src/reader/duckdb.rs | 70 ++----------------------------- 2 files changed, 26 insertions(+), 133 deletions(-) diff --git a/src/reader/connection.rs b/src/reader/connection.rs index 87203666..09796c9f 100644 --- a/src/reader/connection.rs +++ b/src/reader/connection.rs @@ -22,43 +22,14 @@ pub enum ConnectionInfo { ODBC(String), } -/// Parse a DuckDB/SQLite URI body into a filesystem path, following -/// SQLAlchemy conventions. -/// -/// After the `scheme://` prefix has been stripped, `body` is interpreted as: -/// - `memory` -> handled by the caller (only valid for `duckdb://memory`) -/// - `` -> relative path, returned verbatim -/// - `/` -> absolute path, returned with the leading `/` -/// - `//...` or empty -> error (ambiguous / missing path) -fn parse_uri_path(scheme: &str, body: &str) -> Result { - if body.is_empty() { - return Err(GgsqlError::ReaderError(format!( - "{} file path cannot be empty", - scheme - ))); - } - - if body.starts_with("//") { - return Err(GgsqlError::ReaderError(format!( - "Ambiguous {scheme} URI '{scheme}://{body}': use '{scheme}://relative/path' \ - for a relative path or '{scheme}:///absolute/path' for an absolute path", - scheme = scheme, - body = body, - ))); - } - - Ok(body.to_string()) -} - /// Parse a connection string into connection information /// /// # Supported Formats /// /// - `duckdb://memory` - DuckDB in-memory database -/// - `duckdb:///absolute/path/file.db` - DuckDB file (absolute path, SQLAlchemy convention) -/// - `duckdb://relative/file.db` - DuckDB file (relative path) +/// - `duckdb://...` - DuckDB path /// - `postgres://...` - PostgreSQL connection string -/// - `sqlite://...` - SQLite file path (same `//` vs `///` rules as DuckDB) +/// - `sqlite://...` - SQLite file path /// /// # Examples /// @@ -79,18 +50,26 @@ pub fn parse_connection_string(uri: &str) -> Result { return Ok(ConnectionInfo::DuckDBMemory); } - if let Some(body) = uri.strip_prefix("duckdb://") { - let path = parse_uri_path("duckdb", body)?; - return Ok(ConnectionInfo::DuckDBFile(path)); + if let Some(path) = uri.strip_prefix("duckdb://") { + if path.is_empty() { + return Err(GgsqlError::ReaderError( + "DuckDB file path cannot be empty".to_string(), + )); + } + return Ok(ConnectionInfo::DuckDBFile(path.to_string())); } if uri.starts_with("postgres://") || uri.starts_with("postgresql://") { return Ok(ConnectionInfo::PostgreSQL(uri.to_string())); } - if let Some(body) = uri.strip_prefix("sqlite://") { - let path = parse_uri_path("sqlite", body)?; - return Ok(ConnectionInfo::SQLite(path)); + if let Some(path) = uri.strip_prefix("sqlite://") { + if path.is_empty() { + return Err(GgsqlError::ReaderError( + "SQLite file path cannot be empty".to_string(), + )); + } + return Ok(ConnectionInfo::SQLite(path.to_string())); } if let Some(conn_str) = uri.strip_prefix("odbc://") { @@ -126,14 +105,8 @@ mod tests { #[test] fn test_duckdb_file_absolute() { - // Three slashes -> absolute path (SQLAlchemy convention). The leading - // `/` must be preserved so DuckDB opens the intended file rather than - // silently creating a relative phantom DB. See issue #345. let info = parse_connection_string("duckdb:///tmp/data.db").unwrap(); - assert_eq!( - info, - ConnectionInfo::DuckDBFile("/tmp/data.db".to_string()) - ); + assert_eq!(info, ConnectionInfo::DuckDBFile("/tmp/data.db".to_string())); } #[test] @@ -145,28 +118,6 @@ mod tests { ); } - #[test] - fn test_duckdb_file_four_slashes_rejected() { - // Four slashes is ambiguous — reject with a clear message instead of - // silently interpreting as an absolute path. - let result = parse_connection_string("duckdb:////tmp/data.db"); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!(err.contains("Ambiguous"), "unexpected error: {}", err); - } - - #[test] - fn test_sqlite_absolute() { - let info = parse_connection_string("sqlite:///tmp/data.db").unwrap(); - assert_eq!(info, ConnectionInfo::SQLite("/tmp/data.db".to_string())); - } - - #[test] - fn test_sqlite_four_slashes_rejected() { - let result = parse_connection_string("sqlite:////tmp/data.db"); - assert!(result.is_err()); - } - #[test] fn test_postgres() { let uri = "postgres://user:pass@localhost/db"; @@ -187,6 +138,12 @@ mod tests { assert_eq!(info, ConnectionInfo::SQLite("data.db".to_string())); } + #[test] + fn test_sqlite_absolute() { + let info = parse_connection_string("sqlite:///tmp/data.db").unwrap(); + assert_eq!(info, ConnectionInfo::SQLite("/tmp/data.db".to_string())); + } + #[test] fn test_empty_duckdb_path() { let result = parse_connection_string("duckdb://"); diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 284985b0..53dfb288 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -112,22 +112,9 @@ impl DuckDBReader { ConnectionInfo::DuckDBMemory => Connection::open_in_memory().map_err(|e| { GgsqlError::ReaderError(format!("Failed to open in-memory DuckDB: {}", e)) })?, - ConnectionInfo::DuckDBFile(path) => { - // Fail fast if the path does not exist. DuckDB's default - // `open` creates the file if missing, which silently masks - // typos and produces confusing "Table not found" errors - // downstream. See issue #345. - if !std::path::Path::new(&path).exists() { - return Err(GgsqlError::ReaderError(format!( - "DuckDB file '{path}' does not exist. \ - Use 'duckdb://memory' for an in-memory database, \ - or create the file first (e.g. `duckdb {path}` to initialize an empty DB)." - ))); - } - Connection::open(&path).map_err(|e| { - GgsqlError::ReaderError(format!("Failed to open DuckDB file '{}': {}", path, e)) - })? - } + ConnectionInfo::DuckDBFile(path) => Connection::open(&path).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to open DuckDB file '{}': {}", path, e)) + })?, _ => { return Err(GgsqlError::ReaderError(format!( "Connection string '{}' is not supported by DuckDBReader", @@ -658,57 +645,6 @@ mod tests { assert!(reader.is_ok()); } - #[test] - fn test_missing_file_fails_fast() { - // Regression test for issue #345: opening a non-existent file must - // error instead of silently creating a phantom empty DB. - let missing = std::env::temp_dir().join("ggsql_does_not_exist_345.duckdb"); - // Ensure it really doesn't exist. - let _ = std::fs::remove_file(&missing); - - let uri = format!("duckdb://{}", missing.display()); - let result = DuckDBReader::from_connection_string(&uri); - let err = match result { - Ok(_) => panic!("expected error for missing file"), - Err(e) => e.to_string(), - }; - assert!( - err.contains("does not exist"), - "unexpected error message: {}", - err - ); - // And the reader must not have created the file as a side effect. - assert!( - !missing.exists(), - "reader created phantom DB at {}", - missing.display() - ); - } - - #[test] - fn test_absolute_path_opens_existing_file() { - // Regression test for issue #345: `duckdb:///abs/path` must open the - // absolute path, not strip the leading slash and treat it as relative. - let dir = tempfile::tempdir().expect("tempdir"); - let path = dir.path().join("abs_path_345.duckdb"); - - // Seed a DB with a known table. - { - let conn = Connection::open(&path).unwrap(); - conn.execute("CREATE TABLE t AS SELECT 1 AS x, 2 AS y", params![]) - .unwrap(); - } - - // Use the three-slash (absolute) URI form. - let uri = format!("duckdb://{}", path.display()); - let reader = DuckDBReader::from_connection_string(&uri) - .expect("should open existing absolute path"); - - // Table must be visible — confirming we opened the right file. - let df = reader.execute_sql("SELECT * FROM t").unwrap(); - assert_eq!(df.shape(), (1, 2)); - } - #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); From c54ab7c03db24a2c30f182e355085290ad8be0c9 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 22 Apr 2026 09:25:19 +0100 Subject: [PATCH 3/3] Tidy up comment --- src/reader/connection.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/reader/connection.rs b/src/reader/connection.rs index 09796c9f..2d18d641 100644 --- a/src/reader/connection.rs +++ b/src/reader/connection.rs @@ -30,20 +30,6 @@ pub enum ConnectionInfo { /// - `duckdb://...` - DuckDB path /// - `postgres://...` - PostgreSQL connection string /// - `sqlite://...` - SQLite file path -/// -/// # Examples -/// -/// ``` -/// use ggsql::reader::connection::{parse_connection_string, ConnectionInfo}; -/// -/// let info = parse_connection_string("duckdb://memory").unwrap(); -/// assert_eq!(info, ConnectionInfo::DuckDBMemory); -/// -/// let info = parse_connection_string("duckdb://data.db").unwrap(); -/// assert_eq!(info, ConnectionInfo::DuckDBFile("data.db".to_string())); -/// -/// let info = parse_connection_string("duckdb:///tmp/data.db").unwrap(); -/// assert_eq!(info, ConnectionInfo::DuckDBFile("/tmp/data.db".to_string())); /// ``` pub fn parse_connection_string(uri: &str) -> Result { if uri == "duckdb://memory" {