From cc35c203534ee04604d563339dc5ba510c240ea0 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 16:20:56 -0400 Subject: [PATCH 1/6] Add Style::PgDump ruleutils renderer (views + functions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new "pg_dump" style that reproduces PostgreSQL's ruleutils.c deparser layout — the output of pg_get_viewdef / pg_get_functiondef. Its correctness bar is byte-idempotency: formatting genuine deparser output must return it unchanged, which makes it usable for canonicalizing/diffing catalog-dumped DDL (the pglifecycle use case). Unlike the river / left-aligned engine, PgDump uses a dedicated renderer (formatter/pgdump.rs) that re-imposes ruleutils' exact indentation (clause keywords right-aligned to column 7, targets indent 4, joins indent 5, set-op keywords at column 1) while reproducing each expression verbatim — the deparser has already normalized casts, parens and spacing, so the renderer folds whitespace and re-lays-out rather than re-formatting expressions. It genuinely normalizes arbitrary equivalent SQL into that layout, not just echoes input. First increment covers flat single-level views (target list, FROM with joins, WHERE, GROUP BY, HAVING, ORDER BY, set operations) and functions (CREATE FUNCTION signature + attributes + verbatim body). CTEs, scalar subqueries, multi-line CASE and DISTINCT-target indentation are deferred; unrecognized statements fall back to verbatim source (still idempotent on deparser output). Fixtures are genuine pg_get_viewdef/pg_get_functiondef captures, regenerable from a throwaway cluster via tests/fixtures/pg_dump/generate.sh (just gen-pgdump-fixtures). tests/pgdump_idempotency_test.rs asserts each round-trips byte-identically; _deferred/ fixtures mark next-increment targets. Co-Authored-By: Claude Opus 4.8 (1M context) --- Justfile | 4 + src/formatter/mod.rs | 23 +- src/formatter/pgdump.rs | 264 ++++++++++++++++++ src/formatter/select.rs | 2 +- src/style.rs | 8 + .../pg_dump/_deferred/view_distinct_case.sql | 6 + .../pg_dump/_deferred/view_recent_cte.sql | 10 + tests/fixtures/pg_dump/_deferred/view_sub.sql | 4 + tests/fixtures/pg_dump/func_add.sql | 6 + tests/fixtures/pg_dump/func_bump.sql | 9 + tests/fixtures/pg_dump/generate.sh | 85 ++++++ tests/fixtures/pg_dump/view_order_totals.sql | 9 + tests/fixtures/pg_dump/view_uni.sql | 5 + tests/fixtures/pg_dump/view_us_users.sql | 5 + tests/fixtures/pg_dump/view_win.sql | 3 + tests/fixtures_test.rs | 1 + tests/pgdump_idempotency_test.rs | 49 ++++ 17 files changed, 491 insertions(+), 2 deletions(-) create mode 100644 src/formatter/pgdump.rs create mode 100644 tests/fixtures/pg_dump/_deferred/view_distinct_case.sql create mode 100644 tests/fixtures/pg_dump/_deferred/view_recent_cte.sql create mode 100644 tests/fixtures/pg_dump/_deferred/view_sub.sql create mode 100644 tests/fixtures/pg_dump/func_add.sql create mode 100644 tests/fixtures/pg_dump/func_bump.sql create mode 100755 tests/fixtures/pg_dump/generate.sh create mode 100644 tests/fixtures/pg_dump/view_order_totals.sql create mode 100644 tests/fixtures/pg_dump/view_uni.sql create mode 100644 tests/fixtures/pg_dump/view_us_users.sql create mode 100644 tests/fixtures/pg_dump/view_win.sql create mode 100644 tests/pgdump_idempotency_test.rs diff --git a/Justfile b/Justfile index 9f1db3d..77e5e94 100644 --- a/Justfile +++ b/Justfile @@ -59,6 +59,10 @@ publish-dry: publish: cargo publish +# Regenerate Style::PgDump fixtures from a throwaway PostgreSQL cluster +gen-pgdump-fixtures: + bash tests/fixtures/pg_dump/generate.sh + # Clean build artifacts clean: cargo clean diff --git a/src/formatter/mod.rs b/src/formatter/mod.rs index 70173fc..64fed78 100644 --- a/src/formatter/mod.rs +++ b/src/formatter/mod.rs @@ -1,4 +1,5 @@ mod expr; +mod pgdump; mod plpgsql; mod select; mod stmt; @@ -138,6 +139,22 @@ impl StyleConfig { strip_inner_join: true, wrap_case_else: true, }, + // PgDump uses a dedicated renderer (see formatter::pgdump); these + // values are placeholders and are not consulted on that path. + Style::PgDump => Self { + upper_keywords: true, + indent: " ", + leading_commas: false, + joins_in_river: false, + explicit_inner_join: false, + blank_lines_between_clauses: false, + river: false, + compact_ctes: false, + join_on_same_line: false, + blank_lines_in_ctes: false, + strip_inner_join: false, + wrap_case_else: false, + }, } } } @@ -166,7 +183,11 @@ impl<'a> Formatter<'a> { if child.kind() == "toplevel_stmt" && let Some(stmt) = child.find_child("stmt") { - results.push(self.format_stmt(stmt)?); + if self.style == Style::PgDump { + results.push(self.format_pgdump_stmt(stmt)?); + } else { + results.push(self.format_stmt(stmt)?); + } } } if results.is_empty() { diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs new file mode 100644 index 0000000..9f3d1f3 --- /dev/null +++ b/src/formatter/pgdump.rs @@ -0,0 +1,264 @@ +//! `Style::PgDump` renderer — reproduces PostgreSQL's `ruleutils.c` deparser +//! layout (the output of `pg_get_viewdef` / `pg_get_functiondef`). +//! +//! Unlike the river / left-aligned engine, this renderer's correctness bar is +//! byte-idempotency: feeding genuine deparser output back through it must +//! return that output unchanged. It therefore reproduces ruleutils' exact +//! indentation rather than reading `StyleConfig` flags, and reproduces each +//! expression verbatim (the deparser has already normalized casts, parens and +//! spacing) rather than re-formatting it. +//! +//! Layout rules observed from `pg_get_viewdef(oid, true)` at the top level: +//! clause keywords right-align so their first word ends at column 7 +//! (`SELECT`/`FROM`/`WHERE`/`HAVING`/`GROUP BY`/`ORDER BY`); continuation +//! targets indent 4; JOIN steps indent 5; set-operation keywords sit at +//! column 1. +//! +//! Scope (first increment): flat, single-level views (target list, FROM with +//! joins, WHERE, GROUP BY, HAVING, ORDER BY, set operations) and functions +//! (`CREATE FUNCTION` header + attributes + verbatim body). CTEs, scalar +//! subqueries, multi-line CASE and DISTINCT-target indentation are not yet +//! handled; statements the renderer doesn't recognize fall back to verbatim +//! source (still idempotent on deparser output). + +use crate::error::FormatError; +use crate::formatter::Formatter; +use crate::formatter::select::SelectClauses; +use crate::node_helpers::{NodeExt, flatten_list}; +use tree_sitter::Node; + +/// Column at which a clause keyword's first word ends (1 leading space before +/// `SELECT`). Leading spaces for a keyword = `RIVER_END - first_word_len`. +const RIVER_END: usize = 7; +/// Indentation for target-list continuation lines. +const TARGET_INDENT: usize = 4; +/// Indentation for JOIN continuation lines. +const JOIN_INDENT: usize = 5; + +impl<'a> Formatter<'a> { + /// Render a single top-level statement in ruleutils (`pg_dump`) layout. + pub(crate) fn format_pgdump_stmt(&self, stmt: Node<'a>) -> Result { + let Some(inner) = stmt.named_children_vec().into_iter().next() else { + return Ok(self.collapse_ws(self.text(stmt))); + }; + match inner.kind() { + "SelectStmt" => Ok(format!("{};", self.pgdump_select(inner))), + "CreateFunctionStmt" => Ok(self.pgdump_create_function(inner)), + // Out of scope: reproduce verbatim so deparser output still + // round-trips and nothing is mangled. + _ => Ok(self.text(inner).trim().to_string()), + } + } + + /// Collapse all runs of whitespace to a single space and trim. A no-op on + /// canonical (single-line, single-spaced) deparser expressions; it folds + /// the deparser's line breaks so the layout can be re-imposed. + fn collapse_ws(&self, text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + /// Leading spaces so that `word` (a clause keyword's first token) ends at + /// [`RIVER_END`]. + fn river_pad(&self, word_len: usize) -> String { + " ".repeat(RIVER_END.saturating_sub(word_len)) + } + + fn pgdump_select(&self, node: Node<'a>) -> String { + let snp = node.find_child("select_no_parens").unwrap_or(node); + let clauses = self.collect_select_clauses(snp); + self.pgdump_render_clauses(&clauses) + } + + /// Render collected SELECT clauses as a ruleutils body (no trailing `;`). + fn pgdump_render_clauses(&self, c: &SelectClauses<'a>) -> String { + let mut s = String::new(); + + // SELECT [DISTINCT] target, target, ... + s.push(' '); + s.push_str("SELECT "); + if let Some(d) = c.distinct { + s.push_str(&self.collapse_ws(self.text(d))); + s.push(' '); + } + let targets: Vec = if c.targets.is_empty() { + vec!["*".to_string()] + } else { + c.targets + .iter() + .map(|t| self.collapse_ws(self.text(*t))) + .collect() + }; + let cont = format!(",\n{}", " ".repeat(TARGET_INDENT)); + s.push_str(&targets.join(&cont)); + + // FROM + if let Some(from) = c.from { + let items = self.pgdump_from_items(from); + if let Some((first, joins)) = items.split_first() { + s.push('\n'); + s.push_str(&self.river_pad(4)); // FROM + s.push_str("FROM "); + s.push_str(first); + let join_pad = " ".repeat(JOIN_INDENT); + for j in joins { + s.push('\n'); + s.push_str(&join_pad); + s.push_str(j); + } + } + } + + // WHERE + if let Some(w) = c.where_clause + && let Some(expr) = w.find_child_any(&["a_expr", "c_expr"]) + { + s.push('\n'); + s.push_str(&self.river_pad(5)); // WHERE + s.push_str("WHERE "); + s.push_str(&self.collapse_ws(self.text(expr))); + } + + // GROUP BY + if let Some(g) = c.group_clause + && let Some(list) = g.find_child("group_by_list") + { + let items: Vec = flatten_list(list, "group_by_list") + .iter() + .map(|i| self.collapse_ws(self.text(*i))) + .collect(); + s.push('\n'); + s.push_str(&self.river_pad(5)); // GROUP + s.push_str("GROUP BY "); + s.push_str(&items.join(", ")); + } + + // HAVING + if let Some(h) = c.having_clause + && let Some(expr) = h.find_child_any(&["a_expr", "c_expr"]) + { + s.push('\n'); + s.push_str(&self.river_pad(6)); // HAVING + s.push_str("HAVING "); + s.push_str(&self.collapse_ws(self.text(expr))); + } + + // ORDER BY + if let Some(sort) = c.sort_clause + && let Some(list) = sort.find_child("sortby_list") + { + let items: Vec = flatten_list(list, "sortby_list") + .iter() + .map(|i| self.collapse_ws(self.text(*i))) + .collect(); + s.push('\n'); + s.push_str(&self.river_pad(5)); // ORDER + s.push_str("ORDER BY "); + s.push_str(&items.join(", ")); + } + + // Set operation (UNION / INTERSECT / EXCEPT): keyword at column 1, + // then the right-hand select rendered the same way. + if let Some(so) = &c.set_op { + s.push('\n'); + s.push_str(&so.keyword); + if let Some(q) = &so.quantifier { + s.push(' '); + s.push_str(q); + } + s.push('\n'); + let right = if let Some(rc) = &so.right_clauses { + self.pgdump_render_clauses(rc) + } else { + let rc = self.collect_select_clauses(so.right); + self.pgdump_render_clauses(&rc) + }; + s.push_str(&right); + } + + s + } + + /// Flatten a `from_clause` into ruleutils FROM items: the base relation + /// followed by one entry per JOIN step (`JOIN tbl ON ...`). + fn pgdump_from_items(&self, from_clause: Node<'a>) -> Vec { + let mut items = Vec::new(); + if let Some(list) = from_clause.find_child("from_list") { + for r in flatten_list(list, "from_list") { + self.pgdump_table_ref_items(r, &mut items); + } + } + items + } + + fn pgdump_table_ref_items(&self, node: Node<'a>, items: &mut Vec) { + let joined = if node.kind() == "joined_table" { + Some(node) + } else { + node.find_child("joined_table") + }; + if let Some(jt) = joined { + // Left side first (may itself be a join), then this join step: + // everything from the end of the left table_ref to the end of the + // joined_table (the JOIN keyword, right table and qualifier), + // reproduced verbatim. + if let Some(left) = jt.find_child("table_ref") { + self.pgdump_table_ref_items(left, items); + let step = self.collapse_ws(&self.source[left.end_byte()..jt.end_byte()]); + items.push(step); + } else { + items.push(self.collapse_ws(self.text(jt))); + } + } else { + items.push(self.collapse_ws(self.text(node))); + } + } + + /// Render a `CREATE FUNCTION` statement in `pg_get_functiondef` layout: + /// signature on the first line, `RETURNS` and each option on its own + /// space-prefixed line, and the `AS` clause (body verbatim) last. + fn pgdump_create_function(&self, node: Node<'a>) -> String { + let mut s = String::new(); + + // Signature: CREATE [OR REPLACE] FUNCTION name(args) — verbatim up to + // and including the argument list's closing paren. + let sig_end = node + .find_child("func_args_with_defaults") + .or_else(|| node.find_child("func_args")) + .map(|n| n.end_byte()) + .unwrap_or_else(|| { + node.find_child("func_name") + .map(|n| n.end_byte()) + .unwrap_or(node.end_byte()) + }); + s.push_str(&self.collapse_ws(&self.source[node.start_byte()..sig_end])); + + // RETURNS + if let Some(ret) = node.find_child("func_return") { + s.push_str("\n RETURNS "); + s.push_str(&self.collapse_ws(self.text(ret))); + } + + // Options (LANGUAGE, volatility, STRICT, AS body, ...) in source order, + // which matches the deparser's canonical order on genuine input. + if let Some(opts) = node + .find_child("opt_createfunc_opt_list") + .and_then(|n| n.find_child("createfunc_opt_list")) + .or_else(|| node.find_child("createfunc_opt_list")) + { + for item in flatten_list(opts, "createfunc_opt_list") { + if let Some(as_kw) = item.find_child("kw_as") { + // AS clause: body reproduced verbatim (dollar-quoted text + // may span multiple lines and must not be collapsed). + let body = self.source[as_kw.end_byte()..item.end_byte()].trim_start(); + s.push_str("\nAS "); + s.push_str(body); + } else { + s.push_str("\n "); + s.push_str(&self.collapse_ws(self.text(item))); + } + } + } + + s + } +} diff --git a/src/formatter/select.rs b/src/formatter/select.rs index 1e88b02..f57dc7f 100644 --- a/src/formatter/select.rs +++ b/src/formatter/select.rs @@ -60,7 +60,7 @@ impl<'a> Formatter<'a> { } /// Collect all clauses from a select_no_parens (or simple_select) node tree. - fn collect_select_clauses(&self, node: Node<'a>) -> SelectClauses<'a> { + pub(crate) fn collect_select_clauses(&self, node: Node<'a>) -> SelectClauses<'a> { let mut clauses = SelectClauses { distinct: None, targets: Vec::new(), diff --git a/src/style.rs b/src/style.rs index f2f2076..2fe1821 100644 --- a/src/style.rs +++ b/src/style.rs @@ -34,6 +34,11 @@ pub enum Style { Kickstarter, /// mattmc3 style — lowercase river with leading commas. Mattmc3, + /// PostgreSQL "internal" style — mimics the `ruleutils.c` deparser output + /// emitted by `pg_get_viewdef`/`pg_get_functiondef` (i.e. `pg_dump`). The + /// correctness bar is byte-identical round-tripping of genuine deparser + /// output. + PgDump, } impl Style { @@ -46,6 +51,7 @@ impl Style { Style::Gitlab, Style::Kickstarter, Style::Mattmc3, + Style::PgDump, ]; } @@ -59,6 +65,7 @@ impl fmt::Display for Style { Style::Gitlab => write!(f, "gitlab"), Style::Kickstarter => write!(f, "kickstarter"), Style::Mattmc3 => write!(f, "mattmc3"), + Style::PgDump => write!(f, "pg_dump"), } } } @@ -75,6 +82,7 @@ impl FromStr for Style { "gitlab" => Ok(Style::Gitlab), "kickstarter" => Ok(Style::Kickstarter), "mattmc3" => Ok(Style::Mattmc3), + "pg_dump" | "pgdump" | "postgres" => Ok(Style::PgDump), _ => Err(format!("Unsupported style: '{s}'")), } } diff --git a/tests/fixtures/pg_dump/_deferred/view_distinct_case.sql b/tests/fixtures/pg_dump/_deferred/view_distinct_case.sql new file mode 100644 index 0000000..9ab8163 --- /dev/null +++ b/tests/fixtures/pg_dump/_deferred/view_distinct_case.sql @@ -0,0 +1,6 @@ + SELECT DISTINCT country, + CASE + WHEN active THEN 'on'::text + ELSE 'off'::text + END AS st + FROM app.users; diff --git a/tests/fixtures/pg_dump/_deferred/view_recent_cte.sql b/tests/fixtures/pg_dump/_deferred/view_recent_cte.sql new file mode 100644 index 0000000..a372dc6 --- /dev/null +++ b/tests/fixtures/pg_dump/_deferred/view_recent_cte.sql @@ -0,0 +1,10 @@ + WITH recent AS ( + SELECT orders.user_id, + orders.total + FROM app.orders + WHERE orders.placed_at > (now() - '7 days'::interval) + ) + SELECT user_id, + sum(total) AS wk + FROM recent + GROUP BY user_id; diff --git a/tests/fixtures/pg_dump/_deferred/view_sub.sql b/tests/fixtures/pg_dump/_deferred/view_sub.sql new file mode 100644 index 0000000..99ff0a1 --- /dev/null +++ b/tests/fixtures/pg_dump/_deferred/view_sub.sql @@ -0,0 +1,4 @@ + SELECT email + FROM app.users u + WHERE (id IN ( SELECT orders.user_id + FROM app.orders)); diff --git a/tests/fixtures/pg_dump/func_add.sql b/tests/fixtures/pg_dump/func_add.sql new file mode 100644 index 0000000..f9be4d1 --- /dev/null +++ b/tests/fixtures/pg_dump/func_add.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION app.add(a integer, b integer) + RETURNS integer + LANGUAGE sql + IMMUTABLE +AS $function$ SELECT a + b $function$ + diff --git a/tests/fixtures/pg_dump/func_bump.sql b/tests/fixtures/pg_dump/func_bump.sql new file mode 100644 index 0000000..ee25dec --- /dev/null +++ b/tests/fixtures/pg_dump/func_bump.sql @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION app.bump(p_id bigint) + RETURNS void + LANGUAGE plpgsql +AS $function$ +BEGIN + UPDATE app.users SET active = true WHERE id = p_id; +END; +$function$ + diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh new file mode 100755 index 0000000..19c852f --- /dev/null +++ b/tests/fixtures/pg_dump/generate.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# +# Regenerate the Style::PgDump idempotency fixtures from genuine PostgreSQL +# deparser output. +# +# Stands up a throwaway local cluster (nothing global is touched), creates a +# set of representative objects, captures pg_get_viewdef / pg_get_functiondef +# output into this directory, then tears the cluster down. The captured files +# are committed so the test suite needs no PostgreSQL to run. +# +# Requires PostgreSQL client+server binaries on PATH (initdb, pg_ctl, psql). +# Usage: bash tests/fixtures/pg_dump/generate.sh +set -euo pipefail + +FIXDIR="$(cd "$(dirname "$0")" && pwd)" +PORT=5599 +WORK="$(mktemp -d "${TMPDIR:-/tmp}/pgdump_fix.XXXXXX")" +export PGDATA="$WORK/data" +SOCK="$WORK/sock" +mkdir -p "$SOCK" + +cleanup() { + pg_ctl -D "$PGDATA" -w stop >/dev/null 2>&1 || true + rm -rf "$WORK" +} +trap cleanup EXIT + +echo "initdb ($WORK) ..." +initdb -D "$PGDATA" --no-locale -E UTF8 -U postgres >/dev/null +pg_ctl -D "$PGDATA" -o "-k $SOCK -p $PORT -h ''" -l "$WORK/log" -w start >/dev/null + +psql() { command psql -h "$SOCK" -p "$PORT" -U postgres -d postgres "$@"; } +cap() { psql -At -c "$1"; } + +echo "creating objects ..." +psql -v ON_ERROR_STOP=1 -q >/dev/null <<'SQL' +CREATE SCHEMA app; +CREATE TABLE app.users (id bigint PRIMARY KEY, email text NOT NULL, country text, created_at timestamptz, active boolean DEFAULT true); +CREATE TABLE app.orders (id bigint PRIMARY KEY, user_id bigint, total numeric(10,2), placed_at timestamptz); + +CREATE VIEW app.us_users AS + SELECT id, email, created_at AT TIME ZONE 'UTC' AS created_utc + FROM app.users WHERE country = 'US' AND active; + +CREATE VIEW app.order_totals AS + SELECT u.email, count(*) AS n, sum(o.total) AS revenue + FROM app.users u JOIN app.orders o ON o.user_id = u.id + WHERE o.placed_at > now() - interval '30 days' + GROUP BY u.email HAVING sum(o.total) > 100 ORDER BY revenue DESC; + +CREATE VIEW app.win AS + SELECT email, row_number() OVER (PARTITION BY country ORDER BY created_at) AS rn FROM app.users; + +CREATE VIEW app.uni AS + SELECT id FROM app.users UNION SELECT user_id FROM app.orders; + +-- Deferred (nested) shapes for later increments. +CREATE VIEW app.recent_cte AS + WITH recent AS (SELECT user_id, total FROM app.orders WHERE placed_at > now() - interval '7 days') + SELECT user_id, sum(total) AS wk FROM recent GROUP BY user_id; +CREATE VIEW app.sub AS + SELECT u.email FROM app.users u WHERE u.id IN (SELECT user_id FROM app.orders); +CREATE VIEW app.distinct_case AS + SELECT DISTINCT country, CASE WHEN active THEN 'on' ELSE 'off' END AS st FROM app.users; + +CREATE FUNCTION app.add(a integer, b integer) RETURNS integer LANGUAGE sql IMMUTABLE AS $$ SELECT a + b $$; +CREATE FUNCTION app.bump(p_id bigint) RETURNS void LANGUAGE plpgsql AS $fn$ +BEGIN + UPDATE app.users SET active = true WHERE id = p_id; +END; +$fn$; +SQL + +echo "capturing fixtures ..." +mkdir -p "$FIXDIR/_deferred" +for v in us_users order_totals win uni; do + cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/view_$v.sql" +done +for v in recent_cte sub distinct_case; do + cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/_deferred/view_$v.sql" +done +cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" +cap "SELECT pg_get_functiondef('app.bump(bigint)'::regprocedure);" > "$FIXDIR/func_bump.sql" + +echo "done — fixtures written to $FIXDIR" diff --git a/tests/fixtures/pg_dump/view_order_totals.sql b/tests/fixtures/pg_dump/view_order_totals.sql new file mode 100644 index 0000000..c45af94 --- /dev/null +++ b/tests/fixtures/pg_dump/view_order_totals.sql @@ -0,0 +1,9 @@ + SELECT u.email, + count(*) AS n, + sum(o.total) AS revenue + FROM app.users u + JOIN app.orders o ON o.user_id = u.id + WHERE o.placed_at > (now() - '30 days'::interval) + GROUP BY u.email + HAVING sum(o.total) > 100::numeric + ORDER BY (sum(o.total)) DESC; diff --git a/tests/fixtures/pg_dump/view_uni.sql b/tests/fixtures/pg_dump/view_uni.sql new file mode 100644 index 0000000..70a810b --- /dev/null +++ b/tests/fixtures/pg_dump/view_uni.sql @@ -0,0 +1,5 @@ + SELECT users.id + FROM app.users +UNION + SELECT orders.user_id AS id + FROM app.orders; diff --git a/tests/fixtures/pg_dump/view_us_users.sql b/tests/fixtures/pg_dump/view_us_users.sql new file mode 100644 index 0000000..6aa04a3 --- /dev/null +++ b/tests/fixtures/pg_dump/view_us_users.sql @@ -0,0 +1,5 @@ + SELECT id, + email, + (created_at AT TIME ZONE 'UTC'::text) AS created_utc + FROM app.users + WHERE country = 'US'::text AND active; diff --git a/tests/fixtures/pg_dump/view_win.sql b/tests/fixtures/pg_dump/view_win.sql new file mode 100644 index 0000000..88b418e --- /dev/null +++ b/tests/fixtures/pg_dump/view_win.sql @@ -0,0 +1,3 @@ + SELECT email, + row_number() OVER (PARTITION BY country ORDER BY created_at) AS rn + FROM app.users; diff --git a/tests/fixtures_test.rs b/tests/fixtures_test.rs index ae9231a..0b982e5 100644 --- a/tests/fixtures_test.rs +++ b/tests/fixtures_test.rs @@ -10,6 +10,7 @@ fn run_fixture(style: Style, name: &str) { Style::Gitlab => "gitlab", Style::Kickstarter => "kickstarter", Style::Mattmc3 => "mattmc3", + Style::PgDump => "pg_dump", }; let base = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests") diff --git a/tests/pgdump_idempotency_test.rs b/tests/pgdump_idempotency_test.rs new file mode 100644 index 0000000..32e32b1 --- /dev/null +++ b/tests/pgdump_idempotency_test.rs @@ -0,0 +1,49 @@ +//! Idempotency harness for `Style::PgDump`. +//! +//! Each fixture under `tests/fixtures/pg_dump/` is genuine PostgreSQL deparser +//! output (`pg_get_viewdef` / `pg_get_functiondef`). The correctness bar is +//! byte-idempotency: formatting that output with `Style::PgDump` must return it +//! unchanged. Fixtures under `_deferred/` exercise constructs not yet supported +//! and are intentionally excluded. + +use std::fs; +use std::path::Path; + +use libpgfmt::format; +use libpgfmt::style::Style; +use pretty_assertions::assert_eq; + +#[test] +fn pgdump_fixtures_are_idempotent() { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/pg_dump"); + let mut checked = 0; + let mut entries: Vec<_> = fs::read_dir(&dir) + .expect("read pg_dump fixtures dir") + .filter_map(Result::ok) + .map(|e| e.path()) + .filter(|p| p.extension().is_some_and(|e| e == "sql")) + .collect(); + entries.sort(); + + for path in entries { + let raw = fs::read_to_string(&path).expect("read fixture"); + // Files keep a trailing newline for git friendliness; the deparser + // output itself has none, so compare against the trimmed-end form. + let expected = raw.trim_end_matches('\n'); + let got = format(expected, Style::PgDump) + .unwrap_or_else(|e| panic!("{}: format error: {e}", path.display())); + assert_eq!( + got, + expected, + "\n{} is not idempotent under Style::PgDump", + path.display() + ); + checked += 1; + } + + assert!( + checked > 0, + "no pg_dump fixtures found in {}", + dir.display() + ); +} From a4125486a1983415124a9bc4a63f2b8f177391e0 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 16:39:51 -0400 Subject: [PATCH 2/6] Extend Style::PgDump to CTEs, CASE blocks and comma-FROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the ruleutils renderer depth-aware (each nesting level adds 8 columns) and add three nested constructs, all reverse-engineered from genuine deparser output and verified by byte-idempotency: - WITH / CTEs: each common table expression renders as `name AS ( )` with the body at depth+1 and the closing paren at 8*(depth+1); subsequent CTEs continue on the closing-paren line (`), y AS (`). - Multi-line CASE targets: rendered as blocks at column 8+8*depth (WHEN/ELSE at 12+8*depth, END back at 8); a CASE as the first target forces SELECT onto its own line. Probing showed DISTINCT does not change continuation indent — the block indent comes from the CASE itself. - Comma-separated FROM items continue at 4+8*depth with commas, distinct from JOIN steps at 5+8*depth. Fixtures regenerated from the (now authoritative) generate.sh: recent_cte and distinct_case promoted out of _deferred, plus new two_cte, case_plain and case_first. Scalar subqueries embedded in expressions remain deferred (their layout is output-column-relative); the sub fixture stays in _deferred. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/formatter/pgdump.rs | 304 +++++++++++++----- tests/fixtures/pg_dump/generate.sh | 19 +- tests/fixtures/pg_dump/view_case_first.sql | 7 + tests/fixtures/pg_dump/view_case_plain.sql | 6 + .../{_deferred => }/view_distinct_case.sql | 0 .../{_deferred => }/view_recent_cte.sql | 0 tests/fixtures/pg_dump/view_two_cte.sql | 11 + 7 files changed, 256 insertions(+), 91 deletions(-) create mode 100644 tests/fixtures/pg_dump/view_case_first.sql create mode 100644 tests/fixtures/pg_dump/view_case_plain.sql rename tests/fixtures/pg_dump/{_deferred => }/view_distinct_case.sql (100%) rename tests/fixtures/pg_dump/{_deferred => }/view_recent_cte.sql (100%) create mode 100644 tests/fixtures/pg_dump/view_two_cte.sql diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs index 9f3d1f3..efc05af 100644 --- a/src/formatter/pgdump.rs +++ b/src/formatter/pgdump.rs @@ -5,21 +5,25 @@ //! byte-idempotency: feeding genuine deparser output back through it must //! return that output unchanged. It therefore reproduces ruleutils' exact //! indentation rather than reading `StyleConfig` flags, and reproduces each -//! expression verbatim (the deparser has already normalized casts, parens and -//! spacing) rather than re-formatting it. +//! single-line expression verbatim (the deparser has already normalized casts, +//! parens and spacing) rather than re-formatting it. //! -//! Layout rules observed from `pg_get_viewdef(oid, true)` at the top level: -//! clause keywords right-align so their first word ends at column 7 -//! (`SELECT`/`FROM`/`WHERE`/`HAVING`/`GROUP BY`/`ORDER BY`); continuation -//! targets indent 4; JOIN steps indent 5; set-operation keywords sit at -//! column 1. +//! Layout rules observed from `pg_get_viewdef(oid, true)`. At nesting depth `d` +//! (subqueries inside CTEs add one level), with `STEP = 8`: +//! - `SELECT` / `WITH` keywords start at column `2 + STEP*d` (one leading +//! space at the top level); +//! - the other clause keywords (`FROM`/`WHERE`/`GROUP BY`/`HAVING`/ +//! `ORDER BY`) right-align so their first word ends at column `7 + STEP*d`; +//! - target / comma-separated-FROM continuation lines indent `4 + STEP*d`; +//! - JOIN steps indent `5 + STEP*d`; +//! - a multi-line `CASE` target is a block at `8 + STEP*d`, its `WHEN`/`ELSE` +//! lines at `12 + STEP*d`, `END` back at `8 + STEP*d`; +//! - a CTE body renders at depth `d+1`; its closing paren sits at +//! `STEP*(d+1)`; set-operation keywords sit at column 1. //! -//! Scope (first increment): flat, single-level views (target list, FROM with -//! joins, WHERE, GROUP BY, HAVING, ORDER BY, set operations) and functions -//! (`CREATE FUNCTION` header + attributes + verbatim body). CTEs, scalar -//! subqueries, multi-line CASE and DISTINCT-target indentation are not yet -//! handled; statements the renderer doesn't recognize fall back to verbatim -//! source (still idempotent on deparser output). +//! Not yet handled: scalar subqueries embedded in expressions (their layout is +//! output-column-relative). Statements the renderer doesn't recognize fall +//! back to verbatim source (still idempotent on deparser output). use crate::error::FormatError; use crate::formatter::Formatter; @@ -27,13 +31,11 @@ use crate::formatter::select::SelectClauses; use crate::node_helpers::{NodeExt, flatten_list}; use tree_sitter::Node; -/// Column at which a clause keyword's first word ends (1 leading space before -/// `SELECT`). Leading spaces for a keyword = `RIVER_END - first_word_len`. +/// Indentation added per nesting level. +const STEP: usize = 8; +/// Column (1-based) at which a right-aligned clause keyword's first word ends, +/// at depth 0. const RIVER_END: usize = 7; -/// Indentation for target-list continuation lines. -const TARGET_INDENT: usize = 4; -/// Indentation for JOIN continuation lines. -const JOIN_INDENT: usize = 5; impl<'a> Formatter<'a> { /// Render a single top-level statement in ruleutils (`pg_dump`) layout. @@ -42,7 +44,7 @@ impl<'a> Formatter<'a> { return Ok(self.collapse_ws(self.text(stmt))); }; match inner.kind() { - "SelectStmt" => Ok(format!("{};", self.pgdump_select(inner))), + "SelectStmt" => Ok(format!("{};", self.pgdump_select(inner, 0))), "CreateFunctionStmt" => Ok(self.pgdump_create_function(inner)), // Out of scope: reproduce verbatim so deparser output still // round-trips and nothing is mangled. @@ -57,55 +59,34 @@ impl<'a> Formatter<'a> { text.split_whitespace().collect::>().join(" ") } - /// Leading spaces so that `word` (a clause keyword's first token) ends at - /// [`RIVER_END`]. - fn river_pad(&self, word_len: usize) -> String { - " ".repeat(RIVER_END.saturating_sub(word_len)) + /// Leading spaces so `word` (a right-aligned clause keyword's first token) + /// ends at the river column for `depth`. + fn river_pad(&self, word_len: usize, depth: usize) -> String { + " ".repeat((RIVER_END + STEP * depth).saturating_sub(word_len)) } - fn pgdump_select(&self, node: Node<'a>) -> String { + fn pgdump_select(&self, node: Node<'a>, depth: usize) -> String { let snp = node.find_child("select_no_parens").unwrap_or(node); let clauses = self.collect_select_clauses(snp); - self.pgdump_render_clauses(&clauses) + self.pgdump_render_clauses(&clauses, depth) } /// Render collected SELECT clauses as a ruleutils body (no trailing `;`). - fn pgdump_render_clauses(&self, c: &SelectClauses<'a>) -> String { + fn pgdump_render_clauses(&self, c: &SelectClauses<'a>, depth: usize) -> String { let mut s = String::new(); - // SELECT [DISTINCT] target, target, ... - s.push(' '); - s.push_str("SELECT "); - if let Some(d) = c.distinct { - s.push_str(&self.collapse_ws(self.text(d))); - s.push(' '); + // WITH ... CTE list. + if let Some(w) = c.with_clause { + s.push_str(&self.pgdump_with(w, depth)); + s.push('\n'); } - let targets: Vec = if c.targets.is_empty() { - vec!["*".to_string()] - } else { - c.targets - .iter() - .map(|t| self.collapse_ws(self.text(*t))) - .collect() - }; - let cont = format!(",\n{}", " ".repeat(TARGET_INDENT)); - s.push_str(&targets.join(&cont)); + + // SELECT [DISTINCT] target, target, ... (CASE targets render as blocks). + s.push_str(&self.pgdump_targets(c, depth)); // FROM if let Some(from) = c.from { - let items = self.pgdump_from_items(from); - if let Some((first, joins)) = items.split_first() { - s.push('\n'); - s.push_str(&self.river_pad(4)); // FROM - s.push_str("FROM "); - s.push_str(first); - let join_pad = " ".repeat(JOIN_INDENT); - for j in joins { - s.push('\n'); - s.push_str(&join_pad); - s.push_str(j); - } - } + s.push_str(&self.pgdump_from(from, depth)); } // WHERE @@ -113,7 +94,7 @@ impl<'a> Formatter<'a> { && let Some(expr) = w.find_child_any(&["a_expr", "c_expr"]) { s.push('\n'); - s.push_str(&self.river_pad(5)); // WHERE + s.push_str(&self.river_pad(5, depth)); s.push_str("WHERE "); s.push_str(&self.collapse_ws(self.text(expr))); } @@ -127,7 +108,7 @@ impl<'a> Formatter<'a> { .map(|i| self.collapse_ws(self.text(*i))) .collect(); s.push('\n'); - s.push_str(&self.river_pad(5)); // GROUP + s.push_str(&self.river_pad(5, depth)); s.push_str("GROUP BY "); s.push_str(&items.join(", ")); } @@ -137,7 +118,7 @@ impl<'a> Formatter<'a> { && let Some(expr) = h.find_child_any(&["a_expr", "c_expr"]) { s.push('\n'); - s.push_str(&self.river_pad(6)); // HAVING + s.push_str(&self.river_pad(6, depth)); s.push_str("HAVING "); s.push_str(&self.collapse_ws(self.text(expr))); } @@ -151,13 +132,13 @@ impl<'a> Formatter<'a> { .map(|i| self.collapse_ws(self.text(*i))) .collect(); s.push('\n'); - s.push_str(&self.river_pad(5)); // ORDER + s.push_str(&self.river_pad(5, depth)); s.push_str("ORDER BY "); s.push_str(&items.join(", ")); } - // Set operation (UNION / INTERSECT / EXCEPT): keyword at column 1, - // then the right-hand select rendered the same way. + // Set operation (UNION / INTERSECT / EXCEPT): keyword at column 1, then + // the right-hand select rendered the same way. if let Some(so) = &c.set_op { s.push('\n'); s.push_str(&so.keyword); @@ -167,10 +148,10 @@ impl<'a> Formatter<'a> { } s.push('\n'); let right = if let Some(rc) = &so.right_clauses { - self.pgdump_render_clauses(rc) + self.pgdump_render_clauses(rc, depth) } else { let rc = self.collect_select_clauses(so.right); - self.pgdump_render_clauses(&rc) + self.pgdump_render_clauses(&rc, depth) }; s.push_str(&right); } @@ -178,39 +159,190 @@ impl<'a> Formatter<'a> { s } - /// Flatten a `from_clause` into ruleutils FROM items: the base relation - /// followed by one entry per JOIN step (`JOIN tbl ON ...`). - fn pgdump_from_items(&self, from_clause: Node<'a>) -> Vec { - let mut items = Vec::new(); - if let Some(list) = from_clause.find_child("from_list") { - for r in flatten_list(list, "from_list") { - self.pgdump_table_ref_items(r, &mut items); + /// Render `SELECT [DISTINCT]` and the target list. Simple targets render + /// inline at indent `4 + STEP*depth`; `CASE` targets render as blocks. + fn pgdump_targets(&self, c: &SelectClauses<'a>, depth: usize) -> String { + let lead = STEP * depth + 1; // SELECT keyword leading spaces + let cont = " ".repeat(STEP * depth + 4); + let mut s = format!("{}SELECT", " ".repeat(lead)); + if let Some(d) = c.distinct { + s.push(' '); + s.push_str(&self.collapse_ws(self.text(d))); + } + + let targets = &c.targets; + if targets.is_empty() { + s.push_str(" *"); + return s; + } + + let last = targets.len() - 1; + for (i, t) in targets.iter().enumerate() { + if let Some(case_node) = self.target_case_expr(*t) { + // CASE block always starts on its own line. + s.push('\n'); + s.push_str(&self.render_case_block(*t, case_node, depth)); + } else if i == 0 { + s.push(' '); + s.push_str(&self.collapse_ws(self.text(*t))); + } else { + s.push('\n'); + s.push_str(&cont); + s.push_str(&self.collapse_ws(self.text(*t))); + } + if i != last { + s.push(','); + } + } + s + } + + /// If a target's expression is (only) a `CASE`, return its `case_expr` + /// node. Descends single-child `a_expr`/`c_expr` wrappers; returns `None` + /// when the CASE is part of a larger expression. + fn target_case_expr(&self, target_el: Node<'a>) -> Option> { + let mut node = target_el.find_child_any(&["a_expr", "c_expr", "case_expr"])?; + loop { + match node.kind() { + "case_expr" => return Some(node), + "a_expr" | "c_expr" => { + let kids = node.named_children_vec(); + if kids.len() == 1 { + node = kids[0]; + } else { + return None; + } + } + _ => return None, } } - items } - fn pgdump_table_ref_items(&self, node: Node<'a>, items: &mut Vec) { + /// Render a `CASE` target as a ruleutils block, including the trailing + /// `AS alias` (taken verbatim from after the `case_expr`). + fn render_case_block(&self, target_el: Node<'a>, case_node: Node<'a>, depth: usize) -> String { + let ci = " ".repeat(STEP * depth + 8); // CASE / END + let wi = " ".repeat(STEP * depth + 12); // WHEN / ELSE + let mut s = format!("{ci}CASE"); + + // Optional simple-CASE argument: CASE WHEN ... + let mut cursor = case_node.walk(); + for child in case_node.named_children(&mut cursor) { + match child.kind() { + "kw_case" | "when_clause_list" | "case_default" | "kw_end" => {} + _ => { + s.push(' '); + s.push_str(&self.collapse_ws(self.text(child))); + break; + } + } + } + + if let Some(wcl) = case_node.find_child("when_clause_list") { + for wc in flatten_list(wcl, "when_clause_list") { + s.push('\n'); + s.push_str(&wi); + s.push_str(&self.collapse_ws(self.text(wc))); + } + } + if let Some(def) = case_node.find_child("case_default") { + s.push('\n'); + s.push_str(&wi); + s.push_str(&self.collapse_ws(self.text(def))); + } + s.push('\n'); + s.push_str(&ci); + s.push_str("END"); + + // Trailing `AS alias` (everything in the target after the CASE). + let tail = self.collapse_ws(&self.source[case_node.end_byte()..target_el.end_byte()]); + if !tail.is_empty() { + s.push(' '); + s.push_str(&tail); + } + s + } + + /// Render the FROM clause: comma-separated items at indent `4 + STEP*depth`, + /// each item's JOIN steps at indent `5 + STEP*depth`. + fn pgdump_from(&self, from_clause: Node<'a>, depth: usize) -> String { + let Some(list) = from_clause.find_child("from_list") else { + return String::new(); + }; + let items: Vec = flatten_list(list, "from_list") + .iter() + .map(|tr| self.render_table_ref(*tr, depth)) + .collect(); + let Some((first, rest)) = items.split_first() else { + return String::new(); + }; + let mut s = format!("\n{}FROM {first}", self.river_pad(4, depth)); + let cont = " ".repeat(STEP * depth + 4); + for item in rest { + s.push_str(",\n"); + s.push_str(&cont); + s.push_str(item); + } + s + } + + /// Render one FROM item (a relation, possibly with JOINs) as a (possibly + /// multi-line) string: the base relation followed by one line per JOIN step. + fn render_table_ref(&self, node: Node<'a>, depth: usize) -> String { let joined = if node.kind() == "joined_table" { Some(node) } else { node.find_child("joined_table") }; - if let Some(jt) = joined { - // Left side first (may itself be a join), then this join step: - // everything from the end of the left table_ref to the end of the - // joined_table (the JOIN keyword, right table and qualifier), - // reproduced verbatim. - if let Some(left) = jt.find_child("table_ref") { - self.pgdump_table_ref_items(left, items); - let step = self.collapse_ws(&self.source[left.end_byte()..jt.end_byte()]); - items.push(step); - } else { - items.push(self.collapse_ws(self.text(jt))); + let Some(jt) = joined else { + return self.collapse_ws(self.text(node)); + }; + let Some(left) = jt.find_child("table_ref") else { + return self.collapse_ws(self.text(jt)); + }; + let mut s = self.render_table_ref(left, depth); + let step = self.collapse_ws(&self.source[left.end_byte()..jt.end_byte()]); + s.push('\n'); + s.push_str(&" ".repeat(STEP * depth + 5)); + s.push_str(&step); + s + } + + /// Render a `WITH` clause: each CTE as `name AS ( )`, + /// with subsequent CTEs continuing on the closing-paren line. + fn pgdump_with(&self, w: Node<'a>, depth: usize) -> String { + let mut s = format!("{}WITH ", " ".repeat(STEP * depth + 1)); + let close = " ".repeat(STEP * (depth + 1)); + let Some(list) = w.find_child("cte_list") else { + return s; + }; + let ctes = flatten_list(list, "cte_list"); + let last = ctes.len().saturating_sub(1); + for (i, cte) in ctes.iter().enumerate() { + let name = cte + .find_child("name") + .map(|n| self.collapse_ws(self.text(n))) + .unwrap_or_default(); + s.push_str(&name); + s.push_str(" AS ("); + let body = cte + .find_child("PreparableStmt") + .and_then(|p| p.find_child("SelectStmt")) + .and_then(|s| s.find_child("select_no_parens")) + .or_else(|| cte.find_child("select_no_parens")); + if let Some(body) = body { + let clauses = self.collect_select_clauses(body); + s.push('\n'); + s.push_str(&self.pgdump_render_clauses(&clauses, depth + 1)); + } + s.push('\n'); + s.push_str(&close); + s.push(')'); + if i != last { + s.push_str(", "); } - } else { - items.push(self.collapse_ws(self.text(node))); } + s } /// Render a `CREATE FUNCTION` statement in `pg_get_functiondef` layout: diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh index 19c852f..bfe2622 100755 --- a/tests/fixtures/pg_dump/generate.sh +++ b/tests/fixtures/pg_dump/generate.sh @@ -54,14 +54,23 @@ CREATE VIEW app.win AS CREATE VIEW app.uni AS SELECT id FROM app.users UNION SELECT user_id FROM app.orders; --- Deferred (nested) shapes for later increments. +-- Nested shapes: CTEs, CASE blocks, comma-separated FROM. CREATE VIEW app.recent_cte AS WITH recent AS (SELECT user_id, total FROM app.orders WHERE placed_at > now() - interval '7 days') SELECT user_id, sum(total) AS wk FROM recent GROUP BY user_id; -CREATE VIEW app.sub AS - SELECT u.email FROM app.users u WHERE u.id IN (SELECT user_id FROM app.orders); +CREATE VIEW app.two_cte AS + WITH x AS (SELECT id AS a FROM app.users), y AS (SELECT id AS b FROM app.orders) + SELECT x.a, y.b FROM x, y; CREATE VIEW app.distinct_case AS SELECT DISTINCT country, CASE WHEN active THEN 'on' ELSE 'off' END AS st FROM app.users; +CREATE VIEW app.case_plain AS + SELECT id, CASE WHEN active THEN 'on' ELSE 'off' END AS st FROM app.users; +CREATE VIEW app.case_first AS + SELECT CASE WHEN active THEN 1 ELSE 0 END AS x, id FROM app.users; + +-- Deferred (still unsupported): scalar subquery embedded in an expression. +CREATE VIEW app.sub AS + SELECT u.email FROM app.users u WHERE u.id IN (SELECT user_id FROM app.orders); CREATE FUNCTION app.add(a integer, b integer) RETURNS integer LANGUAGE sql IMMUTABLE AS $$ SELECT a + b $$; CREATE FUNCTION app.bump(p_id bigint) RETURNS void LANGUAGE plpgsql AS $fn$ @@ -73,10 +82,10 @@ SQL echo "capturing fixtures ..." mkdir -p "$FIXDIR/_deferred" -for v in us_users order_totals win uni; do +for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first; do cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/view_$v.sql" done -for v in recent_cte sub distinct_case; do +for v in sub; do cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/_deferred/view_$v.sql" done cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" diff --git a/tests/fixtures/pg_dump/view_case_first.sql b/tests/fixtures/pg_dump/view_case_first.sql new file mode 100644 index 0000000..e373438 --- /dev/null +++ b/tests/fixtures/pg_dump/view_case_first.sql @@ -0,0 +1,7 @@ + SELECT + CASE + WHEN active THEN 1 + ELSE 0 + END AS x, + id + FROM app.users; diff --git a/tests/fixtures/pg_dump/view_case_plain.sql b/tests/fixtures/pg_dump/view_case_plain.sql new file mode 100644 index 0000000..9ea5d80 --- /dev/null +++ b/tests/fixtures/pg_dump/view_case_plain.sql @@ -0,0 +1,6 @@ + SELECT id, + CASE + WHEN active THEN 'on'::text + ELSE 'off'::text + END AS st + FROM app.users; diff --git a/tests/fixtures/pg_dump/_deferred/view_distinct_case.sql b/tests/fixtures/pg_dump/view_distinct_case.sql similarity index 100% rename from tests/fixtures/pg_dump/_deferred/view_distinct_case.sql rename to tests/fixtures/pg_dump/view_distinct_case.sql diff --git a/tests/fixtures/pg_dump/_deferred/view_recent_cte.sql b/tests/fixtures/pg_dump/view_recent_cte.sql similarity index 100% rename from tests/fixtures/pg_dump/_deferred/view_recent_cte.sql rename to tests/fixtures/pg_dump/view_recent_cte.sql diff --git a/tests/fixtures/pg_dump/view_two_cte.sql b/tests/fixtures/pg_dump/view_two_cte.sql new file mode 100644 index 0000000..88b6a74 --- /dev/null +++ b/tests/fixtures/pg_dump/view_two_cte.sql @@ -0,0 +1,11 @@ + WITH x AS ( + SELECT users.id AS a + FROM app.users + ), y AS ( + SELECT orders.id AS b + FROM app.orders + ) + SELECT x.a, + y.b + FROM x, + y; From 412d7fc8d4d2a67454e1c5491ba98a79ba8f7343 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 16:47:18 -0400 Subject: [PATCH 3/6] Render subqueries embedded in expressions for Style::PgDump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle IN (SELECT …), EXISTS (…) and scalar subqueries inside WHERE / HAVING / target expressions — the last deferred view construct. ruleutils lays these out relative to the output column: the subquery's SELECT sits inline right after the open paren, while its own clauses align at the deeper (depth+1) river column. render_expr_text() walks an expression, reproduces everything outside a subquery verbatim (whitespace collapsed, boundary spaces preserved), and for each embedded select_with_parens renders the body at depth+1 with its first line de-indented, splicing it back in. With no embedded subquery it is just the prior collapse_ws path, so flat expressions are unchanged. Fixtures (regenerated from generate.sh): sub promoted out of _deferred, plus new sub_exists and sub_scalar. The _deferred directory is now empty and removed. Remaining gaps: subqueries in the FROM list (derived tables), in GROUP BY / ORDER BY items, or inside CASE arms. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/formatter/pgdump.rs | 97 +++++++++++++++++-- tests/fixtures/pg_dump/generate.sh | 13 +-- .../pg_dump/{_deferred => }/view_sub.sql | 0 tests/fixtures/pg_dump/view_sub_exists.sql | 5 + tests/fixtures/pg_dump/view_sub_scalar.sql | 5 + 5 files changed, 107 insertions(+), 13 deletions(-) rename tests/fixtures/pg_dump/{_deferred => }/view_sub.sql (100%) create mode 100644 tests/fixtures/pg_dump/view_sub_exists.sql create mode 100644 tests/fixtures/pg_dump/view_sub_scalar.sql diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs index efc05af..2306588 100644 --- a/src/formatter/pgdump.rs +++ b/src/formatter/pgdump.rs @@ -21,9 +21,12 @@ //! - a CTE body renders at depth `d+1`; its closing paren sits at //! `STEP*(d+1)`; set-operation keywords sit at column 1. //! -//! Not yet handled: scalar subqueries embedded in expressions (their layout is -//! output-column-relative). Statements the renderer doesn't recognize fall -//! back to verbatim source (still idempotent on deparser output). +//! Subqueries embedded in WHERE / HAVING / target expressions (`IN (SELECT …)`, +//! `EXISTS (…)`, scalar subqueries) are rendered inline at `d+1` and spliced +//! back in. Not yet handled: subqueries in the FROM list (derived tables), +//! GROUP BY / ORDER BY items or inside CASE arms. Statements the renderer +//! doesn't recognize fall back to verbatim source (still idempotent on +//! deparser output). use crate::error::FormatError; use crate::formatter::Formatter; @@ -59,6 +62,86 @@ impl<'a> Formatter<'a> { text.split_whitespace().collect::>().join(" ") } + /// Like [`collapse_ws`] but keeps a single boundary space when the original + /// began or ended with whitespace, so collapsed fragments concatenate with + /// correct spacing around a spliced-in subquery. + fn collapse_preserve_edges(&self, text: &str) -> String { + if text.is_empty() { + return String::new(); + } + let lead = text.starts_with(char::is_whitespace); + let trail = text.ends_with(char::is_whitespace); + let core = self.collapse_ws(text); + if core.is_empty() { + return if lead || trail { + " ".into() + } else { + String::new() + }; + } + format!( + "{}{core}{}", + if lead { " " } else { "" }, + if trail { " " } else { "" } + ) + } + + /// Render an expression that may embed sub-`SELECT`s. Parts outside a + /// subquery are reproduced verbatim (whitespace collapsed); each + /// `select_with_parens` is rendered at `depth + 1` and spliced inline, the + /// way ruleutils lays out `IN (SELECT ...)`, `EXISTS (SELECT ...)` and + /// scalar subqueries. With no embedded subquery this is just `collapse_ws`. + fn render_expr_text(&self, node: Node<'a>, depth: usize) -> String { + let mut subs = Vec::new(); + self.collect_select_with_parens(node, &mut subs); + if subs.is_empty() { + return self.collapse_ws(self.text(node)); + } + let full = self.source; + let mut out = String::new(); + let mut pos = node.start_byte(); + for swp in subs { + out.push_str(&self.collapse_preserve_edges(&full[pos..swp.start_byte()])); + out.push_str(&self.render_select_with_parens(swp, depth)); + pos = swp.end_byte(); + } + out.push_str(&self.collapse_preserve_edges(&full[pos..node.end_byte()])); + out + } + + /// Collect top-level `select_with_parens` nodes (not recursing into a + /// subquery once found), in source order. + fn collect_select_with_parens(&self, node: Node<'a>, out: &mut Vec>) { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() == "select_with_parens" { + out.push(child); + } else { + self.collect_select_with_parens(child, out); + } + } + } + + /// Render a parenthesized subquery inline: `( )` with the + /// body's first line de-indented so `SELECT` follows the open paren and + /// subsequent clauses align at the deeper river column. + fn render_select_with_parens(&self, swp: Node<'a>, depth: usize) -> String { + let body = swp.find_child("select_no_parens").or_else(|| { + swp.find_child("SelectStmt") + .and_then(|s| s.find_child("select_no_parens")) + }); + let Some(body) = body else { + return self.collapse_ws(self.text(swp)); + }; + let clauses = self.collect_select_clauses(body); + let inner = self.pgdump_render_clauses(&clauses, depth + 1); + let dedented = match inner.split_once('\n') { + Some((first, rest)) => format!("{}\n{rest}", first.trim_start()), + None => inner.trim_start().to_string(), + }; + format!("( {dedented})") + } + /// Leading spaces so `word` (a right-aligned clause keyword's first token) /// ends at the river column for `depth`. fn river_pad(&self, word_len: usize, depth: usize) -> String { @@ -96,7 +179,7 @@ impl<'a> Formatter<'a> { s.push('\n'); s.push_str(&self.river_pad(5, depth)); s.push_str("WHERE "); - s.push_str(&self.collapse_ws(self.text(expr))); + s.push_str(&self.render_expr_text(expr, depth)); } // GROUP BY @@ -120,7 +203,7 @@ impl<'a> Formatter<'a> { s.push('\n'); s.push_str(&self.river_pad(6, depth)); s.push_str("HAVING "); - s.push_str(&self.collapse_ws(self.text(expr))); + s.push_str(&self.render_expr_text(expr, depth)); } // ORDER BY @@ -184,11 +267,11 @@ impl<'a> Formatter<'a> { s.push_str(&self.render_case_block(*t, case_node, depth)); } else if i == 0 { s.push(' '); - s.push_str(&self.collapse_ws(self.text(*t))); + s.push_str(&self.render_expr_text(*t, depth)); } else { s.push('\n'); s.push_str(&cont); - s.push_str(&self.collapse_ws(self.text(*t))); + s.push_str(&self.render_expr_text(*t, depth)); } if i != last { s.push(','); diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh index bfe2622..1bfd33d 100755 --- a/tests/fixtures/pg_dump/generate.sh +++ b/tests/fixtures/pg_dump/generate.sh @@ -68,9 +68,14 @@ CREATE VIEW app.case_plain AS CREATE VIEW app.case_first AS SELECT CASE WHEN active THEN 1 ELSE 0 END AS x, id FROM app.users; --- Deferred (still unsupported): scalar subquery embedded in an expression. +-- Subqueries embedded in expressions (IN, EXISTS, scalar in target list). CREATE VIEW app.sub AS SELECT u.email FROM app.users u WHERE u.id IN (SELECT user_id FROM app.orders); +CREATE VIEW app.sub_exists AS + SELECT u.email FROM app.users u + WHERE EXISTS (SELECT 1 FROM app.orders o WHERE o.user_id = u.id); +CREATE VIEW app.sub_scalar AS + SELECT u.email, (SELECT count(*) FROM app.orders o WHERE o.user_id = u.id) AS n FROM app.users u; CREATE FUNCTION app.add(a integer, b integer) RETURNS integer LANGUAGE sql IMMUTABLE AS $$ SELECT a + b $$; CREATE FUNCTION app.bump(p_id bigint) RETURNS void LANGUAGE plpgsql AS $fn$ @@ -81,13 +86,9 @@ $fn$; SQL echo "capturing fixtures ..." -mkdir -p "$FIXDIR/_deferred" -for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first; do +for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first sub sub_exists sub_scalar; do cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/view_$v.sql" done -for v in sub; do - cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/_deferred/view_$v.sql" -done cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" cap "SELECT pg_get_functiondef('app.bump(bigint)'::regprocedure);" > "$FIXDIR/func_bump.sql" diff --git a/tests/fixtures/pg_dump/_deferred/view_sub.sql b/tests/fixtures/pg_dump/view_sub.sql similarity index 100% rename from tests/fixtures/pg_dump/_deferred/view_sub.sql rename to tests/fixtures/pg_dump/view_sub.sql diff --git a/tests/fixtures/pg_dump/view_sub_exists.sql b/tests/fixtures/pg_dump/view_sub_exists.sql new file mode 100644 index 0000000..d0b5395 --- /dev/null +++ b/tests/fixtures/pg_dump/view_sub_exists.sql @@ -0,0 +1,5 @@ + SELECT email + FROM app.users u + WHERE (EXISTS ( SELECT 1 + FROM app.orders o + WHERE o.user_id = u.id)); diff --git a/tests/fixtures/pg_dump/view_sub_scalar.sql b/tests/fixtures/pg_dump/view_sub_scalar.sql new file mode 100644 index 0000000..c2b7ad5 --- /dev/null +++ b/tests/fixtures/pg_dump/view_sub_scalar.sql @@ -0,0 +1,5 @@ + SELECT email, + ( SELECT count(*) AS count + FROM app.orders o + WHERE o.user_id = u.id) AS n + FROM app.users u; From bdf4155223715579b4016493b3fb2d02bdacadb6 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 16:51:31 -0400 Subject: [PATCH 4/6] Add LIMIT/OFFSET, FROM derived tables, set-op ordering to Style::PgDump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the remaining common view gaps in the ruleutils renderer: - LIMIT / OFFSET: previously collected but never emitted (silently dropped). ruleutils emits OFFSET before LIMIT, both at the SELECT keyword's column. - Clause ordering for set operations: ORDER BY / OFFSET / LIMIT apply to the whole UNION/INTERSECT/EXCEPT, so they now render after the right-hand select rather than before it. For a plain query the order is unchanged. - FROM derived tables `( SELECT … ) alias` and subqueries on a JOIN's right side: generalized the subquery splicer to operate over an arbitrary byte range (splice_range), so render_table_ref and JOIN steps reuse the same inline-at-depth+1 rendering as WHERE/target expressions. New fixtures (regenerated from generate.sh): lim, derived, derived_join, union_order. Remaining gaps: subqueries inside GROUP BY items or CASE arms. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/formatter/pgdump.rs | 95 +++++++++++++------- tests/fixtures/pg_dump/generate.sh | 14 ++- tests/fixtures/pg_dump/view_derived.sql | 4 + tests/fixtures/pg_dump/view_derived_join.sql | 7 ++ tests/fixtures/pg_dump/view_lim.sql | 5 ++ tests/fixtures/pg_dump/view_union_order.sql | 7 ++ 6 files changed, 97 insertions(+), 35 deletions(-) create mode 100644 tests/fixtures/pg_dump/view_derived.sql create mode 100644 tests/fixtures/pg_dump/view_derived_join.sql create mode 100644 tests/fixtures/pg_dump/view_lim.sql create mode 100644 tests/fixtures/pg_dump/view_union_order.sql diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs index 2306588..7d5912c 100644 --- a/src/formatter/pgdump.rs +++ b/src/formatter/pgdump.rs @@ -19,14 +19,15 @@ //! - a multi-line `CASE` target is a block at `8 + STEP*d`, its `WHEN`/`ELSE` //! lines at `12 + STEP*d`, `END` back at `8 + STEP*d`; //! - a CTE body renders at depth `d+1`; its closing paren sits at -//! `STEP*(d+1)`; set-operation keywords sit at column 1. +//! `STEP*(d+1)`; set-operation keywords sit at column 1, with a trailing +//! `ORDER BY` / `OFFSET` / `LIMIT` applying to the whole set operation; +//! - `OFFSET` and `LIMIT` (in that order) sit at the `SELECT` column. //! -//! Subqueries embedded in WHERE / HAVING / target expressions (`IN (SELECT …)`, -//! `EXISTS (…)`, scalar subqueries) are rendered inline at `d+1` and spliced -//! back in. Not yet handled: subqueries in the FROM list (derived tables), -//! GROUP BY / ORDER BY items or inside CASE arms. Statements the renderer -//! doesn't recognize fall back to verbatim source (still idempotent on -//! deparser output). +//! Subqueries (`IN (SELECT …)`, `EXISTS (…)`, scalar subqueries, and FROM-list +//! derived tables / subqueries on a JOIN's right side) are rendered inline at +//! `d+1` and spliced back in. Not yet handled: subqueries inside GROUP BY items +//! or CASE arms. Statements the renderer doesn't recognize fall back to +//! verbatim source (still idempotent on deparser output). use crate::error::FormatError; use crate::formatter::Formatter; @@ -92,20 +93,28 @@ impl<'a> Formatter<'a> { /// way ruleutils lays out `IN (SELECT ...)`, `EXISTS (SELECT ...)` and /// scalar subqueries. With no embedded subquery this is just `collapse_ws`. fn render_expr_text(&self, node: Node<'a>, depth: usize) -> String { - let mut subs = Vec::new(); - self.collect_select_with_parens(node, &mut subs); - if subs.is_empty() { - return self.collapse_ws(self.text(node)); - } + self.splice_range(node.start_byte(), node.end_byte(), node, depth) + } + + /// Reproduce the source bytes `[start, end)` (searching `root` for embedded + /// subqueries), collapsing whitespace outside subqueries and rendering each + /// `select_with_parens` inline at `depth + 1`. Used for expressions, FROM + /// derived tables and JOIN steps alike. + fn splice_range(&self, start: usize, end: usize, root: Node<'a>, depth: usize) -> String { + let mut all = Vec::new(); + self.collect_select_with_parens(root, &mut all); let full = self.source; let mut out = String::new(); - let mut pos = node.start_byte(); - for swp in subs { + let mut pos = start; + for swp in all { + if swp.start_byte() < start || swp.end_byte() > end { + continue; + } out.push_str(&self.collapse_preserve_edges(&full[pos..swp.start_byte()])); out.push_str(&self.render_select_with_parens(swp, depth)); pos = swp.end_byte(); } - out.push_str(&self.collapse_preserve_edges(&full[pos..node.end_byte()])); + out.push_str(&self.collapse_preserve_edges(&full[pos..end])); out } @@ -206,22 +215,9 @@ impl<'a> Formatter<'a> { s.push_str(&self.render_expr_text(expr, depth)); } - // ORDER BY - if let Some(sort) = c.sort_clause - && let Some(list) = sort.find_child("sortby_list") - { - let items: Vec = flatten_list(list, "sortby_list") - .iter() - .map(|i| self.collapse_ws(self.text(*i))) - .collect(); - s.push('\n'); - s.push_str(&self.river_pad(5, depth)); - s.push_str("ORDER BY "); - s.push_str(&items.join(", ")); - } - // Set operation (UNION / INTERSECT / EXCEPT): keyword at column 1, then - // the right-hand select rendered the same way. + // the right-hand select rendered the same way. ORDER BY / LIMIT below + // apply to the whole set operation, so they follow the right side. if let Some(so) = &c.set_op { s.push('\n'); s.push_str(&so.keyword); @@ -239,6 +235,34 @@ impl<'a> Formatter<'a> { s.push_str(&right); } + // ORDER BY + if let Some(sort) = c.sort_clause + && let Some(list) = sort.find_child("sortby_list") + { + let items: Vec = flatten_list(list, "sortby_list") + .iter() + .map(|i| self.render_expr_text(*i, depth)) + .collect(); + s.push('\n'); + s.push_str(&self.river_pad(5, depth)); + s.push_str("ORDER BY "); + s.push_str(&items.join(", ")); + } + + // OFFSET then LIMIT (ruleutils emits OFFSET first), both at the SELECT + // keyword's column. + let limit_lead = " ".repeat(STEP * depth + 1); + if let Some(off) = c.offset_clause { + s.push('\n'); + s.push_str(&limit_lead); + s.push_str(&self.collapse_ws(self.text(off))); + } + if let Some(lim) = c.limit_clause { + s.push('\n'); + s.push_str(&limit_lead); + s.push_str(&self.collapse_ws(self.text(lim))); + } + s } @@ -378,16 +402,19 @@ impl<'a> Formatter<'a> { node.find_child("joined_table") }; let Some(jt) = joined else { - return self.collapse_ws(self.text(node)); + // Plain relation or a derived table `( SELECT … ) alias`. + return self.render_expr_text(node, depth); }; let Some(left) = jt.find_child("table_ref") else { - return self.collapse_ws(self.text(jt)); + return self.render_expr_text(jt, depth); }; let mut s = self.render_table_ref(left, depth); - let step = self.collapse_ws(&self.source[left.end_byte()..jt.end_byte()]); + // The JOIN step (keyword, right relation, ON/USING) may itself contain a + // derived table or subquery, so splice over its byte range. + let step = self.splice_range(left.end_byte(), jt.end_byte(), jt, depth); s.push('\n'); s.push_str(&" ".repeat(STEP * depth + 5)); - s.push_str(&step); + s.push_str(step.trim_start()); s } diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh index 1bfd33d..8fe65d3 100755 --- a/tests/fixtures/pg_dump/generate.sh +++ b/tests/fixtures/pg_dump/generate.sh @@ -77,6 +77,17 @@ CREATE VIEW app.sub_exists AS CREATE VIEW app.sub_scalar AS SELECT u.email, (SELECT count(*) FROM app.orders o WHERE o.user_id = u.id) AS n FROM app.users u; +-- LIMIT / OFFSET, derived tables in FROM, set-op with trailing ORDER BY/LIMIT. +CREATE VIEW app.lim AS + SELECT id FROM app.users ORDER BY id LIMIT 10 OFFSET 5; +CREATE VIEW app.derived AS + SELECT s.email FROM (SELECT email FROM app.users WHERE active) s; +CREATE VIEW app.derived_join AS + SELECT u.email, t.n FROM app.users u + JOIN (SELECT user_id, count(*) AS n FROM app.orders GROUP BY user_id) t ON t.user_id = u.id; +CREATE VIEW app.union_order AS + SELECT id FROM app.users UNION SELECT user_id FROM app.orders ORDER BY 1 LIMIT 3; + CREATE FUNCTION app.add(a integer, b integer) RETURNS integer LANGUAGE sql IMMUTABLE AS $$ SELECT a + b $$; CREATE FUNCTION app.bump(p_id bigint) RETURNS void LANGUAGE plpgsql AS $fn$ BEGIN @@ -86,7 +97,8 @@ $fn$; SQL echo "capturing fixtures ..." -for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first sub sub_exists sub_scalar; do +for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first \ + sub sub_exists sub_scalar lim derived derived_join union_order; do cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/view_$v.sql" done cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" diff --git a/tests/fixtures/pg_dump/view_derived.sql b/tests/fixtures/pg_dump/view_derived.sql new file mode 100644 index 0000000..836b434 --- /dev/null +++ b/tests/fixtures/pg_dump/view_derived.sql @@ -0,0 +1,4 @@ + SELECT email + FROM ( SELECT users.email + FROM app.users + WHERE users.active) s; diff --git a/tests/fixtures/pg_dump/view_derived_join.sql b/tests/fixtures/pg_dump/view_derived_join.sql new file mode 100644 index 0000000..1436c5e --- /dev/null +++ b/tests/fixtures/pg_dump/view_derived_join.sql @@ -0,0 +1,7 @@ + SELECT u.email, + t.n + FROM app.users u + JOIN ( SELECT orders.user_id, + count(*) AS n + FROM app.orders + GROUP BY orders.user_id) t ON t.user_id = u.id; diff --git a/tests/fixtures/pg_dump/view_lim.sql b/tests/fixtures/pg_dump/view_lim.sql new file mode 100644 index 0000000..b7b9bfd --- /dev/null +++ b/tests/fixtures/pg_dump/view_lim.sql @@ -0,0 +1,5 @@ + SELECT id + FROM app.users + ORDER BY id + OFFSET 5 + LIMIT 10; diff --git a/tests/fixtures/pg_dump/view_union_order.sql b/tests/fixtures/pg_dump/view_union_order.sql new file mode 100644 index 0000000..d1349e4 --- /dev/null +++ b/tests/fixtures/pg_dump/view_union_order.sql @@ -0,0 +1,7 @@ + SELECT users.id + FROM app.users +UNION + SELECT orders.user_id AS id + FROM app.orders + ORDER BY 1 + LIMIT 3; From 8e8c2fe10e09c1a002cfed224a61a20ec43e1050 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 16:55:18 -0400 Subject: [PATCH 5/6] Handle nested grouping parens in Style::PgDump; validation sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A broad sweep of 20 diverse view shapes (FILTER, GROUPING SETS, window frames, array subscripts/slices, COALESCE/NULLIF, casts, simple-form CASE, LATERAL, DISTINCT ON, BETWEEN, ROW(), string_agg with ORDER BY, set-returning functions, nested subqueries) found one gap: a scalar subquery nested two levels deep renders as `(( SELECT … ))`, where the deparser nests one select_with_parens directly inside another. render_select_with_parens looked for select_no_parens, missed it, and collapsed to one line. It now recurses through the redundant grouping paren so only the innermost subquery gets the `( SELECT` form and the depth increment; the other 19 shapes already round-tripped. Locked five high-value shapes from the sweep as fixtures: nested_sub, lateral, window_frame, distinct_on, filter_agg (23 fixtures total). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/formatter/pgdump.rs | 7 +++++++ tests/fixtures/pg_dump/generate.sh | 19 ++++++++++++++++++- tests/fixtures/pg_dump/view_distinct_on.sql | 4 ++++ tests/fixtures/pg_dump/view_filter_agg.sql | 3 +++ tests/fixtures/pg_dump/view_lateral.sql | 8 ++++++++ tests/fixtures/pg_dump/view_nested_sub.sql | 7 +++++++ tests/fixtures/pg_dump/view_window_frame.sql | 3 +++ 7 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/pg_dump/view_distinct_on.sql create mode 100644 tests/fixtures/pg_dump/view_filter_agg.sql create mode 100644 tests/fixtures/pg_dump/view_lateral.sql create mode 100644 tests/fixtures/pg_dump/view_nested_sub.sql create mode 100644 tests/fixtures/pg_dump/view_window_frame.sql diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs index 7d5912c..23573b5 100644 --- a/src/formatter/pgdump.rs +++ b/src/formatter/pgdump.rs @@ -135,6 +135,13 @@ impl<'a> Formatter<'a> { /// body's first line de-indented so `SELECT` follows the open paren and /// subsequent clauses align at the deeper river column. fn render_select_with_parens(&self, swp: Node<'a>, depth: usize) -> String { + // Redundant grouping parens: `(( SELECT … ))` nests one + // select_with_parens directly inside another. The outer is a plain + // paren (no introducing space); recurse so only the innermost gets the + // `( SELECT` form and the depth increment. + if let Some(inner) = swp.find_child("select_with_parens") { + return format!("({})", self.render_select_with_parens(inner, depth)); + } let body = swp.find_child("select_no_parens").or_else(|| { swp.find_child("SelectStmt") .and_then(|s| s.find_child("select_no_parens")) diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh index 8fe65d3..b69e210 100755 --- a/tests/fixtures/pg_dump/generate.sh +++ b/tests/fixtures/pg_dump/generate.sh @@ -88,6 +88,22 @@ CREATE VIEW app.derived_join AS CREATE VIEW app.union_order AS SELECT id FROM app.users UNION SELECT user_id FROM app.orders ORDER BY 1 LIMIT 3; +-- Deeper / varied real-world shapes (validation sweep). +CREATE VIEW app.nested_sub AS + SELECT id, email FROM app.users u1 + WHERE EXISTS (SELECT 1 FROM app.orders o + WHERE o.user_id = u1.id AND o.total > (SELECT avg(total) FROM app.orders)); +CREATE VIEW app.lateral AS + SELECT t.uid, t.s FROM app.users u, + LATERAL (SELECT u.id AS uid, sum(total) AS s FROM app.orders o WHERE o.user_id = u.id GROUP BY u.id) t; +CREATE VIEW app.window_frame AS + SELECT id, sum(total) OVER (PARTITION BY user_id ORDER BY placed_at + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS run FROM app.orders; +CREATE VIEW app.distinct_on AS + SELECT DISTINCT ON (country) country, id FROM app.users ORDER BY country, id; +CREATE VIEW app.filter_agg AS + SELECT count(*) FILTER (WHERE active) AS act, count(*) AS cnt FROM app.users; + CREATE FUNCTION app.add(a integer, b integer) RETURNS integer LANGUAGE sql IMMUTABLE AS $$ SELECT a + b $$; CREATE FUNCTION app.bump(p_id bigint) RETURNS void LANGUAGE plpgsql AS $fn$ BEGIN @@ -98,7 +114,8 @@ SQL echo "capturing fixtures ..." for v in us_users order_totals win uni recent_cte two_cte distinct_case case_plain case_first \ - sub sub_exists sub_scalar lim derived derived_join union_order; do + sub sub_exists sub_scalar lim derived derived_join union_order \ + nested_sub lateral window_frame distinct_on filter_agg; do cap "SELECT pg_get_viewdef('app.$v'::regclass, true);" > "$FIXDIR/view_$v.sql" done cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" diff --git a/tests/fixtures/pg_dump/view_distinct_on.sql b/tests/fixtures/pg_dump/view_distinct_on.sql new file mode 100644 index 0000000..7174986 --- /dev/null +++ b/tests/fixtures/pg_dump/view_distinct_on.sql @@ -0,0 +1,4 @@ + SELECT DISTINCT ON (country) country, + id + FROM app.users + ORDER BY country, id; diff --git a/tests/fixtures/pg_dump/view_filter_agg.sql b/tests/fixtures/pg_dump/view_filter_agg.sql new file mode 100644 index 0000000..4144f76 --- /dev/null +++ b/tests/fixtures/pg_dump/view_filter_agg.sql @@ -0,0 +1,3 @@ + SELECT count(*) FILTER (WHERE active) AS act, + count(*) AS cnt + FROM app.users; diff --git a/tests/fixtures/pg_dump/view_lateral.sql b/tests/fixtures/pg_dump/view_lateral.sql new file mode 100644 index 0000000..c80120c --- /dev/null +++ b/tests/fixtures/pg_dump/view_lateral.sql @@ -0,0 +1,8 @@ + SELECT t.uid, + t.s + FROM app.users u, + LATERAL ( SELECT u.id AS uid, + sum(o.total) AS s + FROM app.orders o + WHERE o.user_id = u.id + GROUP BY u.id) t; diff --git a/tests/fixtures/pg_dump/view_nested_sub.sql b/tests/fixtures/pg_dump/view_nested_sub.sql new file mode 100644 index 0000000..3f72956 --- /dev/null +++ b/tests/fixtures/pg_dump/view_nested_sub.sql @@ -0,0 +1,7 @@ + SELECT id, + email + FROM app.users u1 + WHERE (EXISTS ( SELECT 1 + FROM app.orders o + WHERE o.user_id = u1.id AND o.total > (( SELECT avg(orders.total) AS avg + FROM app.orders)))); diff --git a/tests/fixtures/pg_dump/view_window_frame.sql b/tests/fixtures/pg_dump/view_window_frame.sql new file mode 100644 index 0000000..75e2125 --- /dev/null +++ b/tests/fixtures/pg_dump/view_window_frame.sql @@ -0,0 +1,3 @@ + SELECT id, + sum(total) OVER (PARTITION BY user_id ORDER BY placed_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS run + FROM app.orders; From 8b196cee6d8a95fd457c8f84a61fe7c4bd04e09c Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Mon, 15 Jun 2026 17:00:03 -0400 Subject: [PATCH 6/6] Round out Style::PgDump function rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function-variety sweep (DEFAULT args, OUT params, VARIADIC, RETURNS TABLE/SETOF, behavior attributes, SET, SQL-standard RETURN body) found three gaps in the CREATE FUNCTION renderer: - RETURNS TABLE(...) / SETOF were dropped: the return type is no longer a func_return node. Render the whole RETURNS span (signature end → option list start) uniformly, which covers scalar, SETOF and TABLE forms. - Behavior attributes (volatility, LEAKPROOF, STRICT, SECURITY, PARALLEL, WINDOW) were emitted one per line; pg_get_functiondef groups them on a single line. Group consecutive such opt-items, keeping LANGUAGE / SET / COST / ROWS / SUPPORT / AS on their own lines. - SQL-standard `RETURN expr` bodies (LANGUAGE sql without AS) were dropped: emit opt_routine_body at column 0. Eight function shapes locked as fixtures (func_default, func_out, func_variadic, func_table, func_setof, func_behavior, func_set, func_return); 31 fixtures total. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/formatter/pgdump.rs | 70 +++++++++++++++++++----- tests/fixtures/pg_dump/func_behavior.sql | 6 ++ tests/fixtures/pg_dump/func_default.sql | 5 ++ tests/fixtures/pg_dump/func_out.sql | 5 ++ tests/fixtures/pg_dump/func_return.sql | 5 ++ tests/fixtures/pg_dump/func_set.sql | 6 ++ tests/fixtures/pg_dump/func_setof.sql | 5 ++ tests/fixtures/pg_dump/func_table.sql | 5 ++ tests/fixtures/pg_dump/func_variadic.sql | 5 ++ tests/fixtures/pg_dump/generate.sh | 13 +++++ 10 files changed, 111 insertions(+), 14 deletions(-) create mode 100644 tests/fixtures/pg_dump/func_behavior.sql create mode 100644 tests/fixtures/pg_dump/func_default.sql create mode 100644 tests/fixtures/pg_dump/func_out.sql create mode 100644 tests/fixtures/pg_dump/func_return.sql create mode 100644 tests/fixtures/pg_dump/func_set.sql create mode 100644 tests/fixtures/pg_dump/func_setof.sql create mode 100644 tests/fixtures/pg_dump/func_table.sql create mode 100644 tests/fixtures/pg_dump/func_variadic.sql diff --git a/src/formatter/pgdump.rs b/src/formatter/pgdump.rs index 23573b5..ab05312 100644 --- a/src/formatter/pgdump.rs +++ b/src/formatter/pgdump.rs @@ -463,8 +463,9 @@ impl<'a> Formatter<'a> { } /// Render a `CREATE FUNCTION` statement in `pg_get_functiondef` layout: - /// signature on the first line, `RETURNS` and each option on its own - /// space-prefixed line, and the `AS` clause (body verbatim) last. + /// signature on the first line, then `RETURNS`, `LANGUAGE`, the grouped + /// behavior attributes, `SET`/`COST`/… and finally the `AS` clause (body + /// verbatim) or a SQL-standard `RETURN`. fn pgdump_create_function(&self, node: Node<'a>) -> String { let mut s = String::new(); @@ -481,33 +482,74 @@ impl<'a> Formatter<'a> { }); s.push_str(&self.collapse_ws(&self.source[node.start_byte()..sig_end])); - // RETURNS - if let Some(ret) = node.find_child("func_return") { - s.push_str("\n RETURNS "); - s.push_str(&self.collapse_ws(self.text(ret))); - } - - // Options (LANGUAGE, volatility, STRICT, AS body, ...) in source order, - // which matches the deparser's canonical order on genuine input. - if let Some(opts) = node + let opts = node .find_child("opt_createfunc_opt_list") .and_then(|n| n.find_child("createfunc_opt_list")) - .or_else(|| node.find_child("createfunc_opt_list")) - { - for item in flatten_list(opts, "createfunc_opt_list") { + .or_else(|| node.find_child("createfunc_opt_list")); + + // RETURNS spec (scalar, SETOF or TABLE(...)) — everything between the + // signature and the option list, rendered on its own line. + if let Some(ol) = opts { + let ret = self.collapse_ws(&self.source[sig_end..ol.start_byte()]); + if !ret.is_empty() { + s.push_str("\n "); + s.push_str(&ret); + } + } + + // Options in source order (which matches the deparser's canonical + // order). pg_get_functiondef puts LANGUAGE, SET, COST, … each on their + // own line, but groups the behavior attributes (volatility, LEAKPROOF, + // STRICT, SECURITY, PARALLEL, WINDOW) together on one line. + if let Some(ol) = opts { + let mut behavior: Vec = Vec::new(); + for item in flatten_list(ol, "createfunc_opt_list") { if let Some(as_kw) = item.find_child("kw_as") { + self.flush_behavior(&mut s, &mut behavior); // AS clause: body reproduced verbatim (dollar-quoted text // may span multiple lines and must not be collapsed). let body = self.source[as_kw.end_byte()..item.end_byte()].trim_start(); s.push_str("\nAS "); s.push_str(body); + } else if self.is_behavior_opt(item) { + behavior.push(self.collapse_ws(self.text(item))); } else { + self.flush_behavior(&mut s, &mut behavior); s.push_str("\n "); s.push_str(&self.collapse_ws(self.text(item))); } } + self.flush_behavior(&mut s, &mut behavior); + } + + // SQL-standard body: `RETURN expr` (no AS), emitted at column 0. + if let Some(body) = node.find_child("opt_routine_body") { + s.push('\n'); + s.push_str(&self.collapse_ws(self.text(body))); } s } + + /// A `createfunc_opt_item` is a "behavior" attribute (grouped onto one line + /// by the deparser) when it wraps a `common_func_opt_item` that is not a + /// SET / COST / ROWS / SUPPORT clause (those get their own lines). + fn is_behavior_opt(&self, item: Node<'a>) -> bool { + let Some(cfo) = item.find_child("common_func_opt_item") else { + return false; + }; + !(cfo.has_child("FunctionSetResetClause") + || cfo.has_child("kw_cost") + || cfo.has_child("kw_rows") + || cfo.has_child("kw_support")) + } + + /// Emit any pending behavior attributes as a single space-joined line. + fn flush_behavior(&self, s: &mut String, behavior: &mut Vec) { + if !behavior.is_empty() { + s.push_str("\n "); + s.push_str(&behavior.join(" ")); + behavior.clear(); + } + } } diff --git a/tests/fixtures/pg_dump/func_behavior.sql b/tests/fixtures/pg_dump/func_behavior.sql new file mode 100644 index 0000000..cb02c8d --- /dev/null +++ b/tests/fixtures/pg_dump/func_behavior.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION app.fn_behavior(x integer) + RETURNS integer + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ SELECT x $function$ + diff --git a/tests/fixtures/pg_dump/func_default.sql b/tests/fixtures/pg_dump/func_default.sql new file mode 100644 index 0000000..2188d2c --- /dev/null +++ b/tests/fixtures/pg_dump/func_default.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_default(a integer, b integer DEFAULT 0) + RETURNS integer + LANGUAGE sql +AS $function$ SELECT a + b $function$ + diff --git a/tests/fixtures/pg_dump/func_out.sql b/tests/fixtures/pg_dump/func_out.sql new file mode 100644 index 0000000..c551d88 --- /dev/null +++ b/tests/fixtures/pg_dump/func_out.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_out(a integer, OUT q integer, OUT r integer) + RETURNS record + LANGUAGE sql +AS $function$ SELECT a / 2, a % 2 $function$ + diff --git a/tests/fixtures/pg_dump/func_return.sql b/tests/fixtures/pg_dump/func_return.sql new file mode 100644 index 0000000..e3ec165 --- /dev/null +++ b/tests/fixtures/pg_dump/func_return.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_return(x integer) + RETURNS integer + LANGUAGE sql +RETURN (x + 1) + diff --git a/tests/fixtures/pg_dump/func_set.sql b/tests/fixtures/pg_dump/func_set.sql new file mode 100644 index 0000000..892ff21 --- /dev/null +++ b/tests/fixtures/pg_dump/func_set.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION app.fn_set(x integer) + RETURNS integer + LANGUAGE sql + SET search_path TO 'public' +AS $function$ SELECT x $function$ + diff --git a/tests/fixtures/pg_dump/func_setof.sql b/tests/fixtures/pg_dump/func_setof.sql new file mode 100644 index 0000000..c51e96a --- /dev/null +++ b/tests/fixtures/pg_dump/func_setof.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_setof(x integer) + RETURNS SETOF integer + LANGUAGE sql +AS $function$ SELECT generate_series(1, x) $function$ + diff --git a/tests/fixtures/pg_dump/func_table.sql b/tests/fixtures/pg_dump/func_table.sql new file mode 100644 index 0000000..f29801a --- /dev/null +++ b/tests/fixtures/pg_dump/func_table.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_table(x integer) + RETURNS TABLE(a integer, b text) + LANGUAGE sql +AS $function$ SELECT x, 'y'::text $function$ + diff --git a/tests/fixtures/pg_dump/func_variadic.sql b/tests/fixtures/pg_dump/func_variadic.sql new file mode 100644 index 0000000..f750134 --- /dev/null +++ b/tests/fixtures/pg_dump/func_variadic.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE FUNCTION app.fn_variadic(VARIADIC arr integer[]) + RETURNS integer + LANGUAGE sql +AS $function$ SELECT array_length(arr, 1) $function$ + diff --git a/tests/fixtures/pg_dump/generate.sh b/tests/fixtures/pg_dump/generate.sh index b69e210..649b385 100755 --- a/tests/fixtures/pg_dump/generate.sh +++ b/tests/fixtures/pg_dump/generate.sh @@ -110,6 +110,16 @@ BEGIN UPDATE app.users SET active = true WHERE id = p_id; END; $fn$; +-- Function variety: DEFAULT args, OUT params, VARIADIC, RETURNS TABLE/SETOF, +-- grouped behavior attributes, SET, SQL-standard RETURN body. +CREATE FUNCTION app.fn_default(a integer, b integer DEFAULT 0) RETURNS integer LANGUAGE sql AS $$ SELECT a + b $$; +CREATE FUNCTION app.fn_out(IN a integer, OUT q integer, OUT r integer) LANGUAGE sql AS $$ SELECT a / 2, a % 2 $$; +CREATE FUNCTION app.fn_variadic(VARIADIC arr integer[]) RETURNS integer LANGUAGE sql AS $$ SELECT array_length(arr, 1) $$; +CREATE FUNCTION app.fn_table(x integer) RETURNS TABLE(a integer, b text) LANGUAGE sql AS $$ SELECT x, 'y'::text $$; +CREATE FUNCTION app.fn_setof(x integer) RETURNS SETOF integer LANGUAGE sql AS $$ SELECT generate_series(1, x) $$; +CREATE FUNCTION app.fn_behavior(x integer) RETURNS integer LANGUAGE sql STRICT IMMUTABLE PARALLEL SAFE AS $$ SELECT x $$; +CREATE FUNCTION app.fn_set(x integer) RETURNS integer LANGUAGE sql SET search_path TO 'public' AS $$ SELECT x $$; +CREATE FUNCTION app.fn_return(x integer) RETURNS integer LANGUAGE sql RETURN x + 1; SQL echo "capturing fixtures ..." @@ -120,5 +130,8 @@ for v in us_users order_totals win uni recent_cte two_cte distinct_case case_pla done cap "SELECT pg_get_functiondef('app.add(integer,integer)'::regprocedure);" > "$FIXDIR/func_add.sql" cap "SELECT pg_get_functiondef('app.bump(bigint)'::regprocedure);" > "$FIXDIR/func_bump.sql" +for f in fn_default fn_out fn_variadic fn_table fn_setof fn_behavior fn_set fn_return; do + cap "SELECT pg_get_functiondef(p.oid) FROM pg_proc p WHERE p.proname = '$f';" > "$FIXDIR/func_${f#fn_}.sql" +done echo "done — fixtures written to $FIXDIR"