From f397f55343d30cbe17c176798df310d9c957690c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:23:59 +0200 Subject: [PATCH 01/56] Starting on auto generation --- scripts/parser/gen_transformer_v2.py | 133 +++++++++++++++++++++++++++ scripts/parser/inline_grammar.py | 7 ++ 2 files changed, 140 insertions(+) create mode 100644 scripts/parser/gen_transformer_v2.py diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py new file mode 100644 index 000000000000..deabfd73286a --- /dev/null +++ b/scripts/parser/gen_transformer_v2.py @@ -0,0 +1,133 @@ +import re +import sys +from pathlib import Path + +# NOTE: inline_grammar.py has module-level side effects (writes keyword_map.cpp +# and inlined_grammar.hpp on import). This is acceptable for a generation script. +sys.path.insert(0, str(Path(__file__).parent)) +from inline_grammar import parse_peg_grammar, PEGTokenType +from generate_transformer import load_grammar_types + +scripts_dir = Path(__file__).parent.parent +peg_dir = scripts_dir.parent / 'src' / 'parser' / 'peg' +statements_dir = peg_dir / 'grammar' / 'statements' +type_dir = scripts_dir / 'parser' + + +def to_snake_case(name): + s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + +def is_simple_rule(rule): + """True if all tokens are LITERAL or REFERENCE (no operators, choices, groups).""" + return all(t.type in (PEGTokenType.LITERAL, PEGTokenType.REFERENCE) for t in rule.tokens) + + +def get_semantic_children(rule): + """Returns [(index, rule_name)] for every REFERENCE token in the rule.""" + return [(i, tok.text) for i, tok in enumerate(rule.tokens) if tok.type == PEGTokenType.REFERENCE] + + +def generate_internal_declaration(rule_name, return_type): + return (f"\tstatic {return_type} Transform{rule_name}Internal" + f"(PEGTransformer &transformer, ParseResult &parse_result);\n") + + +def generate_body_declaration(rule_name, return_type, semantic_children, rule_to_type): + params = ", ".join( + f"{rule_to_type[name]} {to_snake_case(name)}" for _, name in semantic_children + ) + return f"\tstatic {return_type} Transform{rule_name}({params});\n" + + +def generate_internal_wrapper(rule_name, return_type, semantic_children, rule_to_type): + """Generates the Internal .cpp function that extracts children and calls the body.""" + arg_lines = [] + arg_names = [] + for idx, child_name in semantic_children: + var = to_snake_case(child_name) + child_type = rule_to_type[child_name] + arg_lines.append(f"\tauto {var} = transformer.Transform<{child_type}>(list_pr, {idx});") + arg_names.append(var) + + body = [] + if arg_lines: + body.append("\tauto &list_pr = parse_result.Cast();") + body.extend(arg_lines) + body.append(f"\treturn Transform{rule_name}({', '.join(arg_names)});") + + return ( + f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + + "\n".join(body) + + "\n}\n" + ) + + +def generate_registration(rule_name): + return f'Register("{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal);\n' + + +def generate_output(rules, rule_to_type, gram_stem): + declarations = [] + implementations = [] + registrations = [] + skipped = [] + + for rule_name, rule in rules.items(): + return_type = rule.return_type + if return_type is None: + skipped.append((rule_name, "no return type in grammar_types.yml")) + continue + if not is_simple_rule(rule): + skipped.append((rule_name, "complex rule (has operators/choices/groups)")) + continue + + children = get_semantic_children(rule) + unknown = [name for _, name in children if name not in rule_to_type] + if unknown: + skipped.append((rule_name, f"unknown child types: {unknown}")) + continue + + declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_body_declaration(rule_name, return_type, children, rule_to_type)) + implementations.append(generate_internal_wrapper(rule_name, return_type, children, rule_to_type)) + registrations.append(generate_registration(rule_name)) + + if skipped: + print("=== SKIPPED (manual implementation required) ===") + for rule_name, reason in skipped: + print(f" {rule_name}: {reason}") + print() + + print("=== DECLARATIONS (peg_transformer_generated.hpp) ===") + print("".join(declarations)) + + print(f"=== IMPLEMENTATION (generated/transform_{gram_stem}_generated.cpp) ===") + print("".join(implementations)) + + print(f"=== REGISTRATION (in Register{gram_stem.capitalize()}() in peg_transformer_factory.cpp) ===") + print("".join(registrations)) + + +def main(): + use_file_path = statements_dir / 'use.gram' + rules = {} + with open(use_file_path, 'r') as f: + file_content = f.read() + try: + rules = parse_peg_grammar(file_content) + except Exception as e: + raise Exception(f"{use_file_path.name}: {e}") from None + + rule_to_type = load_grammar_types(type_dir / 'grammar_types.yml') + for rule_name, return_type in rule_to_type.items(): + if rule_name in rules: + rules[rule_name].return_type = return_type + + generate_output(rules, rule_to_type, gram_stem="use") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/parser/inline_grammar.py b/scripts/parser/inline_grammar.py index 0f0bdf1ffe94..e951bcfed065 100644 --- a/scripts/parser/inline_grammar.py +++ b/scripts/parser/inline_grammar.py @@ -132,12 +132,16 @@ class PEGTokenType(Enum): REGEX = auto() OPERATOR = auto() + def __str__(self): + return self.value + class PEGGrammarRule: def __init__(self): self.rule_name = None self.tokens = [] self.parameters = {} + self.return_type = None def has_tokens(self): return bool(self.tokens) @@ -145,10 +149,13 @@ def has_tokens(self): def clear(self): self.tokens = [] self.parameters = {} + self.return_type = None def references(self): return [t.text for t in self.tokens if t.type in (PEGTokenType.REFERENCE, PEGTokenType.FUNCTION_CALL)] + def __str__(self): + return f"{self.rule_name} [{self.return_type}]<- {self.tokens}" class ParseState(Enum): RULE_NAME = auto() From 2c150b34abd514f37aae03940af5082f01a0686d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:50:18 +0200 Subject: [PATCH 02/56] Clean up inline_grammar.py --- scripts/parser/inline_grammar.py | 218 ++++++++++++++----------------- 1 file changed, 100 insertions(+), 118 deletions(-) diff --git a/scripts/parser/inline_grammar.py b/scripts/parser/inline_grammar.py index e951bcfed065..a5190f0f4d4b 100644 --- a/scripts/parser/inline_grammar.py +++ b/scripts/parser/inline_grammar.py @@ -3,27 +3,12 @@ from enum import Enum, auto from pathlib import Path -parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files') -parser.add_argument( - '--print', action='store_true', help='Print the grammar instead of writing to a file', default=False -) -parser.add_argument( - '--grammar-file', - action='store_true', - help='Write the grammar to a .gram file instead of a C++ header', - default=False, -) - -args = parser.parse_args() - scripts_dir = Path(__file__).parent.parent peg_dir = scripts_dir.parent / 'src' / 'parser' / 'peg' statements_dir = peg_dir / 'grammar' / 'statements' keywords_dir = peg_dir / 'grammar' / 'keywords' target_file = scripts_dir.parent / 'src' / 'include' / 'duckdb' / 'parser' / 'peg' / 'inlined_grammar.hpp' -contents = "" - IMPLICIT_RULES = {'%whitespace'} # Maps filenames to string categories. @@ -46,50 +31,13 @@ "TYPE_NAME_KEYWORD": "typename_keyword_map", } -# Use a dictionary of sets to collect keywords for each category, preventing duplicates -keyword_sets = {category: set() for category in CPP_MAP_NAMES} - -# --- Validation and Loading (largely unchanged) --- -# For validation during the loading phase -reserved_set = set() -unreserved_set = set() - def load_keywords(path): with open(path, "r") as f: return [line.strip().lower() for line in f if line.strip()] -for filepath in keywords_dir.iterdir(): - if filepath.name not in FILENAME_TO_CATEGORY: - continue - - category = FILENAME_TO_CATEGORY[filepath.name] - keywords = load_keywords(filepath) - - for kw in keywords: - # Validation logic remains the same to enforce rules - if category == "RESERVED_KEYWORD": - if kw in reserved_set or kw in unreserved_set: - print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") - exit(1) - reserved_set.add(kw) - elif category == "UNRESERVED_KEYWORD": - if kw in reserved_set or kw in unreserved_set: - print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") - exit(1) - unreserved_set.add(kw) - - # Add the keyword to the primary set - keyword_sets[category].add(kw) - - # type_name_keyword.list also populates typename_keyword_map - if filepath.name == "type_name_keyword.list": - keyword_sets["TYPE_NAME_KEYWORD"].add(kw) - - -def write_keyword_map(): - # --- C++ Code Generation --- +def write_keyword_map(keyword_sets): output_path = peg_dir / "keyword_map.cpp" with open(output_path, "w") as f: f.write("/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */\n") @@ -99,19 +47,12 @@ def write_keyword_map(): f.write("\tif (initialized) {\n\t\treturn;\n\t};\n") f.write("\tinitialized = true;\n\n") - # Get the total number of categories to handle the last item differently num_categories = len(keyword_sets) - - # Iterate through each category and generate code for each map for i, (category, keywords) in enumerate(keyword_sets.items()): cpp_map_name = CPP_MAP_NAMES[category] f.write(f"\t// Populating {cpp_map_name}\n") - # Sort keywords for deterministic output for kw in sorted(keywords): - # Populate the C++ set with insert f.write(f'\t{cpp_map_name}.insert("{kw}");\n') - - # Add a newline for all but the last block if i < num_categories - 1: f.write("\n") f.write("}\n") @@ -157,6 +98,7 @@ def references(self): def __str__(self): return f"{self.rule_name} [{self.return_type}]<- {self.tokens}" + class ParseState(Enum): RULE_NAME = auto() RULE_SEPARATOR = auto() @@ -364,58 +306,6 @@ def check_undefined_rules(all_rules): exit(1) -rules = {} - -with open(statements_dir / "common.gram", 'r') as f: - file_content = f.read() - try: - update_rules(rules, parse_peg_grammar(file_content), "common.gram") - except Exception as e: - raise Exception(f"common.gram: {e}") from None - contents += file_content + "\n" - - -for filepath in keywords_dir.iterdir(): - if filepath.suffix != '.list': - continue - rule_name = filename_to_upper_camel(filepath) - rule = f"{rule_name} <- " - with open(filepath, 'r') as f: - lines = [f"'{line.strip()}'" for line in f if line.strip()] - rule += " /\n".join(lines) + "\n" - contents += rule - update_rules(rules, parse_peg_grammar(rule), filepath.name) - -for filepath in statements_dir.iterdir(): - if filepath.suffix != '.gram': - if not filepath.is_dir(): - raise Exception(f"File {filepath.name} does not end with .gram") - continue - if filepath.name != "common.gram": - with open(filepath, 'r') as f: - file_content = f.read() - try: - update_rules(rules, parse_peg_grammar(file_content), filepath.name) - except Exception as e: - raise Exception(f"{filepath.name}: {e}") from None - contents += file_content + "\n" - -check_unused_rules(rules) -check_undefined_rules(rules) - -write_keyword_map() - -if args.print: - print(contents) - exit(0) - -if args.grammar_file: - grammar_file = target_file.with_suffix('.gram') - with open(grammar_file, 'w+') as f: - f.write(contents) - exit(0) - - def get_grammar_bytes(contents): result_text = "" for line in contents.split('\n'): @@ -425,19 +315,111 @@ def get_grammar_bytes(contents): return result_text -with open(target_file, 'w+') as f: - f.write( - '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */ +def load_all_rules(): + """Load and validate all grammar rules from statements/ and keywords/. Returns (rules, contents).""" + keyword_sets = {category: set() for category in CPP_MAP_NAMES} + reserved_set = set() + unreserved_set = set() + contents = "" + rules = {} + + for filepath in keywords_dir.iterdir(): + if filepath.name not in FILENAME_TO_CATEGORY: + continue + category = FILENAME_TO_CATEGORY[filepath.name] + keywords = load_keywords(filepath) + for kw in keywords: + if category == "RESERVED_KEYWORD": + if kw in reserved_set or kw in unreserved_set: + print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") + exit(1) + reserved_set.add(kw) + elif category == "UNRESERVED_KEYWORD": + if kw in reserved_set or kw in unreserved_set: + print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") + exit(1) + unreserved_set.add(kw) + keyword_sets[category].add(kw) + if filepath.name == "type_name_keyword.list": + keyword_sets["TYPE_NAME_KEYWORD"].add(kw) + + with open(statements_dir / "common.gram", 'r') as f: + file_content = f.read() + try: + update_rules(rules, parse_peg_grammar(file_content), "common.gram") + except Exception as e: + raise Exception(f"common.gram: {e}") from None + contents += file_content + "\n" + + for filepath in keywords_dir.iterdir(): + if filepath.suffix != '.list': + continue + rule_name = filename_to_upper_camel(filepath) + rule = f"{rule_name} <- " + with open(filepath, 'r') as f: + lines = [f"'{line.strip()}'" for line in f if line.strip()] + rule += " /\n".join(lines) + "\n" + contents += rule + update_rules(rules, parse_peg_grammar(rule), filepath.name) + + for filepath in statements_dir.iterdir(): + if filepath.suffix != '.gram': + if not filepath.is_dir(): + raise Exception(f"File {filepath.name} does not end with .gram") + continue + if filepath.name != "common.gram": + with open(filepath, 'r') as f: + file_content = f.read() + try: + update_rules(rules, parse_peg_grammar(file_content), filepath.name) + except Exception as e: + raise Exception(f"{filepath.name}: {e}") from None + contents += file_content + "\n" + + check_unused_rules(rules) + check_undefined_rules(rules) + + return rules, contents, keyword_sets + + +def main(): + arg_parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files') + arg_parser.add_argument('--print', action='store_true', default=False, + help='Print the grammar instead of writing to a file') + arg_parser.add_argument('--grammar-file', action='store_true', default=False, + help='Write the grammar to a .gram file instead of a C++ header') + args = arg_parser.parse_args() + + rules, contents, keyword_sets = load_all_rules() + write_keyword_map(keyword_sets) + + if args.print: + print(contents) + return + + if args.grammar_file: + grammar_file = target_file.with_suffix('.gram') + with open(grammar_file, 'w+') as f: + f.write(contents) + return + + with open(target_file, 'w+') as f: + f.write( + '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */ #pragma once namespace duckdb { const char INLINED_PEG_GRAMMAR[] = { ''' - + get_grammar_bytes(contents) - + ''' + + get_grammar_bytes(contents) + + ''' }; } // namespace duckdb ''' - ) + ) + + +if __name__ == "__main__": + main() \ No newline at end of file From 973dd1604ba114d0215dc5f2bbb8d10e6c8f96b0 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:50:45 +0200 Subject: [PATCH 03/56] Add --write mode to write autogenerated transformer files --- scripts/parser/gen_transformer_v2.py | 90 ++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index deabfd73286a..ffc3b964a1eb 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -1,17 +1,22 @@ +import argparse import re import sys from pathlib import Path -# NOTE: inline_grammar.py has module-level side effects (writes keyword_map.cpp -# and inlined_grammar.hpp on import). This is acceptable for a generation script. sys.path.insert(0, str(Path(__file__).parent)) from inline_grammar import parse_peg_grammar, PEGTokenType from generate_transformer import load_grammar_types scripts_dir = Path(__file__).parent.parent -peg_dir = scripts_dir.parent / 'src' / 'parser' / 'peg' +src_dir = scripts_dir.parent / 'src' +peg_dir = src_dir / 'parser' / 'peg' statements_dir = peg_dir / 'grammar' / 'statements' type_dir = scripts_dir / 'parser' +transformer_dir = peg_dir / 'transformer' +generated_dir = transformer_dir / 'generated' +include_peg_dir = src_dir / 'include' / 'duckdb' / 'parser' / 'peg' / 'transformer' + +GENERATED_HEADER = "// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT\n" def to_snake_case(name): @@ -69,7 +74,8 @@ def generate_registration(rule_name): return f'Register("{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal);\n' -def generate_output(rules, rule_to_type, gram_stem): +def collect_generated(rules, rule_to_type): + """Classify all rules; return lists of generated content and skipped rules.""" declarations = [] implementations = [] registrations = [] @@ -95,6 +101,10 @@ def generate_output(rules, rule_to_type, gram_stem): implementations.append(generate_internal_wrapper(rule_name, return_type, children, rule_to_type)) registrations.append(generate_registration(rule_name)) + return declarations, implementations, registrations, skipped + + +def print_output(declarations, implementations, registrations, skipped, gram_stem): if skipped: print("=== SKIPPED (manual implementation required) ===") for rule_name, reason in skipped: @@ -111,22 +121,82 @@ def generate_output(rules, rule_to_type, gram_stem): print("".join(registrations)) +def cpp_file_content(implementations): + return ( + GENERATED_HEADER + + '#include "duckdb/parser/peg/transformer/peg_transformer.hpp"\n' + + "\nnamespace duckdb {\n\n" + + "\n".join(implementations) + + "\n} // namespace duckdb\n" + ) + + +def cmake_content(cpp_filenames): + files = "\n ".join(cpp_filenames) + return ( + "add_library_unity(\n" + " duckdb_parser_peg_transformer_generated\n" + " OBJECT\n" + f" {files})\n\n" + "set(ALL_OBJECT_FILES\n" + " ${ALL_OBJECT_FILES} $\n" + " PARENT_SCOPE)\n" + ) + + +def write_files(implementations, declarations, gram_stem): + generated_dir.mkdir(parents=True, exist_ok=True) + + cpp_path = generated_dir / f"transform_{gram_stem}_generated.cpp" + cpp_path.write_text(cpp_file_content(implementations)) + print(f"Wrote {cpp_path}") + + hpp_path = include_peg_dir / "peg_transformer_generated.hpp" + hpp_path.write_text(GENERATED_HEADER + "".join(declarations)) + print(f"Wrote {hpp_path}") + + existing_cpp = sorted(p.name for p in generated_dir.glob("*_generated.cpp")) + cmake_path = generated_dir / "CMakeLists.txt" + cmake_path.write_text(cmake_content(existing_cpp)) + print(f"Wrote {cmake_path}") + + print() + print("Remaining manual steps:") + print(f" 1. In {include_peg_dir / 'peg_transformer.hpp'}, inside PEGTransformerFactory class:") + print(f" Add: #include \"duckdb/parser/peg/transformer/peg_transformer_generated.hpp\"") + print(f" Remove superseded TransformUseStatement(PEGTransformer &, ParseResult &) declaration") + print(f" 2. Add 'add_subdirectory(generated)' in {transformer_dir / 'CMakeLists.txt'}") + print(f" 3. In peg_transformer_factory.cpp RegisterUse(), replace:") + print(f" REGISTER_TRANSFORM(TransformUseStatement)") + print(f" with:") + print(f" Register(\"UseStatement\", &PEGTransformerFactory::TransformUseStatementInternal);") + print(f" 4. Remove TransformUseStatementInternal from transform_use.cpp") + + def main(): + arg_parser = argparse.ArgumentParser(description="Generate Internal transformer wrappers from grammar rules.") + arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") + args = arg_parser.parse_args() + use_file_path = statements_dir / 'use.gram' - rules = {} with open(use_file_path, 'r') as f: file_content = f.read() - try: - rules = parse_peg_grammar(file_content) - except Exception as e: - raise Exception(f"{use_file_path.name}: {e}") from None + try: + rules = parse_peg_grammar(file_content) + except Exception as e: + raise Exception(f"{use_file_path.name}: {e}") from None rule_to_type = load_grammar_types(type_dir / 'grammar_types.yml') for rule_name, return_type in rule_to_type.items(): if rule_name in rules: rules[rule_name].return_type = return_type - generate_output(rules, rule_to_type, gram_stem="use") + declarations, implementations, registrations, skipped = collect_generated(rules, rule_to_type) + + if args.write: + write_files(implementations, declarations, gram_stem="use") + else: + print_output(declarations, implementations, registrations, skipped, gram_stem="use") if __name__ == "__main__": From 6ac4e6b2f2c8079f836aec8044dd4a36e1243fd0 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:50:56 +0200 Subject: [PATCH 04/56] Add generated subdirectory --- src/parser/peg/transformer/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser/peg/transformer/CMakeLists.txt b/src/parser/peg/transformer/CMakeLists.txt index 563679da48c3..48d7d3ab895c 100644 --- a/src/parser/peg/transformer/CMakeLists.txt +++ b/src/parser/peg/transformer/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(generated) + add_library_unity( duckdb_parser_peg_transformer OBJECT From b62515f54a67fdce28091dd5d8cd76e56f0a7f6b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:51:17 +0200 Subject: [PATCH 05/56] Add generated header --- src/include/duckdb/parser/peg/transformer/peg_transformer.hpp | 3 ++- .../parser/peg/transformer/peg_transformer_generated.hpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 71f482fd1668..46fdb74f8f95 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1216,7 +1216,6 @@ class PEGTransformerFactory { static string TransformUpdateSetColumnTarget(PEGTransformer &transformer, ParseResult &parse_result); // use.gram - static unique_ptr TransformUseStatement(PEGTransformer &transformer, ParseResult &parse_result); static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &parse_result); static QualifiedName TransformUseTargetCatalogSchema(PEGTransformer &transformer, ParseResult &parse_result); @@ -1228,6 +1227,8 @@ class PEGTransformerFactory { static string TransformVacuumOption(PEGTransformer &transformer, ParseResult &parse_result); static vector TransformNameList(PEGTransformer &transformer, ParseResult &parse_result); +#include "duckdb/parser/peg/transformer/peg_transformer_generated.hpp" + private: PEGParser parser; case_insensitive_map_t sql_transform_functions; diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp new file mode 100644 index 000000000000..8053968e6893 --- /dev/null +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -0,0 +1,3 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT + static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformUseStatement(QualifiedName use_target); From 056c08546f2d5a60cad8bf07ca5a0d54a9b694a0 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:51:42 +0200 Subject: [PATCH 06/56] Add generated Internal wrapper, fix up written case --- src/parser/peg/transformer/generated/CMakeLists.txt | 8 ++++++++ .../generated/transform_use_generated.cpp | 13 +++++++++++++ .../peg/transformer/peg_transformer_factory.cpp | 2 +- src/parser/peg/transformer/transform_use.cpp | 6 +----- 4 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 src/parser/peg/transformer/generated/CMakeLists.txt create mode 100644 src/parser/peg/transformer/generated/transform_use_generated.cpp diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt new file mode 100644 index 000000000000..add3108d3e45 --- /dev/null +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -0,0 +1,8 @@ +add_library_unity( + duckdb_parser_peg_transformer_generated + OBJECT + transform_use_generated.cpp) + +set(ALL_OBJECT_FILES + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp new file mode 100644 index 000000000000..9ff2f0d95b51 --- /dev/null +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -0,0 +1,13 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformUseStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto use_target = transformer.Transform(list_pr, 1); + return TransformUseStatement(use_target); +} + +} // namespace duckdb diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 925a0828d358..014670fab7db 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -883,7 +883,7 @@ void PEGTransformerFactory::RegisterSelect() { void PEGTransformerFactory::RegisterUse() { // use.gram - REGISTER_TRANSFORM(TransformUseStatement); + Register("UseStatement", &PEGTransformerFactory::TransformUseStatementInternal); REGISTER_TRANSFORM(TransformUseTarget); REGISTER_TRANSFORM(TransformUseTargetCatalogSchema); } diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index eac46a89feee..c703a8aa8263 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -4,11 +4,7 @@ namespace duckdb { // UseStatement <- 'USE' UseTarget -unique_ptr PEGTransformerFactory::TransformUseStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto qn = transformer.Transform(list_pr, 1); - +unique_ptr PEGTransformerFactory::TransformUseStatement(QualifiedName qn) { string value_str; if (IsInvalidSchema(qn.schema)) { value_str = SQLIdentifier::ToString(qn.name); From 270e9a3bc403f096fcfb64300b5cca20406d8e85 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 10:53:11 +0200 Subject: [PATCH 07/56] Update variable name --- src/parser/peg/transformer/transform_use.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index c703a8aa8263..3e7da86d8d2c 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -4,12 +4,12 @@ namespace duckdb { // UseStatement <- 'USE' UseTarget -unique_ptr PEGTransformerFactory::TransformUseStatement(QualifiedName qn) { +unique_ptr PEGTransformerFactory::TransformUseStatement(QualifiedName use_target) { string value_str; - if (IsInvalidSchema(qn.schema)) { - value_str = SQLIdentifier::ToString(qn.name); + if (IsInvalidSchema(use_target.schema)) { + value_str = SQLIdentifier::ToString(use_target.name); } else { - value_str = SQLIdentifier(qn.schema) + "." + SQLIdentifier(qn.name); + value_str = SQLIdentifier(use_target.schema) + "." + SQLIdentifier(use_target.name); } auto value_expr = make_uniq(Value(value_str)); From 512b491019415116a6bbc3d77659d02ee52c802c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 12:03:52 +0200 Subject: [PATCH 08/56] Handle choice rules as well --- scripts/parser/gen_transformer_v2.py | 318 +++++++++++++++++- .../peg/transformer/peg_transformer.hpp | 1 - .../transformer/peg_transformer_generated.hpp | 2 + .../generated/transform_use_generated.cpp | 7 + .../transformer/peg_transformer_factory.cpp | 2 +- src/parser/peg/transformer/transform_use.cpp | 10 +- 6 files changed, 322 insertions(+), 18 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index ffc3b964a1eb..a7477d33519d 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -1,12 +1,211 @@ import argparse import re import sys +from dataclasses import dataclass, field from pathlib import Path +from typing import List sys.path.insert(0, str(Path(__file__).parent)) from inline_grammar import parse_peg_grammar, PEGTokenType from generate_transformer import load_grammar_types + +# --------------------------------------------------------------------------- +# Grammar AST - mirrors the Matcher class hierarchy in matcher.cpp +# --------------------------------------------------------------------------- + +class GrammarNode: + pass + + +@dataclass +class LiteralNode(GrammarNode): + """Keyword literal ('keyword'). Corresponds to KeywordMatcher.""" + text: str + + +@dataclass +class ReferenceNode(GrammarNode): + """Reference to a named grammar rule. Resolved to another Matcher at build time.""" + name: str + + +@dataclass +class RegexNode(GrammarNode): + """Regex or character-class match ([...] or <...>). + In practice, rules that contain raw regex patterns are overridden in the + matcher via AddRuleOverride() and therefore never exercise this path at + runtime. We keep the node so the AST parser stays complete.""" + pattern: str + + +@dataclass +class FunctionCallNode(GrammarNode): + """Macro call like Parens(inner) or List(inner).""" + func_name: str + inner: GrammarNode + + +@dataclass +class SequenceNode(GrammarNode): + """Ordered sequence of matchers. Corresponds to ListMatcher.""" + children: List[GrammarNode] + + +@dataclass +class ChoiceNode(GrammarNode): + """Ordered choice A / B / C. Corresponds to ChoiceMatcher.""" + alternatives: List[GrammarNode] + + +@dataclass +class OptionalNode(GrammarNode): + """Optional match A?. Corresponds to OptionalMatcher.""" + child: GrammarNode + + +@dataclass +class RepeatNode(GrammarNode): + """Repeat match A+ (min=1) or A* (min=0). Corresponds to RepeatMatcher.""" + child: GrammarNode + min_count: int + + +@dataclass +class NegationNode(GrammarNode): + """Negative lookahead !A.""" + child: GrammarNode + + +def tokens_to_ast(tokens): + """ + Parse a flat PEGToken list into a GrammarNode AST tree. + + Grammar of PEG rule bodies (simplified): + choice = sequence ('/' sequence)* + sequence = term+ + term = atom ('?' | '*' | '+')? + atom = LITERAL | REFERENCE | REGEX + | FUNCTION_CALL choice ')' + | '(' choice ')' + | '!' atom + """ + pos = [0] + + def peek(): + return tokens[pos[0]] if pos[0] < len(tokens) else None + + def consume(): + tok = tokens[pos[0]] + pos[0] += 1 + return tok + + def parse_choice(): + alts = [parse_sequence()] + while peek() and peek().type == PEGTokenType.OPERATOR and peek().text == '/': + consume() # consume '/' + alts.append(parse_sequence()) + return ChoiceNode(alts) if len(alts) > 1 else alts[0] + + def parse_sequence(): + children = [] + while True: + t = peek() + if t is None: + break + if t.type == PEGTokenType.OPERATOR and t.text in ('/', ')'): + break + children.append(parse_term()) + if not children: + return SequenceNode([]) + return SequenceNode(children) if len(children) > 1 else children[0] + + def parse_term(): + node = parse_atom() + t = peek() + if t and t.type == PEGTokenType.OPERATOR and t.text in ('?', '*', '+'): + op = consume().text + if op == '?': + return OptionalNode(node) + elif op == '*': + return RepeatNode(node, 0) + else: + return RepeatNode(node, 1) + return node + + def parse_atom(): + t = peek() + if t is None: + raise Exception("Unexpected end of tokens in grammar AST parse") + if t.type == PEGTokenType.LITERAL: + return LiteralNode(consume().text) + elif t.type == PEGTokenType.REFERENCE: + return ReferenceNode(consume().text) + elif t.type == PEGTokenType.REGEX: + return RegexNode(consume().text) + elif t.type == PEGTokenType.FUNCTION_CALL: + # inline_grammar already consumed the '(' and bumped bracket_count + func_name = consume().text + inner = parse_choice() + if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': + consume() + return FunctionCallNode(func_name, inner) + elif t.type == PEGTokenType.OPERATOR and t.text == '(': + consume() + inner = parse_choice() + if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': + consume() + return inner # anonymous group - transparent node + elif t.type == PEGTokenType.OPERATOR and t.text == '!': + consume() + return NegationNode(parse_atom()) + else: + raise Exception(f"Unexpected token in grammar AST parse: {t}") + + result = parse_choice() + if pos[0] < len(tokens): + raise Exception(f"Tokens remaining after grammar AST parse: {tokens[pos[0]:]}") + return result + + +def rule_to_ast(rule): + """Convert a PEGGrammarRule (flat token list) to a GrammarNode AST.""" + return tokens_to_ast(rule.tokens) + + +# --------------------------------------------------------------------------- +# Rule overrides - mirrors AddRuleOverride() calls in matcher.cpp. +# These rules are replaced with special matchers that produce IdentifierParseResult +# (or similar) directly, bypassing the generic ListParseResult path. +# When one of these appears as an alternative in a choice rule the generated +# Internal must check ParseResultType rather than calling transformer.Transform<>. +# --------------------------------------------------------------------------- + +IDENTIFIER_OVERRIDE_RULES = { + 'Identifier', 'ReservedIdentifier', + 'CatalogName', 'SchemaName', 'ReservedSchemaName', + 'TableName', 'ReservedTableName', + 'ColumnName', 'ReservedColumnName', + 'IndexName', 'SequenceName', + 'FunctionName', 'ReservedFunctionName', 'TableFunctionName', + 'TypeName', 'PragmaName', 'SettingName', 'CopyOptionName', +} + +# Rules overridden with non-identifier special matchers (kept separate so +# callers can distinguish the parse-result type if needed in the future). +NUMBER_LITERAL_OVERRIDE_RULES = {'NumberLiteral'} +STRING_LITERAL_OVERRIDE_RULES = {'StringLiteral'} +OPERATOR_LITERAL_OVERRIDE_RULES = {'OperatorLiteral'} + +# Union of all override rules for quick membership tests. +ALL_OVERRIDE_RULES = ( + IDENTIFIER_OVERRIDE_RULES + | NUMBER_LITERAL_OVERRIDE_RULES + | STRING_LITERAL_OVERRIDE_RULES + | OPERATOR_LITERAL_OVERRIDE_RULES +) + + scripts_dir = Path(__file__).parent.parent src_dir = scripts_dir.parent / 'src' peg_dir = src_dir / 'parser' / 'peg' @@ -74,6 +273,75 @@ def generate_registration(rule_name): return f'Register("{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal);\n' +# --------------------------------------------------------------------------- +# Choice-rule helpers +# --------------------------------------------------------------------------- + +def is_pure_reference_choice(ast): + """True if ast is a ChoiceNode whose every alternative is a ReferenceNode.""" + return isinstance(ast, ChoiceNode) and all(isinstance(a, ReferenceNode) for a in ast.alternatives) + + +def classify_choice_alternatives(alternatives, rule_to_type): + """ + Split choice alternatives into two groups: + - transformer_alts: names with a registered transformer (in rule_to_type) + - identifier_alts: names that are identifier overrides (produce IdentifierParseResult) + Returns (transformer_alts, identifier_alts, unknown_alts). + unknown_alts are neither registered nor known overrides - these need manual handling. + """ + transformer_alts = [] + identifier_alts = [] + unknown_alts = [] + for ref in alternatives: + name = ref.name + if name in rule_to_type: + transformer_alts.append(name) + elif name in IDENTIFIER_OVERRIDE_RULES: + identifier_alts.append(name) + else: + unknown_alts.append(name) + return transformer_alts, identifier_alts, unknown_alts + + +def generate_choice_internal_full(rule_name, return_type): + """ + Fully auto-generated Internal for a pure-transformer choice rule. + All alternatives have registered transformers so we can delegate directly. + """ + return ( + f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + f"\tauto &list_pr = parse_result.Cast();\n" + f"\tauto &choice_pr = list_pr.Child(0);\n" + f"\treturn transformer.Transform<{return_type}>(choice_pr.GetResult());\n" + f"}}\n" + ) + + +def generate_choice_internal_with_body(rule_name, return_type): + """ + Internal for a choice rule that has identifier-override alternatives. + Extracts the ChoiceParseResult then delegates to a hand-written body. + """ + return ( + f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + f"\tauto &list_pr = parse_result.Cast();\n" + f"\tauto &choice_pr = list_pr.Child(0);\n" + f"\treturn Transform{rule_name}(transformer, choice_pr.GetResult());\n" + f"}}\n" + ) + + +def generate_choice_body_declaration(rule_name, return_type): + """Declaration for the manual body that handles identifier alternatives.""" + return ( + f"\tstatic {return_type} Transform{rule_name}" + f"(PEGTransformer &transformer, ParseResult &choice_result);\n" + ) + + def collect_generated(rules, rule_to_type): """Classify all rules; return lists of generated content and skipped rules.""" declarations = [] @@ -86,20 +354,50 @@ def collect_generated(rules, rule_to_type): if return_type is None: skipped.append((rule_name, "no return type in grammar_types.yml")) continue - if not is_simple_rule(rule): - skipped.append((rule_name, "complex rule (has operators/choices/groups)")) + + if is_simple_rule(rule): + children = get_semantic_children(rule) + unknown = [name for _, name in children if name not in rule_to_type] + if unknown: + skipped.append((rule_name, f"unknown child types: {unknown}")) + continue + declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_body_declaration(rule_name, return_type, children, rule_to_type)) + implementations.append(generate_internal_wrapper(rule_name, return_type, children, rule_to_type)) + registrations.append(generate_registration(rule_name)) + continue + + try: + ast = rule_to_ast(rule) + except Exception as e: + skipped.append((rule_name, f"AST parse error: {e}")) continue - children = get_semantic_children(rule) - unknown = [name for _, name in children if name not in rule_to_type] - if unknown: - skipped.append((rule_name, f"unknown child types: {unknown}")) + if is_pure_reference_choice(ast): + transformer_alts, identifier_alts, unknown_alts = classify_choice_alternatives( + ast.alternatives, rule_to_type + ) + if unknown_alts: + skipped.append((rule_name, f"choice has unknown alternatives: {unknown_alts}")) + continue + + declarations.append(generate_internal_declaration(rule_name, return_type)) + registrations.append(generate_registration(rule_name)) + + if not identifier_alts: + # All alternatives have registered transformers - fully auto-generate. + implementations.append(generate_choice_internal_full(rule_name, return_type)) + else: + # Some alternatives are identifier overrides - need a manual body. + declarations.append(generate_choice_body_declaration(rule_name, return_type)) + implementations.append(generate_choice_internal_with_body(rule_name, return_type)) + skipped.append(( + f"{rule_name} (choice body)", + f"manual body needed; identifier alternatives: {identifier_alts}", + )) continue - declarations.append(generate_internal_declaration(rule_name, return_type)) - declarations.append(generate_body_declaration(rule_name, return_type, children, rule_to_type)) - implementations.append(generate_internal_wrapper(rule_name, return_type, children, rule_to_type)) - registrations.append(generate_registration(rule_name)) + skipped.append((rule_name, "complex rule (has operators/choices/groups)")) return declarations, implementations, registrations, skipped diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 46fdb74f8f95..c8718561461e 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1216,7 +1216,6 @@ class PEGTransformerFactory { static string TransformUpdateSetColumnTarget(PEGTransformer &transformer, ParseResult &parse_result); // use.gram - static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &parse_result); static QualifiedName TransformUseTargetCatalogSchema(PEGTransformer &transformer, ParseResult &parse_result); // vacuum.gram diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 8053968e6893..97bff3b75420 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,3 +1,5 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformUseStatement(QualifiedName use_target); + static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); + static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index 9ff2f0d95b51..bd0dc0ba0778 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -10,4 +10,11 @@ unique_ptr PEGTransformerFactory::TransformUseStatementInternal( return TransformUseStatement(use_target); } +QualifiedName PEGTransformerFactory::TransformUseTargetInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + return TransformUseTarget(transformer, choice_pr.GetResult()); +} + } // namespace duckdb diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 014670fab7db..c223cc516423 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -884,7 +884,7 @@ void PEGTransformerFactory::RegisterSelect() { void PEGTransformerFactory::RegisterUse() { // use.gram Register("UseStatement", &PEGTransformerFactory::TransformUseStatementInternal); - REGISTER_TRANSFORM(TransformUseTarget); + Register("UseTarget", &PEGTransformerFactory::TransformUseTargetInternal); REGISTER_TRANSFORM(TransformUseTargetCatalogSchema); } diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index 3e7da86d8d2c..f54af38e9e14 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -17,15 +17,13 @@ unique_ptr PEGTransformerFactory::TransformUseStatement(QualifiedN } // UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - if (choice_pr.GetResult().type == ParseResultType::IDENTIFIER) { +QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer, ParseResult &pr) { + if (pr.type == ParseResultType::IDENTIFIER) { QualifiedName result; - result.name = choice_pr.GetResult().Cast().identifier; + result.name = pr.Cast().identifier; return result; } - return transformer.Transform(choice_pr.GetResult()); + return transformer.Transform(pr); } // UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* From 794e79e330ccfab7ace2366487f9a29d949d639e Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 14:15:44 +0200 Subject: [PATCH 09/56] Move DotIdentifier to separate rule --- scripts/parser/grammar_types.yml | 1 + src/include/duckdb/parser/peg/inlined_grammar.gram | 5 +++-- src/include/duckdb/parser/peg/inlined_grammar.hpp | 3 ++- src/parser/peg/grammar/statements/use.gram | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index 21e0a0440d0e..1722a38bdd2b 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -773,6 +773,7 @@ String: - ConstraintName - ExplainOptionName - OperatorLiteral + - DotIdentifier Bool: type: "bool" diff --git a/src/include/duckdb/parser/peg/inlined_grammar.gram b/src/include/duckdb/parser/peg/inlined_grammar.gram index a60df681d392..e4d07ed0df61 100644 --- a/src/include/duckdb/parser/peg/inlined_grammar.gram +++ b/src/include/duckdb/parser/peg/inlined_grammar.gram @@ -790,7 +790,7 @@ MapExpression <- 'MAP' MapStructExpression MapStructExpression <- '{' List(MapStructField)? '}' MapStructField <- Expression ':' Expression -GroupingExpression <- GroupingOrGroupingId Parens(List(Expression)) +GroupingExpression <- GroupingOrGroupingId Parens(List(Expression)?) GroupingOrGroupingId <- 'GROUPING' / 'GROUPING_ID' Parameter <- QuestionMarkNumberedParameter / AnonymousParameter / NumberedParameter / ColLabelParameter QuestionMarkNumberedParameter <- '?' NumberLiteral @@ -1652,7 +1652,8 @@ DetachStatement <- 'DETACH' Database? IfExists? CatalogName UseStatement <- 'USE' UseTarget UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* +UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +DotIdentifier <- '.' Identifier CallStatement <- 'CALL' QualifiedTableFunction TableFunctionArguments diff --git a/src/include/duckdb/parser/peg/inlined_grammar.hpp b/src/include/duckdb/parser/peg/inlined_grammar.hpp index 5527052577fc..5789febcaabd 100644 --- a/src/include/duckdb/parser/peg/inlined_grammar.hpp +++ b/src/include/duckdb/parser/peg/inlined_grammar.hpp @@ -1476,7 +1476,8 @@ const char INLINED_PEG_GRAMMAR[] = { "DetachStatement <- 'DETACH' Database? IfExists? CatalogName\n" "UseStatement <- 'USE' UseTarget\n" "UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName\n" - "UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)*\n" + "UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier*\n" + "DotIdentifier <- '.' Identifier\n" "CallStatement <- 'CALL' QualifiedTableFunction TableFunctionArguments\n" }; diff --git a/src/parser/peg/grammar/statements/use.gram b/src/parser/peg/grammar/statements/use.gram index 8a3140156004..742362e5c5ff 100644 --- a/src/parser/peg/grammar/statements/use.gram +++ b/src/parser/peg/grammar/statements/use.gram @@ -1,4 +1,5 @@ UseStatement <- 'USE' UseTarget UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* +UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +DotIdentifier <- '.' Identifier From a5ba7ccd83a78f3896f3d6313960ff83d1e10390 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 14:16:20 +0200 Subject: [PATCH 10/56] Auto generate move of use.gram --- scripts/parser/gen_transformer_v2.py | 192 ++++++++++++++++-- .../transformer/peg_transformer_generated.hpp | 4 + .../generated/transform_use_generated.cpp | 23 +++ .../transformer/peg_transformer_factory.cpp | 3 +- src/parser/peg/transformer/transform_use.cpp | 21 +- 5 files changed, 215 insertions(+), 28 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index a7477d33519d..97ba66ff9ed6 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -251,8 +251,11 @@ def generate_internal_wrapper(rule_name, return_type, semantic_children, rule_to arg_names = [] for idx, child_name in semantic_children: var = to_snake_case(child_name) - child_type = rule_to_type[child_name] - arg_lines.append(f"\tauto {var} = transformer.Transform<{child_type}>(list_pr, {idx});") + if child_name in IDENTIFIER_OVERRIDE_RULES: + arg_lines.append(f"\tauto {var} = list_pr.Child({idx}).identifier;") + else: + child_type = rule_to_type[child_name] + arg_lines.append(f"\tauto {var} = transformer.Transform<{child_type}>(list_pr, {idx});") arg_names.append(var) body = [] @@ -342,6 +345,152 @@ def generate_choice_body_declaration(rule_name, return_type): ) +# --------------------------------------------------------------------------- +# Sequence-element classification +# +# Mirrors the per-token-type dispatch inside MatcherFactory::CreateMatcher() +# in matcher.cpp. Each helper handles exactly one matcher/parse-result kind: +# +# _classify_literal <- LITERAL -> KeywordMatcher -> KeywordParseResult (skip) +# _classify_reference <- REFERENCE -> named rule OR identifier override +# _classify_star_repeat <- OPERATOR* -> Optional(Repeat) -> OptionalParseResult(RepeatParseResult) +# +# classify_sequence_element() is the top-level dispatch (= the switch in CreateMatcher). +# classify_sequence_elements() iterates all children of a SequenceNode (= the token loop). +# --------------------------------------------------------------------------- + +@dataclass +class SeqElement: + """One classified position in a sequence rule.""" + idx: int + skip: bool # True for LiteralNode - no semantic value + var_name: str = "" + cpp_type: str = "" + extraction_lines: List[str] = field(default_factory=list) + + +def _classify_literal(idx): + """LITERAL token -> KeywordMatcher -> KeywordParseResult. No semantic value.""" + return SeqElement(idx=idx, skip=True) + + +def _classify_reference(name, idx, rule_to_type): + """ + REFERENCE token -> CreateMatcher(rule_name). + Two sub-cases matching the two branches in CreateMatcher: + - rule in IDENTIFIER_OVERRIDE_RULES -> AddRuleOverride IdentifierMatcher + -> Child().identifier + - rule in rule_to_type -> regular ListMatcher -> transformer.Transform() + Override rules take priority because they bypass the transformer dispatch: + their parse results have an empty name and cannot be looked up in transform_functions. + """ + if name in IDENTIFIER_OVERRIDE_RULES: + var_name = to_snake_case(name) + lines = [f"\tauto {var_name} = list_pr.Child({idx}).identifier;"] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type="string", extraction_lines=lines) + if name in rule_to_type: + cpp_type = rule_to_type[name] + var_name = to_snake_case(name) + lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=cpp_type, extraction_lines=lines) + return None + + +def _classify_star_repeat(node, idx, rule_to_type): + """ + OPERATOR '*' -> Optional(Repeat(child)) -> OptionalParseResult wrapping RepeatParseResult. + Only supported when the repeated element is a plain reference with a known type. + Produces vector. + """ + if not isinstance(node.child, ReferenceNode): + return None + ref_name = node.child.name + if ref_name not in rule_to_type: + return None + child_type = rule_to_type[ref_name] + var_name = to_snake_case(ref_name) + lines = [ + f"\tauto &{var_name}_opt = list_pr.Child({idx});", + f"\tvector<{child_type}> {var_name};", + f"\tif ({var_name}_opt.HasResult()) {{", + f"\t\tauto &{var_name}_repeat = {var_name}_opt.GetResult().Cast();", + f"\t\tfor (auto {var_name}_item : {var_name}_repeat.GetChildren()) {{", + f"\t\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t\t}}", + f"\t}}", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=f"vector<{child_type}>", extraction_lines=lines) + + +def classify_sequence_element(child, idx, rule_to_type): + """ + Classify one element of a SequenceNode. + Mirrors the token-type switch in MatcherFactory::CreateMatcher(). + Returns SeqElement or None if the element cannot be auto-generated. + """ + if isinstance(child, LiteralNode): + return _classify_literal(idx) + if isinstance(child, ReferenceNode): + return _classify_reference(child.name, idx, rule_to_type) + if isinstance(child, RepeatNode) and child.min_count == 0: + return _classify_star_repeat(child, idx, rule_to_type) + return None + + +def classify_sequence_elements(children, rule_to_type): + """ + Classify all children of a SequenceNode. + Mirrors the token loop in MatcherFactory::CreateMatcher(). + Returns list of SeqElement, or None if any element cannot be classified. + """ + elements = [] + for idx, child in enumerate(children): + elem = classify_sequence_element(child, idx, rule_to_type) + if elem is None: + return None + elements.append(elem) + return elements + + +# --------------------------------------------------------------------------- +# Extended sequence-rule code generation +# --------------------------------------------------------------------------- + +def is_auto_sequence_ast(ast, rule_to_type): + """True if ast is a SequenceNode whose every element can be classified.""" + return (isinstance(ast, SequenceNode) + and classify_sequence_elements(ast.children, rule_to_type) is not None) + + +def generate_sequence_body_decl(rule_name, return_type, elements): + """Declaration for the hand-written body that receives extracted typed args.""" + params = ", ".join(f"{e.cpp_type} {e.var_name}" for e in elements if not e.skip) + return f"\tstatic {return_type} Transform{rule_name}({params});\n" + + +def generate_sequence_internal(rule_name, return_type, elements): + """ + Internal wrapper that casts to ListParseResult, extracts each element, + then calls the hand-written body. Mirrors what ListMatcher::MatchParseResult + does at runtime but in the code-generation direction. + """ + semantic = [e for e in elements if not e.skip] + body = ["\tauto &list_pr = parse_result.Cast();"] + for elem in semantic: + body.extend(elem.extraction_lines) + arg_names = ", ".join(e.var_name for e in semantic) + body.append(f"\treturn Transform{rule_name}({arg_names});") + return ( + f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + + "\n".join(body) + + "\n}\n" + ) + + def collect_generated(rules, rule_to_type): """Classify all rules; return lists of generated content and skipped rules.""" declarations = [] @@ -385,10 +534,8 @@ def collect_generated(rules, rule_to_type): registrations.append(generate_registration(rule_name)) if not identifier_alts: - # All alternatives have registered transformers - fully auto-generate. implementations.append(generate_choice_internal_full(rule_name, return_type)) else: - # Some alternatives are identifier overrides - need a manual body. declarations.append(generate_choice_body_declaration(rule_name, return_type)) implementations.append(generate_choice_internal_with_body(rule_name, return_type)) skipped.append(( @@ -397,6 +544,14 @@ def collect_generated(rules, rule_to_type): )) continue + if is_auto_sequence_ast(ast, rule_to_type): + elements = classify_sequence_elements(ast.children, rule_to_type) + declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) + implementations.append(generate_sequence_internal(rule_name, return_type, elements)) + registrations.append(generate_registration(rule_name)) + continue + skipped.append((rule_name, "complex rule (has operators/choices/groups)")) return declarations, implementations, registrations, skipped @@ -442,7 +597,7 @@ def cmake_content(cpp_filenames): ) -def write_files(implementations, declarations, gram_stem): +def write_files(implementations, declarations, registrations, gram_stem): generated_dir.mkdir(parents=True, exist_ok=True) cpp_path = generated_dir / f"transform_{gram_stem}_generated.cpp" @@ -458,17 +613,20 @@ def write_files(implementations, declarations, gram_stem): cmake_path.write_text(cmake_content(existing_cpp)) print(f"Wrote {cmake_path}") - print() - print("Remaining manual steps:") - print(f" 1. In {include_peg_dir / 'peg_transformer.hpp'}, inside PEGTransformerFactory class:") - print(f" Add: #include \"duckdb/parser/peg/transformer/peg_transformer_generated.hpp\"") - print(f" Remove superseded TransformUseStatement(PEGTransformer &, ParseResult &) declaration") - print(f" 2. Add 'add_subdirectory(generated)' in {transformer_dir / 'CMakeLists.txt'}") - print(f" 3. In peg_transformer_factory.cpp RegisterUse(), replace:") - print(f" REGISTER_TRANSFORM(TransformUseStatement)") - print(f" with:") - print(f" Register(\"UseStatement\", &PEGTransformerFactory::TransformUseStatementInternal);") - print(f" 4. Remove TransformUseStatementInternal from transform_use.cpp") + reg_lines = "".join(f" {r.strip()}\n" for r in registrations) + print(f""" +Remaining manual steps: + 1. In {include_peg_dir / 'peg_transformer.hpp'}: + - Add inside class PEGTransformerFactory: + #include "duckdb/parser/peg/transformer/peg_transformer_generated.hpp" + - Remove any declarations now covered by peg_transformer_generated.hpp + 2. In {transformer_dir / 'CMakeLists.txt'}: + - Add: add_subdirectory(generated) + 3. In peg_transformer_factory.cpp Register{gram_stem.capitalize()}(): + - Replace REGISTER_TRANSFORM macros for generated rules with: +{reg_lines} 4. In transform_{gram_stem}.cpp: + - Remove Internal wrappers now generated (keep only hand-written bodies) + - Update body function signatures to match the generated declarations""") def main(): @@ -492,7 +650,7 @@ def main(): declarations, implementations, registrations, skipped = collect_generated(rules, rule_to_type) if args.write: - write_files(implementations, declarations, gram_stem="use") + write_files(implementations, declarations, registrations, gram_stem="use") else: print_output(declarations, implementations, registrations, skipped, gram_stem="use") diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 97bff3b75420..5101a7cd387a 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -3,3 +3,7 @@ static unique_ptr TransformUseStatement(QualifiedName use_target); static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); + static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); + static QualifiedName TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, vector dot_identifier); + static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); + static string TransformDotIdentifier(string identifier); diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index bd0dc0ba0778..50fe94fc3902 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -17,4 +17,27 @@ QualifiedName PEGTransformerFactory::TransformUseTargetInternal( return TransformUseTarget(transformer, choice_pr.GetResult()); } +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + auto reserved_schema_name = list_pr.Child(2).identifier; + auto &dot_identifier_opt = list_pr.Child(3); + vector dot_identifier; + if (dot_identifier_opt.HasResult()) { + auto &dot_identifier_repeat = dot_identifier_opt.GetResult().Cast(); + for (auto dot_identifier_item : dot_identifier_repeat.GetChildren()) { + dot_identifier.push_back(transformer.Transform(dot_identifier_item)); + } + } + return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); +} + +string PEGTransformerFactory::TransformDotIdentifierInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto identifier = list_pr.Child(1).identifier; + return TransformDotIdentifier(identifier); +} + } // namespace duckdb diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index c223cc516423..0539f6fbc7eb 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -885,7 +885,8 @@ void PEGTransformerFactory::RegisterUse() { // use.gram Register("UseStatement", &PEGTransformerFactory::TransformUseStatementInternal); Register("UseTarget", &PEGTransformerFactory::TransformUseTargetInternal); - REGISTER_TRANSFORM(TransformUseTargetCatalogSchema); + Register("UseTargetCatalogSchema", &PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal); + Register("DotIdentifier", &PEGTransformerFactory::TransformDotIdentifierInternal); } void PEGTransformerFactory::RegisterSet() { diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index f54af38e9e14..65c8ddbbd23c 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -26,20 +26,21 @@ QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transfor return transformer.Transform(pr); } -// UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto catalog = list_pr.Child(0).identifier; - auto schema = list_pr.Child(2).identifier; - auto &extra_opt = list_pr.Child(3); - if (extra_opt.HasResult()) { +// UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(string catalog_name, + string reserved_schema_name, + vector dot_identifier) { + if (!dot_identifier.empty()) { throw ParserException("Expected \"USE database\" or \"USE database.schema\""); } QualifiedName result; result.catalog = INVALID_CATALOG; - result.schema = catalog; - result.name = schema; + result.schema = std::move(catalog_name); + result.name = std::move(reserved_schema_name); return result; } + +string PEGTransformerFactory::TransformDotIdentifier(string identifier) { + return identifier; +} } // namespace duckdb From ad7fcc8c805f768736ff6c265ec904b3478adf29 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 15:04:55 +0200 Subject: [PATCH 11/56] Remove old TransfomrUseTargetCatalog --- src/include/duckdb/parser/peg/transformer/peg_transformer.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index c8718561461e..b6bdf8472d2d 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1215,9 +1215,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static string TransformUpdateSetColumnTarget(PEGTransformer &transformer, ParseResult &parse_result); - // use.gram - static QualifiedName TransformUseTargetCatalogSchema(PEGTransformer &transformer, ParseResult &parse_result); - // vacuum.gram static unique_ptr TransformVacuumStatement(PEGTransformer &transformer, ParseResult &parse_result); static VacuumOptions TransformVacuumOptions(PEGTransformer &transformer, ParseResult &parse_result); From 8959074b05efe322b5ef6df2632a6b33c45fefb1 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 15:05:10 +0200 Subject: [PATCH 12/56] Support List and Parens more explicitly --- scripts/parser/gen_transformer_v2.py | 134 ++++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 11 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 97ba66ff9ed6..a61816dd5754 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -39,9 +39,23 @@ class RegexNode(GrammarNode): pattern: str +@dataclass +class ParensNode(GrammarNode): + """Parens(D) <- '(' D ')'. Anonymous ListMatcher; child[1] is D's result. + Use ExtractResultFromParens() to reach inside.""" + inner: GrammarNode + + +@dataclass +class ListMacroNode(GrammarNode): + """List(D) <- D (',' D)* ','?. Anonymous ListMatcher. + Use ExtractParseResultsFromList() to get all D results.""" + inner: GrammarNode + + @dataclass class FunctionCallNode(GrammarNode): - """Macro call like Parens(inner) or List(inner).""" + """Unknown macro call (not Parens or List). Not auto-generated.""" func_name: str inner: GrammarNode @@ -129,8 +143,10 @@ def parse_term(): return OptionalNode(node) elif op == '*': return RepeatNode(node, 0) - else: + elif op == '+': return RepeatNode(node, 1) + else: + raise Exception("Unknown operator '{}'".format(op)) return node def parse_atom(): @@ -149,6 +165,10 @@ def parse_atom(): inner = parse_choice() if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': consume() + if func_name == 'Parens': + return ParensNode(inner) + elif func_name == 'List': + return ListMacroNode(inner) return FunctionCallNode(func_name, inner) elif t.type == PEGTokenType.OPERATOR and t.text == '(': consume() @@ -398,6 +418,85 @@ def _classify_reference(name, idx, rule_to_type): return None +def _classify_parens(inner_node, idx, rule_to_type): + """ + ParensNode -> Parens(D) <- '(' D ')'. + Uses ExtractResultFromParens() to reach child[1]. + Only supported when inner is a plain ReferenceNode. + """ + if not isinstance(inner_node, ReferenceNode): + return None + name = inner_node.name + var_name = to_snake_case(name) + if name in IDENTIFIER_OVERRIDE_RULES: + lines = [ + f"\tauto {var_name} = ExtractResultFromParens(list_pr.GetChild({idx}))" + f".Cast().identifier;", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type="string", extraction_lines=lines) + if name in rule_to_type: + cpp_type = rule_to_type[name] + lines = [ + f"\tauto {var_name} = transformer.Transform<{cpp_type}>" + f"(ExtractResultFromParens(list_pr.GetChild({idx})));", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=cpp_type, extraction_lines=lines) + return None + + +def _classify_list_macro(inner_node, idx, rule_to_type): + """ + ListMacroNode -> List(D) <- D (',' D)* ','?. + Uses ExtractParseResultsFromList() to collect all D results. + Only supported when inner is a plain ReferenceNode with a known type. + Produces vector. + """ + if not isinstance(inner_node, ReferenceNode): + return None + name = inner_node.name + if name not in rule_to_type: + return None + child_type = rule_to_type[name] + var_name = to_snake_case(name) + lines = [ + f"\tauto {var_name}_items = ExtractParseResultsFromList(list_pr.GetChild({idx}));", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_items) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=f"vector<{child_type}>", extraction_lines=lines) + + +def _classify_parens_list(inner_list_node, idx, rule_to_type): + """ + ParensNode(ListMacroNode(D)) -> Parens(List(D)). + Uses ExtractParseResultsFromList(ExtractResultFromParens(...)) to collect all D results. + Only supported when the ListMacroNode's inner is a plain ReferenceNode with a known type. + Produces vector. + """ + if not isinstance(inner_list_node.inner, ReferenceNode): + return None + name = inner_list_node.inner.name + if name not in rule_to_type: + return None + child_type = rule_to_type[name] + var_name = to_snake_case(name) + lines = [ + f"\tauto {var_name}_items = ExtractParseResultsFromList(" + f"ExtractResultFromParens(list_pr.GetChild({idx})));", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_items) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=f"vector<{child_type}>", extraction_lines=lines) + + def _classify_star_repeat(node, idx, rule_to_type): """ OPERATOR '*' -> Optional(Repeat(child)) -> OptionalParseResult wrapping RepeatParseResult. @@ -437,6 +536,12 @@ def classify_sequence_element(child, idx, rule_to_type): return _classify_reference(child.name, idx, rule_to_type) if isinstance(child, RepeatNode) and child.min_count == 0: return _classify_star_repeat(child, idx, rule_to_type) + if isinstance(child, ParensNode): + if isinstance(child.inner, ListMacroNode): + return _classify_parens_list(child.inner, idx, rule_to_type) + return _classify_parens(child.inner, idx, rule_to_type) + if isinstance(child, ListMacroNode): + return _classify_list_macro(child.inner, idx, rule_to_type) return None @@ -492,11 +597,12 @@ def generate_sequence_internal(rule_name, return_type, elements): def collect_generated(rules, rule_to_type): - """Classify all rules; return lists of generated content and skipped rules.""" + """Classify all rules; return lists of generated content, skipped rules, and manual bodies.""" declarations = [] implementations = [] registrations = [] skipped = [] + manual_bodies = [] for rule_name, rule in rules.items(): return_type = rule.return_type @@ -538,9 +644,9 @@ def collect_generated(rules, rule_to_type): else: declarations.append(generate_choice_body_declaration(rule_name, return_type)) implementations.append(generate_choice_internal_with_body(rule_name, return_type)) - skipped.append(( - f"{rule_name} (choice body)", - f"manual body needed; identifier alternatives: {identifier_alts}", + manual_bodies.append(( + rule_name, + f"choice body; identifier alternatives: {identifier_alts}", )) continue @@ -554,16 +660,22 @@ def collect_generated(rules, rule_to_type): skipped.append((rule_name, "complex rule (has operators/choices/groups)")) - return declarations, implementations, registrations, skipped + return declarations, implementations, registrations, skipped, manual_bodies -def print_output(declarations, implementations, registrations, skipped, gram_stem): +def print_output(declarations, implementations, registrations, skipped, manual_bodies, gram_stem): if skipped: - print("=== SKIPPED (manual implementation required) ===") + print("=== SKIPPED (nothing generated) ===") for rule_name, reason in skipped: print(f" {rule_name}: {reason}") print() + if manual_bodies: + print("=== MANUAL BODY NEEDED (Internal generated, body must be hand-written) ===") + for rule_name, reason in manual_bodies: + print(f" {rule_name}: {reason}") + print() + print("=== DECLARATIONS (peg_transformer_generated.hpp) ===") print("".join(declarations)) @@ -647,12 +759,12 @@ def main(): if rule_name in rules: rules[rule_name].return_type = return_type - declarations, implementations, registrations, skipped = collect_generated(rules, rule_to_type) + declarations, implementations, registrations, skipped, manual_bodies = collect_generated(rules, rule_to_type) if args.write: write_files(implementations, declarations, registrations, gram_stem="use") else: - print_output(declarations, implementations, registrations, skipped, gram_stem="use") + print_output(declarations, implementations, registrations, skipped, manual_bodies, gram_stem="use") if __name__ == "__main__": From faf913954248bfc11e14c77d2df537aa9aaa0d72 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 15:29:55 +0200 Subject: [PATCH 13/56] Auto generate parts of transform_transaction --- scripts/parser/grammar_types.yml | 10 +++------ .../peg/transformer/peg_transformer.hpp | 10 +++------ .../transformer/peg_transformer_generated.hpp | 3 +++ .../peg/transformer/generated/CMakeLists.txt | 1 + .../transformer/peg_transformer_factory.cpp | 4 ++-- .../peg/transformer/transform_transaction.cpp | 21 ++++++------------- 6 files changed, 18 insertions(+), 31 deletions(-) diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index 1722a38bdd2b..b666f1ff21d8 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -69,6 +69,9 @@ SQLStatement: - SelectStatement - SetStatement - TransactionStatement + - BeginTransaction + - CommitTransaction + - RollbackTransaction - TriggerBody - UpdateStatement - UseStatement @@ -99,13 +102,6 @@ SetStatement: - SetTimeZone - StandardAssignment -TransactionStatement: - type: "unique_ptr" - rules: - - BeginTransaction - - CommitTransaction - - RollbackTransaction - QueryNode: type: "unique_ptr" rules: diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index b6bdf8472d2d..4600e05b36ea 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1192,14 +1192,10 @@ class PEGTransformerFactory { static string TransformIdentifierOrKeyword(PEGTransformer &transformer, ParseResult &parse_result); // transaction.gram - static unique_ptr TransformTransactionStatement(PEGTransformer &transformer, - ParseResult &parse_result); - static unique_ptr TransformBeginTransaction(PEGTransformer &transformer, - ParseResult &parse_result); - static TransactionModifierType TransformReadOrWrite(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformBeginTransaction(PEGTransformer &transformer, ParseResult &parse_result); static TransactionModifierType TransformReadOnlyOrReadWrite(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformCommitTransaction(PEGTransformer &, ParseResult &parse_result); - static unique_ptr TransformRollbackTransaction(PEGTransformer &, ParseResult &parse_result); + static unique_ptr TransformCommitTransaction(PEGTransformer &, ParseResult &parse_result); + static unique_ptr TransformRollbackTransaction(PEGTransformer &, ParseResult &parse_result); // update.gram static unique_ptr TransformUpdateStatement(PEGTransformer &transformer, ParseResult &parse_result); diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 5101a7cd387a..fcd97f10bb64 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -7,3 +7,6 @@ static QualifiedName TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, vector dot_identifier); static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformDotIdentifier(string identifier); + static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); + static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index add3108d3e45..4ac1c21bd395 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,6 +1,7 @@ add_library_unity( duckdb_parser_peg_transformer_generated OBJECT + transform_transaction_generated.cpp transform_use_generated.cpp) set(ALL_OBJECT_FILES diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 0539f6fbc7eb..89621cbe405d 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -906,9 +906,9 @@ void PEGTransformerFactory::RegisterSet() { void PEGTransformerFactory::RegisterTransaction() { // transaction.gram - REGISTER_TRANSFORM(TransformTransactionStatement); + Register("TransactionStatement", &PEGTransformerFactory::TransformTransactionStatementInternal); + Register("ReadOrWrite", &PEGTransformerFactory::TransformReadOrWriteInternal); REGISTER_TRANSFORM(TransformBeginTransaction); - REGISTER_TRANSFORM(TransformReadOrWrite); REGISTER_TRANSFORM(TransformReadOnlyOrReadWrite); REGISTER_TRANSFORM(TransformCommitTransaction); REGISTER_TRANSFORM(TransformRollbackTransaction); diff --git a/src/parser/peg/transformer/transform_transaction.cpp b/src/parser/peg/transformer/transform_transaction.cpp index 77a14926f2b6..00e8c012b3d7 100644 --- a/src/parser/peg/transformer/transform_transaction.cpp +++ b/src/parser/peg/transformer/transform_transaction.cpp @@ -3,15 +3,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformTransactionStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - return transformer.Transform>(choice_pr.GetResult()); -} - -unique_ptr PEGTransformerFactory::TransformBeginTransaction(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformBeginTransaction(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto info = make_uniq(TransactionType::BEGIN_TRANSACTION); auto &read_or_write = list_pr.Child(2); @@ -23,10 +16,8 @@ unique_ptr PEGTransformerFactory::TransformBeginTransactio return make_uniq(std::move(info)); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - return transformer.Transform(list_pr.Child(1)); +TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(TransactionModifierType read_only_or_read_write) { + return read_only_or_read_write; } TransactionModifierType PEGTransformerFactory::TransformReadOnlyOrReadWrite(PEGTransformer &transformer, @@ -35,11 +26,11 @@ TransactionModifierType PEGTransformerFactory::TransformReadOnlyOrReadWrite(PEGT return transformer.TransformEnum(list_pr.Child(0).GetResult()); } -unique_ptr PEGTransformerFactory::TransformCommitTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformCommitTransaction(PEGTransformer &, ParseResult &) { return make_uniq(make_uniq(TransactionType::COMMIT)); } -unique_ptr PEGTransformerFactory::TransformRollbackTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformRollbackTransaction(PEGTransformer &, ParseResult &) { return make_uniq(make_uniq(TransactionType::ROLLBACK)); } } // namespace duckdb From 602886c1516083afecd9db9cca15678c4de79bca Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 15:30:07 +0200 Subject: [PATCH 14/56] Update gen to write into class --- scripts/parser/gen_transformer_v2.py | 64 ++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index a61816dd5754..7ac0ef8fbba1 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -596,8 +596,18 @@ def generate_sequence_internal(rule_name, return_type, elements): ) -def collect_generated(rules, rule_to_type): - """Classify all rules; return lists of generated content, skipped rules, and manual bodies.""" +@dataclass +class GramFileResult: + gram_stem: str + declarations: list + implementations: list + registrations: list + skipped: list # (rule_name, reason) — nothing generated + manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written + + +def collect_generated(gram_stem, rules, rule_to_type): + """Classify all rules; return a GramFileResult.""" declarations = [] implementations = [] registrations = [] @@ -660,7 +670,7 @@ def collect_generated(rules, rule_to_type): skipped.append((rule_name, "complex rule (has operators/choices/groups)")) - return declarations, implementations, registrations, skipped, manual_bodies + return GramFileResult(gram_stem, declarations, implementations, registrations, skipped, manual_bodies) def print_output(declarations, implementations, registrations, skipped, manual_bodies, gram_stem): @@ -709,15 +719,16 @@ def cmake_content(cpp_filenames): ) -def write_files(implementations, declarations, registrations, gram_stem): +def write_cpp_file(implementations, gram_stem): generated_dir.mkdir(parents=True, exist_ok=True) - cpp_path = generated_dir / f"transform_{gram_stem}_generated.cpp" cpp_path.write_text(cpp_file_content(implementations)) print(f"Wrote {cpp_path}") + +def write_shared_files(all_declarations): hpp_path = include_peg_dir / "peg_transformer_generated.hpp" - hpp_path.write_text(GENERATED_HEADER + "".join(declarations)) + hpp_path.write_text(GENERATED_HEADER + "".join(all_declarations)) print(f"Wrote {hpp_path}") existing_cpp = sorted(p.name for p in generated_dir.glob("*_generated.cpp")) @@ -725,6 +736,8 @@ def write_files(implementations, declarations, registrations, gram_stem): cmake_path.write_text(cmake_content(existing_cpp)) print(f"Wrote {cmake_path}") + +def print_manual_steps(registrations, gram_stem): reg_lines = "".join(f" {r.strip()}\n" for r in registrations) print(f""" Remaining manual steps: @@ -741,30 +754,45 @@ def write_files(implementations, declarations, registrations, gram_stem): - Update body function signatures to match the generated declarations""") -def main(): - arg_parser = argparse.ArgumentParser(description="Generate Internal transformer wrappers from grammar rules.") - arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") - args = arg_parser.parse_args() - - use_file_path = statements_dir / 'use.gram' - with open(use_file_path, 'r') as f: +def process_gram_file(gram_filename, rule_to_type): + gram_stem = gram_filename.removesuffix('.gram') + gram_path = statements_dir / gram_filename + with open(gram_path, 'r') as f: file_content = f.read() try: rules = parse_peg_grammar(file_content) except Exception as e: - raise Exception(f"{use_file_path.name}: {e}") from None + raise Exception(f"{gram_filename}: {e}") from None - rule_to_type = load_grammar_types(type_dir / 'grammar_types.yml') for rule_name, return_type in rule_to_type.items(): if rule_name in rules: rules[rule_name].return_type = return_type - declarations, implementations, registrations, skipped, manual_bodies = collect_generated(rules, rule_to_type) + return collect_generated(gram_stem, rules, rule_to_type) + + +def main(): + arg_parser = argparse.ArgumentParser(description="Generate Internal transformer wrappers from grammar rules.") + arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") + args = arg_parser.parse_args() + + gram_files_to_gen = ['use.gram', 'transaction.gram'] + rule_to_type = load_grammar_types(type_dir / 'grammar_types.yml') + results = [process_gram_file(f, rule_to_type) for f in gram_files_to_gen] if args.write: - write_files(implementations, declarations, registrations, gram_stem="use") + all_declarations = [d for r in results for d in r.declarations] + write_shared_files(all_declarations) + for r in results: + write_cpp_file(r.implementations, r.gram_stem) + print_manual_steps(r.registrations, r.gram_stem) else: - print_output(declarations, implementations, registrations, skipped, manual_bodies, gram_stem="use") + for r in results: + print(f"\n{'=' * 60}") + print(f" {r.gram_stem}.gram") + print(f"{'=' * 60}") + print_output(r.declarations, r.implementations, r.registrations, + r.skipped, r.manual_bodies, gram_stem=r.gram_stem) if __name__ == "__main__": From 0c7c697e3bf9acb169fb01ea9bf467960a49de64 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 15:30:28 +0200 Subject: [PATCH 15/56] Add first version of generated file as well --- .../transform_transaction_generated.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/parser/peg/transformer/generated/transform_transaction_generated.cpp diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp new file mode 100644 index 000000000000..a7b860b8a145 --- /dev/null +++ b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp @@ -0,0 +1,20 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + return transformer.Transform>(choice_pr.GetResult()); +} + +TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto read_only_or_read_write = transformer.Transform(list_pr, 1); + return TransformReadOrWrite(read_only_or_read_write); +} + +} // namespace duckdb From 1e27d702439d3e3c006a7d1ea2c5c9f07ecaaf20 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 17:11:30 +0200 Subject: [PATCH 16/56] Move excluded rules to yml file --- scripts/parser/gen_transformer_v2.py | 93 +++++++++++++++----- scripts/parser/generate_transformer.py | 114 +++++-------------------- scripts/parser/grammar_types.yml | 79 ++++++++++++++++- 3 files changed, 170 insertions(+), 116 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 7ac0ef8fbba1..ff742d950108 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -394,21 +394,21 @@ def _classify_literal(idx): return SeqElement(idx=idx, skip=True) -def _classify_reference(name, idx, rule_to_type): +def _classify_reference(name, idx, rule_to_type, excluded_rules): """ REFERENCE token -> CreateMatcher(rule_name). - Two sub-cases matching the two branches in CreateMatcher: - - rule in IDENTIFIER_OVERRIDE_RULES -> AddRuleOverride IdentifierMatcher - -> Child().identifier - - rule in rule_to_type -> regular ListMatcher -> transformer.Transform() - Override rules take priority because they bypass the transformer dispatch: - their parse results have an empty name and cannot be looked up in transform_functions. + Priority order mirrors runtime dispatch: + 1. IDENTIFIER_OVERRIDE_RULES -> IdentifierMatcher -> Child() + 2. excluded_rules -> keyword-only rule, no semantic value -> skip + 3. rule_to_type -> regular ListMatcher -> transformer.Transform() """ if name in IDENTIFIER_OVERRIDE_RULES: var_name = to_snake_case(name) lines = [f"\tauto {var_name} = list_pr.Child({idx}).identifier;"] return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + if name in excluded_rules: + return _classify_literal(idx) if name in rule_to_type: cpp_type = rule_to_type[name] var_name = to_snake_case(name) @@ -418,6 +418,38 @@ def _classify_reference(name, idx, rule_to_type): return None +def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): + """ + OptionalNode(ReferenceNode) -> OptionalMatcher wrapping a named rule. + Priority order matches _classify_reference: + 1. excluded_rules -> keyword-only optional (Transaction?) -> skip + 2. IDENTIFIER_OVERRIDE_RULES -> optional identifier, extracted via HasResult() + 3. rule_to_type -> optional typed rule, extracted via TransformOptional + """ + if name in excluded_rules: + return _classify_literal(idx) + var_name = to_snake_case(name) + if name in IDENTIFIER_OVERRIDE_RULES: + lines = [ + f"\tstring {var_name};", + f"\tauto &{var_name}_opt = list_pr.Child({idx});", + f"\tif ({var_name}_opt.HasResult()) {{", + f"\t\t{var_name} = {var_name}_opt.GetResult().Cast().identifier;", + f"\t}}", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type="string", extraction_lines=lines) + if name in rule_to_type: + cpp_type = rule_to_type[name] + lines = [ + f"\t{cpp_type} {var_name} {{}};", + f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", + ] + return SeqElement(idx=idx, skip=False, var_name=var_name, + cpp_type=cpp_type, extraction_lines=lines) + return None + + def _classify_parens(inner_node, idx, rule_to_type): """ ParensNode -> Parens(D) <- '(' D ')'. @@ -524,7 +556,7 @@ def _classify_star_repeat(node, idx, rule_to_type): cpp_type=f"vector<{child_type}>", extraction_lines=lines) -def classify_sequence_element(child, idx, rule_to_type): +def classify_sequence_element(child, idx, rule_to_type, excluded_rules): """ Classify one element of a SequenceNode. Mirrors the token-type switch in MatcherFactory::CreateMatcher(). @@ -533,7 +565,14 @@ def classify_sequence_element(child, idx, rule_to_type): if isinstance(child, LiteralNode): return _classify_literal(idx) if isinstance(child, ReferenceNode): - return _classify_reference(child.name, idx, rule_to_type) + return _classify_reference(child.name, idx, rule_to_type, excluded_rules) + if isinstance(child, OptionalNode): + inner = child.child + if isinstance(inner, LiteralNode): + return _classify_literal(idx) + if isinstance(inner, ReferenceNode): + return _classify_optional_reference(inner.name, idx, rule_to_type, excluded_rules) + return None # OptionalNode(ParensNode) etc. - deferred if isinstance(child, RepeatNode) and child.min_count == 0: return _classify_star_repeat(child, idx, rule_to_type) if isinstance(child, ParensNode): @@ -545,7 +584,7 @@ def classify_sequence_element(child, idx, rule_to_type): return None -def classify_sequence_elements(children, rule_to_type): +def classify_sequence_elements(children, rule_to_type, excluded_rules): """ Classify all children of a SequenceNode. Mirrors the token loop in MatcherFactory::CreateMatcher(). @@ -553,7 +592,7 @@ def classify_sequence_elements(children, rule_to_type): """ elements = [] for idx, child in enumerate(children): - elem = classify_sequence_element(child, idx, rule_to_type) + elem = classify_sequence_element(child, idx, rule_to_type, excluded_rules) if elem is None: return None elements.append(elem) @@ -564,10 +603,10 @@ def classify_sequence_elements(children, rule_to_type): # Extended sequence-rule code generation # --------------------------------------------------------------------------- -def is_auto_sequence_ast(ast, rule_to_type): +def is_auto_sequence_ast(ast, rule_to_type, excluded_rules): """True if ast is a SequenceNode whose every element can be classified.""" return (isinstance(ast, SequenceNode) - and classify_sequence_elements(ast.children, rule_to_type) is not None) + and classify_sequence_elements(ast.children, rule_to_type, excluded_rules) is not None) def generate_sequence_body_decl(rule_name, return_type, elements): @@ -583,9 +622,17 @@ def generate_sequence_internal(rule_name, return_type, elements): does at runtime but in the code-generation direction. """ semantic = [e for e in elements if not e.skip] - body = ["\tauto &list_pr = parse_result.Cast();"] - for elem in semantic: - body.extend(elem.extraction_lines) + has_semantic_elements = len(semantic) > 0 + + body = [] + # Only emit the list_pr cast when there are elements to extract from it. + # All-skip rules (e.g. CommitTransaction <- CommitOrEnd Transaction?) + # produce no arguments and must not declare an unused list_pr variable. + if has_semantic_elements: + body.append("\tauto &list_pr = parse_result.Cast();") + for elem in semantic: + body.extend(elem.extraction_lines) + arg_names = ", ".join(e.var_name for e in semantic) body.append(f"\treturn Transform{rule_name}({arg_names});") return ( @@ -606,7 +653,7 @@ class GramFileResult: manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written -def collect_generated(gram_stem, rules, rule_to_type): +def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): """Classify all rules; return a GramFileResult.""" declarations = [] implementations = [] @@ -660,8 +707,8 @@ def collect_generated(gram_stem, rules, rule_to_type): )) continue - if is_auto_sequence_ast(ast, rule_to_type): - elements = classify_sequence_elements(ast.children, rule_to_type) + if is_auto_sequence_ast(ast, rule_to_type, excluded_rules): + elements = classify_sequence_elements(ast.children, rule_to_type, excluded_rules) declarations.append(generate_internal_declaration(rule_name, return_type)) declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) implementations.append(generate_sequence_internal(rule_name, return_type, elements)) @@ -754,7 +801,7 @@ def print_manual_steps(registrations, gram_stem): - Update body function signatures to match the generated declarations""") -def process_gram_file(gram_filename, rule_to_type): +def process_gram_file(gram_filename, rule_to_type, excluded_rules): gram_stem = gram_filename.removesuffix('.gram') gram_path = statements_dir / gram_filename with open(gram_path, 'r') as f: @@ -768,7 +815,7 @@ def process_gram_file(gram_filename, rule_to_type): if rule_name in rules: rules[rule_name].return_type = return_type - return collect_generated(gram_stem, rules, rule_to_type) + return collect_generated(gram_stem, rules, rule_to_type, excluded_rules) def main(): @@ -777,8 +824,8 @@ def main(): args = arg_parser.parse_args() gram_files_to_gen = ['use.gram', 'transaction.gram'] - rule_to_type = load_grammar_types(type_dir / 'grammar_types.yml') - results = [process_gram_file(f, rule_to_type) for f in gram_files_to_gen] + rule_to_type, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') + results = [process_gram_file(f, rule_to_type, excluded_rules) for f in gram_files_to_gen] if args.write: all_declarations = [d for r in results for d in r.declarations] diff --git a/scripts/parser/generate_transformer.py b/scripts/parser/generate_transformer.py index 67afa018046d..1f4ecc9f4e93 100644 --- a/scripts/parser/generate_transformer.py +++ b/scripts/parser/generate_transformer.py @@ -29,82 +29,13 @@ # Matches: Register("RuleName", &SomeFunction) — direct registration bypassing the macro DIRECT_REGISTER_REGEX = re.compile(r'Register\s*\(\s*"(\w+)"\s*,') -EXCLUDED_RULES = { - "Program", - "FunctionType", - "IfExists", - "Database", - "AbortOrRollback", - "CommitOrEnd", - "StartOrBegin", - "Transaction", - "VariableAssign", - "MacroOrFunction", - "SettingScope", - "ColLabel", - "MacroOrFunction", - "GroupingOrGroupingId", - "DefaultValues", - "RowOrRows", - "Recursive", - "StarSymbol", - "IfNotExists", - "PlainIdentifier", - "QuotedIdentifier", - "CreateTableColumnElement", - "OrReplace", - "ReservedIdentifier", - "CatalogName", - "SchemaName", - "ReservedSchemaName", - "ReservedIdentifier", - "TableName", - "ConstraintName", - "IntervalNumber", - "ReservedTableName", - "ColumnName", - "ReservedColumnName", - "FunctionName", - "ReservedFunctionName", - "TableFunctionName", - "TypeName", - "PragmaName", - "SettingName", - "CopyOptionName", - "AtTimeZoneOperator", - "Generated", - "ColumnConstraint", - "AlwaysOrByDefault", - "Lateral", - "ConstraintNameClause", - "ReservedSchemaQualification", - "UsingSample", - "TableSample", - "TypeList", - "NamedParameterAssignment", - "WithOrdinality", - "ByName", - "CollateOperator", - "ExportClause", - "ValueOrValues", - "PivotKeyword", - "UnpivotKeyword", - "Unique", - "DefArg", - "NoneLiteral", - "RowOrStruct", - "ForEachRow", - "ForEachStatement", - "SetData", - "CTEBodyContent", - "SingleArrowPair", - "OperatorLiteral", -} def load_grammar_types(types_file): """ - Loads grammar_types.yml and returns a dict mapping rule name -> C++ return type. + Loads grammar_types.yml and returns (rule_to_type, excluded_rules) where + rule_to_type maps rule name -> C++ return type, and excluded_rules is the + set of rules that should be skipped during stub generation. """ if yaml is None: print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) @@ -125,23 +56,23 @@ def load_grammar_types(types_file): rule_to_source = {} # tracks where each rule was first seen for error messages duplicates = [] - def register(rule_name, cpp_type, source): - rule_name = str(rule_name) - if rule_name in rule_to_type: - duplicates.append(f" '{rule_name}' in '{source}' (already listed in '{rule_to_source[rule_name]}')") + def register(name, cpp_type, source): + name = str(name) + if name in rule_to_type: + duplicates.append(f" '{name}' in '{source}' (already listed in '{rule_to_source[name]}')") else: - rule_to_type[rule_name] = str(cpp_type) - rule_to_source[rule_name] = source + rule_to_type[name] = str(cpp_type) + rule_to_source[name] = source # Top-level overrides: flat RuleName -> "type" map overrides = data.get("overrides", {}) if isinstance(overrides, dict): - for rule_name, cpp_type in overrides.items(): - register(rule_name, cpp_type, "overrides") + for name, cpp_type in overrides.items(): + register(name, cpp_type, "overrides") # Category entries: CategoryName -> {type: "...", rules: [...]} for key, value in data.items(): - if key == "overrides": + if key in ("overrides", "excluded_rules"): continue if not isinstance(value, dict): continue @@ -149,8 +80,8 @@ def register(rule_name, cpp_type, source): rules = value.get("rules", []) if not cpp_type or not isinstance(rules, list): continue - for rule_name in rules: - register(rule_name, cpp_type, key) + for name in rules: + register(name, cpp_type, key) if duplicates: print(f"Error: {types_file} contains duplicate rule listings:", file=sys.stderr) @@ -158,7 +89,8 @@ def register(rule_name, cpp_type, source): print(msg, file=sys.stderr) sys.exit(1) - return rule_to_type + excluded_rules = set(data.get("excluded_rules", [])) + return rule_to_type, excluded_rules def find_grammar_rules(grammar_path): @@ -342,7 +274,7 @@ def main(): args = parser.parse_args() - rule_to_type = load_grammar_types(GRAMMAR_TYPES_FILE) + rule_to_type, excluded_rules = load_grammar_types(GRAMMAR_TYPES_FILE) grammar_rules_by_file = find_grammar_rules(Path(GRAMMAR_DIR)) transformer_impls = find_transformer_rules(Path(TRANSFORMER_DIR)) enum_rules, registered_rules, directly_registered_rules = find_factory_registrations(Path(FACTORY_REG_FILE)) @@ -379,7 +311,7 @@ def main(): for rule_name in sorted(grammar_rules): total_rules_scanned += 1 - if rule_name in EXCLUDED_RULES: + if rule_name in excluded_rules: print(f"{'[ EXCLUDED ]':<14} {rule_name}") continue @@ -428,7 +360,7 @@ def main(): print("\n--- Summary: Rule Coverage ---") print(f"{'TOTAL RULES SCANNED':<25} : {total_rules_scanned}") - print(f" {' - Excluded':<23} : {len(EXCLUDED_RULES)}") + print(f" {' - Excluded':<23} : {len(excluded_rules)}") print("---------------------------------------") print(f"{'TOTAL ACTIONABLE RULES':<25} : {total_grammar_rules}") print(f"{'TOTAL COVERED':<25} : {total_covered} ({coverage:.2f}%)") @@ -442,25 +374,25 @@ def main(): print(f"{file_name:<25} : {count} issues") print("\n--- Orphan / Mismatch Check ---") - orphan_transformers = transformer_impls - all_grammar_rules_flat - EXCLUDED_RULES + orphan_transformers = transformer_impls - all_grammar_rules_flat - excluded_rules if orphan_transformers: print("\n[!] Orphan Transformer Functions (No matching grammar rule):") for rule in sorted(list(orphan_transformers)): print(f" - Transform{rule}") - orphan_enums = enum_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_enums = enum_rules - all_grammar_rules_flat - excluded_rules if orphan_enums: print("\n[!] Orphan Enum Rules (No matching grammar rule):") for rule in sorted(list(orphan_enums)): print(f' - RegisterEnum("{rule}")') - orphan_registrations = registered_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_registrations = registered_rules - all_grammar_rules_flat - excluded_rules if orphan_registrations: print("\n[!] Orphan Registrations (No matching grammar rule):") for rule in sorted(list(orphan_registrations)): print(f" - REGISTER_TRANSFORM(Transform{rule})") - orphan_direct = directly_registered_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_direct = directly_registered_rules - all_grammar_rules_flat - excluded_rules if orphan_direct: print("\n[!] Orphan Direct Registrations (No matching grammar rule):") for rule in sorted(list(orphan_direct)): diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index b666f1ff21d8..af09f389f6f5 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -19,7 +19,10 @@ # # Rules handled by RegisterEnum() are NOT listed here (they are auto-detected # by generate_transformer.py from peg_transformer_factory.cpp). -# Rules in EXCLUDED_RULES in generate_transformer.py are also NOT listed here. +# +# excluded_rules: rules skipped during stub generation. When encountered as +# children in auto-generated sequence rules they are treated as skip elements +# (no semantic value extracted). # --------------------------------------------------------------------------- # unique_ptr categories @@ -914,4 +917,76 @@ overrides: NullsFirstOrLast: "OrderByNullType" # update.gram UpdateSetElement: "pair>" - UpdateSetColumnTarget: "string" \ No newline at end of file + UpdateSetColumnTarget: "string" +# --------------------------------------------------------------------------- +# excluded_rules: no transformer stub generated, no semantic value extracted +# --------------------------------------------------------------------------- + +excluded_rules: + - Program + - FunctionType + - IfExists + - Database + - AbortOrRollback + - CommitOrEnd + - StartOrBegin + - Transaction + - VariableAssign + - MacroOrFunction + - SettingScope + - ColLabel + - GroupingOrGroupingId + - DefaultValues + - RowOrRows + - Recursive + - StarSymbol + - IfNotExists + - PlainIdentifier + - QuotedIdentifier + - CreateTableColumnElement + - OrReplace + - ReservedIdentifier + - CatalogName + - SchemaName + - ReservedSchemaName + - TableName + - ConstraintName + - IntervalNumber + - ReservedTableName + - ColumnName + - ReservedColumnName + - FunctionName + - ReservedFunctionName + - TableFunctionName + - TypeName + - PragmaName + - SettingName + - CopyOptionName + - AtTimeZoneOperator + - Generated + - ColumnConstraint + - AlwaysOrByDefault + - Lateral + - ConstraintNameClause + - ReservedSchemaQualification + - UsingSample + - TableSample + - TypeList + - NamedParameterAssignment + - WithOrdinality + - ByName + - CollateOperator + - ExportClause + - ValueOrValues + - PivotKeyword + - UnpivotKeyword + - Unique + - DefArg + - NoneLiteral + - RowOrStruct + - ForEachRow + - ForEachStatement + - SetData + - CTEBodyContent + - SingleArrowPair + - OperatorLiteral From 42e4bbd45743162183a60aa6d46aa494f0a98a0b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 17:13:53 +0200 Subject: [PATCH 17/56] Auto generate more of transaction --- .../parser/peg/transformer/peg_transformer.hpp | 3 --- .../transformer/peg_transformer_generated.hpp | 6 ++++++ .../transform_transaction_generated.cpp | 18 ++++++++++++++++++ .../transformer/peg_transformer_factory.cpp | 6 +++--- .../peg/transformer/transform_transaction.cpp | 15 ++++----------- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 4600e05b36ea..468f39f0187a 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1192,10 +1192,7 @@ class PEGTransformerFactory { static string TransformIdentifierOrKeyword(PEGTransformer &transformer, ParseResult &parse_result); // transaction.gram - static unique_ptr TransformBeginTransaction(PEGTransformer &transformer, ParseResult &parse_result); static TransactionModifierType TransformReadOnlyOrReadWrite(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformCommitTransaction(PEGTransformer &, ParseResult &parse_result); - static unique_ptr TransformRollbackTransaction(PEGTransformer &, ParseResult &parse_result); // update.gram static unique_ptr TransformUpdateStatement(PEGTransformer &transformer, ParseResult &parse_result); diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index fcd97f10bb64..65b18f2611e7 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -8,5 +8,11 @@ static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformDotIdentifier(string identifier); static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformBeginTransaction(TransactionModifierType read_or_write); + static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformRollbackTransaction(); + static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformCommitTransaction(); static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp index a7b860b8a145..19f9d92a264b 100644 --- a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp @@ -10,6 +10,24 @@ unique_ptr PEGTransformerFactory::TransformTransactionStatementInt return transformer.Transform>(choice_pr.GetResult()); } +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + TransactionModifierType read_or_write {}; + transformer.TransformOptional(list_pr, 2, read_or_write); + return TransformBeginTransaction(read_or_write); +} + +unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + return TransformRollbackTransaction(); +} + +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { + return TransformCommitTransaction(); +} + TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal( PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 89621cbe405d..1286e67e535b 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -908,10 +908,10 @@ void PEGTransformerFactory::RegisterTransaction() { // transaction.gram Register("TransactionStatement", &PEGTransformerFactory::TransformTransactionStatementInternal); Register("ReadOrWrite", &PEGTransformerFactory::TransformReadOrWriteInternal); - REGISTER_TRANSFORM(TransformBeginTransaction); + Register("BeginTransaction", &PEGTransformerFactory::TransformBeginTransactionInternal); + Register("CommitTransaction", &PEGTransformerFactory::TransformCommitTransactionInternal); + Register("RollbackTransaction", &PEGTransformerFactory::TransformRollbackTransactionInternal); REGISTER_TRANSFORM(TransformReadOnlyOrReadWrite); - REGISTER_TRANSFORM(TransformCommitTransaction); - REGISTER_TRANSFORM(TransformRollbackTransaction); } void PEGTransformerFactory::RegisterUpdate() { diff --git a/src/parser/peg/transformer/transform_transaction.cpp b/src/parser/peg/transformer/transform_transaction.cpp index 00e8c012b3d7..c0a4d2614790 100644 --- a/src/parser/peg/transformer/transform_transaction.cpp +++ b/src/parser/peg/transformer/transform_transaction.cpp @@ -3,16 +3,9 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformBeginTransaction(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); +unique_ptr PEGTransformerFactory::TransformBeginTransaction(TransactionModifierType read_or_write) { auto info = make_uniq(TransactionType::BEGIN_TRANSACTION); - auto &read_or_write = list_pr.Child(2); - if (read_or_write.HasResult()) { - info->modifier = transformer.Transform(read_or_write.GetResult()); - } else { - info->modifier = TransactionModifierType::TRANSACTION_DEFAULT_MODIFIER; - } + info->modifier = read_or_write; return make_uniq(std::move(info)); } @@ -26,11 +19,11 @@ TransactionModifierType PEGTransformerFactory::TransformReadOnlyOrReadWrite(PEGT return transformer.TransformEnum(list_pr.Child(0).GetResult()); } -unique_ptr PEGTransformerFactory::TransformCommitTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformCommitTransaction() { return make_uniq(make_uniq(TransactionType::COMMIT)); } -unique_ptr PEGTransformerFactory::TransformRollbackTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformRollbackTransaction() { return make_uniq(make_uniq(TransactionType::ROLLBACK)); } } // namespace duckdb From c9b4fa141b6d44fc95cd243ca855671890d7552d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 17:19:02 +0200 Subject: [PATCH 18/56] Format fix --- scripts/parser/gen_transformer_v2.py | 127 ++++++++++-------- scripts/parser/generate_transformer.py | 1 - scripts/parser/inline_grammar.py | 15 ++- .../transformer/peg_transformer_generated.hpp | 39 +++--- .../peg/transformer/generated/CMakeLists.txt | 9 +- .../transform_transaction_generated.cpp | 20 +-- .../generated/transform_use_generated.cpp | 15 +-- src/parser/peg/transformer/transform_use.cpp | 3 +- 8 files changed, 127 insertions(+), 102 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index ff742d950108..79495e6cb5e3 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -14,6 +14,7 @@ # Grammar AST - mirrors the Matcher class hierarchy in matcher.cpp # --------------------------------------------------------------------------- + class GrammarNode: pass @@ -21,12 +22,14 @@ class GrammarNode: @dataclass class LiteralNode(GrammarNode): """Keyword literal ('keyword'). Corresponds to KeywordMatcher.""" + text: str @dataclass class ReferenceNode(GrammarNode): """Reference to a named grammar rule. Resolved to another Matcher at build time.""" + name: str @@ -36,6 +39,7 @@ class RegexNode(GrammarNode): In practice, rules that contain raw regex patterns are overridden in the matcher via AddRuleOverride() and therefore never exercise this path at runtime. We keep the node so the AST parser stays complete.""" + pattern: str @@ -43,6 +47,7 @@ class RegexNode(GrammarNode): class ParensNode(GrammarNode): """Parens(D) <- '(' D ')'. Anonymous ListMatcher; child[1] is D's result. Use ExtractResultFromParens() to reach inside.""" + inner: GrammarNode @@ -50,12 +55,14 @@ class ParensNode(GrammarNode): class ListMacroNode(GrammarNode): """List(D) <- D (',' D)* ','?. Anonymous ListMatcher. Use ExtractParseResultsFromList() to get all D results.""" + inner: GrammarNode @dataclass class FunctionCallNode(GrammarNode): """Unknown macro call (not Parens or List). Not auto-generated.""" + func_name: str inner: GrammarNode @@ -63,24 +70,28 @@ class FunctionCallNode(GrammarNode): @dataclass class SequenceNode(GrammarNode): """Ordered sequence of matchers. Corresponds to ListMatcher.""" + children: List[GrammarNode] @dataclass class ChoiceNode(GrammarNode): """Ordered choice A / B / C. Corresponds to ChoiceMatcher.""" + alternatives: List[GrammarNode] @dataclass class OptionalNode(GrammarNode): """Optional match A?. Corresponds to OptionalMatcher.""" + child: GrammarNode @dataclass class RepeatNode(GrammarNode): """Repeat match A+ (min=1) or A* (min=0). Corresponds to RepeatMatcher.""" + child: GrammarNode min_count: int @@ -88,6 +99,7 @@ class RepeatNode(GrammarNode): @dataclass class NegationNode(GrammarNode): """Negative lookahead !A.""" + child: GrammarNode @@ -202,13 +214,24 @@ def rule_to_ast(rule): # --------------------------------------------------------------------------- IDENTIFIER_OVERRIDE_RULES = { - 'Identifier', 'ReservedIdentifier', - 'CatalogName', 'SchemaName', 'ReservedSchemaName', - 'TableName', 'ReservedTableName', - 'ColumnName', 'ReservedColumnName', - 'IndexName', 'SequenceName', - 'FunctionName', 'ReservedFunctionName', 'TableFunctionName', - 'TypeName', 'PragmaName', 'SettingName', 'CopyOptionName', + 'Identifier', + 'ReservedIdentifier', + 'CatalogName', + 'SchemaName', + 'ReservedSchemaName', + 'TableName', + 'ReservedTableName', + 'ColumnName', + 'ReservedColumnName', + 'IndexName', + 'SequenceName', + 'FunctionName', + 'ReservedFunctionName', + 'TableFunctionName', + 'TypeName', + 'PragmaName', + 'SettingName', + 'CopyOptionName', } # Rules overridden with non-identifier special matchers (kept separate so @@ -254,14 +277,14 @@ def get_semantic_children(rule): def generate_internal_declaration(rule_name, return_type): - return (f"\tstatic {return_type} Transform{rule_name}Internal" - f"(PEGTransformer &transformer, ParseResult &parse_result);\n") + return ( + f"\tstatic {return_type} Transform{rule_name}Internal" + f"(PEGTransformer &transformer, ParseResult &parse_result);\n" + ) def generate_body_declaration(rule_name, return_type, semantic_children, rule_to_type): - params = ", ".join( - f"{rule_to_type[name]} {to_snake_case(name)}" for _, name in semantic_children - ) + params = ", ".join(f"{rule_to_type[name]} {to_snake_case(name)}" for _, name in semantic_children) return f"\tstatic {return_type} Transform{rule_name}({params});\n" @@ -286,9 +309,7 @@ def generate_internal_wrapper(rule_name, return_type, semantic_children, rule_to return ( f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" - f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" - + "\n".join(body) - + "\n}\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" ) @@ -300,6 +321,7 @@ def generate_registration(rule_name): # Choice-rule helpers # --------------------------------------------------------------------------- + def is_pure_reference_choice(ast): """True if ast is a ChoiceNode whose every alternative is a ReferenceNode.""" return isinstance(ast, ChoiceNode) and all(isinstance(a, ReferenceNode) for a in ast.alternatives) @@ -360,8 +382,7 @@ def generate_choice_internal_with_body(rule_name, return_type): def generate_choice_body_declaration(rule_name, return_type): """Declaration for the manual body that handles identifier alternatives.""" return ( - f"\tstatic {return_type} Transform{rule_name}" - f"(PEGTransformer &transformer, ParseResult &choice_result);\n" + f"\tstatic {return_type} Transform{rule_name}" f"(PEGTransformer &transformer, ParseResult &choice_result);\n" ) @@ -379,11 +400,13 @@ def generate_choice_body_declaration(rule_name, return_type): # classify_sequence_elements() iterates all children of a SequenceNode (= the token loop). # --------------------------------------------------------------------------- + @dataclass class SeqElement: """One classified position in a sequence rule.""" + idx: int - skip: bool # True for LiteralNode - no semantic value + skip: bool # True for LiteralNode - no semantic value var_name: str = "" cpp_type: str = "" extraction_lines: List[str] = field(default_factory=list) @@ -405,16 +428,14 @@ def _classify_reference(name, idx, rule_to_type, excluded_rules): if name in IDENTIFIER_OVERRIDE_RULES: var_name = to_snake_case(name) lines = [f"\tauto {var_name} = list_pr.Child({idx}).identifier;"] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type="string", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in excluded_rules: return _classify_literal(idx) if name in rule_to_type: cpp_type = rule_to_type[name] var_name = to_snake_case(name) lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -437,16 +458,14 @@ def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): f"\t\t{var_name} = {var_name}_opt.GetResult().Cast().identifier;", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type="string", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in rule_to_type: cpp_type = rule_to_type[name] lines = [ f"\t{cpp_type} {var_name} {{}};", f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -465,16 +484,14 @@ def _classify_parens(inner_node, idx, rule_to_type): f"\tauto {var_name} = ExtractResultFromParens(list_pr.GetChild({idx}))" f".Cast().identifier;", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type="string", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in rule_to_type: cpp_type = rule_to_type[name] lines = [ f"\tauto {var_name} = transformer.Transform<{cpp_type}>" f"(ExtractResultFromParens(list_pr.GetChild({idx})));", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -499,8 +516,7 @@ def _classify_list_macro(inner_node, idx, rule_to_type): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def _classify_parens_list(inner_list_node, idx, rule_to_type): @@ -518,15 +534,13 @@ def _classify_parens_list(inner_list_node, idx, rule_to_type): child_type = rule_to_type[name] var_name = to_snake_case(name) lines = [ - f"\tauto {var_name}_items = ExtractParseResultsFromList(" - f"ExtractResultFromParens(list_pr.GetChild({idx})));", + f"\tauto {var_name}_items = ExtractParseResultsFromList(" f"ExtractResultFromParens(list_pr.GetChild({idx})));", f"\tvector<{child_type}> {var_name};", f"\tfor (auto &{var_name}_item : {var_name}_items) {{", f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def _classify_star_repeat(node, idx, rule_to_type): @@ -552,8 +566,7 @@ def _classify_star_repeat(node, idx, rule_to_type): f"\t\t}}", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, - cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def classify_sequence_element(child, idx, rule_to_type, excluded_rules): @@ -603,10 +616,13 @@ def classify_sequence_elements(children, rule_to_type, excluded_rules): # Extended sequence-rule code generation # --------------------------------------------------------------------------- + def is_auto_sequence_ast(ast, rule_to_type, excluded_rules): """True if ast is a SequenceNode whose every element can be classified.""" - return (isinstance(ast, SequenceNode) - and classify_sequence_elements(ast.children, rule_to_type, excluded_rules) is not None) + return ( + isinstance(ast, SequenceNode) + and classify_sequence_elements(ast.children, rule_to_type, excluded_rules) is not None + ) def generate_sequence_body_decl(rule_name, return_type, elements): @@ -637,9 +653,7 @@ def generate_sequence_internal(rule_name, return_type, elements): body.append(f"\treturn Transform{rule_name}({arg_names});") return ( f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" - f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" - + "\n".join(body) - + "\n}\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" ) @@ -649,8 +663,8 @@ class GramFileResult: declarations: list implementations: list registrations: list - skipped: list # (rule_name, reason) — nothing generated - manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written + skipped: list # (rule_name, reason) — nothing generated + manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): @@ -701,10 +715,12 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): else: declarations.append(generate_choice_body_declaration(rule_name, return_type)) implementations.append(generate_choice_internal_with_body(rule_name, return_type)) - manual_bodies.append(( - rule_name, - f"choice body; identifier alternatives: {identifier_alts}", - )) + manual_bodies.append( + ( + rule_name, + f"choice body; identifier alternatives: {identifier_alts}", + ) + ) continue if is_auto_sequence_ast(ast, rule_to_type, excluded_rules): @@ -786,7 +802,8 @@ def write_shared_files(all_declarations): def print_manual_steps(registrations, gram_stem): reg_lines = "".join(f" {r.strip()}\n" for r in registrations) - print(f""" + print( + f""" Remaining manual steps: 1. In {include_peg_dir / 'peg_transformer.hpp'}: - Add inside class PEGTransformerFactory: @@ -798,7 +815,8 @@ def print_manual_steps(registrations, gram_stem): - Replace REGISTER_TRANSFORM macros for generated rules with: {reg_lines} 4. In transform_{gram_stem}.cpp: - Remove Internal wrappers now generated (keep only hand-written bodies) - - Update body function signatures to match the generated declarations""") + - Update body function signatures to match the generated declarations""" + ) def process_gram_file(gram_filename, rule_to_type, excluded_rules): @@ -838,9 +856,10 @@ def main(): print(f"\n{'=' * 60}") print(f" {r.gram_stem}.gram") print(f"{'=' * 60}") - print_output(r.declarations, r.implementations, r.registrations, - r.skipped, r.manual_bodies, gram_stem=r.gram_stem) + print_output( + r.declarations, r.implementations, r.registrations, r.skipped, r.manual_bodies, gram_stem=r.gram_stem + ) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/parser/generate_transformer.py b/scripts/parser/generate_transformer.py index 1f4ecc9f4e93..f5ba78362afb 100644 --- a/scripts/parser/generate_transformer.py +++ b/scripts/parser/generate_transformer.py @@ -30,7 +30,6 @@ DIRECT_REGISTER_REGEX = re.compile(r'Register\s*\(\s*"(\w+)"\s*,') - def load_grammar_types(types_file): """ Loads grammar_types.yml and returns (rule_to_type, excluded_rules) where diff --git a/scripts/parser/inline_grammar.py b/scripts/parser/inline_grammar.py index a5190f0f4d4b..568a69cf41e7 100644 --- a/scripts/parser/inline_grammar.py +++ b/scripts/parser/inline_grammar.py @@ -384,10 +384,15 @@ def load_all_rules(): def main(): arg_parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files') - arg_parser.add_argument('--print', action='store_true', default=False, - help='Print the grammar instead of writing to a file') - arg_parser.add_argument('--grammar-file', action='store_true', default=False, - help='Write the grammar to a .gram file instead of a C++ header') + arg_parser.add_argument( + '--print', action='store_true', default=False, help='Print the grammar instead of writing to a file' + ) + arg_parser.add_argument( + '--grammar-file', + action='store_true', + default=False, + help='Write the grammar to a .gram file instead of a C++ header', + ) args = arg_parser.parse_args() rules, contents, keyword_sets = load_all_rules() @@ -422,4 +427,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 65b18f2611e7..1581fdc541da 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,18 +1,23 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT - static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformUseStatement(QualifiedName use_target); - static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); - static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, vector dot_identifier); - static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); - static string TransformDotIdentifier(string identifier); - static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformBeginTransaction(TransactionModifierType read_or_write); - static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformRollbackTransaction(); - static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformCommitTransaction(); - static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); - static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); +static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformUseStatement(QualifiedName use_target); +static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); +static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); +static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); +static QualifiedName TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, + vector dot_identifier); +static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); +static string TransformDotIdentifier(string identifier); +static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransaction(TransactionModifierType read_or_write); +static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformRollbackTransaction(); +static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformCommitTransaction(); +static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); +static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index 4ac1c21bd395..d88a81c03d1d 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,9 +1,8 @@ add_library_unity( - duckdb_parser_peg_transformer_generated - OBJECT - transform_transaction_generated.cpp - transform_use_generated.cpp) + duckdb_parser_peg_transformer_generated OBJECT + transform_transaction_generated.cpp transform_use_generated.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ + ${ALL_OBJECT_FILES} + $ PARENT_SCOPE) diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp index 19f9d92a264b..57b249bdd22a 100644 --- a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp @@ -3,33 +3,33 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return transformer.Transform>(choice_pr.GetResult()); } -unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); TransactionModifierType read_or_write {}; transformer.TransformOptional(list_pr, 2, read_or_write); return TransformBeginTransaction(read_or_write); } -unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { return TransformRollbackTransaction(); } -unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { return TransformCommitTransaction(); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto read_only_or_read_write = transformer.Transform(list_pr, 1); return TransformReadOrWrite(read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index 50fe94fc3902..9635f18853b4 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -3,22 +3,22 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformUseStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto use_target = transformer.Transform(list_pr, 1); return TransformUseStatement(use_target); } -QualifiedName PEGTransformerFactory::TransformUseTargetInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return TransformUseTarget(transformer, choice_pr.GetResult()); } -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; auto reserved_schema_name = list_pr.Child(2).identifier; @@ -33,8 +33,7 @@ QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); } -string PEGTransformerFactory::TransformDotIdentifierInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto identifier = list_pr.Child(1).identifier; return TransformDotIdentifier(identifier); diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index 65c8ddbbd23c..2ba54e821abf 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -27,8 +27,7 @@ QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transfor } // UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(string catalog_name, - string reserved_schema_name, +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, vector dot_identifier) { if (!dot_identifier.empty()) { throw ParserException("Expected \"USE database\" or \"USE database.schema\""); From 24f763f23f9cca6d1028d5fa265764a92323e868 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 7 May 2026 18:10:37 +0200 Subject: [PATCH 19/56] Bump ci From 5ac4e26adb0bcaea6e750f238db1a0de9cd89769 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 09:50:35 +0200 Subject: [PATCH 20/56] Bump CI again From 3fa616ed113298f73b53c87bba4bec41b9e60828 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 09:52:22 +0200 Subject: [PATCH 21/56] Bump CI --- src/parser/peg/transformer/peg_transformer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/peg/transformer/peg_transformer.cpp b/src/parser/peg/transformer/peg_transformer.cpp index 954c646cbe9a..033ff81464c6 100644 --- a/src/parser/peg/transformer/peg_transformer.cpp +++ b/src/parser/peg/transformer/peg_transformer.cpp @@ -63,7 +63,6 @@ void PEGTransformer::SetParamCount(idx_t new_count) { unique_ptr PEGTransformer::GenerateCreateEnumStmt(unique_ptr entry) { auto result = make_uniq(); auto info = make_uniq(); - info->temporary = true; info->internal = false; info->catalog = INVALID_CATALOG; From 06471fbbce24f1db7cccff11bfa877204f1038cc Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 10:41:23 +0200 Subject: [PATCH 22/56] Remove regex node --- scripts/parser/gen_transformer_v2.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 79495e6cb5e3..bc9b85868698 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -33,16 +33,6 @@ class ReferenceNode(GrammarNode): name: str -@dataclass -class RegexNode(GrammarNode): - """Regex or character-class match ([...] or <...>). - In practice, rules that contain raw regex patterns are overridden in the - matcher via AddRuleOverride() and therefore never exercise this path at - runtime. We keep the node so the AST parser stays complete.""" - - pattern: str - - @dataclass class ParensNode(GrammarNode): """Parens(D) <- '(' D ')'. Anonymous ListMatcher; child[1] is D's result. @@ -169,8 +159,6 @@ def parse_atom(): return LiteralNode(consume().text) elif t.type == PEGTokenType.REFERENCE: return ReferenceNode(consume().text) - elif t.type == PEGTokenType.REGEX: - return RegexNode(consume().text) elif t.type == PEGTokenType.FUNCTION_CALL: # inline_grammar already consumed the '(' and bumped bracket_count func_name = consume().text From 336acca76cc3c03bca69cb6d91fe2cc633ba3521 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 10:47:41 +0200 Subject: [PATCH 23/56] remove negation node --- scripts/parser/gen_transformer_v2.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index bc9b85868698..6552bc207c40 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -86,11 +86,6 @@ class RepeatNode(GrammarNode): min_count: int -@dataclass -class NegationNode(GrammarNode): - """Negative lookahead !A.""" - - child: GrammarNode def tokens_to_ast(tokens): @@ -176,9 +171,6 @@ def parse_atom(): if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': consume() return inner # anonymous group - transparent node - elif t.type == PEGTokenType.OPERATOR and t.text == '!': - consume() - return NegationNode(parse_atom()) else: raise Exception(f"Unexpected token in grammar AST parse: {t}") From 7193b722aa1e5447fe721b821b2eecd68e99da56 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 11:00:20 +0200 Subject: [PATCH 24/56] Make * and + more like matcher --- scripts/parser/gen_transformer_v2.py | 58 +++++++++++++------ .../generated/transform_use_generated.cpp | 2 +- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 6552bc207c40..585d4b1e36d3 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -83,9 +83,6 @@ class RepeatNode(GrammarNode): """Repeat match A+ (min=1) or A* (min=0). Corresponds to RepeatMatcher.""" child: GrammarNode - min_count: int - - def tokens_to_ast(tokens): @@ -139,9 +136,9 @@ def parse_term(): if op == '?': return OptionalNode(node) elif op == '*': - return RepeatNode(node, 0) + return OptionalNode(RepeatNode(node)) elif op == '+': - return RepeatNode(node, 1) + return RepeatNode(node) else: raise Exception("Unknown operator '{}'".format(op)) return node @@ -523,9 +520,11 @@ def _classify_parens_list(inner_list_node, idx, rule_to_type): return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) -def _classify_star_repeat(node, idx, rule_to_type): +def _classify_repeat(node, idx, rule_to_type, optional): """ - OPERATOR '*' -> Optional(Repeat(child)) -> OptionalParseResult wrapping RepeatParseResult. + Shared helper for A* and A+. + A* -> OptionalNode(RepeatNode(A)) -> OptionalParseResult wrapping RepeatParseResult. + A+ -> RepeatNode(A) -> RepeatParseResult directly (guaranteed >= 1 element). Only supported when the repeated element is a plain reference with a known type. Produces vector. """ @@ -536,19 +535,36 @@ def _classify_star_repeat(node, idx, rule_to_type): return None child_type = rule_to_type[ref_name] var_name = to_snake_case(ref_name) - lines = [ - f"\tauto &{var_name}_opt = list_pr.Child({idx});", - f"\tvector<{child_type}> {var_name};", - f"\tif ({var_name}_opt.HasResult()) {{", - f"\t\tauto &{var_name}_repeat = {var_name}_opt.GetResult().Cast();", - f"\t\tfor (auto {var_name}_item : {var_name}_repeat.GetChildren()) {{", - f"\t\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", - f"\t\t}}", - f"\t}}", - ] + if optional: + lines = [ + f"\tauto &{var_name}_opt = list_pr.Child({idx});", + f"\tvector<{child_type}> {var_name};", + f"\tif ({var_name}_opt.HasResult()) {{", + f"\t\tauto &{var_name}_repeat = {var_name}_opt.GetResult().Cast();", + f"\t\tfor (auto &{var_name}_item : {var_name}_repeat.GetChildren()) {{", + f"\t\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t\t}}", + f"\t}}", + ] + else: + lines = [ + f"\tauto &{var_name}_repeat = list_pr.Child({idx});", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_repeat.GetChildren()) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) +def _classify_star_repeat(node, idx, rule_to_type): + return _classify_repeat(node, idx, rule_to_type, optional=True) + + +def _classify_plus_repeat(node, idx, rule_to_type): + return _classify_repeat(node, idx, rule_to_type, optional=False) + + def classify_sequence_element(child, idx, rule_to_type, excluded_rules): """ Classify one element of a SequenceNode. @@ -565,9 +581,13 @@ def classify_sequence_element(child, idx, rule_to_type, excluded_rules): return _classify_literal(idx) if isinstance(inner, ReferenceNode): return _classify_optional_reference(inner.name, idx, rule_to_type, excluded_rules) + if isinstance(inner, RepeatNode): + # A* is represented as OptionalNode(RepeatNode(A)), matching the runtime + # OptionalMatcher(RepeatMatcher(A)) structure. Delegate to star-repeat classifier. + return _classify_star_repeat(inner, idx, rule_to_type) return None # OptionalNode(ParensNode) etc. - deferred - if isinstance(child, RepeatNode) and child.min_count == 0: - return _classify_star_repeat(child, idx, rule_to_type) + if isinstance(child, RepeatNode): + return _classify_plus_repeat(child, idx, rule_to_type) if isinstance(child, ParensNode): if isinstance(child.inner, ListMacroNode): return _classify_parens_list(child.inner, idx, rule_to_type) diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index 9635f18853b4..8a312ba2d927 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -26,7 +26,7 @@ QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEG vector dot_identifier; if (dot_identifier_opt.HasResult()) { auto &dot_identifier_repeat = dot_identifier_opt.GetResult().Cast(); - for (auto dot_identifier_item : dot_identifier_repeat.GetChildren()) { + for (auto &dot_identifier_item : dot_identifier_repeat.GetChildren()) { dot_identifier.push_back(transformer.Transform(dot_identifier_item)); } } From fe0f62bf7927e82ac2fe3ffbd74bc75deea051e5 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 12:38:15 +0200 Subject: [PATCH 25/56] Dead code removal --- scripts/parser/gen_transformer_v2.py | 90 +++------------------------- 1 file changed, 8 insertions(+), 82 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 585d4b1e36d3..4031427d0886 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -93,10 +93,9 @@ def tokens_to_ast(tokens): choice = sequence ('/' sequence)* sequence = term+ term = atom ('?' | '*' | '+')? - atom = LITERAL | REFERENCE | REGEX + atom = LITERAL | REFERENCE | FUNCTION_CALL choice ')' | '(' choice ')' - | '!' atom """ pos = [0] @@ -211,20 +210,6 @@ def rule_to_ast(rule): 'CopyOptionName', } -# Rules overridden with non-identifier special matchers (kept separate so -# callers can distinguish the parse-result type if needed in the future). -NUMBER_LITERAL_OVERRIDE_RULES = {'NumberLiteral'} -STRING_LITERAL_OVERRIDE_RULES = {'StringLiteral'} -OPERATOR_LITERAL_OVERRIDE_RULES = {'OperatorLiteral'} - -# Union of all override rules for quick membership tests. -ALL_OVERRIDE_RULES = ( - IDENTIFIER_OVERRIDE_RULES - | NUMBER_LITERAL_OVERRIDE_RULES - | STRING_LITERAL_OVERRIDE_RULES - | OPERATOR_LITERAL_OVERRIDE_RULES -) - scripts_dir = Path(__file__).parent.parent src_dir = scripts_dir.parent / 'src' @@ -243,16 +228,6 @@ def to_snake_case(name): return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower() -def is_simple_rule(rule): - """True if all tokens are LITERAL or REFERENCE (no operators, choices, groups).""" - return all(t.type in (PEGTokenType.LITERAL, PEGTokenType.REFERENCE) for t in rule.tokens) - - -def get_semantic_children(rule): - """Returns [(index, rule_name)] for every REFERENCE token in the rule.""" - return [(i, tok.text) for i, tok in enumerate(rule.tokens) if tok.type == PEGTokenType.REFERENCE] - - def generate_internal_declaration(rule_name, return_type): return ( f"\tstatic {return_type} Transform{rule_name}Internal" @@ -260,36 +235,6 @@ def generate_internal_declaration(rule_name, return_type): ) -def generate_body_declaration(rule_name, return_type, semantic_children, rule_to_type): - params = ", ".join(f"{rule_to_type[name]} {to_snake_case(name)}" for _, name in semantic_children) - return f"\tstatic {return_type} Transform{rule_name}({params});\n" - - -def generate_internal_wrapper(rule_name, return_type, semantic_children, rule_to_type): - """Generates the Internal .cpp function that extracts children and calls the body.""" - arg_lines = [] - arg_names = [] - for idx, child_name in semantic_children: - var = to_snake_case(child_name) - if child_name in IDENTIFIER_OVERRIDE_RULES: - arg_lines.append(f"\tauto {var} = list_pr.Child({idx}).identifier;") - else: - child_type = rule_to_type[child_name] - arg_lines.append(f"\tauto {var} = transformer.Transform<{child_type}>(list_pr, {idx});") - arg_names.append(var) - - body = [] - if arg_lines: - body.append("\tauto &list_pr = parse_result.Cast();") - body.extend(arg_lines) - body.append(f"\treturn Transform{rule_name}({', '.join(arg_names)});") - - return ( - f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" - f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" - ) - - def generate_registration(rule_name): return f'Register("{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal);\n' @@ -617,14 +562,6 @@ def classify_sequence_elements(children, rule_to_type, excluded_rules): # --------------------------------------------------------------------------- -def is_auto_sequence_ast(ast, rule_to_type, excluded_rules): - """True if ast is a SequenceNode whose every element can be classified.""" - return ( - isinstance(ast, SequenceNode) - and classify_sequence_elements(ast.children, rule_to_type, excluded_rules) is not None - ) - - def generate_sequence_body_decl(rule_name, return_type, elements): """Declaration for the hand-written body that receives extracted typed args.""" params = ", ".join(f"{e.cpp_type} {e.var_name}" for e in elements if not e.skip) @@ -681,18 +618,6 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): skipped.append((rule_name, "no return type in grammar_types.yml")) continue - if is_simple_rule(rule): - children = get_semantic_children(rule) - unknown = [name for _, name in children if name not in rule_to_type] - if unknown: - skipped.append((rule_name, f"unknown child types: {unknown}")) - continue - declarations.append(generate_internal_declaration(rule_name, return_type)) - declarations.append(generate_body_declaration(rule_name, return_type, children, rule_to_type)) - implementations.append(generate_internal_wrapper(rule_name, return_type, children, rule_to_type)) - registrations.append(generate_registration(rule_name)) - continue - try: ast = rule_to_ast(rule) except Exception as e: @@ -723,13 +648,14 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): ) continue - if is_auto_sequence_ast(ast, rule_to_type, excluded_rules): + if isinstance(ast, SequenceNode): elements = classify_sequence_elements(ast.children, rule_to_type, excluded_rules) - declarations.append(generate_internal_declaration(rule_name, return_type)) - declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) - implementations.append(generate_sequence_internal(rule_name, return_type, elements)) - registrations.append(generate_registration(rule_name)) - continue + if elements is not None: + declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) + implementations.append(generate_sequence_internal(rule_name, return_type, elements)) + registrations.append(generate_registration(rule_name)) + continue skipped.append((rule_name, "complex rule (has operators/choices/groups)")) From dab343536bbb5155d0dc4009ab1ce44e31501ec4 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 12:45:19 +0200 Subject: [PATCH 26/56] Update docstring --- scripts/parser/gen_transformer_v2.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 4031427d0886..f9e8cb38eceb 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -80,7 +80,9 @@ class OptionalNode(GrammarNode): @dataclass class RepeatNode(GrammarNode): - """Repeat match A+ (min=1) or A* (min=0). Corresponds to RepeatMatcher.""" + """Repeat match A+ (one or more). Corresponds to RepeatMatcher. + A* (zero or more) is represented as OptionalNode(RepeatNode), matching + the runtime OptionalMatcher(RepeatMatcher) structure.""" child: GrammarNode @@ -251,11 +253,11 @@ def is_pure_reference_choice(ast): def classify_choice_alternatives(alternatives, rule_to_type): """ - Split choice alternatives into two groups: + Split choice alternatives into three groups: - transformer_alts: names with a registered transformer (in rule_to_type) - identifier_alts: names that are identifier overrides (produce IdentifierParseResult) + - unknown_alts: neither registered nor known overrides -- need manual handling Returns (transformer_alts, identifier_alts, unknown_alts). - unknown_alts are neither registered nor known overrides - these need manual handling. """ transformer_alts = [] identifier_alts = [] @@ -312,11 +314,16 @@ def generate_choice_body_declaration(rule_name, return_type): # Sequence-element classification # # Mirrors the per-token-type dispatch inside MatcherFactory::CreateMatcher() -# in matcher.cpp. Each helper handles exactly one matcher/parse-result kind: +# in matcher.cpp. Each helper handles one matcher/parse-result kind: # -# _classify_literal <- LITERAL -> KeywordMatcher -> KeywordParseResult (skip) -# _classify_reference <- REFERENCE -> named rule OR identifier override -# _classify_star_repeat <- OPERATOR* -> Optional(Repeat) -> OptionalParseResult(RepeatParseResult) +# _classify_literal LiteralNode -> KeywordParseResult (skip) +# _classify_reference ReferenceNode -> IdentifierParseResult OR Transform +# _classify_optional_reference OptionalNode(Ref) -> optional identifier OR TransformOptional +# _classify_star_repeat OptionalNode(Rep) -> OptionalParseResult(RepeatParseResult) vector +# _classify_plus_repeat RepeatNode -> RepeatParseResult vector +# _classify_parens ParensNode(Ref) -> ExtractResultFromParens T +# _classify_list_macro ListMacroNode(Ref)-> ExtractParseResultsFromList vector +# _classify_parens_list ParensNode(List) -> ExtractParseResultsFromList(ExtractResultFromParens) vector # # classify_sequence_element() is the top-level dispatch (= the switch in CreateMatcher). # classify_sequence_elements() iterates all children of a SequenceNode (= the token loop). @@ -570,9 +577,9 @@ def generate_sequence_body_decl(rule_name, return_type, elements): def generate_sequence_internal(rule_name, return_type, elements): """ - Internal wrapper that casts to ListParseResult, extracts each element, - then calls the hand-written body. Mirrors what ListMatcher::MatchParseResult - does at runtime but in the code-generation direction. + Generate the Internal wrapper for a sequence rule. + Casts parse_result to ListParseResult, extracts each semantic element + into a typed local variable, then calls the hand-written body with those args. """ semantic = [e for e in elements if not e.skip] has_semantic_elements = len(semantic) > 0 @@ -746,6 +753,7 @@ def print_manual_steps(registrations, gram_stem): def process_gram_file(gram_filename, rule_to_type, excluded_rules): + """Parse a .gram file and classify all its rules into a GramFileResult.""" gram_stem = gram_filename.removesuffix('.gram') gram_path = statements_dir / gram_filename with open(gram_path, 'r') as f: From 486298136bdbda92480ebb169c0134672c9d5205 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 12:57:23 +0200 Subject: [PATCH 27/56] Add transformer generation to build_grammar.sh --- scripts/parser/build_grammar.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/parser/build_grammar.sh b/scripts/parser/build_grammar.sh index 39dd4c992bec..1bd114404e51 100755 --- a/scripts/parser/build_grammar.sh +++ b/scripts/parser/build_grammar.sh @@ -12,4 +12,17 @@ fi python "$GRAMMAR_FILE" --grammar-file python "$GRAMMAR_FILE" -echo "Successfully build grammar files" \ No newline at end of file +echo "Successfully built grammar files" + +# Generate Internal transformer wrappers for auto-generatable grammar rules +GEN_TRANSFORMER_FILE="scripts/parser/gen_transformer_v2.py" +if [[ ! -f "$GEN_TRANSFORMER_FILE" ]]; then + echo "Error: $GEN_TRANSFORMER_FILE not found" + exit 1 +fi + +python "$GEN_TRANSFORMER_FILE" --write + +echo "Successfully generated transformer wrappers" + +make format-fix \ No newline at end of file From bb6c6ba91999c992823846d9d35d8c4f1e550422 Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Fri, 8 May 2026 13:57:39 +0200 Subject: [PATCH 28/56] Move format-venv to .cache/ because build/ is often nuked --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3c0aaaa6954..5a82f9ca64f4 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) PROJ_DIR := $(dir $(MKFILE_PATH)) PYTHON ?= python3 -FORMAT_VENV ?= build/format-venv +FORMAT_VENV ?= .cache/format-venv FORMAT_PYTHON := $(FORMAT_VENV)/bin/python FORMAT_SETUP_DEPS := format_venv From 57083d478c5ff9810bc2b2f4f5b3498dedb8154c Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Fri, 8 May 2026 13:03:38 +0200 Subject: [PATCH 29/56] Ignore four more slow hash-zero tests --- test/configs/hash_zero.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/configs/hash_zero.json b/test/configs/hash_zero.json index dee314bf7f2d..a42c61e6c32c 100644 --- a/test/configs/hash_zero.json +++ b/test/configs/hash_zero.json @@ -24,8 +24,12 @@ "test/issues/general/test_21431.test", "test/sql/join/inner/test_prefix_range_filter_pushdown.test", "test/sql/join/inner/test_prefix_range_filter_pushdown_strinline.test", + "test/sql/join/left_outer/unique_left_join.test", + "test/sql/limit/streaming_limit_pipeline_flush.test", + "test/sql/settings/max_execution_time.test", "test/sql/settings/operator_memory_limit.test", - "test/sql/table_function/duckdb_eviction_queues.test" + "test/sql/table_function/duckdb_eviction_queues.test", + "test/sql/pragma/profiling/test_custom_profiling_total_memory_allocated.test" ] }, { From 10976d78a1fe436450ba5022e5118695c12fa8fe Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Fri, 8 May 2026 13:47:11 +0200 Subject: [PATCH 30/56] Group bwc test jobs by minor version, fix test-utils commit --- .github/config/extensions/test-utils.cmake | 4 +- .github/workflows/NightlyTests.yml | 96 ++++++++++++---------- test/bwc/list_versions.py | 30 +++++++ test/bwc/runner.py | 21 +---- test/bwc/update_cache.py | 24 ++---- test/bwc/utils/version_list.py | 51 ++++++++++++ 6 files changed, 145 insertions(+), 81 deletions(-) create mode 100644 test/bwc/list_versions.py create mode 100644 test/bwc/utils/version_list.py diff --git a/.github/config/extensions/test-utils.cmake b/.github/config/extensions/test-utils.cmake index 6cf7d930d3a2..94e63d661e0b 100644 --- a/.github/config/extensions/test-utils.cmake +++ b/.github/config/extensions/test-utils.cmake @@ -1,6 +1,8 @@ duckdb_extension_load(test_utils GIT_URL https://github.com/duckdb/bwc-test-utils - GIT_TAG 7074208283523a3af8b5ddd1c890a03abdba3b9b + # Use the commit before "Update extensions" (that contains the binaries of + # the commit before that). + GIT_TAG 5b9c7334949c47cdf69ce11c151139c4c88aa7f8 # For local dev: # SOURCE_DIR "${EXTENSION_CONFIG_BASE_DIR}/../../../../test-utils" ) diff --git a/.github/workflows/NightlyTests.yml b/.github/workflows/NightlyTests.yml index 202de16eb055..a148ee25afef 100644 --- a/.github/workflows/NightlyTests.yml +++ b/.github/workflows/NightlyTests.yml @@ -626,14 +626,23 @@ jobs: run: make unittest_relassert T="--track-runtime 30 --batch-timeout 120 --test-config=test/configs/hash_zero.json" - bwc_build: + bwc-build: name: Build DuckDB runs-on: ubuntu-latest + outputs: + groups: ${{ steps.resolve.outputs.groups }} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 + - name: Resolve versions + id: resolve + run: | + groups_json=$(python3 test/bwc/list_versions.py --groups-json) + echo "groups=${groups_json}" >> "$GITHUB_OUTPUT" + echo "Resolved BWC version groups: ${groups_json}" + - name: Install run: python3 scripts/ci/retry.py -- make toolsci @@ -653,28 +662,13 @@ jobs: path: build/release/duckdb bwc-test: - name: BWC Test (DuckDB ${{ matrix.old_duckdb_version }}) + name: BWC Test (DuckDB ${{ matrix.series.group }}) runs-on: ubuntu-latest - needs: bwc_build + needs: bwc-build strategy: fail-fast: false matrix: - old_duckdb_version: - - v1.1.0 - - v1.1.1 - - v1.1.2 - - v1.1.3 - - v1.2.0 - - v1.2.1 - - v1.2.2 - - v1.3.0 - - v1.3.1 - - v1.3.2 - - v1.4.0 - - v1.4.1 - - v1.4.2 - - v1.4.3 - - v1.4.4 + series: ${{ fromJSON(needs.bwc-build.outputs.groups) }} steps: - uses: actions/checkout@v6 with: @@ -696,46 +690,64 @@ jobs: - name: Install Python dependencies run: python3 scripts/ci/retry.py -- pip install duckdb git+https://github.com/duckdb/duckdb-sqllogictest-python.git - - name: Download BWC cache - if: ${{ vars.ENABLE_BWC_CACHE_DOWNLOAD == 'true' }} + - name: Run BWC series run: | - VERSION=${{ matrix.old_duckdb_version }} - CACHE_ARCHIVE="runtime_${VERSION}.tar.gz" + versions='${{ toJson(matrix.series.versions) }}' + mapfile -t VERSION_LIST < <(python3 -c 'import json,sys; [print(v) for v in json.loads(sys.argv[1])]' "$versions") + FAILED=0 + FAILED_VERSIONS="" RUNTIME_DIR="duckdb_unittest_tempdir/bwc/runtime" mkdir -p "${RUNTIME_DIR}" + mkdir -p duckdb_unittest_tempdir/bwc/cache + + for VERSION in "${VERSION_LIST[@]}"; do + echo "============================================================" + echo "Running BWC for ${VERSION} (group: ${{ matrix.series.group }})" + echo "============================================================" + + if [ "${{ vars.ENABLE_BWC_CACHE_DOWNLOAD }}" = "true" ]; then + CACHE_ARCHIVE="runtime_${VERSION}.tar.gz" + DOWNLOAD_URL="https://raw.githubusercontent.com/duckdb/bwc-test-utils/main/cache/${CACHE_ARCHIVE}" + HTTP_CODE=$(curl -sL -w '%{http_code}' -o "/tmp/${CACHE_ARCHIVE}" "${DOWNLOAD_URL}" || true) + if [ "${HTTP_CODE}" = "200" ]; then + tar xzf "/tmp/${CACHE_ARCHIVE}" -C "${RUNTIME_DIR}" + CACHED=$(find "${RUNTIME_DIR}/${VERSION}" -name "*.plan.bin" | wc -l) + echo "Extracted cache for ${VERSION}: ${CACHED} cached test plans" + else + echo "Failed to download cache for ${VERSION} (HTTP ${HTTP_CODE})" + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(cache)" + fi + fi - # Download the cache archive from test-utils repo - DOWNLOAD_URL="https://raw.githubusercontent.com/duckdb/bwc-test-utils/main/cache/${CACHE_ARCHIVE}" - HTTP_CODE=$(curl -sL -w '%{http_code}' -o "/tmp/${CACHE_ARCHIVE}" "${DOWNLOAD_URL}" || true) + if ! python3.11 test/bwc/runner.py --old_duckdb_version="${VERSION}"; then + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(tests)" + fi - if [ "${HTTP_CODE}" != "200" ]; then - echo "Failed to download cache for ${VERSION} (HTTP ${HTTP_CODE})" + if ! python3.11 test/bwc/export_cache.py --version "${VERSION}" --output-dir duckdb_unittest_tempdir/bwc/cache; then + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(export)" + fi + done + + if [ "$FAILED" -ne 0 ]; then + echo "BWC series had failures:${FAILED_VERSIONS}" exit 1 fi - tar xzf "/tmp/${CACHE_ARCHIVE}" -C "${RUNTIME_DIR}" - CACHED=$(find "${RUNTIME_DIR}/${VERSION}" -name "*.plan.bin" | wc -l) - echo "Extracted cache for ${VERSION}: ${CACHED} cached test plans" - - - name: Run BWC tests - run: python3.11 test/bwc/runner.py --old_duckdb_version=${{ matrix.old_duckdb_version }} - - - name: Export BWC cache - if: always() - run: python3.11 test/bwc/export_cache.py --version ${{ matrix.old_duckdb_version }} --output-dir duckdb_unittest_tempdir/bwc/cache - - name: Upload BWC cache if: always() uses: actions/upload-artifact@v7 with: - name: bwc-cache-${{ matrix.old_duckdb_version }} - path: duckdb_unittest_tempdir/bwc/cache/runtime_${{ matrix.old_duckdb_version }}.tar.gz + name: bwc-cache-${{ matrix.series.group }} + path: duckdb_unittest_tempdir/bwc/cache/runtime_*.tar.gz - name: Upload test artifacts if: always() uses: actions/upload-artifact@v7 with: - name: bwc-test-results-${{ matrix.old_duckdb_version }} + name: bwc-test-results-${{ matrix.series.group }} path: | duckdb_unittest_tempdir/bwc/tests_summary_*.txt duckdb_unittest_tempdir/bwc/reports/test_report_*.duckdb diff --git a/test/bwc/list_versions.py b/test/bwc/list_versions.py new file mode 100644 index 000000000000..538ab7bd2a0e --- /dev/null +++ b/test/bwc/list_versions.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import argparse +import json +from os.path import abspath, dirname +import sys + +sys.path.insert(0, dirname(abspath(__file__))) +from utils.version_list import list_supported_duckdb_version_groups, list_supported_duckdb_versions + + +def main(): + parser = argparse.ArgumentParser(description="List supported DuckDB versions for BWC") + parser.add_argument("--json", action="store_true", help="Output JSON array") + parser.add_argument("--groups-json", action="store_true", help="Output JSON array grouped by vX.Y series") + args = parser.parse_args() + + if args.groups_json: + print(json.dumps(list_supported_duckdb_version_groups())) + return + + versions = list_supported_duckdb_versions() + if args.json: + print(json.dumps(versions)) + else: + for version in versions: + print(version) + + +if __name__ == "__main__": + main() diff --git a/test/bwc/runner.py b/test/bwc/runner.py index 14419a6e2de1..15086c7cf4e7 100644 --- a/test/bwc/runner.py +++ b/test/bwc/runner.py @@ -5,6 +5,7 @@ from utils.test_files_parser import load_test_files from utils.test_report import TestReport from utils.logger import make_logger +from utils.version_list import list_supported_duckdb_versions import time from concurrent.futures import ThreadPoolExecutor from threading import Lock, Event @@ -317,25 +318,7 @@ def cleanup_runtime_dir(bwc_tests_base_dir, dry_run=True): if __name__ == "__main__": supported_duckdb_versions = ( - [args.old_duckdb_version] - if args.old_duckdb_version - else [ - "v1.1.0", - "v1.1.2", - "v1.1.1", - "v1.2.0", - "v1.1.3", - "v1.2.2", - "v1.2.1", - "v1.3.0", - "v1.3.1", - "v1.3.2", - "v1.4.0", - "v1.4.1", - "v1.4.2", - "v1.4.3", - "v1.4.4", - ] + [args.old_duckdb_version] if args.old_duckdb_version else list_supported_duckdb_versions() ) duckdb_root_dir = dirname(dirname(dirname(abspath(__file__)))) diff --git a/test/bwc/update_cache.py b/test/bwc/update_cache.py index 2a5c37161c7e..c3103e4ff9d5 100644 --- a/test/bwc/update_cache.py +++ b/test/bwc/update_cache.py @@ -17,6 +17,10 @@ import subprocess import sys import tempfile +from os.path import abspath, dirname + +sys.path.insert(0, dirname(abspath(__file__))) +from utils.version_list import list_supported_duckdb_versions parser = argparse.ArgumentParser(description='Update BWC test cache in test-utils from CI artifacts') parser.add_argument( @@ -29,24 +33,6 @@ args = parser.parse_args() -SUPPORTED_VERSIONS = [ - "v1.1.0", - "v1.1.1", - "v1.1.2", - "v1.1.3", - "v1.2.0", - "v1.2.1", - "v1.2.2", - "v1.3.0", - "v1.3.1", - "v1.3.2", - "v1.4.0", - "v1.4.1", - "v1.4.2", - "v1.4.3", - "v1.4.4", -] - def download_artifact(run_id, artifact_name, dest_dir, repo): """Download a single artifact from a GitHub Actions run.""" @@ -69,7 +55,7 @@ def download_artifact(run_id, artifact_name, dest_dir, repo): if args.version: versions = [args.version] else: - versions = SUPPORTED_VERSIONS + versions = list_supported_duckdb_versions() print(f"Downloading cache artifacts from run {args.run_id} ({args.repo})") print(f"Updating {len(versions)} version(s) in {cache_dir}\n") diff --git a/test/bwc/utils/version_list.py b/test/bwc/utils/version_list.py new file mode 100644 index 000000000000..8abbc3949820 --- /dev/null +++ b/test/bwc/utils/version_list.py @@ -0,0 +1,51 @@ +import re +import subprocess +from collections import OrderedDict + + +SEMVER_TAG_RE = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$") + + +def parse_version_tag(tag): + match = SEMVER_TAG_RE.match(tag) + if not match: + return None + return tuple(int(part) for part in match.groups()) + + +def list_supported_duckdb_versions(min_version="v1.1.0"): + min_tuple = parse_version_tag(min_version) + if min_tuple is None: + raise ValueError(f"Invalid minimum version tag: {min_version}") + + result = subprocess.run( + ["git", "tag", "--list", "v*"], + capture_output=True, + text=True, + check=True, + ) + raw_tags = [line.strip() for line in result.stdout.splitlines() if line.strip()] + + versions = [] + for tag in raw_tags: + parsed = parse_version_tag(tag) + if parsed is None: + continue + if parsed >= min_tuple: + versions.append((parsed, tag)) + + versions.sort(key=lambda entry: entry[0]) + return [tag for _, tag in versions] + + +def list_supported_duckdb_version_groups(min_version="v1.1.0"): + grouped = OrderedDict() + for version in list_supported_duckdb_versions(min_version=min_version): + parsed = parse_version_tag(version) + assert parsed is not None + major, minor, _ = parsed + group = f"v{major}.{minor}" + if group not in grouped: + grouped[group] = [] + grouped[group].append(version) + return [{"group": group, "versions": versions} for group, versions in grouped.items()] From bd3a4b68249c6062c6af1577f25a1a210b717c73 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 14:27:38 +0200 Subject: [PATCH 31/56] Clean up --- scripts/parser/gen_transformer_v2.py | 75 ++++++++++++++-------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index f9e8cb38eceb..e7a26371c1fb 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -334,16 +334,15 @@ def generate_choice_body_declaration(rule_name, return_type): class SeqElement: """One classified position in a sequence rule.""" - idx: int skip: bool # True for LiteralNode - no semantic value var_name: str = "" cpp_type: str = "" extraction_lines: List[str] = field(default_factory=list) -def _classify_literal(idx): +def _classify_literal(): """LITERAL token -> KeywordMatcher -> KeywordParseResult. No semantic value.""" - return SeqElement(idx=idx, skip=True) + return SeqElement(skip=True) def _classify_reference(name, idx, rule_to_type, excluded_rules): @@ -357,14 +356,14 @@ def _classify_reference(name, idx, rule_to_type, excluded_rules): if name in IDENTIFIER_OVERRIDE_RULES: var_name = to_snake_case(name) lines = [f"\tauto {var_name} = list_pr.Child({idx}).identifier;"] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in excluded_rules: - return _classify_literal(idx) + return _classify_literal() if name in rule_to_type: cpp_type = rule_to_type[name] var_name = to_snake_case(name) lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -372,12 +371,12 @@ def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): """ OptionalNode(ReferenceNode) -> OptionalMatcher wrapping a named rule. Priority order matches _classify_reference: - 1. excluded_rules -> keyword-only optional (Transaction?) -> skip - 2. IDENTIFIER_OVERRIDE_RULES -> optional identifier, extracted via HasResult() - 3. rule_to_type -> optional typed rule, extracted via TransformOptional + 1. excluded_rules -> keyword-only optional (Transaction?) -> skip + 2. IDENTIFIER_OVERRIDE_RULES -> optional identifier, extracted via HasResult() + 3. rule_to_type -> optional typed rule, extracted via TransformOptional """ if name in excluded_rules: - return _classify_literal(idx) + return _classify_literal() var_name = to_snake_case(name) if name in IDENTIFIER_OVERRIDE_RULES: lines = [ @@ -387,14 +386,14 @@ def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): f"\t\t{var_name} = {var_name}_opt.GetResult().Cast().identifier;", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in rule_to_type: cpp_type = rule_to_type[name] lines = [ f"\t{cpp_type} {var_name} {{}};", f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -413,14 +412,14 @@ def _classify_parens(inner_node, idx, rule_to_type): f"\tauto {var_name} = ExtractResultFromParens(list_pr.GetChild({idx}))" f".Cast().identifier;", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in rule_to_type: cpp_type = rule_to_type[name] lines = [ f"\tauto {var_name} = transformer.Transform<{cpp_type}>" f"(ExtractResultFromParens(list_pr.GetChild({idx})));", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) return None @@ -445,7 +444,7 @@ def _classify_list_macro(inner_node, idx, rule_to_type): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def _classify_parens_list(inner_list_node, idx, rule_to_type): @@ -469,7 +468,7 @@ def _classify_parens_list(inner_list_node, idx, rule_to_type): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def _classify_repeat(node, idx, rule_to_type, optional): @@ -506,7 +505,7 @@ def _classify_repeat(node, idx, rule_to_type, optional): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(idx=idx, skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) def _classify_star_repeat(node, idx, rule_to_type): @@ -524,13 +523,13 @@ def classify_sequence_element(child, idx, rule_to_type, excluded_rules): Returns SeqElement or None if the element cannot be auto-generated. """ if isinstance(child, LiteralNode): - return _classify_literal(idx) + return _classify_literal() if isinstance(child, ReferenceNode): return _classify_reference(child.name, idx, rule_to_type, excluded_rules) if isinstance(child, OptionalNode): inner = child.child if isinstance(inner, LiteralNode): - return _classify_literal(idx) + return _classify_literal() if isinstance(inner, ReferenceNode): return _classify_optional_reference(inner.name, idx, rule_to_type, excluded_rules) if isinstance(inner, RepeatNode): @@ -632,7 +631,7 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): continue if is_pure_reference_choice(ast): - transformer_alts, identifier_alts, unknown_alts = classify_choice_alternatives( + _, identifier_alts, unknown_alts = classify_choice_alternatives( ast.alternatives, rule_to_type ) if unknown_alts: @@ -669,27 +668,27 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): return GramFileResult(gram_stem, declarations, implementations, registrations, skipped, manual_bodies) -def print_output(declarations, implementations, registrations, skipped, manual_bodies, gram_stem): - if skipped: +def print_output(result: GramFileResult): + if result.skipped: print("=== SKIPPED (nothing generated) ===") - for rule_name, reason in skipped: + for rule_name, reason in result.skipped: print(f" {rule_name}: {reason}") print() - if manual_bodies: + if result.manual_bodies: print("=== MANUAL BODY NEEDED (Internal generated, body must be hand-written) ===") - for rule_name, reason in manual_bodies: + for rule_name, reason in result.manual_bodies: print(f" {rule_name}: {reason}") print() print("=== DECLARATIONS (peg_transformer_generated.hpp) ===") - print("".join(declarations)) + print("".join(result.declarations)) - print(f"=== IMPLEMENTATION (generated/transform_{gram_stem}_generated.cpp) ===") - print("".join(implementations)) + print(f"=== IMPLEMENTATION (generated/transform_{result.gram_stem}_generated.cpp) ===") + print("".join(result.implementations)) - print(f"=== REGISTRATION (in Register{gram_stem.capitalize()}() in peg_transformer_factory.cpp) ===") - print("".join(registrations)) + print(f"=== REGISTRATION (in Register{result.gram_stem.capitalize()}() in peg_transformer_factory.cpp) ===") + print("".join(result.registrations)) def cpp_file_content(implementations): @@ -722,11 +721,13 @@ def write_cpp_file(implementations, gram_stem): print(f"Wrote {cpp_path}") -def write_shared_files(all_declarations): +def write_hpp(all_declarations): hpp_path = include_peg_dir / "peg_transformer_generated.hpp" hpp_path.write_text(GENERATED_HEADER + "".join(all_declarations)) print(f"Wrote {hpp_path}") + +def write_cmake(): existing_cpp = sorted(p.name for p in generated_dir.glob("*_generated.cpp")) cmake_path = generated_dir / "CMakeLists.txt" cmake_path.write_text(cmake_content(existing_cpp)) @@ -756,10 +757,8 @@ def process_gram_file(gram_filename, rule_to_type, excluded_rules): """Parse a .gram file and classify all its rules into a GramFileResult.""" gram_stem = gram_filename.removesuffix('.gram') gram_path = statements_dir / gram_filename - with open(gram_path, 'r') as f: - file_content = f.read() try: - rules = parse_peg_grammar(file_content) + rules = parse_peg_grammar(gram_path.read_text()) except Exception as e: raise Exception(f"{gram_filename}: {e}") from None @@ -781,18 +780,18 @@ def main(): if args.write: all_declarations = [d for r in results for d in r.declarations] - write_shared_files(all_declarations) + write_hpp(all_declarations) for r in results: write_cpp_file(r.implementations, r.gram_stem) + write_cmake() + for r in results: print_manual_steps(r.registrations, r.gram_stem) else: for r in results: print(f"\n{'=' * 60}") print(f" {r.gram_stem}.gram") print(f"{'=' * 60}") - print_output( - r.declarations, r.implementations, r.registrations, r.skipped, r.manual_bodies, gram_stem=r.gram_stem - ) + print_output(r) if __name__ == "__main__": From 7b26dcb94d4128ba448a01aa7ddfb78a96654996 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 15:40:31 +0200 Subject: [PATCH 32/56] Format fix --- scripts/parser/gen_transformer_v2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index e7a26371c1fb..e693fa5ddfdf 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -631,9 +631,7 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): continue if is_pure_reference_choice(ast): - _, identifier_alts, unknown_alts = classify_choice_alternatives( - ast.alternatives, rule_to_type - ) + _, identifier_alts, unknown_alts = classify_choice_alternatives(ast.alternatives, rule_to_type) if unknown_alts: skipped.append((rule_name, f"choice has unknown alternatives: {unknown_alts}")) continue From d96753129652b220e754936e64b16631749f6688 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 8 May 2026 16:29:05 +0200 Subject: [PATCH 33/56] Auto generate detach --- scripts/parser/gen_transformer_v2.py | 2 +- scripts/parser/grammar_types.yml | 2 +- .../parser/peg/transformer/peg_transformer.hpp | 1 + .../peg/transformer/peg_transformer_generated.hpp | 3 +++ .../peg/transformer/generated/CMakeLists.txt | 2 +- .../generated/transform_detach_generated.cpp | 15 +++++++++++++++ .../peg/transformer/peg_transformer_factory.cpp | 3 ++- src/parser/peg/transformer/transform_detach.cpp | 10 +++------- src/parser/peg/transformer/transform_drop.cpp | 4 ++++ 9 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 src/parser/peg/transformer/generated/transform_detach_generated.cpp diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index e693fa5ddfdf..fa1a649b87c1 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -772,7 +772,7 @@ def main(): arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") args = arg_parser.parse_args() - gram_files_to_gen = ['use.gram', 'transaction.gram'] + gram_files_to_gen = ['use.gram', 'transaction.gram', 'detach.gram'] rule_to_type, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') results = [process_gram_file(f, rule_to_type, excluded_rules) for f in gram_files_to_gen] diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index af09f389f6f5..4f31d9daf5b6 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -792,6 +792,7 @@ Bool: - AnyOrAll - IgnoreOrRespectNulls - PrecedingOrFollowing + - IfExists - IncludeOrExcludeNulls Int64: @@ -925,7 +926,6 @@ overrides: excluded_rules: - Program - FunctionType - - IfExists - Database - AbortOrRollback - CommitOrEnd diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 70a2af931711..b7c137010427 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -633,6 +633,7 @@ class PEGTransformerFactory { static unique_ptr TransformDropCollation(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropType(PEGTransformer &transformer, ParseResult &parse_result); static bool TransformDropBehavior(PEGTransformer &transformer, ParseResult &parse_result); + static bool TransformIfExists(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropSecret(PEGTransformer &transformer, ParseResult &parse_result); static string TransformDropSecretStorage(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropTrigger(PEGTransformer &transformer, ParseResult &parse_result); diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 1581fdc541da..469222fe9c1b 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -21,3 +21,6 @@ static unique_ptr TransformCommitTransactionInternal(PEGTransforme static unique_ptr TransformCommitTransaction(); static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); +static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformDetachStatement(bool if_exists, string catalog_name); diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index d88a81c03d1d..ad7589e3f665 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,5 +1,5 @@ add_library_unity( - duckdb_parser_peg_transformer_generated OBJECT + duckdb_parser_peg_transformer_generated OBJECT transform_detach_generated.cpp transform_transaction_generated.cpp transform_use_generated.cpp) set(ALL_OBJECT_FILES diff --git a/src/parser/peg/transformer/generated/transform_detach_generated.cpp b/src/parser/peg/transformer/generated/transform_detach_generated.cpp new file mode 100644 index 000000000000..ed13963cd1d0 --- /dev/null +++ b/src/parser/peg/transformer/generated/transform_detach_generated.cpp @@ -0,0 +1,15 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + bool if_exists {}; + transformer.TransformOptional(list_pr, 2, if_exists); + auto catalog_name = list_pr.Child(3).identifier; + return TransformDetachStatement(if_exists, catalog_name); +} + +} // namespace duckdb diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 79bd4d670c65..9dd931534b3b 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -434,7 +434,7 @@ void PEGTransformerFactory::RegisterDescribe() { void PEGTransformerFactory::RegisterDetach() { // detach.gram - REGISTER_TRANSFORM(TransformDetachStatement); + Register("DetachStatement", &TransformDetachStatementInternal); } void PEGTransformerFactory::RegisterDrop() { @@ -454,6 +454,7 @@ void PEGTransformerFactory::RegisterDrop() { REGISTER_TRANSFORM(TransformDropCollation); REGISTER_TRANSFORM(TransformDropType); REGISTER_TRANSFORM(TransformDropBehavior); + REGISTER_TRANSFORM(TransformIfExists); REGISTER_TRANSFORM(TransformDropSecret); REGISTER_TRANSFORM(TransformDropSecretStorage); REGISTER_TRANSFORM(TransformDropTrigger); diff --git a/src/parser/peg/transformer/transform_detach.cpp b/src/parser/peg/transformer/transform_detach.cpp index 447c33807113..a7364bdc2ff4 100644 --- a/src/parser/peg/transformer/transform_detach.cpp +++ b/src/parser/peg/transformer/transform_detach.cpp @@ -3,15 +3,11 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatement(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDetachStatement(bool if_exists, string catalog_name) { auto result = make_uniq(); auto info = make_uniq(); - - auto &list_pr = parse_result.Cast(); - auto &if_exists = list_pr.Child(2); - info->if_not_found = if_exists.HasResult() ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - info->name = list_pr.Child(3).identifier; + info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; + info->name = std::move(catalog_name); result->info = std::move(info); return std::move(result); } diff --git a/src/parser/peg/transformer/transform_drop.cpp b/src/parser/peg/transformer/transform_drop.cpp index 8a0570611428..93aa4e28687d 100644 --- a/src/parser/peg/transformer/transform_drop.cpp +++ b/src/parser/peg/transformer/transform_drop.cpp @@ -233,6 +233,10 @@ bool PEGTransformerFactory::TransformDropBehavior(PEGTransformer &transformer, P return StringUtil::CIEquals(choice_pr.Cast().keyword, "cascade"); } +bool PEGTransformerFactory::TransformIfExists(PEGTransformer &transformer, ParseResult &parse_result) { + return true; +} + unique_ptr PEGTransformerFactory::TransformDropSecret(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); From 9d42763dac9225980c5346180ada69fc3f07ee90 Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 9 May 2026 15:22:15 -0700 Subject: [PATCH 34/56] Keep stats at unsigned integer cast --- .../statistics/expression/propagate_cast.cpp | 10 +++++++++ .../statistics/statistics_unsigned_cast.test | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 test/optimizer/statistics/statistics_unsigned_cast.test diff --git a/src/optimizer/statistics/expression/propagate_cast.cpp b/src/optimizer/statistics/expression/propagate_cast.cpp index 3e10cc49a35c..b4df6d740268 100644 --- a/src/optimizer/statistics/expression/propagate_cast.cpp +++ b/src/optimizer/statistics/expression/propagate_cast.cpp @@ -38,6 +38,11 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log case PhysicalType::INT32: case PhysicalType::INT64: case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::UINT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: break; @@ -50,6 +55,11 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log case PhysicalType::INT32: case PhysicalType::INT64: case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::UINT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: break; diff --git a/test/optimizer/statistics/statistics_unsigned_cast.test b/test/optimizer/statistics/statistics_unsigned_cast.test new file mode 100644 index 000000000000..3a6bc4d2ce4f --- /dev/null +++ b/test/optimizer/statistics/statistics_unsigned_cast.test @@ -0,0 +1,22 @@ +# name: test/optimizer/statistics/statistics_unsigned_cast.test +# description: Statistics propagation across casts involving unsigned integer types +# group: [statistics] + +statement ok +PRAGMA explain_output = OPTIMIZED_ONLY; + +statement ok +CREATE TABLE t_signed AS SELECT i::INT AS id FROM range(0, 100) t(i); + +query II +EXPLAIN SELECT * FROM t_signed WHERE id::BIGINT > 100; +---- +logical_opt :.*EMPTY_RESULT.* + +statement ok +CREATE TABLE t_unsigned AS SELECT i::UINTEGER AS id FROM range(0, 100) t(i); + +query II +EXPLAIN SELECT * FROM t_unsigned WHERE id::BIGINT > 100; +---- +logical_opt :.*EMPTY_RESULT.* From 6bf693ac11fd2309f537a2c1fe8d4f17b200169f Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 9 May 2026 16:39:50 -0700 Subject: [PATCH 35/56] fix test --- test/optimizer/pushdown/join_filter_pushdown_cast.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/optimizer/pushdown/join_filter_pushdown_cast.test b/test/optimizer/pushdown/join_filter_pushdown_cast.test index 5073ea40ae66..25a0364b87a9 100644 --- a/test/optimizer/pushdown/join_filter_pushdown_cast.test +++ b/test/optimizer/pushdown/join_filter_pushdown_cast.test @@ -167,11 +167,10 @@ CREATE TABLE probe_ubigint AS SELECT v::UBIGINT AS a FROM (VALUES (1), (5), (10) statement ok CREATE TABLE build_neg AS SELECT b::BIGINT AS b FROM (VALUES (5), (-1), (10)) t(b); -# TODO HERE query II EXPLAIN ANALYZE SELECT COUNT(*) FROM (SELECT a::BIGINT AS a FROM probe_ubigint) t JOIN build_neg ON t.a = build_neg.b; ---- -analyzed_plan :.*Dynamic Filters:.* +analyzed_plan :.*Dynamic Filters:.* # -1 cannot be cast to UBIGINT — only 5 and 10 match query I From d6ff89d1755aa8aa0a368e9cf88ddf81c9cfc3a1 Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 9 May 2026 17:52:21 -0700 Subject: [PATCH 36/56] empty commit to trigger CI: rerun flaky test From d09782bfd8018d442824cb3e2f56613c3e0cd8b4 Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 9 May 2026 19:07:30 -0700 Subject: [PATCH 37/56] disallow cast between enums --- src/optimizer/statistics/expression/propagate_cast.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/optimizer/statistics/expression/propagate_cast.cpp b/src/optimizer/statistics/expression/propagate_cast.cpp index b4df6d740268..8be2c8a6a819 100644 --- a/src/optimizer/statistics/expression/propagate_cast.cpp +++ b/src/optimizer/statistics/expression/propagate_cast.cpp @@ -31,6 +31,9 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log if (source == target) { return true; } + if (source.id() == LogicalTypeId::ENUM || target.id() == LogicalTypeId::ENUM) { + return false; + } // we can only propagate numeric -> numeric switch (source.InternalType()) { case PhysicalType::INT8: From 84e502c358b34782f3a7d47cc4194f44e62ae36e Mon Sep 17 00:00:00 2001 From: Mytherin Date: Sun, 10 May 2026 10:08:15 +0200 Subject: [PATCH 38/56] Move verification projection to test config --- .github/workflows/Main.yml | 6 ++++++ src/common/settings.json | 7 +++++++ src/include/duckdb/main/settings.hpp | 11 +++++++++++ src/main/config.cpp | 11 ++++++----- src/optimizer/column_lifetime_analyzer.cpp | 5 +++-- test/configs/verification_projection.json | 5 +++++ 6 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 test/configs/verification_projection.json diff --git a/.github/workflows/Main.yml b/.github/workflows/Main.yml index 49460f57e9bc..69d88b720774 100644 --- a/.github/workflows/Main.yml +++ b/.github/workflows/Main.yml @@ -1442,6 +1442,12 @@ jobs: run: | python3 scripts/ci/run_tests.py --test-config=test/configs/prefetch_all_parquet_files.json ./build/release/test/unittest + - name: test/configs/verification_projection.json + if: success() || failure() + shell: bash + run: | + python3 scripts/ci/run_tests.py --test-config=test/configs/verification_projection.json ./build/release/test/unittest + - name: test/configs/no_local_filesystem.json if: success() || failure() shell: bash diff --git a/src/common/settings.json b/src/common/settings.json index 72ce76a94b4a..a5cbb1ed030e 100644 --- a/src/common/settings.json +++ b/src/common/settings.json @@ -338,6 +338,13 @@ "scope": "global", "custom_implementation": true }, + { + "name": "debug_verification_projection", + "description": "DEBUG SETTING: add internal verification projections to stress optimizers", + "type": "BOOLEAN", + "default_scope": "global", + "default_value": "false" + }, { "name": "debug_verify_blocks", "description": "DEBUG SETTING: verify block metadata during checkpointing", diff --git a/src/include/duckdb/main/settings.hpp b/src/include/duckdb/main/settings.hpp index ff95eb44e0d3..fbfbbaa59673 100644 --- a/src/include/duckdb/main/settings.hpp +++ b/src/include/duckdb/main/settings.hpp @@ -558,6 +558,17 @@ struct DebugVerificationModeSetting { static Value GetSetting(const ClientContext &context); }; +struct DebugVerificationProjectionSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "debug_verification_projection"; + static constexpr const char *Description = + "DEBUG SETTING: add internal verification projections to stress optimizers"; + static constexpr const char *InputType = "BOOLEAN"; + static constexpr const char *DefaultValue = "false"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; + static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); +}; + struct DebugVerifyBlocksSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "debug_verify_blocks"; diff --git a/src/main/config.cpp b/src/main/config.cpp index a1f920dae935..cf2b8ae79f09 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -106,6 +106,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_SETTING_CALLBACK(DebugPhysicalTableScanExecutionStrategySetting), DUCKDB_SETTING(DebugSkipCheckpointOnCommitSetting), DUCKDB_GLOBAL(DebugVerificationModeSetting), + DUCKDB_SETTING(DebugVerificationProjectionSetting), DUCKDB_SETTING(DebugVerifyBlocksSetting), DUCKDB_SETTING(DebugVerifySerializerSetting), DUCKDB_SETTING_CALLBACK(DebugVerifyStatementSetting), @@ -222,12 +223,12 @@ static const ConfigurationOption internal_options[] = { static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("configure_metrics", 27), DUCKDB_SETTING_ALIAS("custom_profiling_settings", 27), - DUCKDB_SETTING_ALIAS("memory_limit", 110), - DUCKDB_SETTING_ALIAS("null_order", 50), - DUCKDB_SETTING_ALIAS("profiling_output", 131), - DUCKDB_SETTING_ALIAS("user", 146), + DUCKDB_SETTING_ALIAS("memory_limit", 111), + DUCKDB_SETTING_ALIAS("null_order", 51), + DUCKDB_SETTING_ALIAS("profiling_output", 132), + DUCKDB_SETTING_ALIAS("user", 147), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 26), - DUCKDB_SETTING_ALIAS("worker_threads", 145), + DUCKDB_SETTING_ALIAS("worker_threads", 146), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/optimizer/column_lifetime_analyzer.cpp b/src/optimizer/column_lifetime_analyzer.cpp index bf7a2d107deb..abbc0370e98e 100644 --- a/src/optimizer/column_lifetime_analyzer.cpp +++ b/src/optimizer/column_lifetime_analyzer.cpp @@ -196,7 +196,9 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) { } void ColumnLifetimeAnalyzer::Verify(LogicalOperator &op) { -#ifdef DEBUG + if (!Settings::Get(optimizer.context)) { + return; + } if (everything_referenced) { return; } @@ -216,7 +218,6 @@ void ColumnLifetimeAnalyzer::Verify(LogicalOperator &op) { default: break; } -#endif } void ColumnLifetimeAnalyzer::AddVerificationProjection(unique_ptr &child) { diff --git a/test/configs/verification_projection.json b/test/configs/verification_projection.json new file mode 100644 index 000000000000..81f75fcc5aa2 --- /dev/null +++ b/test/configs/verification_projection.json @@ -0,0 +1,5 @@ +{ + "description": "Run with debug_verification_projection enabled", + "on_init": "SET debug_verification_projection=true;", + "skip_compiled": "true" +} From 263a7b2d6d79f4791ecbc85a0c230603e50b1b55 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Sun, 10 May 2026 16:12:38 +0200 Subject: [PATCH 39/56] Split off debug_verify_column_bindings into a setting as well --- .../json/json_functions/json_serialize_plan.cpp | 2 +- src/common/settings.json | 7 +++++++ src/execution/column_binding_resolver.cpp | 8 +++++--- .../duckdb/execution/column_binding_resolver.hpp | 2 +- src/include/duckdb/main/settings.hpp | 10 ++++++++++ src/main/client_context.cpp | 2 +- src/main/config.cpp | 11 ++++++----- src/optimizer/optimizer.cpp | 2 +- src/planner/planner.cpp | 11 +++++++---- src/planner/subquery/flatten_dependent_join.cpp | 4 ++-- test/configs/verify_column_bindings.json | 13 +++++++++++++ 11 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 test/configs/verify_column_bindings.json diff --git a/extension/json/json_functions/json_serialize_plan.cpp b/extension/json/json_functions/json_serialize_plan.cpp index 962ba9c0d461..a2d2b141831b 100644 --- a/extension/json/json_functions/json_serialize_plan.cpp +++ b/extension/json/json_functions/json_serialize_plan.cpp @@ -147,7 +147,7 @@ static void JsonSerializePlanFunction(DataChunk &args, ExpressionState &state, V } ColumnBindingResolver resolver; - resolver.Verify(*plan); + resolver.Verify(context, *plan); resolver.VisitOperator(*plan); plan->ResolveOperatorTypes(); diff --git a/src/common/settings.json b/src/common/settings.json index a5cbb1ed030e..ff5801cc3f3e 100644 --- a/src/common/settings.json +++ b/src/common/settings.json @@ -352,6 +352,13 @@ "default_scope": "global", "default_value": "false" }, + { + "name": "debug_verify_column_bindings", + "description": "DEBUG SETTING: run extra internal verification of column bindings", + "type": "BOOLEAN", + "default_scope": "global", + "default_value": "false" + }, { "name": "debug_verify_serializer", "description": "DEBUG SETTING: verify logical plan serializer", diff --git a/src/execution/column_binding_resolver.cpp b/src/execution/column_binding_resolver.cpp index 89ea5c4fceb2..993e09c8af79 100644 --- a/src/execution/column_binding_resolver.cpp +++ b/src/execution/column_binding_resolver.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/operator/logical_extension_operator.hpp" #include "duckdb/planner/operator/logical_insert.hpp" #include "duckdb/planner/operator/logical_recursive_cte.hpp" +#include "duckdb/main/settings.hpp" namespace duckdb { @@ -244,13 +245,14 @@ unordered_set ColumnBindingResolver::VerifyInternal(LogicalOperator return result; } -void ColumnBindingResolver::Verify(LogicalOperator &op) { -#ifdef DEBUG +void ColumnBindingResolver::Verify(ClientContext &context, LogicalOperator &op) { + if (!Settings::Get(context)) { + return; + } op.ResolveOperatorTypes(); ColumnBindingResolver resolver(true); resolver.VisitOperator(op); VerifyInternal(op); -#endif } } // namespace duckdb diff --git a/src/include/duckdb/execution/column_binding_resolver.hpp b/src/include/duckdb/execution/column_binding_resolver.hpp index d1df03f4f979..ba9b5a436a3f 100644 --- a/src/include/duckdb/execution/column_binding_resolver.hpp +++ b/src/include/duckdb/execution/column_binding_resolver.hpp @@ -22,7 +22,7 @@ class ColumnBindingResolver : public LogicalOperatorVisitor { explicit ColumnBindingResolver(bool verify_only = false); void VisitOperator(LogicalOperator &op) override; - static void Verify(LogicalOperator &op); + static void Verify(ClientContext &context, LogicalOperator &op); protected: vector bindings; diff --git a/src/include/duckdb/main/settings.hpp b/src/include/duckdb/main/settings.hpp index fbfbbaa59673..27f1bb942e2a 100644 --- a/src/include/duckdb/main/settings.hpp +++ b/src/include/duckdb/main/settings.hpp @@ -579,6 +579,16 @@ struct DebugVerifyBlocksSetting { static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); }; +struct DebugVerifyColumnBindingsSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "debug_verify_column_bindings"; + static constexpr const char *Description = "DEBUG SETTING: run extra internal verification of column bindings"; + static constexpr const char *InputType = "BOOLEAN"; + static constexpr const char *DefaultValue = "false"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; + static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); +}; + struct DebugVerifySerializerSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "debug_verify_serializer"; diff --git a/src/main/client_context.cpp b/src/main/client_context.cpp index 1846dff71856..c872139be150 100644 --- a/src/main/client_context.cpp +++ b/src/main/client_context.cpp @@ -789,7 +789,7 @@ unique_ptr ClientContext::ExtractPlan(const string &query) { } ColumnBindingResolver resolver; - resolver.Verify(*plan); + resolver.Verify(*this, *plan); resolver.VisitOperator(*plan); plan->ResolveOperatorTypes(); diff --git a/src/main/config.cpp b/src/main/config.cpp index cf2b8ae79f09..4c57957a76a3 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -108,6 +108,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_GLOBAL(DebugVerificationModeSetting), DUCKDB_SETTING(DebugVerificationProjectionSetting), DUCKDB_SETTING(DebugVerifyBlocksSetting), + DUCKDB_SETTING(DebugVerifyColumnBindingsSetting), DUCKDB_SETTING(DebugVerifySerializerSetting), DUCKDB_SETTING_CALLBACK(DebugVerifyStatementSetting), DUCKDB_SETTING(DebugVerifyStatsSetting), @@ -223,12 +224,12 @@ static const ConfigurationOption internal_options[] = { static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("configure_metrics", 27), DUCKDB_SETTING_ALIAS("custom_profiling_settings", 27), - DUCKDB_SETTING_ALIAS("memory_limit", 111), - DUCKDB_SETTING_ALIAS("null_order", 51), - DUCKDB_SETTING_ALIAS("profiling_output", 132), - DUCKDB_SETTING_ALIAS("user", 147), + DUCKDB_SETTING_ALIAS("memory_limit", 112), + DUCKDB_SETTING_ALIAS("null_order", 52), + DUCKDB_SETTING_ALIAS("profiling_output", 133), + DUCKDB_SETTING_ALIAS("user", 148), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 26), - DUCKDB_SETTING_ALIAS("worker_threads", 146), + DUCKDB_SETTING_ALIAS("worker_threads", 147), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 32b94f180e39..5e113f1f01e2 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -116,7 +116,7 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function &ca } void Optimizer::Verify(LogicalOperator &op) { - ColumnBindingResolver::Verify(op); + ColumnBindingResolver::Verify(context, op); } // Returns true if the plan contains a DML statement (INSERT/UPDATE/DELETE/MERGE INTO) diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index 8bfd7c829ae6..7bf69afdf727 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -178,17 +178,20 @@ static bool OperatorSupportsSerialization(LogicalOperator &op) { void Planner::VerifyPlan(ClientContext &context, unique_ptr &op, optional_ptr map) { - auto &config = DBConfig::GetConfig(context); - if (!op || !Settings::Get(context)) { + if (!op) { + return; + } + // verify the column bindings of the plan + ColumnBindingResolver::Verify(context, *op); + if (!Settings::Get(context)) { return; } //! SELECT only for now if (!OperatorSupportsSerialization(*op)) { return; } - // verify the column bindings of the plan - ColumnBindingResolver::Verify(*op); + auto &config = DBConfig::GetConfig(context); // format (de)serialization of this operator try { MemoryStream stream(Allocator::Get(context)); diff --git a/src/planner/subquery/flatten_dependent_join.cpp b/src/planner/subquery/flatten_dependent_join.cpp index 13f37e7c1229..26f77906c272 100644 --- a/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/planner/subquery/flatten_dependent_join.cpp @@ -485,7 +485,7 @@ vector FlattenDependentJoins::PushDownCorrelatedNode(unique_ptr FlattenDependentJoins::PushDownProjection(unique_ptrexpressions, state, true); auto &proj = plan->Cast(); auto correlated_offset = plan->expressions.size() - correlated_columns.size(); - ColumnBindingResolver::Verify(*plan); + ColumnBindingResolver::Verify(binder.context, *plan); return CreateContiguousState(ColumnBinding(proj.table_index, ProjectionIndex(correlated_offset))); } diff --git a/test/configs/verify_column_bindings.json b/test/configs/verify_column_bindings.json new file mode 100644 index 000000000000..5d5b436095b8 --- /dev/null +++ b/test/configs/verify_column_bindings.json @@ -0,0 +1,13 @@ +{ + "description": "Run with debug_verify_column_bindings enabled", + "on_init": "SET debug_verify_column_bindings=true;", + "skip_compiled": "true", + "skip_tests": [ + { + "reason": "FIXME: Internal verification failed after CommonAggregateOptimizer", + "paths": [ + "test/sql/aggregate/having/test_scalar_having.test" + ] + } + ] +} From a974dd6eaf95866c2026f497f5d13536a9fc413d Mon Sep 17 00:00:00 2001 From: Mytherin Date: Sun, 10 May 2026 16:13:44 +0200 Subject: [PATCH 40/56] Add to Main.yml --- .github/workflows/Main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/Main.yml b/.github/workflows/Main.yml index 69d88b720774..61ba7dcf87ce 100644 --- a/.github/workflows/Main.yml +++ b/.github/workflows/Main.yml @@ -1448,6 +1448,12 @@ jobs: run: | python3 scripts/ci/run_tests.py --test-config=test/configs/verification_projection.json ./build/release/test/unittest + - name: test/configs/verify_column_bindings.json + if: success() || failure() + shell: bash + run: | + python3 scripts/ci/run_tests.py --test-config=test/configs/verify_column_bindings.json ./build/release/test/unittest + - name: test/configs/no_local_filesystem.json if: success() || failure() shell: bash From 0761edb240deb637cf747fbdeb746157c54d95a0 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 10:00:44 +0200 Subject: [PATCH 41/56] Auto generate export as well --- scripts/parser/gen_transformer_v2.py | 2 +- .../duckdb/parser/peg/inlined_grammar.gram | 2 -- .../peg/transformer/peg_transformer.hpp | 9 +---- .../transformer/peg_transformer_generated.hpp | 9 +++++ src/parser/peg/grammar/statements/export.gram | 2 -- .../peg/transformer/generated/CMakeLists.txt | 3 +- .../generated/transform_export_generated.cpp | 30 ++++++++++++++++ .../transformer/peg_transformer_factory.cpp | 6 ++-- .../peg/transformer/transform_export.cpp | 36 +++++++------------ .../peg/transformer/transform_import.cpp | 7 ++-- 10 files changed, 61 insertions(+), 45 deletions(-) create mode 100644 src/parser/peg/transformer/generated/transform_export_generated.cpp diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index fa1a649b87c1..6d4875f345c5 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -772,7 +772,7 @@ def main(): arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") args = arg_parser.parse_args() - gram_files_to_gen = ['use.gram', 'transaction.gram', 'detach.gram'] + gram_files_to_gen = ['use.gram', 'transaction.gram', 'detach.gram', 'export.gram'] rule_to_type, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') results = [process_gram_file(f, rule_to_type, excluded_rules) for f in gram_files_to_gen] diff --git a/src/include/duckdb/parser/peg/inlined_grammar.gram b/src/include/duckdb/parser/peg/inlined_grammar.gram index e4d07ed0df61..f37066dded89 100644 --- a/src/include/duckdb/parser/peg/inlined_grammar.gram +++ b/src/include/duckdb/parser/peg/inlined_grammar.gram @@ -1444,9 +1444,7 @@ VariableList <- List(Expression) ResetStatement <- 'RESET' (SetVariable / SetSetting) ExportStatement <- 'EXPORT' 'DATABASE' ExportSource? StringLiteral GenericCopyOptionList? - ExportSource <- CatalogName 'TO' - ImportStatement <- 'IMPORT' 'DATABASE' StringLiteral CheckpointStatement <- 'FORCE'? 'CHECKPOINT' CatalogName? diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index b7c137010427..62bc1d7f80c8 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -648,10 +648,6 @@ class PEGTransformerFactory { static vector TransformExplainOptionList(PEGTransformer &transformer, ParseResult &parse_result); static GenericCopyOption TransformExplainOption(PEGTransformer &transformer, ParseResult &parse_result); - // export.gram - static unique_ptr TransformExportStatement(PEGTransformer &transformer, ParseResult &parse_result); - static string TransformExportSource(PEGTransformer &transformer, ParseResult &parse_result); - // expression.gram static unique_ptr TransformExpressionStatement(PEGTransformer &transformer, ParseResult &parse_result); @@ -887,7 +883,7 @@ class PEGTransformerFactory { static unique_ptr TransformListComprehensionFilter(PEGTransformer &transformer, ParseResult &parse_result); static ExpressionType TransformIsDistinctFromOp(PEGTransformer &transformer, ParseResult &parse_result); - // + static unique_ptr TransformGroupingExpression(PEGTransformer &transformer, ParseResult &parse_result); static qualified_column_map_t TransformRenameList(PEGTransformer &transformer, ParseResult &parse_result); @@ -899,9 +895,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static bool TransformIgnoreOrRespectNulls(PEGTransformer &transformer, ParseResult &parse_result); - // import.gram - static unique_ptr TransformImportStatement(PEGTransformer &transformer, ParseResult &parse_result); - // insert.gram static unique_ptr TransformInsertStatement(PEGTransformer &transformer, ParseResult &parse_result); static OnConflictAction TransformOrAction(PEGTransformer &transformer, ParseResult &parse_result); diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 469222fe9c1b..4ee9031b80f8 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -24,3 +24,12 @@ static TransactionModifierType TransformReadOrWrite(TransactionModifierType read static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDetachStatement(bool if_exists, string catalog_name); +static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformExportStatement(string export_source, string string_literal, + vector generic_copy_option_list); +static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); +static string TransformExportSource(string catalog_name); +static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformImportStatement(string string_literal); diff --git a/src/parser/peg/grammar/statements/export.gram b/src/parser/peg/grammar/statements/export.gram index 8be0679e5189..d6d5640bb175 100644 --- a/src/parser/peg/grammar/statements/export.gram +++ b/src/parser/peg/grammar/statements/export.gram @@ -1,5 +1,3 @@ ExportStatement <- 'EXPORT' 'DATABASE' ExportSource? StringLiteral GenericCopyOptionList? - ExportSource <- CatalogName 'TO' - ImportStatement <- 'IMPORT' 'DATABASE' StringLiteral diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index ad7589e3f665..d5b5042bb954 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,6 +1,7 @@ add_library_unity( duckdb_parser_peg_transformer_generated OBJECT transform_detach_generated.cpp - transform_transaction_generated.cpp transform_use_generated.cpp) + transform_export_generated.cpp transform_transaction_generated.cpp + transform_use_generated.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} diff --git a/src/parser/peg/transformer/generated/transform_export_generated.cpp b/src/parser/peg/transformer/generated/transform_export_generated.cpp new file mode 100644 index 000000000000..2db9523b4ceb --- /dev/null +++ b/src/parser/peg/transformer/generated/transform_export_generated.cpp @@ -0,0 +1,30 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + string export_source {}; + transformer.TransformOptional(list_pr, 2, export_source); + auto string_literal = transformer.Transform(list_pr, 3); + vector generic_copy_option_list {}; + transformer.TransformOptional(list_pr, 4, generic_copy_option_list); + return TransformExportStatement(export_source, string_literal, generic_copy_option_list); +} + +string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + return TransformExportSource(catalog_name); +} + +unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto string_literal = transformer.Transform(list_pr, 2); + return TransformImportStatement(string_literal); +} + +} // namespace duckdb diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 9dd931534b3b..1f71b91ba30f 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -475,8 +475,8 @@ void PEGTransformerFactory::RegisterExplain() { } void PEGTransformerFactory::RegisterExport() { - REGISTER_TRANSFORM(TransformExportSource); - REGISTER_TRANSFORM(TransformExportStatement); + Register("ExportSource", &TransformExportSourceInternal); + Register("ExportStatement", &TransformExportStatementInternal); } void PEGTransformerFactory::RegisterExpression() { @@ -655,7 +655,7 @@ void PEGTransformerFactory::RegisterExpression() { } void PEGTransformerFactory::RegisterImport() { - REGISTER_TRANSFORM(TransformImportStatement); + Register("ImportStatement", &TransformImportStatementInternal); } void PEGTransformerFactory::RegisterInsert() { diff --git a/src/parser/peg/transformer/transform_export.cpp b/src/parser/peg/transformer/transform_export.cpp index c4390729f48f..7eb4a77c75aa 100644 --- a/src/parser/peg/transformer/transform_export.cpp +++ b/src/parser/peg/transformer/transform_export.cpp @@ -4,40 +4,30 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatement(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformExportStatement(string export_source, string file_path, vector options) { auto info = make_uniq(); - auto &list_pr = parse_result.Cast(); - info->file_path = list_pr.Child(3).result; + info->file_path = std::move(file_path); info->format = "csv"; info->is_from = false; - auto &parens = list_pr.Child(4); - if (parens.HasResult()) { - auto option_list = transformer.Transform>(parens.GetResult()); - for (auto &option : option_list) { - if (option.name == "format") { - info->format = option.children[0].GetValue(); - info->is_format_auto_detected = false; - } else if (option.expression) { - info->parsed_options[StringUtil::Upper(option.name)] = std::move(option.expression); - } else { - info->options[StringUtil::Upper(option.name)] = option.children; - } + for (auto &option : options) { + if (option.name == "format") { + info->format = option.children[0].GetValue(); + info->is_format_auto_detected = false; + } else if (option.expression) { + info->parsed_options[StringUtil::Upper(option.name)] = std::move(option.expression); + } else { + info->options[StringUtil::Upper(option.name)] = option.children; } } auto result = make_uniq(std::move(info)); - auto &database_result = list_pr.Child(2); - if (database_result.HasResult()) { - result->database = transformer.Transform(database_result.GetResult()); - } + result->database = export_source; return std::move(result); } -string PEGTransformerFactory::TransformExportSource(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - return list_pr.Child(0).identifier; +string PEGTransformerFactory::TransformExportSource(string catalog_name) { + return catalog_name; } } // namespace duckdb diff --git a/src/parser/peg/transformer/transform_import.cpp b/src/parser/peg/transformer/transform_import.cpp index 3ae8104a4ee6..e00d5a560421 100644 --- a/src/parser/peg/transformer/transform_import.cpp +++ b/src/parser/peg/transformer/transform_import.cpp @@ -3,13 +3,10 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformImportStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto name = list_pr.Child(2).result; +unique_ptr PEGTransformerFactory::TransformImportStatement(string import) { auto result = make_uniq(); result->info->name = "import_database"; - result->info->parameters.emplace_back(make_uniq(Value(name))); + result->info->parameters.emplace_back(make_uniq(Value(std::move(import)))); return std::move(result); } From 29fc453ed0beb19145cb50b11fa16cc722a6c446 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 10:01:36 +0200 Subject: [PATCH 42/56] Format fix --- src/parser/peg/transformer/transform_export.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/peg/transformer/transform_export.cpp b/src/parser/peg/transformer/transform_export.cpp index 7eb4a77c75aa..6bf8c8f54605 100644 --- a/src/parser/peg/transformer/transform_export.cpp +++ b/src/parser/peg/transformer/transform_export.cpp @@ -4,7 +4,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatement(string export_source, string file_path, vector options) { +unique_ptr PEGTransformerFactory::TransformExportStatement(string export_source, string file_path, + vector options) { auto info = make_uniq(); info->file_path = std::move(file_path); info->format = "csv"; From c1841a38cf8a6f4b7fe58dc640e673f3a362e7d0 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 13:17:51 +0200 Subject: [PATCH 43/56] Make parameters const & if possible --- scripts/parser/gen_transformer_v2.py | 153 +++++++++++------- scripts/parser/generate_transformer.py | 48 +++--- scripts/parser/grammar_types.yml | 58 +++++++ .../transformer/peg_transformer_generated.hpp | 61 ++++--- .../peg/transformer/generated/CMakeLists.txt | 10 +- .../generated/transform_detach_generated.cpp | 4 +- .../generated/transform_export_generated.cpp | 13 +- .../transform_transaction_generated.cpp | 20 +-- .../generated/transform_use_generated.cpp | 15 +- .../peg/transformer/transform_detach.cpp | 4 +- .../peg/transformer/transform_export.cpp | 11 +- .../peg/transformer/transform_import.cpp | 4 +- .../peg/transformer/transform_transaction.cpp | 4 +- src/parser/peg/transformer/transform_use.cpp | 13 +- 14 files changed, 264 insertions(+), 154 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 6d4875f345c5..b65846b19602 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -7,7 +7,7 @@ sys.path.insert(0, str(Path(__file__).parent)) from inline_grammar import parse_peg_grammar, PEGTokenType -from generate_transformer import load_grammar_types +from generate_transformer import GrammarTypeInfo, load_grammar_types # --------------------------------------------------------------------------- @@ -251,10 +251,10 @@ def is_pure_reference_choice(ast): return isinstance(ast, ChoiceNode) and all(isinstance(a, ReferenceNode) for a in ast.alternatives) -def classify_choice_alternatives(alternatives, rule_to_type): +def classify_choice_alternatives(alternatives, rule_types): """ Split choice alternatives into three groups: - - transformer_alts: names with a registered transformer (in rule_to_type) + - transformer_alts: names with a registered transformer (in rule_types) - identifier_alts: names that are identifier overrides (produce IdentifierParseResult) - unknown_alts: neither registered nor known overrides -- need manual handling Returns (transformer_alts, identifier_alts, unknown_alts). @@ -264,7 +264,7 @@ def classify_choice_alternatives(alternatives, rule_to_type): unknown_alts = [] for ref in alternatives: name = ref.name - if name in rule_to_type: + if name in rule_types: transformer_alts.append(name) elif name in IDENTIFIER_OVERRIDE_RULES: identifier_alts.append(name) @@ -337,21 +337,35 @@ class SeqElement: skip: bool # True for LiteralNode - no semantic value var_name: str = "" cpp_type: str = "" + by_value: bool = False # True for unique_ptr, vector>, bool, int64_t extraction_lines: List[str] = field(default_factory=list) +def _is_by_value(rule_name, rule_types): + """ + Return True if the C++ value for rule_name is move-only and must be passed by value with std::move. + by_value=True in grammar_types.yml means: unique_ptr or vector> (non-copyable). + Primitives (bool, int64_t) use by_value=False and are passed as const T & (harmless, no tidy warning). + Fallback: override rules without a by_value annotation use the unique_ptr< prefix heuristic. + """ + info = rule_types.get(rule_name) + if info is None: + return False + return info.by_value or info.cpp_type.startswith('unique_ptr<') + + def _classify_literal(): """LITERAL token -> KeywordMatcher -> KeywordParseResult. No semantic value.""" return SeqElement(skip=True) -def _classify_reference(name, idx, rule_to_type, excluded_rules): +def _classify_reference(name, idx, rule_types, excluded_rules): """ REFERENCE token -> CreateMatcher(rule_name). Priority order mirrors runtime dispatch: 1. IDENTIFIER_OVERRIDE_RULES -> IdentifierMatcher -> Child() 2. excluded_rules -> keyword-only rule, no semantic value -> skip - 3. rule_to_type -> regular ListMatcher -> transformer.Transform() + 3. rule_types -> regular ListMatcher -> transformer.Transform() """ if name in IDENTIFIER_OVERRIDE_RULES: var_name = to_snake_case(name) @@ -359,21 +373,22 @@ def _classify_reference(name, idx, rule_to_type, excluded_rules): return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) if name in excluded_rules: return _classify_literal() - if name in rule_to_type: - cpp_type = rule_to_type[name] + if name in rule_types: + cpp_type = rule_types[name].cpp_type var_name = to_snake_case(name) lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), extraction_lines=lines) return None -def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): +def _classify_optional_reference(name, idx, rule_types, excluded_rules): """ OptionalNode(ReferenceNode) -> OptionalMatcher wrapping a named rule. Priority order matches _classify_reference: 1. excluded_rules -> keyword-only optional (Transaction?) -> skip 2. IDENTIFIER_OVERRIDE_RULES -> optional identifier, extracted via HasResult() - 3. rule_to_type -> optional typed rule, extracted via TransformOptional + 3. rule_types -> optional typed rule, extracted via TransformOptional """ if name in excluded_rules: return _classify_literal() @@ -387,17 +402,18 @@ def _classify_optional_reference(name, idx, rule_to_type, excluded_rules): f"\t}}", ] return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) - if name in rule_to_type: - cpp_type = rule_to_type[name] + if name in rule_types: + cpp_type = rule_types[name].cpp_type lines = [ f"\t{cpp_type} {var_name} {{}};", f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), extraction_lines=lines) return None -def _classify_parens(inner_node, idx, rule_to_type): +def _classify_parens(inner_node, idx, rule_types): """ ParensNode -> Parens(D) <- '(' D ')'. Uses ExtractResultFromParens() to reach child[1]. @@ -413,17 +429,18 @@ def _classify_parens(inner_node, idx, rule_to_type): f".Cast().identifier;", ] return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) - if name in rule_to_type: - cpp_type = rule_to_type[name] + if name in rule_types: + cpp_type = rule_types[name].cpp_type lines = [ f"\tauto {var_name} = transformer.Transform<{cpp_type}>" f"(ExtractResultFromParens(list_pr.GetChild({idx})));", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), extraction_lines=lines) return None -def _classify_list_macro(inner_node, idx, rule_to_type): +def _classify_list_macro(inner_node, idx, rule_types): """ ListMacroNode -> List(D) <- D (',' D)* ','?. Uses ExtractParseResultsFromList() to collect all D results. @@ -433,9 +450,9 @@ def _classify_list_macro(inner_node, idx, rule_to_type): if not isinstance(inner_node, ReferenceNode): return None name = inner_node.name - if name not in rule_to_type: + if name not in rule_types: return None - child_type = rule_to_type[name] + child_type = rule_types[name].cpp_type var_name = to_snake_case(name) lines = [ f"\tauto {var_name}_items = ExtractParseResultsFromList(list_pr.GetChild({idx}));", @@ -444,10 +461,11 @@ def _classify_list_macro(inner_node, idx, rule_to_type): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), extraction_lines=lines) -def _classify_parens_list(inner_list_node, idx, rule_to_type): +def _classify_parens_list(inner_list_node, idx, rule_types): """ ParensNode(ListMacroNode(D)) -> Parens(List(D)). Uses ExtractParseResultsFromList(ExtractResultFromParens(...)) to collect all D results. @@ -457,9 +475,9 @@ def _classify_parens_list(inner_list_node, idx, rule_to_type): if not isinstance(inner_list_node.inner, ReferenceNode): return None name = inner_list_node.inner.name - if name not in rule_to_type: + if name not in rule_types: return None - child_type = rule_to_type[name] + child_type = rule_types[name].cpp_type var_name = to_snake_case(name) lines = [ f"\tauto {var_name}_items = ExtractParseResultsFromList(" f"ExtractResultFromParens(list_pr.GetChild({idx})));", @@ -468,10 +486,11 @@ def _classify_parens_list(inner_list_node, idx, rule_to_type): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), extraction_lines=lines) -def _classify_repeat(node, idx, rule_to_type, optional): +def _classify_repeat(node, idx, rule_types, optional): """ Shared helper for A* and A+. A* -> OptionalNode(RepeatNode(A)) -> OptionalParseResult wrapping RepeatParseResult. @@ -482,9 +501,9 @@ def _classify_repeat(node, idx, rule_to_type, optional): if not isinstance(node.child, ReferenceNode): return None ref_name = node.child.name - if ref_name not in rule_to_type: + if ref_name not in rule_types: return None - child_type = rule_to_type[ref_name] + child_type = rule_types[ref_name].cpp_type var_name = to_snake_case(ref_name) if optional: lines = [ @@ -505,18 +524,19 @@ def _classify_repeat(node, idx, rule_to_type, optional): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", extraction_lines=lines) + return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(ref_name, rule_types), extraction_lines=lines) -def _classify_star_repeat(node, idx, rule_to_type): - return _classify_repeat(node, idx, rule_to_type, optional=True) +def _classify_star_repeat(node, idx, rule_types): + return _classify_repeat(node, idx, rule_types, optional=True) -def _classify_plus_repeat(node, idx, rule_to_type): - return _classify_repeat(node, idx, rule_to_type, optional=False) +def _classify_plus_repeat(node, idx, rule_types): + return _classify_repeat(node, idx, rule_types, optional=False) -def classify_sequence_element(child, idx, rule_to_type, excluded_rules): +def classify_sequence_element(child, idx, rule_types, excluded_rules): """ Classify one element of a SequenceNode. Mirrors the token-type switch in MatcherFactory::CreateMatcher(). @@ -525,30 +545,30 @@ def classify_sequence_element(child, idx, rule_to_type, excluded_rules): if isinstance(child, LiteralNode): return _classify_literal() if isinstance(child, ReferenceNode): - return _classify_reference(child.name, idx, rule_to_type, excluded_rules) + return _classify_reference(child.name, idx, rule_types, excluded_rules) if isinstance(child, OptionalNode): inner = child.child if isinstance(inner, LiteralNode): return _classify_literal() if isinstance(inner, ReferenceNode): - return _classify_optional_reference(inner.name, idx, rule_to_type, excluded_rules) + return _classify_optional_reference(inner.name, idx, rule_types, excluded_rules) if isinstance(inner, RepeatNode): # A* is represented as OptionalNode(RepeatNode(A)), matching the runtime # OptionalMatcher(RepeatMatcher(A)) structure. Delegate to star-repeat classifier. - return _classify_star_repeat(inner, idx, rule_to_type) + return _classify_star_repeat(inner, idx, rule_types) return None # OptionalNode(ParensNode) etc. - deferred if isinstance(child, RepeatNode): - return _classify_plus_repeat(child, idx, rule_to_type) + return _classify_plus_repeat(child, idx, rule_types) if isinstance(child, ParensNode): if isinstance(child.inner, ListMacroNode): - return _classify_parens_list(child.inner, idx, rule_to_type) - return _classify_parens(child.inner, idx, rule_to_type) + return _classify_parens_list(child.inner, idx, rule_types) + return _classify_parens(child.inner, idx, rule_types) if isinstance(child, ListMacroNode): - return _classify_list_macro(child.inner, idx, rule_to_type) + return _classify_list_macro(child.inner, idx, rule_types) return None -def classify_sequence_elements(children, rule_to_type, excluded_rules): +def classify_sequence_elements(children, rule_types, excluded_rules): """ Classify all children of a SequenceNode. Mirrors the token loop in MatcherFactory::CreateMatcher(). @@ -556,7 +576,7 @@ def classify_sequence_elements(children, rule_to_type, excluded_rules): """ elements = [] for idx, child in enumerate(children): - elem = classify_sequence_element(child, idx, rule_to_type, excluded_rules) + elem = classify_sequence_element(child, idx, rule_types, excluded_rules) if elem is None: return None elements.append(elem) @@ -570,7 +590,14 @@ def classify_sequence_elements(children, rule_to_type, excluded_rules): def generate_sequence_body_decl(rule_name, return_type, elements): """Declaration for the hand-written body that receives extracted typed args.""" - params = ", ".join(f"{e.cpp_type} {e.var_name}" for e in elements if not e.skip) + def _param_decl(e): + # Move-only types (unique_ptr, vector>) are passed by value. + # Everything else (structs, strings, primitives) uses const T & to avoid tidy warnings. + if e.by_value: + return f"{e.cpp_type} {e.var_name}" + return f"const {e.cpp_type} &{e.var_name}" + + params = ", ".join(_param_decl(e) for e in elements if not e.skip) return f"\tstatic {return_type} Transform{rule_name}({params});\n" @@ -592,7 +619,13 @@ def generate_sequence_internal(rule_name, return_type, elements): for elem in semantic: body.extend(elem.extraction_lines) - arg_names = ", ".join(e.var_name for e in semantic) + def _param_arg(e): + # by_value=True means move-only; transfer ownership to body via std::move. + if e.by_value: + return f"std::move({e.var_name})" + return e.var_name + + arg_names = ", ".join(_param_arg(e) for e in semantic) body.append(f"\treturn Transform{rule_name}({arg_names});") return ( f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" @@ -610,7 +643,7 @@ class GramFileResult: manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written -def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): +def collect_generated(gram_stem, rules, rule_types, excluded_rules): """Classify all rules; return a GramFileResult.""" declarations = [] implementations = [] @@ -631,7 +664,7 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): continue if is_pure_reference_choice(ast): - _, identifier_alts, unknown_alts = classify_choice_alternatives(ast.alternatives, rule_to_type) + _, identifier_alts, unknown_alts = classify_choice_alternatives(ast.alternatives, rule_types) if unknown_alts: skipped.append((rule_name, f"choice has unknown alternatives: {unknown_alts}")) continue @@ -653,7 +686,7 @@ def collect_generated(gram_stem, rules, rule_to_type, excluded_rules): continue if isinstance(ast, SequenceNode): - elements = classify_sequence_elements(ast.children, rule_to_type, excluded_rules) + elements = classify_sequence_elements(ast.children, rule_types, excluded_rules) if elements is not None: declarations.append(generate_internal_declaration(rule_name, return_type)) declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) @@ -721,7 +754,15 @@ def write_cpp_file(implementations, gram_stem): def write_hpp(all_declarations): hpp_path = include_peg_dir / "peg_transformer_generated.hpp" - hpp_path.write_text(GENERATED_HEADER + "".join(all_declarations)) + # This file is #include-d inside the PEGTransformerFactory class body, so it is not a + # valid standalone header. Suppress clang-tidy to avoid false positives from unknown types. + content = ( + GENERATED_HEADER + + "// NOLINTBEGIN\n" + + "".join(all_declarations) + + "// NOLINTEND\n" + ) + hpp_path.write_text(content) print(f"Wrote {hpp_path}") @@ -751,7 +792,7 @@ def print_manual_steps(registrations, gram_stem): ) -def process_gram_file(gram_filename, rule_to_type, excluded_rules): +def process_gram_file(gram_filename, rule_types, excluded_rules): """Parse a .gram file and classify all its rules into a GramFileResult.""" gram_stem = gram_filename.removesuffix('.gram') gram_path = statements_dir / gram_filename @@ -760,11 +801,11 @@ def process_gram_file(gram_filename, rule_to_type, excluded_rules): except Exception as e: raise Exception(f"{gram_filename}: {e}") from None - for rule_name, return_type in rule_to_type.items(): + for rule_name, info in rule_types.items(): if rule_name in rules: - rules[rule_name].return_type = return_type + rules[rule_name].return_type = info.cpp_type - return collect_generated(gram_stem, rules, rule_to_type, excluded_rules) + return collect_generated(gram_stem, rules, rule_types, excluded_rules) def main(): @@ -773,8 +814,8 @@ def main(): args = arg_parser.parse_args() gram_files_to_gen = ['use.gram', 'transaction.gram', 'detach.gram', 'export.gram'] - rule_to_type, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') - results = [process_gram_file(f, rule_to_type, excluded_rules) for f in gram_files_to_gen] + rule_types, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') + results = [process_gram_file(f, rule_types, excluded_rules) for f in gram_files_to_gen] if args.write: all_declarations = [d for r in results for d in r.declarations] diff --git a/scripts/parser/generate_transformer.py b/scripts/parser/generate_transformer.py index f5ba78362afb..f1bd132ad940 100644 --- a/scripts/parser/generate_transformer.py +++ b/scripts/parser/generate_transformer.py @@ -1,6 +1,7 @@ import argparse import re import sys +from dataclasses import dataclass from pathlib import Path try: @@ -30,11 +31,21 @@ DIRECT_REGISTER_REGEX = re.compile(r'Register\s*\(\s*"(\w+)"\s*,') +@dataclass +class GrammarTypeInfo: + """Per-rule type metadata loaded from grammar_types.yml.""" + + cpp_type: str + by_value: bool = False # True for unique_ptr, vector>, bool, int64_t + + def load_grammar_types(types_file): """ - Loads grammar_types.yml and returns (rule_to_type, excluded_rules) where - rule_to_type maps rule name -> C++ return type, and excluded_rules is the - set of rules that should be skipped during stub generation. + Loads grammar_types.yml and returns (rule_types, excluded_rules) where + rule_types maps rule name -> GrammarTypeInfo (cpp_type + by_value), and excluded_rules is + the set of rules that should be skipped during stub generation. + Override rules default to by_value=False; a startswith('unique_ptr<') fallback covers + any override types that are move-only. """ if yaml is None: print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) @@ -51,25 +62,25 @@ def load_grammar_types(types_file): print(f"Error: {types_file} is malformed (expected a top-level mapping).", file=sys.stderr) sys.exit(1) - rule_to_type = {} + rule_types = {} rule_to_source = {} # tracks where each rule was first seen for error messages duplicates = [] - def register(name, cpp_type, source): + def register(name, cpp_type, by_value, source): name = str(name) - if name in rule_to_type: + if name in rule_types: duplicates.append(f" '{name}' in '{source}' (already listed in '{rule_to_source[name]}')") else: - rule_to_type[name] = str(cpp_type) + rule_types[name] = GrammarTypeInfo(cpp_type=str(cpp_type), by_value=by_value) rule_to_source[name] = source - # Top-level overrides: flat RuleName -> "type" map + # Top-level overrides: flat RuleName -> "type" map (no by_value annotation) overrides = data.get("overrides", {}) if isinstance(overrides, dict): for name, cpp_type in overrides.items(): - register(name, cpp_type, "overrides") + register(name, cpp_type, False, "overrides") - # Category entries: CategoryName -> {type: "...", rules: [...]} + # Category entries: CategoryName -> {type: "...", by_value: bool, rules: [...]} for key, value in data.items(): if key in ("overrides", "excluded_rules"): continue @@ -79,8 +90,9 @@ def register(name, cpp_type, source): rules = value.get("rules", []) if not cpp_type or not isinstance(rules, list): continue + by_value = bool(value.get("by_value", False)) for name in rules: - register(name, cpp_type, key) + register(name, cpp_type, by_value, key) if duplicates: print(f"Error: {types_file} contains duplicate rule listings:", file=sys.stderr) @@ -89,7 +101,7 @@ def register(name, cpp_type, source): sys.exit(1) excluded_rules = set(data.get("excluded_rules", [])) - return rule_to_type, excluded_rules + return rule_types, excluded_rules def find_grammar_rules(grammar_path): @@ -220,10 +232,10 @@ def generate_implementation_stub(rule_name, cpp_type): """ -def generate_code_for_missing_rules(generation_queue, rule_to_type): +def generate_code_for_missing_rules(generation_queue, rule_types): """ Iterates the generation queue and prints stub code, grouped by rule. - Caller is responsible for ensuring all rules have types in rule_to_type. + Caller is responsible for ensuring all rules have entries in rule_types. """ if not generation_queue: print("\nNo missing rules to generate.") @@ -239,7 +251,7 @@ def generate_code_for_missing_rules(generation_queue, rule_to_type): for rule_name, cpp_filename in sorted(rules_to_generate): cpp_path = TRANSFORMER_DIR / cpp_filename - cpp_type = rule_to_type[rule_name] + cpp_type = rule_types[rule_name].cpp_type # Constraint: Do not generate code for non-existent files if not cpp_path.is_file(): @@ -273,7 +285,7 @@ def main(): args = parser.parse_args() - rule_to_type, excluded_rules = load_grammar_types(GRAMMAR_TYPES_FILE) + rule_types, excluded_rules = load_grammar_types(GRAMMAR_TYPES_FILE) grammar_rules_by_file = find_grammar_rules(Path(GRAMMAR_DIR)) transformer_impls = find_transformer_rules(Path(TRANSFORMER_DIR)) enum_rules, registered_rules, directly_registered_rules = find_factory_registrations(Path(FACTORY_REG_FILE)) @@ -411,14 +423,14 @@ def main(): if args.generate: all_rules_to_generate = [r for rules in generation_queue.values() for r in rules] - missing_from_yaml = [r for r in all_rules_to_generate if r not in rule_to_type] + missing_from_yaml = [r for r in all_rules_to_generate if r not in rule_types] if missing_from_yaml: print("\n--- Error: Missing Return Types in grammar_types.yml ---") print("Add the following rules before generating stubs:") for rule in sorted(missing_from_yaml): print(f" {rule}") sys.exit(1) - generate_code_for_missing_rules(generation_queue, rule_to_type) + generate_code_for_missing_rules(generation_queue, rule_types) if __name__ == "__main__": diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index 4f31d9daf5b6..31730aaf0755 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -30,11 +30,13 @@ SQLStatementList: type: "vector>" + by_value: true rules: - Program SQLStatement: type: "unique_ptr" + by_value: true rules: - Statement - AlterStatement @@ -82,6 +84,7 @@ SQLStatement: SelectStatement: type: "unique_ptr" + by_value: true rules: - DescribeStatement - PivotStatement @@ -101,12 +104,14 @@ SelectStatement: SetStatement: type: "unique_ptr" + by_value: true rules: - SetTimeZone - StandardAssignment QueryNode: type: "unique_ptr" + by_value: true rules: - ShowSelect - ShowTables @@ -115,6 +120,7 @@ QueryNode: SelectNode: type: "unique_ptr" + by_value: true rules: - SelectFrom - SelectFromClause @@ -123,12 +129,14 @@ SelectNode: SetOperationNode: type: "unique_ptr" + by_value: true rules: - SetopClause - SetIntersectClause CreateStatement: type: "unique_ptr" + by_value: true rules: - CreateSchemaStmt - CreateSecretStmt @@ -143,6 +151,7 @@ CreateStatement: DropStatement: type: "unique_ptr" + by_value: true rules: - DropEntries - DropTable @@ -158,6 +167,7 @@ DropStatement: AlterInfo: type: "unique_ptr" + by_value: true rules: - AlterOptions - AlterTableStmt @@ -170,6 +180,7 @@ AlterInfo: AlterTableInfo: type: "unique_ptr" + by_value: true rules: - AlterTableOptions - AddColumn @@ -193,11 +204,13 @@ AlterTableInfo: CreateTypeInfo: type: "unique_ptr" + by_value: true rules: - CreateType MacroFunction: type: "unique_ptr" + by_value: true rules: - MacroDefinition - TableMacroDefinition @@ -205,6 +218,7 @@ MacroFunction: OnConflictInfo: type: "unique_ptr" + by_value: true rules: - OnConflictClause - OnConflictAction @@ -213,6 +227,7 @@ OnConflictInfo: MergeIntoAction: type: "unique_ptr" + by_value: true rules: - MatchedClauseAction - UpdateMatchClause @@ -228,6 +243,7 @@ MergeIntoAction: UpdateSetInfo: type: "unique_ptr" + by_value: true rules: - UpdateSetClause - UpdateSetTuple @@ -236,6 +252,7 @@ UpdateSetInfo: Constraint: type: "unique_ptr" + by_value: true rules: - TopLevelConstraint - TopLevelConstraintList @@ -246,11 +263,13 @@ Constraint: ForeignKeyConstraint: type: "unique_ptr" + by_value: true rules: - ForeignKeyConstraint WindowExpression: type: "unique_ptr" + by_value: true rules: - OverClause - WindowFrame @@ -262,6 +281,7 @@ WindowExpression: SampleOptions: type: "unique_ptr" + by_value: true rules: - SampleClause - SampleEntry @@ -271,6 +291,7 @@ SampleOptions: ResultModifier: type: "unique_ptr" + by_value: true rules: - LimitOffset - OffsetLimitClause @@ -278,12 +299,14 @@ ResultModifier: AtClause: type: "unique_ptr" + by_value: true rules: - AtClause - AtSpecifier Expression: type: "unique_ptr" + by_value: true rules: - Expression - SingleExpression @@ -421,6 +444,7 @@ Expression: ColumnRefExpression: type: "unique_ptr" + by_value: true rules: - NestedColumnName - CatalogReservedSchemaTableColumnName @@ -429,6 +453,7 @@ ColumnRefExpression: TableRef: type: "unique_ptr" + by_value: true rules: - TableRef - FromClause @@ -456,6 +481,7 @@ TableRef: BaseTableRef: type: "unique_ptr" + by_value: true rules: - BaseTableName - SchemaReservedTable @@ -469,6 +495,7 @@ BaseTableRef: ExpressionList: type: "vector>" + by_value: true rules: - TypeModifiers - BoundedListExpression @@ -495,16 +522,19 @@ ExpressionList: TableRefList: type: "vector>" + by_value: true rules: - DeleteUsingClause ResultModifierList: type: "vector>" + by_value: true rules: - ResultModifiers OrderByNodeList: type: "vector" + by_value: false rules: - OrderByClause - OrderByExpressions @@ -514,11 +544,13 @@ OrderByNodeList: MacroParameterList: type: "vector" + by_value: false rules: - MacroParameters StringList: type: "vector" + by_value: false rules: - ColumnIdList - InsertColumnList @@ -530,6 +562,7 @@ StringList: GenericCopyOptionList: type: "vector" + by_value: true rules: - AttachOptions - GenericCopyOptionList @@ -539,6 +572,7 @@ GenericCopyOptionList: WindowBoundaryExpressionList: type: "vector" + by_value: false rules: - FrameExtent - BetweenFrameExtent @@ -546,12 +580,14 @@ WindowBoundaryExpressionList: PivotColumnList: type: "vector" + by_value: false rules: - PivotColumnList - PivotOn PivotColumnEntryList: type: "vector" + by_value: false rules: - PivotTargetList - UnpivotTargetList @@ -562,6 +598,7 @@ PivotColumnEntryList: QualifiedName: type: "QualifiedName" + by_value: false rules: - QualifiedSequenceName - IdentifierOrStringLiteral @@ -580,11 +617,13 @@ QualifiedName: OrderByNode: type: "OrderByNode" + by_value: false rules: - OrderByExpression GroupByNode: type: "GroupByNode" + by_value: false rules: - GroupByClause - GroupByExpressions @@ -593,6 +632,7 @@ GroupByNode: LimitPercentResult: type: "LimitPercentResult" + by_value: false rules: - LimitClause - LimitValue @@ -604,6 +644,7 @@ LimitPercentResult: DistinctClause: type: "DistinctClause" + by_value: false rules: - DistinctClause - DistinctOn @@ -611,6 +652,7 @@ DistinctClause: TableAlias: type: "TableAlias" + by_value: false rules: - TableAlias - TableAliasAs @@ -618,6 +660,7 @@ TableAlias: JoinPrefix: type: "JoinPrefix" + by_value: false rules: - JoinPrefix - CrossJoinPrefix @@ -626,6 +669,7 @@ JoinPrefix: JoinQualifier: type: "JoinQualifier" + by_value: false rules: - JoinQualifier - OnClause @@ -633,11 +677,13 @@ JoinQualifier: WindowFrame: type: "WindowFrame" + by_value: false rules: - FrameClause WindowBoundaryExpression: type: "WindowBoundaryExpression" + by_value: false rules: - FrameBound - FrameUnbounded @@ -646,6 +692,7 @@ WindowBoundaryExpression: PivotColumn: type: "PivotColumn" + by_value: false rules: - PivotColumnEntry - PivotColumnSubquery @@ -655,6 +702,7 @@ PivotColumn: GenericCopyOption: type: "GenericCopyOption" + by_value: false rules: - SpecializedOption - SingleOption @@ -670,6 +718,7 @@ GenericCopyOption: MacroParameter: type: "MacroParameter" + by_value: false rules: - MacroParameter - SimpleParameter @@ -677,6 +726,7 @@ MacroParameter: TriggerEventInfo: type: "TriggerEventInfo" + by_value: false rules: - TriggerEvent - TriggerEventInsert @@ -686,6 +736,7 @@ TriggerEventInfo: VacuumOptions: type: "VacuumOptions" + by_value: false rules: - VacuumOptions - VacuumLegacyOptions @@ -693,12 +744,14 @@ VacuumOptions: SettingInfo: type: "SettingInfo" + by_value: false rules: - SetSetting - SetVariable PartitionSortedOptions: type: "PartitionSortedOptions" + by_value: false rules: - PartitionSortedOptions - PartitionOptSortedOptions @@ -706,6 +759,7 @@ PartitionSortedOptions: ColumnElements: type: "ColumnElements" + by_value: false rules: - CreateColumnList - CreateTableColumnList @@ -716,6 +770,7 @@ ColumnElements: String: type: "string" + by_value: false rules: - AttachAlias - StringLiteral @@ -776,6 +831,7 @@ String: Bool: type: "bool" + by_value: false rules: - TimeZone - WithOrWithout @@ -797,12 +853,14 @@ Bool: Int64: type: "int64_t" + by_value: false rules: - ArrayBounds - SquareBracketsArray LogicalType: type: "LogicalType" + by_value: false rules: - Type - EnumStringLiteralList diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 4ee9031b80f8..428a71bbbb7b 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,35 +1,28 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); -static unique_ptr TransformUseStatement(QualifiedName use_target); -static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); -static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); -static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); -static QualifiedName TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, - vector dot_identifier); -static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); -static string TransformDotIdentifier(string identifier); -static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformBeginTransaction(TransactionModifierType read_or_write); -static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformRollbackTransaction(); -static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformCommitTransaction(); -static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); -static TransactionModifierType TransformReadOrWrite(TransactionModifierType read_only_or_read_write); -static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformDetachStatement(bool if_exists, string catalog_name); -static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformExportStatement(string export_source, string string_literal, - vector generic_copy_option_list); -static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); -static string TransformExportSource(string catalog_name); -static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformImportStatement(string string_literal); +// NOLINTBEGIN + static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformUseStatement(const QualifiedName &use_target); + static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); + static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); + static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); + static QualifiedName TransformUseTargetCatalogSchema(const string &catalog_name, const string &reserved_schema_name, const vector &dot_identifier); + static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); + static string TransformDotIdentifier(const string &identifier); + static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformBeginTransaction(const TransactionModifierType &read_or_write); + static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformRollbackTransaction(); + static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformCommitTransaction(); + static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); + static TransactionModifierType TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write); + static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformDetachStatement(const bool &if_exists, const string &catalog_name); + static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformExportStatement(const string &export_source, const string &string_literal, vector generic_copy_option_list); + static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); + static string TransformExportSource(const string &catalog_name); + static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); + static unique_ptr TransformImportStatement(const string &string_literal); +// NOLINTEND diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index d5b5042bb954..9f46f3e33c17 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,9 +1,11 @@ add_library_unity( - duckdb_parser_peg_transformer_generated OBJECT transform_detach_generated.cpp - transform_export_generated.cpp transform_transaction_generated.cpp + duckdb_parser_peg_transformer_generated + OBJECT + transform_detach_generated.cpp + transform_export_generated.cpp + transform_transaction_generated.cpp transform_use_generated.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} - $ + ${ALL_OBJECT_FILES} $ PARENT_SCOPE) diff --git a/src/parser/peg/transformer/generated/transform_detach_generated.cpp b/src/parser/peg/transformer/generated/transform_detach_generated.cpp index ed13963cd1d0..51ae793fedd6 100644 --- a/src/parser/peg/transformer/generated/transform_detach_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_detach_generated.cpp @@ -3,8 +3,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); bool if_exists {}; transformer.TransformOptional(list_pr, 2, if_exists); diff --git a/src/parser/peg/transformer/generated/transform_export_generated.cpp b/src/parser/peg/transformer/generated/transform_export_generated.cpp index 2db9523b4ceb..753ee1c38503 100644 --- a/src/parser/peg/transformer/generated/transform_export_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_export_generated.cpp @@ -3,25 +3,26 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformExportStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); string export_source {}; transformer.TransformOptional(list_pr, 2, export_source); auto string_literal = transformer.Transform(list_pr, 3); vector generic_copy_option_list {}; transformer.TransformOptional(list_pr, 4, generic_copy_option_list); - return TransformExportStatement(export_source, string_literal, generic_copy_option_list); + return TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); } -string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { +string PEGTransformerFactory::TransformExportSourceInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; return TransformExportSource(catalog_name); } -unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformImportStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto string_literal = transformer.Transform(list_pr, 2); return TransformImportStatement(string_literal); diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp index 57b249bdd22a..19f9d92a264b 100644 --- a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp @@ -3,33 +3,33 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return transformer.Transform>(choice_pr.GetResult()); } -unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); TransactionModifierType read_or_write {}; transformer.TransformOptional(list_pr, 2, read_or_write); return TransformBeginTransaction(read_or_write); } -unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { return TransformRollbackTransaction(); } -unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal( + PEGTransformer &transformer, ParseResult &parse_result) { return TransformCommitTransaction(); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto read_only_or_read_write = transformer.Transform(list_pr, 1); return TransformReadOrWrite(read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index 8a312ba2d927..1df393d6c595 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -3,22 +3,22 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformUseStatementInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto use_target = transformer.Transform(list_pr, 1); return TransformUseStatement(use_target); } -QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return TransformUseTarget(transformer, choice_pr.GetResult()); } -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; auto reserved_schema_name = list_pr.Child(2).identifier; @@ -33,7 +33,8 @@ QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEG return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); } -string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { +string PEGTransformerFactory::TransformDotIdentifierInternal( + PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto identifier = list_pr.Child(1).identifier; return TransformDotIdentifier(identifier); diff --git a/src/parser/peg/transformer/transform_detach.cpp b/src/parser/peg/transformer/transform_detach.cpp index a7364bdc2ff4..f4dc837749af 100644 --- a/src/parser/peg/transformer/transform_detach.cpp +++ b/src/parser/peg/transformer/transform_detach.cpp @@ -3,11 +3,11 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatement(bool if_exists, string catalog_name) { +unique_ptr PEGTransformerFactory::TransformDetachStatement(const bool &if_exists, const string &catalog_name) { auto result = make_uniq(); auto info = make_uniq(); info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - info->name = std::move(catalog_name); + info->name = catalog_name; result->info = std::move(info); return std::move(result); } diff --git a/src/parser/peg/transformer/transform_export.cpp b/src/parser/peg/transformer/transform_export.cpp index 6bf8c8f54605..5ace7a4cd60a 100644 --- a/src/parser/peg/transformer/transform_export.cpp +++ b/src/parser/peg/transformer/transform_export.cpp @@ -4,14 +4,15 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatement(string export_source, string file_path, - vector options) { +unique_ptr PEGTransformerFactory::TransformExportStatement(const string &export_source, + const string &string_literal, + vector generic_copy_option_list) { auto info = make_uniq(); - info->file_path = std::move(file_path); + info->file_path = string_literal; info->format = "csv"; info->is_from = false; - for (auto &option : options) { + for (auto &option : generic_copy_option_list) { if (option.name == "format") { info->format = option.children[0].GetValue(); info->is_format_auto_detected = false; @@ -27,7 +28,7 @@ unique_ptr PEGTransformerFactory::TransformExportStatement(string return std::move(result); } -string PEGTransformerFactory::TransformExportSource(string catalog_name) { +string PEGTransformerFactory::TransformExportSource(const string &catalog_name) { return catalog_name; } diff --git a/src/parser/peg/transformer/transform_import.cpp b/src/parser/peg/transformer/transform_import.cpp index e00d5a560421..1d54d470dae1 100644 --- a/src/parser/peg/transformer/transform_import.cpp +++ b/src/parser/peg/transformer/transform_import.cpp @@ -3,10 +3,10 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformImportStatement(string import) { +unique_ptr PEGTransformerFactory::TransformImportStatement(const string &string_literal) { auto result = make_uniq(); result->info->name = "import_database"; - result->info->parameters.emplace_back(make_uniq(Value(std::move(import)))); + result->info->parameters.emplace_back(make_uniq(Value(string_literal))); return std::move(result); } diff --git a/src/parser/peg/transformer/transform_transaction.cpp b/src/parser/peg/transformer/transform_transaction.cpp index c0a4d2614790..37bc8e2b7e7c 100644 --- a/src/parser/peg/transformer/transform_transaction.cpp +++ b/src/parser/peg/transformer/transform_transaction.cpp @@ -3,13 +3,13 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformBeginTransaction(TransactionModifierType read_or_write) { +unique_ptr PEGTransformerFactory::TransformBeginTransaction(const TransactionModifierType &read_or_write) { auto info = make_uniq(TransactionType::BEGIN_TRANSACTION); info->modifier = read_or_write; return make_uniq(std::move(info)); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(TransactionModifierType read_only_or_read_write) { +TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write) { return read_only_or_read_write; } diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index 2ba54e821abf..ac2bc4dfcd43 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -4,7 +4,7 @@ namespace duckdb { // UseStatement <- 'USE' UseTarget -unique_ptr PEGTransformerFactory::TransformUseStatement(QualifiedName use_target) { +unique_ptr PEGTransformerFactory::TransformUseStatement(const QualifiedName &use_target) { string value_str; if (IsInvalidSchema(use_target.schema)) { value_str = SQLIdentifier::ToString(use_target.name); @@ -27,19 +27,20 @@ QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transfor } // UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(string catalog_name, string reserved_schema_name, - vector dot_identifier) { +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(const string &catalog_name, + const string &reserved_schema_name, + const vector &dot_identifier) { if (!dot_identifier.empty()) { throw ParserException("Expected \"USE database\" or \"USE database.schema\""); } QualifiedName result; result.catalog = INVALID_CATALOG; - result.schema = std::move(catalog_name); - result.name = std::move(reserved_schema_name); + result.schema = catalog_name; + result.name = reserved_schema_name; return result; } -string PEGTransformerFactory::TransformDotIdentifier(string identifier) { +string PEGTransformerFactory::TransformDotIdentifier(const string &identifier) { return identifier; } } // namespace duckdb From b139b063201fd0785bccdb98b0c170afac5959ff Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 13:21:09 +0200 Subject: [PATCH 44/56] Format fix --- scripts/parser/gen_transformer_v2.py | 62 +++++++++++++------ .../transformer/peg_transformer_generated.hpp | 59 ++++++++++-------- .../peg/transformer/generated/CMakeLists.txt | 10 ++- .../generated/transform_detach_generated.cpp | 4 +- .../generated/transform_export_generated.cpp | 11 ++-- .../transform_transaction_generated.cpp | 20 +++--- .../generated/transform_use_generated.cpp | 15 +++-- .../peg/transformer/transform_detach.cpp | 3 +- .../peg/transformer/transform_export.cpp | 6 +- .../peg/transformer/transform_transaction.cpp | 6 +- 10 files changed, 115 insertions(+), 81 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index b65846b19602..b3193ac3ad58 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -377,8 +377,13 @@ def _classify_reference(name, idx, rule_types, excluded_rules): cpp_type = rule_types[name].cpp_type var_name = to_snake_case(name) lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, - by_value=_is_by_value(name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) return None @@ -408,8 +413,13 @@ def _classify_optional_reference(name, idx, rule_types, excluded_rules): f"\t{cpp_type} {var_name} {{}};", f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, - by_value=_is_by_value(name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) return None @@ -435,8 +445,13 @@ def _classify_parens(inner_node, idx, rule_types): f"\tauto {var_name} = transformer.Transform<{cpp_type}>" f"(ExtractResultFromParens(list_pr.GetChild({idx})));", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=cpp_type, - by_value=_is_by_value(name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) return None @@ -461,8 +476,13 @@ def _classify_list_macro(inner_node, idx, rule_types): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", - by_value=_is_by_value(name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) def _classify_parens_list(inner_list_node, idx, rule_types): @@ -486,8 +506,13 @@ def _classify_parens_list(inner_list_node, idx, rule_types): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", - by_value=_is_by_value(name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) def _classify_repeat(node, idx, rule_types, optional): @@ -524,8 +549,13 @@ def _classify_repeat(node, idx, rule_types, optional): f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", f"\t}}", ] - return SeqElement(skip=False, var_name=var_name, cpp_type=f"vector<{child_type}>", - by_value=_is_by_value(ref_name, rule_types), extraction_lines=lines) + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(ref_name, rule_types), + extraction_lines=lines, + ) def _classify_star_repeat(node, idx, rule_types): @@ -590,6 +620,7 @@ def classify_sequence_elements(children, rule_types, excluded_rules): def generate_sequence_body_decl(rule_name, return_type, elements): """Declaration for the hand-written body that receives extracted typed args.""" + def _param_decl(e): # Move-only types (unique_ptr, vector>) are passed by value. # Everything else (structs, strings, primitives) uses const T & to avoid tidy warnings. @@ -756,12 +787,7 @@ def write_hpp(all_declarations): hpp_path = include_peg_dir / "peg_transformer_generated.hpp" # This file is #include-d inside the PEGTransformerFactory class body, so it is not a # valid standalone header. Suppress clang-tidy to avoid false positives from unknown types. - content = ( - GENERATED_HEADER - + "// NOLINTBEGIN\n" - + "".join(all_declarations) - + "// NOLINTEND\n" - ) + content = GENERATED_HEADER + "// NOLINTBEGIN\n" + "".join(all_declarations) + "// NOLINTEND\n" hpp_path.write_text(content) print(f"Wrote {hpp_path}") diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index 428a71bbbb7b..c8725da46afb 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,28 +1,37 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT // NOLINTBEGIN - static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformUseStatement(const QualifiedName &use_target); - static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); - static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTargetCatalogSchema(const string &catalog_name, const string &reserved_schema_name, const vector &dot_identifier); - static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); - static string TransformDotIdentifier(const string &identifier); - static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformBeginTransaction(const TransactionModifierType &read_or_write); - static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformRollbackTransaction(); - static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformCommitTransaction(); - static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); - static TransactionModifierType TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write); - static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformDetachStatement(const bool &if_exists, const string &catalog_name); - static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformExportStatement(const string &export_source, const string &string_literal, vector generic_copy_option_list); - static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); - static string TransformExportSource(const string &catalog_name); - static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformImportStatement(const string &string_literal); +static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformUseStatement(const QualifiedName &use_target); +static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); +static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); +static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); +static QualifiedName TransformUseTargetCatalogSchema(const string &catalog_name, const string &reserved_schema_name, + const vector &dot_identifier); +static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); +static string TransformDotIdentifier(const string &identifier); +static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransaction(const TransactionModifierType &read_or_write); +static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformRollbackTransaction(); +static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformCommitTransaction(); +static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); +static TransactionModifierType TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write); +static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformDetachStatement(const bool &if_exists, const string &catalog_name); +static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformExportStatement(const string &export_source, const string &string_literal, + vector generic_copy_option_list); +static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); +static string TransformExportSource(const string &catalog_name); +static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformImportStatement(const string &string_literal); // NOLINTEND diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt index 9f46f3e33c17..d5b5042bb954 100644 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ b/src/parser/peg/transformer/generated/CMakeLists.txt @@ -1,11 +1,9 @@ add_library_unity( - duckdb_parser_peg_transformer_generated - OBJECT - transform_detach_generated.cpp - transform_export_generated.cpp - transform_transaction_generated.cpp + duckdb_parser_peg_transformer_generated OBJECT transform_detach_generated.cpp + transform_export_generated.cpp transform_transaction_generated.cpp transform_use_generated.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ + ${ALL_OBJECT_FILES} + $ PARENT_SCOPE) diff --git a/src/parser/peg/transformer/generated/transform_detach_generated.cpp b/src/parser/peg/transformer/generated/transform_detach_generated.cpp index 51ae793fedd6..ed13963cd1d0 100644 --- a/src/parser/peg/transformer/generated/transform_detach_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_detach_generated.cpp @@ -3,8 +3,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); bool if_exists {}; transformer.TransformOptional(list_pr, 2, if_exists); diff --git a/src/parser/peg/transformer/generated/transform_export_generated.cpp b/src/parser/peg/transformer/generated/transform_export_generated.cpp index 753ee1c38503..01b8a39bac2f 100644 --- a/src/parser/peg/transformer/generated/transform_export_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_export_generated.cpp @@ -3,8 +3,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); string export_source {}; transformer.TransformOptional(list_pr, 2, export_source); @@ -14,15 +14,14 @@ unique_ptr PEGTransformerFactory::TransformExportStatementInternal return TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); } -string PEGTransformerFactory::TransformExportSourceInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; return TransformExportSource(catalog_name); } -unique_ptr PEGTransformerFactory::TransformImportStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto string_literal = transformer.Transform(list_pr, 2); return TransformImportStatement(string_literal); diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp index 19f9d92a264b..57b249bdd22a 100644 --- a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp @@ -3,33 +3,33 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return transformer.Transform>(choice_pr.GetResult()); } -unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); TransactionModifierType read_or_write {}; transformer.TransformOptional(list_pr, 2, read_or_write); return TransformBeginTransaction(read_or_write); } -unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { return TransformRollbackTransaction(); } -unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { return TransformCommitTransaction(); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto read_only_or_read_write = transformer.Transform(list_pr, 1); return TransformReadOrWrite(read_only_or_read_write); diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp index 1df393d6c595..8a312ba2d927 100644 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ b/src/parser/peg/transformer/generated/transform_use_generated.cpp @@ -3,22 +3,22 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformUseStatementInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto use_target = transformer.Transform(list_pr, 1); return TransformUseStatement(use_target); } -QualifiedName PEGTransformerFactory::TransformUseTargetInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); return TransformUseTarget(transformer, choice_pr.GetResult()); } -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; auto reserved_schema_name = list_pr.Child(2).identifier; @@ -33,8 +33,7 @@ QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal( return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); } -string PEGTransformerFactory::TransformDotIdentifierInternal( - PEGTransformer &transformer, ParseResult &parse_result) { +string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto identifier = list_pr.Child(1).identifier; return TransformDotIdentifier(identifier); diff --git a/src/parser/peg/transformer/transform_detach.cpp b/src/parser/peg/transformer/transform_detach.cpp index f4dc837749af..ecd7c7b33005 100644 --- a/src/parser/peg/transformer/transform_detach.cpp +++ b/src/parser/peg/transformer/transform_detach.cpp @@ -3,7 +3,8 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatement(const bool &if_exists, const string &catalog_name) { +unique_ptr PEGTransformerFactory::TransformDetachStatement(const bool &if_exists, + const string &catalog_name) { auto result = make_uniq(); auto info = make_uniq(); info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; diff --git a/src/parser/peg/transformer/transform_export.cpp b/src/parser/peg/transformer/transform_export.cpp index 5ace7a4cd60a..18b2a71e12d2 100644 --- a/src/parser/peg/transformer/transform_export.cpp +++ b/src/parser/peg/transformer/transform_export.cpp @@ -4,9 +4,9 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatement(const string &export_source, - const string &string_literal, - vector generic_copy_option_list) { +unique_ptr +PEGTransformerFactory::TransformExportStatement(const string &export_source, const string &string_literal, + vector generic_copy_option_list) { auto info = make_uniq(); info->file_path = string_literal; info->format = "csv"; diff --git a/src/parser/peg/transformer/transform_transaction.cpp b/src/parser/peg/transformer/transform_transaction.cpp index 37bc8e2b7e7c..fedb2e773e18 100644 --- a/src/parser/peg/transformer/transform_transaction.cpp +++ b/src/parser/peg/transformer/transform_transaction.cpp @@ -3,13 +3,15 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformBeginTransaction(const TransactionModifierType &read_or_write) { +unique_ptr +PEGTransformerFactory::TransformBeginTransaction(const TransactionModifierType &read_or_write) { auto info = make_uniq(TransactionType::BEGIN_TRANSACTION); info->modifier = read_or_write; return make_uniq(std::move(info)); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write) { +TransactionModifierType +PEGTransformerFactory::TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write) { return read_only_or_read_write; } From 100399dcc9defdaf22d9c51007aabfb9dd261390 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 13:36:10 +0200 Subject: [PATCH 45/56] add define guard to fix tidy check --- scripts/parser/gen_transformer_v2.py | 14 +++++++++++--- .../parser/peg/transformer/peg_transformer.hpp | 2 ++ .../peg/transformer/peg_transformer_generated.hpp | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index b3193ac3ad58..a08db28fb826 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -785,9 +785,17 @@ def write_cpp_file(implementations, gram_stem): def write_hpp(all_declarations): hpp_path = include_peg_dir / "peg_transformer_generated.hpp" - # This file is #include-d inside the PEGTransformerFactory class body, so it is not a - # valid standalone header. Suppress clang-tidy to avoid false positives from unknown types. - content = GENERATED_HEADER + "// NOLINTBEGIN\n" + "".join(all_declarations) + "// NOLINTEND\n" + # This file is #include-d inside the PEGTransformerFactory class body, so it cannot be a + # valid standalone header (types like SQLStatement are only in scope inside the class). + # The #ifdef guard makes the file a no-op when clang-tidy processes it standalone, + # preventing false compilation errors. The guard is defined by peg_transformer.hpp + # immediately before the #include. + content = ( + GENERATED_HEADER + + "#ifdef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP\n" + + "".join(all_declarations) + + "#endif // DUCKDB_INSIDE_PEG_TRANSFORMER_HPP\n" + ) hpp_path.write_text(content) print(f"Wrote {hpp_path}") diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 62bc1d7f80c8..1e706d4decf0 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -1210,7 +1210,9 @@ class PEGTransformerFactory { static string TransformVacuumOption(PEGTransformer &transformer, ParseResult &parse_result); static vector TransformNameList(PEGTransformer &transformer, ParseResult &parse_result); +#define DUCKDB_INSIDE_PEG_TRANSFORMER_HPP #include "duckdb/parser/peg/transformer/peg_transformer_generated.hpp" +#undef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP private: PEGParser parser; diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index c8725da46afb..eec8d4af9528 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,5 +1,5 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -// NOLINTBEGIN +#ifdef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformUseStatement(const QualifiedName &use_target); static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); @@ -34,4 +34,4 @@ static string TransformExportSource(const string &catalog_name); static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformImportStatement(const string &string_literal); -// NOLINTEND +#endif // DUCKDB_INSIDE_PEG_TRANSFORMER_HPP From 77f2a6e9c127b81b2ef30205a24ca4ee54e93a50 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 13:55:58 +0200 Subject: [PATCH 46/56] Move generated rules to a single file --- scripts/parser/gen_transformer_v2.py | 76 ++++--------- src/parser/peg/transformer/CMakeLists.txt | 3 +- .../peg/transformer/generated/CMakeLists.txt | 9 -- .../generated/transform_detach_generated.cpp | 15 --- .../generated/transform_export_generated.cpp | 30 ----- .../transform_transaction_generated.cpp | 38 ------- .../generated/transform_use_generated.cpp | 42 ------- .../peg/transformer/transform_generated.cpp | 107 ++++++++++++++++++ 8 files changed, 132 insertions(+), 188 deletions(-) delete mode 100644 src/parser/peg/transformer/generated/CMakeLists.txt delete mode 100644 src/parser/peg/transformer/generated/transform_detach_generated.cpp delete mode 100644 src/parser/peg/transformer/generated/transform_export_generated.cpp delete mode 100644 src/parser/peg/transformer/generated/transform_transaction_generated.cpp delete mode 100644 src/parser/peg/transformer/generated/transform_use_generated.cpp create mode 100644 src/parser/peg/transformer/transform_generated.cpp diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index a08db28fb826..e28505475e10 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -219,7 +219,6 @@ def rule_to_ast(rule): statements_dir = peg_dir / 'grammar' / 'statements' type_dir = scripts_dir / 'parser' transformer_dir = peg_dir / 'transformer' -generated_dir = transformer_dir / 'generated' include_peg_dir = src_dir / 'include' / 'duckdb' / 'parser' / 'peg' / 'transformer' GENERATED_HEADER = "// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT\n" @@ -753,33 +752,16 @@ def print_output(result: GramFileResult): print("".join(result.registrations)) -def cpp_file_content(implementations): - return ( +def write_cpp(all_implementations): + cpp_path = transformer_dir / "transform_generated.cpp" + content = ( GENERATED_HEADER + '#include "duckdb/parser/peg/transformer/peg_transformer.hpp"\n' + "\nnamespace duckdb {\n\n" - + "\n".join(implementations) + + "\n".join(all_implementations) + "\n} // namespace duckdb\n" ) - - -def cmake_content(cpp_filenames): - files = "\n ".join(cpp_filenames) - return ( - "add_library_unity(\n" - " duckdb_parser_peg_transformer_generated\n" - " OBJECT\n" - f" {files})\n\n" - "set(ALL_OBJECT_FILES\n" - " ${ALL_OBJECT_FILES} $\n" - " PARENT_SCOPE)\n" - ) - - -def write_cpp_file(implementations, gram_stem): - generated_dir.mkdir(parents=True, exist_ok=True) - cpp_path = generated_dir / f"transform_{gram_stem}_generated.cpp" - cpp_path.write_text(cpp_file_content(implementations)) + cpp_path.write_text(content) print(f"Wrote {cpp_path}") @@ -800,30 +782,22 @@ def write_hpp(all_declarations): print(f"Wrote {hpp_path}") -def write_cmake(): - existing_cpp = sorted(p.name for p in generated_dir.glob("*_generated.cpp")) - cmake_path = generated_dir / "CMakeLists.txt" - cmake_path.write_text(cmake_content(existing_cpp)) - print(f"Wrote {cmake_path}") - - -def print_manual_steps(registrations, gram_stem): - reg_lines = "".join(f" {r.strip()}\n" for r in registrations) - print( - f""" -Remaining manual steps: - 1. In {include_peg_dir / 'peg_transformer.hpp'}: - - Add inside class PEGTransformerFactory: - #include "duckdb/parser/peg/transformer/peg_transformer_generated.hpp" - - Remove any declarations now covered by peg_transformer_generated.hpp - 2. In {transformer_dir / 'CMakeLists.txt'}: - - Add: add_subdirectory(generated) - 3. In peg_transformer_factory.cpp Register{gram_stem.capitalize()}(): - - Replace REGISTER_TRANSFORM macros for generated rules with: -{reg_lines} 4. In transform_{gram_stem}.cpp: - - Remove Internal wrappers now generated (keep only hand-written bodies) - - Update body function signatures to match the generated declarations""" - ) +def print_manual_steps(all_results): + print("\nRemaining manual steps:") + print(f" 1. In {include_peg_dir / 'peg_transformer.hpp'}:") + print(" - The #include of peg_transformer_generated.hpp must remain inside the class body") + print(f" 2. In {transformer_dir / 'CMakeLists.txt'}:") + print(" - Ensure transform_generated.cpp is listed in add_library_unity()") + for r in all_results: + if not r.registrations: + continue + reg_lines = "".join(f" {r.strip()}\n" for r in r.registrations) + print(f" 3. In peg_transformer_factory.cpp Register{r.gram_stem.capitalize()}():") + print(f" - Replace REGISTER_TRANSFORM macros for generated rules with:") + print(reg_lines, end="") + print(f" 4. In transform_{r.gram_stem}.cpp:") + print(" - Remove Internal wrappers now generated (keep only hand-written bodies)") + print(" - Update body function signatures to match the generated declarations") def process_gram_file(gram_filename, rule_types, excluded_rules): @@ -854,11 +828,9 @@ def main(): if args.write: all_declarations = [d for r in results for d in r.declarations] write_hpp(all_declarations) - for r in results: - write_cpp_file(r.implementations, r.gram_stem) - write_cmake() - for r in results: - print_manual_steps(r.registrations, r.gram_stem) + all_implementations = [impl for r in results for impl in r.implementations] + write_cpp(all_implementations) + print_manual_steps(results) else: for r in results: print(f"\n{'=' * 60}") diff --git a/src/parser/peg/transformer/CMakeLists.txt b/src/parser/peg/transformer/CMakeLists.txt index 48d7d3ab895c..9b11e62c22eb 100644 --- a/src/parser/peg/transformer/CMakeLists.txt +++ b/src/parser/peg/transformer/CMakeLists.txt @@ -1,10 +1,9 @@ -add_subdirectory(generated) - add_library_unity( duckdb_parser_peg_transformer OBJECT peg_transformer.cpp peg_transformer_factory.cpp + transform_generated.cpp transform_alter.cpp transform_analyze.cpp transform_attach.cpp diff --git a/src/parser/peg/transformer/generated/CMakeLists.txt b/src/parser/peg/transformer/generated/CMakeLists.txt deleted file mode 100644 index d5b5042bb954..000000000000 --- a/src/parser/peg/transformer/generated/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_library_unity( - duckdb_parser_peg_transformer_generated OBJECT transform_detach_generated.cpp - transform_export_generated.cpp transform_transaction_generated.cpp - transform_use_generated.cpp) - -set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} - $ - PARENT_SCOPE) diff --git a/src/parser/peg/transformer/generated/transform_detach_generated.cpp b/src/parser/peg/transformer/generated/transform_detach_generated.cpp deleted file mode 100644 index ed13963cd1d0..000000000000 --- a/src/parser/peg/transformer/generated/transform_detach_generated.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -#include "duckdb/parser/peg/transformer/peg_transformer.hpp" - -namespace duckdb { - -unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - bool if_exists {}; - transformer.TransformOptional(list_pr, 2, if_exists); - auto catalog_name = list_pr.Child(3).identifier; - return TransformDetachStatement(if_exists, catalog_name); -} - -} // namespace duckdb diff --git a/src/parser/peg/transformer/generated/transform_export_generated.cpp b/src/parser/peg/transformer/generated/transform_export_generated.cpp deleted file mode 100644 index 01b8a39bac2f..000000000000 --- a/src/parser/peg/transformer/generated/transform_export_generated.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -#include "duckdb/parser/peg/transformer/peg_transformer.hpp" - -namespace duckdb { - -unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - string export_source {}; - transformer.TransformOptional(list_pr, 2, export_source); - auto string_literal = transformer.Transform(list_pr, 3); - vector generic_copy_option_list {}; - transformer.TransformOptional(list_pr, 4, generic_copy_option_list); - return TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); -} - -string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto catalog_name = list_pr.Child(0).identifier; - return TransformExportSource(catalog_name); -} - -unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto string_literal = transformer.Transform(list_pr, 2); - return TransformImportStatement(string_literal); -} - -} // namespace duckdb diff --git a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp b/src/parser/peg/transformer/generated/transform_transaction_generated.cpp deleted file mode 100644 index 57b249bdd22a..000000000000 --- a/src/parser/peg/transformer/generated/transform_transaction_generated.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -#include "duckdb/parser/peg/transformer/peg_transformer.hpp" - -namespace duckdb { - -unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - return transformer.Transform>(choice_pr.GetResult()); -} - -unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - TransactionModifierType read_or_write {}; - transformer.TransformOptional(list_pr, 2, read_or_write); - return TransformBeginTransaction(read_or_write); -} - -unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - return TransformRollbackTransaction(); -} - -unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - return TransformCommitTransaction(); -} - -TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto read_only_or_read_write = transformer.Transform(list_pr, 1); - return TransformReadOrWrite(read_only_or_read_write); -} - -} // namespace duckdb diff --git a/src/parser/peg/transformer/generated/transform_use_generated.cpp b/src/parser/peg/transformer/generated/transform_use_generated.cpp deleted file mode 100644 index 8a312ba2d927..000000000000 --- a/src/parser/peg/transformer/generated/transform_use_generated.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT -#include "duckdb/parser/peg/transformer/peg_transformer.hpp" - -namespace duckdb { - -unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto use_target = transformer.Transform(list_pr, 1); - return TransformUseStatement(use_target); -} - -QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - return TransformUseTarget(transformer, choice_pr.GetResult()); -} - -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto catalog_name = list_pr.Child(0).identifier; - auto reserved_schema_name = list_pr.Child(2).identifier; - auto &dot_identifier_opt = list_pr.Child(3); - vector dot_identifier; - if (dot_identifier_opt.HasResult()) { - auto &dot_identifier_repeat = dot_identifier_opt.GetResult().Cast(); - for (auto &dot_identifier_item : dot_identifier_repeat.GetChildren()) { - dot_identifier.push_back(transformer.Transform(dot_identifier_item)); - } - } - return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); -} - -string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto identifier = list_pr.Child(1).identifier; - return TransformDotIdentifier(identifier); -} - -} // namespace duckdb diff --git a/src/parser/peg/transformer/transform_generated.cpp b/src/parser/peg/transformer/transform_generated.cpp new file mode 100644 index 000000000000..3ba5bfd91d31 --- /dev/null +++ b/src/parser/peg/transformer/transform_generated.cpp @@ -0,0 +1,107 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto use_target = transformer.Transform(list_pr, 1); + return TransformUseStatement(use_target); +} + +QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + return TransformUseTarget(transformer, choice_pr.GetResult()); +} + +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + auto reserved_schema_name = list_pr.Child(2).identifier; + auto &dot_identifier_opt = list_pr.Child(3); + vector dot_identifier; + if (dot_identifier_opt.HasResult()) { + auto &dot_identifier_repeat = dot_identifier_opt.GetResult().Cast(); + for (auto &dot_identifier_item : dot_identifier_repeat.GetChildren()) { + dot_identifier.push_back(transformer.Transform(dot_identifier_item)); + } + } + return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); +} + +string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto identifier = list_pr.Child(1).identifier; + return TransformDotIdentifier(identifier); +} + +unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + return transformer.Transform>(choice_pr.GetResult()); +} + +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + TransactionModifierType read_or_write {}; + transformer.TransformOptional(list_pr, 2, read_or_write); + return TransformBeginTransaction(read_or_write); +} + +unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + return TransformRollbackTransaction(); +} + +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + return TransformCommitTransaction(); +} + +TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto read_only_or_read_write = transformer.Transform(list_pr, 1); + return TransformReadOrWrite(read_only_or_read_write); +} + +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + bool if_exists {}; + transformer.TransformOptional(list_pr, 2, if_exists); + auto catalog_name = list_pr.Child(3).identifier; + return TransformDetachStatement(if_exists, catalog_name); +} + +unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + string export_source {}; + transformer.TransformOptional(list_pr, 2, export_source); + auto string_literal = transformer.Transform(list_pr, 3); + vector generic_copy_option_list {}; + transformer.TransformOptional(list_pr, 4, generic_copy_option_list); + return TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); +} + +string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + return TransformExportSource(catalog_name); +} + +unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto string_literal = transformer.Transform(list_pr, 2); + return TransformImportStatement(string_literal); +} + +} // namespace duckdb From b57d8bed885eafa8444972ed117ed2c445877d25 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 14:39:15 +0200 Subject: [PATCH 47/56] Auto register the generated rules --- scripts/parser/gen_transformer_v2.py | 56 ++++++--- .../peg/transformer/peg_transformer.hpp | 13 ++- .../transformer/peg_transformer_generated.hpp | 46 ++++---- .../transformer/peg_transformer_factory.cpp | 33 +----- .../peg/transformer/transform_generated.cpp | 110 ++++++++++++------ 5 files changed, 147 insertions(+), 111 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index e28505475e10..1a4e1d5bc16b 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -229,15 +229,15 @@ def to_snake_case(name): return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower() -def generate_internal_declaration(rule_name, return_type): +def generate_internal_declaration(rule_name): return ( - f"\tstatic {return_type} Transform{rule_name}Internal" + f"\tstatic unique_ptr Transform{rule_name}Internal" f"(PEGTransformer &transformer, ParseResult &parse_result);\n" ) def generate_registration(rule_name): - return f'Register("{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal);\n' + return f'\t{{"{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal}},\n' # --------------------------------------------------------------------------- @@ -275,14 +275,15 @@ def classify_choice_alternatives(alternatives, rule_types): def generate_choice_internal_full(rule_name, return_type): """ Fully auto-generated Internal for a pure-transformer choice rule. - All alternatives have registered transformers so we can delegate directly. + Static class member matching transform_function_t for the static TransformRule table. """ return ( - f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" f"\tauto &list_pr = parse_result.Cast();\n" f"\tauto &choice_pr = list_pr.Child(0);\n" - f"\treturn transformer.Transform<{return_type}>(choice_pr.GetResult());\n" + f"\tauto result = transformer.Transform<{return_type}>(choice_pr.GetResult());\n" + f"\treturn make_uniq>(std::move(result));\n" f"}}\n" ) @@ -290,14 +291,15 @@ def generate_choice_internal_full(rule_name, return_type): def generate_choice_internal_with_body(rule_name, return_type): """ Internal for a choice rule that has identifier-override alternatives. - Extracts the ChoiceParseResult then delegates to a hand-written body. + Static class member matching transform_function_t for the static TransformRule table. """ return ( - f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" f"\tauto &list_pr = parse_result.Cast();\n" f"\tauto &choice_pr = list_pr.Child(0);\n" - f"\treturn Transform{rule_name}(transformer, choice_pr.GetResult());\n" + f"\tauto result = Transform{rule_name}(transformer, choice_pr.GetResult());\n" + f"\treturn make_uniq>(std::move(result));\n" f"}}\n" ) @@ -633,9 +635,9 @@ def _param_decl(e): def generate_sequence_internal(rule_name, return_type, elements): """ - Generate the Internal wrapper for a sequence rule. - Casts parse_result to ListParseResult, extracts each semantic element - into a typed local variable, then calls the hand-written body with those args. + Generate the Internal static class member for a sequence rule. + Returns unique_ptr matching transform_function_t for the static table. + Extracts typed args from parse_result, calls the hand-written body, then boxes via TypedTransformResult. """ semantic = [e for e in elements if not e.skip] has_semantic_elements = len(semantic) > 0 @@ -656,9 +658,10 @@ def _param_arg(e): return e.var_name arg_names = ", ".join(_param_arg(e) for e in semantic) - body.append(f"\treturn Transform{rule_name}({arg_names});") + body.append(f"\tauto result = Transform{rule_name}({arg_names});") + body.append(f"\treturn make_uniq>(std::move(result));") return ( - f"{return_type} PEGTransformerFactory::Transform{rule_name}Internal(\n" + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" ) @@ -699,7 +702,7 @@ def collect_generated(gram_stem, rules, rule_types, excluded_rules): skipped.append((rule_name, f"choice has unknown alternatives: {unknown_alts}")) continue - declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_internal_declaration(rule_name)) registrations.append(generate_registration(rule_name)) if not identifier_alts: @@ -718,7 +721,7 @@ def collect_generated(gram_stem, rules, rule_types, excluded_rules): if isinstance(ast, SequenceNode): elements = classify_sequence_elements(ast.children, rule_types, excluded_rules) if elements is not None: - declarations.append(generate_internal_declaration(rule_name, return_type)) + declarations.append(generate_internal_declaration(rule_name)) declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) implementations.append(generate_sequence_internal(rule_name, return_type, elements)) registrations.append(generate_registration(rule_name)) @@ -752,13 +755,29 @@ def print_output(result: GramFileResult): print("".join(result.registrations)) -def write_cpp(all_implementations): +def generate_table_and_register(all_registrations): + entries = "".join("\t\t" + e.lstrip() for e in all_registrations) + return ( + "void PEGTransformerFactory::RegisterGenerated() {\n" + + "\tstatic const TransformRule builtin_transform_rules[] = {\n" + + entries + + "\t};\n" + + "\tfor (const auto &rule : builtin_transform_rules) {\n" + + "\t\tsql_transform_functions[rule.name] = rule.transform;\n" + + "\t}\n" + + "}\n" + ) + + +def write_cpp(all_implementations, all_registrations): cpp_path = transformer_dir / "transform_generated.cpp" content = ( GENERATED_HEADER + '#include "duckdb/parser/peg/transformer/peg_transformer.hpp"\n' + "\nnamespace duckdb {\n\n" + "\n".join(all_implementations) + + "\n" + + generate_table_and_register(all_registrations) + "\n} // namespace duckdb\n" ) cpp_path.write_text(content) @@ -829,7 +848,8 @@ def main(): all_declarations = [d for r in results for d in r.declarations] write_hpp(all_declarations) all_implementations = [impl for r in results for impl in r.implementations] - write_cpp(all_implementations) + all_registrations = [reg for r in results for reg in r.registrations] + write_cpp(all_implementations, all_registrations) print_manual_steps(results) else: for r in results: diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 1e706d4decf0..5a08b9c553ec 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -208,6 +208,14 @@ class PEGTransformer { ParserOptions options; }; +typedef unique_ptr (*transform_function_t)(PEGTransformer &transformer, + ParseResult &parse_result); + +struct TransformRule { + const char *name; + transform_function_t transform; +}; + class PEGTransformerFactory { public: explicit PEGTransformerFactory(); @@ -269,14 +277,11 @@ class PEGTransformerFactory { void RegisterCreateTrigger(); void RegisterDeallocate(); void RegisterDelete(); - void RegisterDetach(); void RegisterDescribe(); void RegisterDrop(); void RegisterExecute(); void RegisterExplain(); - void RegisterExport(); void RegisterExpression(); - void RegisterImport(); void RegisterInsert(); void RegisterLoad(); void RegisterMergeInto(); @@ -284,13 +289,13 @@ class PEGTransformerFactory { void RegisterPragma(); void RegisterPrepare(); void RegisterSelect(); - void RegisterUse(); void RegisterSet(); void RegisterTransaction(); void RegisterUpdate(); void RegisterVacuum(); void RegisterKeywordsAndIdentifiers(); void RegisterEnums(); + void RegisterGenerated(); private: template diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp index eec8d4af9528..288eb86dbfb3 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -1,37 +1,43 @@ // AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT #ifdef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP -static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformUseStatement(const QualifiedName &use_target); -static QualifiedName TransformUseTargetInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result); static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); -static QualifiedName TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, + ParseResult &parse_result); static QualifiedName TransformUseTargetCatalogSchema(const string &catalog_name, const string &reserved_schema_name, const vector &dot_identifier); -static string TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformDotIdentifierInternal(PEGTransformer &transformer, + ParseResult &parse_result); static string TransformDotIdentifier(const string &identifier); -static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); -static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformBeginTransaction(const TransactionModifierType &read_or_write); -static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformRollbackTransaction(); -static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformCommitTransaction(); -static TransactionModifierType TransformReadOrWriteInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result); static TransactionModifierType TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write); -static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformDetachStatement(const bool &if_exists, const string &catalog_name); -static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformExportStatement(const string &export_source, const string &string_literal, vector generic_copy_option_list); -static string TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result); +static unique_ptr TransformExportSourceInternal(PEGTransformer &transformer, + ParseResult &parse_result); static string TransformExportSource(const string &catalog_name); -static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result); +static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); static unique_ptr TransformImportStatement(const string &string_literal); #endif // DUCKDB_INSIDE_PEG_TRANSFORMER_HPP diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index 1f71b91ba30f..35775dfd87fa 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -432,11 +432,6 @@ void PEGTransformerFactory::RegisterDescribe() { REGISTER_TRANSFORM(TransformSummarize); } -void PEGTransformerFactory::RegisterDetach() { - // detach.gram - Register("DetachStatement", &TransformDetachStatementInternal); -} - void PEGTransformerFactory::RegisterDrop() { // drop.gram REGISTER_TRANSFORM(TransformDropStatement); @@ -474,11 +469,6 @@ void PEGTransformerFactory::RegisterExplain() { Register("ExplainOptionName", &TransformIdentifierOrKeyword); } -void PEGTransformerFactory::RegisterExport() { - Register("ExportSource", &TransformExportSourceInternal); - Register("ExportStatement", &TransformExportStatementInternal); -} - void PEGTransformerFactory::RegisterExpression() { // expression.gram REGISTER_TRANSFORM(TransformExpressionStatement); @@ -654,10 +644,6 @@ void PEGTransformerFactory::RegisterExpression() { REGISTER_TRANSFORM(TransformIgnoreOrRespectNulls); } -void PEGTransformerFactory::RegisterImport() { - Register("ImportStatement", &TransformImportStatementInternal); -} - void PEGTransformerFactory::RegisterInsert() { // insert.gram REGISTER_TRANSFORM(TransformInsertStatement); @@ -882,14 +868,6 @@ void PEGTransformerFactory::RegisterSelect() { REGISTER_TRANSFORM(TransformRepeatableSample); } -void PEGTransformerFactory::RegisterUse() { - // use.gram - Register("UseStatement", &PEGTransformerFactory::TransformUseStatementInternal); - Register("UseTarget", &PEGTransformerFactory::TransformUseTargetInternal); - Register("UseTargetCatalogSchema", &PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal); - Register("DotIdentifier", &PEGTransformerFactory::TransformDotIdentifierInternal); -} - void PEGTransformerFactory::RegisterSet() { // set.gram REGISTER_TRANSFORM(TransformResetStatement); @@ -906,12 +884,6 @@ void PEGTransformerFactory::RegisterSet() { } void PEGTransformerFactory::RegisterTransaction() { - // transaction.gram - Register("TransactionStatement", &PEGTransformerFactory::TransformTransactionStatementInternal); - Register("ReadOrWrite", &PEGTransformerFactory::TransformReadOrWriteInternal); - Register("BeginTransaction", &PEGTransformerFactory::TransformBeginTransactionInternal); - Register("CommitTransaction", &PEGTransformerFactory::TransformCommitTransactionInternal); - Register("RollbackTransaction", &PEGTransformerFactory::TransformRollbackTransactionInternal); REGISTER_TRANSFORM(TransformReadOnlyOrReadWrite); } @@ -1101,6 +1073,7 @@ void PEGTransformerFactory::RegisterEnums() { } PEGTransformerFactory::PEGTransformerFactory() { + RegisterGenerated(); REGISTER_TRANSFORM(TransformStatement); RegisterAlter(); RegisterAttach(); @@ -1121,14 +1094,11 @@ PEGTransformerFactory::PEGTransformerFactory() { RegisterCreateTrigger(); RegisterDeallocate(); RegisterDelete(); - RegisterDetach(); RegisterDescribe(); RegisterDrop(); RegisterExecute(); RegisterExplain(); - RegisterExport(); RegisterExpression(); - RegisterImport(); RegisterInsert(); RegisterLoad(); RegisterMergeInto(); @@ -1136,7 +1106,6 @@ PEGTransformerFactory::PEGTransformerFactory() { RegisterPragma(); RegisterPrepare(); RegisterSelect(); - RegisterUse(); RegisterSet(); RegisterTransaction(); RegisterUpdate(); diff --git a/src/parser/peg/transformer/transform_generated.cpp b/src/parser/peg/transformer/transform_generated.cpp index 3ba5bfd91d31..c71104407d98 100644 --- a/src/parser/peg/transformer/transform_generated.cpp +++ b/src/parser/peg/transformer/transform_generated.cpp @@ -3,22 +3,24 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto use_target = transformer.Transform(list_pr, 1); - return TransformUseStatement(use_target); + auto result = TransformUseStatement(use_target); + return make_uniq>>(std::move(result)); } -QualifiedName PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); - return TransformUseTarget(transformer, choice_pr.GetResult()); + auto result = TransformUseTarget(transformer, choice_pr.GetResult()); + return make_uniq>(std::move(result)); } -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr +PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; auto reserved_schema_name = list_pr.Child(2).identifier; @@ -30,78 +32,112 @@ QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEG dot_identifier.push_back(transformer.Transform(dot_identifier_item)); } } - return TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); + auto result = TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); + return make_uniq>(std::move(result)); } -string PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto identifier = list_pr.Child(1).identifier; - return TransformDotIdentifier(identifier); + auto result = TransformDotIdentifier(identifier); + return make_uniq>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr +PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); - return transformer.Transform>(choice_pr.GetResult()); + auto result = transformer.Transform>(choice_pr.GetResult()); + return make_uniq>>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); TransactionModifierType read_or_write {}; transformer.TransformOptional(list_pr, 2, read_or_write); - return TransformBeginTransaction(read_or_write); + auto result = TransformBeginTransaction(read_or_write); + return make_uniq>>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - return TransformRollbackTransaction(); +unique_ptr +PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto result = TransformRollbackTransaction(); + return make_uniq>>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, - ParseResult &parse_result) { - return TransformCommitTransaction(); +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto result = TransformCommitTransaction(); + return make_uniq>>(std::move(result)); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto read_only_or_read_write = transformer.Transform(list_pr, 1); - return TransformReadOrWrite(read_only_or_read_write); + auto result = TransformReadOrWrite(read_only_or_read_write); + return make_uniq>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); bool if_exists {}; transformer.TransformOptional(list_pr, 2, if_exists); auto catalog_name = list_pr.Child(3).identifier; - return TransformDetachStatement(if_exists, catalog_name); + auto result = TransformDetachStatement(if_exists, catalog_name); + return make_uniq>>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); string export_source {}; transformer.TransformOptional(list_pr, 2, export_source); auto string_literal = transformer.Transform(list_pr, 3); vector generic_copy_option_list {}; transformer.TransformOptional(list_pr, 4, generic_copy_option_list); - return TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); + auto result = TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); + return make_uniq>>(std::move(result)); } -string PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; - return TransformExportSource(catalog_name); + auto result = TransformExportSource(catalog_name); + return make_uniq>(std::move(result)); } -unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto string_literal = transformer.Transform(list_pr, 2); - return TransformImportStatement(string_literal); + auto result = TransformImportStatement(string_literal); + return make_uniq>>(std::move(result)); +} + +void PEGTransformerFactory::RegisterGenerated() { + static const TransformRule builtin_transform_rules[] = { + {"UseStatement", &PEGTransformerFactory::TransformUseStatementInternal}, + {"UseTarget", &PEGTransformerFactory::TransformUseTargetInternal}, + {"UseTargetCatalogSchema", &PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal}, + {"DotIdentifier", &PEGTransformerFactory::TransformDotIdentifierInternal}, + {"TransactionStatement", &PEGTransformerFactory::TransformTransactionStatementInternal}, + {"BeginTransaction", &PEGTransformerFactory::TransformBeginTransactionInternal}, + {"RollbackTransaction", &PEGTransformerFactory::TransformRollbackTransactionInternal}, + {"CommitTransaction", &PEGTransformerFactory::TransformCommitTransactionInternal}, + {"ReadOrWrite", &PEGTransformerFactory::TransformReadOrWriteInternal}, + {"DetachStatement", &PEGTransformerFactory::TransformDetachStatementInternal}, + {"ExportStatement", &PEGTransformerFactory::TransformExportStatementInternal}, + {"ExportSource", &PEGTransformerFactory::TransformExportSourceInternal}, + {"ImportStatement", &PEGTransformerFactory::TransformImportStatementInternal}, + }; + for (const auto &rule : builtin_transform_rules) { + sql_transform_functions[rule.name] = rule.transform; + } } } // namespace duckdb From 4941c7a510718569c0167e8eb68b068353a4aeb7 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 14:48:07 +0200 Subject: [PATCH 48/56] Remove move if not needed --- scripts/parser/gen_transformer_v2.py | 36 +++++++++++++------ .../peg/transformer/transform_generated.cpp | 10 +++--- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py index 1a4e1d5bc16b..468d820cc290 100644 --- a/scripts/parser/gen_transformer_v2.py +++ b/scripts/parser/gen_transformer_v2.py @@ -272,7 +272,18 @@ def classify_choice_alternatives(alternatives, rule_types): return transformer_alts, identifier_alts, unknown_alts -def generate_choice_internal_full(rule_name, return_type): +def _box_result(return_type, return_by_value): + """ + Generate the boxing return statement for an Internal function. + Use std::move only for move-only types (unique_ptr, vector>). + Trivially-copyable types (enums, primitives) and copyable structs use a plain copy to + avoid the performance-move-const-arg clang-tidy warning. + """ + arg = "std::move(result)" if return_by_value else "result" + return f"\treturn make_uniq>({arg});\n" + + +def generate_choice_internal_full(rule_name, return_type, return_by_value): """ Fully auto-generated Internal for a pure-transformer choice rule. Static class member matching transform_function_t for the static TransformRule table. @@ -283,12 +294,12 @@ def generate_choice_internal_full(rule_name, return_type): f"\tauto &list_pr = parse_result.Cast();\n" f"\tauto &choice_pr = list_pr.Child(0);\n" f"\tauto result = transformer.Transform<{return_type}>(choice_pr.GetResult());\n" - f"\treturn make_uniq>(std::move(result));\n" - f"}}\n" + + _box_result(return_type, return_by_value) + + f"}}\n" ) -def generate_choice_internal_with_body(rule_name, return_type): +def generate_choice_internal_with_body(rule_name, return_type, return_by_value): """ Internal for a choice rule that has identifier-override alternatives. Static class member matching transform_function_t for the static TransformRule table. @@ -299,8 +310,8 @@ def generate_choice_internal_with_body(rule_name, return_type): f"\tauto &list_pr = parse_result.Cast();\n" f"\tauto &choice_pr = list_pr.Child(0);\n" f"\tauto result = Transform{rule_name}(transformer, choice_pr.GetResult());\n" - f"\treturn make_uniq>(std::move(result));\n" - f"}}\n" + + _box_result(return_type, return_by_value) + + f"}}\n" ) @@ -633,7 +644,7 @@ def _param_decl(e): return f"\tstatic {return_type} Transform{rule_name}({params});\n" -def generate_sequence_internal(rule_name, return_type, elements): +def generate_sequence_internal(rule_name, return_type, return_by_value, elements): """ Generate the Internal static class member for a sequence rule. Returns unique_ptr matching transform_function_t for the static table. @@ -659,7 +670,8 @@ def _param_arg(e): arg_names = ", ".join(_param_arg(e) for e in semantic) body.append(f"\tauto result = Transform{rule_name}({arg_names});") - body.append(f"\treturn make_uniq>(std::move(result));") + box = _box_result(return_type, return_by_value).rstrip('\n') + body.append(box) return ( f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" @@ -696,6 +708,8 @@ def collect_generated(gram_stem, rules, rule_types, excluded_rules): skipped.append((rule_name, f"AST parse error: {e}")) continue + return_by_value = _is_by_value(rule_name, rule_types) + if is_pure_reference_choice(ast): _, identifier_alts, unknown_alts = classify_choice_alternatives(ast.alternatives, rule_types) if unknown_alts: @@ -706,10 +720,10 @@ def collect_generated(gram_stem, rules, rule_types, excluded_rules): registrations.append(generate_registration(rule_name)) if not identifier_alts: - implementations.append(generate_choice_internal_full(rule_name, return_type)) + implementations.append(generate_choice_internal_full(rule_name, return_type, return_by_value)) else: declarations.append(generate_choice_body_declaration(rule_name, return_type)) - implementations.append(generate_choice_internal_with_body(rule_name, return_type)) + implementations.append(generate_choice_internal_with_body(rule_name, return_type, return_by_value)) manual_bodies.append( ( rule_name, @@ -723,7 +737,7 @@ def collect_generated(gram_stem, rules, rule_types, excluded_rules): if elements is not None: declarations.append(generate_internal_declaration(rule_name)) declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) - implementations.append(generate_sequence_internal(rule_name, return_type, elements)) + implementations.append(generate_sequence_internal(rule_name, return_type, return_by_value, elements)) registrations.append(generate_registration(rule_name)) continue diff --git a/src/parser/peg/transformer/transform_generated.cpp b/src/parser/peg/transformer/transform_generated.cpp index c71104407d98..d5d1fa1151b6 100644 --- a/src/parser/peg/transformer/transform_generated.cpp +++ b/src/parser/peg/transformer/transform_generated.cpp @@ -16,7 +16,7 @@ unique_ptr PEGTransformerFactory::TransformUseTargetIntern auto &list_pr = parse_result.Cast(); auto &choice_pr = list_pr.Child(0); auto result = TransformUseTarget(transformer, choice_pr.GetResult()); - return make_uniq>(std::move(result)); + return make_uniq>(result); } unique_ptr @@ -33,7 +33,7 @@ PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &t } } auto result = TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); - return make_uniq>(std::move(result)); + return make_uniq>(result); } unique_ptr PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, @@ -41,7 +41,7 @@ unique_ptr PEGTransformerFactory::TransformDotIdentifierIn auto &list_pr = parse_result.Cast(); auto identifier = list_pr.Child(1).identifier; auto result = TransformDotIdentifier(identifier); - return make_uniq>(std::move(result)); + return make_uniq>(result); } unique_ptr @@ -78,7 +78,7 @@ unique_ptr PEGTransformerFactory::TransformReadOrWriteInte auto &list_pr = parse_result.Cast(); auto read_only_or_read_write = transformer.Transform(list_pr, 1); auto result = TransformReadOrWrite(read_only_or_read_write); - return make_uniq>(std::move(result)); + return make_uniq>(result); } unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, @@ -108,7 +108,7 @@ unique_ptr PEGTransformerFactory::TransformExportSourceInt auto &list_pr = parse_result.Cast(); auto catalog_name = list_pr.Child(0).identifier; auto result = TransformExportSource(catalog_name); - return make_uniq>(std::move(result)); + return make_uniq>(result); } unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, From 637037d42018e3c2cd0f9fbd91847145960ad38a Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 14:57:14 +0200 Subject: [PATCH 49/56] Remove old declaration --- src/include/duckdb/parser/peg/transformer/peg_transformer.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 5a08b9c553ec..3d5f4fce1b94 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -619,9 +619,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static unique_ptr TransformTruncateStatement(PEGTransformer &transformer, ParseResult &parse_result); - // detach.gram - static unique_ptr TransformDetachStatement(PEGTransformer &transformer, ParseResult &parse_result); - // drop.gram static unique_ptr TransformDropStatement(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropEntries(PEGTransformer &transformer, ParseResult &parse_result); From af359786bac54d2a7f80a002622253b88baac4e7 Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Mon, 11 May 2026 15:25:07 +0200 Subject: [PATCH 50/56] Add libduckdb_static.a to build artifact in CI --- scripts/prepare_build_artifact.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/prepare_build_artifact.sh b/scripts/prepare_build_artifact.sh index b8c9480990ef..f87529ba8cc2 100644 --- a/scripts/prepare_build_artifact.sh +++ b/scripts/prepare_build_artifact.sh @@ -37,6 +37,13 @@ else echo "No $BUILD_DIR/src/libduckdb.so* files found" fi +# Required by jobs that link against the prebuilt static library. +if [[ -f "$BUILD_DIR/src/libduckdb_static.a" ]]; then + cp -av "$BUILD_DIR/src/libduckdb_static.a" "$ARTIFACT_DIR"/src/ +else + echo "No $BUILD_DIR/src/libduckdb_static.a file found" +fi + # Required by regression jobs that run the prebuilt benchmark runner. if [[ -f "$BUILD_DIR/benchmark/benchmark_runner" ]]; then mkdir -p "$ARTIFACT_DIR"/benchmark "$ARTIFACT_DIR"/scripts From a9936bb33eda445dfc8f3cde3affe6c9d7a5c8f7 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 15:48:08 +0200 Subject: [PATCH 51/56] Guard against empty subscript expression --- src/parser/peg/transformer/transform_expression.cpp | 3 +++ test/sql/peg_parser/fuzzer/fuzzer_4434.test | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 test/sql/peg_parser/fuzzer/fuzzer_4434.test diff --git a/src/parser/peg/transformer/transform_expression.cpp b/src/parser/peg/transformer/transform_expression.cpp index 9d78f08ede53..1f43481fcec8 100644 --- a/src/parser/peg/transformer/transform_expression.cpp +++ b/src/parser/peg/transformer/transform_expression.cpp @@ -1617,6 +1617,9 @@ unique_ptr PEGTransformerFactory::TransformSliceExpression(PEG ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto slice_bound = transformer.Transform>>(list_pr.Child(1)); + if (slice_bound.size() == 0) { + throw ParserException("Empty subscript '[]' is not allowed"); + } if (slice_bound.size() == 1) { return make_uniq(ExpressionType::ARRAY_EXTRACT, std::move(slice_bound)); } diff --git a/test/sql/peg_parser/fuzzer/fuzzer_4434.test b/test/sql/peg_parser/fuzzer/fuzzer_4434.test new file mode 100644 index 000000000000..699aa366a188 --- /dev/null +++ b/test/sql/peg_parser/fuzzer/fuzzer_4434.test @@ -0,0 +1,9 @@ +# name: test/sql/peg_parser/fuzzer/fuzzer_4434.test +# description: Fix empty subscript expression leading to internal error +# group: [fuzzer] + +statement error +n''''+ST�&6>ttacrotablesampimportleT�&6>tta�rordeT�Niqeger[], e�tfilter% + t_not_currorderent; +---- +Parser Error: Empty subscript '[]' is not allowed From 7bfa2fcd41745211e093042da31327017fa28e53 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 15:50:26 +0200 Subject: [PATCH 52/56] Add test on empty subscript --- test/sql/peg_parser/fuzzer/fuzzer_4434.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/sql/peg_parser/fuzzer/fuzzer_4434.test b/test/sql/peg_parser/fuzzer/fuzzer_4434.test index 699aa366a188..4366592980d6 100644 --- a/test/sql/peg_parser/fuzzer/fuzzer_4434.test +++ b/test/sql/peg_parser/fuzzer/fuzzer_4434.test @@ -7,3 +7,8 @@ n''''+ST�&6>ttacrotablesampimportleT�&6>tta�rordeT�Niqeger[], e�tfilte t_not_currorderent; ---- Parser Error: Empty subscript '[]' is not allowed + +statement error +[1,2,3][]; +---- +Parser Error: Empty subscript '[]' is not allowed \ No newline at end of file From d702be22ec21fc157b9f78d18b250c27117ac6bf Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 16:11:45 +0200 Subject: [PATCH 53/56] Change to empty check --- src/parser/peg/transformer/transform_expression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/peg/transformer/transform_expression.cpp b/src/parser/peg/transformer/transform_expression.cpp index 1f43481fcec8..352a02dd473b 100644 --- a/src/parser/peg/transformer/transform_expression.cpp +++ b/src/parser/peg/transformer/transform_expression.cpp @@ -1617,7 +1617,7 @@ unique_ptr PEGTransformerFactory::TransformSliceExpression(PEG ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto slice_bound = transformer.Transform>>(list_pr.Child(1)); - if (slice_bound.size() == 0) { + if (slice_bound.empty()) { throw ParserException("Empty subscript '[]' is not allowed"); } if (slice_bound.size() == 1) { From 5230c02573c9c79aeaeb366358ea8c168678256f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 11 May 2026 16:37:31 +0200 Subject: [PATCH 54/56] Bump CI From cfdc4fc0c92e1e39d8a2914eec62bd6d60aca0be Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Mon, 11 May 2026 17:13:57 +0200 Subject: [PATCH 55/56] Add libatomic for missing __atomic_is_lock_free --- test/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e84ad2a402a5..51ba8cb8e91c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -65,4 +65,8 @@ else() link_extension_libraries(unittest "") endif() +if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_link_libraries(unittest atomic) +endif() + duckdb_codesign_for_debugging(unittest) From 1c06de8e0a0170b00e762a9a31d23034b842b237 Mon Sep 17 00:00:00 2001 From: Leonid Krugliak Date: Tue, 12 May 2026 12:27:01 +0300 Subject: [PATCH 56/56] Add warning log to EvictionQueue::Purge when iterations > 10 or elapsed > 1s Logs via DUCKDB_LOG_WARNING with queue_size and dead_nodes count. Also prints to stderr in DEBUG builds for local debugging. Requires: SET GLOBAL enable_logging = true; SET GLOBAL logging_level = 'warning'; Co-Authored-By: Claude Opus 4.6 (1M context) --- src/storage/buffer/buffer_pool.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/storage/buffer/buffer_pool.cpp b/src/storage/buffer/buffer_pool.cpp index 104d3fba62eb..84877a0f19e9 100644 --- a/src/storage/buffer/buffer_pool.cpp +++ b/src/storage/buffer/buffer_pool.cpp @@ -3,6 +3,8 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/thread.hpp" #include "duckdb/common/typedefs.hpp" +#include "duckdb/logging/logger.hpp" +#include #include "duckdb/main/settings.hpp" #include "duckdb/parallel/concurrentqueue.hpp" #include "duckdb/parallel/task_scheduler.hpp" @@ -82,7 +84,7 @@ struct EvictionQueue { //! Tries to dequeue an element from the eviction queue, but only after acquiring the purge queue lock. bool TryDequeueWithLock(BufferEvictionNode &node); //! Garbage collect dead nodes in the eviction queue. - void Purge(); + void Purge(const DatabaseInstance &db); template void IterateUnloadableBlocks(FN fn); @@ -155,13 +157,15 @@ bool EvictionQueue::TryDequeueWithLock(BufferEvictionNode &node) { return q.try_dequeue(node); } -void EvictionQueue::Purge() { +void EvictionQueue::Purge(const DatabaseInstance &db) { // only one thread purges the queue, all other threads early-out unique_lock guard(purge_lock, std::try_to_lock); if (!guard.owns_lock()) { return; } + auto purge_start = std::chrono::steady_clock::now(); + // we purge INSERT_INTERVAL * PURGE_SIZE_MULTIPLIER nodes idx_t purge_size = INSERT_INTERVAL * PURGE_SIZE_MULTIPLIER; @@ -191,6 +195,7 @@ void EvictionQueue::Purge() { // guaranteeing that we always exit the loop. idx_t max_purges = approx_q_size / purge_size; + idx_t initial_max_purges = max_purges; while (max_purges != 0) { PurgeIteration(purge_size); @@ -213,6 +218,15 @@ void EvictionQueue::Purge() { max_purges--; } + + idx_t iterations = initial_max_purges - max_purges; + auto elapsed_ms = std::chrono::duration_cast( + std::chrono::steady_clock::now() - purge_start) + .count(); + if (iterations > 10 || elapsed_ms > 1000) { + DUCKDB_LOG_WARNING(db, "EvictionQueue::Purge took %lldms with %llu iterations, queue_size=%llu, dead_nodes=%llu", + elapsed_ms, iterations, q.size_approx(), (idx_t)total_dead_nodes); + } } void EvictionQueue::PurgeIteration(const idx_t purge_size) { @@ -500,7 +514,7 @@ void BufferPool::PurgeQueue(const BlockHandle &block) { const auto queue_sleep_micros = Settings::Get(buffer_manager.GetDatabase()); eviction_queue.debug_eviction_queue_sleep = queue_sleep_micros; - eviction_queue.Purge(); + eviction_queue.Purge(buffer_manager.GetDatabase()); } void BufferPool::SetLimit(idx_t limit, const char *exception_postscript) {