diff --git a/.github/config/extensions/test-utils.cmake b/.github/config/extensions/test-utils.cmake index 6cf7d930d3a2..94e63d661e0b 100644 --- a/.github/config/extensions/test-utils.cmake +++ b/.github/config/extensions/test-utils.cmake @@ -1,6 +1,8 @@ duckdb_extension_load(test_utils GIT_URL https://github.com/duckdb/bwc-test-utils - GIT_TAG 7074208283523a3af8b5ddd1c890a03abdba3b9b + # Use the commit before "Update extensions" (that contains the binaries of + # the commit before that). + GIT_TAG 5b9c7334949c47cdf69ce11c151139c4c88aa7f8 # For local dev: # SOURCE_DIR "${EXTENSION_CONFIG_BASE_DIR}/../../../../test-utils" ) diff --git a/.github/workflows/Main.yml b/.github/workflows/Main.yml index 49460f57e9bc..61ba7dcf87ce 100644 --- a/.github/workflows/Main.yml +++ b/.github/workflows/Main.yml @@ -1442,6 +1442,18 @@ jobs: run: | python3 scripts/ci/run_tests.py --test-config=test/configs/prefetch_all_parquet_files.json ./build/release/test/unittest + - name: test/configs/verification_projection.json + if: success() || failure() + shell: bash + run: | + python3 scripts/ci/run_tests.py --test-config=test/configs/verification_projection.json ./build/release/test/unittest + + - name: test/configs/verify_column_bindings.json + if: success() || failure() + shell: bash + run: | + python3 scripts/ci/run_tests.py --test-config=test/configs/verify_column_bindings.json ./build/release/test/unittest + - name: test/configs/no_local_filesystem.json if: success() || failure() shell: bash diff --git a/.github/workflows/NightlyTests.yml b/.github/workflows/NightlyTests.yml index 202de16eb055..a148ee25afef 100644 --- a/.github/workflows/NightlyTests.yml +++ b/.github/workflows/NightlyTests.yml @@ -626,14 +626,23 @@ jobs: run: make unittest_relassert T="--track-runtime 30 --batch-timeout 120 --test-config=test/configs/hash_zero.json" - bwc_build: + bwc-build: name: Build DuckDB runs-on: ubuntu-latest + outputs: + groups: ${{ steps.resolve.outputs.groups }} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 + - name: Resolve versions + id: resolve + run: | + groups_json=$(python3 test/bwc/list_versions.py --groups-json) + echo "groups=${groups_json}" >> "$GITHUB_OUTPUT" + echo "Resolved BWC version groups: ${groups_json}" + - name: Install run: python3 scripts/ci/retry.py -- make toolsci @@ -653,28 +662,13 @@ jobs: path: build/release/duckdb bwc-test: - name: BWC Test (DuckDB ${{ matrix.old_duckdb_version }}) + name: BWC Test (DuckDB ${{ matrix.series.group }}) runs-on: ubuntu-latest - needs: bwc_build + needs: bwc-build strategy: fail-fast: false matrix: - old_duckdb_version: - - v1.1.0 - - v1.1.1 - - v1.1.2 - - v1.1.3 - - v1.2.0 - - v1.2.1 - - v1.2.2 - - v1.3.0 - - v1.3.1 - - v1.3.2 - - v1.4.0 - - v1.4.1 - - v1.4.2 - - v1.4.3 - - v1.4.4 + series: ${{ fromJSON(needs.bwc-build.outputs.groups) }} steps: - uses: actions/checkout@v6 with: @@ -696,46 +690,64 @@ jobs: - name: Install Python dependencies run: python3 scripts/ci/retry.py -- pip install duckdb git+https://github.com/duckdb/duckdb-sqllogictest-python.git - - name: Download BWC cache - if: ${{ vars.ENABLE_BWC_CACHE_DOWNLOAD == 'true' }} + - name: Run BWC series run: | - VERSION=${{ matrix.old_duckdb_version }} - CACHE_ARCHIVE="runtime_${VERSION}.tar.gz" + versions='${{ toJson(matrix.series.versions) }}' + mapfile -t VERSION_LIST < <(python3 -c 'import json,sys; [print(v) for v in json.loads(sys.argv[1])]' "$versions") + FAILED=0 + FAILED_VERSIONS="" RUNTIME_DIR="duckdb_unittest_tempdir/bwc/runtime" mkdir -p "${RUNTIME_DIR}" + mkdir -p duckdb_unittest_tempdir/bwc/cache + + for VERSION in "${VERSION_LIST[@]}"; do + echo "============================================================" + echo "Running BWC for ${VERSION} (group: ${{ matrix.series.group }})" + echo "============================================================" + + if [ "${{ vars.ENABLE_BWC_CACHE_DOWNLOAD }}" = "true" ]; then + CACHE_ARCHIVE="runtime_${VERSION}.tar.gz" + DOWNLOAD_URL="https://raw.githubusercontent.com/duckdb/bwc-test-utils/main/cache/${CACHE_ARCHIVE}" + HTTP_CODE=$(curl -sL -w '%{http_code}' -o "/tmp/${CACHE_ARCHIVE}" "${DOWNLOAD_URL}" || true) + if [ "${HTTP_CODE}" = "200" ]; then + tar xzf "/tmp/${CACHE_ARCHIVE}" -C "${RUNTIME_DIR}" + CACHED=$(find "${RUNTIME_DIR}/${VERSION}" -name "*.plan.bin" | wc -l) + echo "Extracted cache for ${VERSION}: ${CACHED} cached test plans" + else + echo "Failed to download cache for ${VERSION} (HTTP ${HTTP_CODE})" + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(cache)" + fi + fi - # Download the cache archive from test-utils repo - DOWNLOAD_URL="https://raw.githubusercontent.com/duckdb/bwc-test-utils/main/cache/${CACHE_ARCHIVE}" - HTTP_CODE=$(curl -sL -w '%{http_code}' -o "/tmp/${CACHE_ARCHIVE}" "${DOWNLOAD_URL}" || true) + if ! python3.11 test/bwc/runner.py --old_duckdb_version="${VERSION}"; then + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(tests)" + fi - if [ "${HTTP_CODE}" != "200" ]; then - echo "Failed to download cache for ${VERSION} (HTTP ${HTTP_CODE})" + if ! python3.11 test/bwc/export_cache.py --version "${VERSION}" --output-dir duckdb_unittest_tempdir/bwc/cache; then + FAILED=1 + FAILED_VERSIONS="${FAILED_VERSIONS} ${VERSION}(export)" + fi + done + + if [ "$FAILED" -ne 0 ]; then + echo "BWC series had failures:${FAILED_VERSIONS}" exit 1 fi - tar xzf "/tmp/${CACHE_ARCHIVE}" -C "${RUNTIME_DIR}" - CACHED=$(find "${RUNTIME_DIR}/${VERSION}" -name "*.plan.bin" | wc -l) - echo "Extracted cache for ${VERSION}: ${CACHED} cached test plans" - - - name: Run BWC tests - run: python3.11 test/bwc/runner.py --old_duckdb_version=${{ matrix.old_duckdb_version }} - - - name: Export BWC cache - if: always() - run: python3.11 test/bwc/export_cache.py --version ${{ matrix.old_duckdb_version }} --output-dir duckdb_unittest_tempdir/bwc/cache - - name: Upload BWC cache if: always() uses: actions/upload-artifact@v7 with: - name: bwc-cache-${{ matrix.old_duckdb_version }} - path: duckdb_unittest_tempdir/bwc/cache/runtime_${{ matrix.old_duckdb_version }}.tar.gz + name: bwc-cache-${{ matrix.series.group }} + path: duckdb_unittest_tempdir/bwc/cache/runtime_*.tar.gz - name: Upload test artifacts if: always() uses: actions/upload-artifact@v7 with: - name: bwc-test-results-${{ matrix.old_duckdb_version }} + name: bwc-test-results-${{ matrix.series.group }} path: | duckdb_unittest_tempdir/bwc/tests_summary_*.txt duckdb_unittest_tempdir/bwc/reports/test_report_*.duckdb diff --git a/Makefile b/Makefile index a3c0aaaa6954..5a82f9ca64f4 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) PROJ_DIR := $(dir $(MKFILE_PATH)) PYTHON ?= python3 -FORMAT_VENV ?= build/format-venv +FORMAT_VENV ?= .cache/format-venv FORMAT_PYTHON := $(FORMAT_VENV)/bin/python FORMAT_SETUP_DEPS := format_venv diff --git a/extension/json/json_functions/json_serialize_plan.cpp b/extension/json/json_functions/json_serialize_plan.cpp index 962ba9c0d461..a2d2b141831b 100644 --- a/extension/json/json_functions/json_serialize_plan.cpp +++ b/extension/json/json_functions/json_serialize_plan.cpp @@ -147,7 +147,7 @@ static void JsonSerializePlanFunction(DataChunk &args, ExpressionState &state, V } ColumnBindingResolver resolver; - resolver.Verify(*plan); + resolver.Verify(context, *plan); resolver.VisitOperator(*plan); plan->ResolveOperatorTypes(); diff --git a/scripts/parser/build_grammar.sh b/scripts/parser/build_grammar.sh index 39dd4c992bec..1bd114404e51 100755 --- a/scripts/parser/build_grammar.sh +++ b/scripts/parser/build_grammar.sh @@ -12,4 +12,17 @@ fi python "$GRAMMAR_FILE" --grammar-file python "$GRAMMAR_FILE" -echo "Successfully build grammar files" \ No newline at end of file +echo "Successfully built grammar files" + +# Generate Internal transformer wrappers for auto-generatable grammar rules +GEN_TRANSFORMER_FILE="scripts/parser/gen_transformer_v2.py" +if [[ ! -f "$GEN_TRANSFORMER_FILE" ]]; then + echo "Error: $GEN_TRANSFORMER_FILE not found" + exit 1 +fi + +python "$GEN_TRANSFORMER_FILE" --write + +echo "Successfully generated transformer wrappers" + +make format-fix \ No newline at end of file diff --git a/scripts/parser/gen_transformer_v2.py b/scripts/parser/gen_transformer_v2.py new file mode 100644 index 000000000000..468d820cc290 --- /dev/null +++ b/scripts/parser/gen_transformer_v2.py @@ -0,0 +1,877 @@ +import argparse +import re +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import List + +sys.path.insert(0, str(Path(__file__).parent)) +from inline_grammar import parse_peg_grammar, PEGTokenType +from generate_transformer import GrammarTypeInfo, load_grammar_types + + +# --------------------------------------------------------------------------- +# Grammar AST - mirrors the Matcher class hierarchy in matcher.cpp +# --------------------------------------------------------------------------- + + +class GrammarNode: + pass + + +@dataclass +class LiteralNode(GrammarNode): + """Keyword literal ('keyword'). Corresponds to KeywordMatcher.""" + + text: str + + +@dataclass +class ReferenceNode(GrammarNode): + """Reference to a named grammar rule. Resolved to another Matcher at build time.""" + + name: str + + +@dataclass +class ParensNode(GrammarNode): + """Parens(D) <- '(' D ')'. Anonymous ListMatcher; child[1] is D's result. + Use ExtractResultFromParens() to reach inside.""" + + inner: GrammarNode + + +@dataclass +class ListMacroNode(GrammarNode): + """List(D) <- D (',' D)* ','?. Anonymous ListMatcher. + Use ExtractParseResultsFromList() to get all D results.""" + + inner: GrammarNode + + +@dataclass +class FunctionCallNode(GrammarNode): + """Unknown macro call (not Parens or List). Not auto-generated.""" + + func_name: str + inner: GrammarNode + + +@dataclass +class SequenceNode(GrammarNode): + """Ordered sequence of matchers. Corresponds to ListMatcher.""" + + children: List[GrammarNode] + + +@dataclass +class ChoiceNode(GrammarNode): + """Ordered choice A / B / C. Corresponds to ChoiceMatcher.""" + + alternatives: List[GrammarNode] + + +@dataclass +class OptionalNode(GrammarNode): + """Optional match A?. Corresponds to OptionalMatcher.""" + + child: GrammarNode + + +@dataclass +class RepeatNode(GrammarNode): + """Repeat match A+ (one or more). Corresponds to RepeatMatcher. + A* (zero or more) is represented as OptionalNode(RepeatNode), matching + the runtime OptionalMatcher(RepeatMatcher) structure.""" + + child: GrammarNode + + +def tokens_to_ast(tokens): + """ + Parse a flat PEGToken list into a GrammarNode AST tree. + + Grammar of PEG rule bodies (simplified): + choice = sequence ('/' sequence)* + sequence = term+ + term = atom ('?' | '*' | '+')? + atom = LITERAL | REFERENCE + | FUNCTION_CALL choice ')' + | '(' choice ')' + """ + pos = [0] + + def peek(): + return tokens[pos[0]] if pos[0] < len(tokens) else None + + def consume(): + tok = tokens[pos[0]] + pos[0] += 1 + return tok + + def parse_choice(): + alts = [parse_sequence()] + while peek() and peek().type == PEGTokenType.OPERATOR and peek().text == '/': + consume() # consume '/' + alts.append(parse_sequence()) + return ChoiceNode(alts) if len(alts) > 1 else alts[0] + + def parse_sequence(): + children = [] + while True: + t = peek() + if t is None: + break + if t.type == PEGTokenType.OPERATOR and t.text in ('/', ')'): + break + children.append(parse_term()) + if not children: + return SequenceNode([]) + return SequenceNode(children) if len(children) > 1 else children[0] + + def parse_term(): + node = parse_atom() + t = peek() + if t and t.type == PEGTokenType.OPERATOR and t.text in ('?', '*', '+'): + op = consume().text + if op == '?': + return OptionalNode(node) + elif op == '*': + return OptionalNode(RepeatNode(node)) + elif op == '+': + return RepeatNode(node) + else: + raise Exception("Unknown operator '{}'".format(op)) + return node + + def parse_atom(): + t = peek() + if t is None: + raise Exception("Unexpected end of tokens in grammar AST parse") + if t.type == PEGTokenType.LITERAL: + return LiteralNode(consume().text) + elif t.type == PEGTokenType.REFERENCE: + return ReferenceNode(consume().text) + elif t.type == PEGTokenType.FUNCTION_CALL: + # inline_grammar already consumed the '(' and bumped bracket_count + func_name = consume().text + inner = parse_choice() + if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': + consume() + if func_name == 'Parens': + return ParensNode(inner) + elif func_name == 'List': + return ListMacroNode(inner) + return FunctionCallNode(func_name, inner) + elif t.type == PEGTokenType.OPERATOR and t.text == '(': + consume() + inner = parse_choice() + if peek() and peek().type == PEGTokenType.OPERATOR and peek().text == ')': + consume() + return inner # anonymous group - transparent node + else: + raise Exception(f"Unexpected token in grammar AST parse: {t}") + + result = parse_choice() + if pos[0] < len(tokens): + raise Exception(f"Tokens remaining after grammar AST parse: {tokens[pos[0]:]}") + return result + + +def rule_to_ast(rule): + """Convert a PEGGrammarRule (flat token list) to a GrammarNode AST.""" + return tokens_to_ast(rule.tokens) + + +# --------------------------------------------------------------------------- +# Rule overrides - mirrors AddRuleOverride() calls in matcher.cpp. +# These rules are replaced with special matchers that produce IdentifierParseResult +# (or similar) directly, bypassing the generic ListParseResult path. +# When one of these appears as an alternative in a choice rule the generated +# Internal must check ParseResultType rather than calling transformer.Transform<>. +# --------------------------------------------------------------------------- + +IDENTIFIER_OVERRIDE_RULES = { + 'Identifier', + 'ReservedIdentifier', + 'CatalogName', + 'SchemaName', + 'ReservedSchemaName', + 'TableName', + 'ReservedTableName', + 'ColumnName', + 'ReservedColumnName', + 'IndexName', + 'SequenceName', + 'FunctionName', + 'ReservedFunctionName', + 'TableFunctionName', + 'TypeName', + 'PragmaName', + 'SettingName', + 'CopyOptionName', +} + + +scripts_dir = Path(__file__).parent.parent +src_dir = scripts_dir.parent / 'src' +peg_dir = src_dir / 'parser' / 'peg' +statements_dir = peg_dir / 'grammar' / 'statements' +type_dir = scripts_dir / 'parser' +transformer_dir = peg_dir / 'transformer' +include_peg_dir = src_dir / 'include' / 'duckdb' / 'parser' / 'peg' / 'transformer' + +GENERATED_HEADER = "// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT\n" + + +def to_snake_case(name): + s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + +def generate_internal_declaration(rule_name): + return ( + f"\tstatic unique_ptr Transform{rule_name}Internal" + f"(PEGTransformer &transformer, ParseResult &parse_result);\n" + ) + + +def generate_registration(rule_name): + return f'\t{{"{rule_name}", &PEGTransformerFactory::Transform{rule_name}Internal}},\n' + + +# --------------------------------------------------------------------------- +# Choice-rule helpers +# --------------------------------------------------------------------------- + + +def is_pure_reference_choice(ast): + """True if ast is a ChoiceNode whose every alternative is a ReferenceNode.""" + return isinstance(ast, ChoiceNode) and all(isinstance(a, ReferenceNode) for a in ast.alternatives) + + +def classify_choice_alternatives(alternatives, rule_types): + """ + Split choice alternatives into three groups: + - transformer_alts: names with a registered transformer (in rule_types) + - identifier_alts: names that are identifier overrides (produce IdentifierParseResult) + - unknown_alts: neither registered nor known overrides -- need manual handling + Returns (transformer_alts, identifier_alts, unknown_alts). + """ + transformer_alts = [] + identifier_alts = [] + unknown_alts = [] + for ref in alternatives: + name = ref.name + if name in rule_types: + transformer_alts.append(name) + elif name in IDENTIFIER_OVERRIDE_RULES: + identifier_alts.append(name) + else: + unknown_alts.append(name) + return transformer_alts, identifier_alts, unknown_alts + + +def _box_result(return_type, return_by_value): + """ + Generate the boxing return statement for an Internal function. + Use std::move only for move-only types (unique_ptr, vector>). + Trivially-copyable types (enums, primitives) and copyable structs use a plain copy to + avoid the performance-move-const-arg clang-tidy warning. + """ + arg = "std::move(result)" if return_by_value else "result" + return f"\treturn make_uniq>({arg});\n" + + +def generate_choice_internal_full(rule_name, return_type, return_by_value): + """ + Fully auto-generated Internal for a pure-transformer choice rule. + Static class member matching transform_function_t for the static TransformRule table. + """ + return ( + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + f"\tauto &list_pr = parse_result.Cast();\n" + f"\tauto &choice_pr = list_pr.Child(0);\n" + f"\tauto result = transformer.Transform<{return_type}>(choice_pr.GetResult());\n" + + _box_result(return_type, return_by_value) + + f"}}\n" + ) + + +def generate_choice_internal_with_body(rule_name, return_type, return_by_value): + """ + Internal for a choice rule that has identifier-override alternatives. + Static class member matching transform_function_t for the static TransformRule table. + """ + return ( + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + f"\tauto &list_pr = parse_result.Cast();\n" + f"\tauto &choice_pr = list_pr.Child(0);\n" + f"\tauto result = Transform{rule_name}(transformer, choice_pr.GetResult());\n" + + _box_result(return_type, return_by_value) + + f"}}\n" + ) + + +def generate_choice_body_declaration(rule_name, return_type): + """Declaration for the manual body that handles identifier alternatives.""" + return ( + f"\tstatic {return_type} Transform{rule_name}" f"(PEGTransformer &transformer, ParseResult &choice_result);\n" + ) + + +# --------------------------------------------------------------------------- +# Sequence-element classification +# +# Mirrors the per-token-type dispatch inside MatcherFactory::CreateMatcher() +# in matcher.cpp. Each helper handles one matcher/parse-result kind: +# +# _classify_literal LiteralNode -> KeywordParseResult (skip) +# _classify_reference ReferenceNode -> IdentifierParseResult OR Transform +# _classify_optional_reference OptionalNode(Ref) -> optional identifier OR TransformOptional +# _classify_star_repeat OptionalNode(Rep) -> OptionalParseResult(RepeatParseResult) vector +# _classify_plus_repeat RepeatNode -> RepeatParseResult vector +# _classify_parens ParensNode(Ref) -> ExtractResultFromParens T +# _classify_list_macro ListMacroNode(Ref)-> ExtractParseResultsFromList vector +# _classify_parens_list ParensNode(List) -> ExtractParseResultsFromList(ExtractResultFromParens) vector +# +# classify_sequence_element() is the top-level dispatch (= the switch in CreateMatcher). +# classify_sequence_elements() iterates all children of a SequenceNode (= the token loop). +# --------------------------------------------------------------------------- + + +@dataclass +class SeqElement: + """One classified position in a sequence rule.""" + + skip: bool # True for LiteralNode - no semantic value + var_name: str = "" + cpp_type: str = "" + by_value: bool = False # True for unique_ptr, vector>, bool, int64_t + extraction_lines: List[str] = field(default_factory=list) + + +def _is_by_value(rule_name, rule_types): + """ + Return True if the C++ value for rule_name is move-only and must be passed by value with std::move. + by_value=True in grammar_types.yml means: unique_ptr or vector> (non-copyable). + Primitives (bool, int64_t) use by_value=False and are passed as const T & (harmless, no tidy warning). + Fallback: override rules without a by_value annotation use the unique_ptr< prefix heuristic. + """ + info = rule_types.get(rule_name) + if info is None: + return False + return info.by_value or info.cpp_type.startswith('unique_ptr<') + + +def _classify_literal(): + """LITERAL token -> KeywordMatcher -> KeywordParseResult. No semantic value.""" + return SeqElement(skip=True) + + +def _classify_reference(name, idx, rule_types, excluded_rules): + """ + REFERENCE token -> CreateMatcher(rule_name). + Priority order mirrors runtime dispatch: + 1. IDENTIFIER_OVERRIDE_RULES -> IdentifierMatcher -> Child() + 2. excluded_rules -> keyword-only rule, no semantic value -> skip + 3. rule_types -> regular ListMatcher -> transformer.Transform() + """ + if name in IDENTIFIER_OVERRIDE_RULES: + var_name = to_snake_case(name) + lines = [f"\tauto {var_name} = list_pr.Child({idx}).identifier;"] + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + if name in excluded_rules: + return _classify_literal() + if name in rule_types: + cpp_type = rule_types[name].cpp_type + var_name = to_snake_case(name) + lines = [f"\tauto {var_name} = transformer.Transform<{cpp_type}>(list_pr, {idx});"] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) + return None + + +def _classify_optional_reference(name, idx, rule_types, excluded_rules): + """ + OptionalNode(ReferenceNode) -> OptionalMatcher wrapping a named rule. + Priority order matches _classify_reference: + 1. excluded_rules -> keyword-only optional (Transaction?) -> skip + 2. IDENTIFIER_OVERRIDE_RULES -> optional identifier, extracted via HasResult() + 3. rule_types -> optional typed rule, extracted via TransformOptional + """ + if name in excluded_rules: + return _classify_literal() + var_name = to_snake_case(name) + if name in IDENTIFIER_OVERRIDE_RULES: + lines = [ + f"\tstring {var_name};", + f"\tauto &{var_name}_opt = list_pr.Child({idx});", + f"\tif ({var_name}_opt.HasResult()) {{", + f"\t\t{var_name} = {var_name}_opt.GetResult().Cast().identifier;", + f"\t}}", + ] + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + if name in rule_types: + cpp_type = rule_types[name].cpp_type + lines = [ + f"\t{cpp_type} {var_name} {{}};", + f"\ttransformer.TransformOptional(list_pr, {idx}, {var_name});", + ] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) + return None + + +def _classify_parens(inner_node, idx, rule_types): + """ + ParensNode -> Parens(D) <- '(' D ')'. + Uses ExtractResultFromParens() to reach child[1]. + Only supported when inner is a plain ReferenceNode. + """ + if not isinstance(inner_node, ReferenceNode): + return None + name = inner_node.name + var_name = to_snake_case(name) + if name in IDENTIFIER_OVERRIDE_RULES: + lines = [ + f"\tauto {var_name} = ExtractResultFromParens(list_pr.GetChild({idx}))" + f".Cast().identifier;", + ] + return SeqElement(skip=False, var_name=var_name, cpp_type="string", extraction_lines=lines) + if name in rule_types: + cpp_type = rule_types[name].cpp_type + lines = [ + f"\tauto {var_name} = transformer.Transform<{cpp_type}>" + f"(ExtractResultFromParens(list_pr.GetChild({idx})));", + ] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=cpp_type, + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) + return None + + +def _classify_list_macro(inner_node, idx, rule_types): + """ + ListMacroNode -> List(D) <- D (',' D)* ','?. + Uses ExtractParseResultsFromList() to collect all D results. + Only supported when inner is a plain ReferenceNode with a known type. + Produces vector. + """ + if not isinstance(inner_node, ReferenceNode): + return None + name = inner_node.name + if name not in rule_types: + return None + child_type = rule_types[name].cpp_type + var_name = to_snake_case(name) + lines = [ + f"\tauto {var_name}_items = ExtractParseResultsFromList(list_pr.GetChild({idx}));", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_items) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) + + +def _classify_parens_list(inner_list_node, idx, rule_types): + """ + ParensNode(ListMacroNode(D)) -> Parens(List(D)). + Uses ExtractParseResultsFromList(ExtractResultFromParens(...)) to collect all D results. + Only supported when the ListMacroNode's inner is a plain ReferenceNode with a known type. + Produces vector. + """ + if not isinstance(inner_list_node.inner, ReferenceNode): + return None + name = inner_list_node.inner.name + if name not in rule_types: + return None + child_type = rule_types[name].cpp_type + var_name = to_snake_case(name) + lines = [ + f"\tauto {var_name}_items = ExtractParseResultsFromList(" f"ExtractResultFromParens(list_pr.GetChild({idx})));", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_items) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(name, rule_types), + extraction_lines=lines, + ) + + +def _classify_repeat(node, idx, rule_types, optional): + """ + Shared helper for A* and A+. + A* -> OptionalNode(RepeatNode(A)) -> OptionalParseResult wrapping RepeatParseResult. + A+ -> RepeatNode(A) -> RepeatParseResult directly (guaranteed >= 1 element). + Only supported when the repeated element is a plain reference with a known type. + Produces vector. + """ + if not isinstance(node.child, ReferenceNode): + return None + ref_name = node.child.name + if ref_name not in rule_types: + return None + child_type = rule_types[ref_name].cpp_type + var_name = to_snake_case(ref_name) + if optional: + lines = [ + f"\tauto &{var_name}_opt = list_pr.Child({idx});", + f"\tvector<{child_type}> {var_name};", + f"\tif ({var_name}_opt.HasResult()) {{", + f"\t\tauto &{var_name}_repeat = {var_name}_opt.GetResult().Cast();", + f"\t\tfor (auto &{var_name}_item : {var_name}_repeat.GetChildren()) {{", + f"\t\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t\t}}", + f"\t}}", + ] + else: + lines = [ + f"\tauto &{var_name}_repeat = list_pr.Child({idx});", + f"\tvector<{child_type}> {var_name};", + f"\tfor (auto &{var_name}_item : {var_name}_repeat.GetChildren()) {{", + f"\t\t{var_name}.push_back(transformer.Transform<{child_type}>({var_name}_item));", + f"\t}}", + ] + return SeqElement( + skip=False, + var_name=var_name, + cpp_type=f"vector<{child_type}>", + by_value=_is_by_value(ref_name, rule_types), + extraction_lines=lines, + ) + + +def _classify_star_repeat(node, idx, rule_types): + return _classify_repeat(node, idx, rule_types, optional=True) + + +def _classify_plus_repeat(node, idx, rule_types): + return _classify_repeat(node, idx, rule_types, optional=False) + + +def classify_sequence_element(child, idx, rule_types, excluded_rules): + """ + Classify one element of a SequenceNode. + Mirrors the token-type switch in MatcherFactory::CreateMatcher(). + Returns SeqElement or None if the element cannot be auto-generated. + """ + if isinstance(child, LiteralNode): + return _classify_literal() + if isinstance(child, ReferenceNode): + return _classify_reference(child.name, idx, rule_types, excluded_rules) + if isinstance(child, OptionalNode): + inner = child.child + if isinstance(inner, LiteralNode): + return _classify_literal() + if isinstance(inner, ReferenceNode): + return _classify_optional_reference(inner.name, idx, rule_types, excluded_rules) + if isinstance(inner, RepeatNode): + # A* is represented as OptionalNode(RepeatNode(A)), matching the runtime + # OptionalMatcher(RepeatMatcher(A)) structure. Delegate to star-repeat classifier. + return _classify_star_repeat(inner, idx, rule_types) + return None # OptionalNode(ParensNode) etc. - deferred + if isinstance(child, RepeatNode): + return _classify_plus_repeat(child, idx, rule_types) + if isinstance(child, ParensNode): + if isinstance(child.inner, ListMacroNode): + return _classify_parens_list(child.inner, idx, rule_types) + return _classify_parens(child.inner, idx, rule_types) + if isinstance(child, ListMacroNode): + return _classify_list_macro(child.inner, idx, rule_types) + return None + + +def classify_sequence_elements(children, rule_types, excluded_rules): + """ + Classify all children of a SequenceNode. + Mirrors the token loop in MatcherFactory::CreateMatcher(). + Returns list of SeqElement, or None if any element cannot be classified. + """ + elements = [] + for idx, child in enumerate(children): + elem = classify_sequence_element(child, idx, rule_types, excluded_rules) + if elem is None: + return None + elements.append(elem) + return elements + + +# --------------------------------------------------------------------------- +# Extended sequence-rule code generation +# --------------------------------------------------------------------------- + + +def generate_sequence_body_decl(rule_name, return_type, elements): + """Declaration for the hand-written body that receives extracted typed args.""" + + def _param_decl(e): + # Move-only types (unique_ptr, vector>) are passed by value. + # Everything else (structs, strings, primitives) uses const T & to avoid tidy warnings. + if e.by_value: + return f"{e.cpp_type} {e.var_name}" + return f"const {e.cpp_type} &{e.var_name}" + + params = ", ".join(_param_decl(e) for e in elements if not e.skip) + return f"\tstatic {return_type} Transform{rule_name}({params});\n" + + +def generate_sequence_internal(rule_name, return_type, return_by_value, elements): + """ + Generate the Internal static class member for a sequence rule. + Returns unique_ptr matching transform_function_t for the static table. + Extracts typed args from parse_result, calls the hand-written body, then boxes via TypedTransformResult. + """ + semantic = [e for e in elements if not e.skip] + has_semantic_elements = len(semantic) > 0 + + body = [] + # Only emit the list_pr cast when there are elements to extract from it. + # All-skip rules (e.g. CommitTransaction <- CommitOrEnd Transaction?) + # produce no arguments and must not declare an unused list_pr variable. + if has_semantic_elements: + body.append("\tauto &list_pr = parse_result.Cast();") + for elem in semantic: + body.extend(elem.extraction_lines) + + def _param_arg(e): + # by_value=True means move-only; transfer ownership to body via std::move. + if e.by_value: + return f"std::move({e.var_name})" + return e.var_name + + arg_names = ", ".join(_param_arg(e) for e in semantic) + body.append(f"\tauto result = Transform{rule_name}({arg_names});") + box = _box_result(return_type, return_by_value).rstrip('\n') + body.append(box) + return ( + f"unique_ptr PEGTransformerFactory::Transform{rule_name}Internal(\n" + f" PEGTransformer &transformer, ParseResult &parse_result) {{\n" + "\n".join(body) + "\n}\n" + ) + + +@dataclass +class GramFileResult: + gram_stem: str + declarations: list + implementations: list + registrations: list + skipped: list # (rule_name, reason) — nothing generated + manual_bodies: list # (rule_name, reason) — Internal generated, body is hand-written + + +def collect_generated(gram_stem, rules, rule_types, excluded_rules): + """Classify all rules; return a GramFileResult.""" + declarations = [] + implementations = [] + registrations = [] + skipped = [] + manual_bodies = [] + + for rule_name, rule in rules.items(): + return_type = rule.return_type + if return_type is None: + skipped.append((rule_name, "no return type in grammar_types.yml")) + continue + + try: + ast = rule_to_ast(rule) + except Exception as e: + skipped.append((rule_name, f"AST parse error: {e}")) + continue + + return_by_value = _is_by_value(rule_name, rule_types) + + if is_pure_reference_choice(ast): + _, identifier_alts, unknown_alts = classify_choice_alternatives(ast.alternatives, rule_types) + if unknown_alts: + skipped.append((rule_name, f"choice has unknown alternatives: {unknown_alts}")) + continue + + declarations.append(generate_internal_declaration(rule_name)) + registrations.append(generate_registration(rule_name)) + + if not identifier_alts: + implementations.append(generate_choice_internal_full(rule_name, return_type, return_by_value)) + else: + declarations.append(generate_choice_body_declaration(rule_name, return_type)) + implementations.append(generate_choice_internal_with_body(rule_name, return_type, return_by_value)) + manual_bodies.append( + ( + rule_name, + f"choice body; identifier alternatives: {identifier_alts}", + ) + ) + continue + + if isinstance(ast, SequenceNode): + elements = classify_sequence_elements(ast.children, rule_types, excluded_rules) + if elements is not None: + declarations.append(generate_internal_declaration(rule_name)) + declarations.append(generate_sequence_body_decl(rule_name, return_type, elements)) + implementations.append(generate_sequence_internal(rule_name, return_type, return_by_value, elements)) + registrations.append(generate_registration(rule_name)) + continue + + skipped.append((rule_name, "complex rule (has operators/choices/groups)")) + + return GramFileResult(gram_stem, declarations, implementations, registrations, skipped, manual_bodies) + + +def print_output(result: GramFileResult): + if result.skipped: + print("=== SKIPPED (nothing generated) ===") + for rule_name, reason in result.skipped: + print(f" {rule_name}: {reason}") + print() + + if result.manual_bodies: + print("=== MANUAL BODY NEEDED (Internal generated, body must be hand-written) ===") + for rule_name, reason in result.manual_bodies: + print(f" {rule_name}: {reason}") + print() + + print("=== DECLARATIONS (peg_transformer_generated.hpp) ===") + print("".join(result.declarations)) + + print(f"=== IMPLEMENTATION (generated/transform_{result.gram_stem}_generated.cpp) ===") + print("".join(result.implementations)) + + print(f"=== REGISTRATION (in Register{result.gram_stem.capitalize()}() in peg_transformer_factory.cpp) ===") + print("".join(result.registrations)) + + +def generate_table_and_register(all_registrations): + entries = "".join("\t\t" + e.lstrip() for e in all_registrations) + return ( + "void PEGTransformerFactory::RegisterGenerated() {\n" + + "\tstatic const TransformRule builtin_transform_rules[] = {\n" + + entries + + "\t};\n" + + "\tfor (const auto &rule : builtin_transform_rules) {\n" + + "\t\tsql_transform_functions[rule.name] = rule.transform;\n" + + "\t}\n" + + "}\n" + ) + + +def write_cpp(all_implementations, all_registrations): + cpp_path = transformer_dir / "transform_generated.cpp" + content = ( + GENERATED_HEADER + + '#include "duckdb/parser/peg/transformer/peg_transformer.hpp"\n' + + "\nnamespace duckdb {\n\n" + + "\n".join(all_implementations) + + "\n" + + generate_table_and_register(all_registrations) + + "\n} // namespace duckdb\n" + ) + cpp_path.write_text(content) + print(f"Wrote {cpp_path}") + + +def write_hpp(all_declarations): + hpp_path = include_peg_dir / "peg_transformer_generated.hpp" + # This file is #include-d inside the PEGTransformerFactory class body, so it cannot be a + # valid standalone header (types like SQLStatement are only in scope inside the class). + # The #ifdef guard makes the file a no-op when clang-tidy processes it standalone, + # preventing false compilation errors. The guard is defined by peg_transformer.hpp + # immediately before the #include. + content = ( + GENERATED_HEADER + + "#ifdef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP\n" + + "".join(all_declarations) + + "#endif // DUCKDB_INSIDE_PEG_TRANSFORMER_HPP\n" + ) + hpp_path.write_text(content) + print(f"Wrote {hpp_path}") + + +def print_manual_steps(all_results): + print("\nRemaining manual steps:") + print(f" 1. In {include_peg_dir / 'peg_transformer.hpp'}:") + print(" - The #include of peg_transformer_generated.hpp must remain inside the class body") + print(f" 2. In {transformer_dir / 'CMakeLists.txt'}:") + print(" - Ensure transform_generated.cpp is listed in add_library_unity()") + for r in all_results: + if not r.registrations: + continue + reg_lines = "".join(f" {r.strip()}\n" for r in r.registrations) + print(f" 3. In peg_transformer_factory.cpp Register{r.gram_stem.capitalize()}():") + print(f" - Replace REGISTER_TRANSFORM macros for generated rules with:") + print(reg_lines, end="") + print(f" 4. In transform_{r.gram_stem}.cpp:") + print(" - Remove Internal wrappers now generated (keep only hand-written bodies)") + print(" - Update body function signatures to match the generated declarations") + + +def process_gram_file(gram_filename, rule_types, excluded_rules): + """Parse a .gram file and classify all its rules into a GramFileResult.""" + gram_stem = gram_filename.removesuffix('.gram') + gram_path = statements_dir / gram_filename + try: + rules = parse_peg_grammar(gram_path.read_text()) + except Exception as e: + raise Exception(f"{gram_filename}: {e}") from None + + for rule_name, info in rule_types.items(): + if rule_name in rules: + rules[rule_name].return_type = info.cpp_type + + return collect_generated(gram_stem, rules, rule_types, excluded_rules) + + +def main(): + arg_parser = argparse.ArgumentParser(description="Generate Internal transformer wrappers from grammar rules.") + arg_parser.add_argument("--write", action="store_true", help="Write generated files to disk.") + args = arg_parser.parse_args() + + gram_files_to_gen = ['use.gram', 'transaction.gram', 'detach.gram', 'export.gram'] + rule_types, excluded_rules = load_grammar_types(type_dir / 'grammar_types.yml') + results = [process_gram_file(f, rule_types, excluded_rules) for f in gram_files_to_gen] + + if args.write: + all_declarations = [d for r in results for d in r.declarations] + write_hpp(all_declarations) + all_implementations = [impl for r in results for impl in r.implementations] + all_registrations = [reg for r in results for reg in r.registrations] + write_cpp(all_implementations, all_registrations) + print_manual_steps(results) + else: + for r in results: + print(f"\n{'=' * 60}") + print(f" {r.gram_stem}.gram") + print(f"{'=' * 60}") + print_output(r) + + +if __name__ == "__main__": + main() diff --git a/scripts/parser/generate_transformer.py b/scripts/parser/generate_transformer.py index 67afa018046d..f1bd132ad940 100644 --- a/scripts/parser/generate_transformer.py +++ b/scripts/parser/generate_transformer.py @@ -1,6 +1,7 @@ import argparse import re import sys +from dataclasses import dataclass from pathlib import Path try: @@ -29,82 +30,22 @@ # Matches: Register("RuleName", &SomeFunction) — direct registration bypassing the macro DIRECT_REGISTER_REGEX = re.compile(r'Register\s*\(\s*"(\w+)"\s*,') -EXCLUDED_RULES = { - "Program", - "FunctionType", - "IfExists", - "Database", - "AbortOrRollback", - "CommitOrEnd", - "StartOrBegin", - "Transaction", - "VariableAssign", - "MacroOrFunction", - "SettingScope", - "ColLabel", - "MacroOrFunction", - "GroupingOrGroupingId", - "DefaultValues", - "RowOrRows", - "Recursive", - "StarSymbol", - "IfNotExists", - "PlainIdentifier", - "QuotedIdentifier", - "CreateTableColumnElement", - "OrReplace", - "ReservedIdentifier", - "CatalogName", - "SchemaName", - "ReservedSchemaName", - "ReservedIdentifier", - "TableName", - "ConstraintName", - "IntervalNumber", - "ReservedTableName", - "ColumnName", - "ReservedColumnName", - "FunctionName", - "ReservedFunctionName", - "TableFunctionName", - "TypeName", - "PragmaName", - "SettingName", - "CopyOptionName", - "AtTimeZoneOperator", - "Generated", - "ColumnConstraint", - "AlwaysOrByDefault", - "Lateral", - "ConstraintNameClause", - "ReservedSchemaQualification", - "UsingSample", - "TableSample", - "TypeList", - "NamedParameterAssignment", - "WithOrdinality", - "ByName", - "CollateOperator", - "ExportClause", - "ValueOrValues", - "PivotKeyword", - "UnpivotKeyword", - "Unique", - "DefArg", - "NoneLiteral", - "RowOrStruct", - "ForEachRow", - "ForEachStatement", - "SetData", - "CTEBodyContent", - "SingleArrowPair", - "OperatorLiteral", -} + +@dataclass +class GrammarTypeInfo: + """Per-rule type metadata loaded from grammar_types.yml.""" + + cpp_type: str + by_value: bool = False # True for unique_ptr, vector>, bool, int64_t def load_grammar_types(types_file): """ - Loads grammar_types.yml and returns a dict mapping rule name -> C++ return type. + Loads grammar_types.yml and returns (rule_types, excluded_rules) where + rule_types maps rule name -> GrammarTypeInfo (cpp_type + by_value), and excluded_rules is + the set of rules that should be skipped during stub generation. + Override rules default to by_value=False; a startswith('unique_ptr<') fallback covers + any override types that are move-only. """ if yaml is None: print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) @@ -121,27 +62,27 @@ def load_grammar_types(types_file): print(f"Error: {types_file} is malformed (expected a top-level mapping).", file=sys.stderr) sys.exit(1) - rule_to_type = {} + rule_types = {} rule_to_source = {} # tracks where each rule was first seen for error messages duplicates = [] - def register(rule_name, cpp_type, source): - rule_name = str(rule_name) - if rule_name in rule_to_type: - duplicates.append(f" '{rule_name}' in '{source}' (already listed in '{rule_to_source[rule_name]}')") + def register(name, cpp_type, by_value, source): + name = str(name) + if name in rule_types: + duplicates.append(f" '{name}' in '{source}' (already listed in '{rule_to_source[name]}')") else: - rule_to_type[rule_name] = str(cpp_type) - rule_to_source[rule_name] = source + rule_types[name] = GrammarTypeInfo(cpp_type=str(cpp_type), by_value=by_value) + rule_to_source[name] = source - # Top-level overrides: flat RuleName -> "type" map + # Top-level overrides: flat RuleName -> "type" map (no by_value annotation) overrides = data.get("overrides", {}) if isinstance(overrides, dict): - for rule_name, cpp_type in overrides.items(): - register(rule_name, cpp_type, "overrides") + for name, cpp_type in overrides.items(): + register(name, cpp_type, False, "overrides") - # Category entries: CategoryName -> {type: "...", rules: [...]} + # Category entries: CategoryName -> {type: "...", by_value: bool, rules: [...]} for key, value in data.items(): - if key == "overrides": + if key in ("overrides", "excluded_rules"): continue if not isinstance(value, dict): continue @@ -149,8 +90,9 @@ def register(rule_name, cpp_type, source): rules = value.get("rules", []) if not cpp_type or not isinstance(rules, list): continue - for rule_name in rules: - register(rule_name, cpp_type, key) + by_value = bool(value.get("by_value", False)) + for name in rules: + register(name, cpp_type, by_value, key) if duplicates: print(f"Error: {types_file} contains duplicate rule listings:", file=sys.stderr) @@ -158,7 +100,8 @@ def register(rule_name, cpp_type, source): print(msg, file=sys.stderr) sys.exit(1) - return rule_to_type + excluded_rules = set(data.get("excluded_rules", [])) + return rule_types, excluded_rules def find_grammar_rules(grammar_path): @@ -289,10 +232,10 @@ def generate_implementation_stub(rule_name, cpp_type): """ -def generate_code_for_missing_rules(generation_queue, rule_to_type): +def generate_code_for_missing_rules(generation_queue, rule_types): """ Iterates the generation queue and prints stub code, grouped by rule. - Caller is responsible for ensuring all rules have types in rule_to_type. + Caller is responsible for ensuring all rules have entries in rule_types. """ if not generation_queue: print("\nNo missing rules to generate.") @@ -308,7 +251,7 @@ def generate_code_for_missing_rules(generation_queue, rule_to_type): for rule_name, cpp_filename in sorted(rules_to_generate): cpp_path = TRANSFORMER_DIR / cpp_filename - cpp_type = rule_to_type[rule_name] + cpp_type = rule_types[rule_name].cpp_type # Constraint: Do not generate code for non-existent files if not cpp_path.is_file(): @@ -342,7 +285,7 @@ def main(): args = parser.parse_args() - rule_to_type = load_grammar_types(GRAMMAR_TYPES_FILE) + rule_types, excluded_rules = load_grammar_types(GRAMMAR_TYPES_FILE) grammar_rules_by_file = find_grammar_rules(Path(GRAMMAR_DIR)) transformer_impls = find_transformer_rules(Path(TRANSFORMER_DIR)) enum_rules, registered_rules, directly_registered_rules = find_factory_registrations(Path(FACTORY_REG_FILE)) @@ -379,7 +322,7 @@ def main(): for rule_name in sorted(grammar_rules): total_rules_scanned += 1 - if rule_name in EXCLUDED_RULES: + if rule_name in excluded_rules: print(f"{'[ EXCLUDED ]':<14} {rule_name}") continue @@ -428,7 +371,7 @@ def main(): print("\n--- Summary: Rule Coverage ---") print(f"{'TOTAL RULES SCANNED':<25} : {total_rules_scanned}") - print(f" {' - Excluded':<23} : {len(EXCLUDED_RULES)}") + print(f" {' - Excluded':<23} : {len(excluded_rules)}") print("---------------------------------------") print(f"{'TOTAL ACTIONABLE RULES':<25} : {total_grammar_rules}") print(f"{'TOTAL COVERED':<25} : {total_covered} ({coverage:.2f}%)") @@ -442,25 +385,25 @@ def main(): print(f"{file_name:<25} : {count} issues") print("\n--- Orphan / Mismatch Check ---") - orphan_transformers = transformer_impls - all_grammar_rules_flat - EXCLUDED_RULES + orphan_transformers = transformer_impls - all_grammar_rules_flat - excluded_rules if orphan_transformers: print("\n[!] Orphan Transformer Functions (No matching grammar rule):") for rule in sorted(list(orphan_transformers)): print(f" - Transform{rule}") - orphan_enums = enum_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_enums = enum_rules - all_grammar_rules_flat - excluded_rules if orphan_enums: print("\n[!] Orphan Enum Rules (No matching grammar rule):") for rule in sorted(list(orphan_enums)): print(f' - RegisterEnum("{rule}")') - orphan_registrations = registered_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_registrations = registered_rules - all_grammar_rules_flat - excluded_rules if orphan_registrations: print("\n[!] Orphan Registrations (No matching grammar rule):") for rule in sorted(list(orphan_registrations)): print(f" - REGISTER_TRANSFORM(Transform{rule})") - orphan_direct = directly_registered_rules - all_grammar_rules_flat - EXCLUDED_RULES + orphan_direct = directly_registered_rules - all_grammar_rules_flat - excluded_rules if orphan_direct: print("\n[!] Orphan Direct Registrations (No matching grammar rule):") for rule in sorted(list(orphan_direct)): @@ -480,14 +423,14 @@ def main(): if args.generate: all_rules_to_generate = [r for rules in generation_queue.values() for r in rules] - missing_from_yaml = [r for r in all_rules_to_generate if r not in rule_to_type] + missing_from_yaml = [r for r in all_rules_to_generate if r not in rule_types] if missing_from_yaml: print("\n--- Error: Missing Return Types in grammar_types.yml ---") print("Add the following rules before generating stubs:") for rule in sorted(missing_from_yaml): print(f" {rule}") sys.exit(1) - generate_code_for_missing_rules(generation_queue, rule_to_type) + generate_code_for_missing_rules(generation_queue, rule_types) if __name__ == "__main__": diff --git a/scripts/parser/grammar_types.yml b/scripts/parser/grammar_types.yml index 21e0a0440d0e..31730aaf0755 100644 --- a/scripts/parser/grammar_types.yml +++ b/scripts/parser/grammar_types.yml @@ -19,7 +19,10 @@ # # Rules handled by RegisterEnum() are NOT listed here (they are auto-detected # by generate_transformer.py from peg_transformer_factory.cpp). -# Rules in EXCLUDED_RULES in generate_transformer.py are also NOT listed here. +# +# excluded_rules: rules skipped during stub generation. When encountered as +# children in auto-generated sequence rules they are treated as skip elements +# (no semantic value extracted). # --------------------------------------------------------------------------- # unique_ptr categories @@ -27,11 +30,13 @@ SQLStatementList: type: "vector>" + by_value: true rules: - Program SQLStatement: type: "unique_ptr" + by_value: true rules: - Statement - AlterStatement @@ -69,6 +74,9 @@ SQLStatement: - SelectStatement - SetStatement - TransactionStatement + - BeginTransaction + - CommitTransaction + - RollbackTransaction - TriggerBody - UpdateStatement - UseStatement @@ -76,6 +84,7 @@ SQLStatement: SelectStatement: type: "unique_ptr" + by_value: true rules: - DescribeStatement - PivotStatement @@ -95,19 +104,14 @@ SelectStatement: SetStatement: type: "unique_ptr" + by_value: true rules: - SetTimeZone - StandardAssignment -TransactionStatement: - type: "unique_ptr" - rules: - - BeginTransaction - - CommitTransaction - - RollbackTransaction - QueryNode: type: "unique_ptr" + by_value: true rules: - ShowSelect - ShowTables @@ -116,6 +120,7 @@ QueryNode: SelectNode: type: "unique_ptr" + by_value: true rules: - SelectFrom - SelectFromClause @@ -124,12 +129,14 @@ SelectNode: SetOperationNode: type: "unique_ptr" + by_value: true rules: - SetopClause - SetIntersectClause CreateStatement: type: "unique_ptr" + by_value: true rules: - CreateSchemaStmt - CreateSecretStmt @@ -144,6 +151,7 @@ CreateStatement: DropStatement: type: "unique_ptr" + by_value: true rules: - DropEntries - DropTable @@ -159,6 +167,7 @@ DropStatement: AlterInfo: type: "unique_ptr" + by_value: true rules: - AlterOptions - AlterTableStmt @@ -171,6 +180,7 @@ AlterInfo: AlterTableInfo: type: "unique_ptr" + by_value: true rules: - AlterTableOptions - AddColumn @@ -194,11 +204,13 @@ AlterTableInfo: CreateTypeInfo: type: "unique_ptr" + by_value: true rules: - CreateType MacroFunction: type: "unique_ptr" + by_value: true rules: - MacroDefinition - TableMacroDefinition @@ -206,6 +218,7 @@ MacroFunction: OnConflictInfo: type: "unique_ptr" + by_value: true rules: - OnConflictClause - OnConflictAction @@ -214,6 +227,7 @@ OnConflictInfo: MergeIntoAction: type: "unique_ptr" + by_value: true rules: - MatchedClauseAction - UpdateMatchClause @@ -229,6 +243,7 @@ MergeIntoAction: UpdateSetInfo: type: "unique_ptr" + by_value: true rules: - UpdateSetClause - UpdateSetTuple @@ -237,6 +252,7 @@ UpdateSetInfo: Constraint: type: "unique_ptr" + by_value: true rules: - TopLevelConstraint - TopLevelConstraintList @@ -247,11 +263,13 @@ Constraint: ForeignKeyConstraint: type: "unique_ptr" + by_value: true rules: - ForeignKeyConstraint WindowExpression: type: "unique_ptr" + by_value: true rules: - OverClause - WindowFrame @@ -263,6 +281,7 @@ WindowExpression: SampleOptions: type: "unique_ptr" + by_value: true rules: - SampleClause - SampleEntry @@ -272,6 +291,7 @@ SampleOptions: ResultModifier: type: "unique_ptr" + by_value: true rules: - LimitOffset - OffsetLimitClause @@ -279,12 +299,14 @@ ResultModifier: AtClause: type: "unique_ptr" + by_value: true rules: - AtClause - AtSpecifier Expression: type: "unique_ptr" + by_value: true rules: - Expression - SingleExpression @@ -422,6 +444,7 @@ Expression: ColumnRefExpression: type: "unique_ptr" + by_value: true rules: - NestedColumnName - CatalogReservedSchemaTableColumnName @@ -430,6 +453,7 @@ ColumnRefExpression: TableRef: type: "unique_ptr" + by_value: true rules: - TableRef - FromClause @@ -457,6 +481,7 @@ TableRef: BaseTableRef: type: "unique_ptr" + by_value: true rules: - BaseTableName - SchemaReservedTable @@ -470,6 +495,7 @@ BaseTableRef: ExpressionList: type: "vector>" + by_value: true rules: - TypeModifiers - BoundedListExpression @@ -496,16 +522,19 @@ ExpressionList: TableRefList: type: "vector>" + by_value: true rules: - DeleteUsingClause ResultModifierList: type: "vector>" + by_value: true rules: - ResultModifiers OrderByNodeList: type: "vector" + by_value: false rules: - OrderByClause - OrderByExpressions @@ -515,11 +544,13 @@ OrderByNodeList: MacroParameterList: type: "vector" + by_value: false rules: - MacroParameters StringList: type: "vector" + by_value: false rules: - ColumnIdList - InsertColumnList @@ -531,6 +562,7 @@ StringList: GenericCopyOptionList: type: "vector" + by_value: true rules: - AttachOptions - GenericCopyOptionList @@ -540,6 +572,7 @@ GenericCopyOptionList: WindowBoundaryExpressionList: type: "vector" + by_value: false rules: - FrameExtent - BetweenFrameExtent @@ -547,12 +580,14 @@ WindowBoundaryExpressionList: PivotColumnList: type: "vector" + by_value: false rules: - PivotColumnList - PivotOn PivotColumnEntryList: type: "vector" + by_value: false rules: - PivotTargetList - UnpivotTargetList @@ -563,6 +598,7 @@ PivotColumnEntryList: QualifiedName: type: "QualifiedName" + by_value: false rules: - QualifiedSequenceName - IdentifierOrStringLiteral @@ -581,11 +617,13 @@ QualifiedName: OrderByNode: type: "OrderByNode" + by_value: false rules: - OrderByExpression GroupByNode: type: "GroupByNode" + by_value: false rules: - GroupByClause - GroupByExpressions @@ -594,6 +632,7 @@ GroupByNode: LimitPercentResult: type: "LimitPercentResult" + by_value: false rules: - LimitClause - LimitValue @@ -605,6 +644,7 @@ LimitPercentResult: DistinctClause: type: "DistinctClause" + by_value: false rules: - DistinctClause - DistinctOn @@ -612,6 +652,7 @@ DistinctClause: TableAlias: type: "TableAlias" + by_value: false rules: - TableAlias - TableAliasAs @@ -619,6 +660,7 @@ TableAlias: JoinPrefix: type: "JoinPrefix" + by_value: false rules: - JoinPrefix - CrossJoinPrefix @@ -627,6 +669,7 @@ JoinPrefix: JoinQualifier: type: "JoinQualifier" + by_value: false rules: - JoinQualifier - OnClause @@ -634,11 +677,13 @@ JoinQualifier: WindowFrame: type: "WindowFrame" + by_value: false rules: - FrameClause WindowBoundaryExpression: type: "WindowBoundaryExpression" + by_value: false rules: - FrameBound - FrameUnbounded @@ -647,6 +692,7 @@ WindowBoundaryExpression: PivotColumn: type: "PivotColumn" + by_value: false rules: - PivotColumnEntry - PivotColumnSubquery @@ -656,6 +702,7 @@ PivotColumn: GenericCopyOption: type: "GenericCopyOption" + by_value: false rules: - SpecializedOption - SingleOption @@ -671,6 +718,7 @@ GenericCopyOption: MacroParameter: type: "MacroParameter" + by_value: false rules: - MacroParameter - SimpleParameter @@ -678,6 +726,7 @@ MacroParameter: TriggerEventInfo: type: "TriggerEventInfo" + by_value: false rules: - TriggerEvent - TriggerEventInsert @@ -687,6 +736,7 @@ TriggerEventInfo: VacuumOptions: type: "VacuumOptions" + by_value: false rules: - VacuumOptions - VacuumLegacyOptions @@ -694,12 +744,14 @@ VacuumOptions: SettingInfo: type: "SettingInfo" + by_value: false rules: - SetSetting - SetVariable PartitionSortedOptions: type: "PartitionSortedOptions" + by_value: false rules: - PartitionSortedOptions - PartitionOptSortedOptions @@ -707,6 +759,7 @@ PartitionSortedOptions: ColumnElements: type: "ColumnElements" + by_value: false rules: - CreateColumnList - CreateTableColumnList @@ -717,6 +770,7 @@ ColumnElements: String: type: "string" + by_value: false rules: - AttachAlias - StringLiteral @@ -773,9 +827,11 @@ String: - ConstraintName - ExplainOptionName - OperatorLiteral + - DotIdentifier Bool: type: "bool" + by_value: false rules: - TimeZone - WithOrWithout @@ -792,16 +848,19 @@ Bool: - AnyOrAll - IgnoreOrRespectNulls - PrecedingOrFollowing + - IfExists - IncludeOrExcludeNulls Int64: type: "int64_t" + by_value: false rules: - ArrayBounds - SquareBracketsArray LogicalType: type: "LogicalType" + by_value: false rules: - Type - EnumStringLiteralList @@ -917,4 +976,75 @@ overrides: NullsFirstOrLast: "OrderByNullType" # update.gram UpdateSetElement: "pair>" - UpdateSetColumnTarget: "string" \ No newline at end of file + UpdateSetColumnTarget: "string" +# --------------------------------------------------------------------------- +# excluded_rules: no transformer stub generated, no semantic value extracted +# --------------------------------------------------------------------------- + +excluded_rules: + - Program + - FunctionType + - Database + - AbortOrRollback + - CommitOrEnd + - StartOrBegin + - Transaction + - VariableAssign + - MacroOrFunction + - SettingScope + - ColLabel + - GroupingOrGroupingId + - DefaultValues + - RowOrRows + - Recursive + - StarSymbol + - IfNotExists + - PlainIdentifier + - QuotedIdentifier + - CreateTableColumnElement + - OrReplace + - ReservedIdentifier + - CatalogName + - SchemaName + - ReservedSchemaName + - TableName + - ConstraintName + - IntervalNumber + - ReservedTableName + - ColumnName + - ReservedColumnName + - FunctionName + - ReservedFunctionName + - TableFunctionName + - TypeName + - PragmaName + - SettingName + - CopyOptionName + - AtTimeZoneOperator + - Generated + - ColumnConstraint + - AlwaysOrByDefault + - Lateral + - ConstraintNameClause + - ReservedSchemaQualification + - UsingSample + - TableSample + - TypeList + - NamedParameterAssignment + - WithOrdinality + - ByName + - CollateOperator + - ExportClause + - ValueOrValues + - PivotKeyword + - UnpivotKeyword + - Unique + - DefArg + - NoneLiteral + - RowOrStruct + - ForEachRow + - ForEachStatement + - SetData + - CTEBodyContent + - SingleArrowPair + - OperatorLiteral diff --git a/scripts/parser/inline_grammar.py b/scripts/parser/inline_grammar.py index 0f0bdf1ffe94..568a69cf41e7 100644 --- a/scripts/parser/inline_grammar.py +++ b/scripts/parser/inline_grammar.py @@ -3,27 +3,12 @@ from enum import Enum, auto from pathlib import Path -parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files') -parser.add_argument( - '--print', action='store_true', help='Print the grammar instead of writing to a file', default=False -) -parser.add_argument( - '--grammar-file', - action='store_true', - help='Write the grammar to a .gram file instead of a C++ header', - default=False, -) - -args = parser.parse_args() - scripts_dir = Path(__file__).parent.parent peg_dir = scripts_dir.parent / 'src' / 'parser' / 'peg' statements_dir = peg_dir / 'grammar' / 'statements' keywords_dir = peg_dir / 'grammar' / 'keywords' target_file = scripts_dir.parent / 'src' / 'include' / 'duckdb' / 'parser' / 'peg' / 'inlined_grammar.hpp' -contents = "" - IMPLICIT_RULES = {'%whitespace'} # Maps filenames to string categories. @@ -46,50 +31,13 @@ "TYPE_NAME_KEYWORD": "typename_keyword_map", } -# Use a dictionary of sets to collect keywords for each category, preventing duplicates -keyword_sets = {category: set() for category in CPP_MAP_NAMES} - -# --- Validation and Loading (largely unchanged) --- -# For validation during the loading phase -reserved_set = set() -unreserved_set = set() - def load_keywords(path): with open(path, "r") as f: return [line.strip().lower() for line in f if line.strip()] -for filepath in keywords_dir.iterdir(): - if filepath.name not in FILENAME_TO_CATEGORY: - continue - - category = FILENAME_TO_CATEGORY[filepath.name] - keywords = load_keywords(filepath) - - for kw in keywords: - # Validation logic remains the same to enforce rules - if category == "RESERVED_KEYWORD": - if kw in reserved_set or kw in unreserved_set: - print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") - exit(1) - reserved_set.add(kw) - elif category == "UNRESERVED_KEYWORD": - if kw in reserved_set or kw in unreserved_set: - print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") - exit(1) - unreserved_set.add(kw) - - # Add the keyword to the primary set - keyword_sets[category].add(kw) - - # type_name_keyword.list also populates typename_keyword_map - if filepath.name == "type_name_keyword.list": - keyword_sets["TYPE_NAME_KEYWORD"].add(kw) - - -def write_keyword_map(): - # --- C++ Code Generation --- +def write_keyword_map(keyword_sets): output_path = peg_dir / "keyword_map.cpp" with open(output_path, "w") as f: f.write("/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */\n") @@ -99,19 +47,12 @@ def write_keyword_map(): f.write("\tif (initialized) {\n\t\treturn;\n\t};\n") f.write("\tinitialized = true;\n\n") - # Get the total number of categories to handle the last item differently num_categories = len(keyword_sets) - - # Iterate through each category and generate code for each map for i, (category, keywords) in enumerate(keyword_sets.items()): cpp_map_name = CPP_MAP_NAMES[category] f.write(f"\t// Populating {cpp_map_name}\n") - # Sort keywords for deterministic output for kw in sorted(keywords): - # Populate the C++ set with insert f.write(f'\t{cpp_map_name}.insert("{kw}");\n') - - # Add a newline for all but the last block if i < num_categories - 1: f.write("\n") f.write("}\n") @@ -132,12 +73,16 @@ class PEGTokenType(Enum): REGEX = auto() OPERATOR = auto() + def __str__(self): + return self.value + class PEGGrammarRule: def __init__(self): self.rule_name = None self.tokens = [] self.parameters = {} + self.return_type = None def has_tokens(self): return bool(self.tokens) @@ -145,10 +90,14 @@ def has_tokens(self): def clear(self): self.tokens = [] self.parameters = {} + self.return_type = None def references(self): return [t.text for t in self.tokens if t.type in (PEGTokenType.REFERENCE, PEGTokenType.FUNCTION_CALL)] + def __str__(self): + return f"{self.rule_name} [{self.return_type}]<- {self.tokens}" + class ParseState(Enum): RULE_NAME = auto() @@ -357,58 +306,6 @@ def check_undefined_rules(all_rules): exit(1) -rules = {} - -with open(statements_dir / "common.gram", 'r') as f: - file_content = f.read() - try: - update_rules(rules, parse_peg_grammar(file_content), "common.gram") - except Exception as e: - raise Exception(f"common.gram: {e}") from None - contents += file_content + "\n" - - -for filepath in keywords_dir.iterdir(): - if filepath.suffix != '.list': - continue - rule_name = filename_to_upper_camel(filepath) - rule = f"{rule_name} <- " - with open(filepath, 'r') as f: - lines = [f"'{line.strip()}'" for line in f if line.strip()] - rule += " /\n".join(lines) + "\n" - contents += rule - update_rules(rules, parse_peg_grammar(rule), filepath.name) - -for filepath in statements_dir.iterdir(): - if filepath.suffix != '.gram': - if not filepath.is_dir(): - raise Exception(f"File {filepath.name} does not end with .gram") - continue - if filepath.name != "common.gram": - with open(filepath, 'r') as f: - file_content = f.read() - try: - update_rules(rules, parse_peg_grammar(file_content), filepath.name) - except Exception as e: - raise Exception(f"{filepath.name}: {e}") from None - contents += file_content + "\n" - -check_unused_rules(rules) -check_undefined_rules(rules) - -write_keyword_map() - -if args.print: - print(contents) - exit(0) - -if args.grammar_file: - grammar_file = target_file.with_suffix('.gram') - with open(grammar_file, 'w+') as f: - f.write(contents) - exit(0) - - def get_grammar_bytes(contents): result_text = "" for line in contents.split('\n'): @@ -418,19 +315,116 @@ def get_grammar_bytes(contents): return result_text -with open(target_file, 'w+') as f: - f.write( - '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */ +def load_all_rules(): + """Load and validate all grammar rules from statements/ and keywords/. Returns (rules, contents).""" + keyword_sets = {category: set() for category in CPP_MAP_NAMES} + reserved_set = set() + unreserved_set = set() + contents = "" + rules = {} + + for filepath in keywords_dir.iterdir(): + if filepath.name not in FILENAME_TO_CATEGORY: + continue + category = FILENAME_TO_CATEGORY[filepath.name] + keywords = load_keywords(filepath) + for kw in keywords: + if category == "RESERVED_KEYWORD": + if kw in reserved_set or kw in unreserved_set: + print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") + exit(1) + reserved_set.add(kw) + elif category == "UNRESERVED_KEYWORD": + if kw in reserved_set or kw in unreserved_set: + print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories") + exit(1) + unreserved_set.add(kw) + keyword_sets[category].add(kw) + if filepath.name == "type_name_keyword.list": + keyword_sets["TYPE_NAME_KEYWORD"].add(kw) + + with open(statements_dir / "common.gram", 'r') as f: + file_content = f.read() + try: + update_rules(rules, parse_peg_grammar(file_content), "common.gram") + except Exception as e: + raise Exception(f"common.gram: {e}") from None + contents += file_content + "\n" + + for filepath in keywords_dir.iterdir(): + if filepath.suffix != '.list': + continue + rule_name = filename_to_upper_camel(filepath) + rule = f"{rule_name} <- " + with open(filepath, 'r') as f: + lines = [f"'{line.strip()}'" for line in f if line.strip()] + rule += " /\n".join(lines) + "\n" + contents += rule + update_rules(rules, parse_peg_grammar(rule), filepath.name) + + for filepath in statements_dir.iterdir(): + if filepath.suffix != '.gram': + if not filepath.is_dir(): + raise Exception(f"File {filepath.name} does not end with .gram") + continue + if filepath.name != "common.gram": + with open(filepath, 'r') as f: + file_content = f.read() + try: + update_rules(rules, parse_peg_grammar(file_content), filepath.name) + except Exception as e: + raise Exception(f"{filepath.name}: {e}") from None + contents += file_content + "\n" + + check_unused_rules(rules) + check_undefined_rules(rules) + + return rules, contents, keyword_sets + + +def main(): + arg_parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files') + arg_parser.add_argument( + '--print', action='store_true', default=False, help='Print the grammar instead of writing to a file' + ) + arg_parser.add_argument( + '--grammar-file', + action='store_true', + default=False, + help='Write the grammar to a .gram file instead of a C++ header', + ) + args = arg_parser.parse_args() + + rules, contents, keyword_sets = load_all_rules() + write_keyword_map(keyword_sets) + + if args.print: + print(contents) + return + + if args.grammar_file: + grammar_file = target_file.with_suffix('.gram') + with open(grammar_file, 'w+') as f: + f.write(contents) + return + + with open(target_file, 'w+') as f: + f.write( + '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */ #pragma once namespace duckdb { const char INLINED_PEG_GRAMMAR[] = { ''' - + get_grammar_bytes(contents) - + ''' + + get_grammar_bytes(contents) + + ''' }; } // namespace duckdb ''' - ) + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/prepare_build_artifact.sh b/scripts/prepare_build_artifact.sh index b8c9480990ef..f87529ba8cc2 100644 --- a/scripts/prepare_build_artifact.sh +++ b/scripts/prepare_build_artifact.sh @@ -37,6 +37,13 @@ else echo "No $BUILD_DIR/src/libduckdb.so* files found" fi +# Required by jobs that link against the prebuilt static library. +if [[ -f "$BUILD_DIR/src/libduckdb_static.a" ]]; then + cp -av "$BUILD_DIR/src/libduckdb_static.a" "$ARTIFACT_DIR"/src/ +else + echo "No $BUILD_DIR/src/libduckdb_static.a file found" +fi + # Required by regression jobs that run the prebuilt benchmark runner. if [[ -f "$BUILD_DIR/benchmark/benchmark_runner" ]]; then mkdir -p "$ARTIFACT_DIR"/benchmark "$ARTIFACT_DIR"/scripts diff --git a/src/common/settings.json b/src/common/settings.json index 72ce76a94b4a..ff5801cc3f3e 100644 --- a/src/common/settings.json +++ b/src/common/settings.json @@ -338,6 +338,13 @@ "scope": "global", "custom_implementation": true }, + { + "name": "debug_verification_projection", + "description": "DEBUG SETTING: add internal verification projections to stress optimizers", + "type": "BOOLEAN", + "default_scope": "global", + "default_value": "false" + }, { "name": "debug_verify_blocks", "description": "DEBUG SETTING: verify block metadata during checkpointing", @@ -345,6 +352,13 @@ "default_scope": "global", "default_value": "false" }, + { + "name": "debug_verify_column_bindings", + "description": "DEBUG SETTING: run extra internal verification of column bindings", + "type": "BOOLEAN", + "default_scope": "global", + "default_value": "false" + }, { "name": "debug_verify_serializer", "description": "DEBUG SETTING: verify logical plan serializer", diff --git a/src/execution/column_binding_resolver.cpp b/src/execution/column_binding_resolver.cpp index 89ea5c4fceb2..993e09c8af79 100644 --- a/src/execution/column_binding_resolver.cpp +++ b/src/execution/column_binding_resolver.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/operator/logical_extension_operator.hpp" #include "duckdb/planner/operator/logical_insert.hpp" #include "duckdb/planner/operator/logical_recursive_cte.hpp" +#include "duckdb/main/settings.hpp" namespace duckdb { @@ -244,13 +245,14 @@ unordered_set ColumnBindingResolver::VerifyInternal(LogicalOperator return result; } -void ColumnBindingResolver::Verify(LogicalOperator &op) { -#ifdef DEBUG +void ColumnBindingResolver::Verify(ClientContext &context, LogicalOperator &op) { + if (!Settings::Get(context)) { + return; + } op.ResolveOperatorTypes(); ColumnBindingResolver resolver(true); resolver.VisitOperator(op); VerifyInternal(op); -#endif } } // namespace duckdb diff --git a/src/include/duckdb/execution/column_binding_resolver.hpp b/src/include/duckdb/execution/column_binding_resolver.hpp index d1df03f4f979..ba9b5a436a3f 100644 --- a/src/include/duckdb/execution/column_binding_resolver.hpp +++ b/src/include/duckdb/execution/column_binding_resolver.hpp @@ -22,7 +22,7 @@ class ColumnBindingResolver : public LogicalOperatorVisitor { explicit ColumnBindingResolver(bool verify_only = false); void VisitOperator(LogicalOperator &op) override; - static void Verify(LogicalOperator &op); + static void Verify(ClientContext &context, LogicalOperator &op); protected: vector bindings; diff --git a/src/include/duckdb/main/settings.hpp b/src/include/duckdb/main/settings.hpp index ff95eb44e0d3..27f1bb942e2a 100644 --- a/src/include/duckdb/main/settings.hpp +++ b/src/include/duckdb/main/settings.hpp @@ -558,6 +558,17 @@ struct DebugVerificationModeSetting { static Value GetSetting(const ClientContext &context); }; +struct DebugVerificationProjectionSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "debug_verification_projection"; + static constexpr const char *Description = + "DEBUG SETTING: add internal verification projections to stress optimizers"; + static constexpr const char *InputType = "BOOLEAN"; + static constexpr const char *DefaultValue = "false"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; + static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); +}; + struct DebugVerifyBlocksSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "debug_verify_blocks"; @@ -568,6 +579,16 @@ struct DebugVerifyBlocksSetting { static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); }; +struct DebugVerifyColumnBindingsSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "debug_verify_column_bindings"; + static constexpr const char *Description = "DEBUG SETTING: run extra internal verification of column bindings"; + static constexpr const char *InputType = "BOOLEAN"; + static constexpr const char *DefaultValue = "false"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; + static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); +}; + struct DebugVerifySerializerSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "debug_verify_serializer"; diff --git a/src/include/duckdb/parser/peg/inlined_grammar.gram b/src/include/duckdb/parser/peg/inlined_grammar.gram index a60df681d392..f37066dded89 100644 --- a/src/include/duckdb/parser/peg/inlined_grammar.gram +++ b/src/include/duckdb/parser/peg/inlined_grammar.gram @@ -790,7 +790,7 @@ MapExpression <- 'MAP' MapStructExpression MapStructExpression <- '{' List(MapStructField)? '}' MapStructField <- Expression ':' Expression -GroupingExpression <- GroupingOrGroupingId Parens(List(Expression)) +GroupingExpression <- GroupingOrGroupingId Parens(List(Expression)?) GroupingOrGroupingId <- 'GROUPING' / 'GROUPING_ID' Parameter <- QuestionMarkNumberedParameter / AnonymousParameter / NumberedParameter / ColLabelParameter QuestionMarkNumberedParameter <- '?' NumberLiteral @@ -1444,9 +1444,7 @@ VariableList <- List(Expression) ResetStatement <- 'RESET' (SetVariable / SetSetting) ExportStatement <- 'EXPORT' 'DATABASE' ExportSource? StringLiteral GenericCopyOptionList? - ExportSource <- CatalogName 'TO' - ImportStatement <- 'IMPORT' 'DATABASE' StringLiteral CheckpointStatement <- 'FORCE'? 'CHECKPOINT' CatalogName? @@ -1652,7 +1650,8 @@ DetachStatement <- 'DETACH' Database? IfExists? CatalogName UseStatement <- 'USE' UseTarget UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* +UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +DotIdentifier <- '.' Identifier CallStatement <- 'CALL' QualifiedTableFunction TableFunctionArguments diff --git a/src/include/duckdb/parser/peg/inlined_grammar.hpp b/src/include/duckdb/parser/peg/inlined_grammar.hpp index 5527052577fc..5789febcaabd 100644 --- a/src/include/duckdb/parser/peg/inlined_grammar.hpp +++ b/src/include/duckdb/parser/peg/inlined_grammar.hpp @@ -1476,7 +1476,8 @@ const char INLINED_PEG_GRAMMAR[] = { "DetachStatement <- 'DETACH' Database? IfExists? CatalogName\n" "UseStatement <- 'USE' UseTarget\n" "UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName\n" - "UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)*\n" + "UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier*\n" + "DotIdentifier <- '.' Identifier\n" "CallStatement <- 'CALL' QualifiedTableFunction TableFunctionArguments\n" }; diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index d5868fd6179a..3d5f4fce1b94 100644 --- a/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -208,6 +208,14 @@ class PEGTransformer { ParserOptions options; }; +typedef unique_ptr (*transform_function_t)(PEGTransformer &transformer, + ParseResult &parse_result); + +struct TransformRule { + const char *name; + transform_function_t transform; +}; + class PEGTransformerFactory { public: explicit PEGTransformerFactory(); @@ -269,14 +277,11 @@ class PEGTransformerFactory { void RegisterCreateTrigger(); void RegisterDeallocate(); void RegisterDelete(); - void RegisterDetach(); void RegisterDescribe(); void RegisterDrop(); void RegisterExecute(); void RegisterExplain(); - void RegisterExport(); void RegisterExpression(); - void RegisterImport(); void RegisterInsert(); void RegisterLoad(); void RegisterMergeInto(); @@ -284,13 +289,13 @@ class PEGTransformerFactory { void RegisterPragma(); void RegisterPrepare(); void RegisterSelect(); - void RegisterUse(); void RegisterSet(); void RegisterTransaction(); void RegisterUpdate(); void RegisterVacuum(); void RegisterKeywordsAndIdentifiers(); void RegisterEnums(); + void RegisterGenerated(); private: template @@ -614,9 +619,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static unique_ptr TransformTruncateStatement(PEGTransformer &transformer, ParseResult &parse_result); - // detach.gram - static unique_ptr TransformDetachStatement(PEGTransformer &transformer, ParseResult &parse_result); - // drop.gram static unique_ptr TransformDropStatement(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropEntries(PEGTransformer &transformer, ParseResult &parse_result); @@ -633,6 +635,7 @@ class PEGTransformerFactory { static unique_ptr TransformDropCollation(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropType(PEGTransformer &transformer, ParseResult &parse_result); static bool TransformDropBehavior(PEGTransformer &transformer, ParseResult &parse_result); + static bool TransformIfExists(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropSecret(PEGTransformer &transformer, ParseResult &parse_result); static string TransformDropSecretStorage(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformDropTrigger(PEGTransformer &transformer, ParseResult &parse_result); @@ -647,10 +650,6 @@ class PEGTransformerFactory { static vector TransformExplainOptionList(PEGTransformer &transformer, ParseResult &parse_result); static GenericCopyOption TransformExplainOption(PEGTransformer &transformer, ParseResult &parse_result); - // export.gram - static unique_ptr TransformExportStatement(PEGTransformer &transformer, ParseResult &parse_result); - static string TransformExportSource(PEGTransformer &transformer, ParseResult &parse_result); - // expression.gram static unique_ptr TransformExpressionStatement(PEGTransformer &transformer, ParseResult &parse_result); @@ -886,7 +885,7 @@ class PEGTransformerFactory { static unique_ptr TransformListComprehensionFilter(PEGTransformer &transformer, ParseResult &parse_result); static ExpressionType TransformIsDistinctFromOp(PEGTransformer &transformer, ParseResult &parse_result); - // + static unique_ptr TransformGroupingExpression(PEGTransformer &transformer, ParseResult &parse_result); static qualified_column_map_t TransformRenameList(PEGTransformer &transformer, ParseResult &parse_result); @@ -898,9 +897,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static bool TransformIgnoreOrRespectNulls(PEGTransformer &transformer, ParseResult &parse_result); - // import.gram - static unique_ptr TransformImportStatement(PEGTransformer &transformer, ParseResult &parse_result); - // insert.gram static unique_ptr TransformInsertStatement(PEGTransformer &transformer, ParseResult &parse_result); static OnConflictAction TransformOrAction(PEGTransformer &transformer, ParseResult &parse_result); @@ -1192,14 +1188,7 @@ class PEGTransformerFactory { static string TransformIdentifierOrKeyword(PEGTransformer &transformer, ParseResult &parse_result); // transaction.gram - static unique_ptr TransformTransactionStatement(PEGTransformer &transformer, - ParseResult &parse_result); - static unique_ptr TransformBeginTransaction(PEGTransformer &transformer, - ParseResult &parse_result); - static TransactionModifierType TransformReadOrWrite(PEGTransformer &transformer, ParseResult &parse_result); static TransactionModifierType TransformReadOnlyOrReadWrite(PEGTransformer &transformer, ParseResult &parse_result); - static unique_ptr TransformCommitTransaction(PEGTransformer &, ParseResult &parse_result); - static unique_ptr TransformRollbackTransaction(PEGTransformer &, ParseResult &parse_result); // update.gram static unique_ptr TransformUpdateStatement(PEGTransformer &transformer, ParseResult &parse_result); @@ -1215,11 +1204,6 @@ class PEGTransformerFactory { ParseResult &parse_result); static string TransformUpdateSetColumnTarget(PEGTransformer &transformer, ParseResult &parse_result); - // use.gram - static unique_ptr TransformUseStatement(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &parse_result); - static QualifiedName TransformUseTargetCatalogSchema(PEGTransformer &transformer, ParseResult &parse_result); - // vacuum.gram static unique_ptr TransformVacuumStatement(PEGTransformer &transformer, ParseResult &parse_result); static VacuumOptions TransformVacuumOptions(PEGTransformer &transformer, ParseResult &parse_result); @@ -1228,6 +1212,10 @@ class PEGTransformerFactory { static string TransformVacuumOption(PEGTransformer &transformer, ParseResult &parse_result); static vector TransformNameList(PEGTransformer &transformer, ParseResult &parse_result); +#define DUCKDB_INSIDE_PEG_TRANSFORMER_HPP +#include "duckdb/parser/peg/transformer/peg_transformer_generated.hpp" +#undef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP + private: PEGParser parser; case_insensitive_map_t sql_transform_functions; diff --git a/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp new file mode 100644 index 000000000000..288eb86dbfb3 --- /dev/null +++ b/src/include/duckdb/parser/peg/transformer/peg_transformer_generated.hpp @@ -0,0 +1,43 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#ifdef DUCKDB_INSIDE_PEG_TRANSFORMER_HPP +static unique_ptr TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformUseStatement(const QualifiedName &use_target); +static unique_ptr TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static QualifiedName TransformUseTarget(PEGTransformer &transformer, ParseResult &choice_result); +static unique_ptr TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static QualifiedName TransformUseTargetCatalogSchema(const string &catalog_name, const string &reserved_schema_name, + const vector &dot_identifier); +static unique_ptr TransformDotIdentifierInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static string TransformDotIdentifier(const string &identifier); +static unique_ptr TransformTransactionStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformBeginTransaction(const TransactionModifierType &read_or_write); +static unique_ptr TransformRollbackTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformRollbackTransaction(); +static unique_ptr TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformCommitTransaction(); +static unique_ptr TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static TransactionModifierType TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write); +static unique_ptr TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformDetachStatement(const bool &if_exists, const string &catalog_name); +static unique_ptr TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformExportStatement(const string &export_source, const string &string_literal, + vector generic_copy_option_list); +static unique_ptr TransformExportSourceInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static string TransformExportSource(const string &catalog_name); +static unique_ptr TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result); +static unique_ptr TransformImportStatement(const string &string_literal); +#endif // DUCKDB_INSIDE_PEG_TRANSFORMER_HPP diff --git a/src/main/client_context.cpp b/src/main/client_context.cpp index 1846dff71856..c872139be150 100644 --- a/src/main/client_context.cpp +++ b/src/main/client_context.cpp @@ -789,7 +789,7 @@ unique_ptr ClientContext::ExtractPlan(const string &query) { } ColumnBindingResolver resolver; - resolver.Verify(*plan); + resolver.Verify(*this, *plan); resolver.VisitOperator(*plan); plan->ResolveOperatorTypes(); diff --git a/src/main/config.cpp b/src/main/config.cpp index a1f920dae935..4c57957a76a3 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -106,7 +106,9 @@ static const ConfigurationOption internal_options[] = { DUCKDB_SETTING_CALLBACK(DebugPhysicalTableScanExecutionStrategySetting), DUCKDB_SETTING(DebugSkipCheckpointOnCommitSetting), DUCKDB_GLOBAL(DebugVerificationModeSetting), + DUCKDB_SETTING(DebugVerificationProjectionSetting), DUCKDB_SETTING(DebugVerifyBlocksSetting), + DUCKDB_SETTING(DebugVerifyColumnBindingsSetting), DUCKDB_SETTING(DebugVerifySerializerSetting), DUCKDB_SETTING_CALLBACK(DebugVerifyStatementSetting), DUCKDB_SETTING(DebugVerifyStatsSetting), @@ -222,12 +224,12 @@ static const ConfigurationOption internal_options[] = { static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("configure_metrics", 27), DUCKDB_SETTING_ALIAS("custom_profiling_settings", 27), - DUCKDB_SETTING_ALIAS("memory_limit", 110), - DUCKDB_SETTING_ALIAS("null_order", 50), - DUCKDB_SETTING_ALIAS("profiling_output", 131), - DUCKDB_SETTING_ALIAS("user", 146), + DUCKDB_SETTING_ALIAS("memory_limit", 112), + DUCKDB_SETTING_ALIAS("null_order", 52), + DUCKDB_SETTING_ALIAS("profiling_output", 133), + DUCKDB_SETTING_ALIAS("user", 148), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 26), - DUCKDB_SETTING_ALIAS("worker_threads", 145), + DUCKDB_SETTING_ALIAS("worker_threads", 147), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/optimizer/column_lifetime_analyzer.cpp b/src/optimizer/column_lifetime_analyzer.cpp index bf7a2d107deb..abbc0370e98e 100644 --- a/src/optimizer/column_lifetime_analyzer.cpp +++ b/src/optimizer/column_lifetime_analyzer.cpp @@ -196,7 +196,9 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) { } void ColumnLifetimeAnalyzer::Verify(LogicalOperator &op) { -#ifdef DEBUG + if (!Settings::Get(optimizer.context)) { + return; + } if (everything_referenced) { return; } @@ -216,7 +218,6 @@ void ColumnLifetimeAnalyzer::Verify(LogicalOperator &op) { default: break; } -#endif } void ColumnLifetimeAnalyzer::AddVerificationProjection(unique_ptr &child) { diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 32b94f180e39..5e113f1f01e2 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -116,7 +116,7 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function &ca } void Optimizer::Verify(LogicalOperator &op) { - ColumnBindingResolver::Verify(op); + ColumnBindingResolver::Verify(context, op); } // Returns true if the plan contains a DML statement (INSERT/UPDATE/DELETE/MERGE INTO) diff --git a/src/optimizer/statistics/expression/propagate_cast.cpp b/src/optimizer/statistics/expression/propagate_cast.cpp index 3e10cc49a35c..8be2c8a6a819 100644 --- a/src/optimizer/statistics/expression/propagate_cast.cpp +++ b/src/optimizer/statistics/expression/propagate_cast.cpp @@ -31,6 +31,9 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log if (source == target) { return true; } + if (source.id() == LogicalTypeId::ENUM || target.id() == LogicalTypeId::ENUM) { + return false; + } // we can only propagate numeric -> numeric switch (source.InternalType()) { case PhysicalType::INT8: @@ -38,6 +41,11 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log case PhysicalType::INT32: case PhysicalType::INT64: case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::UINT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: break; @@ -50,6 +58,11 @@ bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const Log case PhysicalType::INT32: case PhysicalType::INT64: case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::UINT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: break; diff --git a/src/parser/peg/grammar/statements/export.gram b/src/parser/peg/grammar/statements/export.gram index 8be0679e5189..d6d5640bb175 100644 --- a/src/parser/peg/grammar/statements/export.gram +++ b/src/parser/peg/grammar/statements/export.gram @@ -1,5 +1,3 @@ ExportStatement <- 'EXPORT' 'DATABASE' ExportSource? StringLiteral GenericCopyOptionList? - ExportSource <- CatalogName 'TO' - ImportStatement <- 'IMPORT' 'DATABASE' StringLiteral diff --git a/src/parser/peg/grammar/statements/use.gram b/src/parser/peg/grammar/statements/use.gram index 8a3140156004..742362e5c5ff 100644 --- a/src/parser/peg/grammar/statements/use.gram +++ b/src/parser/peg/grammar/statements/use.gram @@ -1,4 +1,5 @@ UseStatement <- 'USE' UseTarget UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* +UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +DotIdentifier <- '.' Identifier diff --git a/src/parser/peg/transformer/CMakeLists.txt b/src/parser/peg/transformer/CMakeLists.txt index 563679da48c3..9b11e62c22eb 100644 --- a/src/parser/peg/transformer/CMakeLists.txt +++ b/src/parser/peg/transformer/CMakeLists.txt @@ -3,6 +3,7 @@ add_library_unity( OBJECT peg_transformer.cpp peg_transformer_factory.cpp + transform_generated.cpp transform_alter.cpp transform_analyze.cpp transform_attach.cpp diff --git a/src/parser/peg/transformer/peg_transformer.cpp b/src/parser/peg/transformer/peg_transformer.cpp index 954c646cbe9a..033ff81464c6 100644 --- a/src/parser/peg/transformer/peg_transformer.cpp +++ b/src/parser/peg/transformer/peg_transformer.cpp @@ -63,7 +63,6 @@ void PEGTransformer::SetParamCount(idx_t new_count) { unique_ptr PEGTransformer::GenerateCreateEnumStmt(unique_ptr entry) { auto result = make_uniq(); auto info = make_uniq(); - info->temporary = true; info->internal = false; info->catalog = INVALID_CATALOG; diff --git a/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/parser/peg/transformer/peg_transformer_factory.cpp index eb5a77b843b4..35775dfd87fa 100644 --- a/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -432,11 +432,6 @@ void PEGTransformerFactory::RegisterDescribe() { REGISTER_TRANSFORM(TransformSummarize); } -void PEGTransformerFactory::RegisterDetach() { - // detach.gram - REGISTER_TRANSFORM(TransformDetachStatement); -} - void PEGTransformerFactory::RegisterDrop() { // drop.gram REGISTER_TRANSFORM(TransformDropStatement); @@ -454,6 +449,7 @@ void PEGTransformerFactory::RegisterDrop() { REGISTER_TRANSFORM(TransformDropCollation); REGISTER_TRANSFORM(TransformDropType); REGISTER_TRANSFORM(TransformDropBehavior); + REGISTER_TRANSFORM(TransformIfExists); REGISTER_TRANSFORM(TransformDropSecret); REGISTER_TRANSFORM(TransformDropSecretStorage); REGISTER_TRANSFORM(TransformDropTrigger); @@ -473,11 +469,6 @@ void PEGTransformerFactory::RegisterExplain() { Register("ExplainOptionName", &TransformIdentifierOrKeyword); } -void PEGTransformerFactory::RegisterExport() { - REGISTER_TRANSFORM(TransformExportSource); - REGISTER_TRANSFORM(TransformExportStatement); -} - void PEGTransformerFactory::RegisterExpression() { // expression.gram REGISTER_TRANSFORM(TransformExpressionStatement); @@ -653,10 +644,6 @@ void PEGTransformerFactory::RegisterExpression() { REGISTER_TRANSFORM(TransformIgnoreOrRespectNulls); } -void PEGTransformerFactory::RegisterImport() { - REGISTER_TRANSFORM(TransformImportStatement); -} - void PEGTransformerFactory::RegisterInsert() { // insert.gram REGISTER_TRANSFORM(TransformInsertStatement); @@ -881,13 +868,6 @@ void PEGTransformerFactory::RegisterSelect() { REGISTER_TRANSFORM(TransformRepeatableSample); } -void PEGTransformerFactory::RegisterUse() { - // use.gram - REGISTER_TRANSFORM(TransformUseStatement); - REGISTER_TRANSFORM(TransformUseTarget); - REGISTER_TRANSFORM(TransformUseTargetCatalogSchema); -} - void PEGTransformerFactory::RegisterSet() { // set.gram REGISTER_TRANSFORM(TransformResetStatement); @@ -904,13 +884,7 @@ void PEGTransformerFactory::RegisterSet() { } void PEGTransformerFactory::RegisterTransaction() { - // transaction.gram - REGISTER_TRANSFORM(TransformTransactionStatement); - REGISTER_TRANSFORM(TransformBeginTransaction); - REGISTER_TRANSFORM(TransformReadOrWrite); REGISTER_TRANSFORM(TransformReadOnlyOrReadWrite); - REGISTER_TRANSFORM(TransformCommitTransaction); - REGISTER_TRANSFORM(TransformRollbackTransaction); } void PEGTransformerFactory::RegisterUpdate() { @@ -1099,6 +1073,7 @@ void PEGTransformerFactory::RegisterEnums() { } PEGTransformerFactory::PEGTransformerFactory() { + RegisterGenerated(); REGISTER_TRANSFORM(TransformStatement); RegisterAlter(); RegisterAttach(); @@ -1119,14 +1094,11 @@ PEGTransformerFactory::PEGTransformerFactory() { RegisterCreateTrigger(); RegisterDeallocate(); RegisterDelete(); - RegisterDetach(); RegisterDescribe(); RegisterDrop(); RegisterExecute(); RegisterExplain(); - RegisterExport(); RegisterExpression(); - RegisterImport(); RegisterInsert(); RegisterLoad(); RegisterMergeInto(); @@ -1134,7 +1106,6 @@ PEGTransformerFactory::PEGTransformerFactory() { RegisterPragma(); RegisterPrepare(); RegisterSelect(); - RegisterUse(); RegisterSet(); RegisterTransaction(); RegisterUpdate(); diff --git a/src/parser/peg/transformer/transform_detach.cpp b/src/parser/peg/transformer/transform_detach.cpp index 447c33807113..ecd7c7b33005 100644 --- a/src/parser/peg/transformer/transform_detach.cpp +++ b/src/parser/peg/transformer/transform_detach.cpp @@ -3,15 +3,12 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformDetachStatement(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr PEGTransformerFactory::TransformDetachStatement(const bool &if_exists, + const string &catalog_name) { auto result = make_uniq(); auto info = make_uniq(); - - auto &list_pr = parse_result.Cast(); - auto &if_exists = list_pr.Child(2); - info->if_not_found = if_exists.HasResult() ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - info->name = list_pr.Child(3).identifier; + info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; + info->name = catalog_name; result->info = std::move(info); return std::move(result); } diff --git a/src/parser/peg/transformer/transform_drop.cpp b/src/parser/peg/transformer/transform_drop.cpp index 8a0570611428..93aa4e28687d 100644 --- a/src/parser/peg/transformer/transform_drop.cpp +++ b/src/parser/peg/transformer/transform_drop.cpp @@ -233,6 +233,10 @@ bool PEGTransformerFactory::TransformDropBehavior(PEGTransformer &transformer, P return StringUtil::CIEquals(choice_pr.Cast().keyword, "cascade"); } +bool PEGTransformerFactory::TransformIfExists(PEGTransformer &transformer, ParseResult &parse_result) { + return true; +} + unique_ptr PEGTransformerFactory::TransformDropSecret(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); diff --git a/src/parser/peg/transformer/transform_export.cpp b/src/parser/peg/transformer/transform_export.cpp index c4390729f48f..18b2a71e12d2 100644 --- a/src/parser/peg/transformer/transform_export.cpp +++ b/src/parser/peg/transformer/transform_export.cpp @@ -4,40 +4,32 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformExportStatement(PEGTransformer &transformer, - ParseResult &parse_result) { +unique_ptr +PEGTransformerFactory::TransformExportStatement(const string &export_source, const string &string_literal, + vector generic_copy_option_list) { auto info = make_uniq(); - auto &list_pr = parse_result.Cast(); - info->file_path = list_pr.Child(3).result; + info->file_path = string_literal; info->format = "csv"; info->is_from = false; - auto &parens = list_pr.Child(4); - if (parens.HasResult()) { - auto option_list = transformer.Transform>(parens.GetResult()); - for (auto &option : option_list) { - if (option.name == "format") { - info->format = option.children[0].GetValue(); - info->is_format_auto_detected = false; - } else if (option.expression) { - info->parsed_options[StringUtil::Upper(option.name)] = std::move(option.expression); - } else { - info->options[StringUtil::Upper(option.name)] = option.children; - } + for (auto &option : generic_copy_option_list) { + if (option.name == "format") { + info->format = option.children[0].GetValue(); + info->is_format_auto_detected = false; + } else if (option.expression) { + info->parsed_options[StringUtil::Upper(option.name)] = std::move(option.expression); + } else { + info->options[StringUtil::Upper(option.name)] = option.children; } } auto result = make_uniq(std::move(info)); - auto &database_result = list_pr.Child(2); - if (database_result.HasResult()) { - result->database = transformer.Transform(database_result.GetResult()); - } + result->database = export_source; return std::move(result); } -string PEGTransformerFactory::TransformExportSource(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - return list_pr.Child(0).identifier; +string PEGTransformerFactory::TransformExportSource(const string &catalog_name) { + return catalog_name; } } // namespace duckdb diff --git a/src/parser/peg/transformer/transform_expression.cpp b/src/parser/peg/transformer/transform_expression.cpp index 9d78f08ede53..352a02dd473b 100644 --- a/src/parser/peg/transformer/transform_expression.cpp +++ b/src/parser/peg/transformer/transform_expression.cpp @@ -1617,6 +1617,9 @@ unique_ptr PEGTransformerFactory::TransformSliceExpression(PEG ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); auto slice_bound = transformer.Transform>>(list_pr.Child(1)); + if (slice_bound.empty()) { + throw ParserException("Empty subscript '[]' is not allowed"); + } if (slice_bound.size() == 1) { return make_uniq(ExpressionType::ARRAY_EXTRACT, std::move(slice_bound)); } diff --git a/src/parser/peg/transformer/transform_generated.cpp b/src/parser/peg/transformer/transform_generated.cpp new file mode 100644 index 000000000000..d5d1fa1151b6 --- /dev/null +++ b/src/parser/peg/transformer/transform_generated.cpp @@ -0,0 +1,143 @@ +// AUTO-GENERATED by scripts/parser/gen_transformer_v2.py -- DO NOT EDIT +#include "duckdb/parser/peg/transformer/peg_transformer.hpp" + +namespace duckdb { + +unique_ptr PEGTransformerFactory::TransformUseStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto use_target = transformer.Transform(list_pr, 1); + auto result = TransformUseStatement(use_target); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformUseTargetInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + auto result = TransformUseTarget(transformer, choice_pr.GetResult()); + return make_uniq>(result); +} + +unique_ptr +PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + auto reserved_schema_name = list_pr.Child(2).identifier; + auto &dot_identifier_opt = list_pr.Child(3); + vector dot_identifier; + if (dot_identifier_opt.HasResult()) { + auto &dot_identifier_repeat = dot_identifier_opt.GetResult().Cast(); + for (auto &dot_identifier_item : dot_identifier_repeat.GetChildren()) { + dot_identifier.push_back(transformer.Transform(dot_identifier_item)); + } + } + auto result = TransformUseTargetCatalogSchema(catalog_name, reserved_schema_name, dot_identifier); + return make_uniq>(result); +} + +unique_ptr PEGTransformerFactory::TransformDotIdentifierInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto identifier = list_pr.Child(1).identifier; + auto result = TransformDotIdentifier(identifier); + return make_uniq>(result); +} + +unique_ptr +PEGTransformerFactory::TransformTransactionStatementInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto &choice_pr = list_pr.Child(0); + auto result = transformer.Transform>(choice_pr.GetResult()); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformBeginTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + TransactionModifierType read_or_write {}; + transformer.TransformOptional(list_pr, 2, read_or_write); + auto result = TransformBeginTransaction(read_or_write); + return make_uniq>>(std::move(result)); +} + +unique_ptr +PEGTransformerFactory::TransformRollbackTransactionInternal(PEGTransformer &transformer, ParseResult &parse_result) { + auto result = TransformRollbackTransaction(); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformCommitTransactionInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto result = TransformCommitTransaction(); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformReadOrWriteInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto read_only_or_read_write = transformer.Transform(list_pr, 1); + auto result = TransformReadOrWrite(read_only_or_read_write); + return make_uniq>(result); +} + +unique_ptr PEGTransformerFactory::TransformDetachStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + bool if_exists {}; + transformer.TransformOptional(list_pr, 2, if_exists); + auto catalog_name = list_pr.Child(3).identifier; + auto result = TransformDetachStatement(if_exists, catalog_name); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformExportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + string export_source {}; + transformer.TransformOptional(list_pr, 2, export_source); + auto string_literal = transformer.Transform(list_pr, 3); + vector generic_copy_option_list {}; + transformer.TransformOptional(list_pr, 4, generic_copy_option_list); + auto result = TransformExportStatement(export_source, string_literal, std::move(generic_copy_option_list)); + return make_uniq>>(std::move(result)); +} + +unique_ptr PEGTransformerFactory::TransformExportSourceInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto catalog_name = list_pr.Child(0).identifier; + auto result = TransformExportSource(catalog_name); + return make_uniq>(result); +} + +unique_ptr PEGTransformerFactory::TransformImportStatementInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto &list_pr = parse_result.Cast(); + auto string_literal = transformer.Transform(list_pr, 2); + auto result = TransformImportStatement(string_literal); + return make_uniq>>(std::move(result)); +} + +void PEGTransformerFactory::RegisterGenerated() { + static const TransformRule builtin_transform_rules[] = { + {"UseStatement", &PEGTransformerFactory::TransformUseStatementInternal}, + {"UseTarget", &PEGTransformerFactory::TransformUseTargetInternal}, + {"UseTargetCatalogSchema", &PEGTransformerFactory::TransformUseTargetCatalogSchemaInternal}, + {"DotIdentifier", &PEGTransformerFactory::TransformDotIdentifierInternal}, + {"TransactionStatement", &PEGTransformerFactory::TransformTransactionStatementInternal}, + {"BeginTransaction", &PEGTransformerFactory::TransformBeginTransactionInternal}, + {"RollbackTransaction", &PEGTransformerFactory::TransformRollbackTransactionInternal}, + {"CommitTransaction", &PEGTransformerFactory::TransformCommitTransactionInternal}, + {"ReadOrWrite", &PEGTransformerFactory::TransformReadOrWriteInternal}, + {"DetachStatement", &PEGTransformerFactory::TransformDetachStatementInternal}, + {"ExportStatement", &PEGTransformerFactory::TransformExportStatementInternal}, + {"ExportSource", &PEGTransformerFactory::TransformExportSourceInternal}, + {"ImportStatement", &PEGTransformerFactory::TransformImportStatementInternal}, + }; + for (const auto &rule : builtin_transform_rules) { + sql_transform_functions[rule.name] = rule.transform; + } +} + +} // namespace duckdb diff --git a/src/parser/peg/transformer/transform_import.cpp b/src/parser/peg/transformer/transform_import.cpp index 3ae8104a4ee6..1d54d470dae1 100644 --- a/src/parser/peg/transformer/transform_import.cpp +++ b/src/parser/peg/transformer/transform_import.cpp @@ -3,13 +3,10 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformImportStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto name = list_pr.Child(2).result; +unique_ptr PEGTransformerFactory::TransformImportStatement(const string &string_literal) { auto result = make_uniq(); result->info->name = "import_database"; - result->info->parameters.emplace_back(make_uniq(Value(name))); + result->info->parameters.emplace_back(make_uniq(Value(string_literal))); return std::move(result); } diff --git a/src/parser/peg/transformer/transform_transaction.cpp b/src/parser/peg/transformer/transform_transaction.cpp index 77a14926f2b6..fedb2e773e18 100644 --- a/src/parser/peg/transformer/transform_transaction.cpp +++ b/src/parser/peg/transformer/transform_transaction.cpp @@ -3,30 +3,16 @@ namespace duckdb { -unique_ptr PEGTransformerFactory::TransformTransactionStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - return transformer.Transform>(choice_pr.GetResult()); -} - -unique_ptr PEGTransformerFactory::TransformBeginTransaction(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); +unique_ptr +PEGTransformerFactory::TransformBeginTransaction(const TransactionModifierType &read_or_write) { auto info = make_uniq(TransactionType::BEGIN_TRANSACTION); - auto &read_or_write = list_pr.Child(2); - if (read_or_write.HasResult()) { - info->modifier = transformer.Transform(read_or_write.GetResult()); - } else { - info->modifier = TransactionModifierType::TRANSACTION_DEFAULT_MODIFIER; - } + info->modifier = read_or_write; return make_uniq(std::move(info)); } -TransactionModifierType PEGTransformerFactory::TransformReadOrWrite(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - return transformer.Transform(list_pr.Child(1)); +TransactionModifierType +PEGTransformerFactory::TransformReadOrWrite(const TransactionModifierType &read_only_or_read_write) { + return read_only_or_read_write; } TransactionModifierType PEGTransformerFactory::TransformReadOnlyOrReadWrite(PEGTransformer &transformer, @@ -35,11 +21,11 @@ TransactionModifierType PEGTransformerFactory::TransformReadOnlyOrReadWrite(PEGT return transformer.TransformEnum(list_pr.Child(0).GetResult()); } -unique_ptr PEGTransformerFactory::TransformCommitTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformCommitTransaction() { return make_uniq(make_uniq(TransactionType::COMMIT)); } -unique_ptr PEGTransformerFactory::TransformRollbackTransaction(PEGTransformer &, ParseResult &) { +unique_ptr PEGTransformerFactory::TransformRollbackTransaction() { return make_uniq(make_uniq(TransactionType::ROLLBACK)); } } // namespace duckdb diff --git a/src/parser/peg/transformer/transform_use.cpp b/src/parser/peg/transformer/transform_use.cpp index eac46a89feee..ac2bc4dfcd43 100644 --- a/src/parser/peg/transformer/transform_use.cpp +++ b/src/parser/peg/transformer/transform_use.cpp @@ -4,16 +4,12 @@ namespace duckdb { // UseStatement <- 'USE' UseTarget -unique_ptr PEGTransformerFactory::TransformUseStatement(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto qn = transformer.Transform(list_pr, 1); - +unique_ptr PEGTransformerFactory::TransformUseStatement(const QualifiedName &use_target) { string value_str; - if (IsInvalidSchema(qn.schema)) { - value_str = SQLIdentifier::ToString(qn.name); + if (IsInvalidSchema(use_target.schema)) { + value_str = SQLIdentifier::ToString(use_target.name); } else { - value_str = SQLIdentifier(qn.schema) + "." + SQLIdentifier(qn.name); + value_str = SQLIdentifier(use_target.schema) + "." + SQLIdentifier(use_target.name); } auto value_expr = make_uniq(Value(value_str)); @@ -21,31 +17,30 @@ unique_ptr PEGTransformerFactory::TransformUseStatement(PEGTransfo } // UseTarget <- UseTargetCatalogSchema / SchemaName / CatalogName -QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer, ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto &choice_pr = list_pr.Child(0); - if (choice_pr.GetResult().type == ParseResultType::IDENTIFIER) { +QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer, ParseResult &pr) { + if (pr.type == ParseResultType::IDENTIFIER) { QualifiedName result; - result.name = choice_pr.GetResult().Cast().identifier; + result.name = pr.Cast().identifier; return result; } - return transformer.Transform(choice_pr.GetResult()); + return transformer.Transform(pr); } -// UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName ('.' Identifier)* -QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(PEGTransformer &transformer, - ParseResult &parse_result) { - auto &list_pr = parse_result.Cast(); - auto catalog = list_pr.Child(0).identifier; - auto schema = list_pr.Child(2).identifier; - auto &extra_opt = list_pr.Child(3); - if (extra_opt.HasResult()) { +// UseTargetCatalogSchema <- CatalogName '.' ReservedSchemaName DotIdentifier* +QualifiedName PEGTransformerFactory::TransformUseTargetCatalogSchema(const string &catalog_name, + const string &reserved_schema_name, + const vector &dot_identifier) { + if (!dot_identifier.empty()) { throw ParserException("Expected \"USE database\" or \"USE database.schema\""); } QualifiedName result; result.catalog = INVALID_CATALOG; - result.schema = catalog; - result.name = schema; + result.schema = catalog_name; + result.name = reserved_schema_name; return result; } + +string PEGTransformerFactory::TransformDotIdentifier(const string &identifier) { + return identifier; +} } // namespace duckdb diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index 8bfd7c829ae6..7bf69afdf727 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -178,17 +178,20 @@ static bool OperatorSupportsSerialization(LogicalOperator &op) { void Planner::VerifyPlan(ClientContext &context, unique_ptr &op, optional_ptr map) { - auto &config = DBConfig::GetConfig(context); - if (!op || !Settings::Get(context)) { + if (!op) { + return; + } + // verify the column bindings of the plan + ColumnBindingResolver::Verify(context, *op); + if (!Settings::Get(context)) { return; } //! SELECT only for now if (!OperatorSupportsSerialization(*op)) { return; } - // verify the column bindings of the plan - ColumnBindingResolver::Verify(*op); + auto &config = DBConfig::GetConfig(context); // format (de)serialization of this operator try { MemoryStream stream(Allocator::Get(context)); diff --git a/src/planner/subquery/flatten_dependent_join.cpp b/src/planner/subquery/flatten_dependent_join.cpp index 13f37e7c1229..26f77906c272 100644 --- a/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/planner/subquery/flatten_dependent_join.cpp @@ -485,7 +485,7 @@ vector FlattenDependentJoins::PushDownCorrelatedNode(unique_ptr FlattenDependentJoins::PushDownProjection(unique_ptrexpressions, state, true); auto &proj = plan->Cast(); auto correlated_offset = plan->expressions.size() - correlated_columns.size(); - ColumnBindingResolver::Verify(*plan); + ColumnBindingResolver::Verify(binder.context, *plan); return CreateContiguousState(ColumnBinding(proj.table_index, ProjectionIndex(correlated_offset))); } diff --git a/src/storage/buffer/buffer_pool.cpp b/src/storage/buffer/buffer_pool.cpp index 104d3fba62eb..84877a0f19e9 100644 --- a/src/storage/buffer/buffer_pool.cpp +++ b/src/storage/buffer/buffer_pool.cpp @@ -3,6 +3,8 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/thread.hpp" #include "duckdb/common/typedefs.hpp" +#include "duckdb/logging/logger.hpp" +#include #include "duckdb/main/settings.hpp" #include "duckdb/parallel/concurrentqueue.hpp" #include "duckdb/parallel/task_scheduler.hpp" @@ -82,7 +84,7 @@ struct EvictionQueue { //! Tries to dequeue an element from the eviction queue, but only after acquiring the purge queue lock. bool TryDequeueWithLock(BufferEvictionNode &node); //! Garbage collect dead nodes in the eviction queue. - void Purge(); + void Purge(const DatabaseInstance &db); template void IterateUnloadableBlocks(FN fn); @@ -155,13 +157,15 @@ bool EvictionQueue::TryDequeueWithLock(BufferEvictionNode &node) { return q.try_dequeue(node); } -void EvictionQueue::Purge() { +void EvictionQueue::Purge(const DatabaseInstance &db) { // only one thread purges the queue, all other threads early-out unique_lock guard(purge_lock, std::try_to_lock); if (!guard.owns_lock()) { return; } + auto purge_start = std::chrono::steady_clock::now(); + // we purge INSERT_INTERVAL * PURGE_SIZE_MULTIPLIER nodes idx_t purge_size = INSERT_INTERVAL * PURGE_SIZE_MULTIPLIER; @@ -191,6 +195,7 @@ void EvictionQueue::Purge() { // guaranteeing that we always exit the loop. idx_t max_purges = approx_q_size / purge_size; + idx_t initial_max_purges = max_purges; while (max_purges != 0) { PurgeIteration(purge_size); @@ -213,6 +218,15 @@ void EvictionQueue::Purge() { max_purges--; } + + idx_t iterations = initial_max_purges - max_purges; + auto elapsed_ms = std::chrono::duration_cast( + std::chrono::steady_clock::now() - purge_start) + .count(); + if (iterations > 10 || elapsed_ms > 1000) { + DUCKDB_LOG_WARNING(db, "EvictionQueue::Purge took %lldms with %llu iterations, queue_size=%llu, dead_nodes=%llu", + elapsed_ms, iterations, q.size_approx(), (idx_t)total_dead_nodes); + } } void EvictionQueue::PurgeIteration(const idx_t purge_size) { @@ -500,7 +514,7 @@ void BufferPool::PurgeQueue(const BlockHandle &block) { const auto queue_sleep_micros = Settings::Get(buffer_manager.GetDatabase()); eviction_queue.debug_eviction_queue_sleep = queue_sleep_micros; - eviction_queue.Purge(); + eviction_queue.Purge(buffer_manager.GetDatabase()); } void BufferPool::SetLimit(idx_t limit, const char *exception_postscript) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e84ad2a402a5..51ba8cb8e91c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -65,4 +65,8 @@ else() link_extension_libraries(unittest "") endif() +if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_link_libraries(unittest atomic) +endif() + duckdb_codesign_for_debugging(unittest) diff --git a/test/bwc/list_versions.py b/test/bwc/list_versions.py new file mode 100644 index 000000000000..538ab7bd2a0e --- /dev/null +++ b/test/bwc/list_versions.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import argparse +import json +from os.path import abspath, dirname +import sys + +sys.path.insert(0, dirname(abspath(__file__))) +from utils.version_list import list_supported_duckdb_version_groups, list_supported_duckdb_versions + + +def main(): + parser = argparse.ArgumentParser(description="List supported DuckDB versions for BWC") + parser.add_argument("--json", action="store_true", help="Output JSON array") + parser.add_argument("--groups-json", action="store_true", help="Output JSON array grouped by vX.Y series") + args = parser.parse_args() + + if args.groups_json: + print(json.dumps(list_supported_duckdb_version_groups())) + return + + versions = list_supported_duckdb_versions() + if args.json: + print(json.dumps(versions)) + else: + for version in versions: + print(version) + + +if __name__ == "__main__": + main() diff --git a/test/bwc/runner.py b/test/bwc/runner.py index 14419a6e2de1..15086c7cf4e7 100644 --- a/test/bwc/runner.py +++ b/test/bwc/runner.py @@ -5,6 +5,7 @@ from utils.test_files_parser import load_test_files from utils.test_report import TestReport from utils.logger import make_logger +from utils.version_list import list_supported_duckdb_versions import time from concurrent.futures import ThreadPoolExecutor from threading import Lock, Event @@ -317,25 +318,7 @@ def cleanup_runtime_dir(bwc_tests_base_dir, dry_run=True): if __name__ == "__main__": supported_duckdb_versions = ( - [args.old_duckdb_version] - if args.old_duckdb_version - else [ - "v1.1.0", - "v1.1.2", - "v1.1.1", - "v1.2.0", - "v1.1.3", - "v1.2.2", - "v1.2.1", - "v1.3.0", - "v1.3.1", - "v1.3.2", - "v1.4.0", - "v1.4.1", - "v1.4.2", - "v1.4.3", - "v1.4.4", - ] + [args.old_duckdb_version] if args.old_duckdb_version else list_supported_duckdb_versions() ) duckdb_root_dir = dirname(dirname(dirname(abspath(__file__)))) diff --git a/test/bwc/update_cache.py b/test/bwc/update_cache.py index 2a5c37161c7e..c3103e4ff9d5 100644 --- a/test/bwc/update_cache.py +++ b/test/bwc/update_cache.py @@ -17,6 +17,10 @@ import subprocess import sys import tempfile +from os.path import abspath, dirname + +sys.path.insert(0, dirname(abspath(__file__))) +from utils.version_list import list_supported_duckdb_versions parser = argparse.ArgumentParser(description='Update BWC test cache in test-utils from CI artifacts') parser.add_argument( @@ -29,24 +33,6 @@ args = parser.parse_args() -SUPPORTED_VERSIONS = [ - "v1.1.0", - "v1.1.1", - "v1.1.2", - "v1.1.3", - "v1.2.0", - "v1.2.1", - "v1.2.2", - "v1.3.0", - "v1.3.1", - "v1.3.2", - "v1.4.0", - "v1.4.1", - "v1.4.2", - "v1.4.3", - "v1.4.4", -] - def download_artifact(run_id, artifact_name, dest_dir, repo): """Download a single artifact from a GitHub Actions run.""" @@ -69,7 +55,7 @@ def download_artifact(run_id, artifact_name, dest_dir, repo): if args.version: versions = [args.version] else: - versions = SUPPORTED_VERSIONS + versions = list_supported_duckdb_versions() print(f"Downloading cache artifacts from run {args.run_id} ({args.repo})") print(f"Updating {len(versions)} version(s) in {cache_dir}\n") diff --git a/test/bwc/utils/version_list.py b/test/bwc/utils/version_list.py new file mode 100644 index 000000000000..8abbc3949820 --- /dev/null +++ b/test/bwc/utils/version_list.py @@ -0,0 +1,51 @@ +import re +import subprocess +from collections import OrderedDict + + +SEMVER_TAG_RE = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$") + + +def parse_version_tag(tag): + match = SEMVER_TAG_RE.match(tag) + if not match: + return None + return tuple(int(part) for part in match.groups()) + + +def list_supported_duckdb_versions(min_version="v1.1.0"): + min_tuple = parse_version_tag(min_version) + if min_tuple is None: + raise ValueError(f"Invalid minimum version tag: {min_version}") + + result = subprocess.run( + ["git", "tag", "--list", "v*"], + capture_output=True, + text=True, + check=True, + ) + raw_tags = [line.strip() for line in result.stdout.splitlines() if line.strip()] + + versions = [] + for tag in raw_tags: + parsed = parse_version_tag(tag) + if parsed is None: + continue + if parsed >= min_tuple: + versions.append((parsed, tag)) + + versions.sort(key=lambda entry: entry[0]) + return [tag for _, tag in versions] + + +def list_supported_duckdb_version_groups(min_version="v1.1.0"): + grouped = OrderedDict() + for version in list_supported_duckdb_versions(min_version=min_version): + parsed = parse_version_tag(version) + assert parsed is not None + major, minor, _ = parsed + group = f"v{major}.{minor}" + if group not in grouped: + grouped[group] = [] + grouped[group].append(version) + return [{"group": group, "versions": versions} for group, versions in grouped.items()] diff --git a/test/configs/hash_zero.json b/test/configs/hash_zero.json index dee314bf7f2d..a42c61e6c32c 100644 --- a/test/configs/hash_zero.json +++ b/test/configs/hash_zero.json @@ -24,8 +24,12 @@ "test/issues/general/test_21431.test", "test/sql/join/inner/test_prefix_range_filter_pushdown.test", "test/sql/join/inner/test_prefix_range_filter_pushdown_strinline.test", + "test/sql/join/left_outer/unique_left_join.test", + "test/sql/limit/streaming_limit_pipeline_flush.test", + "test/sql/settings/max_execution_time.test", "test/sql/settings/operator_memory_limit.test", - "test/sql/table_function/duckdb_eviction_queues.test" + "test/sql/table_function/duckdb_eviction_queues.test", + "test/sql/pragma/profiling/test_custom_profiling_total_memory_allocated.test" ] }, { diff --git a/test/configs/verification_projection.json b/test/configs/verification_projection.json new file mode 100644 index 000000000000..81f75fcc5aa2 --- /dev/null +++ b/test/configs/verification_projection.json @@ -0,0 +1,5 @@ +{ + "description": "Run with debug_verification_projection enabled", + "on_init": "SET debug_verification_projection=true;", + "skip_compiled": "true" +} diff --git a/test/configs/verify_column_bindings.json b/test/configs/verify_column_bindings.json new file mode 100644 index 000000000000..5d5b436095b8 --- /dev/null +++ b/test/configs/verify_column_bindings.json @@ -0,0 +1,13 @@ +{ + "description": "Run with debug_verify_column_bindings enabled", + "on_init": "SET debug_verify_column_bindings=true;", + "skip_compiled": "true", + "skip_tests": [ + { + "reason": "FIXME: Internal verification failed after CommonAggregateOptimizer", + "paths": [ + "test/sql/aggregate/having/test_scalar_having.test" + ] + } + ] +} diff --git a/test/optimizer/pushdown/join_filter_pushdown_cast.test b/test/optimizer/pushdown/join_filter_pushdown_cast.test index 5073ea40ae66..25a0364b87a9 100644 --- a/test/optimizer/pushdown/join_filter_pushdown_cast.test +++ b/test/optimizer/pushdown/join_filter_pushdown_cast.test @@ -167,11 +167,10 @@ CREATE TABLE probe_ubigint AS SELECT v::UBIGINT AS a FROM (VALUES (1), (5), (10) statement ok CREATE TABLE build_neg AS SELECT b::BIGINT AS b FROM (VALUES (5), (-1), (10)) t(b); -# TODO HERE query II EXPLAIN ANALYZE SELECT COUNT(*) FROM (SELECT a::BIGINT AS a FROM probe_ubigint) t JOIN build_neg ON t.a = build_neg.b; ---- -analyzed_plan :.*Dynamic Filters:.* +analyzed_plan :.*Dynamic Filters:.* # -1 cannot be cast to UBIGINT — only 5 and 10 match query I diff --git a/test/optimizer/statistics/statistics_unsigned_cast.test b/test/optimizer/statistics/statistics_unsigned_cast.test new file mode 100644 index 000000000000..3a6bc4d2ce4f --- /dev/null +++ b/test/optimizer/statistics/statistics_unsigned_cast.test @@ -0,0 +1,22 @@ +# name: test/optimizer/statistics/statistics_unsigned_cast.test +# description: Statistics propagation across casts involving unsigned integer types +# group: [statistics] + +statement ok +PRAGMA explain_output = OPTIMIZED_ONLY; + +statement ok +CREATE TABLE t_signed AS SELECT i::INT AS id FROM range(0, 100) t(i); + +query II +EXPLAIN SELECT * FROM t_signed WHERE id::BIGINT > 100; +---- +logical_opt :.*EMPTY_RESULT.* + +statement ok +CREATE TABLE t_unsigned AS SELECT i::UINTEGER AS id FROM range(0, 100) t(i); + +query II +EXPLAIN SELECT * FROM t_unsigned WHERE id::BIGINT > 100; +---- +logical_opt :.*EMPTY_RESULT.* diff --git a/test/sql/peg_parser/fuzzer/fuzzer_4434.test b/test/sql/peg_parser/fuzzer/fuzzer_4434.test new file mode 100644 index 000000000000..4366592980d6 --- /dev/null +++ b/test/sql/peg_parser/fuzzer/fuzzer_4434.test @@ -0,0 +1,14 @@ +# name: test/sql/peg_parser/fuzzer/fuzzer_4434.test +# description: Fix empty subscript expression leading to internal error +# group: [fuzzer] + +statement error +n''''+ST�&6>ttacrotablesampimportleT�&6>tta�rordeT�Niqeger[], e�tfilter% + t_not_currorderent; +---- +Parser Error: Empty subscript '[]' is not allowed + +statement error +[1,2,3][]; +---- +Parser Error: Empty subscript '[]' is not allowed \ No newline at end of file