From afc84ee206d4d54f581988c162773c67954ab3f3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 13 Dec 2024 16:05:27 +0100 Subject: [PATCH 1/5] attempting to use bitmap, but so much is depending on the relations array. It's kinda wild --- .../join_order/cardinality_estimator.hpp | 22 ++-- .../duckdb/optimizer/join_order/join_node.hpp | 10 +- .../optimizer/join_order/join_relation.hpp | 63 +++++++++-- .../optimizer/join_order/plan_enumerator.hpp | 16 +-- .../join_order/query_graph_manager.hpp | 24 ++--- .../optimizer/join_order/relation_manager.hpp | 2 +- .../join_order/cardinality_estimator.cpp | 36 +++---- src/optimizer/join_order/join_node.cpp | 6 +- .../join_order/join_relation_set.cpp | 102 +++++++++++++----- src/optimizer/join_order/plan_enumerator.cpp | 22 ++-- src/optimizer/join_order/query_graph.cpp | 4 +- .../join_order/query_graph_manager.cpp | 26 ++--- src/optimizer/join_order/relation_manager.cpp | 8 +- 13 files changed, 220 insertions(+), 121 deletions(-) diff --git a/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp b/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp index 8aec1cd02c46..af62f7a9a55e 100644 --- a/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +++ b/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp @@ -17,11 +17,11 @@ namespace duckdb { class FilterInfo; struct DenomInfo { - DenomInfo(JoinRelationSet &numerator_relations, double filter_strength, double denominator) + DenomInfo(JoinRelationSetOld &numerator_relations, double filter_strength, double denominator) : numerator_relations(numerator_relations), filter_strength(filter_strength), denominator(denominator) { } - JoinRelationSet &numerator_relations; + JoinRelationSetOld &numerator_relations; double filter_strength; double denominator; }; @@ -59,8 +59,8 @@ class FilterInfoWithTotalDomains { }; struct Subgraph2Denominator { - optional_ptr relations; - optional_ptr numerator_relations; + optional_ptr relations; + optional_ptr numerator_relations; double denom; Subgraph2Denominator() : relations(nullptr), numerator_relations(nullptr), denom(1) {}; @@ -94,28 +94,28 @@ class CardinalityEstimator { private: vector relations_to_tdoms; unordered_map relation_set_2_cardinality; - JoinRelationSetManager set_manager; + JoinRelationSetManagerOld set_manager; vector relation_stats; public: void RemoveEmptyTotalDomains(); - void UpdateTotalDomains(optional_ptr set, RelationStats &stats); + void UpdateTotalDomains(optional_ptr set, RelationStats &stats); void InitEquivalentRelations(const vector> &filter_infos); - void InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats); + void InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats); //! cost model needs estimated cardinalities to the fraction since the formula captures //! distinct count selectivities and multiplicities. Hence the template template - T EstimateCardinalityWithSet(JoinRelationSet &new_set); + T EstimateCardinalityWithSet(JoinRelationSetOld &new_set); //! used for debugging. void AddRelationNamesToTdoms(vector &stats); void PrintRelationToTdomInfo(); private: - double GetNumerator(JoinRelationSet &set); - DenomInfo GetDenominator(JoinRelationSet &set); + double GetNumerator(JoinRelationSetOld &set); + DenomInfo GetDenominator(JoinRelationSetOld &set); bool SingleColumnFilter(FilterInfo &filter_info); vector DetermineMatchingEquivalentSets(optional_ptr filter_info); @@ -126,7 +126,7 @@ class CardinalityEstimator { double CalculateUpdatedDenom(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); - JoinRelationSet &UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, + JoinRelationSetOld &UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); void AddRelationTdom(FilterInfo &filter_info); diff --git a/src/include/duckdb/optimizer/join_order/join_node.hpp b/src/include/duckdb/optimizer/join_order/join_node.hpp index b68f9a0a4e77..06fe79a09a2f 100644 --- a/src/include/duckdb/optimizer/join_order/join_node.hpp +++ b/src/include/duckdb/optimizer/join_order/join_node.hpp @@ -17,13 +17,13 @@ struct NeighborInfo; class DPJoinNode { public: //! Represents a node in the join plan - JoinRelationSet &set; + JoinRelationSetOld &set; //! information on how left and right are connected optional_ptr info; bool is_leaf; //! left and right plans - JoinRelationSet &left_set; - JoinRelationSet &right_set; + JoinRelationSetOld &left_set; + JoinRelationSetOld &right_set; //! The cost of the join node. The cost is stored here so that the cost of //! a join node stays in sync with how the join node is constructed. Storing the cost in an unordered_set @@ -34,13 +34,13 @@ class DPJoinNode { idx_t cardinality; //! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality - DPJoinNode(JoinRelationSet &set, optional_ptr info, JoinRelationSet &left, JoinRelationSet &right, + DPJoinNode(JoinRelationSetOld &set, optional_ptr info, JoinRelationSetOld &left, JoinRelationSetOld &right, double cost); //! Create a leaf node in the join tree //! set cost to 0 for leaf nodes //! cost will be the cost to *produce* an intermediate table - explicit DPJoinNode(JoinRelationSet &set); + explicit DPJoinNode(JoinRelationSetOld &set); }; } // namespace duckdb diff --git a/src/include/duckdb/optimizer/join_order/join_relation.hpp b/src/include/duckdb/optimizer/join_order/join_relation.hpp index 7b040c1b5ef4..b6d81ed1254f 100644 --- a/src/include/duckdb/optimizer/join_order/join_relation.hpp +++ b/src/include/duckdb/optimizer/join_order/join_relation.hpp @@ -14,9 +14,54 @@ namespace duckdb { -//! Set of relations, used in the join graph. + struct JoinRelationSet { - JoinRelationSet(unsafe_unique_array relations, idx_t count) : relations(std::move(relations)), count(count) { + JoinRelationSet() {} + JoinRelationSet(unsafe_unique_array &relations_, idx_t count) { + for (idx_t i = 0; i < count; i++) { + relations[relations_[i]] = true; + } + } + + string ToString() const; + std::bitset<12> relations; + + static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub); + JoinRelationSet Copy(); +}; + + +//! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to +//! them +class JoinRelationSetManager { +public: + //! Contains a node with a JoinRelationSet and child relations + // FIXME: this structure is inefficient, could use a bitmap for lookup instead (todo: profile) + + +public: + //! Create or get a JoinRelationSet from a single node with the given index + reference GetJoinRelation(idx_t index); + //! Create or get a JoinRelationSet from a set of relation bindings + reference GetJoinRelation(const unordered_set &bindings); + //! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations + reference GetJoinRelation(unsafe_unique_array relations, idx_t count); + //! Create or get a JoinRelationSet from another JoinRelation Set + reference GetJoinRelation(unique_ptr set); + //! Union two sets of relations together and create a new relation set + reference Union(JoinRelationSet &left, JoinRelationSet &right); + // //! Create the set difference of left \ right (i.e. all elements in left that are not in right) + // JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); + string ToString() const; + void Print(); + +private: + unordered_map, unique_ptr> active_relation_sets; +}; + +//! Set of relations, used in the join graph. +struct JoinRelationSetOld { + JoinRelationSetOld(unsafe_unique_array relations, idx_t count) : relations(std::move(relations)), count(count) { } string ToString() const; @@ -24,29 +69,29 @@ struct JoinRelationSet { unsafe_unique_array relations; idx_t count; - static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub); + // static bool IsSubset(JoinRelationSetOld &super, JoinRelationSetOld &sub); }; //! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to //! them -class JoinRelationSetManager { +class JoinRelationSetManagerOld { public: //! Contains a node with a JoinRelationSet and child relations // FIXME: this structure is inefficient, could use a bitmap for lookup instead (todo: profile) struct JoinRelationTreeNode { - unique_ptr relation; + unique_ptr relation; unordered_map> children; }; public: //! Create or get a JoinRelationSet from a single node with the given index - JoinRelationSet &GetJoinRelation(idx_t index); + JoinRelationSetOld &GetJoinRelation(idx_t index); //! Create or get a JoinRelationSet from a set of relation bindings - JoinRelationSet &GetJoinRelation(const unordered_set &bindings); + JoinRelationSetOld &GetJoinRelation(const unordered_set &bindings); //! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations - JoinRelationSet &GetJoinRelation(unsafe_unique_array relations, idx_t count); + JoinRelationSetOld &GetJoinRelation(unsafe_unique_array relations, idx_t count); //! Union two sets of relations together and create a new relation set - JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right); + JoinRelationSetOld &Union(JoinRelationSetOld &left, JoinRelationSetOld &right); // //! Create the set difference of left \ right (i.e. all elements in left that are not in right) // JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); string ToString() const; diff --git a/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp b/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp index 29e8679532ce..a5294313add4 100644 --- a/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +++ b/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp @@ -39,7 +39,7 @@ class PlanEnumerator { void SolveJoinOrder(); void InitLeafPlans(); - const reference_map_t> &GetPlans() const; + const reference_map_t> &GetPlans() const; private: //! The set of edges used in the join optimizer @@ -51,26 +51,26 @@ class PlanEnumerator { //! Cost model to evaluate cost of joins CostModel &cost_model; //! A map to store the optimal join plan found for a specific JoinRelationSet* - reference_map_t> plans; + reference_map_t> plans; unordered_set join_nodes_in_full_plan; - unique_ptr CreateJoinTree(JoinRelationSet &set, + unique_ptr CreateJoinTree(JoinRelationSetOld &set, const vector> &possible_connections, DPJoinNode &left, DPJoinNode &right); //! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either //! the newly created plan, or an existing plan) - DPJoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); + DPJoinNode &EmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info); //! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted, //! cancelling the dynamic programming step. - bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); + bool TryEmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info); - bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set &exclusion_set); + bool EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelationSetOld &right, unordered_set &exclusion_set); //! Emit a relation set node - bool EmitCSG(JoinRelationSet &node); + bool EmitCSG(JoinRelationSetOld &node); //! Enumerate the possible connected subgraphs that can be joined together in the join graph - bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set &exclusion_set); + bool EnumerateCSGRecursive(JoinRelationSetOld &node, unordered_set &exclusion_set); //! Generate cross product edges inside the side void GenerateCrossProducts(); diff --git a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp index e98868115af3..f9dd43ac8f36 100644 --- a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +++ b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp @@ -29,11 +29,11 @@ namespace duckdb { class QueryGraphEdges; struct GenerateJoinRelation { - GenerateJoinRelation(optional_ptr set, unique_ptr op_p) + GenerateJoinRelation(optional_ptr set, unique_ptr op_p) : set(set), op(std::move(op_p)) { } - optional_ptr set; + optional_ptr set; unique_ptr op; }; @@ -41,23 +41,23 @@ struct GenerateJoinRelation { //! but is also eventually transformed into a query edge. class FilterInfo { public: - FilterInfo(unique_ptr filter, JoinRelationSet &set, idx_t filter_index, + FilterInfo(unique_ptr filter, JoinRelationSetOld &set, idx_t filter_index, JoinType join_type = JoinType::INNER) : filter(std::move(filter)), set(set), filter_index(filter_index), join_type(join_type) { } public: unique_ptr filter; - reference set; + reference set; idx_t filter_index; JoinType join_type; - optional_ptr left_set; - optional_ptr right_set; + optional_ptr left_set; + optional_ptr right_set; ColumnBinding left_binding; ColumnBinding right_binding; - void SetLeftSet(optional_ptr left_set_new); - void SetRightSet(optional_ptr right_set_new); + void SetLeftSet(optional_ptr left_set_new); + void SetRightSet(optional_ptr right_set_new); }; //! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations @@ -72,7 +72,7 @@ class QueryGraphManager { RelationManager relation_manager; //! A structure holding all the created JoinRelationSet objects - JoinRelationSetManager set_manager; + JoinRelationSetManagerOld set_manager; ClientContext &context; @@ -92,10 +92,10 @@ class QueryGraphManager { //! Plan enumerator may not find a full plan and therefore will need to create cross //! products to create edges. - void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right); + void CreateQueryGraphCrossProduct(JoinRelationSetOld &left, JoinRelationSetOld &right); //! A map to store the optimal join plan found for a specific JoinRelationSet* - optional_ptr>> plans; + optional_ptr>> plans; private: vector> filter_operators; @@ -110,7 +110,7 @@ class QueryGraphManager { void CreateHyperGraphEdges(); - GenerateJoinRelation GenerateJoins(vector> &extracted_relations, JoinRelationSet &set); + GenerateJoinRelation GenerateJoins(vector> &extracted_relations, JoinRelationSetOld &set); }; } // namespace duckdb diff --git a/src/include/duckdb/optimizer/join_order/relation_manager.hpp b/src/include/duckdb/optimizer/join_order/relation_manager.hpp index 3b8fda1c67f6..bd12c2b42716 100644 --- a/src/include/duckdb/optimizer/join_order/relation_manager.hpp +++ b/src/include/duckdb/optimizer/join_order/relation_manager.hpp @@ -51,7 +51,7 @@ class RelationManager { //! both sides of the join filter, along with the tables & indexes. vector> ExtractEdges(LogicalOperator &op, vector> &filter_operators, - JoinRelationSetManager &set_manager); + JoinRelationSetManagerOld &set_manager); //! Extract the set of relations referred to inside an expression bool ExtractBindings(Expression &expression, unordered_set &bindings); diff --git a/src/optimizer/join_order/cardinality_estimator.cpp b/src/optimizer/join_order/cardinality_estimator.cpp index 07cbf1dcd466..72ea38e0ac2f 100644 --- a/src/optimizer/join_order/cardinality_estimator.cpp +++ b/src/optimizer/join_order/cardinality_estimator.cpp @@ -126,7 +126,7 @@ void CardinalityEstimator::RemoveEmptyTotalDomains() { relations_to_tdoms.erase(remove_start, relations_to_tdoms.end()); } -double CardinalityEstimator::GetNumerator(JoinRelationSet &set) { +double CardinalityEstimator::GetNumerator(JoinRelationSetOld &set) { double numerator = 1; for (idx_t i = 0; i < set.count; i++) { auto &single_node_set = set_manager.GetJoinRelation(set.relations[i]); @@ -138,13 +138,13 @@ double CardinalityEstimator::GetNumerator(JoinRelationSet &set) { bool EdgeConnects(FilterInfoWithTotalDomains &edge, Subgraph2Denominator &subgraph) { if (edge.filter_info->left_set) { - if (JoinRelationSet::IsSubset(*subgraph.relations, *edge.filter_info->left_set)) { + if (JoinRelationSetOld::IsSubset(*subgraph.relations, *edge.filter_info->left_set)) { // cool return true; } } if (edge.filter_info->right_set) { - if (JoinRelationSet::IsSubset(*subgraph.relations, *edge.filter_info->right_set)) { + if (JoinRelationSetOld::IsSubset(*subgraph.relations, *edge.filter_info->right_set)) { return true; } } @@ -152,11 +152,11 @@ bool EdgeConnects(FilterInfoWithTotalDomains &edge, Subgraph2Denominator &subgra } vector GetEdges(vector &relations_to_tdom, - JoinRelationSet &requested_set) { + JoinRelationSetOld &requested_set) { vector res; for (auto &relation_2_tdom : relations_to_tdom) { for (auto &filter : relation_2_tdom.filters) { - if (JoinRelationSet::IsSubset(requested_set, filter->set)) { + if (JoinRelationSetOld::IsSubset(requested_set, filter->set)) { FilterInfoWithTotalDomains new_edge(filter, relation_2_tdom); res.push_back(new_edge); } @@ -194,13 +194,13 @@ vector SubgraphsConnectedByEdge(FilterInfoWithTotalDomains &edge, vector< return res; } -JoinRelationSet &CardinalityEstimator::UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, +JoinRelationSetOld &CardinalityEstimator::UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter) { switch (filter.filter_info->join_type) { case JoinType::SEMI: case JoinType::ANTI: { - if (JoinRelationSet::IsSubset(*left.relations, *filter.filter_info->left_set) && - JoinRelationSet::IsSubset(*right.relations, *filter.filter_info->right_set)) { + if (JoinRelationSetOld::IsSubset(*left.relations, *filter.filter_info->left_set) && + JoinRelationSetOld::IsSubset(*right.relations, *filter.filter_info->right_set)) { return *left.numerator_relations; } return *right.numerator_relations; @@ -261,8 +261,8 @@ double CardinalityEstimator::CalculateUpdatedDenom(Subgraph2Denominator left, Su } case JoinType::SEMI: case JoinType::ANTI: { - if (JoinRelationSet::IsSubset(*left.relations, *filter.filter_info->left_set) && - JoinRelationSet::IsSubset(*right.relations, *filter.filter_info->right_set)) { + if (JoinRelationSetOld::IsSubset(*left.relations, *filter.filter_info->left_set) && + JoinRelationSetOld::IsSubset(*right.relations, *filter.filter_info->right_set)) { new_denom = left.denom * CardinalityEstimator::DEFAULT_SEMI_ANTI_SELECTIVITY; return new_denom; } @@ -275,7 +275,7 @@ double CardinalityEstimator::CalculateUpdatedDenom(Subgraph2Denominator left, Su } } -DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSet &set) { +DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { vector subgraphs; // Finding the denominator is tricky. You need to go through the tdoms in decreasing order @@ -321,13 +321,13 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSet &set) { auto right_subgraph = Subgraph2Denominator(); right_subgraph.relations = edge.filter_info->right_set; right_subgraph.numerator_relations = edge.filter_info->right_set; - if (JoinRelationSet::IsSubset(*left_subgraph->relations, *right_subgraph.relations)) { + if (JoinRelationSetOld::IsSubset(*left_subgraph->relations, *right_subgraph.relations)) { right_subgraph.relations = edge.filter_info->left_set; right_subgraph.numerator_relations = edge.filter_info->left_set; } - if (JoinRelationSet::IsSubset(*left_subgraph->relations, *edge.filter_info->left_set) && - JoinRelationSet::IsSubset(*left_subgraph->relations, *edge.filter_info->right_set)) { + if (JoinRelationSetOld::IsSubset(*left_subgraph->relations, *edge.filter_info->left_set) && + JoinRelationSetOld::IsSubset(*left_subgraph->relations, *edge.filter_info->right_set)) { // here we have an edge that connects the same subgraph to the same subgraph. Just continue. no need to // update the denom continue; @@ -377,7 +377,7 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSet &set) { } template <> -double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) { +double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSetOld &new_set) { if (relation_set_2_cardinality.find(new_set.ToString()) != relation_set_2_cardinality.end()) { return relation_set_2_cardinality[new_set.ToString()].cardinality_before_filters; @@ -394,7 +394,7 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set } template <> -idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) { +idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSetOld &new_set) { auto cardinality_as_double = EstimateCardinalityWithSet(new_set); auto max = NumericLimits::Maximum(); if (cardinality_as_double >= (double)max) { @@ -416,7 +416,7 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) { return a.tdom_no_hll > b.tdom_no_hll; } -void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { +void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { // Get the join relation set D_ASSERT(stats.stats_initialized); auto relation_cardinality = stats.cardinality; @@ -430,7 +430,7 @@ void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { +void CardinalityEstimator::UpdateTotalDomains(optional_ptr set, RelationStats &stats) { D_ASSERT(set->count == 1); auto relation_id = set->relations[0]; //! Initialize the distinct count for all columns used in joins with the current relation. diff --git a/src/optimizer/join_order/join_node.cpp b/src/optimizer/join_order/join_node.cpp index 031f56ce26ad..e5f965938c52 100644 --- a/src/optimizer/join_order/join_node.cpp +++ b/src/optimizer/join_order/join_node.cpp @@ -6,11 +6,11 @@ namespace duckdb { -DPJoinNode::DPJoinNode(JoinRelationSet &set) : set(set), info(nullptr), is_leaf(true), left_set(set), right_set(set) { +DPJoinNode::DPJoinNode(JoinRelationSetOld &set) : set(set), info(nullptr), is_leaf(true), left_set(set), right_set(set) { } -DPJoinNode::DPJoinNode(JoinRelationSet &set, optional_ptr info, JoinRelationSet &left, - JoinRelationSet &right, double cost) +DPJoinNode::DPJoinNode(JoinRelationSetOld &set, optional_ptr info, JoinRelationSetOld &left, + JoinRelationSetOld &right, double cost) : set(set), info(info), is_leaf(false), left_set(left), right_set(right), cost(cost) { } diff --git a/src/optimizer/join_order/join_relation_set.cpp b/src/optimizer/join_order/join_relation_set.cpp index aa5767427ae7..245a38d17499 100644 --- a/src/optimizer/join_order/join_relation_set.cpp +++ b/src/optimizer/join_order/join_relation_set.cpp @@ -7,10 +7,10 @@ namespace duckdb { -using JoinRelationTreeNode = JoinRelationSetManager::JoinRelationTreeNode; +using JoinRelationTreeNode = JoinRelationSetManagerOld::JoinRelationTreeNode; // LCOV_EXCL_START -string JoinRelationSet::ToString() const { +string JoinRelationSetOld::ToString() const { string result = "["; result += StringUtil::Join(relations, count, ", ", [](const idx_t &relation) { return to_string(relation); }); result += "]"; @@ -19,24 +19,24 @@ string JoinRelationSet::ToString() const { // LCOV_EXCL_STOP //! Returns true if sub is a subset of super -bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) { - D_ASSERT(sub.count > 0); - if (sub.count > super.count) { - return false; - } - idx_t j = 0; - for (idx_t i = 0; i < super.count; i++) { - if (sub.relations[j] == super.relations[i]) { - j++; - if (j == sub.count) { - return true; - } - } - } - return false; -} +// bool JoinRelationSetOld::IsSubset(JoinRelationSetOld &super, JoinRelationSetOld &sub) { +// D_ASSERT(sub.count > 0); +// if (sub.count > super.count) { +// return false; +// } +// idx_t j = 0; +// for (idx_t i = 0; i < super.count; i++) { +// if (sub.relations[j] == super.relations[i]) { +// j++; +// if (j == sub.count) { +// return true; +// } +// } +// } +// return false; +// } -JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_unique_array relations, idx_t count) { +JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(unsafe_unique_array relations, idx_t count) { // now look it up in the tree reference info(root); for (idx_t i = 0; i < count; i++) { @@ -52,13 +52,13 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_unique_array(std::move(relations), count); + info.get().relation = make_uniq(std::move(relations), count); } return *info.get().relation; } //! Create or get a JoinRelationSet from a single node with the given index -JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) { +JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(idx_t index) { // create a sorted vector of the relations auto relations = make_unsafe_uniq_array(1); relations[0] = index; @@ -66,7 +66,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) { return GetJoinRelation(std::move(relations), count); } -JoinRelationSet &JoinRelationSetManager::GetJoinRelation(const unordered_set &bindings) { +JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(const unordered_set &bindings) { // create a sorted vector of the relations unsafe_unique_array relations = bindings.empty() ? nullptr : make_unsafe_uniq_array(bindings.size()); idx_t count = 0; @@ -77,7 +77,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(const unordered_set(left.count + right.count); idx_t count = 0; // move through the left and right relations, eliminating duplicates @@ -113,6 +113,58 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati return GetJoinRelation(std::move(relations), count); } +bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) { + std::bitset<12> sub_copy = sub.relations; + sub_copy &= super.relations; + return sub_copy == sub.relations; +} + + + +reference JoinRelationSetManager::GetJoinRelation(unsafe_unique_array relations, idx_t count) { + auto ret = make_uniq(relations, count); + return GetJoinRelation(std::move(ret)); +} + +reference JoinRelationSetManager::GetJoinRelation(unique_ptr set) { + auto existing = active_relation_sets.find(set->relations); + if (existing == active_relation_sets.end()) { + active_relation_sets[set->relations] = std::move(set); + } + auto ret = active_relation_sets.find(set->relations); + auto &wat = *ret->second; + return wat; +} + +//! Create or get a JoinRelationSet from a single node with the given index +reference JoinRelationSetManager::GetJoinRelation(idx_t index) { + // create a sorted vector of the relations + auto relations = make_unsafe_uniq_array(1); + relations[0] = index; + idx_t count = 1; + return GetJoinRelation(std::move(relations), count); +} + +reference JoinRelationSetManager::GetJoinRelation(const unordered_set &bindings) { + // create a sorted vector of the relations + unsafe_unique_array relations = bindings.empty() ? nullptr : make_unsafe_uniq_array(bindings.size()); + idx_t count = 0; + for (auto &entry : bindings) { + relations[count++] = entry; + } + std::sort(relations.get(), relations.get() + count); + return GetJoinRelation(std::move(relations), count); +} + +reference JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelationSet &right) { + auto left_copy = make_uniq(left.Copy()); + auto right_copy = right.Copy(); + left_copy->relations |= right_copy.relations; + return GetJoinRelation(std::move(left_copy)); +} + + + // JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) { // auto relations = unsafe_unique_array(new idx_t[left->count]); // idx_t count = 0; @@ -155,11 +207,11 @@ static string JoinRelationTreeNodeToString(const JoinRelationTreeNode *node) { return result; } -string JoinRelationSetManager::ToString() const { +string JoinRelationSetManagerOld::ToString() const { return JoinRelationTreeNodeToString(&root); } -void JoinRelationSetManager::Print() { +void JoinRelationSetManagerOld::Print() { Printer::Print(ToString()); } diff --git a/src/optimizer/join_order/plan_enumerator.cpp b/src/optimizer/join_order/plan_enumerator.cpp index 04b396b97800..2545bf55cade 100644 --- a/src/optimizer/join_order/plan_enumerator.cpp +++ b/src/optimizer/join_order/plan_enumerator.cpp @@ -33,7 +33,7 @@ static vector> AddSuperSets(const vector node, unordered_set &exclusion_set) { +static void UpdateExclusionSet(optional_ptr node, unordered_set &exclusion_set) { for (idx_t i = 0; i < node->count; i++) { exclusion_set.insert(node->relations[i]); } @@ -93,12 +93,12 @@ void PlanEnumerator::GenerateCrossProducts() { // query_graph = query_graph_manager.GetQueryGraph(); } -const reference_map_t> &PlanEnumerator::GetPlans() const { +const reference_map_t> &PlanEnumerator::GetPlans() const { return plans; } //! Create a new JoinTree node by joining together two previous JoinTree nodes -unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSet &set, +unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSetOld &set, const vector> &possible_connections, DPJoinNode &left, DPJoinNode &right) { @@ -138,7 +138,7 @@ unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSet &set, return result; } -DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSet &left, JoinRelationSet &right, +DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info) { // get the left and right join plans auto left_plan = plans.find(left); @@ -165,7 +165,7 @@ DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSet &left, JoinRelationSet &rig return *entry->second; } -bool PlanEnumerator::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, +bool PlanEnumerator::TryEmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info) { pairs++; // If a full plan is created, it's possible a node in the plan gets updated. When this happens, make sure you keep @@ -181,7 +181,7 @@ bool PlanEnumerator::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, return true; } -bool PlanEnumerator::EmitCSG(JoinRelationSet &node) { +bool PlanEnumerator::EmitCSG(JoinRelationSetOld &node) { if (node.count == query_graph_manager.relation_manager.NumRelations()) { return true; } @@ -233,7 +233,7 @@ bool PlanEnumerator::EmitCSG(JoinRelationSet &node) { return true; } -bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, +bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelationSetOld &right, unordered_set &exclusion_set) { // get the neighbors of the second relation under the exclusion set auto neighbors = query_graph.GetNeighbors(right, exclusion_set); @@ -242,7 +242,7 @@ bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSe } auto all_subset = GetAllNeighborSets(neighbors); - vector> union_sets; + vector> union_sets; union_sets.reserve(all_subset.size()); for (const auto &rel_set : all_subset) { auto &neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); @@ -277,7 +277,7 @@ bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSe return true; } -bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set &exclusion_set) { +bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSetOld &node, unordered_set &exclusion_set) { // find neighbors of S under the exclusion set auto neighbors = query_graph.GetNeighbors(node, exclusion_set); if (neighbors.empty()) { @@ -285,7 +285,7 @@ bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set< } auto all_subset = GetAllNeighborSets(neighbors); - vector> union_sets; + vector> union_sets; union_sets.reserve(all_subset.size()); for (const auto &rel_set : all_subset) { auto &neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); @@ -342,7 +342,7 @@ void PlanEnumerator::SolveJoinOrderApproximately() { // at this point, we exited the dynamic programming but did not compute the final join order because it took too // long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to // construct the result tree first we start out with all the base relations (the to-be-joined relations) - vector> join_relations; // T in the paper + vector> join_relations; // T in the paper for (idx_t i = 0; i < query_graph_manager.relation_manager.NumRelations(); i++) { join_relations.push_back(query_graph_manager.set_manager.GetJoinRelation(i)); } diff --git a/src/optimizer/join_order/query_graph.cpp b/src/optimizer/join_order/query_graph.cpp index beb9e1521a7b..94b42eadcf18 100644 --- a/src/optimizer/join_order/query_graph.cpp +++ b/src/optimizer/join_order/query_graph.cpp @@ -4,6 +4,8 @@ #include "duckdb/common/string_util.hpp" #include "duckdb/common/assert.hpp" +#include + namespace duckdb { using QueryEdge = QueryGraphEdges::QueryEdge; @@ -97,7 +99,7 @@ void QueryGraphEdges::EnumerateNeighborsDFS(JoinRelationSet &node, reference &callback) const { - for (idx_t j = 0; j < node.count; j++) { + for (idx_t j = 0; j < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; j++) { auto iter = root.children.find(node.relations[j]); if (iter != root.children.end()) { reference new_info = *iter->second; diff --git a/src/optimizer/join_order/query_graph_manager.cpp b/src/optimizer/join_order/query_graph_manager.cpp index 3a5214d2c206..f39e27adb558 100644 --- a/src/optimizer/join_order/query_graph_manager.cpp +++ b/src/optimizer/join_order/query_graph_manager.cpp @@ -56,11 +56,11 @@ const vector> &QueryGraphManager::GetFilterBindings() con return filters_and_bindings; } -void FilterInfo::SetLeftSet(optional_ptr left_set_new) { +void FilterInfo::SetLeftSet(optional_ptr left_set_new) { left_set = left_set_new; } -void FilterInfo::SetRightSet(optional_ptr right_set_new) { +void FilterInfo::SetRightSet(optional_ptr right_set_new) { right_set = right_set_new; } @@ -235,10 +235,10 @@ static JoinCondition MaybeInvertConditions(unique_ptr condition, boo } GenerateJoinRelation QueryGraphManager::GenerateJoins(vector> &extracted_relations, - JoinRelationSet &set) { - optional_ptr left_node; - optional_ptr right_node; - optional_ptr result_relation; + JoinRelationSetOld &set) { + optional_ptr left_node; + optional_ptr right_node; + optional_ptr result_relation; unique_ptr result_operator; auto dp_entry = plans->find(set); @@ -285,7 +285,7 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorright_set) && JoinRelationSet::IsSubset(*right.set, *f->left_set))); - bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set); + bool invert = !JoinRelationSetOld::IsSubset(*left.set, *f->left_set); // If the left and right set are inverted AND it is a semi or anti join // swap left and right children back. if (invert && (f->join_type == JoinType::SEMI || f->join_type == JoinType::ANTI)) { @@ -333,7 +333,7 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorfilter) { // now check if the filter is a subset of the current relation // note that infos with an empty relation set are a special case and we do not push them down - if (info.set.get().count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) { + if (info.set.get().count > 0 && JoinRelationSetOld::IsSubset(*result_relation, info.set)) { auto &filter_and_binding = filters_and_bindings[info.filter_index]; auto filter = std::move(filter_and_binding->filter); // if it is, we can push the filter @@ -348,11 +348,11 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector> RelationManager::ExtractEdges(LogicalOperator &op, vector> &filter_operators, - JoinRelationSetManager &set_manager) { + JoinRelationSetManagerOld &set_manager) { // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate // filters in the process vector> filters_and_bindings; @@ -504,9 +504,9 @@ vector> RelationManager::ExtractEdges(LogicalOperator &op // create the filter info so all required LHS relations are present when reconstructing the // join - optional_ptr left_set; - optional_ptr right_set; - optional_ptr full_set; + optional_ptr left_set; + optional_ptr right_set; + optional_ptr full_set; // here we create a left_set that unions all relations from the left side of // every expression and a right_set that unions all relations frmo the right side of a // every expression (although this should always be 1). From d05b54714bf085289db0111ad75ff0e43bb81b64 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 13 Dec 2024 16:49:02 +0100 Subject: [PATCH 2/5] need to build a helper function to enumerate the realtions in a bitset --- .../optimizer/join_order/query_graph.hpp | 4 ++ .../join_order/query_graph_manager.hpp | 2 +- .../join_order/join_order_optimizer.cpp | 2 + src/optimizer/join_order/query_graph.cpp | 56 ++++++++++++------- test/optimizer/joins/test_simple_joins.test | 18 ++++++ 5 files changed, 61 insertions(+), 21 deletions(-) create mode 100644 test/optimizer/joins/test_simple_joins.test diff --git a/src/include/duckdb/optimizer/join_order/query_graph.hpp b/src/include/duckdb/optimizer/join_order/query_graph.hpp index e5ac67138978..671604f392dd 100644 --- a/src/include/duckdb/optimizer/join_order/query_graph.hpp +++ b/src/include/duckdb/optimizer/join_order/query_graph.hpp @@ -37,6 +37,10 @@ struct NeighborInfo { class QueryGraphEdges { public: //! Contains a node with info about neighboring relations and child edge infos + //! The root is a top level QueryEdge with no neighbors, then each child represents a single + //! relation node. Neighbors with these single nodes are in the neighbors vector. + //! If the edge is complex (like a+b = c), then the children structure is used to capture + //! the presence of [a, b]. struct QueryEdge { vector> neighbors; unordered_map> children; diff --git a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp index f9dd43ac8f36..02f75448a415 100644 --- a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +++ b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp @@ -97,7 +97,7 @@ class QueryGraphManager { //! A map to store the optimal join plan found for a specific JoinRelationSet* optional_ptr>> plans; -private: +// private: vector> filter_operators; //! Filter information including the column_bindings that join filters diff --git a/src/optimizer/join_order/join_order_optimizer.cpp b/src/optimizer/join_order/join_order_optimizer.cpp index e49798ac4bff..0eda15e5cce4 100644 --- a/src/optimizer/join_order/join_order_optimizer.cpp +++ b/src/optimizer/join_order/join_order_optimizer.cpp @@ -32,7 +32,9 @@ unique_ptr JoinOrderOptimizer::Optimize(unique_ptrPrint(); unique_ptr new_logical_plan = nullptr; + query_graph_manager.query_graph.Print(); if (reorderable) { // query graph now has filters and relations diff --git a/src/optimizer/join_order/query_graph.cpp b/src/optimizer/join_order/query_graph.cpp index 94b42eadcf18..9960513aada6 100644 --- a/src/optimizer/join_order/query_graph.cpp +++ b/src/optimizer/join_order/query_graph.cpp @@ -39,18 +39,19 @@ void QueryGraphEdges::Print() { // LCOV_EXCL_STOP optional_ptr QueryGraphEdges::GetQueryEdge(JoinRelationSet &left) { - D_ASSERT(left.count > 0); // find the EdgeInfo corresponding to the left set optional_ptr info(&root); - for (idx_t i = 0; i < left.count; i++) { - auto entry = info.get()->children.find(left.relations[i]); - if (entry == info.get()->children.end()) { - // node not found, create it - auto insert_it = info.get()->children.insert(make_pair(left.relations[i], make_uniq())); - entry = insert_it.first; + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (left.relations[i]) { + auto entry = info.get()->children.find(i); + if (entry == info.get()->children.end()) { + // node not found, create it + auto insert_it = info.get()->children.insert(make_pair(i, make_uniq())); + entry = insert_it.first; + } + // move to the next node + info = entry->second; } - // move to the next node - info = entry->second; } return info; } @@ -88,11 +89,13 @@ void QueryGraphEdges::EnumerateNeighborsDFS(JoinRelationSet &node, reference new_info = *iter->second; - EnumerateNeighborsDFS(node, new_info, node_index + 1, callback); + for (idx_t node_index = index; node_index < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; ++node_index) { + if (node.relations[node_index]) { + auto iter = info.get().children.find(node_index); + if (iter != info.get().children.end()) { + reference new_info = *iter->second; + EnumerateNeighborsDFS(node, new_info, node_index + 1, callback); + } } } } @@ -100,17 +103,25 @@ void QueryGraphEdges::EnumerateNeighborsDFS(JoinRelationSet &node, reference &callback) const { for (idx_t j = 0; j < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; j++) { - auto iter = root.children.find(node.relations[j]); - if (iter != root.children.end()) { - reference new_info = *iter->second; - EnumerateNeighborsDFS(node, new_info, j + 1, callback); + if (node.relations[j]) { + auto iter = root.children.find(j); + if (iter != root.children.end()) { + reference new_info = *iter->second; + EnumerateNeighborsDFS(node, new_info, j + 1, callback); + } } } } //! Returns true if a JoinRelationSet is banned by the list of exclusion_set, false otherwise static bool JoinRelationSetIsExcluded(optional_ptr node, unordered_set &exclusion_set) { - return exclusion_set.find(node->relations[0]) != exclusion_set.end(); + // TODO: figure this one out. + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (node->relations[i]) { + return exclusion_set.find(i) != exclusion_set.end(); + } + } + throw InternalException("something went wrong"); } const vector QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unordered_set &exclusion_set) const { @@ -118,7 +129,12 @@ const vector QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unorder EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool { if (!JoinRelationSetIsExcluded(info.neighbor, exclusion_set)) { // add the smallest node of the neighbor to the set - result.insert(info.neighbor->relations[0]); + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (info.neighbor->relations[i]) { + result.insert(i); + break; + } + } } return false; }); diff --git a/test/optimizer/joins/test_simple_joins.test b/test/optimizer/joins/test_simple_joins.test new file mode 100644 index 000000000000..fa311e631568 --- /dev/null +++ b/test/optimizer/joins/test_simple_joins.test @@ -0,0 +1,18 @@ +# name: test/optimizer/joins/test_simple_joins.test +# description: just test simple joins +# group: [joins] + +statement ok +create table t1 as select range a from range(10); + +statement ok +create table t2 as select range b from range(100); + +statement ok +create table t3 as select range c from range(1000); + +statement ok +select * from t1, t2, t3 where a = b and b = c; + + + From c7ca0cdd75a79179bb820eeb188c2e9e6e0aa375 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 13 Dec 2024 17:24:09 +0100 Subject: [PATCH 3/5] last commit --- .../optimizer/join_order/join_relation.hpp | 47 +------ .../join_order/query_graph_manager.hpp | 24 ++-- .../join_order/join_relation_set.cpp | 126 ++++-------------- src/optimizer/join_order/query_graph.cpp | 20 ++- .../join_order/query_graph_manager.cpp | 26 ++-- 5 files changed, 63 insertions(+), 180 deletions(-) diff --git a/src/include/duckdb/optimizer/join_order/join_relation.hpp b/src/include/duckdb/optimizer/join_order/join_relation.hpp index b6d81ed1254f..be6a7be935c6 100644 --- a/src/include/duckdb/optimizer/join_order/join_relation.hpp +++ b/src/include/duckdb/optimizer/join_order/join_relation.hpp @@ -22,6 +22,7 @@ struct JoinRelationSet { relations[relations_[i]] = true; } } + static void EnumerateRelations(std::bitset<12> relations, const std::function &callback); string ToString() const; std::bitset<12> relations; @@ -34,10 +35,6 @@ struct JoinRelationSet { //! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to //! them class JoinRelationSetManager { -public: - //! Contains a node with a JoinRelationSet and child relations - // FIXME: this structure is inefficient, could use a bitmap for lookup instead (todo: profile) - public: //! Create or get a JoinRelationSet from a single node with the given index @@ -59,46 +56,4 @@ class JoinRelationSetManager { unordered_map, unique_ptr> active_relation_sets; }; -//! Set of relations, used in the join graph. -struct JoinRelationSetOld { - JoinRelationSetOld(unsafe_unique_array relations, idx_t count) : relations(std::move(relations)), count(count) { - } - - string ToString() const; - - unsafe_unique_array relations; - idx_t count; - - // static bool IsSubset(JoinRelationSetOld &super, JoinRelationSetOld &sub); -}; - -//! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to -//! them -class JoinRelationSetManagerOld { -public: - //! Contains a node with a JoinRelationSet and child relations - // FIXME: this structure is inefficient, could use a bitmap for lookup instead (todo: profile) - struct JoinRelationTreeNode { - unique_ptr relation; - unordered_map> children; - }; - -public: - //! Create or get a JoinRelationSet from a single node with the given index - JoinRelationSetOld &GetJoinRelation(idx_t index); - //! Create or get a JoinRelationSet from a set of relation bindings - JoinRelationSetOld &GetJoinRelation(const unordered_set &bindings); - //! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations - JoinRelationSetOld &GetJoinRelation(unsafe_unique_array relations, idx_t count); - //! Union two sets of relations together and create a new relation set - JoinRelationSetOld &Union(JoinRelationSetOld &left, JoinRelationSetOld &right); - // //! Create the set difference of left \ right (i.e. all elements in left that are not in right) - // JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); - string ToString() const; - void Print(); - -private: - JoinRelationTreeNode root; -}; - } // namespace duckdb diff --git a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp index 02f75448a415..e523281e3be0 100644 --- a/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +++ b/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp @@ -29,11 +29,11 @@ namespace duckdb { class QueryGraphEdges; struct GenerateJoinRelation { - GenerateJoinRelation(optional_ptr set, unique_ptr op_p) + GenerateJoinRelation(optional_ptr set, unique_ptr op_p) : set(set), op(std::move(op_p)) { } - optional_ptr set; + optional_ptr set; unique_ptr op; }; @@ -41,23 +41,23 @@ struct GenerateJoinRelation { //! but is also eventually transformed into a query edge. class FilterInfo { public: - FilterInfo(unique_ptr filter, JoinRelationSetOld &set, idx_t filter_index, + FilterInfo(unique_ptr filter, JoinRelationSet &set, idx_t filter_index, JoinType join_type = JoinType::INNER) : filter(std::move(filter)), set(set), filter_index(filter_index), join_type(join_type) { } public: unique_ptr filter; - reference set; + reference set; idx_t filter_index; JoinType join_type; - optional_ptr left_set; - optional_ptr right_set; + optional_ptr left_set; + optional_ptr right_set; ColumnBinding left_binding; ColumnBinding right_binding; - void SetLeftSet(optional_ptr left_set_new); - void SetRightSet(optional_ptr right_set_new); + void SetLeftSet(optional_ptr left_set_new); + void SetRightSet(optional_ptr right_set_new); }; //! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations @@ -72,7 +72,7 @@ class QueryGraphManager { RelationManager relation_manager; //! A structure holding all the created JoinRelationSet objects - JoinRelationSetManagerOld set_manager; + JoinRelationSetManager set_manager; ClientContext &context; @@ -92,10 +92,10 @@ class QueryGraphManager { //! Plan enumerator may not find a full plan and therefore will need to create cross //! products to create edges. - void CreateQueryGraphCrossProduct(JoinRelationSetOld &left, JoinRelationSetOld &right); + void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right); //! A map to store the optimal join plan found for a specific JoinRelationSet* - optional_ptr>> plans; + optional_ptr>> plans; // private: vector> filter_operators; @@ -110,7 +110,7 @@ class QueryGraphManager { void CreateHyperGraphEdges(); - GenerateJoinRelation GenerateJoins(vector> &extracted_relations, JoinRelationSetOld &set); + GenerateJoinRelation GenerateJoins(vector> &extracted_relations, JoinRelationSet &set); }; } // namespace duckdb diff --git a/src/optimizer/join_order/join_relation_set.cpp b/src/optimizer/join_order/join_relation_set.cpp index 245a38d17499..559e533f1b2a 100644 --- a/src/optimizer/join_order/join_relation_set.cpp +++ b/src/optimizer/join_order/join_relation_set.cpp @@ -7,12 +7,13 @@ namespace duckdb { -using JoinRelationTreeNode = JoinRelationSetManagerOld::JoinRelationTreeNode; // LCOV_EXCL_START -string JoinRelationSetOld::ToString() const { +string JoinRelationSet::ToString() const { string result = "["; - result += StringUtil::Join(relations, count, ", ", [](const idx_t &relation) { return to_string(relation); }); + EnumerateRelations(relations, [&](idx_t relation) { + result += to_string(relation) + ", "; + }); result += "]"; return result; } @@ -36,90 +37,19 @@ string JoinRelationSetOld::ToString() const { // return false; // } -JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(unsafe_unique_array relations, idx_t count) { - // now look it up in the tree - reference info(root); - for (idx_t i = 0; i < count; i++) { - auto entry = info.get().children.find(relations[i]); - if (entry == info.get().children.end()) { - // node not found, create it - auto insert_it = info.get().children.insert(make_pair(relations[i], make_uniq())); - entry = insert_it.first; - } - // move to the next node - info = *entry->second; - } - // now check if the JoinRelationSet has already been created - if (!info.get().relation) { - // if it hasn't we need to create it - info.get().relation = make_uniq(std::move(relations), count); - } - return *info.get().relation; -} - -//! Create or get a JoinRelationSet from a single node with the given index -JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(idx_t index) { - // create a sorted vector of the relations - auto relations = make_unsafe_uniq_array(1); - relations[0] = index; - idx_t count = 1; - return GetJoinRelation(std::move(relations), count); -} - -JoinRelationSetOld &JoinRelationSetManagerOld::GetJoinRelation(const unordered_set &bindings) { - // create a sorted vector of the relations - unsafe_unique_array relations = bindings.empty() ? nullptr : make_unsafe_uniq_array(bindings.size()); - idx_t count = 0; - for (auto &entry : bindings) { - relations[count++] = entry; - } - std::sort(relations.get(), relations.get() + count); - return GetJoinRelation(std::move(relations), count); -} - -JoinRelationSetOld &JoinRelationSetManagerOld::Union(JoinRelationSetOld &left, JoinRelationSetOld &right) { - auto relations = make_unsafe_uniq_array(left.count + right.count); - idx_t count = 0; - // move through the left and right relations, eliminating duplicates - idx_t i = 0, j = 0; - while (true) { - if (i == left.count) { - // exhausted left relation, add remaining of right relation - for (; j < right.count; j++) { - relations[count++] = right.relations[j]; - } - break; - } else if (j == right.count) { - // exhausted right relation, add remaining of left - for (; i < left.count; i++) { - relations[count++] = left.relations[i]; - } - break; - } else if (left.relations[i] < right.relations[j]) { - // left is smaller, progress left and add it to the set - relations[count++] = left.relations[i]; - i++; - } else if (left.relations[i] > right.relations[j]) { - // right is smaller, progress right and add it to the set - relations[count++] = right.relations[j]; - j++; - } else { - D_ASSERT(left.relations[i] == right.relations[j]); - relations[count++] = left.relations[i]; - i++; - j++; - } - } - return GetJoinRelation(std::move(relations), count); -} - bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) { std::bitset<12> sub_copy = sub.relations; sub_copy &= super.relations; return sub_copy == sub.relations; } - +void JoinRelationSet::EnumerateRelations(std::bitset<12> relations, const std::function &callback) { + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (relations[i]) { + callback(i); + } + } +} reference JoinRelationSetManager::GetJoinRelation(unsafe_unique_array relations, idx_t count) { auto ret = make_uniq(relations, count); @@ -196,23 +126,23 @@ reference JoinRelationSetManager::Union(JoinRelationSet &left, // return GetJoinRelation(std::move(relations), count); // } -static string JoinRelationTreeNodeToString(const JoinRelationTreeNode *node) { - string result = ""; - if (node->relation) { - result += node->relation.get()->ToString() + "\n"; - } - for (auto &child : node->children) { - result += JoinRelationTreeNodeToString(child.second.get()); - } - return result; -} - -string JoinRelationSetManagerOld::ToString() const { - return JoinRelationTreeNodeToString(&root); -} +// static string JoinRelationTreeNodeToString(const JoinRelationTreeNode *node) { +// string result = ""; +// if (node->relation) { +// result += node->relation.get()->ToString() + "\n"; +// } +// for (auto &child : node->children) { +// result += JoinRelationTreeNodeToString(child.second.get()); +// } +// return result; +// } -void JoinRelationSetManagerOld::Print() { - Printer::Print(ToString()); -} +// string JoinRelationSetManagerOld::ToString() const { +// return JoinRelationTreeNodeToString(&root); +// } +// +// void JoinRelationSetManagerOld::Print() { +// Printer::Print(ToString()); +// } } // namespace duckdb diff --git a/src/optimizer/join_order/query_graph.cpp b/src/optimizer/join_order/query_graph.cpp index 9960513aada6..b3ac319c0068 100644 --- a/src/optimizer/join_order/query_graph.cpp +++ b/src/optimizer/join_order/query_graph.cpp @@ -41,18 +41,16 @@ void QueryGraphEdges::Print() { optional_ptr QueryGraphEdges::GetQueryEdge(JoinRelationSet &left) { // find the EdgeInfo corresponding to the left set optional_ptr info(&root); - for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { - if (left.relations[i]) { - auto entry = info.get()->children.find(i); - if (entry == info.get()->children.end()) { - // node not found, create it - auto insert_it = info.get()->children.insert(make_pair(i, make_uniq())); - entry = insert_it.first; - } - // move to the next node - info = entry->second; + JoinRelationSet::EnumerateRelations(left.relations, [&](idx_t relation_id) { + auto entry = info.get()->children.find(relation_id); + if (entry == info.get()->children.end()) { + // node not found, create it + auto insert_it = info.get()->children.insert(make_pair(relation_id, make_uniq())); + entry = insert_it.first; } - } + // move to the next node + info = entry->second; + }); return info; } diff --git a/src/optimizer/join_order/query_graph_manager.cpp b/src/optimizer/join_order/query_graph_manager.cpp index f39e27adb558..3a5214d2c206 100644 --- a/src/optimizer/join_order/query_graph_manager.cpp +++ b/src/optimizer/join_order/query_graph_manager.cpp @@ -56,11 +56,11 @@ const vector> &QueryGraphManager::GetFilterBindings() con return filters_and_bindings; } -void FilterInfo::SetLeftSet(optional_ptr left_set_new) { +void FilterInfo::SetLeftSet(optional_ptr left_set_new) { left_set = left_set_new; } -void FilterInfo::SetRightSet(optional_ptr right_set_new) { +void FilterInfo::SetRightSet(optional_ptr right_set_new) { right_set = right_set_new; } @@ -235,10 +235,10 @@ static JoinCondition MaybeInvertConditions(unique_ptr condition, boo } GenerateJoinRelation QueryGraphManager::GenerateJoins(vector> &extracted_relations, - JoinRelationSetOld &set) { - optional_ptr left_node; - optional_ptr right_node; - optional_ptr result_relation; + JoinRelationSet &set) { + optional_ptr left_node; + optional_ptr right_node; + optional_ptr result_relation; unique_ptr result_operator; auto dp_entry = plans->find(set); @@ -285,7 +285,7 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorright_set) && JoinRelationSet::IsSubset(*right.set, *f->left_set))); - bool invert = !JoinRelationSetOld::IsSubset(*left.set, *f->left_set); + bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set); // If the left and right set are inverted AND it is a semi or anti join // swap left and right children back. if (invert && (f->join_type == JoinType::SEMI || f->join_type == JoinType::ANTI)) { @@ -333,7 +333,7 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorfilter) { // now check if the filter is a subset of the current relation // note that infos with an empty relation set are a special case and we do not push them down - if (info.set.get().count > 0 && JoinRelationSetOld::IsSubset(*result_relation, info.set)) { + if (info.set.get().count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) { auto &filter_and_binding = filters_and_bindings[info.filter_index]; auto filter = std::move(filter_and_binding->filter); // if it is, we can push the filter @@ -348,11 +348,11 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector Date: Fri, 20 Dec 2024 14:44:10 -0800 Subject: [PATCH 4/5] need to figure out why I'm stuck in a loop --- .../join_order/cardinality_estimator.hpp | 22 +++---- .../duckdb/optimizer/join_order/join_node.hpp | 10 +-- .../optimizer/join_order/join_relation.hpp | 8 +-- .../optimizer/join_order/plan_enumerator.hpp | 16 ++--- .../optimizer/join_order/relation_manager.hpp | 2 +- .../join_order/cardinality_estimator.cpp | 60 +++++++++--------- src/optimizer/join_order/cost_model.cpp | 2 +- src/optimizer/join_order/join_node.cpp | 6 +- .../join_order/join_order_optimizer.cpp | 2 - .../join_order/join_relation_set.cpp | 29 ++++++--- src/optimizer/join_order/plan_enumerator.cpp | 63 ++++++++++--------- src/optimizer/join_order/query_graph.cpp | 13 ++-- .../join_order/query_graph_manager.cpp | 12 ++-- src/optimizer/join_order/relation_manager.cpp | 28 ++++----- 14 files changed, 144 insertions(+), 129 deletions(-) diff --git a/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp b/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp index af62f7a9a55e..8aec1cd02c46 100644 --- a/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +++ b/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp @@ -17,11 +17,11 @@ namespace duckdb { class FilterInfo; struct DenomInfo { - DenomInfo(JoinRelationSetOld &numerator_relations, double filter_strength, double denominator) + DenomInfo(JoinRelationSet &numerator_relations, double filter_strength, double denominator) : numerator_relations(numerator_relations), filter_strength(filter_strength), denominator(denominator) { } - JoinRelationSetOld &numerator_relations; + JoinRelationSet &numerator_relations; double filter_strength; double denominator; }; @@ -59,8 +59,8 @@ class FilterInfoWithTotalDomains { }; struct Subgraph2Denominator { - optional_ptr relations; - optional_ptr numerator_relations; + optional_ptr relations; + optional_ptr numerator_relations; double denom; Subgraph2Denominator() : relations(nullptr), numerator_relations(nullptr), denom(1) {}; @@ -94,28 +94,28 @@ class CardinalityEstimator { private: vector relations_to_tdoms; unordered_map relation_set_2_cardinality; - JoinRelationSetManagerOld set_manager; + JoinRelationSetManager set_manager; vector relation_stats; public: void RemoveEmptyTotalDomains(); - void UpdateTotalDomains(optional_ptr set, RelationStats &stats); + void UpdateTotalDomains(optional_ptr set, RelationStats &stats); void InitEquivalentRelations(const vector> &filter_infos); - void InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats); + void InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats); //! cost model needs estimated cardinalities to the fraction since the formula captures //! distinct count selectivities and multiplicities. Hence the template template - T EstimateCardinalityWithSet(JoinRelationSetOld &new_set); + T EstimateCardinalityWithSet(JoinRelationSet &new_set); //! used for debugging. void AddRelationNamesToTdoms(vector &stats); void PrintRelationToTdomInfo(); private: - double GetNumerator(JoinRelationSetOld &set); - DenomInfo GetDenominator(JoinRelationSetOld &set); + double GetNumerator(JoinRelationSet &set); + DenomInfo GetDenominator(JoinRelationSet &set); bool SingleColumnFilter(FilterInfo &filter_info); vector DetermineMatchingEquivalentSets(optional_ptr filter_info); @@ -126,7 +126,7 @@ class CardinalityEstimator { double CalculateUpdatedDenom(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); - JoinRelationSetOld &UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, + JoinRelationSet &UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); void AddRelationTdom(FilterInfo &filter_info); diff --git a/src/include/duckdb/optimizer/join_order/join_node.hpp b/src/include/duckdb/optimizer/join_order/join_node.hpp index 06fe79a09a2f..b68f9a0a4e77 100644 --- a/src/include/duckdb/optimizer/join_order/join_node.hpp +++ b/src/include/duckdb/optimizer/join_order/join_node.hpp @@ -17,13 +17,13 @@ struct NeighborInfo; class DPJoinNode { public: //! Represents a node in the join plan - JoinRelationSetOld &set; + JoinRelationSet &set; //! information on how left and right are connected optional_ptr info; bool is_leaf; //! left and right plans - JoinRelationSetOld &left_set; - JoinRelationSetOld &right_set; + JoinRelationSet &left_set; + JoinRelationSet &right_set; //! The cost of the join node. The cost is stored here so that the cost of //! a join node stays in sync with how the join node is constructed. Storing the cost in an unordered_set @@ -34,13 +34,13 @@ class DPJoinNode { idx_t cardinality; //! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality - DPJoinNode(JoinRelationSetOld &set, optional_ptr info, JoinRelationSetOld &left, JoinRelationSetOld &right, + DPJoinNode(JoinRelationSet &set, optional_ptr info, JoinRelationSet &left, JoinRelationSet &right, double cost); //! Create a leaf node in the join tree //! set cost to 0 for leaf nodes //! cost will be the cost to *produce* an intermediate table - explicit DPJoinNode(JoinRelationSetOld &set); + explicit DPJoinNode(JoinRelationSet &set); }; } // namespace duckdb diff --git a/src/include/duckdb/optimizer/join_order/join_relation.hpp b/src/include/duckdb/optimizer/join_order/join_relation.hpp index be6a7be935c6..b66cbcb8396c 100644 --- a/src/include/duckdb/optimizer/join_order/join_relation.hpp +++ b/src/include/duckdb/optimizer/join_order/join_relation.hpp @@ -14,9 +14,9 @@ namespace duckdb { - struct JoinRelationSet { - JoinRelationSet() {} + JoinRelationSet() { + } JoinRelationSet(unsafe_unique_array &relations_, idx_t count) { for (idx_t i = 0; i < count; i++) { relations[relations_[i]] = true; @@ -25,13 +25,13 @@ struct JoinRelationSet { static void EnumerateRelations(std::bitset<12> relations, const std::function &callback); string ToString() const; + idx_t Count() const; std::bitset<12> relations; static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub); - JoinRelationSet Copy(); + JoinRelationSet Copy() const; }; - //! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to //! them class JoinRelationSetManager { diff --git a/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp b/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp index a5294313add4..29e8679532ce 100644 --- a/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +++ b/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp @@ -39,7 +39,7 @@ class PlanEnumerator { void SolveJoinOrder(); void InitLeafPlans(); - const reference_map_t> &GetPlans() const; + const reference_map_t> &GetPlans() const; private: //! The set of edges used in the join optimizer @@ -51,26 +51,26 @@ class PlanEnumerator { //! Cost model to evaluate cost of joins CostModel &cost_model; //! A map to store the optimal join plan found for a specific JoinRelationSet* - reference_map_t> plans; + reference_map_t> plans; unordered_set join_nodes_in_full_plan; - unique_ptr CreateJoinTree(JoinRelationSetOld &set, + unique_ptr CreateJoinTree(JoinRelationSet &set, const vector> &possible_connections, DPJoinNode &left, DPJoinNode &right); //! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either //! the newly created plan, or an existing plan) - DPJoinNode &EmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info); + DPJoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); //! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted, //! cancelling the dynamic programming step. - bool TryEmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, const vector> &info); + bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); - bool EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelationSetOld &right, unordered_set &exclusion_set); + bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set &exclusion_set); //! Emit a relation set node - bool EmitCSG(JoinRelationSetOld &node); + bool EmitCSG(JoinRelationSet &node); //! Enumerate the possible connected subgraphs that can be joined together in the join graph - bool EnumerateCSGRecursive(JoinRelationSetOld &node, unordered_set &exclusion_set); + bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set &exclusion_set); //! Generate cross product edges inside the side void GenerateCrossProducts(); diff --git a/src/include/duckdb/optimizer/join_order/relation_manager.hpp b/src/include/duckdb/optimizer/join_order/relation_manager.hpp index bd12c2b42716..3b8fda1c67f6 100644 --- a/src/include/duckdb/optimizer/join_order/relation_manager.hpp +++ b/src/include/duckdb/optimizer/join_order/relation_manager.hpp @@ -51,7 +51,7 @@ class RelationManager { //! both sides of the join filter, along with the tables & indexes. vector> ExtractEdges(LogicalOperator &op, vector> &filter_operators, - JoinRelationSetManagerOld &set_manager); + JoinRelationSetManager &set_manager); //! Extract the set of relations referred to inside an expression bool ExtractBindings(Expression &expression, unordered_set &bindings); diff --git a/src/optimizer/join_order/cardinality_estimator.cpp b/src/optimizer/join_order/cardinality_estimator.cpp index 72ea38e0ac2f..578c85725e35 100644 --- a/src/optimizer/join_order/cardinality_estimator.cpp +++ b/src/optimizer/join_order/cardinality_estimator.cpp @@ -21,7 +21,7 @@ bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) { } void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) { - D_ASSERT(filter_info.set.get().count >= 1); + D_ASSERT(filter_info.set.get().Count() >= 1); for (const RelationsToTDom &r2tdom : relations_to_tdoms) { auto &i_set = r2tdom.equivalent_relations; if (i_set.find(filter_info.left_binding) != i_set.end()) { @@ -37,7 +37,7 @@ void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) { } bool CardinalityEstimator::SingleColumnFilter(duckdb::FilterInfo &filter_info) { - if (filter_info.left_set && filter_info.right_set && filter_info.set.get().count > 1) { + if (filter_info.left_set && filter_info.right_set && filter_info.set.get().Count() > 1) { // Both set and are from different relations return false; } @@ -111,8 +111,8 @@ void CardinalityEstimator::InitEquivalentRelations(const vectorleft_set->count >= 1); - D_ASSERT(filter->right_set->count >= 1); + D_ASSERT(filter->left_set->Count() >= 1); + D_ASSERT(filter->right_set->Count() >= 1); auto matching_equivalent_sets = DetermineMatchingEquivalentSets(filter.get()); AddToEquivalenceSets(filter.get(), matching_equivalent_sets); @@ -126,11 +126,11 @@ void CardinalityEstimator::RemoveEmptyTotalDomains() { relations_to_tdoms.erase(remove_start, relations_to_tdoms.end()); } -double CardinalityEstimator::GetNumerator(JoinRelationSetOld &set) { +double CardinalityEstimator::GetNumerator(JoinRelationSet &set) { double numerator = 1; - for (idx_t i = 0; i < set.count; i++) { - auto &single_node_set = set_manager.GetJoinRelation(set.relations[i]); - auto card_helper = relation_set_2_cardinality[single_node_set.ToString()]; + for (idx_t i = 0; i < set.Count(); i++) { + auto single_node_set = set_manager.GetJoinRelation(set.relations[i]); + auto card_helper = relation_set_2_cardinality[single_node_set.get().ToString()]; numerator *= card_helper.cardinality_before_filters == 0 ? 1 : card_helper.cardinality_before_filters; } return numerator; @@ -138,13 +138,13 @@ double CardinalityEstimator::GetNumerator(JoinRelationSetOld &set) { bool EdgeConnects(FilterInfoWithTotalDomains &edge, Subgraph2Denominator &subgraph) { if (edge.filter_info->left_set) { - if (JoinRelationSetOld::IsSubset(*subgraph.relations, *edge.filter_info->left_set)) { + if (JoinRelationSet::IsSubset(*subgraph.relations, *edge.filter_info->left_set)) { // cool return true; } } if (edge.filter_info->right_set) { - if (JoinRelationSetOld::IsSubset(*subgraph.relations, *edge.filter_info->right_set)) { + if (JoinRelationSet::IsSubset(*subgraph.relations, *edge.filter_info->right_set)) { return true; } } @@ -152,11 +152,11 @@ bool EdgeConnects(FilterInfoWithTotalDomains &edge, Subgraph2Denominator &subgra } vector GetEdges(vector &relations_to_tdom, - JoinRelationSetOld &requested_set) { + JoinRelationSet &requested_set) { vector res; for (auto &relation_2_tdom : relations_to_tdom) { for (auto &filter : relation_2_tdom.filters) { - if (JoinRelationSetOld::IsSubset(requested_set, filter->set)) { + if (JoinRelationSet::IsSubset(requested_set, filter->set)) { FilterInfoWithTotalDomains new_edge(filter, relation_2_tdom); res.push_back(new_edge); } @@ -194,13 +194,13 @@ vector SubgraphsConnectedByEdge(FilterInfoWithTotalDomains &edge, vector< return res; } -JoinRelationSetOld &CardinalityEstimator::UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, +JoinRelationSet &CardinalityEstimator::UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter) { switch (filter.filter_info->join_type) { case JoinType::SEMI: case JoinType::ANTI: { - if (JoinRelationSetOld::IsSubset(*left.relations, *filter.filter_info->left_set) && - JoinRelationSetOld::IsSubset(*right.relations, *filter.filter_info->right_set)) { + if (JoinRelationSet::IsSubset(*left.relations, *filter.filter_info->left_set) && + JoinRelationSet::IsSubset(*right.relations, *filter.filter_info->right_set)) { return *left.numerator_relations; } return *right.numerator_relations; @@ -261,8 +261,8 @@ double CardinalityEstimator::CalculateUpdatedDenom(Subgraph2Denominator left, Su } case JoinType::SEMI: case JoinType::ANTI: { - if (JoinRelationSetOld::IsSubset(*left.relations, *filter.filter_info->left_set) && - JoinRelationSetOld::IsSubset(*right.relations, *filter.filter_info->right_set)) { + if (JoinRelationSet::IsSubset(*left.relations, *filter.filter_info->left_set) && + JoinRelationSet::IsSubset(*right.relations, *filter.filter_info->right_set)) { new_denom = left.denom * CardinalityEstimator::DEFAULT_SEMI_ANTI_SELECTIVITY; return new_denom; } @@ -275,7 +275,7 @@ double CardinalityEstimator::CalculateUpdatedDenom(Subgraph2Denominator left, Su } } -DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { +DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSet &set) { vector subgraphs; // Finding the denominator is tricky. You need to go through the tdoms in decreasing order @@ -321,19 +321,19 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { auto right_subgraph = Subgraph2Denominator(); right_subgraph.relations = edge.filter_info->right_set; right_subgraph.numerator_relations = edge.filter_info->right_set; - if (JoinRelationSetOld::IsSubset(*left_subgraph->relations, *right_subgraph.relations)) { + if (JoinRelationSet::IsSubset(*left_subgraph->relations, *right_subgraph.relations)) { right_subgraph.relations = edge.filter_info->left_set; right_subgraph.numerator_relations = edge.filter_info->left_set; } - if (JoinRelationSetOld::IsSubset(*left_subgraph->relations, *edge.filter_info->left_set) && - JoinRelationSetOld::IsSubset(*left_subgraph->relations, *edge.filter_info->right_set)) { + if (JoinRelationSet::IsSubset(*left_subgraph->relations, *edge.filter_info->left_set) && + JoinRelationSet::IsSubset(*left_subgraph->relations, *edge.filter_info->right_set)) { // here we have an edge that connects the same subgraph to the same subgraph. Just continue. no need to // update the denom continue; } left_subgraph->numerator_relations = &UpdateNumeratorRelations(*left_subgraph, right_subgraph, edge); - left_subgraph->relations = &set_manager.Union(*left_subgraph->relations, *right_subgraph.relations); + left_subgraph->relations = set_manager.Union(*left_subgraph->relations, *right_subgraph.relations).get(); left_subgraph->denom = CalculateUpdatedDenom(*left_subgraph, right_subgraph, edge); } else if (subgraph_connections.size() == 2) { // The two subgraphs in the subgraph_connections can be merged by this edge. @@ -341,7 +341,7 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { auto subgraph_to_merge_into = &subgraphs.at(subgraph_connections.at(0)); auto subgraph_to_delete = &subgraphs.at(subgraph_connections.at(1)); subgraph_to_merge_into->relations = - &set_manager.Union(*subgraph_to_merge_into->relations, *subgraph_to_delete->relations); + set_manager.Union(*subgraph_to_merge_into->relations, *subgraph_to_delete->relations).get(); subgraph_to_merge_into->numerator_relations = &UpdateNumeratorRelations(*subgraph_to_merge_into, *subgraph_to_delete, edge); subgraph_to_merge_into->denom = CalculateUpdatedDenom(*subgraph_to_merge_into, *subgraph_to_delete, edge); @@ -361,10 +361,10 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { auto final_subgraph = subgraphs.at(0); for (auto merge_with = subgraphs.begin() + 1; merge_with != subgraphs.end(); merge_with++) { D_ASSERT(final_subgraph.relations && merge_with->relations); - final_subgraph.relations = &set_manager.Union(*final_subgraph.relations, *merge_with->relations); + final_subgraph.relations = set_manager.Union(*final_subgraph.relations, *merge_with->relations).get(); D_ASSERT(final_subgraph.numerator_relations && merge_with->numerator_relations); final_subgraph.numerator_relations = - &set_manager.Union(*final_subgraph.numerator_relations, *merge_with->numerator_relations); + set_manager.Union(*final_subgraph.numerator_relations, *merge_with->numerator_relations).get(); final_subgraph.denom *= merge_with->denom; } } @@ -377,7 +377,7 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSetOld &set) { } template <> -double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSetOld &new_set) { +double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) { if (relation_set_2_cardinality.find(new_set.ToString()) != relation_set_2_cardinality.end()) { return relation_set_2_cardinality[new_set.ToString()].cardinality_before_filters; @@ -394,7 +394,7 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSetOld &new_ } template <> -idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSetOld &new_set) { +idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) { auto cardinality_as_double = EstimateCardinalityWithSet(new_set); auto max = NumericLimits::Maximum(); if (cardinality_as_double >= (double)max) { @@ -416,7 +416,7 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) { return a.tdom_no_hll > b.tdom_no_hll; } -void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { +void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { // Get the join relation set D_ASSERT(stats.stats_initialized); auto relation_cardinality = stats.cardinality; @@ -430,8 +430,8 @@ void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats) { - D_ASSERT(set->count == 1); +void CardinalityEstimator::UpdateTotalDomains(optional_ptr set, RelationStats &stats) { + D_ASSERT(set->Count() == 1); auto relation_id = set->relations[0]; //! Initialize the distinct count for all columns used in joins with the current relation. // D_ASSERT(stats.column_distinct_count.size() >= 1); diff --git a/src/optimizer/join_order/cost_model.cpp b/src/optimizer/join_order/cost_model.cpp index bfe64412f053..a7ae137c6458 100644 --- a/src/optimizer/join_order/cost_model.cpp +++ b/src/optimizer/join_order/cost_model.cpp @@ -9,7 +9,7 @@ CostModel::CostModel(QueryGraphManager &query_graph_manager) } double CostModel::ComputeCost(DPJoinNode &left, DPJoinNode &right) { - auto &combination = query_graph_manager.set_manager.Union(left.set, right.set); + auto combination = query_graph_manager.set_manager.Union(left.set, right.set); auto join_card = cardinality_estimator.EstimateCardinalityWithSet(combination); auto join_cost = join_card; return join_cost + left.cost + right.cost; diff --git a/src/optimizer/join_order/join_node.cpp b/src/optimizer/join_order/join_node.cpp index e5f965938c52..031f56ce26ad 100644 --- a/src/optimizer/join_order/join_node.cpp +++ b/src/optimizer/join_order/join_node.cpp @@ -6,11 +6,11 @@ namespace duckdb { -DPJoinNode::DPJoinNode(JoinRelationSetOld &set) : set(set), info(nullptr), is_leaf(true), left_set(set), right_set(set) { +DPJoinNode::DPJoinNode(JoinRelationSet &set) : set(set), info(nullptr), is_leaf(true), left_set(set), right_set(set) { } -DPJoinNode::DPJoinNode(JoinRelationSetOld &set, optional_ptr info, JoinRelationSetOld &left, - JoinRelationSetOld &right, double cost) +DPJoinNode::DPJoinNode(JoinRelationSet &set, optional_ptr info, JoinRelationSet &left, + JoinRelationSet &right, double cost) : set(set), info(info), is_leaf(false), left_set(left), right_set(right), cost(cost) { } diff --git a/src/optimizer/join_order/join_order_optimizer.cpp b/src/optimizer/join_order/join_order_optimizer.cpp index 0eda15e5cce4..e49798ac4bff 100644 --- a/src/optimizer/join_order/join_order_optimizer.cpp +++ b/src/optimizer/join_order/join_order_optimizer.cpp @@ -32,9 +32,7 @@ unique_ptr JoinOrderOptimizer::Optimize(unique_ptrPrint(); unique_ptr new_logical_plan = nullptr; - query_graph_manager.query_graph.Print(); if (reorderable) { // query graph now has filters and relations diff --git a/src/optimizer/join_order/join_relation_set.cpp b/src/optimizer/join_order/join_relation_set.cpp index 559e533f1b2a..81ea5251e6d0 100644 --- a/src/optimizer/join_order/join_relation_set.cpp +++ b/src/optimizer/join_order/join_relation_set.cpp @@ -4,16 +4,14 @@ #include "duckdb/common/to_string.hpp" #include +#include namespace duckdb { - // LCOV_EXCL_START string JoinRelationSet::ToString() const { string result = "["; - EnumerateRelations(relations, [&](idx_t relation) { - result += to_string(relation) + ", "; - }); + EnumerateRelations(relations, [&](idx_t relation) { result += to_string(relation) + ", "; }); result += "]"; return result; } @@ -43,7 +41,8 @@ bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) { return sub_copy == sub.relations; } -void JoinRelationSet::EnumerateRelations(std::bitset<12> relations, const std::function &callback) { +void JoinRelationSet::EnumerateRelations(std::bitset<12> relations, + const std::function &callback) { for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { if (relations[i]) { callback(i); @@ -51,6 +50,22 @@ void JoinRelationSet::EnumerateRelations(std::bitset<12> relations, const std::f } } +idx_t JoinRelationSet::Count() const { + idx_t count = 0; + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (relations[i]) { + count++; + } + } + return count; +} + +JoinRelationSet JoinRelationSet::Copy() const { + JoinRelationSet result; + result.relations = relations; + return result; +} + reference JoinRelationSetManager::GetJoinRelation(unsafe_unique_array relations, idx_t count) { auto ret = make_uniq(relations, count); return GetJoinRelation(std::move(ret)); @@ -59,7 +74,9 @@ reference JoinRelationSetManager::GetJoinRelation(unsafe_unique reference JoinRelationSetManager::GetJoinRelation(unique_ptr set) { auto existing = active_relation_sets.find(set->relations); if (existing == active_relation_sets.end()) { + auto copy = make_uniq(set->Copy()); active_relation_sets[set->relations] = std::move(set); + set = std::move(copy); } auto ret = active_relation_sets.find(set->relations); auto &wat = *ret->second; @@ -93,8 +110,6 @@ reference JoinRelationSetManager::Union(JoinRelationSet &left, return GetJoinRelation(std::move(left_copy)); } - - // JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) { // auto relations = unsafe_unique_array(new idx_t[left->count]); // idx_t count = 0; diff --git a/src/optimizer/join_order/plan_enumerator.cpp b/src/optimizer/join_order/plan_enumerator.cpp index 2545bf55cade..03c67902d1ce 100644 --- a/src/optimizer/join_order/plan_enumerator.cpp +++ b/src/optimizer/join_order/plan_enumerator.cpp @@ -33,8 +33,8 @@ static vector> AddSuperSets(const vector node, unordered_set &exclusion_set) { - for (idx_t i = 0; i < node->count; i++) { +static void UpdateExclusionSet(optional_ptr node, unordered_set &exclusion_set) { + for (idx_t i = 0; i < node->Count(); i++) { exclusion_set.insert(node->relations[i]); } } @@ -78,12 +78,12 @@ void PlanEnumerator::GenerateCrossProducts() { // generate a set of cross products to combine the currently available plans into a full join plan // we create edges between every relation with a high cost for (idx_t i = 0; i < query_graph_manager.relation_manager.NumRelations(); i++) { - auto &left = query_graph_manager.set_manager.GetJoinRelation(i); + auto left = query_graph_manager.set_manager.GetJoinRelation(i); for (idx_t j = 0; j < query_graph_manager.relation_manager.NumRelations(); j++) { auto cross_product_allowed = query_graph_manager.relation_manager.CrossProductWithRelationAllowed(i) && query_graph_manager.relation_manager.CrossProductWithRelationAllowed(j); if (i != j && cross_product_allowed) { - auto &right = query_graph_manager.set_manager.GetJoinRelation(j); + auto right = query_graph_manager.set_manager.GetJoinRelation(j); query_graph_manager.CreateQueryGraphCrossProduct(left, right); } } @@ -93,12 +93,12 @@ void PlanEnumerator::GenerateCrossProducts() { // query_graph = query_graph_manager.GetQueryGraph(); } -const reference_map_t> &PlanEnumerator::GetPlans() const { +const reference_map_t> &PlanEnumerator::GetPlans() const { return plans; } //! Create a new JoinTree node by joining together two previous JoinTree nodes -unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSetOld &set, +unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSet &set, const vector> &possible_connections, DPJoinNode &left, DPJoinNode &right) { @@ -138,7 +138,7 @@ unique_ptr PlanEnumerator::CreateJoinTree(JoinRelationSetOld &set, return result; } -DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, +DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info) { // get the left and right join plans auto left_plan = plans.find(left); @@ -146,7 +146,7 @@ DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSetOld &left, JoinRelationSetOl if (left_plan == plans.end() || right_plan == plans.end()) { throw InternalException("No left or right plan: internal error in join order optimizer"); } - auto &new_set = query_graph_manager.set_manager.Union(left, right); + auto new_set = query_graph_manager.set_manager.Union(left, right); // create the join tree based on combining the two plans auto new_plan = CreateJoinTree(new_set, info, *left_plan->second, *right_plan->second); // check if this plan is the optimal plan we found for this set of relations @@ -165,7 +165,7 @@ DPJoinNode &PlanEnumerator::EmitPair(JoinRelationSetOld &left, JoinRelationSetOl return *entry->second; } -bool PlanEnumerator::TryEmitPair(JoinRelationSetOld &left, JoinRelationSetOld &right, +bool PlanEnumerator::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info) { pairs++; // If a full plan is created, it's possible a node in the plan gets updated. When this happens, make sure you keep @@ -181,8 +181,8 @@ bool PlanEnumerator::TryEmitPair(JoinRelationSetOld &left, JoinRelationSetOld &r return true; } -bool PlanEnumerator::EmitCSG(JoinRelationSetOld &node) { - if (node.count == query_graph_manager.relation_manager.NumRelations()) { +bool PlanEnumerator::EmitCSG(JoinRelationSet &node) { + if (node.Count() == query_graph_manager.relation_manager.NumRelations()) { return true; } // create the exclusion set as everything inside the subgraph AND anything with members BELOW it @@ -216,7 +216,7 @@ bool PlanEnumerator::EmitCSG(JoinRelationSetOld &node) { for (auto neighbor : neighbors) { // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to // (only!) this neighbor, hence we have to do a connectedness check before we can emit it - auto &neighbor_relation = query_graph_manager.set_manager.GetJoinRelation(neighbor); + auto neighbor_relation = query_graph_manager.set_manager.GetJoinRelation(neighbor); auto connections = query_graph.GetConnections(node, neighbor_relation); if (!connections.empty()) { if (!TryEmitPair(node, neighbor_relation, connections)) { @@ -233,7 +233,7 @@ bool PlanEnumerator::EmitCSG(JoinRelationSetOld &node) { return true; } -bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelationSetOld &right, +bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set &exclusion_set) { // get the neighbors of the second relation under the exclusion set auto neighbors = query_graph.GetNeighbors(right, exclusion_set); @@ -242,15 +242,15 @@ bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelatio } auto all_subset = GetAllNeighborSets(neighbors); - vector> union_sets; + vector> union_sets; union_sets.reserve(all_subset.size()); for (const auto &rel_set : all_subset) { - auto &neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); + auto neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); // emit the combinations of this node and its neighbors - auto &combined_set = query_graph_manager.set_manager.Union(right, neighbor); + auto combined_set = query_graph_manager.set_manager.Union(right, neighbor); // If combined_set.count == right.count, This means we found a neighbor that has been present before // This means we didn't set exclusion_set correctly. - D_ASSERT(combined_set.count > right.count); + D_ASSERT(combined_set.get().Count() > right.Count()); if (plans.find(combined_set) != plans.end()) { auto connections = query_graph.GetConnections(left, combined_set); if (!connections.empty()) { @@ -277,7 +277,7 @@ bool PlanEnumerator::EnumerateCmpRecursive(JoinRelationSetOld &left, JoinRelatio return true; } -bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSetOld &node, unordered_set &exclusion_set) { +bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set &exclusion_set) { // find neighbors of S under the exclusion set auto neighbors = query_graph.GetNeighbors(node, exclusion_set); if (neighbors.empty()) { @@ -285,13 +285,16 @@ bool PlanEnumerator::EnumerateCSGRecursive(JoinRelationSetOld &node, unordered_s } auto all_subset = GetAllNeighborSets(neighbors); - vector> union_sets; + vector> union_sets; union_sets.reserve(all_subset.size()); for (const auto &rel_set : all_subset) { - auto &neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); + auto neighbor = query_graph_manager.set_manager.GetJoinRelation(rel_set); // emit the combinations of this node and its neighbors - auto &new_set = query_graph_manager.set_manager.Union(node, neighbor); - D_ASSERT(new_set.count > node.count); + auto new_set = query_graph_manager.set_manager.Union(node, neighbor); + if (new_set.get().Count() <= node.Count()) { + auto break_here = 0; + } + D_ASSERT(new_set.get().Count() > node.Count()); if (plans.find(new_set) != plans.end()) { if (!EmitCSG(new_set)) { return false; @@ -320,7 +323,7 @@ bool PlanEnumerator::SolveJoinOrderExactly() { // we enumerate over all the possible pairs in the neighborhood for (idx_t i = query_graph_manager.relation_manager.NumRelations(); i > 0; i--) { // for every node in the set, we consider it as the start node once - auto &start_node = query_graph_manager.set_manager.GetJoinRelation(i - 1); + auto start_node = query_graph_manager.set_manager.GetJoinRelation(i - 1); // emit the start node if (!EmitCSG(start_node)) { return false; @@ -342,7 +345,7 @@ void PlanEnumerator::SolveJoinOrderApproximately() { // at this point, we exited the dynamic programming but did not compute the final join order because it took too // long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to // construct the result tree first we start out with all the base relations (the to-be-joined relations) - vector> join_relations; // T in the paper + vector> join_relations; // T in the paper for (idx_t i = 0; i < query_graph_manager.relation_manager.NumRelations(); i++) { join_relations.push_back(query_graph_manager.set_manager.GetJoinRelation(i)); } @@ -434,8 +437,8 @@ void PlanEnumerator::SolveJoinOrderApproximately() { // important to erase the biggest element first // if we erase the smallest element first the index of the biggest element changes - auto &new_set = query_graph_manager.set_manager.Union(join_relations.at(best_left).get(), - join_relations.at(best_right).get()); + auto new_set = query_graph_manager.set_manager.Union(join_relations.at(best_left).get(), + join_relations.at(best_right).get()); D_ASSERT(best_right > best_left); join_relations.erase(join_relations.begin() + (int64_t)best_right); join_relations.erase(join_relations.begin() + (int64_t)best_left); @@ -456,13 +459,13 @@ void PlanEnumerator::InitLeafPlans() { // then update the total domains based on the cardinalities of each relation. for (idx_t i = 0; i < relation_stats.size(); i++) { auto stats = relation_stats.at(i); - auto &relation_set = query_graph_manager.set_manager.GetJoinRelation(i); + auto relation_set = query_graph_manager.set_manager.GetJoinRelation(i); auto join_node = make_uniq(relation_set); join_node->cost = 0; join_node->cardinality = stats.cardinality; - D_ASSERT(join_node->set.count == 1); + D_ASSERT(join_node->set.Count() == 1); plans[relation_set] = std::move(join_node); - cost_model.cardinality_estimator.InitCardinalityEstimatorProps(&relation_set, stats); + cost_model.cardinality_estimator.InitCardinalityEstimatorProps(relation_set.get(), stats); } } @@ -485,7 +488,7 @@ void PlanEnumerator::SolveJoinOrder() { for (idx_t i = 0; i < query_graph_manager.relation_manager.NumRelations(); i++) { bindings.insert(i); } - auto &total_relation = query_graph_manager.set_manager.GetJoinRelation(bindings); + auto total_relation = query_graph_manager.set_manager.GetJoinRelation(bindings); auto final_plan = plans.find(total_relation); if (final_plan == plans.end()) { // could not find the final plan diff --git a/src/optimizer/join_order/query_graph.cpp b/src/optimizer/join_order/query_graph.cpp index b3ac319c0068..c19320ce65f5 100644 --- a/src/optimizer/join_order/query_graph.cpp +++ b/src/optimizer/join_order/query_graph.cpp @@ -55,7 +55,7 @@ optional_ptr QueryGraphEdges::GetQueryEdge(JoinRelationSet &left) { } void QueryGraphEdges::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr filter_info) { - D_ASSERT(left.count > 0 && right.count > 0); + D_ASSERT(left.Count() > 0 && right.Count() > 0); // find the EdgeInfo corresponding to the left set auto info = GetQueryEdge(left); // now insert the edge to the right relation, if it does not exist @@ -114,12 +114,11 @@ void QueryGraphEdges::EnumerateNeighbors(JoinRelationSet &node, //! Returns true if a JoinRelationSet is banned by the list of exclusion_set, false otherwise static bool JoinRelationSetIsExcluded(optional_ptr node, unordered_set &exclusion_set) { // TODO: figure this one out. - for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { - if (node->relations[i]) { - return exclusion_set.find(i) != exclusion_set.end(); - } - } - throw InternalException("something went wrong"); + bool is_excluded = false; + JoinRelationSet::EnumerateRelations(node->relations, [&](idx_t relation_id) { + is_excluded |= exclusion_set.find(relation_id) != exclusion_set.end(); + }); + return is_excluded; } const vector QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unordered_set &exclusion_set) const { diff --git a/src/optimizer/join_order/query_graph_manager.cpp b/src/optimizer/join_order/query_graph_manager.cpp index 3a5214d2c206..9b68ec53c73e 100644 --- a/src/optimizer/join_order/query_graph_manager.cpp +++ b/src/optimizer/join_order/query_graph_manager.cpp @@ -97,10 +97,10 @@ void QueryGraphManager::CreateHyperGraphEdges() { // both the left and the right side have bindings // first create the relation sets, if they do not exist if (!filter_info->left_set) { - filter_info->left_set = &set_manager.GetJoinRelation(left_bindings); + filter_info->left_set = set_manager.GetJoinRelation(left_bindings).get(); } if (!filter_info->right_set) { - filter_info->right_set = &set_manager.GetJoinRelation(right_bindings); + filter_info->right_set = set_manager.GetJoinRelation(right_bindings).get(); } // we can only create a meaningful edge if the sets are not exactly the same if (filter_info->left_set != filter_info->right_set) { @@ -179,7 +179,7 @@ unique_ptr QueryGraphManager::Reconstruct(unique_ptr> extracted_relations; @@ -310,10 +310,10 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorset.count == 1); + D_ASSERT(node->set.Count() == 1); D_ASSERT(extracted_relations[node->set.relations[0]]); result_relation = &node->set; result_operator = std::move(extracted_relations[result_relation->relations[0]]); @@ -333,7 +333,7 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorfilter) { // now check if the filter is a subset of the current relation // note that infos with an empty relation set are a special case and we do not push them down - if (info.set.get().count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) { + if (info.set.get().Count() > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) { auto &filter_and_binding = filters_and_bindings[info.filter_index]; auto filter = std::move(filter_and_binding->filter); // if it is, we can push the filter diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 17615fb35eb4..e0e198a02357 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -473,7 +473,7 @@ bool RelationManager::ExtractBindings(Expression &expression, unordered_set> RelationManager::ExtractEdges(LogicalOperator &op, vector> &filter_operators, - JoinRelationSetManagerOld &set_manager) { + JoinRelationSetManager &set_manager) { // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate // filters in the process vector> filters_and_bindings; @@ -504,9 +504,9 @@ vector> RelationManager::ExtractEdges(LogicalOperator &op // create the filter info so all required LHS relations are present when reconstructing the // join - optional_ptr left_set; - optional_ptr right_set; - optional_ptr full_set; + optional_ptr left_set; + optional_ptr right_set; + optional_ptr full_set; // here we create a left_set that unions all relations from the left side of // every expression and a right_set that unions all relations frmo the right side of a // every expression (although this should always be 1). @@ -518,20 +518,20 @@ vector> RelationManager::ExtractEdges(LogicalOperator &op ExtractBindings(*comp.left, left_bindings); if (!left_set) { - left_set = set_manager.GetJoinRelation(left_bindings); + left_set = set_manager.GetJoinRelation(left_bindings).get(); } else { - left_set = set_manager.Union(set_manager.GetJoinRelation(left_bindings), *left_set); + left_set = set_manager.Union(set_manager.GetJoinRelation(left_bindings), *left_set).get(); } if (!right_set) { - right_set = set_manager.GetJoinRelation(right_bindings); + right_set = set_manager.GetJoinRelation(right_bindings).get(); } else { - right_set = set_manager.Union(set_manager.GetJoinRelation(right_bindings), *right_set); + right_set = set_manager.Union(set_manager.GetJoinRelation(right_bindings), *right_set).get(); } } - full_set = set_manager.Union(*left_set, *right_set); - D_ASSERT(left_set && left_set->count > 0); - D_ASSERT(right_set && right_set->count == 1); - D_ASSERT(full_set && full_set->count > 0); + full_set = set_manager.Union(*left_set, *right_set).get(); + D_ASSERT(left_set && left_set->Count() > 0); + D_ASSERT(right_set && right_set->Count() == 1); + D_ASSERT(full_set && full_set->Count() > 0); // now we push the conjunction expressions // In QueryGraphManager::GenerateJoins we extract each condition again and create a standalone join @@ -551,7 +551,7 @@ vector> RelationManager::ExtractEdges(LogicalOperator &op filter_set.insert(*comparison); unordered_set bindings; ExtractBindings(*comparison, bindings); - auto &set = set_manager.GetJoinRelation(bindings); + auto set = set_manager.GetJoinRelation(bindings); auto filter_info = make_uniq(std::move(comparison), set, filters_and_bindings.size(), join.join_type); filters_and_bindings.push_back(std::move(filter_info)); @@ -572,7 +572,7 @@ vector> RelationManager::ExtractEdges(LogicalOperator &op leftover_expressions.push_back(std::move(expression)); continue; } - auto &set = set_manager.GetJoinRelation(bindings); + auto set = set_manager.GetJoinRelation(bindings); auto filter_info = make_uniq(std::move(expression), set, filters_and_bindings.size()); filters_and_bindings.push_back(std::move(filter_info)); } From d0ab9f5f7afd7ff2de21c553efc8bfd5f1a5059f Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 20 Dec 2024 17:18:46 -0800 Subject: [PATCH 5/5] getting there, but need to figure out exclusion set stuff again --- .../optimizer/join_order/join_relation.hpp | 1 + .../join_order/join_relation_set.cpp | 13 ++++++++---- src/optimizer/join_order/plan_enumerator.cpp | 21 +++++++++++++------ src/optimizer/join_order/query_graph.cpp | 10 ++++----- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/include/duckdb/optimizer/join_order/join_relation.hpp b/src/include/duckdb/optimizer/join_order/join_relation.hpp index b66cbcb8396c..d5a898fc05d2 100644 --- a/src/include/duckdb/optimizer/join_order/join_relation.hpp +++ b/src/include/duckdb/optimizer/join_order/join_relation.hpp @@ -26,6 +26,7 @@ struct JoinRelationSet { string ToString() const; idx_t Count() const; + idx_t NextNeighbor(idx_t i); std::bitset<12> relations; static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub); diff --git a/src/optimizer/join_order/join_relation_set.cpp b/src/optimizer/join_order/join_relation_set.cpp index 81ea5251e6d0..4135709b9ce3 100644 --- a/src/optimizer/join_order/join_relation_set.cpp +++ b/src/optimizer/join_order/join_relation_set.cpp @@ -1,10 +1,6 @@ #include "duckdb/optimizer/join_order/join_relation.hpp" -#include "duckdb/common/printer.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/to_string.hpp" #include -#include namespace duckdb { @@ -60,6 +56,15 @@ idx_t JoinRelationSet::Count() const { return count; } +idx_t JoinRelationSet::NextNeighbor(idx_t i) { + for (idx_t j = 0; j < i; j++) { + if (relations[j]) { + return j; + } + } + return DConstants::INVALID_INDEX; +} + JoinRelationSet JoinRelationSet::Copy() const { JoinRelationSet result; result.relations = relations; diff --git a/src/optimizer/join_order/plan_enumerator.cpp b/src/optimizer/join_order/plan_enumerator.cpp index 03c67902d1ce..4d4248b37ad4 100644 --- a/src/optimizer/join_order/plan_enumerator.cpp +++ b/src/optimizer/join_order/plan_enumerator.cpp @@ -34,8 +34,10 @@ static vector> AddSuperSets(const vector node, unordered_set &exclusion_set) { - for (idx_t i = 0; i < node->Count(); i++) { - exclusion_set.insert(node->relations[i]); + for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { + if (node->relations[i]) { + exclusion_set.insert(i); + } } } @@ -187,9 +189,15 @@ bool PlanEnumerator::EmitCSG(JoinRelationSet &node) { } // create the exclusion set as everything inside the subgraph AND anything with members BELOW it unordered_set exclusion_set; - for (idx_t i = 0; i < node.relations[0]; i++) { - exclusion_set.insert(i); + for (idx_t j = 0; j < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; ++j) { + if (node.relations[j]) { + for (idx_t i = 0; i < j; i++) { + exclusion_set.insert(i); + } + break; + } } + UpdateExclusionSet(&node, exclusion_set); // find the neighbors given this exclusion set auto neighbors = query_graph.GetNeighbors(node, exclusion_set); @@ -322,15 +330,16 @@ bool PlanEnumerator::SolveJoinOrderExactly() { // now we perform the actual dynamic programming to compute the final result // we enumerate over all the possible pairs in the neighborhood for (idx_t i = query_graph_manager.relation_manager.NumRelations(); i > 0; i--) { + auto relation_id = i - 1; // for every node in the set, we consider it as the start node once - auto start_node = query_graph_manager.set_manager.GetJoinRelation(i - 1); + auto start_node = query_graph_manager.set_manager.GetJoinRelation(relation_id); // emit the start node if (!EmitCSG(start_node)) { return false; } // initialize the set of exclusion_set as all the nodes with a number below this unordered_set exclusion_set; - for (idx_t j = 0; j < i; j++) { + for (idx_t j = 0; j < relation_id; j++) { exclusion_set.insert(j); } // then we recursively search for neighbors that do not belong to the banned entries diff --git a/src/optimizer/join_order/query_graph.cpp b/src/optimizer/join_order/query_graph.cpp index c19320ce65f5..70fcd3524adb 100644 --- a/src/optimizer/join_order/query_graph.cpp +++ b/src/optimizer/join_order/query_graph.cpp @@ -113,7 +113,6 @@ void QueryGraphEdges::EnumerateNeighbors(JoinRelationSet &node, //! Returns true if a JoinRelationSet is banned by the list of exclusion_set, false otherwise static bool JoinRelationSetIsExcluded(optional_ptr node, unordered_set &exclusion_set) { - // TODO: figure this one out. bool is_excluded = false; JoinRelationSet::EnumerateRelations(node->relations, [&](idx_t relation_id) { is_excluded |= exclusion_set.find(relation_id) != exclusion_set.end(); @@ -126,12 +125,11 @@ const vector QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unorder EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool { if (!JoinRelationSetIsExcluded(info.neighbor, exclusion_set)) { // add the smallest node of the neighbor to the set - for (idx_t i = 0; i < PlanEnumerator::THRESHOLD_TO_SWAP_TO_APPROXIMATE; i++) { - if (info.neighbor->relations[i]) { - result.insert(i); - break; + JoinRelationSet::EnumerateRelations(info.neighbor->relations, [&](idx_t relation_id) { + if (result.size() == 0) { + result.insert(relation_id); } - } + }); } return false; });