From 2bba91546cf9e2454042df62fc5d1ab478d167cb Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:14:19 +0000 Subject: [PATCH 1/5] calc_consensus --- src/consensus.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/consensus.cpp b/src/consensus.cpp index 21c1012a..5deb4114 100644 --- a/src/consensus.cpp +++ b/src/consensus.cpp @@ -6,7 +6,6 @@ using namespace Rcpp; #include /* for fill */ #include /* for array */ -#include /* for vector */ using TreeTools::ct_stack_size; using TreeTools::ct_stack_threshold; @@ -15,7 +14,7 @@ using TreeTools::ct_max_leaves_heap; // Helper template function to perform consensus computation // Uses StackContainer for the S array (either std::array or std::vector) template -RawMatrix consensus_tree_impl( +RawMatrix calc_consensus_tree( const List& trees, const NumericVector& p, StackContainer& S @@ -159,11 +158,11 @@ RawMatrix consensus_tree(const List trees, const NumericVector p) { if (n_tip <= ct_stack_threshold) { // Small tree: use stack-allocated array std::array S; - return consensus_tree_impl(trees, p, S); + return calc_consensus_tree(trees, p, S); } else { // Large tree: use heap-allocated vector std::vector S(ct_stack_size * n_tip); - return consensus_tree_impl(trees, p, S); + return calc_consensus_tree(trees, p, S); } } catch(const std::exception& e) { Rcpp::stop(e.what()); From e66ce4574a45f68a94b96970b456dfde5ef4b1db Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:27:57 +0000 Subject: [PATCH 2/5] StackEntries replace ct_stack_size --- inst/include/TreeTools/ClusterTable.h | 23 ---------- src/consensus.cpp | 66 ++++++++++++++------------- 2 files changed, 34 insertions(+), 55 deletions(-) diff --git a/inst/include/TreeTools/ClusterTable.h b/inst/include/TreeTools/ClusterTable.h index bbd3f5f8..48491e9f 100644 --- a/inst/include/TreeTools/ClusterTable.h +++ b/inst/include/TreeTools/ClusterTable.h @@ -13,23 +13,6 @@ #define UNINIT -999 #define INF TreeTools::INTX_MAX -#define CT_ASSERT_CAN_PUSH() \ - ASSERT(static_cast(Spos + CT_STACK_SIZE) <= S.size()) - -#define CT_ASSERT_CAN_POP() ASSERT(Spos >= CT_STACK_SIZE) - -#define CT_PUSH(a, b, c, d) \ - S[Spos++] = (a); \ - S[Spos++] = (b); \ - S[Spos++] = (c); \ - S[Spos++] = (d) - -#define CT_POP(a, b, c, d) \ - (d) = S[--Spos]; \ - (c) = S[--Spos]; \ - (b) = S[--Spos]; \ - (a) = S[--Spos] - #define CT_IS_LEAF(a) (a) <= n_tip namespace TreeTools { @@ -38,12 +21,6 @@ namespace TreeTools { inline constexpr int_fast32_t ct_stack_threshold = 8192; // New increased limit with heap allocation inline constexpr int_fast32_t ct_max_leaves_heap = 100000; - inline constexpr int_fast32_t ct_stack_size = 4; - - // Old hard limit, still used in TreeDist 2.12 - // TODO: Update TreeDist to use use heap where necessary - // NOTE: This constant is deprecated - new code should use ct_max_leaves_heap - inline constexpr int_fast32_t ct_max_leaves = 16383; template inline void resize_uninitialized(std::vector& v, std::size_t n) { diff --git a/src/consensus.cpp b/src/consensus.cpp index 5deb4114..4579c3d4 100644 --- a/src/consensus.cpp +++ b/src/consensus.cpp @@ -7,10 +7,11 @@ using namespace Rcpp; #include /* for fill */ #include /* for array */ -using TreeTools::ct_stack_size; using TreeTools::ct_stack_threshold; using TreeTools::ct_max_leaves_heap; +struct StackEntry { int32 L, R, N, W; }; + // Helper template function to perform consensus computation // Uses StackContainer for the S array (either std::array or std::vector) template @@ -38,11 +39,21 @@ RawMatrix calc_consensus_tree( const int32 ntip_3 = n_tip - 3; const int32 nbin = (n_tip + 7) / 8; // bytes per row in packed output - std::vector split_count(n_tip, 1); + int32* split_count; + std::array split_stack; + std::vector split_heap; + if (n_tip <= ct_stack_threshold) { + split_count = split_stack.data(); + } else { + split_heap.resize(n_tip); + split_count = split_heap.data(); + } + + StackEntry *const S_start = S.data(); // Packed output: each row has nbin bytes RawMatrix ret(ntip_3, nbin); - + int32 i = 0; int32 splits_found = 0; @@ -51,42 +62,39 @@ RawMatrix calc_consensus_tree( continue; } - std::fill(split_count.begin(), split_count.end(), 1); - + std::fill(split_count, split_count + n_tip, 1); + for (int32 j = i + 1; j < n_trees; ++j) { ASSERT(tables[i].N() == tables[j].N()); - + tables[i].CLEAR(); - + tables[j].TRESET(); tables[j].READT(&v, &w); int32 j_pos = 0; - int32 Spos = 0; // Empty the stack S. Used in CT_PUSH / CT_POP macros. + StackEntry* S_top = S_start; // Empty the stack S. do { if (CT_IS_LEAF(v)) { - CT_ASSERT_CAN_PUSH(); - CT_PUSH(tables[i].ENCODE(v), tables[i].ENCODE(v), 1, 1); + const auto enc_v = tables[i].ENCODE(v); + *S_top++ = {enc_v, enc_v, 1, 1}; } else { - CT_ASSERT_CAN_POP(); - CT_POP(L, R, N, W_j); + const StackEntry& entry = *--S_top; + L = entry.L; R = entry.R; N = entry.N; W_j = entry.W; W = 1 + W_j; w = w - W_j; - while (w) { - CT_ASSERT_CAN_POP(); - CT_POP(L_j, R_j, N_j, W_j); - if (L_j < L) L = L_j; - if (R_j > R) R = R_j; - N = N + N_j; - W = W + W_j; - w = w - W_j; + const StackEntry& next = *--S_top; + L = std::min(L, next.L); // Faster than ternary operator + R = std::max(R, next.R); + N += next.N; + W += next.W; + w -= next.W; } - CT_ASSERT_CAN_PUSH(); - CT_PUSH(L, R, N, W); + *S_top++ = {L, R, N, W}; ++j_pos; @@ -132,14 +140,8 @@ RawMatrix calc_consensus_tree( } } while (i++ != n_trees - thresh); // All clades in p% consensus must occur in first q% of trees. - if (splits_found == 0) { - return RawMatrix(0, nbin); - } else if (splits_found < ntip_3) { - // Return only the rows we filled - return ret(Range(0, splits_found - 1), _); - } else { - return ret; - } + return (splits_found == 0) ? RawMatrix(0, nbin) : + (splits_found < ntip_3) ? ret(Range(0, splits_found - 1), _) : ret; } // trees is a list of objects of class phylo, all with the same tip labels @@ -157,11 +159,11 @@ RawMatrix consensus_tree(const List trees, const NumericVector p) { if (n_tip <= ct_stack_threshold) { // Small tree: use stack-allocated array - std::array S; + std::array S; return calc_consensus_tree(trees, p, S); } else { // Large tree: use heap-allocated vector - std::vector S(ct_stack_size * n_tip); + std::vector S(n_tip); return calc_consensus_tree(trees, p, S); } } catch(const std::exception& e) { From f70b7ac29ece198af19184a5e62a84931802082f Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:29:55 +0000 Subject: [PATCH 3/5] v2.0.0.9004 --- DESCRIPTION | 2 +- NEWS.md | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d5bf3c6f..c0eb72be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeTools Title: Create, Modify and Analyse Phylogenetic Trees -Version: 2.0.0.9003 +Version: 2.0.0.9004 Authors@R: c( person("Martin R.", 'Smith', role = c("aut", "cre", "cph"), email = "martin.smith@durham.ac.uk", diff --git a/NEWS.md b/NEWS.md index 52d1b115..a0069765 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ -# TreeTools 2.0.0.9003 (development) # +# TreeTools 2.0.0.9004 (development) # +- Support larger trees in ClusterTable objects + * Retires `CT_PUSH` and `CT_POP` macros. - Support larger trees in `Consensus()`. - Uses 32-bit integers, necessitating downstream changes to TreeDist. + * Uses 32-bit integers, necessitating downstream changes to TreeDist. # TreeTools 2.0.0.9001 (development) # - Remove hard limit on tree size in `SplitList`. From 1503d3f7b56bc6a8dee5e5df1a6db8617fa2918a Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:38:20 +0000 Subject: [PATCH 4/5] *_j unused --- src/consensus.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/consensus.cpp b/src/consensus.cpp index 4579c3d4..e3502992 100644 --- a/src/consensus.cpp +++ b/src/consensus.cpp @@ -23,7 +23,6 @@ RawMatrix calc_consensus_tree( int32 v = 0; int32 w = 0; int32 L, R, N, W; - int32 L_j, R_j, N_j, W_j; const int32 n_trees = trees.length(); const int32 frac_thresh = int32(n_trees * p[0]) + 1; @@ -73,7 +72,7 @@ RawMatrix calc_consensus_tree( tables[j].READT(&v, &w); int32 j_pos = 0; - StackEntry* S_top = S_start; // Empty the stack S. + StackEntry* S_top = S_start; // Empty the stack S do { if (CT_IS_LEAF(v)) { @@ -81,10 +80,9 @@ RawMatrix calc_consensus_tree( *S_top++ = {enc_v, enc_v, 1, 1}; } else { const StackEntry& entry = *--S_top; - L = entry.L; R = entry.R; N = entry.N; W_j = entry.W; - - W = 1 + W_j; - w = w - W_j; + L = entry.L; R = entry.R; N = entry.N; + W = 1 + entry.W; + w -= entry.W; while (w) { const StackEntry& next = *--S_top; L = std::min(L, next.L); // Faster than ternary operator From 42c9db661134781e9837c1f15a7899305a959a7e Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:40:17 +0000 Subject: [PATCH 5/5] encode CT --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a0069765..a81f8e99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ # TreeTools 2.0.0.9004 (development) # -- Support larger trees in ClusterTable objects +- Support larger trees in `ClusterTable` objects. * Retires `CT_PUSH` and `CT_POP` macros. - Support larger trees in `Consensus()`. * Uses 32-bit integers, necessitating downstream changes to TreeDist.