From 6a08ae0afc40490e616d4da42dc6e07ac5769048 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 30 Jul 2025 21:47:38 -0400
Subject: [PATCH 1/9] Add build instructions and printouts for C++ demo

---
 README.md       |  28 ++++-
 simple_mcts.cpp | 324 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 351 insertions(+), 1 deletion(-)
 create mode 100644 simple_mcts.cpp
diff --git a/README.md b/README.md
index d70bdb6..d65fffd 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,33 @@ cd alphamaplesat
 python -u main.py "constraints_17_c_100000_2_2_0_final.simp" -d 1 -m 136 -o "test.cubes" -prod
 ```
 
-This command will generate cubes from the specified constraints file (provided as an example in the repo), using a depth of 1 and a maximum of 136 variables and outputting to `test.cubes`. 
+This command will generate cubes from the specified constraints file (provided as an example in the repo), using a depth of 1 and a maximum of 136 variables and outputting to `test.cubes`.
+
+## C++ cubing demo
+
+A lightweight C++ version of the cubing procedure is provided in `simple_mcts.cpp`.
+Compile it with a C++17 compiler:
+
+```bash
+g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
+```
+
+Run the tool on the sample CNF file:
+
+```bash
+./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 200 -o out.cubes -numMCTSSims 1
+```
+
+Typical output looks like:
+
+```
+200 variables will be considered for cubing
+No. of free variables: 100
+Saved cubes to file  out.cubes
+Time taken for cubing:  0.016
+Number of nodes:  3
+Tool runtime:  0.837
+```
 
 ## License
 
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
new file mode 100644
index 0000000..def2c9f
--- /dev/null
+++ b/simple_mcts.cpp
@@ -0,0 +1,324 @@
+#include <bits/stdc++.h>
+using namespace std;
+
+/**
+ * Simplified C++ version of AlphaMapleSAT cubing.
+ * This implementation focuses on argument parsing, CNF parsing,
+ * a basic unit propagation based on the provided BCP routines,
+ * and a depth-first search that explores variable assignments.
+ * It does not implement the full MCTS algorithm but mimics the
+ * cube generation logic of the Python tool.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cstdint>
+#include <vector>
+#include <string>
+#include <fstream>
+#include <algorithm>
+#include <chrono>
+
+// ----------------- BCP ROUTINES -----------------
+// The following code is adapted from the provided BCP snippet.
+
+#define MAX_VARS 20000
+#define MAX_CLAUSES 1000000
+#define MAX_LITS 1000000
+#define BIMP_GROW_CHUNK 4
+#define ASSIGN_NONE 0
+#define ASSIGN_TRUE 1
+#define ASSIGN_FALSE 2
+
+int n_vars, n_clauses;
+int *clauses[MAX_CLAUSES];
+int clause_sizes[MAX_CLAUSES];
+int clause_stamp[MAX_CLAUSES];
+int *watch1[MAX_CLAUSES];
+int *watch2[MAX_CLAUSES];
+
+uint8_t assignments[MAX_VARS + 1];
+int current_stamp = 1;
+
+// Binary implication arrays
+int *bimp[MAX_VARS * 2 + 2];
+int bimp_size[MAX_VARS * 2 + 2];
+int bimp_capacity[MAX_VARS * 2 + 2];
+
+int var_activity[MAX_VARS + 1];
+int global_queue[MAX_VARS + 1];
+
+static inline int lit_index(int lit) {
+    return (lit > 0) ? lit : n_vars - lit;
+}
+
+static inline void reset_assignments() {
+    memset(assignments, ASSIGN_NONE, sizeof(uint8_t) * (n_vars + 1));
+    current_stamp++;
+}
+
+void add_bimp(int lit, int implied) {
+    int idx = lit_index(lit);
+    if (bimp_capacity[idx] == 0) {
+        bimp_capacity[idx] = BIMP_GROW_CHUNK;
+        bimp[idx] = (int*)malloc(sizeof(int) * bimp_capacity[idx]);
+    } else if (bimp_size[idx] >= bimp_capacity[idx]) {
+        bimp_capacity[idx] *= 2;
+        bimp[idx] = (int*)realloc(bimp[idx], sizeof(int) * bimp_capacity[idx]);
+    }
+    bimp[idx][bimp_size[idx]++] = implied;
+}
+
+void parse_cnf(const char *filename) {
+    FILE *fp = fopen(filename, "r");
+    if (!fp) {
+        perror("Error opening file");
+        exit(1);
+    }
+    char line[10000];
+
+    int clause_index = 0;
+    while (fgets(line, sizeof(line), fp)) {
+        if (line[0] == 'p') {
+            sscanf(line, "p cnf %d %d", &n_vars, &n_clauses);
+        } else if (line[0] != 'c') {
+            int lits[1000], size = 0, lit;
+            char *ptr = line;
+            while (sscanf(ptr, "%d", &lit) == 1 && lit != 0) {
+                lits[size++] = lit;
+                ptr = strchr(ptr, ' ');
+                if (!ptr) break;
+                ptr++;
+            }
+            clauses[clause_index] = (int *)malloc(sizeof(int) * size);
+            memcpy(clauses[clause_index], lits, sizeof(int) * size);
+            clause_sizes[clause_index] = size;
+            clause_stamp[clause_index] = 0;
+            if (size >= 2) {
+                watch1[clause_index] = &clauses[clause_index][0];
+                watch2[clause_index] = &clauses[clause_index][1];
+            }
+            if (size == 2) {
+                add_bimp(-lits[0], lits[1]);
+                add_bimp(-lits[1], lits[0]);
+            }
+            if (size <= 3) {
+                for (int j = 0; j < size; j++)
+                    var_activity[abs(lits[j])]++;
+            }
+            clause_index++;
+        }
+    }
+    n_clauses = clause_index;
+    fclose(fp);
+}
+
+int propagate_bimp(int lit, std::vector<int>& propagated) {
+    int front = 0, rear = 0;
+    global_queue[rear++] = lit;
+
+    while (front < rear) {
+        int l = global_queue[front++];
+        int var = abs(l);
+        if (assignments[var] != ASSIGN_NONE) {
+            if ((assignments[var] == ASSIGN_TRUE && l < 0) ||
+                (assignments[var] == ASSIGN_FALSE && l > 0))
+                return 0; // conflict
+            continue;
+        }
+        assignments[var] = (l > 0) ? ASSIGN_TRUE : ASSIGN_FALSE;
+        propagated.push_back(var);
+
+        int idx = lit_index(l);
+        for (int i = 0; i < bimp_size[idx]; i++) {
+            global_queue[rear++] = bimp[idx][i];
+        }
+    }
+    return 1;
+}
+
+int propagate_big_clauses(std::vector<int>& propagated) {
+    for (int i = 0; i < n_clauses; i++) {
+        if (clause_sizes[i] <= 2 || clause_stamp[i] == current_stamp) continue;
+        clause_stamp[i] = current_stamp;
+
+        int *lits = clauses[i];
+        int sat = 0, unassigned = 0, last_unassigned = 0;
+
+        for (int j = 0; j < clause_sizes[i]; j++) {
+            int lit = lits[j];
+            int var = abs(lit);
+            if (assignments[var] == ASSIGN_NONE) {
+                unassigned++;
+                last_unassigned = lit;
+            } else if ((assignments[var] == ASSIGN_TRUE && lit > 0) ||
+                       (assignments[var] == ASSIGN_FALSE && lit < 0)) {
+                sat = 1;
+                break;
+            }
+        }
+
+        if (!sat && unassigned == 0) return 0;
+        if (!sat && unassigned == 1) {
+            int v = abs(last_unassigned);
+            assignments[v] = (last_unassigned > 0) ? ASSIGN_TRUE : ASSIGN_FALSE;
+            propagated.push_back(v);
+        }
+    }
+    return 1;
+}
+
+int unit_propagation(int lit, std::vector<int>& propagated) {
+    propagated.clear();
+    if (!propagate_bimp(lit, propagated)) return 0;
+    return propagate_big_clauses(propagated);
+}
+
+// --------------- END BCP ROUTINES ---------------
+
+struct Options {
+    std::string filename;
+    int n_cutoff = -1;
+    int d_cutoff = -1;
+    int m_vars = 0;
+    std::string out_file;
+    int num_sims = 10;
+};
+
+/** Parse command line arguments.
+ * Only minimal checks are performed. */
+Options parse_args(int argc, char** argv) {
+    Options opt;
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+        if (arg == "-n" && i+1 < argc) {
+            opt.n_cutoff = atoi(argv[++i]);
+        } else if (arg == "-d" && i+1 < argc) {
+            opt.d_cutoff = atoi(argv[++i]);
+        } else if (arg == "-m" && i+1 < argc) {
+            opt.m_vars = atoi(argv[++i]);
+        } else if (arg == "-o" && i+1 < argc) {
+            opt.out_file = argv[++i];
+        } else if (arg == "-numMCTSSims" && i+1 < argc) {
+            opt.num_sims = atoi(argv[++i]);
+        } else if (arg[0] != '-') {
+            opt.filename = arg;
+        }
+    }
+    return opt;
+}
+
+// Store a single cube
+struct Cube {
+    std::vector<int> lits;
+};
+
+int node_count = 0;
+
+// Basic DFS that explores assignments up to depth/n_cutoff
+void dfs(std::vector<int>& path, int depth, int& count, Options& opt,
+         const std::vector<int>& vars, std::vector<Cube>& cubes) {
+    node_count++;
+    if ((opt.d_cutoff != -1 && depth >= opt.d_cutoff) ||
+        (opt.n_cutoff != -1 && (int)path.size() >= opt.n_cutoff)) {
+        cubes.push_back({path});
+        return;
+    }
+    if (depth >= (int)vars.size()) {
+        cubes.push_back({path});
+        return;
+    }
+    int var = vars[depth];
+    for (int val = 0; val < 2; ++val) {
+        int lit = val ? var : -var;
+        path.push_back(lit);
+        count++;
+        dfs(path, depth + 1, count, opt, vars, cubes);
+        path.pop_back();
+    }
+}
+
+// Score variables using propagation as in the provided snippet
+std::vector<int> preselect_vars(int M) {
+    std::vector<int> selected;
+    int scores[MAX_VARS + 1][2] = {0};
+
+    for (int v = 1; v <= n_vars && v <= M; v++) {
+        std::vector<int> propagated;
+        int pos = 0, neg = 0;
+
+        reset_assignments();
+        if (unit_propagation(v, propagated)) pos = propagated.size();
+
+        reset_assignments();
+        if (unit_propagation(-v, propagated)) neg = propagated.size();
+
+        scores[v][0] = v;
+        scores[v][1] = pos * neg + 10 * (pos + neg) +
+                       100 * (bimp_size[lit_index(v)] + bimp_size[lit_index(-v)]) +
+                       var_activity[v] * 5;
+    }
+
+    std::vector<std::pair<int,int>> vec;
+    for (int v = 1; v <= n_vars && v <= M; ++v) {
+        if (scores[v][1] > 0) vec.push_back({scores[v][1], v});
+    }
+    std::sort(vec.begin(), vec.end(), std::greater<>());
+    for (auto &p : vec) selected.push_back(p.second);
+    return selected;
+}
+
+int count_free_vars(int M) {
+    int cnt = 0;
+    for (int v = 1; v <= n_vars && v <= M; ++v) {
+        if (bimp_size[lit_index(v)] > 0 || bimp_size[lit_index(-v)] > 0) cnt++;
+    }
+    return cnt;
+}
+
+int main(int argc, char** argv) {
+    auto total_start = std::chrono::high_resolution_clock::now();
+    Options opt = parse_args(argc, argv);
+    if (opt.filename.empty()) {
+        printf("CNF file not specified\n");
+        return 1;
+    }
+    parse_cnf(opt.filename.c_str());
+
+    printf("%d variables will be considered for cubing\n", opt.m_vars);
+    printf("No. of free variables: %d\n", count_free_vars(opt.m_vars));
+
+    // Preselect variables using the heuristic
+    std::vector<int> vars = preselect_vars(opt.m_vars);
+
+    // Simple DFS based cube generation
+    auto cubing_start = std::chrono::high_resolution_clock::now();
+    std::vector<Cube> cubes;
+    std::vector<int> path;
+    int count = 0;
+    for (int i = 0; i < opt.num_sims; ++i) {
+        dfs(path, 0, count, opt, vars, cubes);
+    }
+    auto cubing_end = std::chrono::high_resolution_clock::now();
+
+    if (!opt.out_file.empty()) {
+        std::ofstream out(opt.out_file);
+        for (auto &c : cubes) {
+            for (int lit : c.lits) out << lit << " ";
+            out << "0\n";
+        }
+        printf("Saved cubes to file  %s\n", opt.out_file.c_str());
+    }
+
+    double cubing_time = std::chrono::duration<double>(cubing_end - cubing_start).count();
+    printf("Time taken for cubing:  %.3f\n", cubing_time);
+    printf("Number of nodes:  %d\n", node_count);
+
+    double total_time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - total_start).count();
+    printf("Tool runtime:  %.3f\n", total_time);
+
+    printf("Generated %zu cubes\n", cubes.size());
+    return 0;
+}
+

From 5ebc00b1bd3b21f5ac1bc6ff9102f44486baccec Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 30 Jul 2025 22:03:28 -0400
Subject: [PATCH 2/9] Implement basic MCTS in C++ demo

---
 README.md       |  17 ++++---
 simple_mcts.cpp | 133 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 112 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index d65fffd..824d7de 100644
--- a/README.md
+++ b/README.md
@@ -28,27 +28,28 @@ This command will generate cubes from the specified constraints file (provided a
 ## C++ cubing demo
 
 A lightweight C++ version of the cubing procedure is provided in `simple_mcts.cpp`.
-Compile it with a C++17 compiler:
+The program implements a tiny MCTS engine that selects variable assignments to
+form a cube. Compile it with a C++17 compiler:
 
 ```bash
 g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
 ```
 
-Run the tool on the sample CNF file:
+Run the tool on the sample CNF file with a handful of search simulations:
 
 ```bash
-./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 200 -o out.cubes -numMCTSSims 1
+./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2
 ```
 
 Typical output looks like:
 
 ```
-200 variables will be considered for cubing
-No. of free variables: 100
+20 variables will be considered for cubing
+No. of free variables: 3
 Saved cubes to file  out.cubes
-Time taken for cubing:  0.016
-Number of nodes:  3
-Tool runtime:  0.837
+Time taken for cubing:  0.000
+Number of nodes:  23
+Tool runtime:  0.681
 ```
 
 ## License
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index def2c9f..d55380f 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -5,9 +5,10 @@ using namespace std;
  * Simplified C++ version of AlphaMapleSAT cubing.
  * This implementation focuses on argument parsing, CNF parsing,
  * a basic unit propagation based on the provided BCP routines,
- * and a depth-first search that explores variable assignments.
- * It does not implement the full MCTS algorithm but mimics the
- * cube generation logic of the Python tool.
+ * and a Monte Carlo Tree Search (MCTS) routine that explores
+ * variable assignments. The implementation is intentionally
+ * lightweight but demonstrates the core ideas behind the
+ * MCTS-based cubing strategy used in AlphaMapleSAT.
  */
 
 #include <cstdio>
@@ -214,30 +215,105 @@ struct Cube {
     std::vector<int> lits;
 };
 
-int node_count = 0;
+// ------------------------------------------------------------
+// Minimal MCTS implementation for exploring variable literals
+// ------------------------------------------------------------
+
+struct MCTSNode {
+    std::vector<int> path;   // literals assigned so far
+    MCTSNode* child[2] = {nullptr, nullptr};
+    double Q[2] = {0.0, 0.0};
+    int N[2] = {0, 0};
+    bool terminal = false;
+    int depth = 0;
+    MCTSNode(const std::vector<int>& p, int d, bool term)
+        : path(p), terminal(term), depth(d) {}
+};
 
-// Basic DFS that explores assignments up to depth/n_cutoff
-void dfs(std::vector<int>& path, int depth, int& count, Options& opt,
-         const std::vector<int>& vars, std::vector<Cube>& cubes) {
-    node_count++;
-    if ((opt.d_cutoff != -1 && depth >= opt.d_cutoff) ||
-        (opt.n_cutoff != -1 && (int)path.size() >= opt.n_cutoff)) {
-        cubes.push_back({path});
-        return;
+struct MCTS {
+    const std::vector<int>& vars;
+    const Options& opt;
+    double cpuct = 1.4;
+    MCTSNode* root;
+    int node_created = 0;
+
+    MCTS(const std::vector<int>& v, const Options& o)
+        : vars(v), opt(o) {
+        root = new MCTSNode({}, 0, is_terminal(0, {}));
+        node_created = 1;
     }
-    if (depth >= (int)vars.size()) {
-        cubes.push_back({path});
-        return;
+
+    bool is_terminal(int depth, const std::vector<int>& path) const {
+        if (opt.d_cutoff != -1 && depth >= opt.d_cutoff) return true;
+        if (opt.n_cutoff != -1 && (int)path.size() >= opt.n_cutoff) return true;
+        if (depth >= (int)vars.size()) return true;
+        return false;
     }
-    int var = vars[depth];
-    for (int val = 0; val < 2; ++val) {
-        int lit = val ? var : -var;
-        path.push_back(lit);
-        count++;
-        dfs(path, depth + 1, count, opt, vars, cubes);
-        path.pop_back();
+
+    double rollout(MCTSNode* node) const {
+        return (double)node->path.size();
     }
-}
+
+    void expand(MCTSNode* node) {
+        if (node->terminal) return;
+        int var = vars[node->depth];
+        for (int a = 0; a < 2; ++a) {
+            auto p = node->path;
+            int lit = a ? var : -var;
+            p.push_back(lit);
+            bool term = is_terminal(node->depth + 1, p);
+            node->child[a] = new MCTSNode(p, node->depth + 1, term);
+            node_created++;
+        }
+    }
+
+    double search(MCTSNode* node) {
+        if (node->terminal) {
+            return rollout(node);
+        }
+        if (node->child[0] == nullptr) {
+            expand(node);
+            return rollout(node);
+        }
+
+        int totalN = node->N[0] + node->N[1];
+        double bestU = -1e9;
+        int bestA = 0;
+        for (int a = 0; a < 2; ++a) {
+            double u = node->Q[a] + cpuct * sqrt((double)(totalN + 1e-6)) / (1 + node->N[a]);
+            if (u > bestU) {
+                bestU = u;
+                bestA = a;
+            }
+        }
+        double v = search(node->child[bestA]);
+        node->N[bestA]++;
+        node->Q[bestA] += (v - node->Q[bestA]) / node->N[bestA];
+        return v;
+    }
+
+    std::vector<int> run() {
+        for (int i = 0; i < opt.num_sims; ++i) search(root);
+        std::vector<int> best;
+        MCTSNode* node = root;
+        while (!node->terminal) {
+            int a = node->N[1] > node->N[0] ? 1 : 0;
+            if (node->child[a] == nullptr) {
+                int var = vars[node->depth];
+                auto p = node->path;
+                int lit_tmp = a ? var : -var;
+                p.push_back(lit_tmp);
+                bool term = is_terminal(node->depth + 1, p);
+                node->child[a] = new MCTSNode(p, node->depth + 1, term);
+                node_created++;
+            }
+            int lit = a ? vars[node->depth] : -vars[node->depth];
+            best.push_back(lit);
+            node = node->child[a];
+        }
+        return best;
+    }
+};
 
 // Score variables using propagation as in the provided snippet
 std::vector<int> preselect_vars(int M) {
@@ -292,14 +368,11 @@ int main(int argc, char** argv) {
     // Preselect variables using the heuristic
     std::vector<int> vars = preselect_vars(opt.m_vars);
 
-    // Simple DFS based cube generation
+    // MCTS based cube generation
     auto cubing_start = std::chrono::high_resolution_clock::now();
+    MCTS mcts(vars, opt);
     std::vector<Cube> cubes;
-    std::vector<int> path;
-    int count = 0;
-    for (int i = 0; i < opt.num_sims; ++i) {
-        dfs(path, 0, count, opt, vars, cubes);
-    }
+    cubes.push_back({mcts.run()});
     auto cubing_end = std::chrono::high_resolution_clock::now();
 
     if (!opt.out_file.empty()) {
@@ -313,7 +386,7 @@ int main(int argc, char** argv) {
 
     double cubing_time = std::chrono::duration<double>(cubing_end - cubing_start).count();
     printf("Time taken for cubing:  %.3f\n", cubing_time);
-    printf("Number of nodes:  %d\n", node_count);
+    printf("Number of nodes:  %d\n", mcts.node_created);
 
     double total_time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - total_start).count();
     printf("Tool runtime:  %.3f\n", total_time);

From 2d80ca3278535243d4e74490ef6663b70b2dc192 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 30 Jul 2025 23:20:34 -0400
Subject: [PATCH 3/9] Enhance C++ demo output and timing

---
 README.md       |  11 +++++-
 simple_mcts.cpp | 100 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 824d7de..ecb0207 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,15 @@ Typical output looks like:
 ```
 20 variables will be considered for cubing
 No. of free variables: 3
-Saved cubes to file  out.cubes
-Time taken for cubing:  0.000
+Free variables: 1 3 5
+Variable ranking (var:score):
+1. 5:323
+2. 3:200
+Parsing time: 0.001
+Scoring time: 0.002
+MCTS time: 0.001
+Cube gen time: 0.000
+Write time: 0.000
 Number of nodes:  23
 Tool runtime:  0.681
 ```
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index d55380f..27755f5 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -54,6 +54,10 @@ static inline int lit_index(int lit) {
     return (lit > 0) ? lit : n_vars - lit;
 }
 
+static inline bool is_preselected(int var) {
+    return bimp_size[lit_index(var)] > 0 || bimp_size[lit_index(-var)] > 0;
+}
+
 static inline void reset_assignments() {
     memset(assignments, ASSIGN_NONE, sizeof(uint8_t) * (n_vars + 1));
     current_stamp++;
@@ -316,8 +320,8 @@ struct MCTS {
 };
 
 // Score variables using propagation as in the provided snippet
-std::vector<int> preselect_vars(int M) {
-    std::vector<int> selected;
+std::vector<std::pair<int,int>> preselect_vars(int M) {
+    std::vector<std::pair<int,int>> ranked;
     int scores[MAX_VARS + 1][2] = {0};
 
     for (int v = 1; v <= n_vars && v <= M; v++) {
@@ -336,23 +340,50 @@ std::vector<int> preselect_vars(int M) {
                        var_activity[v] * 5;
     }
 
-    std::vector<std::pair<int,int>> vec;
     for (int v = 1; v <= n_vars && v <= M; ++v) {
-        if (scores[v][1] > 0) vec.push_back({scores[v][1], v});
+        if (scores[v][1] > 0) ranked.push_back({v, scores[v][1]});
     }
-    std::sort(vec.begin(), vec.end(), std::greater<>());
-    for (auto &p : vec) selected.push_back(p.second);
-    return selected;
+    std::sort(ranked.begin(), ranked.end(), [](auto &a, auto &b){return a.second > b.second;});
+    return ranked;
 }
 
 int count_free_vars(int M) {
     int cnt = 0;
     for (int v = 1; v <= n_vars && v <= M; ++v) {
-        if (bimp_size[lit_index(v)] > 0 || bimp_size[lit_index(-v)] > 0) cnt++;
+        if (is_preselected(v)) cnt++;
     }
     return cnt;
 }
 
+std::vector<int> list_free_vars(int M) {
+    std::vector<int> lst;
+    for (int v = 1; v <= n_vars && v <= M; ++v) {
+        if (is_preselected(v)) lst.push_back(v);
+    }
+    return lst;
+}
+
+void enumerate_cubes_rec(const std::vector<int>& vars, int idx, std::vector<int>& cur,
+                         std::vector<Cube>& cubes) {
+    if (idx == (int)vars.size()) {
+        cubes.push_back({cur});
+        return;
+    }
+    int v = vars[idx];
+    cur.push_back(-v);
+    enumerate_cubes_rec(vars, idx + 1, cur, cubes);
+    cur.back() = v;
+    enumerate_cubes_rec(vars, idx + 1, cur, cubes);
+    cur.pop_back();
+}
+
+std::vector<Cube> generate_cubes(const std::vector<int>& vars) {
+    std::vector<Cube> cubes;
+    std::vector<int> cur;
+    enumerate_cubes_rec(vars, 0, cur, cubes);
+    return cubes;
+}
+
 int main(int argc, char** argv) {
     auto total_start = std::chrono::high_resolution_clock::now();
     Options opt = parse_args(argc, argv);
@@ -360,34 +391,61 @@ int main(int argc, char** argv) {
         printf("CNF file not specified\n");
         return 1;
     }
+
+    auto io_start = std::chrono::high_resolution_clock::now();
     parse_cnf(opt.filename.c_str());
+    auto io_end = std::chrono::high_resolution_clock::now();
 
     printf("%d variables will be considered for cubing\n", opt.m_vars);
-    printf("No. of free variables: %d\n", count_free_vars(opt.m_vars));
+    auto free_vars = list_free_vars(opt.m_vars);
+    printf("No. of free variables: %zu\n", free_vars.size());
+    printf("Free variables:");
+    for (int v : free_vars) printf(" %d", v);
+    printf("\n");
+
+    auto score_start = std::chrono::high_resolution_clock::now();
+    auto ranked = preselect_vars(opt.m_vars);
+    auto score_end = std::chrono::high_resolution_clock::now();
+
+    printf("Variable ranking (var:score):\n");
+    for (size_t i = 0; i < ranked.size(); ++i) {
+        printf("%zu. %d:%d\n", i+1, ranked[i].first, ranked[i].second);
+    }
 
-    // Preselect variables using the heuristic
-    std::vector<int> vars = preselect_vars(opt.m_vars);
+    std::vector<int> vars;
+    for (auto &p : ranked) vars.push_back(p.first);
 
-    // MCTS based cube generation
-    auto cubing_start = std::chrono::high_resolution_clock::now();
+    auto mcts_start = std::chrono::high_resolution_clock::now();
     MCTS mcts(vars, opt);
-    std::vector<Cube> cubes;
-    cubes.push_back({mcts.run()});
-    auto cubing_end = std::chrono::high_resolution_clock::now();
+    auto best_path = mcts.run();
+    auto mcts_end = std::chrono::high_resolution_clock::now();
 
+    std::vector<int> abs_vars;
+    for (int lit : best_path) abs_vars.push_back(std::abs(lit));
+
+    auto cube_gen_start = std::chrono::high_resolution_clock::now();
+    std::vector<Cube> cubes = generate_cubes(abs_vars);
+    auto cube_gen_end = std::chrono::high_resolution_clock::now();
+
+    auto write_start = std::chrono::high_resolution_clock::now();
     if (!opt.out_file.empty()) {
         std::ofstream out(opt.out_file);
         for (auto &c : cubes) {
-            for (int lit : c.lits) out << lit << " ";
-            out << "0\n";
+            out << "a";
+            for (int lit : c.lits) out << " " << lit;
+            out << " 0\n";
         }
         printf("Saved cubes to file  %s\n", opt.out_file.c_str());
     }
+    auto write_end = std::chrono::high_resolution_clock::now();
 
-    double cubing_time = std::chrono::duration<double>(cubing_end - cubing_start).count();
-    printf("Time taken for cubing:  %.3f\n", cubing_time);
-    printf("Number of nodes:  %d\n", mcts.node_created);
+    printf("Parsing time: %.3f\n", std::chrono::duration<double>(io_end - io_start).count());
+    printf("Scoring time: %.3f\n", std::chrono::duration<double>(score_end - score_start).count());
+    printf("MCTS time: %.3f\n", std::chrono::duration<double>(mcts_end - mcts_start).count());
+    printf("Cube gen time: %.3f\n", std::chrono::duration<double>(cube_gen_end - cube_gen_start).count());
+    printf("Write time: %.3f\n", std::chrono::duration<double>(write_end - write_start).count());
 
+    printf("Number of nodes:  %d\n", mcts.node_created);
     double total_time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - total_start).count();
     printf("Tool runtime:  %.3f\n", total_time);
 

From d275af18bdc8b40e37c28bbc2d103183b58c79d3 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 30 Jul 2025 23:20:37 -0400
Subject: [PATCH 4/9] Add debug mode and timing summary

---
 README.md       | 18 +++++++++++++++---
 simple_mcts.cpp | 42 ++++++++++++++++++++++++++++--------------
 2 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index ecb0207..41d3b04 100644
--- a/README.md
+++ b/README.md
@@ -35,13 +35,14 @@ form a cube. Compile it with a C++17 compiler:
 g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
 ```
 
-Run the tool on the sample CNF file with a handful of search simulations:
+Run the tool on the sample CNF file with a handful of search simulations. Pass
+`-debug` to see detailed timings and rankings:
 
 ```bash
-./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2
+./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2 -debug
 ```
 
-Typical output looks like:
+Typical debug output looks like:
 
 ```
 20 variables will be considered for cubing
@@ -59,6 +60,17 @@ Number of nodes:  23
 Tool runtime:  0.681
 ```
 
+Without `-debug`, only a concise summary is printed:
+
+```
+20 variables will be considered for cubing
+No. of free variables: 3
+Saved cubes to file  out.cubes
+Time taken for cubing:  0.000
+Number of nodes:  23
+Tool runtime:  0.681
+```
+
 ## License
 
 This project is licensed under MIT license. See the LICENSE file for details.
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index 27755f5..8ceac89 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -189,6 +189,7 @@ struct Options {
     int m_vars = 0;
     std::string out_file;
     int num_sims = 10;
+    bool debug = false;
 };
 
 /** Parse command line arguments.
@@ -207,6 +208,8 @@ Options parse_args(int argc, char** argv) {
             opt.out_file = argv[++i];
         } else if (arg == "-numMCTSSims" && i+1 < argc) {
             opt.num_sims = atoi(argv[++i]);
+        } else if (arg == "-debug") {
+            opt.debug = true;
         } else if (arg[0] != '-') {
             opt.filename = arg;
         }
@@ -399,17 +402,20 @@ int main(int argc, char** argv) {
     printf("%d variables will be considered for cubing\n", opt.m_vars);
     auto free_vars = list_free_vars(opt.m_vars);
     printf("No. of free variables: %zu\n", free_vars.size());
-    printf("Free variables:");
-    for (int v : free_vars) printf(" %d", v);
-    printf("\n");
+    if (opt.debug) {
+        printf("Free variables:");
+        for (int v : free_vars) printf(" %d", v);
+        printf("\n");
+    }
 
     auto score_start = std::chrono::high_resolution_clock::now();
     auto ranked = preselect_vars(opt.m_vars);
     auto score_end = std::chrono::high_resolution_clock::now();
-
-    printf("Variable ranking (var:score):\n");
-    for (size_t i = 0; i < ranked.size(); ++i) {
-        printf("%zu. %d:%d\n", i+1, ranked[i].first, ranked[i].second);
+    if (opt.debug) {
+        printf("Variable ranking (var:score):\n");
+        for (size_t i = 0; i < ranked.size(); ++i) {
+            printf("%zu. %d:%d\n", i+1, ranked[i].first, ranked[i].second);
+        }
     }
 
     std::vector<int> vars;
@@ -438,18 +444,26 @@ int main(int argc, char** argv) {
         printf("Saved cubes to file  %s\n", opt.out_file.c_str());
     }
     auto write_end = std::chrono::high_resolution_clock::now();
+    double parse_time = std::chrono::duration<double>(io_end - io_start).count();
+    double score_time = std::chrono::duration<double>(score_end - score_start).count();
+    double mcts_time = std::chrono::duration<double>(mcts_end - mcts_start).count();
+    double cube_time = std::chrono::duration<double>(cube_gen_end - cube_gen_start).count();
+    double write_time = std::chrono::duration<double>(write_end - write_start).count();
+    if (opt.debug) {
+        printf("Parsing time: %.3f\n", parse_time);
+        printf("Scoring time: %.3f\n", score_time);
+        printf("MCTS time: %.3f\n", mcts_time);
+        printf("Cube gen time: %.3f\n", cube_time);
+        printf("Write time: %.3f\n", write_time);
+    }
 
-    printf("Parsing time: %.3f\n", std::chrono::duration<double>(io_end - io_start).count());
-    printf("Scoring time: %.3f\n", std::chrono::duration<double>(score_end - score_start).count());
-    printf("MCTS time: %.3f\n", std::chrono::duration<double>(mcts_end - mcts_start).count());
-    printf("Cube gen time: %.3f\n", std::chrono::duration<double>(cube_gen_end - cube_gen_start).count());
-    printf("Write time: %.3f\n", std::chrono::duration<double>(write_end - write_start).count());
+    double cubing_time = score_time + mcts_time + cube_time + write_time;
+    printf("Time taken for cubing:  %.3f\n", cubing_time);
 
     printf("Number of nodes:  %d\n", mcts.node_created);
     double total_time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - total_start).count();
     printf("Tool runtime:  %.3f\n", total_time);
-
-    printf("Generated %zu cubes\n", cubes.size());
+    if (opt.debug) printf("Generated %zu cubes\n", cubes.size());
     return 0;
 }
 

From 9605d1890ec1cba549b24f8401a096437ffe8ca1 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Mon, 16 Feb 2026 10:18:48 +0530
Subject: [PATCH 5/9] Fix free-variable ranking scope and debug timing
 accounting

---
 README.md       | 4 +++-
 simple_mcts.cpp | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 41d3b04..ff74484 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
 ```
 
 Run the tool on the sample CNF file with a handful of search simulations. Pass
-`-debug` to see detailed timings and rankings:
+`-debug` to see detailed timings and rankings (rankings are printed only for free/preselected variables):
 
 ```bash
 ./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2 -debug
@@ -56,6 +56,8 @@ Scoring time: 0.002
 MCTS time: 0.001
 Cube gen time: 0.000
 Write time: 0.000
+Accounted component time: 0.004
+Time taken for cubing:  0.003
 Number of nodes:  23
 Tool runtime:  0.681
 ```
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index 8ceac89..aaf2b85 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -322,12 +322,13 @@ struct MCTS {
     }
 };
 
-// Score variables using propagation as in the provided snippet
+// Score only "free" (preselected) variables using propagation.
 std::vector<std::pair<int,int>> preselect_vars(int M) {
     std::vector<std::pair<int,int>> ranked;
     int scores[MAX_VARS + 1][2] = {0};
 
     for (int v = 1; v <= n_vars && v <= M; v++) {
+        if (!is_preselected(v)) continue;
         std::vector<int> propagated;
         int pos = 0, neg = 0;
 
@@ -344,6 +345,7 @@ std::vector<std::pair<int,int>> preselect_vars(int M) {
     }
 
     for (int v = 1; v <= n_vars && v <= M; ++v) {
+        if (!is_preselected(v)) continue;
         if (scores[v][1] > 0) ranked.push_back({v, scores[v][1]});
     }
     std::sort(ranked.begin(), ranked.end(), [](auto &a, auto &b){return a.second > b.second;});
@@ -455,6 +457,8 @@ int main(int argc, char** argv) {
         printf("MCTS time: %.3f\n", mcts_time);
         printf("Cube gen time: %.3f\n", cube_time);
         printf("Write time: %.3f\n", write_time);
+        double accounted = parse_time + score_time + mcts_time + cube_time + write_time;
+        printf("Accounted component time: %.3f\n", accounted);
     }
 
     double cubing_time = score_time + mcts_time + cube_time + write_time;
@@ -466,4 +470,3 @@ int main(int argc, char** argv) {
     if (opt.debug) printf("Generated %zu cubes\n", cubes.size());
     return 0;
 }
-

From 2eff0dbb1596b61111c01df6a4b51058634dc809 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 18 Feb 2026 12:13:26 +0530
Subject: [PATCH 6/9] Add per-simulation MCTS debug tracing

---
 README.md       |  2 +-
 simple_mcts.cpp | 75 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index ff74484..3dd9e1a 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
 ```
 
 Run the tool on the sample CNF file with a handful of search simulations. Pass
-`-debug` to see detailed timings and rankings (rankings are printed only for free/preselected variables):
+`-debug` to see detailed timings, per-simulation MCTS trace (UCT scores + chosen action), and rankings (rankings are printed only for free/preselected variables):
 
 ```bash
 ./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2 -debug
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index aaf2b85..3ad1c03 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -261,7 +261,7 @@ struct MCTS {
         return (double)node->path.size();
     }
 
-    void expand(MCTSNode* node) {
+    void expand(MCTSNode* node, int sim_id, int trace_depth) {
         if (node->terminal) return;
         int var = vars[node->depth];
         for (int a = 0; a < 2; ++a) {
@@ -271,36 +271,80 @@ struct MCTS {
             bool term = is_terminal(node->depth + 1, p);
             node->child[a] = new MCTSNode(p, node->depth + 1, term);
             node_created++;
+            if (opt.debug) {
+                printf("[sim %d][depth %d] expand action=%d lit=%d terminal=%d\n",
+                       sim_id, trace_depth, a, lit, (int)term);
+            }
         }
     }
 
-    double search(MCTSNode* node) {
+    double search(MCTSNode* node, int sim_id, int trace_depth = 0) {
         if (node->terminal) {
-            return rollout(node);
+            double v = rollout(node);
+            if (opt.debug) {
+                printf("[sim %d][depth %d] terminal path_len=%zu value=%.3f\n",
+                       sim_id, trace_depth, node->path.size(), v);
+            }
+            return v;
         }
         if (node->child[0] == nullptr) {
-            expand(node);
-            return rollout(node);
+            expand(node, sim_id, trace_depth);
+            double v = rollout(node);
+            if (opt.debug) {
+                printf("[sim %d][depth %d] leaf rollout value=%.3f\n", sim_id, trace_depth, v);
+            }
+            return v;
         }
 
         int totalN = node->N[0] + node->N[1];
-        double bestU = -1e9;
-        int bestA = 0;
+        std::vector<std::tuple<double,int,int,double>> scored; // u, action, lit, q
+        scored.reserve(2);
         for (int a = 0; a < 2; ++a) {
-            double u = node->Q[a] + cpuct * sqrt((double)(totalN + 1e-6)) / (1 + node->N[a]);
-            if (u > bestU) {
-                bestU = u;
-                bestA = a;
+            int lit = a ? vars[node->depth] : -vars[node->depth];
+            double explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + node->N[a]);
+            double u = node->Q[a] + explore;
+            scored.push_back({u, a, lit, node->Q[a]});
+        }
+        std::sort(scored.begin(), scored.end(), [](const auto& x, const auto& y) {
+            return std::get<0>(x) > std::get<0>(y);
+        });
+
+        int bestA = std::get<1>(scored[0]);
+        if (opt.debug) {
+            printf("[sim %d][depth %d] top UCT choices (up to 3):\n", sim_id, trace_depth);
+            for (size_t i = 0; i < scored.size() && i < 3; ++i) {
+                printf("  #%zu action=%d lit=%d UCT=%.6f Q=%.6f N=%d\n",
+                       i + 1,
+                       std::get<1>(scored[i]),
+                       std::get<2>(scored[i]),
+                       std::get<0>(scored[i]),
+                       std::get<3>(scored[i]),
+                       node->N[std::get<1>(scored[i])]);
             }
+            printf("[sim %d][depth %d] choose action=%d lit=%d\n",
+                   sim_id, trace_depth, bestA,
+                   bestA ? vars[node->depth] : -vars[node->depth]);
         }
-        double v = search(node->child[bestA]);
+
+        double v = search(node->child[bestA], sim_id, trace_depth + 1);
         node->N[bestA]++;
         node->Q[bestA] += (v - node->Q[bestA]) / node->N[bestA];
+        if (opt.debug) {
+            printf("[sim %d][depth %d] backprop action=%d newQ=%.6f newN=%d value=%.3f\n",
+                   sim_id, trace_depth, bestA, node->Q[bestA], node->N[bestA], v);
+        }
         return v;
     }
 
     std::vector<int> run() {
-        for (int i = 0; i < opt.num_sims; ++i) search(root);
+        for (int i = 0; i < opt.num_sims; ++i) {
+            if (opt.debug) printf("=== MCTS simulation %d/%d ===\n", i + 1, opt.num_sims);
+            double val = search(root, i + 1);
+            if (opt.debug) {
+                printf("[sim %d] finished with value=%.3f rootN=(%d,%d) rootQ=(%.6f,%.6f)\n",
+                       i + 1, val, root->N[0], root->N[1], root->Q[0], root->Q[1]);
+            }
+        }
         std::vector<int> best;
         MCTSNode* node = root;
         while (!node->terminal) {
@@ -318,6 +362,11 @@ struct MCTS {
             best.push_back(lit);
             node = node->child[a];
         }
+        if (opt.debug) {
+            printf("Final best path by visit count:");
+            for (int lit : best) printf(" %d", lit);
+            printf("\n");
+        }
         return best;
     }
 };

From a99f472461dc5e3fea3db5e130808e4ce9df3b51 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 18 Feb 2026 12:13:31 +0530
Subject: [PATCH 7/9] Refactor C++ search to variable-level forest MCTS

---
 README.md       |   8 +-
 simple_mcts.cpp | 274 +++++++++++++++++++++++++++++-------------------
 2 files changed, 170 insertions(+), 112 deletions(-)

diff --git a/README.md b/README.md
index 3dd9e1a..1d31943 100644
--- a/README.md
+++ b/README.md
@@ -48,9 +48,11 @@ Typical debug output looks like:
 20 variables will be considered for cubing
 No. of free variables: 3
 Free variables: 1 3 5
-Variable ranking (var:score):
-1. 5:323
-2. 3:200
+Variable ranking with normalization (var:raw:norm_raw:pos:neg:norm_pos:norm_neg):
+1. 5:323.000:1.000000:12:10:1.000000:0.833333
+2. 3:200.000:0.619195:7:8:0.583333:0.666667
+=== MCTS simulation 1/2 ===
+[sim 1][depth 0] top actions by UCT (up to 3):
 Parsing time: 0.001
 Scoring time: 0.002
 MCTS time: 0.001
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index 3ad1c03..0db96bc 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -238,100 +238,128 @@ struct MCTSNode {
 };
 
 struct MCTS {
-    const std::vector<int>& vars;
+    struct Node {
+        std::vector<int> chosen_vars;
+        std::vector<int> remaining_vars;
+        std::unordered_map<int, std::unique_ptr<Node>> child; // chosen var -> child node
+        std::unordered_map<int, double> Q; // action-value per variable
+        std::unordered_map<int, int> N;    // visit count per variable
+        int depth = 0;
+
+        Node(std::vector<int> chosen, std::vector<int> remaining, int d)
+            : chosen_vars(std::move(chosen)), remaining_vars(std::move(remaining)), depth(d) {}
+    };
+
     const Options& opt;
+    const std::unordered_map<int, std::pair<double,double>>& split_reward; // var -> (pos, neg)
     double cpuct = 1.4;
-    MCTSNode* root;
+    std::unique_ptr<Node> root;
     int node_created = 0;
 
-    MCTS(const std::vector<int>& v, const Options& o)
-        : vars(v), opt(o) {
-        root = new MCTSNode({}, 0, is_terminal(0, {}));
+    MCTS(const std::vector<int>& ranked_vars,
+         const Options& o,
+         const std::unordered_map<int, std::pair<double,double>>& rewards)
+        : opt(o), split_reward(rewards) {
+        root = std::make_unique<Node>(std::vector<int>{}, ranked_vars, 0);
         node_created = 1;
     }
 
-    bool is_terminal(int depth, const std::vector<int>& path) const {
-        if (opt.d_cutoff != -1 && depth >= opt.d_cutoff) return true;
-        if (opt.n_cutoff != -1 && (int)path.size() >= opt.n_cutoff) return true;
-        if (depth >= (int)vars.size()) return true;
+    bool is_terminal(const Node* node) const {
+        if (opt.d_cutoff != -1 && node->depth >= opt.d_cutoff) return true;
+        if (opt.n_cutoff != -1 && (int)node->chosen_vars.size() >= opt.n_cutoff) return true;
+        if (node->remaining_vars.empty()) return true;
         return false;
     }
 
-    double rollout(MCTSNode* node) const {
-        return (double)node->path.size();
+    static std::vector<int> remove_var(const std::vector<int>& xs, int v) {
+        std::vector<int> out;
+        out.reserve(xs.size());
+        for (int x : xs) if (x != v) out.push_back(x);
+        return out;
     }
 
-    void expand(MCTSNode* node, int sim_id, int trace_depth) {
-        if (node->terminal) return;
-        int var = vars[node->depth];
-        for (int a = 0; a < 2; ++a) {
-            auto p = node->path;
-            int lit = a ? var : -var;
-            p.push_back(lit);
-            bool term = is_terminal(node->depth + 1, p);
-            node->child[a] = new MCTSNode(p, node->depth + 1, term);
-            node_created++;
-            if (opt.debug) {
-                printf("[sim %d][depth %d] expand action=%d lit=%d terminal=%d\n",
-                       sim_id, trace_depth, a, lit, (int)term);
-            }
-        }
+    // Forest-style split reward for variable v: split on -v and +v, average both branches.
+    double action_reward(int v) const {
+        auto it = split_reward.find(v);
+        if (it == split_reward.end()) return 0.0;
+        return (it->second.first + it->second.second) / 2.0;
     }
 
-    double search(MCTSNode* node, int sim_id, int trace_depth = 0) {
-        if (node->terminal) {
-            double v = rollout(node);
-            if (opt.debug) {
-                printf("[sim %d][depth %d] terminal path_len=%zu value=%.3f\n",
-                       sim_id, trace_depth, node->path.size(), v);
-            }
-            return v;
+    std::vector<std::tuple<double,int,double,int>> rank_actions_by_uct(Node* node) {
+        std::vector<std::tuple<double,int,double,int>> scored; // uct, var, avg_reward, visits
+        scored.reserve(node->remaining_vars.size());
+
+        int totalN = 0;
+        for (int v : node->remaining_vars) totalN += node->N[v];
+
+        for (int v : node->remaining_vars) {
+            double q = node->Q[v];
+            int n = node->N[v];
+            double explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + n);
+            double u = q + explore;
+            scored.push_back({u, v, action_reward(v), n});
         }
-        if (node->child[0] == nullptr) {
-            expand(node, sim_id, trace_depth);
-            double v = rollout(node);
+
+        std::sort(scored.begin(), scored.end(), [](const auto& a, const auto& b) {
+            if (std::get<0>(a) != std::get<0>(b)) return std::get<0>(a) > std::get<0>(b);
+            return std::get<1>(a) < std::get<1>(b);
+        });
+        return scored;
+    }
+
+    double search(Node* node, int sim_id, int trace_depth = 0) {
+        if (is_terminal(node)) {
             if (opt.debug) {
-                printf("[sim %d][depth %d] leaf rollout value=%.3f\n", sim_id, trace_depth, v);
+                printf("[sim %d][depth %d] terminal chosen_vars=%zu value=0.000\n",
+                       sim_id, trace_depth, node->chosen_vars.size());
             }
-            return v;
-        }
-
-        int totalN = node->N[0] + node->N[1];
-        std::vector<std::tuple<double,int,int,double>> scored; // u, action, lit, q
-        scored.reserve(2);
-        for (int a = 0; a < 2; ++a) {
-            int lit = a ? vars[node->depth] : -vars[node->depth];
-            double explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + node->N[a]);
-            double u = node->Q[a] + explore;
-            scored.push_back({u, a, lit, node->Q[a]});
+            return 0.0;
         }
-        std::sort(scored.begin(), scored.end(), [](const auto& x, const auto& y) {
-            return std::get<0>(x) > std::get<0>(y);
-        });
 
-        int bestA = std::get<1>(scored[0]);
+        auto scored = rank_actions_by_uct(node);
         if (opt.debug) {
-            printf("[sim %d][depth %d] top UCT choices (up to 3):\n", sim_id, trace_depth);
+            printf("[sim %d][depth %d] top actions by UCT (up to 3):\n", sim_id, trace_depth);
             for (size_t i = 0; i < scored.size() && i < 3; ++i) {
-                printf("  #%zu action=%d lit=%d UCT=%.6f Q=%.6f N=%d\n",
+                printf("  #%zu var=%d UCT=%.6f avg_split_rew=%.6f Q=%.6f N=%d\n",
                        i + 1,
                        std::get<1>(scored[i]),
-                       std::get<2>(scored[i]),
                        std::get<0>(scored[i]),
-                       std::get<3>(scored[i]),
-                       node->N[std::get<1>(scored[i])]);
+                       std::get<2>(scored[i]),
+                       node->Q[std::get<1>(scored[i])],
+                       std::get<3>(scored[i]));
+            }
+        }
+
+        int best_v = std::get<1>(scored[0]);
+        auto rewards = split_reward.at(best_v);
+        double split_avg = (rewards.first + rewards.second) / 2.0;
+
+        if (opt.debug) {
+            printf("[sim %d][depth %d] choose var=%d, split rewards: -%d=>%.6f +%d=>%.6f avg=%.6f\n",
+                   sim_id, trace_depth, best_v, best_v, rewards.first, best_v, rewards.second, split_avg);
+        }
+
+        if (!node->child.count(best_v)) {
+            auto next_chosen = node->chosen_vars;
+            next_chosen.push_back(best_v);
+            auto next_remaining = remove_var(node->remaining_vars, best_v);
+            node->child[best_v] = std::make_unique<Node>(std::move(next_chosen), std::move(next_remaining), node->depth + 1);
+            node_created++;
+            if (opt.debug) {
+                printf("[sim %d][depth %d] expand child for var=%d -> child_depth=%d\n",
+                       sim_id, trace_depth, best_v, node->depth + 1);
             }
-            printf("[sim %d][depth %d] choose action=%d lit=%d\n",
-                   sim_id, trace_depth, bestA,
-                   bestA ? vars[node->depth] : -vars[node->depth]);
         }
 
-        double v = search(node->child[bestA], sim_id, trace_depth + 1);
-        node->N[bestA]++;
-        node->Q[bestA] += (v - node->Q[bestA]) / node->N[bestA];
+        double child_value = search(node->child[best_v].get(), sim_id, trace_depth + 1);
+        double v = (split_avg + child_value) / 2.0;
+
+        node->N[best_v]++;
+        node->Q[best_v] += (v - node->Q[best_v]) / node->N[best_v];
+
         if (opt.debug) {
-            printf("[sim %d][depth %d] backprop action=%d newQ=%.6f newN=%d value=%.3f\n",
-                   sim_id, trace_depth, bestA, node->Q[bestA], node->N[bestA], v);
+            printf("[sim %d][depth %d] backprop var=%d newQ=%.6f newN=%d value=%.6f\n",
+                   sim_id, trace_depth, best_v, node->Q[best_v], node->N[best_v], v);
         }
         return v;
     }
@@ -339,42 +367,54 @@ struct MCTS {
     std::vector<int> run() {
         for (int i = 0; i < opt.num_sims; ++i) {
             if (opt.debug) printf("=== MCTS simulation %d/%d ===\n", i + 1, opt.num_sims);
-            double val = search(root, i + 1);
+            double val = search(root.get(), i + 1);
             if (opt.debug) {
-                printf("[sim %d] finished with value=%.3f rootN=(%d,%d) rootQ=(%.6f,%.6f)\n",
-                       i + 1, val, root->N[0], root->N[1], root->Q[0], root->Q[1]);
+                int total = 0;
+                for (int v : root->remaining_vars) total += root->N[v];
+                printf("[sim %d] finished value=%.6f root_total_visits=%d\n", i + 1, val, total);
             }
         }
-        std::vector<int> best;
-        MCTSNode* node = root;
-        while (!node->terminal) {
-            int a = node->N[1] > node->N[0] ? 1 : 0;
-            if (node->child[a] == nullptr) {
-                int var = vars[node->depth];
-                auto p = node->path;
-                int lit_tmp = a ? var : -var;
-                p.push_back(lit_tmp);
-                bool term = is_terminal(node->depth + 1, p);
-                node->child[a] = new MCTSNode(p, node->depth + 1, term);
-                node_created++;
+
+        std::vector<int> best_vars;
+        Node* node = root.get();
+        while (!is_terminal(node)) {
+            int best_v = -1;
+            int best_n = -1;
+            for (int v : node->remaining_vars) {
+                int n = node->N[v];
+                if (n > best_n) {
+                    best_n = n;
+                    best_v = v;
+                }
             }
-            int lit = a ? vars[node->depth] : -vars[node->depth];
-            best.push_back(lit);
-            node = node->child[a];
+            if (best_v == -1) break;
+            best_vars.push_back(best_v);
+            if (!node->child.count(best_v)) break;
+            node = node->child[best_v].get();
         }
+
         if (opt.debug) {
-            printf("Final best path by visit count:");
-            for (int lit : best) printf(" %d", lit);
+            printf("Final best variable sequence by visit count:");
+            for (int v : best_vars) printf(" %d", v);
             printf("\n");
         }
-        return best;
+        return best_vars;
     }
 };
 
 // Score only "free" (preselected) variables using propagation.
-std::vector<std::pair<int,int>> preselect_vars(int M) {
-    std::vector<std::pair<int,int>> ranked;
-    int scores[MAX_VARS + 1][2] = {0};
+struct VarScore {
+    int var = 0;
+    int pos = 0;
+    int neg = 0;
+    double raw_score = 0.0;
+    double norm_pos = 0.0;
+    double norm_neg = 0.0;
+    double norm_raw = 0.0;
+};
+
+std::vector<VarScore> preselect_vars(int M) {
+    std::vector<VarScore> ranked;
 
     for (int v = 1; v <= n_vars && v <= M; v++) {
         if (!is_preselected(v)) continue;
@@ -387,17 +427,29 @@ std::vector<std::pair<int,int>> preselect_vars(int M) {
         reset_assignments();
         if (unit_propagation(-v, propagated)) neg = propagated.size();
 
-        scores[v][0] = v;
-        scores[v][1] = pos * neg + 10 * (pos + neg) +
-                       100 * (bimp_size[lit_index(v)] + bimp_size[lit_index(-v)]) +
-                       var_activity[v] * 5;
+        double raw = (double)pos * (double)neg + 10.0 * (pos + neg) +
+                     100.0 * (bimp_size[lit_index(v)] + bimp_size[lit_index(-v)]) +
+                     5.0 * var_activity[v];
+
+        ranked.push_back({v, pos, neg, raw, 0.0, 0.0, 0.0});
     }
 
-    for (int v = 1; v <= n_vars && v <= M; ++v) {
-        if (!is_preselected(v)) continue;
-        if (scores[v][1] > 0) ranked.push_back({v, scores[v][1]});
+    int max_prop = 1;
+    double max_raw = 1.0;
+    for (const auto& s : ranked) {
+        max_prop = std::max(max_prop, std::max(s.pos, s.neg));
+        max_raw = std::max(max_raw, s.raw_score);
+    }
+
+    for (auto& s : ranked) {
+        s.norm_pos = (double)s.pos / (double)max_prop;
+        s.norm_neg = (double)s.neg / (double)max_prop;
+        s.norm_raw = s.raw_score / max_raw;
     }
-    std::sort(ranked.begin(), ranked.end(), [](auto &a, auto &b){return a.second > b.second;});
+
+    std::sort(ranked.begin(), ranked.end(), [](const VarScore &a, const VarScore &b) {
+        return a.norm_raw > b.norm_raw;
+    });
     return ranked;
 }
 
@@ -462,26 +514,30 @@ int main(int argc, char** argv) {
     auto score_start = std::chrono::high_resolution_clock::now();
     auto ranked = preselect_vars(opt.m_vars);
     auto score_end = std::chrono::high_resolution_clock::now();
+
+    std::vector<int> vars;
+    std::unordered_map<int, std::pair<double,double>> split_reward;
+    for (const auto& s : ranked) {
+        vars.push_back(s.var);
+        split_reward[s.var] = {s.norm_neg, s.norm_pos}; // (-v, +v)
+    }
+
     if (opt.debug) {
-        printf("Variable ranking (var:score):\n");
+        printf("Variable ranking with normalization (var:raw:norm_raw:pos:neg:norm_pos:norm_neg):\n");
         for (size_t i = 0; i < ranked.size(); ++i) {
-            printf("%zu. %d:%d\n", i+1, ranked[i].first, ranked[i].second);
+            const auto& s = ranked[i];
+            printf("%zu. %d:%.3f:%.6f:%d:%d:%.6f:%.6f\n",
+                   i + 1, s.var, s.raw_score, s.norm_raw, s.pos, s.neg, s.norm_pos, s.norm_neg);
         }
     }
 
-    std::vector<int> vars;
-    for (auto &p : ranked) vars.push_back(p.first);
-
     auto mcts_start = std::chrono::high_resolution_clock::now();
-    MCTS mcts(vars, opt);
-    auto best_path = mcts.run();
+    MCTS mcts(vars, opt, split_reward);
+    auto best_vars = mcts.run();
     auto mcts_end = std::chrono::high_resolution_clock::now();
 
-    std::vector<int> abs_vars;
-    for (int lit : best_path) abs_vars.push_back(std::abs(lit));
-
     auto cube_gen_start = std::chrono::high_resolution_clock::now();
-    std::vector<Cube> cubes = generate_cubes(abs_vars);
+    std::vector<Cube> cubes = generate_cubes(best_vars);
     auto cube_gen_end = std::chrono::high_resolution_clock::now();
 
     auto write_start = std::chrono::high_resolution_clock::now();

From cd8bff0570663ee48a8aea4c47f02f407b466d30 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 18 Feb 2026 12:13:34 +0530
Subject: [PATCH 8/9] Recompute BCP rewards per depth and limit UCT to top-3
 actions

---
 README.md       |   4 +-
 simple_mcts.cpp | 189 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 126 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index 1d31943..3973434 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ g++ -std=c++17 -O2 simple_mcts.cpp -o simple_mcts
 ```
 
 Run the tool on the sample CNF file with a handful of search simulations. Pass
-`-debug` to see detailed timings, per-simulation MCTS trace (UCT scores + chosen action), and rankings (rankings are printed only for free/preselected variables):
+`-debug` to see detailed timings, per-simulation MCTS trace (UCT details + rewards + chosen variable), and rankings (rankings are printed only for free/preselected variables):
 
 ```bash
 ./simple_mcts alphamaplesat/constraints_17_c_100000_2_2_0_final.simp -m 20 -o out.cubes -numMCTSSims 2 -debug
@@ -52,7 +52,7 @@ Variable ranking with normalization (var:raw:norm_raw:pos:neg:norm_pos:norm_neg)
 1. 5:323.000:1.000000:12:10:1.000000:0.833333
 2. 3:200.000:0.619195:7:8:0.583333:0.666667
 === MCTS simulation 1/2 ===
-[sim 1][depth 0] top actions by UCT (up to 3):
+[sim 1][depth 0] top actions by UCT (max 3 candidates):
 Parsing time: 0.001
 Scoring time: 0.002
 MCTS time: 0.001
diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index 0db96bc..79b3c4a 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -226,21 +226,10 @@ struct Cube {
 // Minimal MCTS implementation for exploring variable literals
 // ------------------------------------------------------------
 
-struct MCTSNode {
-    std::vector<int> path;   // literals assigned so far
-    MCTSNode* child[2] = {nullptr, nullptr};
-    double Q[2] = {0.0, 0.0};
-    int N[2] = {0, 0};
-    bool terminal = false;
-    int depth = 0;
-    MCTSNode(const std::vector<int>& p, int d, bool term)
-        : path(p), terminal(term), depth(d) {}
-};
-
 struct MCTS {
     struct Node {
-        std::vector<int> chosen_vars;
-        std::vector<int> remaining_vars;
+        std::vector<int> chosen_vars;      // variables selected along this path (signless actions)
+        std::vector<int> remaining_vars;   // ranked variables not selected yet
         std::unordered_map<int, std::unique_ptr<Node>> child; // chosen var -> child node
         std::unordered_map<int, double> Q; // action-value per variable
         std::unordered_map<int, int> N;    // visit count per variable
@@ -250,16 +239,30 @@ struct MCTS {
             : chosen_vars(std::move(chosen)), remaining_vars(std::move(remaining)), depth(d) {}
     };
 
+    struct ActionEval {
+        double pos_avg = 0.0;   // average normalized reward for +v over all path branches
+        double neg_avg = 0.0;   // average normalized reward for -v over all path branches
+        double split_avg = 0.0; // average of pos_avg and neg_avg
+        int branch_count = 0;
+    };
+
+    struct ActionScore {
+        int var = 0;
+        double q = 0.0;
+        int n = 0;
+        double explore = 0.0;
+        double imm = 0.0;
+        double uct = 0.0;
+        ActionEval eval;
+    };
+
     const Options& opt;
-    const std::unordered_map<int, std::pair<double,double>>& split_reward; // var -> (pos, neg)
     double cpuct = 1.4;
     std::unique_ptr<Node> root;
     int node_created = 0;
 
-    MCTS(const std::vector<int>& ranked_vars,
-         const Options& o,
-         const std::unordered_map<int, std::pair<double,double>>& rewards)
-        : opt(o), split_reward(rewards) {
+    MCTS(const std::vector<int>& ranked_vars, const Options& o)
+        : opt(o) {
         root = std::make_unique<Node>(std::vector<int>{}, ranked_vars, 0);
         node_created = 1;
     }
@@ -278,31 +281,84 @@ struct MCTS {
         return out;
     }
 
-    // Forest-style split reward for variable v: split on -v and +v, average both branches.
-    double action_reward(int v) const {
-        auto it = split_reward.find(v);
-        if (it == split_reward.end()) return 0.0;
-        return (it->second.first + it->second.second) / 2.0;
+    // Run BCP on the current sub-formula under a full assumption list and return
+    // normalized reward in [0, 1] as propagated_variables / n_vars.
+    double evaluate_with_assumptions(const std::vector<int>& assumptions) const {
+        reset_assignments();
+        std::vector<int> propagated;
+
+        for (int lit : assumptions) {
+            if (!propagate_bimp(lit, propagated)) return 0.0;
+            if (!propagate_big_clauses(propagated)) return 0.0;
+        }
+        if (!propagate_big_clauses(propagated)) return 0.0;
+
+        return (double)propagated.size() / (double)std::max(1, n_vars);
+    }
+
+    ActionEval evaluate_action(Node* node, int v) const {
+        ActionEval out;
+        std::vector<int> assumptions;
+
+        std::function<void(size_t)> dfs = [&](size_t idx) {
+            if (idx == node->chosen_vars.size()) {
+                auto plus_assump = assumptions;
+                plus_assump.push_back(v);
+                auto minus_assump = assumptions;
+                minus_assump.push_back(-v);
+
+                double pos = evaluate_with_assumptions(plus_assump);
+                double neg = evaluate_with_assumptions(minus_assump);
+                out.pos_avg += pos;
+                out.neg_avg += neg;
+                out.branch_count += 1;
+                return;
+            }
+
+            int chosen = node->chosen_vars[idx];
+            assumptions.push_back(chosen);
+            dfs(idx + 1);
+            assumptions.back() = -chosen;
+            dfs(idx + 1);
+            assumptions.pop_back();
+        };
+
+        dfs(0);
+        if (out.branch_count > 0) {
+            out.pos_avg /= out.branch_count;
+            out.neg_avg /= out.branch_count;
+        }
+        out.split_avg = (out.pos_avg + out.neg_avg) / 2.0;
+        return out;
     }
 
-    std::vector<std::tuple<double,int,double,int>> rank_actions_by_uct(Node* node) {
-        std::vector<std::tuple<double,int,double,int>> scored; // uct, var, avg_reward, visits
-        scored.reserve(node->remaining_vars.size());
+    // Compute UCT scores for only top 3 candidate variables at this node.
+    std::vector<ActionScore> rank_actions_by_uct(Node* node) {
+        std::vector<int> candidates;
+        for (size_t i = 0; i < node->remaining_vars.size() && i < 3; ++i) {
+            candidates.push_back(node->remaining_vars[i]);
+        }
 
         int totalN = 0;
-        for (int v : node->remaining_vars) totalN += node->N[v];
-
-        for (int v : node->remaining_vars) {
-            double q = node->Q[v];
-            int n = node->N[v];
-            double explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + n);
-            double u = q + explore;
-            scored.push_back({u, v, action_reward(v), n});
+        for (int v : candidates) totalN += node->N[v];
+
+        std::vector<ActionScore> scored;
+        scored.reserve(candidates.size());
+        for (int v : candidates) {
+            ActionScore s;
+            s.var = v;
+            s.q = node->Q[v];
+            s.n = node->N[v];
+            s.eval = evaluate_action(node, v);
+            s.imm = s.eval.split_avg;
+            s.explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + s.n);
+            s.uct = s.q + s.imm + s.explore;
+            scored.push_back(s);
         }
 
-        std::sort(scored.begin(), scored.end(), [](const auto& a, const auto& b) {
-            if (std::get<0>(a) != std::get<0>(b)) return std::get<0>(a) > std::get<0>(b);
-            return std::get<1>(a) < std::get<1>(b);
+        std::sort(scored.begin(), scored.end(), [](const ActionScore& a, const ActionScore& b) {
+            if (a.uct != b.uct) return a.uct > b.uct;
+            return a.var < b.var;
         });
         return scored;
     }
@@ -310,33 +366,39 @@ struct MCTS {
     double search(Node* node, int sim_id, int trace_depth = 0) {
         if (is_terminal(node)) {
             if (opt.debug) {
-                printf("[sim %d][depth %d] terminal chosen_vars=%zu value=0.000\n",
+                printf("[sim %d][depth %d] terminal chosen_vars=%zu value=0.000000\n",
                        sim_id, trace_depth, node->chosen_vars.size());
             }
             return 0.0;
         }
 
         auto scored = rank_actions_by_uct(node);
+        if (scored.empty()) return 0.0;
+
         if (opt.debug) {
-            printf("[sim %d][depth %d] top actions by UCT (up to 3):\n", sim_id, trace_depth);
-            for (size_t i = 0; i < scored.size() && i < 3; ++i) {
-                printf("  #%zu var=%d UCT=%.6f avg_split_rew=%.6f Q=%.6f N=%d\n",
-                       i + 1,
-                       std::get<1>(scored[i]),
-                       std::get<0>(scored[i]),
-                       std::get<2>(scored[i]),
-                       node->Q[std::get<1>(scored[i])],
-                       std::get<3>(scored[i]));
+            printf("[sim %d][depth %d] path vars:", sim_id, trace_depth);
+            if (node->chosen_vars.empty()) {
+                printf(" <root>");
+            } else {
+                for (int v : node->chosen_vars) printf(" %d", v);
+            }
+            printf("\n");
+
+            printf("[sim %d][depth %d] top actions by UCT (max 3 candidates):\n", sim_id, trace_depth);
+            for (size_t i = 0; i < scored.size(); ++i) {
+                const auto& s = scored[i];
+                printf("  #%zu var=%d UCT=%.6f [Q=%.6f + imm=%.6f + explore=%.6f] N=%d branches=%d\n",
+                       i + 1, s.var, s.uct, s.q, s.imm, s.explore, s.n, s.eval.branch_count);
+                printf("      rewards: +%d => %.6f, -%d => %.6f, split_avg=%.6f\n",
+                       s.var, s.eval.pos_avg, s.var, s.eval.neg_avg, s.eval.split_avg);
             }
         }
 
-        int best_v = std::get<1>(scored[0]);
-        auto rewards = split_reward.at(best_v);
-        double split_avg = (rewards.first + rewards.second) / 2.0;
+        const auto best = scored[0];
+        int best_v = best.var;
 
         if (opt.debug) {
-            printf("[sim %d][depth %d] choose var=%d, split rewards: -%d=>%.6f +%d=>%.6f avg=%.6f\n",
-                   sim_id, trace_depth, best_v, best_v, rewards.first, best_v, rewards.second, split_avg);
+            printf("[sim %d][depth %d] choose var=%d\n", sim_id, trace_depth, best_v);
         }
 
         if (!node->child.count(best_v)) {
@@ -352,14 +414,14 @@ struct MCTS {
         }
 
         double child_value = search(node->child[best_v].get(), sim_id, trace_depth + 1);
-        double v = (split_avg + child_value) / 2.0;
+        double v = (best.eval.split_avg + child_value) / 2.0;
 
         node->N[best_v]++;
         node->Q[best_v] += (v - node->Q[best_v]) / node->N[best_v];
 
         if (opt.debug) {
-            printf("[sim %d][depth %d] backprop var=%d newQ=%.6f newN=%d value=%.6f\n",
-                   sim_id, trace_depth, best_v, node->Q[best_v], node->N[best_v], v);
+            printf("[sim %d][depth %d] backprop var=%d newQ=%.6f newN=%d value=%.6f (split_avg=%.6f child=%.6f)\n",
+                   sim_id, trace_depth, best_v, node->Q[best_v], node->N[best_v], v, best.eval.split_avg, child_value);
         }
         return v;
     }
@@ -370,8 +432,8 @@ struct MCTS {
             double val = search(root.get(), i + 1);
             if (opt.debug) {
                 int total = 0;
-                for (int v : root->remaining_vars) total += root->N[v];
-                printf("[sim %d] finished value=%.6f root_total_visits=%d\n", i + 1, val, total);
+                for (size_t j = 0; j < root->remaining_vars.size() && j < 3; ++j) total += root->N[root->remaining_vars[j]];
+                printf("[sim %d] finished value=%.6f root_total_visits(top3)=%d\n", i + 1, val, total);
             }
         }
 
@@ -380,14 +442,15 @@ struct MCTS {
         while (!is_terminal(node)) {
             int best_v = -1;
             int best_n = -1;
-            for (int v : node->remaining_vars) {
+            for (size_t i = 0; i < node->remaining_vars.size() && i < 3; ++i) {
+                int v = node->remaining_vars[i];
                 int n = node->N[v];
                 if (n > best_n) {
                     best_n = n;
                     best_v = v;
                 }
             }
-            if (best_v == -1) break;
+            if (best_v == -1 || best_n <= 0) break;
             best_vars.push_back(best_v);
             if (!node->child.count(best_v)) break;
             node = node->child[best_v].get();
@@ -516,11 +579,7 @@ int main(int argc, char** argv) {
     auto score_end = std::chrono::high_resolution_clock::now();
 
     std::vector<int> vars;
-    std::unordered_map<int, std::pair<double,double>> split_reward;
-    for (const auto& s : ranked) {
-        vars.push_back(s.var);
-        split_reward[s.var] = {s.norm_neg, s.norm_pos}; // (-v, +v)
-    }
+    for (const auto& s : ranked) vars.push_back(s.var);
 
     if (opt.debug) {
         printf("Variable ranking with normalization (var:raw:norm_raw:pos:neg:norm_pos:norm_neg):\n");
@@ -532,7 +591,7 @@ int main(int argc, char** argv) {
     }
 
     auto mcts_start = std::chrono::high_resolution_clock::now();
-    MCTS mcts(vars, opt, split_reward);
+    MCTS mcts(vars, opt);
     auto best_vars = mcts.run();
     auto mcts_end = std::chrono::high_resolution_clock::now();
 

From 6da65d08418b9bf16dcd02485e98a431f389ec74 Mon Sep 17 00:00:00 2001
From: Piyush Jha <piyushnit15@gmail.com>
Date: Wed, 18 Feb 2026 12:13:38 +0530
Subject: [PATCH 9/9] Switch to PUCT and simplify propagation-based scoring

---
 simple_mcts.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/simple_mcts.cpp b/simple_mcts.cpp
index 79b3c4a..74192db 100644
--- a/simple_mcts.cpp
+++ b/simple_mcts.cpp
@@ -282,7 +282,7 @@ struct MCTS {
     }
 
     // Run BCP on the current sub-formula under a full assumption list and return
-    // normalized reward in [0, 1] as propagated_variables / n_vars.
+    // normalized reward in [0, 1] as propagated_variables / m.
     double evaluate_with_assumptions(const std::vector<int>& assumptions) const {
         reset_assignments();
         std::vector<int> propagated;
@@ -293,7 +293,7 @@ struct MCTS {
         }
         if (!propagate_big_clauses(propagated)) return 0.0;
 
-        return (double)propagated.size() / (double)std::max(1, n_vars);
+        return (double)propagated.size() / (double)std::max(1, opt.m_vars);
     }
 
     ActionEval evaluate_action(Node* node, int v) const {
@@ -351,8 +351,8 @@ struct MCTS {
             s.n = node->N[v];
             s.eval = evaluate_action(node, v);
             s.imm = s.eval.split_avg;
-            s.explore = cpuct * sqrt((double)(totalN + 1e-6)) / (1 + s.n);
-            s.uct = s.q + s.imm + s.explore;
+            s.explore = cpuct * s.imm * sqrt((double)(totalN + 1e-6)) / (1 + s.n);
+            s.uct = s.q + s.explore;
             scored.push_back(s);
         }
 
@@ -387,7 +387,7 @@ struct MCTS {
             printf("[sim %d][depth %d] top actions by UCT (max 3 candidates):\n", sim_id, trace_depth);
             for (size_t i = 0; i < scored.size(); ++i) {
                 const auto& s = scored[i];
-                printf("  #%zu var=%d UCT=%.6f [Q=%.6f + imm=%.6f + explore=%.6f] N=%d branches=%d\n",
+                printf("  #%zu var=%d UCT=%.6f [Q=%.6f + cpuct*P*sqrt(N)/(1+n), P=%.6f, explore=%.6f] N=%d branches=%d\n",
                        i + 1, s.var, s.uct, s.q, s.imm, s.explore, s.n, s.eval.branch_count);
                 printf("      rewards: +%d => %.6f, -%d => %.6f, split_avg=%.6f\n",
                        s.var, s.eval.pos_avg, s.var, s.eval.neg_avg, s.eval.split_avg);
@@ -490,9 +490,7 @@ std::vector<VarScore> preselect_vars(int M) {
         reset_assignments();
         if (unit_propagation(-v, propagated)) neg = propagated.size();
 
-        double raw = (double)pos * (double)neg + 10.0 * (pos + neg) +
-                     100.0 * (bimp_size[lit_index(v)] + bimp_size[lit_index(-v)]) +
-                     5.0 * var_activity[v];
+        double raw = (double)pos * (double)neg + (double)pos + (double)neg;
 
         ranked.push_back({v, pos, neg, raw, 0.0, 0.0, 0.0});
     }