diff --git a/archives/prospr_core.tar.gz b/archives/prospr_core.tar.gz index 6f7d7a4..78c84b1 100644 Binary files a/archives/prospr_core.tar.gz and b/archives/prospr_core.tar.gz differ diff --git a/archives/prospr_core.zip b/archives/prospr_core.zip index d706680..d2a6ea3 100644 Binary files a/archives/prospr_core.zip and b/archives/prospr_core.zip differ diff --git a/archives/prospr_data.tar.gz b/archives/prospr_data.tar.gz index 4a48a2c..ce7ffcc 100644 Binary files a/archives/prospr_data.tar.gz and b/archives/prospr_data.tar.gz differ diff --git a/archives/prospr_data.zip b/archives/prospr_data.zip index 3f1c5ec..7a31e1d 100644 Binary files a/archives/prospr_data.zip and b/archives/prospr_data.zip differ diff --git a/docs/source/api.rst b/docs/source/api.rst index cd8e562..855536f 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -35,6 +35,24 @@ specifying a submodule, e.g. | *Does not reset the Protein properties beforehand!* | *Parameters:* | * **protein** - *Protein*: the Protein object to fold. + | * **prune_func** - *str*: "naive_prune" (default) or "reach_prune". + | * **is_pre_folded** - *bool*: If _True_, the partial hash + of the protein will not be reset (Default is _False_.) + | *Returns:* + | * **Protein** - the Protein object set at the found + conformation and with updated properties according to the + performed moves. + + | **depth_first_bnb_parallel**\ (*protein*) + | Finds the most optimal conformation using a depth-first + branch-and-bound algorithm. + | Multiple subtrees are enumerated and solved in parallel using OpenMP. + | *Does not reset the Protein properties beforehand!* + | *Parameters:* + | * **protein** - *Protein*: the Protein object to fold. + | * **prune_func** - *str*: "naive_prune" (default) or "reach_prune". + | * **work_ratio** - *float*: The number of subtrees per thread to aim for. + Too low/high values may result in degraded performance. (Default is 3.) | *Returns:* | * **Protein** - the Protein object set at the found conformation and with updated properties according to the diff --git a/prospr/__init__.py b/prospr/__init__.py index 05c0cbf..d82334b 100644 --- a/prospr/__init__.py +++ b/prospr/__init__.py @@ -5,6 +5,7 @@ Protein, depth_first, depth_first_bnb, + depth_first_bnb_parallel, beam_search, ) from .datasets import load_vanEck250, load_vanEck1000, load_vanEck_hratio @@ -20,6 +21,7 @@ "Protein", "depth_first", "depth_first_bnb", + "depth_first_bnb_parallel", "beam_search", "load_vanEck250", "load_vanEck1000", diff --git a/prospr/core/core_module.cpp b/prospr/core/core_module.cpp index 96d1613..93b605e 100644 --- a/prospr/core/core_module.cpp +++ b/prospr/core/core_module.cpp @@ -84,7 +84,13 @@ PYBIND11_MODULE(prospr_core, m) { m.def("depth_first_bnb", depth_first_bnb, "Finds the optimal conformation via depth-first branch-and-bound " "search", - py::arg("protein"), py::arg("prune_func") = ""); + py::arg("protein"), py::arg("prune_func") = "", py::arg("is_pre_folded") = false); + + /* Parallel depth-first branch-and-bound search function definition. */ + m.def("depth_first_bnb_parallel", depth_first_bnb_parallel, + "Finds the optimal conformation via depth-first branch-and-bound " + "search using OpenMP and multiple subtrees", + py::arg("protein"), py::arg("prune_func") = "", py::arg("work_ratio") = 3); /* Beam search function definition. */ m.def("beam_search", beam_search, diff --git a/prospr/core/src/depth_first_bnb.cpp b/prospr/core/src/depth_first_bnb.cpp index fba8a8e..9f28925 100644 --- a/prospr/core/src/depth_first_bnb.cpp +++ b/prospr/core/src/depth_first_bnb.cpp @@ -11,10 +11,16 @@ #include #include +#include +#include #include #include #include +#ifdef _OPENMP +#include +#endif + /* All possible variables required by custom pruning. */ struct prune_vars { size_t max_length; @@ -93,17 +99,20 @@ bool reach_prune(Protein *protein, int move, int best_score, /* A depth-first branch-and-bound search function for finding a minimum * energy conformation. */ -void depth_first_bnb(Protein *protein, std::string prune_func) { - protein->reset_conformation(); +void depth_first_bnb(Protein *protein, std::string prune_func, bool is_pre_folded) { + if (!is_pre_folded) + protein->reset_conformation(); size_t max_length = protein->get_sequence().length(); int dim = protein->get_dim(); size_t no_neighbors = (size_t)pow(2, (dim - 1)); - /* The first two amino acids are fixed to prevent y-axis symmetry. */ - if (max_length > 1) - protein->place_amino(-1); - if (max_length <= 2) - return; + if (!is_pre_folded) { + /* The first two amino acids are fixed to prevent y-axis symmetry. */ + if (max_length > 1) + protein->place_amino(-1); + if (max_length <= 2) + return; + } /* Init default prune functions and arguments. */ auto prune_branch = naive_prune; @@ -154,9 +163,13 @@ void depth_first_bnb(Protein *protein, std::string prune_func) { /* Declare and set variables for the depth-first search. */ bool placed_amino = false; - int best_score = 1; + int best_score = 1; protein->get_score(); int score; - std::vector best_hash; + std::vector best_hash = protein->hash_fold(); + if (is_pre_folded) { + best_score = protein->get_score(); + best_hash = protein->hash_fold(); + } do { placed_amino = false; @@ -214,6 +227,89 @@ void depth_first_bnb(Protein *protein, std::string prune_func) { } } while (move != -dim - 1 || !dfs_stack.empty()); + /* Complete hash with "straight line" */ + while (best_hash.size() < max_length - 1) + best_hash.push_back(best_hash.back()); /* Set best found conformation and return protein. */ protein->set_hash(best_hash); } + +/* Iterate all valid leaf nodes of pre-folded proteins at a certain depth + * (limited to tree depth). + */ +std::vector pre_fold(Protein* protein, size_t depth) { + std::vector pre_folded; + const int dim = protein->get_dim(); + const size_t max_length = protein->get_sequence().length(); + std::function pre_fold_recurse; + pre_fold_recurse = [&](const Protein& protein, size_t recurse) { + for(int move = -dim; move <= dim; move++) { + if(move == 0) continue; + if(!protein.is_valid(move)) continue; + Protein next(protein); + next.place_amino(move); + if(recurse) + pre_fold_recurse(next, recurse - 1); + else + pre_folded.push_back(next); + } + }; + Protein root(*protein); + root.reset_conformation(); + /* The first two amino acids are fixed to prevent y-axis symmetry. */ + if (max_length > 1) + root.place_amino(-1); + depth = std::min(max_length, depth) - 1; + if(depth > 0) + pre_fold_recurse(root, depth - 1); + else + pre_folded.push_back(root); + return pre_folded; +} + +/* A depth-first branch-and-bound search function for finding a minimum energy + * conformation using OpenMP to explore multiple subtrees in parallel. + */ +void depth_first_bnb_parallel(Protein *protein, std::string prune_func, float work_ratio) { +#ifdef _OPENMP + int workerCount; + #pragma omp parallel + #pragma omp single + { + workerCount = omp_get_num_threads(); + } + float targetSubtreeCount = (float)(workerCount) * work_ratio; + /* Each node has up to 3 child nodes -> determine closest depth to match work_ratio */ + size_t pre_fold_depth = (size_t)std::max(0ll, llround(log(targetSubtreeCount) / log(3.0))); + auto pre_folded = pre_fold(protein, pre_fold_depth); + + /*TODO: Consider sharing a reference to best_score between threads for better pruning. + * (Requires atomic compare-and-update.) + */ + int best_score = 1; + std::vector best_hash; + #pragma omp parallel for schedule(dynamic) shared(best_score, best_hash) + for(int i = 0; i < (int) pre_folded.size(); i++) { + Protein& candidate_protein = pre_folded[i]; + depth_first_bnb(&candidate_protein, prune_func, true); + int score = candidate_protein.get_score(); + #pragma omp critical + { + if(score < best_score) { + best_score = score; + best_hash = candidate_protein.hash_fold(); + protein->aminos_placed += candidate_protein.get_aminos_placed(); + protein->solutions_checked += candidate_protein.get_solutions_checked(); + } + } + } + protein->set_hash(best_hash); +#else + static bool warned = false; + if (!warned) { + std::cerr << "Warning: Built without OpenMP support. Using serial depth_first_bnb(...) instead.\n"; + warned = true; + } + depth_first_bnb(protein, prune_func); +#endif +} diff --git a/prospr/core/src/depth_first_bnb.hpp b/prospr/core/src/depth_first_bnb.hpp index 74b5915..d280785 100644 --- a/prospr/core/src/depth_first_bnb.hpp +++ b/prospr/core/src/depth_first_bnb.hpp @@ -14,6 +14,11 @@ /* A depth-first branch-and-bound search function for finding a minimum energy * conformation. */ -void depth_first_bnb(Protein *protein, std::string prune_func = ""); +void depth_first_bnb(Protein *protein, std::string prune_func = "", bool is_pre_folded = false); + +/* A depth-first branch-and-bound search function for finding a minimum energy + * conformation using OpenMP to explore multiple subtrees in parallel. + */ +void depth_first_bnb_parallel(Protein *protein, std::string prune_func = "", float work_ratio = 3); #endif diff --git a/prospr/core/src/protein.cpp b/prospr/core/src/protein.cpp index 62eb2a3..bb80570 100644 --- a/prospr/core/src/protein.cpp +++ b/prospr/core/src/protein.cpp @@ -277,7 +277,7 @@ void Protein::reset_conformation() { } /* Returns true if a move is valid, returns false otherwise. */ -bool Protein::is_valid(int move) { +bool Protein::is_valid(int move) const { std::vector check_pos = last_pos; check_pos[abs(move) - 1] += move / abs(move); diff --git a/prospr/core/src/protein.hpp b/prospr/core/src/protein.hpp index 93f3513..70f894a 100644 --- a/prospr/core/src/protein.hpp +++ b/prospr/core/src/protein.hpp @@ -82,7 +82,7 @@ class Protein { void reset_conformation(); /* Returns true if a move is valid, returns false otherwise. */ - bool is_valid(int move); + bool is_valid(int move) const; /* Place the next amino acid and update the conformation accordingly. */ void place_amino(int move, bool track = true); @@ -125,6 +125,9 @@ class Protein { std::vector> _append_bond_pairs(std::vector> pairs, std::vector pos, std::vector moves); + + /* Private member access required to merge statistics of subtree solutions */ + friend void depth_first_bnb_parallel(Protein *protein, std::string prune_func, float work_ratio); }; /* Overload << operator for printing Proteins. */ diff --git a/prospr/core/tests/run_tests.sh b/prospr/core/tests/run_tests.sh index 40aac50..bdceaa0 100755 --- a/prospr/core/tests/run_tests.sh +++ b/prospr/core/tests/run_tests.sh @@ -10,7 +10,7 @@ set -e SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++11 - -Wunreachable-code" + -Wunreachable-code -fopenmp" DEBUG="" VALGRIND="" @@ -122,6 +122,7 @@ test_all() { test_protein test_depth_first test_depth_first_bnb + test_depth_first_bnb_parallel test_beam_search } diff --git a/prospr/core/tests/test_algorithms.cpp b/prospr/core/tests/test_algorithms.cpp index 6b7bd46..79b2885 100644 --- a/prospr/core/tests/test_algorithms.cpp +++ b/prospr/core/tests/test_algorithms.cpp @@ -56,6 +56,16 @@ void test_depth_first_bnb() { assert(protein->get_score() == -4); delete protein; std::cout << "\t3D Protein solution scores matches.\n"; + + /* Check if parallel algorithm solutions are found correctly. */ + protein = new Protein("PHPHPHPPH", 2, "HP"); + depth_first_bnb_parallel(protein); + int score = protein->get_score(); + protein->reset(); + depth_first_bnb(protein); + assert(score == protein->get_score()); + delete protein; + std::cout << "\t2D Protein solution scores matches between parallel and serial algorithms.\n"; } /* Test functionality of depth_first_bnb. */ diff --git a/setup.py b/setup.py index d6f571f..88d9f72 100644 --- a/setup.py +++ b/setup.py @@ -8,15 +8,37 @@ specifics. """ +import os +import sys from setuptools import setup from pybind11.setup_helpers import Pybind11Extension, build_ext +compile_args = [] +link_args = [] + +if sys.platform == "darwin": + # macOS: Requires libomp installed via brew install libomp + use_omp = os.environ.get("MAC_USE_LOMP", "0") == "1" + if use_omp: + compile_args.append("-Xpreprocessor -fopenmp") + link_args.extend(["-lomp"]) + else: + print("Warning: OpenMP is not enabled on macOS by default." + " Set MAC_USE_LOMP=1 to enable it.", file=sys.stderr) +elif sys.platform == "linux": + compile_args.append("-fopenmp") + link_args.append("-fopenmp") +elif sys.platform == "win32": + compile_args.append("/openmp") + setup( ext_modules=[ Pybind11Extension( name="prospr_core", sources=["prospr/core/core_module.cpp"], + extra_compile_args=compile_args, + extra_link_args=link_args, language="c++", ), ], diff --git a/tests/core/test_depth_first_bnb.py b/tests/core/test_depth_first_bnb.py index 51c3af7..6ee4be6 100644 --- a/tests/core/test_depth_first_bnb.py +++ b/tests/core/test_depth_first_bnb.py @@ -8,7 +8,7 @@ specifics. """ -from prospr import Protein, depth_first_bnb +from prospr import Protein, depth_first_bnb, depth_first_bnb_parallel import pytest @@ -50,3 +50,14 @@ def test_protein_3d_depth_first_bnb(self, protein_3d): assert protein_3d.score == -4 assert protein_3d.solutions_checked == 5 assert protein_3d.aminos_placed == 49368 + + + def test_protein_2d_depth_first_bnb_parallel(self, protein_2d): + """ + Test if parallel algorithm solution matches serial algorithm. + """ + depth_first_bnb_parallel(protein_2d) + score = protein_2d.score + protein_2d.reset() + depth_first_bnb(protein_2d) + assert score == protein_2d.score