Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified archives/prospr_core.tar.gz
Binary file not shown.
Binary file modified archives/prospr_core.zip
Binary file not shown.
Binary file modified archives/prospr_data.tar.gz
Binary file not shown.
Binary file modified archives/prospr_data.zip
Binary file not shown.
18 changes: 18 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@ specifying a submodule, e.g.
| *Does not reset the Protein properties beforehand!*
| *Parameters:*
| * **protein** - *Protein*: the Protein object to fold.
| * **prune_func** - *str*: "naive_prune" (default) or "reach_prune".
| * **is_pre_folded** - *bool*: If *True*, the partial conformation
already placed on the protein is kept instead of being reset (default is *False*).
| *Returns:*
| * **Protein** - the Protein object set at the found
conformation and with updated properties according to the
performed moves.

| **depth_first_bnb_parallel**\ (*protein*)
| Finds the most optimal conformation using a depth-first
branch-and-bound algorithm.
| Multiple subtrees are enumerated and solved in parallel using OpenMP.
| *Does not reset the Protein properties beforehand!*
| *Parameters:*
| * **protein** - *Protein*: the Protein object to fold.
| * **prune_func** - *str*: "naive_prune" (default) or "reach_prune".
| * **work_ratio** - *float*: The number of subtrees per thread to aim for.
Too low/high values may result in degraded performance. (Default is 3.)
| *Returns:*
| * **Protein** - the Protein object set at the found
conformation and with updated properties according to the
Expand Down
2 changes: 2 additions & 0 deletions prospr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Protein,
depth_first,
depth_first_bnb,
depth_first_bnb_parallel,
beam_search,
)
from .datasets import load_vanEck250, load_vanEck1000, load_vanEck_hratio
Expand All @@ -20,6 +21,7 @@
"Protein",
"depth_first",
"depth_first_bnb",
"depth_first_bnb_parallel",
"beam_search",
"load_vanEck250",
"load_vanEck1000",
Expand Down
8 changes: 7 additions & 1 deletion prospr/core/core_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,13 @@ PYBIND11_MODULE(prospr_core, m) {
m.def("depth_first_bnb", depth_first_bnb,
"Finds the optimal conformation via depth-first branch-and-bound "
"search",
py::arg("protein"), py::arg("prune_func") = "");
py::arg("protein"), py::arg("prune_func") = "", py::arg("is_pre_folded") = false);

/* Parallel depth-first branch-and-bound search function definition. */
m.def("depth_first_bnb_parallel", depth_first_bnb_parallel,
"Finds the optimal conformation via depth-first branch-and-bound "
"search using OpenMP and multiple subtrees",
py::arg("protein"), py::arg("prune_func") = "", py::arg("work_ratio") = 3);

/* Beam search function definition. */
m.def("beam_search", beam_search,
Expand Down
114 changes: 105 additions & 9 deletions prospr/core/src/depth_first_bnb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@
#include <math.h>

#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <stack>
#include <vector>

#ifdef _OPENMP
#include <omp.h>
#endif

/* All possible variables required by custom pruning. */
struct prune_vars {
size_t max_length;
Expand Down Expand Up @@ -93,17 +99,20 @@ bool reach_prune(Protein *protein, int move, int best_score,
/* A depth-first branch-and-bound search function for finding a minimum
* energy conformation.
*/
void depth_first_bnb(Protein *protein, std::string prune_func) {
protein->reset_conformation();
void depth_first_bnb(Protein *protein, std::string prune_func, bool is_pre_folded) {
if (!is_pre_folded)
protein->reset_conformation();
size_t max_length = protein->get_sequence().length();
int dim = protein->get_dim();
size_t no_neighbors = (size_t)pow(2, (dim - 1));

/* The first two amino acids are fixed to prevent y-axis symmetry. */
if (max_length > 1)
protein->place_amino(-1);
if (max_length <= 2)
return;
if (!is_pre_folded) {
/* The first two amino acids are fixed to prevent y-axis symmetry. */
if (max_length > 1)
protein->place_amino(-1);
if (max_length <= 2)
return;
}

/* Init default prune functions and arguments. */
auto prune_branch = naive_prune;
Expand Down Expand Up @@ -154,9 +163,13 @@ void depth_first_bnb(Protein *protein, std::string prune_func) {

/* Declare and set variables for the depth-first search. */
bool placed_amino = false;
int best_score = 1;
int best_score = 1; protein->get_score();
int score;
std::vector<int> best_hash;
std::vector<int> best_hash = protein->hash_fold();
if (is_pre_folded) {
best_score = protein->get_score();
best_hash = protein->hash_fold();
}

do {
placed_amino = false;
Expand Down Expand Up @@ -214,6 +227,89 @@ void depth_first_bnb(Protein *protein, std::string prune_func) {
}
} while (move != -dim - 1 || !dfs_stack.empty());

/* Complete hash with "straight line" */
while (best_hash.size() < max_length - 1)
best_hash.push_back(best_hash.back());
/* Set best found conformation and return protein. */
protein->set_hash(best_hash);
}

/* Enumerate every valid partial conformation at a given tree depth.
 *
 * A copy of the protein is reset and, if the sequence has more than one amino
 * acid, its second amino acid is fixed with move -1 (prevents y-axis
 * symmetry). Starting from that root, `min(max_length, depth) - 1` further
 * amino acids are placed in every valid way, and each resulting conformation
 * is returned as its own Protein copy. The Protein passed in is not modified.
 *
 * Parameters:
 *   protein - the Protein whose sequence and dimension define the search tree.
 *   depth   - requested depth; 0 and 1 both yield only the root conformation.
 *
 * Returns:
 *   The pre-folded Protein copies. Branches without any valid move before the
 *   requested depth is reached contribute no entries.
 */
std::vector<Protein> pre_fold(Protein* protein, size_t depth) {
    std::vector<Protein> pre_folded;
    const int dim = protein->get_dim();
    const size_t max_length = protein->get_sequence().length();

    /* Places one amino acid per call level; `remaining` is the number of
     * levels that still have to be placed below the current one.
     */
    std::function<void(const Protein&, size_t)> pre_fold_recurse;
    pre_fold_recurse = [&](const Protein& parent, size_t remaining) {
        for (int move = -dim; move <= dim; move++) {
            if (move == 0 || !parent.is_valid(move))
                continue;
            Protein next(parent);
            next.place_amino(move);
            if (remaining > 0)
                pre_fold_recurse(next, remaining - 1);
            else
                pre_folded.push_back(next);
        }
    };

    Protein root(*protein);
    root.reset_conformation();
    /* The first two amino acids are fixed to prevent y-axis symmetry. */
    if (max_length > 1)
        root.place_amino(-1);

    /* size_t is unsigned: subtracting 1 from a zero depth (or a zero-length
     * sequence) would wrap around to a huge level count, so guard it.
     */
    const size_t capped_depth = std::min(max_length, depth);
    const size_t levels = capped_depth > 0 ? capped_depth - 1 : 0;
    /* NOTE(review): when depth >= max_length this still requests
     * max_length - 1 levels on top of the fixed amino acids, which looks like
     * one more than the sequence can hold - confirm whether the cap should be
     * tighter.
     */
    if (levels > 0)
        pre_fold_recurse(root, levels - 1);
    else
        pre_folded.push_back(root);
    return pre_folded;
}

/* A depth-first branch-and-bound search function for finding a minimum energy
 * conformation using OpenMP to explore multiple subtrees in parallel.
 *
 * The search tree is first expanded up to a depth chosen so that roughly
 * `work_ratio` subtrees exist per OpenMP thread. Every subtree is then solved
 * with the serial depth_first_bnb(..., is_pre_folded = true) and the best
 * scoring conformation is set on `protein`.
 *
 * Parameters:
 *   protein     - the Protein to fold; receives the best conformation found
 *                 and the merged aminos_placed / solutions_checked counters.
 *   prune_func  - name of the prune function, forwarded to depth_first_bnb.
 *   work_ratio  - number of subtrees per thread to aim for. Values that give
 *                 fewer than one subtree in total (including non-positive or
 *                 NaN values) are treated as a single subtree.
 *
 * Without OpenMP support this prints a one-time warning and runs the serial
 * depth_first_bnb instead.
 */
void depth_first_bnb_parallel(Protein *protein, std::string prune_func, float work_ratio) {
#ifdef _OPENMP
    /* Ask a real parallel region how many threads it runs with. */
    int workerCount = 1;
    #pragma omp parallel
    #pragma omp single
    {
        workerCount = omp_get_num_threads();
    }

    /* Clamp to at least one subtree so that log() never sees a value <= 0. */
    const double targetSubtreeCount =
        std::max(1.0, static_cast<double>(workerCount) * static_cast<double>(work_ratio));

    /* Each node has up to 2 * dim - 1 child nodes (every direction except the
     * one leading straight back) -> determine closest depth to match
     * work_ratio. A depth below 1 is never requested: depth 1 already denotes
     * the root conformation only.
     */
    const int branching = 2 * protein->get_dim() - 1;
    size_t pre_fold_depth = 1;
    if (branching > 1) {
        const long long rounded_depth =
            llround(log(targetSubtreeCount) / log(static_cast<double>(branching)));
        pre_fold_depth = static_cast<size_t>(std::max(1ll, rounded_depth));
    }

    auto pre_folded = pre_fold(protein, pre_fold_depth);

    /*TODO: Consider sharing a reference to best_score between threads for better pruning.
     * (Requires atomic compare-and-update.)
     */
    int best_score = 1;
    std::vector<int> best_hash;
    const int subtree_count = static_cast<int>(pre_folded.size());
    #pragma omp parallel for schedule(dynamic) shared(best_score, best_hash)
    for (int i = 0; i < subtree_count; i++) {
        Protein& candidate_protein = pre_folded[i];
        depth_first_bnb(&candidate_protein, prune_func, true);
        const int score = candidate_protein.get_score();
        #pragma omp critical
        {
            /* Merge the statistics of every subtree, not only of those that
             * improve the best score; otherwise most of the work is not
             * counted.
             * NOTE(review): candidates start as copies of *protein, so counts
             * it presumably already carried would be included once per
             * subtree - confirm against Protein's copy/reset semantics.
             */
            protein->aminos_placed += candidate_protein.get_aminos_placed();
            protein->solutions_checked += candidate_protein.get_solutions_checked();
            if (score < best_score) {
                best_score = score;
                best_hash = candidate_protein.hash_fold();
            }
        }
    }
    protein->set_hash(best_hash);
#else
    /* work_ratio only matters for the parallel build. */
    (void)work_ratio;
    static bool warned = false;
    if (!warned) {
        std::cerr << "Warning: Built without OpenMP support. Using serial depth_first_bnb(...) instead.\n";
        warned = true;
    }
    depth_first_bnb(protein, prune_func);
#endif
}
7 changes: 6 additions & 1 deletion prospr/core/src/depth_first_bnb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
/* A depth-first branch-and-bound search function for finding a minimum energy
* conformation.
*/
void depth_first_bnb(Protein *protein, std::string prune_func = "");
void depth_first_bnb(Protein *protein, std::string prune_func = "", bool is_pre_folded = false);

/* A depth-first branch-and-bound search function for finding a minimum energy
* conformation using OpenMP to explore multiple subtrees in parallel.
*/
void depth_first_bnb_parallel(Protein *protein, std::string prune_func = "", float work_ratio = 3);

#endif
2 changes: 1 addition & 1 deletion prospr/core/src/protein.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ void Protein::reset_conformation() {
}

/* Returns true if a move is valid, returns false otherwise. */
bool Protein::is_valid(int move) {
bool Protein::is_valid(int move) const {
std::vector<int> check_pos = last_pos;
check_pos[abs(move) - 1] += move / abs(move);

Expand Down
5 changes: 4 additions & 1 deletion prospr/core/src/protein.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class Protein {
void reset_conformation();

/* Returns true if a move is valid, returns false otherwise. */
bool is_valid(int move);
bool is_valid(int move) const;

/* Place the next amino acid and update the conformation accordingly. */
void place_amino(int move, bool track = true);
Expand Down Expand Up @@ -125,6 +125,9 @@ class Protein {
std::vector<std::pair<int, int>>
_append_bond_pairs(std::vector<std::pair<int, int>> pairs,
std::vector<int> pos, std::vector<int> moves);

/* Private member access required to merge statistics of subtree solutions */
friend void depth_first_bnb_parallel(Protein *protein, std::string prune_func, float work_ratio);
};

/* Overload << operator for printing Proteins. */
Expand Down
3 changes: 2 additions & 1 deletion prospr/core/tests/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++11
-Wunreachable-code"
-Wunreachable-code -fopenmp"
DEBUG=""
VALGRIND=""

Expand Down Expand Up @@ -122,6 +122,7 @@ test_all() {
test_protein
test_depth_first
test_depth_first_bnb
test_depth_first_bnb_parallel
test_beam_search
}

Expand Down
10 changes: 10 additions & 0 deletions prospr/core/tests/test_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ void test_depth_first_bnb() {
assert(protein->get_score() == -4);
delete protein;
std::cout << "\t3D Protein solution scores matches.\n";

/* Check if parallel algorithm solutions are found correctly. */
protein = new Protein("PHPHPHPPH", 2, "HP");
depth_first_bnb_parallel(protein);
int score = protein->get_score();
protein->reset();
depth_first_bnb(protein);
assert(score == protein->get_score());
delete protein;
std::cout << "\t2D Protein solution scores matches between parallel and serial algorithms.\n";
}

/* Test functionality of depth_first_bnb. */
Expand Down
22 changes: 22 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,37 @@
specifics.
"""

import os
import sys
from setuptools import setup
from pybind11.setup_helpers import Pybind11Extension, build_ext


def get_openmp_flags(platform, mac_use_lomp=False):
    """
    Select the OpenMP compiler and linker flags for a platform.

    Every flag is its own list element, because each element is handed to the
    compiler/linker as exactly one command-line argument.

    :param platform: platform identifier in the format of ``sys.platform``.
    :param mac_use_lomp: on macOS, whether to build against libomp.
    :return: tuple ``(compile_args, link_args)`` of two lists of strings; both
        are empty when OpenMP is not enabled for the platform.
    """
    if platform == "darwin":
        if mac_use_lomp:
            # "-Xpreprocessor" and "-fopenmp" must be two separate arguments;
            # passed as one string the compiler sees a single unknown option.
            return ["-Xpreprocessor", "-fopenmp"], ["-lomp"]
        return [], []
    if platform.startswith("linux"):
        return ["-fopenmp"], ["-fopenmp"]
    if platform == "win32":
        return ["/openmp"], []
    return [], []


# macOS: Requires libomp installed via brew install libomp
use_omp = os.environ.get("MAC_USE_LOMP", "0") == "1"
compile_args, link_args = get_openmp_flags(sys.platform, use_omp)

if sys.platform == "darwin" and not use_omp:
    print("Warning: OpenMP is not enabled on macOS by default."
          " Set MAC_USE_LOMP=1 to enable it.", file=sys.stderr)

setup(
ext_modules=[
Pybind11Extension(
name="prospr_core",
sources=["prospr/core/core_module.cpp"],
extra_compile_args=compile_args,
extra_link_args=link_args,
language="c++",
),
],
Expand Down
13 changes: 12 additions & 1 deletion tests/core/test_depth_first_bnb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
specifics.
"""

from prospr import Protein, depth_first_bnb
from prospr import Protein, depth_first_bnb, depth_first_bnb_parallel
import pytest


Expand Down Expand Up @@ -50,3 +50,14 @@ def test_protein_3d_depth_first_bnb(self, protein_3d):
assert protein_3d.score == -4
assert protein_3d.solutions_checked == 5
assert protein_3d.aminos_placed == 49368


def test_protein_2d_depth_first_bnb_parallel(self, protein_2d):
    """
    The parallel search must end at the same score as the serial search.
    """
    depth_first_bnb_parallel(protein_2d)
    parallel_score = protein_2d.score

    # Reset the protein before folding it again with the serial algorithm.
    protein_2d.reset()
    depth_first_bnb(protein_2d)
    serial_score = protein_2d.score

    assert parallel_score == serial_score