diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a3948a..5c3a1cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ exclude: > (?x)^( .idea/.*| )$ -default_stages: [commit] +default_stages: [pre-commit] repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.2.0 diff --git a/docs/source/api.rst b/docs/source/api.rst index cd8e562..d31fd4e 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -207,7 +207,8 @@ can be imported from the *prospr.helpers* submodule, e.g. | **export_protein**\ (*protein, path*) | Save conformation of a protein in Protein Data Bank (PDB) file format - for processing or visualization with external software such as `Mol* `_. + for processing or visualization with external software such as + `Mol* `_. | *Parameters:* | * **protein** - *Protein*: Protein object to save the hash of. | * **path** - *os.PathLike or str*: The path of the output file. diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 499178c..13b9766 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -365,6 +365,26 @@ be easily used via a direct import, as is shown below. p_2d.hash_fold() >> [1, 2, -1] +Checkpoints +------------------- +The algorithm *depth_first_bnb(protein)* supports checkpoints to resume an interrupted search +by storing the state of the protein and the algorithm to a file, after a signal +(*SIGTERM* or *SIGINT*) is received. + +.. code-block:: python + + import os + + from prospr import Protein, depth_first_bnb + + os.environ["PROSPR_CACHE_DIR"] = "/tmp/prospr" + + p_2d = Protein("HPPH") + # Will read/write /tmp/prospr/depth_first_bnb/HPPH.checkpoint + depth_first_bnb(p_2d) + print("Done.") + >>> + Visualizing conformations ------------------------- Visualizing conformations can be key to understanding how the resulting diff --git a/prospr/_version.py b/prospr/_version.py index 5cc179e..b87fc0c 100644 --- a/prospr/_version.py +++ b/prospr/_version.py @@ -1 +1 @@ -__version__ = "1.2.8" +__version__ = "1.2.9" diff --git a/prospr/core/src/depth_first_bnb.cpp b/prospr/core/src/depth_first_bnb.cpp index fba8a8e..3ddbc5e 100644 --- a/prospr/core/src/depth_first_bnb.cpp +++ b/prospr/core/src/depth_first_bnb.cpp @@ -7,14 +7,22 @@ */ #include "depth_first_bnb.hpp" - -#include +#include "utils.hpp" #include +#include +#include +#include +#include +#include #include +#include #include #include +/* Global flag for custom handling of SIGINT. */ +std::atomic COUGHT_SIGNAL{0}; + /* All possible variables required by custom pruning. */ struct prune_vars { size_t max_length; @@ -90,10 +98,176 @@ bool reach_prune(Protein *protein, int move, int best_score, return cur_score + branch_score >= best_score; } +/* If a checkpoint location is provided through the environment, attempt to + * store the checkpoint. + */ +void try_store_checkpoint(const Protein &protein, + const std::stack &dfs_stack, int move, + bool placed_amino, int best_score, int score, + const std::vector &best_hash, int iterations) { + /* Return if cache not in use. */ + auto cache_dir = get_cache_dir("depth_first_bnb", true); + if (!cache_dir) { +#ifdef PROSPR_DEBUG_STEPS + std::cout + << "[Debug depth_first_bnb] No cache directory to save checkpoint to." + << std::endl; +#endif + return; + } + + /* If cache is in use, try writing to checkpoint. */ + std::string filename = + *cache_dir + PATH_SEPARATOR + protein.get_sequence() + ".checkpoint"; +#ifdef PROSPR_DEBUG_STEPS + std::cout << "[Debug depth_first_bnb] Writing to checkpoint: " << filename + << std::endl; +#endif + std::ofstream ofs(filename); + if (!ofs) + throw std::runtime_error("Cannot open checkpoint file for writing."); + + ofs << "; prospr checkpoint for sequence " << protein.get_sequence() << "\n"; + ofs << "; Protein state:\n"; + dump_protein_state(protein, ofs); + ofs << "\n; Algorithm state:\n"; + ofs << "algorithm=depth_first_bnb\n"; + + /* Serialize stack as comma-separated list. */ + std::stack temp = dfs_stack; + std::vector stack_data; + while (!temp.empty()) { + stack_data.push_back(temp.top()); + temp.pop(); + } + std::reverse(stack_data.begin(), stack_data.end()); + ofs << "dfs_stack="; + for (size_t i = 0; i < stack_data.size(); ++i) { + if (i != 0) + ofs << ","; + ofs << stack_data[i]; + } + ofs << "\n"; + + ofs << "move=" << move << "\n"; + ofs << "placed_amino=" << placed_amino << "\n"; + ofs << "best_score=" << best_score << "\n"; + ofs << "score=" << score << "\n"; + + ofs << "best_hash="; + for (size_t i = 0; i < best_hash.size(); ++i) { + if (i != 0) + ofs << ","; + ofs << best_hash[i]; + } + ofs << "\n"; + ofs << "iterations=" << iterations << "\n"; +} + +/* If a checkpoint location is provided through the environment, attempt to load + * the checkpoint. + */ +void try_load_checkpoint(Protein &protein, std::stack &dfs_stack, + int &move, bool &placed_amino, int &best_score, + int &score, std::vector &best_hash, + int &iterations) { + /* Return if cache not in use. */ + auto cache_dir = get_cache_dir("depth_first_bnb"); + if (!cache_dir) { +#ifdef PROSPR_DEBUG_STEPS + std::cout + << "[Debug depth_first_bnb] No cache directory to load checkpoint from." + << std::endl; +#endif + return; + } + + /* If cache is in use, try loading to checkpoint. */ + std::string filename = + *cache_dir + PATH_SEPARATOR + protein.get_sequence() + ".checkpoint"; + if (!file_exists(filename)) { +#ifdef PROSPR_DEBUG_STEPS + std::cout << "[Debug depth_first_bnb] No checkpoint to load:" << filename + << std::endl; +#endif + return; + } + +#ifdef PROSPR_DEBUG_STEPS + std::cout << "[Debug depth_first_bnb] Reading from checkpoint: " << filename + << std::endl; +#endif + std::ifstream ifs(filename); + if (!ifs) + throw std::runtime_error("Cannot open checkpoint file for reading."); + + /* Load the protein state. */ + load_protein_state(protein, ifs); + + /* Read the file again for loading the algorithm state. */ + ifs.clear(); + ifs.seekg(0, std::ios::beg); + + std::string line; + while (std::getline(ifs, line)) { + std::string key; + std::string value; + if (!parse_ini_line(line, key, value)) + continue; + + if (key == "dfs_stack") { + /* Clear the stack. */ + dfs_stack = std::stack(); + std::vector stack_data; + std::stringstream ss(value); + std::string token; + while (std::getline(ss, token, ',')) { + stack_data.push_back(std::stoi(token)); + } + + /* Rebuild the stack. */ + for (int v : stack_data) + dfs_stack.push(v); + } else if (key == "algorithm" && value != "depth_first_bnb") { +#ifdef PROSPR_DEBUG_STEPS + std::cerr << "[Debug depth_first_bnb] Unexpected value for checkpoint " + "algorithm: " + << value << std::endl; +#endif + } else if (key == "move") + move = std::stoi(value); + else if (key == "placed_amino") + placed_amino = std::stoi(value); + else if (key == "best_score") + best_score = std::stoi(value); + else if (key == "score") + score = std::stoi(value); + else if (key == "best_hash") { + best_hash.clear(); + std::stringstream ss(value); + std::string token; + while (std::getline(ss, token, ',')) { + best_hash.push_back(std::stoi(token)); + } + } else if (key == "iterations") + iterations = std::stoi(value) - 1; + } +} + +/* Function to catch signals (SIGTERM, SIGINT) and store them for delayed + * handling. */ +void signal_handler(int signal) { + COUGHT_SIGNAL.store(signal, std::memory_order_relaxed); +} + /* A depth-first branch-and-bound search function for finding a minimum * energy conformation. */ void depth_first_bnb(Protein *protein, std::string prune_func) { + /* Override signal handlers. */ + void (*signal_handler_sigint)(int) = std::signal(SIGINT, signal_handler); + void (*signal_handler_sigterm)(int) = std::signal(SIGTERM, signal_handler); + protein->reset_conformation(); size_t max_length = protein->get_sequence().length(); int dim = protein->get_dim(); @@ -158,7 +332,30 @@ void depth_first_bnb(Protein *protein, std::string prune_func) { int score; std::vector best_hash; + int signal = 0; + int iterations = 0; + + /* Load intermediate solution from cache if present. */ + try_load_checkpoint(*protein, dfs_stack, move, placed_amino, best_score, + score, best_hash, iterations); +#ifdef PROSPR_DEBUG_STEPS + std::cout << "[Debug depth_first_bnb] Algorithm starting from iteration " + << iterations << "." << std::endl; +#endif + do { + /* Break if a signal was caught. */ + signal = COUGHT_SIGNAL.exchange(0); + if (signal) + break; + + iterations++; +#ifdef PROSPR_DEBUG_STEPS + std::cout << "[Debug depth_first_bnb] Paused before iteration " + << iterations << ". (Press enter to continue!) " << std::flush; + std::cin.get(); +#endif + placed_amino = false; /* Try to place the current amino acid. */ @@ -214,6 +411,16 @@ void depth_first_bnb(Protein *protein, std::string prune_func) { } } while (move != -dim - 1 || !dfs_stack.empty()); - /* Set best found conformation and return protein. */ + /* Write possible temporary solution to cache, if available. */ + try_store_checkpoint(*protein, dfs_stack, move, placed_amino, best_score, + score, best_hash, iterations); + + /* Set best found conformation. */ protein->set_hash(best_hash); + + /* Restore signal handlers and propagate caught signal. */ + std::signal(SIGINT, signal_handler_sigint); + std::signal(SIGTERM, signal_handler_sigterm); + if (signal) + std::raise(signal); } diff --git a/prospr/core/src/protein.cpp b/prospr/core/src/protein.cpp index 62eb2a3..a0be687 100644 --- a/prospr/core/src/protein.cpp +++ b/prospr/core/src/protein.cpp @@ -180,7 +180,7 @@ Protein &Protein::operator=(const Protein &other) { } /* Returns the Protein's sequence. */ -std::string Protein::get_sequence() { return sequence; } +std::string Protein::get_sequence() const { return sequence; } /* Returns the Protein's set maximum dimension. */ int Protein::get_dim() { return dim; } @@ -212,10 +212,22 @@ AminoAcid *Protein::get_amino(std::vector position) { int Protein::get_score() { return score; } /* Returns the number of checked solutions. */ -std::uint64_t Protein::get_solutions_checked() { return solutions_checked; } +std::uint64_t Protein::get_solutions_checked() const { + return solutions_checked; +} + +/* Set the number of checked solutions. */ +void Protein::_set_solutions_checked(std::uint64_t checked) { + solutions_checked = checked; +} /* Returns the number of amino acids placed. */ -std::uint64_t Protein::get_aminos_placed() { return aminos_placed; } +std::uint64_t Protein::get_aminos_placed() const { return aminos_placed; } + +/* Set the number of amino acids placed. */ +void Protein::_set_aminos_placed(std::uint64_t placed) { + aminos_placed = placed; +} /* Returns if the amino acid at the given index is weighted. */ bool Protein::is_weighted(size_t index) { @@ -340,7 +352,7 @@ void Protein::remove_amino() { } /* Hash and return the fold of the current conformation. */ -std::vector Protein::hash_fold() { +std::vector Protein::hash_fold() const { std::vector fold_hash; std::vector cur_pos(dim, 0); AminoAcid *cur_amino; @@ -383,7 +395,7 @@ void Protein::_change_score(int move, bool placed) { std::vector moves; for (int i = -dim; i <= dim; i++) { - if (i != 0 and i != -move) + if (i != 0 && i != -move) moves.push_back(i); } diff --git a/prospr/core/src/protein.hpp b/prospr/core/src/protein.hpp index 93f3513..05c052b 100644 --- a/prospr/core/src/protein.hpp +++ b/prospr/core/src/protein.hpp @@ -35,7 +35,7 @@ class Protein { Protein &operator=(const Protein &other); /* Returns the Protein's sequence. */ - std::string get_sequence(); + std::string get_sequence() const; /* Returns the Protein's set maximum dimension. */ int get_dim(); @@ -61,10 +61,10 @@ class Protein { int get_score(); /* Returns the number of performed changes. */ - std::uint64_t get_solutions_checked(); + std::uint64_t get_solutions_checked() const; /* Returns the number of amino acids placed. */ - std::uint64_t get_aminos_placed(); + std::uint64_t get_aminos_placed() const; /* Returns if the amino acid at the given index is weighted. */ bool is_weighted(size_t index); @@ -91,7 +91,7 @@ class Protein { void remove_amino(); /* Hash and return the fold of the current conformation. */ - std::vector hash_fold(); + std::vector hash_fold() const; /* Set the conformation to the given hash. */ void set_hash(std::vector fold_hash, bool track = false); @@ -114,6 +114,12 @@ class Protein { std::uint64_t solutions_checked; std::vector amino_acids; + /* Set the number of performed changes. */ + void _set_solutions_checked(std::uint64_t); + + /* Set the number of amino acids placed. */ + void _set_aminos_placed(std::uint64_t); + /* Change score according to the already performed addition or removal * of the given move. */ @@ -125,6 +131,11 @@ class Protein { std::vector> _append_bond_pairs(std::vector> pairs, std::vector pos, std::vector moves); + + /* Deserialize the state of the protein from key=value format. + * Required by the checkpointing utilities. + */ + friend void load_protein_state(Protein&, std::istream&); }; /* Overload << operator for printing Proteins. */ diff --git a/prospr/core/src/utils.cpp b/prospr/core/src/utils.cpp new file mode 100644 index 0000000..b610ef0 --- /dev/null +++ b/prospr/core/src/utils.cpp @@ -0,0 +1,188 @@ +/* File: utils.cpp + * Description: Implementation file for utility functions. + * License: This file is licensed under the GNU LGPL V3 license by + * Okke van Eck (2020 - 2023). See the LICENSE file for the + * specifics. + */ + +#include "utils.hpp" + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#define mkdir(p, m) _mkdir(p) +#include +#define stat _stat +#ifndef S_ISDIR +#define S_ISDIR(m) (((m)&_S_IFDIR) != 0) +#endif +#ifndef S_ISREG +#define S_ISREG(m) (((m)&_S_IFREG) != 0) +#endif +#else +#include +#endif + +/* Check if a file exists. + * (Not using std::filesystem due to macOS compatibility issues) + */ +bool file_exists(const std::string &path) { + struct stat info; + return (stat(path.c_str(), &info) == 0 && S_ISREG(info.st_mode)); +} + +/* Check if a dir exists. + * (Not using std::filesystem due to macOS compatibility issues) + */ +static bool dir_exists(const std::string &path) { + struct stat info; + return stat(path.c_str(), &info) == 0 && S_ISDIR(info.st_mode); +} + +/* Create directories of a path. + * (Not using std::filesystem due to macOS compatibility issues) + */ +static bool make_dirs(const std::string &path) { + /* Skip empty paths. */ + if (path.empty()) + return false; + + /* If already exists, done. */ + if (dir_exists(path)) + return true; + + /* Recursively create parent. */ + auto pos = path.find_last_of(PATH_SEPARATOR); + if (pos != std::string::npos) { + std::string parent = path.substr(0, pos); + if (!parent.empty() && !dir_exists(parent)) { + if (!make_dirs(parent)) + return false; + } + } + + /* Create this directory. */ + if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) { + std::cerr << "Warning: mkdir failed for " << path << ": " << strerror(errno) + << "\n"; + return false; + } + return true; +} + +/* Return the path to the cache directory for a given algorithm, if the + * environment variable PROSPR_CACHE_DIR is set. (Not using std::filesystem due + * to macOS compatibility issues) + */ +std::optional get_cache_dir(const std::string &algorithm, + bool create) { + /* Load cache path from environment, if set. */ + const char *cache_dir_env = std::getenv("PROSPR_CACHE_DIR"); + if (!cache_dir_env) { + return std::nullopt; + } + + /* Check if cache path variable is empty. */ + std::string cache_dir = std::string(cache_dir_env); + trim_inplace(cache_dir); + if (cache_dir.empty()) { + return std::nullopt; + } + + /* Try to create the cache. */ + cache_dir += PATH_SEPARATOR + algorithm; + if (create) { + if (!dir_exists(cache_dir)) { + if (!make_dirs(cache_dir)) { + std::cerr << "Warning: Failed to create prospr cache directory at " + << cache_dir << "\n"; + return std::nullopt; + } + } + } + + return cache_dir; +} + +/* Remove leading and trailing whitespace from a string. */ +void trim_inplace(std::string &s) { + /* Trim first left side, then right. */ + s.erase(0, s.find_first_not_of(" \t\n\r")); + s.erase(s.find_last_not_of(" \t\n\r") + 1); +} + +/* Parse a line in INI format (key=value) which may contain comments. */ +bool parse_ini_line(std::string line, std::string &key, std::string &value) { + /* Remove comments. */ + auto pos = line.find_first_of(";#"); + if (pos != std::string::npos) + line.erase(pos); + + /* Skip empty lines. */ + trim_inplace(line); + if (line.empty()) + return false; + + /* Split into key and value. */ + pos = line.find('='); + + /* Skip invalid lines. */ + if (pos == std::string::npos) { +#ifdef PROSPR_DEBUG_STEPS + std::cerr << "[Debug parse_ini_line] Could not parse this line as INI: " + << line << std::endl; +#endif + return false; + } + + /* Parse remaining line. */ + key = line.substr(0, pos); + value = line.substr(pos + 1); + trim_inplace(key); + trim_inplace(value); + return true; +} + +/* Serialize the state of the protein to key=value format. */ +void dump_protein_state(const Protein &protein, std::ostream &out) { + out << "current_hash="; + auto current_hash = protein.hash_fold(); + for (size_t i = 0; i < current_hash.size(); ++i) { + if (i != 0) + out << ","; + out << current_hash[i]; + } + out << "\n"; + out << "aminos_placed=" << protein.get_aminos_placed() << "\n"; + out << "solutions_checked=" << protein.get_solutions_checked() << "\n"; +} + +/* Deserialize the state of the protein from key=value format. */ +void load_protein_state(Protein &protein, std::istream &in) { + std::string line; + while (std::getline(in, line)) { + std::string key; + std::string value; + + /* Continue on invalid lines. */ + if (!parse_ini_line(line, key, value)) + continue; + + /* Parse valid lines based on variable key. */ + if (key == "current_hash") { + protein.reset(); + std::stringstream ss(value); + std::string token; + while (std::getline(ss, token, ',')) { + protein.place_amino(std::stoi(token)); + } + } else if (key == "aminos_placed") + protein._set_aminos_placed(std::stoi(value)); + else if (key == "solutions_checked") + protein._set_solutions_checked(std::stoi(value)); + } +} diff --git a/prospr/core/src/utils.hpp b/prospr/core/src/utils.hpp new file mode 100644 index 0000000..fd08da9 --- /dev/null +++ b/prospr/core/src/utils.hpp @@ -0,0 +1,51 @@ +/* File: utils.hpp + * Description: Header file for utility functions. + * License: This file is licensed under the GNU LGPL V3 license by + * Okke van Eck (2020 - 2023). See the LICENSE file for the + * specifics. + */ + +#ifndef UTILS_H +#define UTILS_H + +#include "protein.hpp" + +#include +#include + +#ifdef _WIN32 +#define PATH_SEPARATOR '\\' +#else +#define PATH_SEPARATOR '/' +#endif + +/* Check if a file exists. + * (Not using std::filesystem due to macOS compatibility issues) + */ +bool file_exists(const std::string &path); + +/* Check if a dir exists. + * (Not using std::filesystem due to macOS compatibility issues) + */ +static bool dir_exists(const std::string &path); + +/* Return the path to the cache directory for a given algorithm, if the + * environment variable PROSPR_CACHE_DIR ist set. (Not using std::filesystem due + * to macOS compatibility issues) + */ +std::optional get_cache_dir(const std::string &algorithm, + bool create = false); + +/* Remove leading and trailing whitespace from a string */ +void trim_inplace(std::string &s); + +/* Parse a line in INI format (key=value) which may contain comments */ +bool parse_ini_line(std::string line, std::string &key, std::string &value); + +/* Serialize the state of the protein to key=value format */ +void dump_protein_state(const Protein &protein, std::ostream &out); + +/* Deserialize the state of the protein from key=value format */ +void load_protein_state(Protein &protein, std::istream &in); + +#endif diff --git a/prospr/core/tests/run_tests.sh b/prospr/core/tests/run_tests.sh index 40aac50..df8089a 100755 --- a/prospr/core/tests/run_tests.sh +++ b/prospr/core/tests/run_tests.sh @@ -9,7 +9,7 @@ set -e SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++11 +CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++17 -Wunreachable-code" DEBUG="" VALGRIND="" @@ -80,7 +80,7 @@ test_depth_first_bnb() { # shellcheck disable=SC2086 c++ $CFLAGS -o test_algorithms test_algorithms.cpp ../src/beam_search.cpp \ ../src/depth_first.cpp ../src/depth_first_bnb.cpp ../src/protein.cpp \ - ../src/amino_acid.cpp + ../src/amino_acid.cpp ../src/utils.cpp echo "~ Compilation successful, running the tests.." diff --git a/prospr/datasets.py b/prospr/datasets.py index 2c93489..3f2bc7b 100644 --- a/prospr/datasets.py +++ b/prospr/datasets.py @@ -7,12 +7,13 @@ specifics. """ +from pathlib import Path import pandas as pd def _load_dataset(folder, filename): """Returns a specified dataset as a dataframe.""" - return pd.read_csv(f"prospr/data/{folder}/{filename}") + return pd.read_csv(f"{Path(__file__).parent}/data/{folder}/{filename}") def load_vanEck250(length=10): diff --git a/prospr/helpers.py b/prospr/helpers.py index cb6b31a..37aa602 100644 --- a/prospr/helpers.py +++ b/prospr/helpers.py @@ -51,7 +51,8 @@ def export_protein(protein, path): buf.write("HEADER HP-protein folding structure\n") buf.write(f"TITLE Sequence: {protein.sequence}\n") buf.write( - "REMARK Generated using prospr (https://github.com/okkevaneck/prospr)\n" + "REMARK " + + "Generated using prospr (https://github.com/okkevaneck/prospr)\n" ) # Amino acids for i, c in enumerate(coordinates): @@ -61,7 +62,7 @@ def export_protein(protein, path): buf.write(f"ATOM {i+1:5d} CA {amino_acid:>3} A{i+1:4d} ") buf.write(f"{x:8.3f}{y:8.3f}{z:8.3f} 1.00 0.00 C\n") # Chain - buf.write(f"CONECT 1 2\n") + buf.write("CONECT 1 2\n") for i in range(2, len(coordinates)): buf.write(f"CONECT {i:4d} {i-1:4d} {i+1:4d}\n") buf.write("END\n") diff --git a/pyproject.toml b/pyproject.toml index 8119ddd..7acc7f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ documentation = "https://prospr.readthedocs.io" repository = "https://github.com/okkevaneck/prospr" [tool.setuptools] -packages = ["prospr"] +packages = {find = {include = ["prospr", "prospr.*"]}} package-dir = {"prospr" = "prospr"} zip-safe = false platforms = ["Linux", "macOS", "Windows"] diff --git a/setup.py b/setup.py index d6f571f..6687296 100644 --- a/setup.py +++ b/setup.py @@ -8,17 +8,27 @@ specifics. """ +import os from setuptools import setup from pybind11.setup_helpers import Pybind11Extension, build_ext +module = Pybind11Extension( + name="prospr_core", + sources=[ + "prospr/core/core_module.cpp", + "prospr/core/src/utils.cpp", + ], + language="c++", + cxx_std=17, +) + +# Check enable debugging +# (Console output and pausing in depth_first_bnb(...)) +if os.getenv("PROSPR_DEBUG_STEPS", "0") == "1": + module.define_macros = list(module.define_macros or []) + module.define_macros.append(("PROSPR_DEBUG_STEPS", None)) setup( - ext_modules=[ - Pybind11Extension( - name="prospr_core", - sources=["prospr/core/core_module.cpp"], - language="c++", - ), - ], + ext_modules=[module], cmdclass={"build_ext": build_ext}, ) diff --git a/tests/core/test_depth_first_bnb.py b/tests/core/test_depth_first_bnb.py index 51c3af7..8d49ac6 100644 --- a/tests/core/test_depth_first_bnb.py +++ b/tests/core/test_depth_first_bnb.py @@ -8,6 +8,9 @@ specifics. """ +import os +from pathlib import Path +from tempfile import TemporaryDirectory from prospr import Protein, depth_first_bnb import pytest @@ -35,7 +38,7 @@ def test_protein_2d_depth_first_bnb_naive(self, protein_2d): def test_protein_2d_depth_first_bnb_reach(self, protein_2d): """ - Test if a 2D solutions is folded correctly using reach_prune criteria. + Test if a 2D solution is folded correctly using reach_prune criteria. """ depth_first_bnb(protein_2d, "reach_prune") assert protein_2d.score == -3 @@ -50,3 +53,46 @@ def test_protein_3d_depth_first_bnb(self, protein_3d): assert protein_3d.score == -4 assert protein_3d.solutions_checked == 5 assert protein_3d.aminos_placed == 49368 + + def test_depth_first_bnb_no_checkpoint(self, protein_3d): + """ + Test if a checkpoint is not created by default. + """ + if "PROSPR_CACHE_DIR" in os.environ: + del os.environ["PROSPR_CACHE_DIR"] + with TemporaryDirectory() as tmpdir: + assert len(list(Path(tmpdir).iterdir())) == 0 + depth_first_bnb(protein_3d) + assert len(list(Path(tmpdir).iterdir())) == 0 + # Still solves the protein correctly. + assert protein_3d.score == -4 + assert protein_3d.solutions_checked == 5 + assert protein_3d.aminos_placed == 49368 + + def test_depth_first_bnb_checkpoint(self, protein_3d): + """ + Test if a checkpoint is created correctly. + """ + # Still solves the protein correctly + def check_protein(): + assert protein_3d.score == -4 + assert protein_3d.solutions_checked == 5 + assert protein_3d.aminos_placed == 49368 + + with TemporaryDirectory() as tmpdir: + os.environ["PROSPR_CACHE_DIR"] = tmpdir + checkpoint_path = ( + Path(tmpdir) + / "depth_first_bnb" + / (protein_3d.sequence + ".checkpoint") + ) + assert not checkpoint_path.exists() + depth_first_bnb(protein_3d) + check_protein() + assert checkpoint_path.exists() + checkpoint = checkpoint_path.read_text() + protein_3d.reset() + depth_first_bnb(protein_3d) + check_protein() + assert checkpoint_path.exists() + assert checkpoint == checkpoint_path.read_text() diff --git a/tests/helpers/test_helpers.py b/tests/helpers/test_helpers.py index b4c350b..b47ed50 100644 --- a/tests/helpers/test_helpers.py +++ b/tests/helpers/test_helpers.py @@ -11,7 +11,7 @@ from tempfile import NamedTemporaryFile import pytest -from prospr import * +from prospr import Protein, export_protein, depth_first class TestHelpers: @@ -47,7 +47,8 @@ def test_export_HHPHH(self): assert pdb_lines[1].startswith("TITLE ") assert pdb_lines[2].startswith("REMARK ") - # Expect valid atoms section (Carbon with ALA/SER residue names for amino acids) + # Expect valid atoms section (Carbon with ALA/SER residue names for + # amino acids) assert all( pdb_lines[3 + i].startswith("ATOM ") for i in range(n_aminos) )