diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4a3948a..5c3a1cb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ exclude: >
(?x)^(
.idea/.*|
)$
-default_stages: [commit]
+default_stages: [pre-commit]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
diff --git a/docs/source/api.rst b/docs/source/api.rst
index cd8e562..d31fd4e 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -207,7 +207,8 @@ can be imported from the *prospr.helpers* submodule, e.g.
| **export_protein**\ (*protein, path*)
| Save conformation of a protein in Protein Data Bank (PDB) file format
- for processing or visualization with external software such as `Mol* `_.
+ for processing or visualization with external software such as
+ `Mol* `_.
| *Parameters:*
| * **protein** - *Protein*: Protein object to save the hash of.
| * **path** - *os.PathLike or str*: The path of the output file.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 499178c..13b9766 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -365,6 +365,26 @@ be easily used via a direct import, as is shown below.
p_2d.hash_fold()
>> [1, 2, -1]
+Checkpoints
+-------------------
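+The algorithm *depth_first_bnb(protein)* supports checkpoints for resuming an
+interrupted search. When the environment variable *PROSPR_CACHE_DIR* is set,
+the state of the protein and the algorithm is written to a checkpoint file
+whenever the search stops, either because it finished or because a signal
+(*SIGTERM* or *SIGINT*) was received. The next call for the same sequence
+continues from that file.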
+
+.. code-block:: python
+
+ import os
+
+ from prospr import Protein, depth_first_bnb
+
+ os.environ["PROSPR_CACHE_DIR"] = "/tmp/prospr"
+
+ p_2d = Protein("HPPH")
+ # Will read/write /tmp/prospr/depth_first_bnb/HPPH.checkpoint
+ depth_first_bnb(p_2d)
+ print("Done.")
+ >>>
+
Visualizing conformations
-------------------------
Visualizing conformations can be key to understanding how the resulting
diff --git a/prospr/_version.py b/prospr/_version.py
index 5cc179e..b87fc0c 100644
--- a/prospr/_version.py
+++ b/prospr/_version.py
@@ -1 +1 @@
-__version__ = "1.2.8"
+__version__ = "1.2.9"
diff --git a/prospr/core/src/depth_first_bnb.cpp b/prospr/core/src/depth_first_bnb.cpp
index fba8a8e..3ddbc5e 100644
--- a/prospr/core/src/depth_first_bnb.cpp
+++ b/prospr/core/src/depth_first_bnb.cpp
@@ -7,14 +7,22 @@
*/
#include "depth_first_bnb.hpp"
-
-#include
+#include "utils.hpp"
#include
+#include
+#include
+#include
+#include
+#include
#include
+#include
#include
#include
+/* Global flag holding the caught signal (SIGINT or SIGTERM), 0 if none. */
+std::atomic COUGHT_SIGNAL{0};
+
/* All possible variables required by custom pruning. */
struct prune_vars {
size_t max_length;
@@ -90,10 +98,176 @@ bool reach_prune(Protein *protein, int move, int best_score,
return cur_score + branch_score >= best_score;
}
+/* If a checkpoint location is provided through the environment, attempt to
+ * store the checkpoint.
+ *
+ * The cache directory is obtained from get_cache_dir("depth_first_bnb", true),
+ * i.e. it is created when missing. Without a cache directory the function
+ * returns without writing anything. The checkpoint is written to
+ * <cache_dir><PATH_SEPARATOR><sequence>.checkpoint as key=value lines, with
+ * lines starting with ';' used as comments.
+ *
+ * Parameters:
+ *   protein      - protein whose state is written via dump_protein_state().
+ *   dfs_stack    - search stack, written bottom-to-top as "dfs_stack".
+ *   move, placed_amino, best_score, score, iterations
+ *                - search variables, each written under its own name.
+ *   best_hash    - written as comma-separated list under "best_hash".
+ *
+ * Throws std::runtime_error if the checkpoint file cannot be opened.
+ */
+void try_store_checkpoint(const Protein &protein,
+ const std::stack &dfs_stack, int move,
+ bool placed_amino, int best_score, int score,
+ const std::vector &best_hash, int iterations) {
+ /* Return if cache not in use. */
+ auto cache_dir = get_cache_dir("depth_first_bnb", true);
+ if (!cache_dir) {
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout
+ << "[Debug depth_first_bnb] No cache directory to save checkpoint to."
+ << std::endl;
+#endif
+ return;
+ }
+
+ /* If cache is in use, try writing to checkpoint. */
+ std::string filename =
+ *cache_dir + PATH_SEPARATOR + protein.get_sequence() + ".checkpoint";
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout << "[Debug depth_first_bnb] Writing to checkpoint: " << filename
+ << std::endl;
+#endif
+ std::ofstream ofs(filename);
+ if (!ofs)
+ throw std::runtime_error("Cannot open checkpoint file for writing.");
+
+ /* Header comment, protein state, then the algorithm state. */
+ ofs << "; prospr checkpoint for sequence " << protein.get_sequence() << "\n";
+ ofs << "; Protein state:\n";
+ dump_protein_state(protein, ofs);
+ ofs << "\n; Algorithm state:\n";
+ ofs << "algorithm=depth_first_bnb\n";
+
+ /* Serialize stack as comma-separated list. A copy is popped top-first and
+ * then reversed, so the file lists the elements bottom-to-top; the loader
+ * can rebuild the stack by pushing them in file order. */
+ std::stack temp = dfs_stack;
+ std::vector stack_data;
+ while (!temp.empty()) {
+ stack_data.push_back(temp.top());
+ temp.pop();
+ }
+ std::reverse(stack_data.begin(), stack_data.end());
+ ofs << "dfs_stack=";
+ for (size_t i = 0; i < stack_data.size(); ++i) {
+ if (i != 0)
+ ofs << ",";
+ ofs << stack_data[i];
+ }
+ ofs << "\n";
+
+ ofs << "move=" << move << "\n";
+ /* The bool is streamed without boolalpha, so it is written as 0 or 1. */
+ ofs << "placed_amino=" << placed_amino << "\n";
+ ofs << "best_score=" << best_score << "\n";
+ ofs << "score=" << score << "\n";
+
+ ofs << "best_hash=";
+ for (size_t i = 0; i < best_hash.size(); ++i) {
+ if (i != 0)
+ ofs << ",";
+ ofs << best_hash[i];
+ }
+ ofs << "\n";
+ ofs << "iterations=" << iterations << "\n";
+}
+
+/* If a checkpoint location is provided through the environment, attempt to load
+ * the checkpoint.
+ *
+ * Looks for <cache_dir><PATH_SEPARATOR><sequence>.checkpoint, where the cache
+ * directory comes from get_cache_dir("depth_first_bnb") (not created when
+ * missing). If there is no cache directory or no such file, all arguments are
+ * left untouched. Otherwise the protein state is restored through
+ * load_protein_state() and every algorithm key found in the file overwrites
+ * the matching output argument; keys that are absent leave their argument
+ * unchanged.
+ *
+ * Throws std::runtime_error if the file exists but cannot be opened. The
+ * std::stoi conversions throw std::invalid_argument / std::out_of_range on
+ * malformed values.
+ */
+void try_load_checkpoint(Protein &protein, std::stack &dfs_stack,
+ int &move, bool &placed_amino, int &best_score,
+ int &score, std::vector &best_hash,
+ int &iterations) {
+ /* Return if cache not in use. */
+ auto cache_dir = get_cache_dir("depth_first_bnb");
+ if (!cache_dir) {
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout
+ << "[Debug depth_first_bnb] No cache directory to load checkpoint from."
+ << std::endl;
+#endif
+ return;
+ }
+
+ /* If cache is in use, try loading from the checkpoint. */
+ std::string filename =
+ *cache_dir + PATH_SEPARATOR + protein.get_sequence() + ".checkpoint";
+ if (!file_exists(filename)) {
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout << "[Debug depth_first_bnb] No checkpoint to load:" << filename
+ << std::endl;
+#endif
+ return;
+ }
+
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout << "[Debug depth_first_bnb] Reading from checkpoint: " << filename
+ << std::endl;
+#endif
+ std::ifstream ifs(filename);
+ if (!ifs)
+ throw std::runtime_error("Cannot open checkpoint file for reading.");
+
+ /* Load the protein state. */
+ load_protein_state(protein, ifs);
+
+ /* Read the file again for loading the algorithm state.
+ * load_protein_state() reads until getline fails, so the stream state has
+ * to be cleared before seeking back to the start. */
+ ifs.clear();
+ ifs.seekg(0, std::ios::beg);
+
+ std::string line;
+ while (std::getline(ifs, line)) {
+ std::string key;
+ std::string value;
+ /* Skip comments, blank lines and lines without '='. */
+ if (!parse_ini_line(line, key, value))
+ continue;
+
+ if (key == "dfs_stack") {
+ /* Clear the stack. */
+ dfs_stack = std::stack();
+ std::vector stack_data;
+ std::stringstream ss(value);
+ std::string token;
+ while (std::getline(ss, token, ',')) {
+ stack_data.push_back(std::stoi(token));
+ }
+
+ /* Rebuild the stack. Elements are pushed in file order, so the last
+ * listed element ends up on top. */
+ for (int v : stack_data)
+ dfs_stack.push(v);
+ } else if (key == "algorithm" && value != "depth_first_bnb") {
+ /* A foreign algorithm name is only reported in debug builds; loading
+ * continues with the remaining keys. */
+#ifdef PROSPR_DEBUG_STEPS
+ std::cerr << "[Debug depth_first_bnb] Unexpected value for checkpoint "
+ "algorithm: "
+ << value << std::endl;
+#endif
+ } else if (key == "move")
+ move = std::stoi(value);
+ else if (key == "placed_amino")
+ placed_amino = std::stoi(value);
+ else if (key == "best_score")
+ best_score = std::stoi(value);
+ else if (key == "score")
+ score = std::stoi(value);
+ else if (key == "best_hash") {
+ best_hash.clear();
+ std::stringstream ss(value);
+ std::string token;
+ while (std::getline(ss, token, ',')) {
+ best_hash.push_back(std::stoi(token));
+ }
+ } else if (key == "iterations")
+ /* NOTE(review): presumably the -1 compensates for the increment at the
+ * top of the search loop, so a reloaded run reports the stored count
+ * again - confirm against depth_first_bnb(). */
+ iterations = std::stoi(value) - 1;
+ }
+}
+
+/* Function to catch signals (SIGTERM, SIGINT) and store them for delayed
+ * handling.
+ *
+ * Only records the signal number in the global COUGHT_SIGNAL flag; the search
+ * loop in depth_first_bnb() reads and clears the flag with exchange(0) at the
+ * start of every iteration.
+ */
+void signal_handler(int signal) {
+ COUGHT_SIGNAL.store(signal, std::memory_order_relaxed);
+}
+
/* A depth-first branch-and-bound search function for finding a minimum
* energy conformation.
*/
void depth_first_bnb(Protein *protein, std::string prune_func) {
+ /* Override signal handlers. */
+ void (*signal_handler_sigint)(int) = std::signal(SIGINT, signal_handler);
+ void (*signal_handler_sigterm)(int) = std::signal(SIGTERM, signal_handler);
+
protein->reset_conformation();
size_t max_length = protein->get_sequence().length();
int dim = protein->get_dim();
@@ -158,7 +332,30 @@ void depth_first_bnb(Protein *protein, std::string prune_func) {
int score;
std::vector best_hash;
+ int signal = 0;
+ int iterations = 0;
+
+ /* Load intermediate solution from cache if present. */
+ try_load_checkpoint(*protein, dfs_stack, move, placed_amino, best_score,
+ score, best_hash, iterations);
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout << "[Debug depth_first_bnb] Algorithm starting from iteration "
+ << iterations << "." << std::endl;
+#endif
+
do {
+ /* Break if a signal was caught. */
+ signal = COUGHT_SIGNAL.exchange(0);
+ if (signal)
+ break;
+
+ iterations++;
+#ifdef PROSPR_DEBUG_STEPS
+ std::cout << "[Debug depth_first_bnb] Paused before iteration "
+ << iterations << ". (Press enter to continue!) " << std::flush;
+ std::cin.get();
+#endif
+
placed_amino = false;
/* Try to place the current amino acid. */
@@ -214,6 +411,16 @@ void depth_first_bnb(Protein *protein, std::string prune_func) {
}
} while (move != -dim - 1 || !dfs_stack.empty());
- /* Set best found conformation and return protein. */
+ /* Write possible temporary solution to cache, if available. */
+ try_store_checkpoint(*protein, dfs_stack, move, placed_amino, best_score,
+ score, best_hash, iterations);
+
+ /* Set best found conformation. */
protein->set_hash(best_hash);
+
+ /* Restore signal handlers and propagate caught signal. */
+ std::signal(SIGINT, signal_handler_sigint);
+ std::signal(SIGTERM, signal_handler_sigterm);
+ if (signal)
+ std::raise(signal);
}
diff --git a/prospr/core/src/protein.cpp b/prospr/core/src/protein.cpp
index 62eb2a3..a0be687 100644
--- a/prospr/core/src/protein.cpp
+++ b/prospr/core/src/protein.cpp
@@ -180,7 +180,7 @@ Protein &Protein::operator=(const Protein &other) {
}
/* Returns the Protein's sequence. */
-std::string Protein::get_sequence() { return sequence; }
+std::string Protein::get_sequence() const { return sequence; }
/* Returns the Protein's set maximum dimension. */
int Protein::get_dim() { return dim; }
@@ -212,10 +212,22 @@ AminoAcid *Protein::get_amino(std::vector position) {
int Protein::get_score() { return score; }
/* Returns the number of checked solutions. */
-std::uint64_t Protein::get_solutions_checked() { return solutions_checked; }
+std::uint64_t Protein::get_solutions_checked() const {
+ return solutions_checked;
+}
+
+/* Set the number of checked solutions.
+ * Overwrites the counter with the given value; called by
+ * load_protein_state() when restoring a checkpoint.
+ */
+void Protein::_set_solutions_checked(std::uint64_t checked) {
+ solutions_checked = checked;
+}
/* Returns the number of amino acids placed. */
-std::uint64_t Protein::get_aminos_placed() { return aminos_placed; }
+std::uint64_t Protein::get_aminos_placed() const { return aminos_placed; }
+
+/* Set the number of amino acids placed.
+ * Overwrites the counter with the given value; called by
+ * load_protein_state() when restoring a checkpoint.
+ */
+void Protein::_set_aminos_placed(std::uint64_t placed) {
+ aminos_placed = placed;
+}
/* Returns if the amino acid at the given index is weighted. */
bool Protein::is_weighted(size_t index) {
@@ -340,7 +352,7 @@ void Protein::remove_amino() {
}
/* Hash and return the fold of the current conformation. */
-std::vector Protein::hash_fold() {
+std::vector Protein::hash_fold() const {
std::vector fold_hash;
std::vector cur_pos(dim, 0);
AminoAcid *cur_amino;
@@ -383,7 +395,7 @@ void Protein::_change_score(int move, bool placed) {
std::vector moves;
for (int i = -dim; i <= dim; i++) {
- if (i != 0 and i != -move)
+ if (i != 0 && i != -move)
moves.push_back(i);
}
diff --git a/prospr/core/src/protein.hpp b/prospr/core/src/protein.hpp
index 93f3513..05c052b 100644
--- a/prospr/core/src/protein.hpp
+++ b/prospr/core/src/protein.hpp
@@ -35,7 +35,7 @@ class Protein {
Protein &operator=(const Protein &other);
/* Returns the Protein's sequence. */
- std::string get_sequence();
+ std::string get_sequence() const;
/* Returns the Protein's set maximum dimension. */
int get_dim();
@@ -61,10 +61,10 @@ class Protein {
int get_score();
/* Returns the number of performed changes. */
- std::uint64_t get_solutions_checked();
+ std::uint64_t get_solutions_checked() const;
/* Returns the number of amino acids placed. */
- std::uint64_t get_aminos_placed();
+ std::uint64_t get_aminos_placed() const;
/* Returns if the amino acid at the given index is weighted. */
bool is_weighted(size_t index);
@@ -91,7 +91,7 @@ class Protein {
void remove_amino();
/* Hash and return the fold of the current conformation. */
- std::vector hash_fold();
+ std::vector hash_fold() const;
/* Set the conformation to the given hash. */
void set_hash(std::vector fold_hash, bool track = false);
@@ -114,6 +114,12 @@ class Protein {
std::uint64_t solutions_checked;
std::vector amino_acids;
+ /* Set the number of checked solutions. */
+ void _set_solutions_checked(std::uint64_t);
+
+ /* Set the number of amino acids placed. */
+ void _set_aminos_placed(std::uint64_t);
+
/* Change score according to the already performed addition or removal
* of the given move.
*/
@@ -125,6 +131,11 @@ class Protein {
std::vector>
_append_bond_pairs(std::vector> pairs,
std::vector pos, std::vector moves);
+
+ /* Deserialize the state of the protein from key=value format.
+ * Required by the checkpointing utilities.
+ */
+ friend void load_protein_state(Protein&, std::istream&);
};
/* Overload << operator for printing Proteins. */
diff --git a/prospr/core/src/utils.cpp b/prospr/core/src/utils.cpp
new file mode 100644
index 0000000..b610ef0
--- /dev/null
+++ b/prospr/core/src/utils.cpp
@@ -0,0 +1,188 @@
+/* File: utils.cpp
+ * Description: Implementation file for utility functions.
+ * License: This file is licensed under the GNU LGPL V3 license by
+ * Okke van Eck (2020 - 2023). See the LICENSE file for the
+ * specifics.
+ */
+
+#include "utils.hpp"
+
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+#include
+#define mkdir(p, m) _mkdir(p)
+#include
+#define stat _stat
+#ifndef S_ISDIR
+#define S_ISDIR(m) (((m)&_S_IFDIR) != 0)
+#endif
+#ifndef S_ISREG
+#define S_ISREG(m) (((m)&_S_IFREG) != 0)
+#endif
+#else
+#include
+#endif
+
+/* Check if a file exists.
+ * (Not using std::filesystem due to macOS compatibility issues)
+ *
+ * Returns true only if stat() succeeds for the path and the entry is a
+ * regular file; directories and missing paths yield false.
+ */
+bool file_exists(const std::string &path) {
+ struct stat info;
+ return (stat(path.c_str(), &info) == 0 && S_ISREG(info.st_mode));
+}
+
+/* Check if a dir exists.
+ * (Not using std::filesystem due to macOS compatibility issues)
+ *
+ * Returns true only if stat() succeeds for the path and the entry is a
+ * directory; regular files and missing paths yield false.
+ */
+static bool dir_exists(const std::string &path) {
+ struct stat info;
+ return stat(path.c_str(), &info) == 0 && S_ISDIR(info.st_mode);
+}
+
+/* Create directories of a path.
+ * (Not using std::filesystem due to macOS compatibility issues)
+ *
+ * Creates the directory and, recursively, every missing parent. Returns true
+ * if the directory exists afterwards, false for an empty path or when a
+ * mkdir call fails (a warning is then printed to std::cerr).
+ */
+static bool make_dirs(const std::string &path) {
+ /* Skip empty paths. */
+ if (path.empty())
+ return false;
+
+ /* If already exists, done. */
+ if (dir_exists(path))
+ return true;
+
+ /* Recursively create parent. The parent is everything before the last
+ * separator; an empty parent (path directly under the root) is skipped. */
+ auto pos = path.find_last_of(PATH_SEPARATOR);
+ if (pos != std::string::npos) {
+ std::string parent = path.substr(0, pos);
+ if (!parent.empty() && !dir_exists(parent)) {
+ if (!make_dirs(parent))
+ return false;
+ }
+ }
+
+ /* Create this directory. EEXIST is tolerated, so a directory that appeared
+ * in the meantime does not count as a failure. On Windows the mkdir macro
+ * above drops the mode argument. */
+ if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) {
+ std::cerr << "Warning: mkdir failed for " << path << ": " << strerror(errno)
+ << "\n";
+ return false;
+ }
+ return true;
+}
+
+/* Return the path to the cache directory for a given algorithm, if the
+ * environment variable PROSPR_CACHE_DIR is set. (Not using std::filesystem due
+ * to macOS compatibility issues)
+ *
+ * Parameters:
+ *   algorithm - name of the subdirectory appended to the cache path.
+ *   create    - when true, the directory (and missing parents) is created.
+ *
+ * Returns <PROSPR_CACHE_DIR><PATH_SEPARATOR><algorithm>, or std::nullopt when
+ * the variable is unset, blank after trimming, or the directory could not be
+ * created. With create == false the returned directory may not exist.
+ */
+std::optional get_cache_dir(const std::string &algorithm,
+ bool create) {
+ /* Load cache path from environment, if set. */
+ const char *cache_dir_env = std::getenv("PROSPR_CACHE_DIR");
+ if (!cache_dir_env) {
+ return std::nullopt;
+ }
+
+ /* Check if cache path variable is empty. */
+ std::string cache_dir = std::string(cache_dir_env);
+ trim_inplace(cache_dir);
+ if (cache_dir.empty()) {
+ return std::nullopt;
+ }
+
+ /* Try to create the cache. */
+ cache_dir += PATH_SEPARATOR + algorithm;
+ if (create) {
+ if (!dir_exists(cache_dir)) {
+ if (!make_dirs(cache_dir)) {
+ std::cerr << "Warning: Failed to create prospr cache directory at "
+ << cache_dir << "\n";
+ return std::nullopt;
+ }
+ }
+ }
+
+ return cache_dir;
+}
+
+/* Remove leading and trailing whitespace from a string.
+ * Whitespace is space, tab, newline and carriage return. A string made up of
+ * whitespace only becomes empty.
+ */
+void trim_inplace(std::string &s) {
+ /* Trim first left side, then right. */
+ /* No non-whitespace character: find returns npos and erase(0, npos) clears
+ * the whole string. */
+ s.erase(0, s.find_first_not_of(" \t\n\r"));
+ /* On an empty string npos + 1 wraps around to 0, so erase(0) is a no-op. */
+ s.erase(s.find_last_not_of(" \t\n\r") + 1);
+}
+
+/* Parse a line in INI format (key=value) which may contain comments.
+ *
+ * Everything from the first ';' or '#' onwards is treated as a comment, so
+ * neither keys nor values can contain these characters. The line is split at
+ * the first '='; key and value are trimmed.
+ *
+ * Returns true and fills key/value for a valid line. Returns false for blank
+ * lines, comment-only lines and lines without '='; key and value are left
+ * unmodified in that case.
+ */
+bool parse_ini_line(std::string line, std::string &key, std::string &value) {
+ /* Remove comments. */
+ auto pos = line.find_first_of(";#");
+ if (pos != std::string::npos)
+ line.erase(pos);
+
+ /* Skip empty lines. */
+ trim_inplace(line);
+ if (line.empty())
+ return false;
+
+ /* Split into key and value. */
+ pos = line.find('=');
+
+ /* Skip invalid lines. */
+ if (pos == std::string::npos) {
+#ifdef PROSPR_DEBUG_STEPS
+ std::cerr << "[Debug parse_ini_line] Could not parse this line as INI: "
+ << line << std::endl;
+#endif
+ return false;
+ }
+
+ /* Parse remaining line. */
+ key = line.substr(0, pos);
+ value = line.substr(pos + 1);
+ trim_inplace(key);
+ trim_inplace(value);
+ return true;
+}
+
+/* Serialize the state of the protein to key=value format.
+ *
+ * Writes three lines to out:
+ *   current_hash=<comma-separated result of hash_fold()>
+ *   aminos_placed=<get_aminos_placed()>
+ *   solutions_checked=<get_solutions_checked()>
+ */
+void dump_protein_state(const Protein &protein, std::ostream &out) {
+ out << "current_hash=";
+ auto current_hash = protein.hash_fold();
+ for (size_t i = 0; i < current_hash.size(); ++i) {
+ if (i != 0)
+ out << ",";
+ out << current_hash[i];
+ }
+ out << "\n";
+ out << "aminos_placed=" << protein.get_aminos_placed() << "\n";
+ out << "solutions_checked=" << protein.get_solutions_checked() << "\n";
+}
+
+/* Deserialize the state of the protein from key=value format.
+ *
+ * Reads `in` line by line until the stream is exhausted and applies every
+ * recognised key to `protein`; other keys and unparsable lines are skipped.
+ *   current_hash      - comma-separated moves; the protein is reset and the
+ *                       moves are replayed with place_amino().
+ *   aminos_placed     - counter restored via _set_aminos_placed().
+ *   solutions_checked - counter restored via _set_solutions_checked().
+ *
+ * Throws std::invalid_argument or std::out_of_range (from the std::sto*
+ * conversions) when a value is not a valid number.
+ */
+void load_protein_state(Protein &protein, std::istream &in) {
+  std::string line;
+  while (std::getline(in, line)) {
+    std::string key;
+    std::string value;
+
+    /* Continue on invalid lines. */
+    if (!parse_ini_line(line, key, value))
+      continue;
+
+    /* Parse valid lines based on variable key. */
+    if (key == "current_hash") {
+      protein.reset();
+      std::stringstream ss(value);
+      std::string token;
+      while (std::getline(ss, token, ',')) {
+        protein.place_amino(std::stoi(token));
+      }
+    } else if (key == "aminos_placed") {
+      /* The counters are std::uint64_t. std::stoi would throw
+       * std::out_of_range for values above INT_MAX, which long searches
+       * reach, so parse them as unsigned long long instead. */
+      protein._set_aminos_placed(std::stoull(value));
+    } else if (key == "solutions_checked") {
+      protein._set_solutions_checked(std::stoull(value));
+    }
+  }
+}
diff --git a/prospr/core/src/utils.hpp b/prospr/core/src/utils.hpp
new file mode 100644
index 0000000..fd08da9
--- /dev/null
+++ b/prospr/core/src/utils.hpp
@@ -0,0 +1,51 @@
+/* File: utils.hpp
+ * Description: Header file for utility functions.
+ * License: This file is licensed under the GNU LGPL V3 license by
+ * Okke van Eck (2020 - 2023). See the LICENSE file for the
+ * specifics.
+ */
+
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "protein.hpp"
+
+#include
+#include
+
+#ifdef _WIN32
+#define PATH_SEPARATOR '\\'
+#else
+#define PATH_SEPARATOR '/'
+#endif
+
+/* Check if a file exists.
+ * (Not using std::filesystem due to macOS compatibility issues)
+ */
+bool file_exists(const std::string &path);
+
+/* Return the path to the cache directory for a given algorithm, if the
+ * environment variable PROSPR_CACHE_DIR is set. (Not using std::filesystem due
+ * to macOS compatibility issues)
+ * The directory is only created when `create` is true.
+ */
+std::optional get_cache_dir(const std::string &algorithm,
+                            bool create = false);
+
+/* Remove leading and trailing whitespace from a string. */
+void trim_inplace(std::string &s);
+
+/* Parse a line in INI format (key=value) which may contain comments.
+ * Returns false for lines that hold no key=value pair.
+ */
+bool parse_ini_line(std::string line, std::string &key, std::string &value);
+
+/* Serialize the state of the protein to key=value format. */
+void dump_protein_state(const Protein &protein, std::ostream &out);
+
+/* Deserialize the state of the protein from key=value format. */
+void load_protein_state(Protein &protein, std::istream &in);
+
+#endif
diff --git a/prospr/core/tests/run_tests.sh b/prospr/core/tests/run_tests.sh
index 40aac50..df8089a 100755
--- a/prospr/core/tests/run_tests.sh
+++ b/prospr/core/tests/run_tests.sh
@@ -9,7 +9,7 @@
set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++11
+CFLAGS="-o3 -g -Wall -Wextra -Wconversion -Wcast-align -std=c++17
-Wunreachable-code"
DEBUG=""
VALGRIND=""
@@ -80,7 +80,7 @@ test_depth_first_bnb() {
# shellcheck disable=SC2086
c++ $CFLAGS -o test_algorithms test_algorithms.cpp ../src/beam_search.cpp \
../src/depth_first.cpp ../src/depth_first_bnb.cpp ../src/protein.cpp \
- ../src/amino_acid.cpp
+ ../src/amino_acid.cpp ../src/utils.cpp
echo "~ Compilation successful, running the tests.."
diff --git a/prospr/datasets.py b/prospr/datasets.py
index 2c93489..3f2bc7b 100644
--- a/prospr/datasets.py
+++ b/prospr/datasets.py
@@ -7,12 +7,13 @@
specifics.
"""
+from pathlib import Path
import pandas as pd
def _load_dataset(folder, filename):
"""Returns a specified dataset as a dataframe."""
- return pd.read_csv(f"prospr/data/{folder}/{filename}")
+ return pd.read_csv(f"{Path(__file__).parent}/data/{folder}/{filename}")
def load_vanEck250(length=10):
diff --git a/prospr/helpers.py b/prospr/helpers.py
index cb6b31a..37aa602 100644
--- a/prospr/helpers.py
+++ b/prospr/helpers.py
@@ -51,7 +51,8 @@ def export_protein(protein, path):
buf.write("HEADER HP-protein folding structure\n")
buf.write(f"TITLE Sequence: {protein.sequence}\n")
buf.write(
- "REMARK Generated using prospr (https://github.com/okkevaneck/prospr)\n"
+ "REMARK "
+ + "Generated using prospr (https://github.com/okkevaneck/prospr)\n"
)
# Amino acids
for i, c in enumerate(coordinates):
@@ -61,7 +62,7 @@ def export_protein(protein, path):
buf.write(f"ATOM {i+1:5d} CA {amino_acid:>3} A{i+1:4d} ")
buf.write(f"{x:8.3f}{y:8.3f}{z:8.3f} 1.00 0.00 C\n")
# Chain
- buf.write(f"CONECT 1 2\n")
+ buf.write("CONECT 1 2\n")
for i in range(2, len(coordinates)):
buf.write(f"CONECT {i:4d} {i-1:4d} {i+1:4d}\n")
buf.write("END\n")
diff --git a/pyproject.toml b/pyproject.toml
index 8119ddd..7acc7f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ documentation = "https://prospr.readthedocs.io"
repository = "https://github.com/okkevaneck/prospr"
[tool.setuptools]
-packages = ["prospr"]
+packages = {find = {include = ["prospr", "prospr.*"]}}
package-dir = {"prospr" = "prospr"}
zip-safe = false
platforms = ["Linux", "macOS", "Windows"]
diff --git a/setup.py b/setup.py
index d6f571f..6687296 100644
--- a/setup.py
+++ b/setup.py
@@ -8,17 +8,27 @@
specifics.
"""
+import os
from setuptools import setup
from pybind11.setup_helpers import Pybind11Extension, build_ext
+module = Pybind11Extension(
+ name="prospr_core",
+ sources=[
+ "prospr/core/core_module.cpp",
+ "prospr/core/src/utils.cpp",
+ ],
+ language="c++",
+ cxx_std=17,
+)
+
+# Enable debug mode when PROSPR_DEBUG_STEPS=1 is set at build time
+# (console output and pausing in depth_first_bnb(...)).
+if os.getenv("PROSPR_DEBUG_STEPS", "0") == "1":
+ module.define_macros = list(module.define_macros or [])
+ module.define_macros.append(("PROSPR_DEBUG_STEPS", None))
setup(
- ext_modules=[
- Pybind11Extension(
- name="prospr_core",
- sources=["prospr/core/core_module.cpp"],
- language="c++",
- ),
- ],
+ ext_modules=[module],
cmdclass={"build_ext": build_ext},
)
diff --git a/tests/core/test_depth_first_bnb.py b/tests/core/test_depth_first_bnb.py
index 51c3af7..8d49ac6 100644
--- a/tests/core/test_depth_first_bnb.py
+++ b/tests/core/test_depth_first_bnb.py
@@ -8,6 +8,9 @@
specifics.
"""
+import os
+from pathlib import Path
+from tempfile import TemporaryDirectory
from prospr import Protein, depth_first_bnb
import pytest
@@ -35,7 +38,7 @@ def test_protein_2d_depth_first_bnb_naive(self, protein_2d):
def test_protein_2d_depth_first_bnb_reach(self, protein_2d):
"""
- Test if a 2D solutions is folded correctly using reach_prune criteria.
+ Test if a 2D solution is folded correctly using reach_prune criteria.
"""
depth_first_bnb(protein_2d, "reach_prune")
assert protein_2d.score == -3
@@ -50,3 +53,46 @@ def test_protein_3d_depth_first_bnb(self, protein_3d):
assert protein_3d.score == -4
assert protein_3d.solutions_checked == 5
assert protein_3d.aminos_placed == 49368
+
+ def test_depth_first_bnb_no_checkpoint(self, protein_3d):
+ """
+ Test if a checkpoint is not created by default, i.e. when the
+ environment variable PROSPR_CACHE_DIR is not set.
+ """
+ # Make sure no cache directory is configured for this run.
+ if "PROSPR_CACHE_DIR" in os.environ:
+ del os.environ["PROSPR_CACHE_DIR"]
+ # NOTE(review): tmpdir is never handed to prospr, so the two emptiness
+ # checks only show that nothing was written into an unrelated, freshly
+ # created directory - confirm this is the intended check.
+ with TemporaryDirectory() as tmpdir:
+ assert len(list(Path(tmpdir).iterdir())) == 0
+ depth_first_bnb(protein_3d)
+ assert len(list(Path(tmpdir).iterdir())) == 0
+ # Still solves the protein correctly.
+ assert protein_3d.score == -4
+ assert protein_3d.solutions_checked == 5
+ assert protein_3d.aminos_placed == 49368
+
+ def test_depth_first_bnb_checkpoint(self, protein_3d):
+ """
+ Test if a checkpoint is created correctly.
+ """
+ # Still solves the protein correctly
+ def check_protein():
+ assert protein_3d.score == -4
+ assert protein_3d.solutions_checked == 5
+ assert protein_3d.aminos_placed == 49368
+
+ with TemporaryDirectory() as tmpdir:
+ os.environ["PROSPR_CACHE_DIR"] = tmpdir
+ checkpoint_path = (
+ Path(tmpdir)
+ / "depth_first_bnb"
+ / (protein_3d.sequence + ".checkpoint")
+ )
+ assert not checkpoint_path.exists()
+ depth_first_bnb(protein_3d)
+ check_protein()
+ assert checkpoint_path.exists()
+ checkpoint = checkpoint_path.read_text()
+ protein_3d.reset()
+ depth_first_bnb(protein_3d)
+ check_protein()
+ assert checkpoint_path.exists()
+ assert checkpoint == checkpoint_path.read_text()
diff --git a/tests/helpers/test_helpers.py b/tests/helpers/test_helpers.py
index b4c350b..b47ed50 100644
--- a/tests/helpers/test_helpers.py
+++ b/tests/helpers/test_helpers.py
@@ -11,7 +11,7 @@
from tempfile import NamedTemporaryFile
import pytest
-from prospr import *
+from prospr import Protein, export_protein, depth_first
class TestHelpers:
@@ -47,7 +47,8 @@ def test_export_HHPHH(self):
assert pdb_lines[1].startswith("TITLE ")
assert pdb_lines[2].startswith("REMARK ")
- # Expect valid atoms section (Carbon with ALA/SER residue names for amino acids)
+ # Expect valid atoms section (Carbon with ALA/SER residue names for
+ # amino acids)
assert all(
pdb_lines[3 + i].startswith("ATOM ") for i in range(n_aminos)
)