diff --git a/include/migr_traversal.h b/include/migr_traversal.h new file mode 100644 index 0000000..b01ef46 --- /dev/null +++ b/include/migr_traversal.h @@ -0,0 +1,99 @@ +#ifndef MIGR_TRAVERSAL_H +#define MIGR_TRAVERSAL_H + +#include "migr.h" +#include "migr_semantic.h" + +/* +Rules: +- class and struct will be named in PascalCase +- class member functions and members will be named in snake_case +*/ + +enum class TraversalType { + DFS, + BFS, +}; + +enum class TraversalDirection { + FORWARD, // structural: children, semantic: outgoing edges + BACKWARD, // structural: parent, semantic: incoming edges + BIDIRECTIONAL, // both +}; + +/* + * A struct to represent a path between any two connected nodes + * with full edge context + */ +struct TraversalPath { + std::vector> nodes; + std::vector edges; + int total_depth; + double path_weight; // NOTE: will not be implemented in this iteration; for + // future weighted traversals (heuristics) + + TraversalPath() : total_depth(0), path_weight(0.0) {} +}; + +/* + * A layer-agnostic traversal implementation interface + * for all MIGRGraphLayer implementations. + * Currently provides DFS/BFS with filtering, visiting and transformation + * capabilities while maintaining MIGR's pure IR nature. 
+ */ +class MIGRTraversal { +public: + explicit MIGRTraversal(MIGRGraphLayer &layer); + +private: + MIGRGraphLayer &layer_; + + /* Internal Traversal Engines (a negative max_depth disables the depth limit) */ + std::vector> + dfs_collect(const std::vector> &starts, + std::function predicate, int max_depth, + TraversalDirection direction) const; + + std::vector> + bfs_collect(const std::vector> &starts, + std::function predicate, int max_depth, + TraversalDirection direction) const; + + bool + dfs_visit(const std::vector> &starts, + std::function, int depth)> visitor, + int max_depth, TraversalDirection direction) const; + + bool + bfs_visit(const std::vector> &starts, + std::function, int depth)> visitor, + int max_depth, TraversalDirection direction) const; + + /* Internal Transform Engines (null transformer results are dropped) */ + std::vector> dfs_transform( + const std::vector> &starts, + std::function(std::shared_ptr, + int depth)> + transformer, + int max_depth, TraversalDirection direction) const; + + std::vector> bfs_transform( + const std::vector> &starts, + std::function(std::shared_ptr, + int depth)> + transformer, + int max_depth, TraversalDirection direction) const; + + /* Neighbour Helpers */ + std::vector> + get_neighbours(const std::shared_ptr &node, + TraversalDirection direction) const; + + std::vector> + get_forward_neighbours(const std::shared_ptr &node) const; + + std::vector> + get_backward_neighbours(const std::shared_ptr &node) const; +}; + +#endif //! MIGR_TRAVERSAL_H diff --git a/src/migr_traversal.cpp b/src/migr_traversal.cpp new file mode 100644 index 0000000..5377c70 --- /dev/null +++ b/src/migr_traversal.cpp @@ -0,0 +1,408 @@ +#include "migr_traversal.h" +#include "globals.h" +#include +#include + +MIGRTraversal::MIGRTraversal(MIGRGraphLayer &layer) : layer_(layer) { + _V_ << " [MIGRTraversal] Traversal Interface Initialized." 
<< std::endl; +} + +//-----------------------------------// +// Internal Traversal Engines // +//-----------------------------------// + +/* + * Performs a depth-first search (DFS) to collect nodes starting from the + * given start nodes. + * Nodes are visited iteratively using a stack; each node id is visited once. + * The traversal respects max_depth (negative means unlimited) and the traversal + * direction (forward, backward, or bidirectional). + * + * Each visited node is tested against the provided predicate function; if + * the predicate returns true, the node is added to the result vector. + */ +std::vector> +MIGRTraversal::dfs_collect(const std::vector> &starts, + std::function predicate, + int max_depth, TraversalDirection direction) const { + std::vector> results; + std::unordered_set visited; + std::stack, int>> st; // [node : depth] + + // seed the stack with the non-null start nodes at depth 0 + for (const auto &start : starts) { + if (start) { + st.push({start, 0}); + } + } + + while (!st.empty()) { + auto [node, depth] = st.top(); + st.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + if (predicate(*node)) { + results.push_back(node); + } + + /* push neighbours in reverse order so the first neighbour is popped first */ + auto neighbours = get_neighbours(node, direction); + for (auto it = neighbours.rbegin(); it != neighbours.rend(); ++it) { + if (*it && !visited.count((*it)->id_)) { + st.push({*it, depth + 1}); + } + } + } + _V_ << " [MIGRTraversal] DFS collected " << results.size() << " nodes" + << std::endl; + return results; +} + +/* + * Performs a breadth-first search (BFS) to collect nodes starting from the + * given start nodes. + * Nodes are visited iteratively using a queue; each node id is visited once. + * The traversal respects max_depth (negative means unlimited) and the traversal + * direction (forward, backward, or bidirectional). + * + * Each visited node is tested against the provided predicate function; if + * the predicate returns true, the node is added to the result vector. 
+ */ +std::vector> +MIGRTraversal::bfs_collect(const std::vector> &starts, + std::function predicate, + int max_depth, TraversalDirection direction) const { + std::vector> results; + std::unordered_set visited; + std::queue, int>> qu; // [node : depth], processed in FIFO order + + for (const auto &start : starts) { + if (start) { + qu.push({start, 0}); + } + } + + while (!qu.empty()) { + auto [node, depth] = qu.front(); + qu.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + if (predicate(*node)) { + results.push_back(node); + } + + auto neighbours = get_neighbours(node, direction); + for (const auto &neighbour : neighbours) { + if (neighbour && !visited.count(neighbour->id_)) { + qu.push({neighbour, depth + 1}); + } + } + } + _V_ << " [MIGRTraversal] BFS collected " << results.size() << " nodes" + << std::endl; + return results; +} + +/* + * Performs an iterative depth-first search (DFS) traversal that calls the + * visitor function on each visited node, starting from the provided nodes and + * limited by max depth (a negative max depth disables the limit). + * + * If the visitor returns false at some point, the traversal stops early and + * returns false; otherwise, once the traversal completes, it returns + * true. 
+ */ +bool MIGRTraversal::dfs_visit( + const std::vector> &starts, + std::function, int depth)> visitor, + int max_depth, TraversalDirection direction) const { + std::unordered_set visited; + std::stack, int>> st; + + for (const auto &start : starts) { + if (start) { + st.push({start, 0}); + } + } + + while (!st.empty()) { + auto [node, depth] = st.top(); + st.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + if (!visitor(node, depth)) { + _V_ << " [MIGRTraversal] DFS visit terminated early by visitor" + << std::endl; + return false; + } + + auto neighbours = get_neighbours(node, direction); + for (auto it = neighbours.rbegin(); it != neighbours.rend(); ++it) { + if (*it && !visited.count((*it)->id_)) { + st.push({*it, depth + 1}); + } + } + } + + return true; +} + +/* + * Performs an iterative breadth-first search (BFS) traversal that calls the + * visitor function on each visited node, starting from the provided nodes and + * limited by max depth (a negative max depth disables the limit). + * + * If the visitor returns false at some point, the traversal stops early and + * returns false; otherwise, once the traversal completes, it returns + * true. 
+ */ +bool MIGRTraversal::bfs_visit( + const std::vector> &starts, + std::function, int depth)> visitor, + int max_depth, TraversalDirection direction) const { + std::unordered_set visited; + std::queue, int>> qu; + + for (const auto &start : starts) { + if (start) { + qu.push({start, 0}); + } + } + + while (!qu.empty()) { + auto [node, depth] = qu.front(); + qu.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + if (!visitor(node, depth)) { + _V_ << " [MIGRTraversal] BFS visit terminated early by visitor" + << std::endl; + return false; + } + + auto neighbours = get_neighbours(node, direction); + for (const auto &neighbour : neighbours) { + if (neighbour && !visited.count(neighbour->id_)) { + qu.push({neighbour, depth + 1}); + } + } + } + return true; +} + +//-----------------------------// +// Internal Transform Engine // +//-----------------------------// + +/* + * Performs a depth-first search (DFS) traversal and applies the transformation + * function to each visited node. The transformer function receives the + * current node and depth, and returns a possibly transformed node; every + * non-null node it returns is included in the results vector. + * + * The traversal respects max depth and direction. Transformed + * nodes are collected into a result vector. 
+ */ +std::vector> MIGRTraversal::dfs_transform( + const std::vector> &starts, + std::function(std::shared_ptr, + int depth)> + transformer, + int max_depth, TraversalDirection direction) const { + std::vector> results; + std::unordered_set visited; + std::stack, int>> st; + + for (const auto &start : starts) { + if (start) { + st.push({start, 0}); + } + } + + while (!st.empty()) { + auto [node, depth] = st.top(); + st.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + auto transformed = transformer(node, depth); + if (transformed) { + results.push_back(transformed); + } + auto neighbours = get_neighbours(node, direction); + for (auto it = neighbours.rbegin(); it != neighbours.rend(); ++it) { + if (*it && !visited.count((*it)->id_)) { + st.push({*it, depth + 1}); + } + } + } + _V_ << " [MIGRTraversal] DFS transformed " << results.size() << " nodes" + << std::endl; + return results; +} + +/* + * Performs a breadth-first search (BFS) traversal and applies the + * transformation function to each visited node. The transformer function + * receives the current node and depth, and returns a possibly transformed node; + * every non-null node it returns is included in the results vector. + * + * The traversal respects max depth and direction. Transformed + * nodes are collected into a result vector. 
+ */ +std::vector> MIGRTraversal::bfs_transform( + const std::vector> &starts, + std::function(std::shared_ptr, + int depth)> + transformer, + int max_depth, TraversalDirection direction) const { + std::vector> results; + std::unordered_set visited; + std::queue, int>> qu; + + for (const auto &start : starts) { + if (start) { + qu.push({start, 0}); + } + } + + while (!qu.empty()) { + auto [node, depth] = qu.front(); + qu.pop(); + + if (!node || visited.count(node->id_)) { + continue; + } + + if (max_depth >= 0 && depth > max_depth) { + continue; + } + + visited.insert(node->id_); + + auto transformed = transformer(node, depth); + if (transformed) { + results.push_back(transformed); + } + + auto neighbours = get_neighbours(node, direction); + for (const auto &neighbour : neighbours) { + if (neighbour && !visited.count(neighbour->id_)) { + qu.push({neighbour, depth + 1}); + } + } + } + _V_ << " [MIGRTraversal] BFS transformed " << results.size() << " nodes" + << std::endl; + return results; +} + +//----------------------// +// Neighbour Helpers // +//----------------------// + +/* + * Returns neighbors of a node based on the specified traversal direction + * (forward, backward, or bidirectional). A null node yields an empty vector. + */ +std::vector> +MIGRTraversal::get_neighbours(const std::shared_ptr &node, + TraversalDirection direction) const { + if (!node) { + return {}; + } + + if (direction == TraversalDirection::FORWARD) { + return get_forward_neighbours(node); + } else if (direction == TraversalDirection::BACKWARD) { + return get_backward_neighbours(node); + } else { + // both: forward neighbours first, then the backward ones appended + auto forward = get_forward_neighbours(node); + auto backward = get_backward_neighbours(node); + forward.insert(forward.end(), backward.begin(), backward.end()); + return forward; + } +} + +/* + * Gets forward neighbors of a node. + * - for semantic layers, returns outgoing semantic targets. + * - otherwise (structural layers), returns the node's children_. 
+ */ +std::vector> MIGRTraversal::get_forward_neighbours( + const std::shared_ptr &node) const { + /* check whether layer_ is a SemanticLayer (node is dereferenced unchecked) */ + if (SemanticLayer *semantic = dynamic_cast(&layer_)) { + return semantic->get_semantic_targets(node->id_); + } + + /* otherwise return structural children */ + return node->children_; +} + +/* + * Gets backward neighbors of a node (the node is dereferenced unchecked). + * - for semantic layers, returns incoming semantic sources. + * - for structural layers, returns the parent node if present. + */ +std::vector> MIGRTraversal::get_backward_neighbours( + const std::shared_ptr &node) const { + std::vector> neighbours; + + /* check whether layer_ is a SemanticLayer */ + if (SemanticLayer *semantic = dynamic_cast(&layer_)) { + return semantic->get_semantic_sources(node->id_); + } + + /* structural case: add the parent when parent_.lock() yields one */ + if (auto parent = node->parent_.lock()) { + neighbours.push_back(parent); + } + + return neighbours; +}