Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/config/server.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,18 @@ Data Access Settings
- float
- ``2.0``
- Timeout for sampling topics with active publishers. Range: 0.1-30.0.
* - ``parameter_service_timeout_sec``
- float
- ``2.0``
- Timeout for ROS 2 parameter service calls (configurations endpoint).
Nodes without parameter service (e.g., micro-ROS bridges) block for this
duration before returning SERVICE_UNAVAILABLE. Range: 0.1-10.0.
* - ``parameter_service_negative_cache_sec``
- float
- ``60.0``
- After a node's parameter service fails to respond, subsequent requests
return immediately with SERVICE_UNAVAILABLE for this duration.
Set to 0 to disable. Range: 0-3600.

.. note::

Expand Down
14 changes: 14 additions & 0 deletions src/ros2_medkit_gateway/config/gateway_params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,20 @@ ros2_medkit_gateway:
# Valid range: 0.1-30.0
topic_sample_timeout_sec: 2.0

# Timeout (in seconds) for ROS 2 parameter service calls (configurations endpoint).
# Nodes without parameter service (e.g., micro-ROS bridges) will block for this
# duration before returning SERVICE_UNAVAILABLE. Lower values improve responsiveness
# on systems with mixed node types.
# Valid range: 0.1-10.0
parameter_service_timeout_sec: 2.0

# Negative cache TTL (in seconds) for parameter service unavailability.
# After a node's parameter service fails to respond, subsequent requests return
# immediately with SERVICE_UNAVAILABLE for this duration instead of blocking.
# Set to 0 to disable negative caching.
# Valid range: 0-3600
parameter_service_negative_cache_sec: 60.0

# Fault Manager integration
fault_manager:
# Namespace prefix used to resolve Fault Manager services.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@

#pragma once

#include <atomic>
#include <chrono>
#include <map>
#include <memory>
#include <mutex>
#include <nlohmann/json.hpp>
#include <rclcpp/rclcpp.hpp>
#include <shared_mutex>
#include <string>
#include <unordered_map>
#include <vector>

namespace ros2_medkit_gateway {
Expand All @@ -36,6 +40,7 @@ enum class ParameterErrorCode {
TYPE_MISMATCH, ///< Value type doesn't match parameter type
INVALID_VALUE, ///< Invalid value for parameter
NO_DEFAULTS_CACHED, ///< No default values cached for reset operation
SHUT_DOWN, ///< ConfigurationManager has been shut down
INTERNAL_ERROR ///< Internal/unexpected error
};

Expand All @@ -53,6 +58,12 @@ struct ParameterResult {
class ConfigurationManager {
public:
explicit ConfigurationManager(rclcpp::Node * node);
~ConfigurationManager();

/// Clean up shared param node and cached clients before ROS 2 context shutdown.
/// Must be called before rclcpp::shutdown() to prevent use-after-free.
/// Idempotent - safe to call multiple times.
void shutdown();

/// List all parameters for a node
/// @param node_name Fully qualified node name (e.g., "/powertrain/engine/engine_temp_sensor")
Expand Down Expand Up @@ -84,12 +95,31 @@ class ConfigurationManager {
ParameterResult reset_all_parameters(const std::string & node_name);

private:
/// Get or create a SyncParametersClient for the given node
/// Get or create a cached SyncParametersClient for the given node.
std::shared_ptr<rclcpp::SyncParametersClient> get_param_client(const std::string & node_name);

/// Cache default values for a node (called on first access)
/// @pre spin_mutex_ must be held by the caller
void cache_default_values(const std::string & node_name);

/// Check if a node is in the negative cache (recently unavailable)
bool is_node_unavailable(const std::string & node_name) const;

/// Mark a node as unavailable in the negative cache
void mark_node_unavailable(const std::string & node_name);

/// Check if node_name is the gateway's own node (self-query guard)
bool is_self_node(const std::string & node_name) const;

/// List parameters for the gateway's own node (direct access, no IPC)
ParameterResult list_own_parameters();

/// Get a specific parameter from the gateway's own node
ParameterResult get_own_parameter(const std::string & param_name);

/// Return SHUT_DOWN error result
static ParameterResult shut_down_result();

/// Convert ROS2 parameter type to string
static std::string parameter_type_to_string(rclcpp::ParameterType type);

Expand All @@ -104,28 +134,48 @@ class ConfigurationManager {

rclcpp::Node * node_;

/// Internal node for parameter client operations
/// SyncParametersClient requires a node that is NOT in an executor
/// to perform synchronous operations (spinning internally)
std::shared_ptr<rclcpp::Node> param_node_;

/// Timeout for waiting for parameter services (configurable via parameter_service_timeout_sec parameter)
double service_timeout_sec_{2.0};

/// Cache of parameter clients per node (avoids recreating clients)
mutable std::mutex clients_mutex_;
std::map<std::string, std::shared_ptr<rclcpp::SyncParametersClient>> param_clients_;
/// Negative cache TTL (configurable via parameter_service_negative_cache_sec parameter)
double negative_cache_ttl_sec_{60.0};

/// Gateway's own fully qualified node name (for self-query detection)
std::string own_node_fqn_;

/// Shutdown flag - prevents use-after-free on param_node_ after shutdown
std::atomic<bool> shutdown_{false};

/// Negative cache: nodes whose parameter service was recently unavailable.
/// Avoids repeated blocking waits on nodes that don't have parameter services.
mutable std::shared_mutex negative_cache_mutex_;
std::unordered_map<std::string, std::chrono::steady_clock::time_point> unavailable_nodes_;

/// Cache of default parameter values per node
/// Key: node_name, Value: map of param_name -> Parameter
mutable std::mutex defaults_mutex_;
std::map<std::string, std::map<std::string, rclcpp::Parameter>> default_values_;

/// Mutex to serialize all parameter operations.
/// SyncParametersClient internally spins param_node_ via spin_node_until_future_complete(),
/// which is not thread-safe. Concurrent HTTP requests would cause:
/// "Node '/_param_client_node' has already been added to an executor"
mutable std::mutex param_operations_mutex_;
/// Mutex to serialize spin operations on param_node_.
/// SyncParametersClient::wait_for_service/get_parameters/etc spin param_node_
/// internally via spin_node_until_future_complete which is NOT thread-safe.
/// This mutex is ONLY held during the actual ROS 2 IPC call, not during
/// cache lookups or JSON building. With negative cache + self-guard,
/// most requests never touch this mutex.
/// Declared BEFORE param_node_ so it outlives the node during destruction.
mutable std::mutex spin_mutex_;

/// Internal node for parameter client operations.
/// Created once in constructor - must be in DDS graph early for fast service discovery.
/// SyncParametersClient requires a node NOT in an executor to spin internally.
std::shared_ptr<rclcpp::Node> param_node_;

/// Cached SyncParametersClient per target node (avoids recreating clients)
mutable std::mutex clients_mutex_;
std::map<std::string, std::shared_ptr<rclcpp::SyncParametersClient>> param_clients_;

/// Maximum entries in negative cache before hard eviction
static constexpr size_t kMaxNegativeCacheSize = 500;
};

} // namespace ros2_medkit_gateway
Loading
Loading