diff --git a/docs/config/server.rst b/docs/config/server.rst index a097fd17..96ad372a 100644 --- a/docs/config/server.rst +++ b/docs/config/server.rst @@ -154,6 +154,18 @@ Data Access Settings - float - ``2.0`` - Timeout for sampling topics with active publishers. Range: 0.1-30.0. + * - ``parameter_service_timeout_sec`` + - float + - ``2.0`` + - Timeout for ROS 2 parameter service calls (configurations endpoint). + Nodes without parameter service (e.g., micro-ROS bridges) block for this + duration before returning SERVICE_UNAVAILABLE. Range: 0.1-10.0. + * - ``parameter_service_negative_cache_sec`` + - float + - ``60.0`` + - After a node's parameter service fails to respond, subsequent requests + return immediately with SERVICE_UNAVAILABLE for this duration. + Set to 0 to disable. Range: 0-3600. .. note:: diff --git a/src/ros2_medkit_gateway/config/gateway_params.yaml b/src/ros2_medkit_gateway/config/gateway_params.yaml index c8a1ee98..3016c67f 100644 --- a/src/ros2_medkit_gateway/config/gateway_params.yaml +++ b/src/ros2_medkit_gateway/config/gateway_params.yaml @@ -91,6 +91,20 @@ ros2_medkit_gateway: # Valid range: 0.1-30.0 topic_sample_timeout_sec: 2.0 + # Timeout (in seconds) for ROS 2 parameter service calls (configurations endpoint). + # Nodes without parameter service (e.g., micro-ROS bridges) will block for this + # duration before returning SERVICE_UNAVAILABLE. Lower values improve responsiveness + # on systems with mixed node types. + # Valid range: 0.1-10.0 + parameter_service_timeout_sec: 2.0 + + # Negative cache TTL (in seconds) for parameter service unavailability. + # After a node's parameter service fails to respond, subsequent requests return + # immediately with SERVICE_UNAVAILABLE for this duration instead of blocking. + # Set to 0 to disable negative caching. + # Valid range: 0-3600 + parameter_service_negative_cache_sec: 60.0 + # Fault Manager integration fault_manager: # Namespace prefix used to resolve Fault Manager services. diff --git a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/configuration_manager.hpp b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/configuration_manager.hpp index 0ae12a8f..c795e6b4 100644 --- a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/configuration_manager.hpp +++ b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/configuration_manager.hpp @@ -14,12 +14,16 @@ #pragma once +#include +#include #include #include #include #include #include +#include #include +#include #include namespace ros2_medkit_gateway { @@ -36,6 +40,7 @@ enum class ParameterErrorCode { TYPE_MISMATCH, ///< Value type doesn't match parameter type INVALID_VALUE, ///< Invalid value for parameter NO_DEFAULTS_CACHED, ///< No default values cached for reset operation + SHUT_DOWN, ///< ConfigurationManager has been shut down INTERNAL_ERROR ///< Internal/unexpected error }; @@ -53,6 +58,12 @@ struct ParameterResult { class ConfigurationManager { public: explicit ConfigurationManager(rclcpp::Node * node); + ~ConfigurationManager(); + + /// Clean up shared param node and cached clients before ROS 2 context shutdown. + /// Must be called before rclcpp::shutdown() to prevent use-after-free. + /// Idempotent - safe to call multiple times. + void shutdown(); /// List all parameters for a node /// @param node_name Fully qualified node name (e.g., "/powertrain/engine/engine_temp_sensor") @@ -84,12 +95,31 @@ class ConfigurationManager { ParameterResult reset_all_parameters(const std::string & node_name); private: - /// Get or create a SyncParametersClient for the given node + /// Get or create a cached SyncParametersClient for the given node. std::shared_ptr get_param_client(const std::string & node_name); /// Cache default values for a node (called on first access) + /// @pre spin_mutex_ must be held by the caller void cache_default_values(const std::string & node_name); + /// Check if a node is in the negative cache (recently unavailable) + bool is_node_unavailable(const std::string & node_name) const; + + /// Mark a node as unavailable in the negative cache + void mark_node_unavailable(const std::string & node_name); + + /// Check if node_name is the gateway's own node (self-query guard) + bool is_self_node(const std::string & node_name) const; + + /// List parameters for the gateway's own node (direct access, no IPC) + ParameterResult list_own_parameters(); + + /// Get a specific parameter from the gateway's own node + ParameterResult get_own_parameter(const std::string & param_name); + + /// Return SHUT_DOWN error result + static ParameterResult shut_down_result(); + /// Convert ROS2 parameter type to string static std::string parameter_type_to_string(rclcpp::ParameterType type); @@ -104,28 +134,48 @@ class ConfigurationManager { rclcpp::Node * node_; - /// Internal node for parameter client operations - /// SyncParametersClient requires a node that is NOT in an executor - /// to perform synchronous operations (spinning internally) - std::shared_ptr param_node_; - /// Timeout for waiting for parameter services (configurable via parameter_service_timeout_sec parameter) double service_timeout_sec_{2.0}; - /// Cache of parameter clients per node (avoids recreating clients) - mutable std::mutex clients_mutex_; - std::map> param_clients_; + /// Negative cache TTL (configurable via parameter_service_negative_cache_sec parameter) + double negative_cache_ttl_sec_{60.0}; + + /// Gateway's own fully qualified node name (for self-query detection) + std::string own_node_fqn_; + + /// Shutdown flag - prevents use-after-free on param_node_ after shutdown + std::atomic shutdown_{false}; + + /// Negative cache: nodes whose parameter service was recently unavailable. + /// Avoids repeated blocking waits on nodes that don't have parameter services. + mutable std::shared_mutex negative_cache_mutex_; + std::unordered_map unavailable_nodes_; /// Cache of default parameter values per node /// Key: node_name, Value: map of param_name -> Parameter mutable std::mutex defaults_mutex_; std::map> default_values_; - /// Mutex to serialize all parameter operations. - /// SyncParametersClient internally spins param_node_ via spin_node_until_future_complete(), - /// which is not thread-safe. Concurrent HTTP requests would cause: - /// "Node '/_param_client_node' has already been added to an executor" - mutable std::mutex param_operations_mutex_; + /// Mutex to serialize spin operations on param_node_. + /// SyncParametersClient::wait_for_service/get_parameters/etc spin param_node_ + /// internally via spin_node_until_future_complete which is NOT thread-safe. + /// This mutex is ONLY held during the actual ROS 2 IPC call, not during + /// cache lookups or JSON building. With negative cache + self-guard, + /// most requests never touch this mutex. + /// Declared BEFORE param_node_ so it outlives the node during destruction. + mutable std::mutex spin_mutex_; + + /// Internal node for parameter client operations. + /// Created once in constructor - must be in DDS graph early for fast service discovery. + /// SyncParametersClient requires a node NOT in an executor to spin internally. + std::shared_ptr param_node_; + + /// Cached SyncParametersClient per target node (avoids recreating clients) + mutable std::mutex clients_mutex_; + std::map> param_clients_; + + /// Maximum entries in negative cache before hard eviction + static constexpr size_t kMaxNegativeCacheSize = 500; }; } // namespace ros2_medkit_gateway diff --git a/src/ros2_medkit_gateway/src/configuration_manager.cpp b/src/ros2_medkit_gateway/src/configuration_manager.cpp index 2dac920c..051b7286 100644 --- a/src/ros2_medkit_gateway/src/configuration_manager.cpp +++ b/src/ros2_medkit_gateway/src/configuration_manager.cpp @@ -21,28 +21,168 @@ using namespace std::chrono_literals; namespace ros2_medkit_gateway { ConfigurationManager::ConfigurationManager(rclcpp::Node * node) : node_(node) { - // Create an internal node for parameter client operations - // SyncParametersClient needs a node that is NOT added to any executor - // so it can spin internally when waiting for service responses + // Create internal node for parameter client operations early. + // Must be in DDS graph before any parameter queries for fast service discovery. rclcpp::NodeOptions options; options.start_parameter_services(false); options.start_parameter_event_publisher(false); - // Don't inherit node name/namespace from global arguments (--ros-args -r __node:=...) - // Without this, the internal node would have the same name as the main gateway node options.use_global_arguments(false); param_node_ = std::make_shared("_param_client_node", options); - // Get configurable timeout for parameter services (default 2.0 seconds) - service_timeout_sec_ = node_->declare_parameter("parameter_service_timeout_sec", 2.0); + // Declare parameters with range validation + rcl_interfaces::msg::ParameterDescriptor timeout_desc; + timeout_desc.description = "Timeout for ROS 2 parameter service calls (configurations endpoint)"; + rcl_interfaces::msg::FloatingPointRange timeout_range; + timeout_range.from_value = 0.1; + timeout_range.to_value = 10.0; + timeout_desc.floating_point_range.push_back(timeout_range); + service_timeout_sec_ = node_->declare_parameter("parameter_service_timeout_sec", 2.0, timeout_desc); + + rcl_interfaces::msg::ParameterDescriptor cache_desc; + cache_desc.description = "Negative cache TTL for unavailable parameter services (0 = disabled)"; + rcl_interfaces::msg::FloatingPointRange cache_range; + cache_range.from_value = 0.0; + cache_range.to_value = 3600.0; + cache_desc.floating_point_range.push_back(cache_range); + negative_cache_ttl_sec_ = node_->declare_parameter("parameter_service_negative_cache_sec", 60.0, cache_desc); + + // Store own node FQN for self-query detection + own_node_fqn_ = node_->get_fully_qualified_name(); + + RCLCPP_INFO(node_->get_logger(), "ConfigurationManager initialized (timeout=%.1fs, negative_cache=%.0fs)", + service_timeout_sec_, negative_cache_ttl_sec_); +} + +ConfigurationManager::~ConfigurationManager() { + shutdown(); +} + +void ConfigurationManager::shutdown() { + if (shutdown_.exchange(true)) { + return; // Already shut down + } + // Acquire spin_mutex_ first to wait for any in-flight IPC to complete + std::lock_guard spin_lock(spin_mutex_); + std::lock_guard lock(clients_mutex_); + param_clients_.clear(); + param_node_.reset(); +} - RCLCPP_INFO(node_->get_logger(), "ConfigurationManager initialized"); +ParameterResult ConfigurationManager::shut_down_result() { + return {false, {}, "ConfigurationManager is shut down", ParameterErrorCode::SHUT_DOWN}; } -/// Helper to get the service timeout as a chrono duration std::chrono::duration ConfigurationManager::get_service_timeout() const { return std::chrono::duration(service_timeout_sec_); } +bool ConfigurationManager::is_node_unavailable(const std::string & node_name) const { + std::shared_lock lock(negative_cache_mutex_); + auto it = unavailable_nodes_.find(node_name); + if (it == unavailable_nodes_.end()) { + return false; + } + auto elapsed = std::chrono::steady_clock::now() - it->second; + return elapsed < std::chrono::duration(negative_cache_ttl_sec_); +} + +void ConfigurationManager::mark_node_unavailable(const std::string & node_name) { + std::unique_lock lock(negative_cache_mutex_); + unavailable_nodes_[node_name] = std::chrono::steady_clock::now(); + + // Cleanup to prevent unbounded growth + if (unavailable_nodes_.size() > kMaxNegativeCacheSize) { + // First pass: remove expired entries + auto now = std::chrono::steady_clock::now(); + auto ttl = std::chrono::duration(negative_cache_ttl_sec_); + for (auto it = unavailable_nodes_.begin(); it != unavailable_nodes_.end();) { + if (now - it->second > ttl) { + it = unavailable_nodes_.erase(it); + } else { + ++it; + } + } + // If still over limit, evict oldest entry + if (unavailable_nodes_.size() > kMaxNegativeCacheSize) { + auto oldest = + std::min_element(unavailable_nodes_.begin(), unavailable_nodes_.end(), [](const auto & a, const auto & b) { + return a.second < b.second; + }); + if (oldest != unavailable_nodes_.end()) { + unavailable_nodes_.erase(oldest); + } + } + } +} + +bool ConfigurationManager::is_self_node(const std::string & node_name) const { + return node_name == own_node_fqn_; +} + +ParameterResult ConfigurationManager::list_own_parameters() { + ParameterResult result; + try { + auto params = node_->get_parameters(node_->list_parameters({}, 0).names); + + // Cache defaults for reset operations (same as IPC path) + { + std::lock_guard lock(defaults_mutex_); + if (default_values_.find(own_node_fqn_) == default_values_.end()) { + std::map node_defaults; + for (const auto & param : params) { + node_defaults[param.get_name()] = param; + } + default_values_[own_node_fqn_] = std::move(node_defaults); + } + } + + json params_array = json::array(); + for (const auto & param : params) { + json param_obj; + param_obj["name"] = param.get_name(); + param_obj["value"] = parameter_value_to_json(param.get_parameter_value()); + param_obj["type"] = parameter_type_to_string(param.get_type()); + params_array.push_back(param_obj); + } + result.success = true; + result.data = params_array; + } catch (const std::exception & e) { + result.success = false; + result.error_message = std::string("Failed to list own parameters: ") + e.what(); + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + } + return result; +} + +ParameterResult ConfigurationManager::get_own_parameter(const std::string & param_name) { + ParameterResult result; + try { + if (!node_->has_parameter(param_name)) { + result.success = false; + result.error_message = "Parameter not found: " + param_name; + result.error_code = ParameterErrorCode::NOT_FOUND; + return result; + } + auto param = node_->get_parameter(param_name); + auto descriptor = node_->describe_parameter(param_name); + + json param_obj; + param_obj["name"] = param.get_name(); + param_obj["value"] = parameter_value_to_json(param.get_parameter_value()); + param_obj["type"] = parameter_type_to_string(param.get_type()); + param_obj["description"] = descriptor.description; + param_obj["read_only"] = descriptor.read_only; + + result.success = true; + result.data = param_obj; + } catch (const std::exception & e) { + result.success = false; + result.error_message = std::string("Failed to get own parameter: ") + e.what(); + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + } + return result; +} + std::shared_ptr ConfigurationManager::get_param_client(const std::string & node_name) { std::lock_guard lock(clients_mutex_); @@ -51,71 +191,72 @@ std::shared_ptr ConfigurationManager::get_param_cl return it->second; } - // Create new client for this node using the internal param_node_ - // (not the main gateway node, which is already in an executor) + // Create cached client on the shared param_node_ (created in constructor) auto client = std::make_shared(param_node_, node_name); param_clients_[node_name] = client; return client; } ParameterResult ConfigurationManager::list_parameters(const std::string & node_name) { - std::lock_guard op_lock(param_operations_mutex_); - ParameterResult result; + if (shutdown_.load()) { + return shut_down_result(); + } - RCLCPP_DEBUG(node_->get_logger(), "list_parameters called for node: '%s'", node_name.c_str()); + // Self-query guard: use direct access for gateway's own node + if (is_self_node(node_name)) { + return list_own_parameters(); + } - // Cache default values on first access to this node - cache_default_values(node_name); + // Negative cache: skip nodes that recently had no parameter service + if (is_node_unavailable(node_name)) { + ParameterResult result; + result.success = false; + result.error_message = "Parameter service not available for node (cached): " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + return result; + } + + ParameterResult result; try { auto client = get_param_client(node_name); - RCLCPP_DEBUG(node_->get_logger(), "Got param client for node: '%s'", node_name.c_str()); - - if (!client->wait_for_service(get_service_timeout())) { - result.success = false; - result.error_message = "Parameter service not available for node: " + node_name; - result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; - RCLCPP_WARN(node_->get_logger(), "Parameter service not available for node: '%s'", node_name.c_str()); - return result; - } - - RCLCPP_DEBUG(node_->get_logger(), "Service ready for node: '%s'", node_name.c_str()); - - // List all parameter names - auto param_names = client->list_parameters({}, 0); - - RCLCPP_DEBUG(node_->get_logger(), "Got %zu parameter names for node: '%s'", param_names.names.size(), - node_name.c_str()); - - // Log first few parameter names for debugging - for (size_t i = 0; i < std::min(param_names.names.size(), static_cast(5)); ++i) { - RCLCPP_DEBUG(node_->get_logger(), " param[%zu]: '%s'", i, param_names.names[i].c_str()); - } - - // Get all parameter values - try in smaller batches if needed + // All ROS 2 IPC calls (wait_for_service, list_parameters, get_parameters) + // spin param_node_ internally. spin_mutex_ serializes these to prevent + // "Node already added to executor" errors from concurrent spin. std::vector parameters; + { + std::lock_guard spin_lock(spin_mutex_); - // First try getting all at once - parameters = client->get_parameters(param_names.names); + // Cache default values first (gives node extra time for DDS service discovery) + cache_default_values(node_name); - if (parameters.empty() && !param_names.names.empty()) { - RCLCPP_WARN(node_->get_logger(), "get_parameters returned empty, trying one by one for node: '%s'", - node_name.c_str()); - // Try getting parameters one by one - for (const auto & name : param_names.names) { - try { - auto single_params = client->get_parameters({name}); - if (!single_params.empty()) { - parameters.push_back(single_params[0]); + if (!client->wait_for_service(get_service_timeout())) { + result.success = false; + result.error_message = "Parameter service not available for node: " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + mark_node_unavailable(node_name); + RCLCPP_WARN(node_->get_logger(), "Parameter service not available for node: '%s' (cached for %.0fs)", + node_name.c_str(), negative_cache_ttl_sec_); + return result; + } + + auto param_names = client->list_parameters({}, 0); + parameters = client->get_parameters(param_names.names); + + if (parameters.empty() && !param_names.names.empty()) { + for (const auto & name : param_names.names) { + try { + auto single_params = client->get_parameters({name}); + if (!single_params.empty()) { + parameters.push_back(single_params[0]); + } + } catch (const std::exception & e) { + RCLCPP_DEBUG(node_->get_logger(), "Failed to get param '%s': %s", name.c_str(), e.what()); } - } catch (const std::exception & e) { - RCLCPP_DEBUG(node_->get_logger(), "Failed to get param '%s': %s", name.c_str(), e.what()); } } - } - - RCLCPP_DEBUG(node_->get_logger(), "Got %zu parameter values for node: '%s'", parameters.size(), node_name.c_str()); + } // spin_mutex_ released - JSON building is lock-free json params_array = json::array(); for (const auto & param : parameters) { @@ -139,41 +280,59 @@ ParameterResult ConfigurationManager::list_parameters(const std::string & node_n } ParameterResult ConfigurationManager::get_parameter(const std::string & node_name, const std::string & param_name) { - std::lock_guard op_lock(param_operations_mutex_); + if (shutdown_.load()) { + return shut_down_result(); + } + + if (is_self_node(node_name)) { + return get_own_parameter(param_name); + } + + if (is_node_unavailable(node_name)) { + ParameterResult result; + result.success = false; + result.error_message = "Parameter service not available for node (cached): " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + return result; + } + ParameterResult result; try { - auto client = get_param_client(node_name); + rclcpp::Parameter param; + std::vector descriptors; - if (!client->wait_for_service(get_service_timeout())) { - result.success = false; - result.error_message = "Parameter service not available for node: " + node_name; - result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; - return result; - } + { + std::lock_guard spin_lock(spin_mutex_); + auto client = get_param_client(node_name); - // Check if parameter exists - auto param_names = client->list_parameters({param_name}, 1); - if (param_names.names.empty()) { - result.success = false; - result.error_message = "Parameter not found: " + param_name; - result.error_code = ParameterErrorCode::NOT_FOUND; - return result; - } + if (!client->wait_for_service(get_service_timeout())) { + result.success = false; + result.error_message = "Parameter service not available for node: " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + mark_node_unavailable(node_name); + return result; + } - // Get parameter value - auto parameters = client->get_parameters({param_name}); - if (parameters.empty()) { - result.success = false; - result.error_message = "Failed to get parameter: " + param_name; - result.error_code = ParameterErrorCode::INTERNAL_ERROR; - return result; - } + auto param_names = client->list_parameters({param_name}, 1); + if (param_names.names.empty()) { + result.success = false; + result.error_message = "Parameter not found: " + param_name; + result.error_code = ParameterErrorCode::NOT_FOUND; + return result; + } - const auto & param = parameters[0]; + auto parameters = client->get_parameters({param_name}); + if (parameters.empty()) { + result.success = false; + result.error_message = "Failed to get parameter: " + param_name; + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + return result; + } - // Get parameter descriptor for additional metadata - auto descriptors = client->describe_parameters({param_name}); + param = parameters[0]; + descriptors = client->describe_parameters({param_name}); + } // spin_mutex_ released json param_obj; param_obj["name"] = param.get_name(); @@ -198,7 +357,52 @@ ParameterResult ConfigurationManager::get_parameter(const std::string & node_nam ParameterResult ConfigurationManager::set_parameter(const std::string & node_name, const std::string & param_name, const json & value) { - std::lock_guard op_lock(param_operations_mutex_); + if (shutdown_.load()) { + return shut_down_result(); + } + + // Self-query guard: set parameter directly on own node + if (is_self_node(node_name)) { + ParameterResult result; + try { + if (!node_->has_parameter(param_name)) { + result.success = false; + result.error_message = "Parameter not found: " + param_name; + result.error_code = ParameterErrorCode::NOT_FOUND; + return result; + } + auto current_value = node_->get_parameter(param_name).get_parameter_value(); + rclcpp::ParameterValue param_value = json_to_parameter_value(value, current_value.get_type()); + auto set_result = node_->set_parameter(rclcpp::Parameter(param_name, param_value)); + if (!set_result.successful) { + result.success = false; + result.error_message = set_result.reason; + result.error_code = ParameterErrorCode::INVALID_VALUE; + return result; + } + json param_obj; + param_obj["name"] = param_name; + param_obj["value"] = parameter_value_to_json(param_value); + param_obj["type"] = parameter_type_to_string(param_value.get_type()); + result.success = true; + result.data = param_obj; + } catch (const std::exception & e) { + result.success = false; + result.error_message = std::string("Failed to set own parameter: ") + e.what(); + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + } + return result; + } + + if (is_node_unavailable(node_name)) { + ParameterResult result; + result.success = false; + result.error_message = "Parameter service not available for node (cached): " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + return result; + } + + std::lock_guard spin_lock(spin_mutex_); ParameterResult result; try { @@ -208,26 +412,23 @@ ParameterResult ConfigurationManager::set_parameter(const std::string & node_nam result.success = false; result.error_message = "Parameter service not available for node: " + node_name; result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + mark_node_unavailable(node_name); return result; } - // Get current parameter to determine type hint auto current_params = client->get_parameters({param_name}); rclcpp::ParameterType hint_type = rclcpp::ParameterType::PARAMETER_NOT_SET; if (!current_params.empty()) { hint_type = current_params[0].get_type(); } - // Convert JSON value to ROS2 parameter rclcpp::ParameterValue param_value = json_to_parameter_value(value, hint_type); rclcpp::Parameter param(param_name, param_value); - // Set parameter auto results = client->set_parameters({param}); if (results.empty() || !results[0].successful) { result.success = false; result.error_message = results.empty() ? "Failed to set parameter" : results[0].reason; - // Classify error based on reason if (!results.empty()) { const auto & reason = results[0].reason; if (reason.find("read-only") != std::string::npos || reason.find("read only") != std::string::npos || @@ -244,7 +445,6 @@ ParameterResult ConfigurationManager::set_parameter(const std::string & node_nam return result; } - // Return the new value json param_obj; param_obj["name"] = param_name; param_obj["value"] = parameter_value_to_json(param_value); @@ -315,7 +515,6 @@ json ConfigurationManager::parameter_value_to_json(const rclcpp::ParameterValue rclcpp::ParameterValue ConfigurationManager::json_to_parameter_value(const json & value, rclcpp::ParameterType hint_type) { - // If we have a type hint, try to match it if (hint_type != rclcpp::ParameterType::PARAMETER_NOT_SET) { switch (hint_type) { case rclcpp::ParameterType::PARAMETER_BOOL: @@ -344,7 +543,6 @@ rclcpp::ParameterValue ConfigurationManager::json_to_parameter_value(const json if (value.is_string()) { return rclcpp::ParameterValue(value.get()); } - // Convert other types to string return rclcpp::ParameterValue(value.dump()); case rclcpp::ParameterType::PARAMETER_BOOL_ARRAY: if (value.is_array()) { @@ -383,7 +581,6 @@ rclcpp::ParameterValue ConfigurationManager::json_to_parameter_value(const json } } - // Infer type from JSON value if (value.is_boolean()) { return rclcpp::ParameterValue(value.get()); } @@ -398,10 +595,8 @@ rclcpp::ParameterValue ConfigurationManager::json_to_parameter_value(const json } if (value.is_array()) { if (value.empty()) { - // Empty array with no type hint - default to string array return rclcpp::ParameterValue(std::vector{}); } - // Determine array type from first element if (value[0].is_boolean()) { return rclcpp::ParameterValue(value.get>()); } @@ -416,21 +611,21 @@ rclcpp::ParameterValue ConfigurationManager::json_to_parameter_value(const json } } - // Default: convert to string return rclcpp::ParameterValue(value.dump()); } void ConfigurationManager::cache_default_values(const std::string & node_name) { - // Check if already cached (quick check under lock) { std::lock_guard lock(defaults_mutex_); if (default_values_.find(node_name) != default_values_.end()) { return; } } - // Lock released — perform blocking I/O without holding defaults_mutex_ - RCLCPP_DEBUG(node_->get_logger(), "Caching default values for node: '%s'", node_name.c_str()); + // Skip if node is in negative cache + if (is_node_unavailable(node_name)) { + return; + } try { auto client = get_param_client(node_name); @@ -438,17 +633,15 @@ void ConfigurationManager::cache_default_values(const std::string & node_name) { if (!client->wait_for_service(get_service_timeout())) { RCLCPP_WARN(node_->get_logger(), "Cannot cache defaults - service not available for node: '%s'", node_name.c_str()); + mark_node_unavailable(node_name); return; } - // List all parameter names auto param_names = client->list_parameters({}, 0); - // Get all parameter values std::vector parameters; parameters = client->get_parameters(param_names.names); - // Fallback to one-by-one if batch fails if (parameters.empty() && !param_names.names.empty()) { for (const auto & name : param_names.names) { try { @@ -457,12 +650,10 @@ void ConfigurationManager::cache_default_values(const std::string & node_name) { parameters.push_back(single_params[0]); } } catch (const std::exception &) { - // Skip failed parameters } } } - // Store as defaults (re-acquire lock, double-check to avoid overwriting concurrent cacher) std::map node_defaults; for (const auto & param : parameters) { node_defaults[param.get_name()] = param; @@ -472,8 +663,6 @@ void ConfigurationManager::cache_default_values(const std::string & node_name) { std::lock_guard lock(defaults_mutex_); if (default_values_.find(node_name) == default_values_.end()) { default_values_[node_name] = std::move(node_defaults); - RCLCPP_DEBUG(node_->get_logger(), "Cached %zu default values for node: '%s'", default_values_[node_name].size(), - node_name.c_str()); } } } catch (const std::exception & e) { @@ -482,14 +671,65 @@ void ConfigurationManager::cache_default_values(const std::string & node_name) { } ParameterResult ConfigurationManager::reset_parameter(const std::string & node_name, const std::string & param_name) { - std::lock_guard op_lock(param_operations_mutex_); + if (shutdown_.load()) { + return shut_down_result(); + } + + // Self-query guard: reset via direct access + if (is_self_node(node_name)) { + ParameterResult result; + // Copy default value under lock, release before set_parameter I/O + rclcpp::Parameter default_param; + { + std::lock_guard lock(defaults_mutex_); + auto node_it = default_values_.find(node_name); + if (node_it == default_values_.end()) { + result.success = false; + result.error_message = "No default values cached for node: " + node_name; + result.error_code = ParameterErrorCode::NO_DEFAULTS_CACHED; + return result; + } + auto param_it = node_it->second.find(param_name); + if (param_it == node_it->second.end()) { + result.success = false; + result.error_message = "No default value for parameter: " + param_name; + result.error_code = ParameterErrorCode::NOT_FOUND; + return result; + } + default_param = param_it->second; + } // defaults_mutex_ released + + auto set_result = node_->set_parameter(default_param); + if (!set_result.successful) { + result.success = false; + result.error_message = set_result.reason; + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + return result; + } + json param_obj; + param_obj["name"] = param_name; + param_obj["value"] = parameter_value_to_json(default_param.get_parameter_value()); + param_obj["type"] = parameter_type_to_string(default_param.get_type()); + param_obj["reset_to_default"] = true; + result.success = true; + result.data = param_obj; + return result; + } + + if (is_node_unavailable(node_name)) { + ParameterResult result; + result.success = false; + result.error_message = "Parameter service not available for node (cached): " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + return result; + } + + std::lock_guard spin_lock(spin_mutex_); ParameterResult result; try { - // Ensure defaults are cached cache_default_values(node_name); - // Look up default value — copy under lock, then release before blocking I/O rclcpp::Parameter default_param; { std::lock_guard lock(defaults_mutex_); @@ -512,12 +752,12 @@ ParameterResult ConfigurationManager::reset_parameter(const std::string & node_n default_param = param_it->second; } - // Lock released — perform blocking I/O without holding defaults_mutex_ auto client = get_param_client(node_name); if (!client->wait_for_service(get_service_timeout())) { result.success = false; result.error_message = "Parameter service not available for node: " + node_name; result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + mark_node_unavailable(node_name); return result; } @@ -529,7 +769,6 @@ ParameterResult ConfigurationManager::reset_parameter(const std::string & node_n return result; } - // Return the reset value json param_obj; param_obj["name"] = param_name; param_obj["value"] = parameter_value_to_json(default_param.get_parameter_value()); @@ -551,14 +790,69 @@ ParameterResult ConfigurationManager::reset_parameter(const std::string & node_n } ParameterResult ConfigurationManager::reset_all_parameters(const std::string & node_name) { - std::lock_guard op_lock(param_operations_mutex_); + if (shutdown_.load()) { + return shut_down_result(); + } + + // Self-query guard: reset via direct access + if (is_self_node(node_name)) { + ParameterResult result; + std::vector params_to_reset; + { + std::lock_guard lock(defaults_mutex_); + auto node_it = default_values_.find(node_name); + if (node_it == default_values_.end()) { + result.success = false; + result.error_message = "No default values cached for node: " + node_name; + result.error_code = ParameterErrorCode::NO_DEFAULTS_CACHED; + return result; + } + for (const auto & [name, param] : node_it->second) { + params_to_reset.push_back(param); + } + } + size_t reset_count = 0; + size_t failed_count = 0; + json failed_params = json::array(); + for (const auto & param : params_to_reset) { + auto set_result = node_->set_parameter(param); + if (set_result.successful) { + reset_count++; + } else { + failed_count++; + failed_params.push_back(param.get_name()); + } + } + json response; + response["node_name"] = node_name; + response["reset_count"] = reset_count; + response["failed_count"] = failed_count; + if (!failed_params.empty()) { + response["failed_parameters"] = failed_params; + } + result.success = (failed_count == 0); + result.data = response; + if (failed_count > 0) { + result.error_message = "Some parameters could not be reset"; + result.error_code = ParameterErrorCode::INTERNAL_ERROR; + } + return result; + } + + if (is_node_unavailable(node_name)) { + ParameterResult result; + result.success = false; + result.error_message = "Parameter service not available for node (cached): " + node_name; + result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + return result; + } + + std::lock_guard spin_lock(spin_mutex_); ParameterResult result; try { - // Ensure defaults are cached cache_default_values(node_name); - // Look up default values — copy under lock, then release before blocking I/O std::vector params_to_reset; { std::lock_guard lock(defaults_mutex_); @@ -576,21 +870,19 @@ ParameterResult ConfigurationManager::reset_all_parameters(const std::string & n } } - // Lock released — perform blocking I/O without holding defaults_mutex_ auto client = get_param_client(node_name); if (!client->wait_for_service(get_service_timeout())) { result.success = false; result.error_message = "Parameter service not available for node: " + node_name; result.error_code = ParameterErrorCode::SERVICE_UNAVAILABLE; + mark_node_unavailable(node_name); return result; } - // Reset all parameters size_t reset_count = 0; size_t failed_count = 0; json failed_params = json::array(); - // Set parameters one by one to handle partial failures for (const auto & param : params_to_reset) { try { auto results = client->set_parameters({param}); @@ -619,7 +911,7 @@ ParameterResult ConfigurationManager::reset_all_parameters(const std::string & n if (failed_count > 0) { result.error_message = "Some parameters could not be reset"; - result.error_code = ParameterErrorCode::INTERNAL_ERROR; // Partial failure + result.error_code = ParameterErrorCode::INTERNAL_ERROR; } RCLCPP_INFO(node_->get_logger(), "Reset %zu parameters on node '%s' (%zu failed)", reset_count, node_name.c_str(), diff --git a/src/ros2_medkit_gateway/src/gateway_node.cpp b/src/ros2_medkit_gateway/src/gateway_node.cpp index af2a5c3d..d6e11826 100644 --- a/src/ros2_medkit_gateway/src/gateway_node.cpp +++ b/src/ros2_medkit_gateway/src/gateway_node.cpp @@ -1178,7 +1178,12 @@ GatewayNode::~GatewayNode() { if (operation_mgr_) { operation_mgr_->shutdown(); } - // 8. Normal member destruction (managers safe - all transports stopped) + // 8. Shutdown ConfigurationManager (clears param_node_ and cached clients + // before rclcpp context is destroyed - prevents use-after-free) + if (config_mgr_) { + config_mgr_->shutdown(); + } + // 9. Normal member destruction (managers safe - all transports stopped) } const ThreadSafeEntityCache & GatewayNode::get_thread_safe_cache() const { diff --git a/src/ros2_medkit_gateway/test/test_configuration_manager.cpp b/src/ros2_medkit_gateway/test/test_configuration_manager.cpp index 196b35ce..f5b870d1 100644 --- a/src/ros2_medkit_gateway/test/test_configuration_manager.cpp +++ b/src/ros2_medkit_gateway/test/test_configuration_manager.cpp @@ -316,6 +316,68 @@ TEST_F(TestConfigurationManager, test_error_messages_informative) { EXPECT_FALSE(result2.error_message.empty()); } +// ==================== NEGATIVE CACHE TESTS ==================== + +TEST_F(TestConfigurationManager, test_negative_cache_fast_return) { + // First call to nonexistent node: waits for timeout, returns SERVICE_UNAVAILABLE + auto result1 = config_manager_->list_parameters("/nonexistent_cached_node"); + EXPECT_FALSE(result1.success); + EXPECT_EQ(result1.error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + + // Second call: should return from negative cache (no service discovery wait) + auto result2 = config_manager_->list_parameters("/nonexistent_cached_node"); + EXPECT_FALSE(result2.success); + EXPECT_EQ(result2.error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + EXPECT_TRUE(result2.error_message.find("cached") != std::string::npos); +} + +// ==================== SELF-QUERY GUARD TESTS ==================== + +TEST_F(TestConfigurationManager, test_self_query_no_deadlock) { + // Querying our own node should work without deadlock (direct access, no IPC) + auto result = config_manager_->list_parameters("/test_config_manager_node"); + + EXPECT_TRUE(result.success); +} + +// ==================== SPIN SERIALIZATION TESTS ==================== + +TEST_F(TestConfigurationManager, test_concurrent_queries_no_crash) { + // Concurrent queries to different nonexistent nodes must not crash + // (spin_mutex_ serializes ROS 2 IPC to prevent executor conflicts). + std::vector results(3); + std::vector threads; + for (int i = 0; i < 3; ++i) { + threads.emplace_back([this, i, &results]() { + results[static_cast(i)] = config_manager_->list_parameters("/concurrent_node_" + std::to_string(i)); + }); + } + for (auto & t : threads) { + t.join(); + } + for (int i = 0; i < 3; ++i) { + EXPECT_FALSE(results[static_cast(i)].success); + EXPECT_EQ(results[static_cast(i)].error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + } +} + +TEST_F(TestConfigurationManager, test_negative_cache_cross_method) { + // list_parameters marks node unavailable, get_parameter should return cached + auto list_result = config_manager_->list_parameters("/cross_method_cached_node"); + EXPECT_FALSE(list_result.success); + EXPECT_EQ(list_result.error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + + auto get_result = config_manager_->get_parameter("/cross_method_cached_node", "param"); + EXPECT_FALSE(get_result.success); + EXPECT_EQ(get_result.error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + EXPECT_TRUE(get_result.error_message.find("cached") != std::string::npos); + + auto set_result = config_manager_->set_parameter("/cross_method_cached_node", "param", nlohmann::json(42)); + EXPECT_FALSE(set_result.success); + EXPECT_EQ(set_result.error_code, ParameterErrorCode::SERVICE_UNAVAILABLE); + EXPECT_TRUE(set_result.error_message.find("cached") != std::string::npos); +} + // ==================== CONCURRENT ACCESS TEST ==================== TEST_F(TestConfigurationManager, test_concurrent_parameter_access) {