diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 0804cb3448..730372107b 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -334,6 +334,7 @@ Ort::Status QnnBackendManager::LoadBackend() { QNN_API_VERSION_PATCH}, &backend_interface_provider))); qnn_interface_ = backend_interface_provider->QNN_INTERFACE_VER_NAME; + htp_power_config_manager_.Init(qnn_interface_); backend_id_ = backend_interface_provider->backendId; backend_api_version_ = backend_interface_provider->apiVersion.backendApiVersion; SetQnnBackendType(backend_id_); @@ -394,6 +395,7 @@ Ort::Status QnnBackendManager::LoadQnnSerializerBackend() { QNN_API_VERSION_PATCH}, &serializer_interface_provider))); qnn_interface_ = serializer_interface_provider->QNN_INTERFACE_VER_NAME; // NOTE: QnnSaver/Ir will provide the interfaces + htp_power_config_manager_.Init(qnn_interface_); Qnn_Version_t backend_interface_version = GetQnnInterfaceApiVersion(backend_interface_provider); Qnn_Version_t serializer_interface_version = GetQnnInterfaceApiVersion(serializer_interface_provider); @@ -2001,9 +2003,17 @@ Ort::Status QnnBackendManager::SetupBackend( return status; } -Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, - uint32_t core_id, - uint32_t& htp_power_config_id) { +Ort::Status QnnBackendManager::InitializePowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) { + RETURN_IF_ERROR(CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id)); + htp_power_config_manager_.CreateTimerThread(htp_power_config_id); + return Ort::Status(); +} + +void QnnBackendManager::DeInitializePerfTimer() { + htp_power_config_manager_.ReleaseTimerThread(); +} + +Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) { // This function is called 
in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned // to a different EP. Therefore, we have to check that backend setup actually completed before trying to // create an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded. @@ -2023,26 +2033,6 @@ Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, return Ort::Status(); } -Ort::Status QnnBackendManager::SetHtpPowerConfigs(uint32_t htp_power_config_client_id, - HtpPerformanceMode htp_performance_mode, - uint32_t rpc_polling_time, - uint32_t rpc_control_latency) { - // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned - // to a different EP. Therefore, we have to check that backend setup actually completed before trying to - // set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded. - RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete."); - RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time, *logger_ptr_)); - RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency, *logger_ptr_)); - RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode, - htp_power_config_client_id, - *logger_ptr_)); - RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, - GetQnnInterface(), - *logger_ptr_)); - - return Ort::Status(); -} - Ort::Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run) { PerThreadHtpPowerConfigs_t htp_power_configs; if (!GetPerThreadHtpPowerConfigMapping(thread_id, htp_power_configs)) { @@ -2051,29 +2041,23 @@ Ort::Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id auto htp_power_config_id = htp_power_configs.power_config_id; if (pre_run) { + // add in htp_power_configs the default power 
config id also so to run when we execute if (htp_power_configs.pre_run_perf_mode.has_value()) { - RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.pre_run_perf_mode, - htp_power_config_id, - *logger_ptr_)); - } - - if (htp_power_configs.rpc_control_latency.has_value()) { - RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(*htp_power_configs.rpc_control_latency, - *logger_ptr_)); + power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.pre_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency}; + RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_START, config, *logger_ptr_)); + } else if (htp_power_configs.default_perf_mode.has_value()) { + power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency}; + RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_START, config, *logger_ptr_)); } - - if (htp_power_configs.rpc_polling_time.has_value()) { - RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(*htp_power_configs.rpc_polling_time, - *logger_ptr_)); + } else { + if (htp_power_configs.post_run_perf_mode.has_value()) { + power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.post_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency}; + RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_DONE, config, *logger_ptr_)); + } else if (htp_power_configs.default_perf_mode.has_value()) { + power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency}; + RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_DONE, config, *logger_ptr_)); } - } else if (htp_power_configs.post_run_perf_mode.has_value()) { - 
RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.post_run_perf_mode, - htp_power_config_id, - *logger_ptr_)); } - - RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_id, GetQnnInterface(), *logger_ptr_)); - return Ort::Status(); } @@ -2108,7 +2092,7 @@ void QnnBackendManager::RemovePerThreadHtpPowerConfigMapping(const std::thread:: per_thread_power_configs_.erase(thread_id); } -Ort::Status QnnBackendManager::DestroyHTPPowerConfigID(uint32_t htp_power_config_id) { +Ort::Status QnnBackendManager::DestroyHtpPowerConfigId(uint32_t htp_power_config_id) { QnnDevice_Infrastructure_t qnn_device_infra = nullptr; auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index d0dc3f7bae..7961258ce3 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -185,12 +185,11 @@ class QnnBackendManager : public std::enable_shared_from_this bool enable_htp_extended_udma_mode = false, bool enable_htp_prepare_only = false); - Ort::Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + Ort::Status InitializePowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + + void DeInitializePerfTimer(); - Ort::Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id, - HtpPerformanceMode htp_performance_mode, - uint32_t rpc_polling_time, - uint32_t rpc_control_latency); + Ort::Status DestroyHtpPowerConfigId(uint32_t htp_power_config_id); Ort::Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run); @@ -264,8 +263,6 @@ class QnnBackendManager : public std::enable_shared_from_this : backend_path.parent_path().string(); } - Ort::Status 
DestroyHTPPowerConfigID(uint32_t htp_power_config_id); - Ort::Status GetMaxSpillFillBufferSize(unsigned char* buffer, uint64_t buffer_length, uint64_t& max_spill_fill_buffer_size); @@ -326,7 +323,13 @@ class QnnBackendManager : public std::enable_shared_from_this } FileMappingCallbackInfo_t; #endif - void ResetLogger(const Ort::Logger& logger) { logger_ptr_ = &logger; } + void ResetLogger(const Ort::Logger& logger) { + logger_ptr_ = &logger; + } + + power::HtpPowerConfigManager& GetHtpPowerConfigManager() { + return htp_power_config_manager_; + } private: Ort::Status LoadBackend(); @@ -439,6 +442,8 @@ class QnnBackendManager : public std::enable_shared_from_this void* LibFunction(void* handle, const char* symbol, std::string& error_msg); + Ort::Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + template inline T ResolveSymbol(void* lib_handle, const char* sym, const Ort::Logger& logger) { std::string error_msg = ""; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index d3c917ee7e..3e372c301b 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -84,11 +84,18 @@ enum class HtpPerformanceMode : uint8_t { kHtpExtremePowerSaver, }; +// pre_run_perf_mode and post_run_perf_mode takes precedence over default_perf_mode. If pre_run_perf_mode is set, +// it will be used for performance setting in OnRunStart(). +// If post_run_perf_mode is set, it will be used for performance setting in OnRunDone(). +// If default_perf_mode is set and pre_run_perf_mode or post_run_perf_mode is not set, +// default_perf_mode will be used for performance setting in both OnRunStart() and OnRunDone(). +// rpc_control_latency and rpc_polling_time will be set beforehand in OnRunStart() as it depends on the performance mode set in OnRunStart(). 
typedef struct PerThreadHtpPowerConfigs { std::optional pre_run_perf_mode; std::optional post_run_perf_mode; std::optional rpc_control_latency; std::optional rpc_polling_time; + std::optional default_perf_mode; uint32_t power_config_id = 0; } PerThreadHtpPowerConfigs_t; @@ -130,17 +137,23 @@ bool IsQpuBackend(QnnBackendType backend_type); std::string QnnBackendTypeToString(QnnBackendType backend_type); -// constexpr config values +// latency values are in microseconds constexpr const int kSleepMinLatency = 40; constexpr const int kSleepLowLatency = 100; constexpr const int kSleepMediumLatency = 1000; constexpr const int kSleepHighLatency = 2000; +constexpr const int kSleepHigherLatency = 65535; + +// constexpr config values constexpr const int kDcvsDisable = 0; constexpr const int kDcvsEnable = 1; constexpr const uint32_t kDisableRpcPolling = 0; constexpr const uint32_t kDisableRpcControlLatency = 0; constexpr const uint32_t kMaxRpcPolling = 9999; +// Sustained high performance mode timer timeout duration in microseconds +constexpr const uint64_t kDefaultTimerTimeoutUs = 300000; + struct OnnxTensorInfo { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTensorInfo); OnnxTensorInfo(size_t index, int32_t data_type, std::vector&& shape) : index_(index), data_type_(data_type), shape_(std::move(shape)) {} diff --git a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc index 152a5fc575..dc8532578c 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc @@ -94,34 +94,29 @@ static std::string_view PerformanceModeToString(HtpPerformanceMode htp_performan return "UNKNOWN"; } +Ort::Status HtpPowerConfigManager::AddHtpPerformanceConfig(QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg) { + power_configs_.emplace_back(std::move(htp_performance_cfg)); + htp_performance_mode_set_ = 
true; + return Ort::Status(); +} + Ort::Status HtpPowerConfigManager::AddHtpPerformanceMode(HtpPerformanceMode htp_performance_mode, uint32_t htp_power_config_client_id, const Ort::Logger& logger) { RETURN_IF(htp_performance_mode_set_, "There is already a pending HTP performance mode config"); - if (htp_performance_mode == last_set_htp_performance_mode_) { - ORT_CXX_LOG(logger, - ORT_LOGGING_LEVEL_VERBOSE, - ("Requested htp performance mode is the same as last set (" + - std::string(PerformanceModeToString(last_set_htp_performance_mode_)) + - "). Ignoring request") - .c_str()); - } else { - ORT_CXX_LOG(logger, - ORT_LOGGING_LEVEL_VERBOSE, - ("Updating htp performance mode to: " + - std::string(PerformanceModeToString(htp_performance_mode)) + ".") - .c_str()); + ORT_CXX_LOG(logger, + ORT_LOGGING_LEVEL_VERBOSE, + ("Updating htp performance mode to: " + + std::string(PerformanceModeToString(htp_performance_mode)) + ".") + .c_str()); - QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg{}; - RETURN_IF_ERROR(SetHtpPerformancePowerConfig(htp_performance_cfg, - htp_power_config_client_id, - htp_performance_mode)); + QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg{}; + RETURN_IF_ERROR(SetHtpPerformancePowerConfig(htp_performance_cfg, + htp_power_config_client_id, + htp_performance_mode)); - power_configs_.emplace_back(std::move(htp_performance_cfg)); - - last_set_htp_performance_mode_ = htp_performance_mode; - htp_performance_mode_set_ = true; - } + power_configs_.emplace_back(std::move(htp_performance_cfg)); + htp_performance_mode_set_ = true; return Ort::Status(); } @@ -289,6 +284,323 @@ Ort::Status HtpPowerConfigManager::SetHtpPerformancePowerConfig(QnnHtpPerfInfras return Ort::Status(); } +void HtpPowerConfigManager::SetRelaxedPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id, DcvsState dcvsState) { + power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + 
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = power_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHighLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + if (dcvsState == DcvsState::DCVS_ENABLE) { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN; + } else { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + } + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.setCoreParams = 1; +} + +void HtpPowerConfigManager::SetExtremeLowPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id) { + power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = power_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHigherLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setCoreParams = 
1; +} + +void HtpPowerConfigManager::SetReleasedPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id, DcvsState dcvsState) { + power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = power_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHigherLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + if (dcvsState == DcvsState::DCVS_ENABLE) { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN; + } else { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + } + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.setCoreParams = 1; +} + +void HtpPowerConfigManager::CreateTimerThread(uint32_t htp_power_config_client_id) { + std::lock_guard lk(state_mutex_); + const Ort::Logger& logger = OrtLoggingManager::GetDefaultLogger(); + if (timer_ == nullptr) { + std::unique_ptr temp(new Timer()); + if (temp != nullptr) { + timer_ = std::move(temp); + timer_callback_arg_ = std::make_unique(htp_power_config_client_id, this); + if (!timer_->Initialize(TimerCallback, timer_callback_arg_.get())) { + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "Failed to create timer to set performance"); + timer_callback_arg_.reset(); + timer_.reset(); + } else { + timer_resource_.timer_active_ = true; + } + } else { + ORT_CXX_LOG(logger, 
ORT_LOGGING_LEVEL_VERBOSE, "Failed: Timer is nullptr"); + } + } else { + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "Timer already created"); + } +} + +void HtpPowerConfigManager::ReleaseTimerThread() { + std::unique_ptr local_timer; + std::unique_ptr local_callback_arg; + { + std::lock_guard lk(state_mutex_); + if (timer_ != nullptr) { + timer_resource_.timer_active_ = false; + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = false; + // Move ownership out while holding the lock: timer_ becomes nullptr + // atomically, so CreateTimerThread sees null and can safely create + // a new timer. We hold exclusive ownership in the locals. + local_timer = std::move(timer_); + local_callback_arg = std::move(timer_callback_arg_); + } + } + // Deinitialize outside the lock to avoid deadlock: an in-flight + // TimerCallback calls SetState() which acquires state_mutex_. + // Note: DeInitialize()->join() ensures any in-flight callback completes + // before the timer and callback_arg are destroyed, so no additional + // synchronization is needed to protect callback access to these objects. 
+ if (local_timer != nullptr) { + local_timer->DeInitialize(); + local_callback_arg.reset(); + local_timer.reset(); + } +} + +Ort::Status HtpPowerConfigManager::SetSustainedPerformance(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger) { + std::lock_guard lk(perf_mutex_); + Ort::Status status = Ort::Status(); + + std::chrono::microseconds sustainedDurationUs(timer_resource_.sustained_timer_duration_); + + switch (state) { + case GraphState::RUN_DONE: + if (IsTimerThreadRunning()) { + timer_->AbortTimer(); + } + RETURN_IF_NOT(timer_->Launch(sustainedDurationUs), "Not able to launch timer thread."); + timer_resource_.caller_busy_ = false; + break; + case GraphState::RUN_START: + if (IsTimerThreadRunning()) { + timer_->AbortTimer(); + } else { + status = SetHtpPowerConfigs(config, logger); + } + timer_resource_.caller_busy_ = true; + break; + case GraphState::INIT_DONE: { + QnnHtpPerfInfrastructure_PowerConfig_t init_done_htp_performance_cfg{}; + SetRelaxedPerfPowerConfig(init_done_htp_performance_cfg, config.htp_power_config_client_id, DcvsState::DCVS_DEFAULT); + status = SetHtpPowerCustomConfigs(config.htp_power_config_client_id, init_done_htp_performance_cfg, config.rpc_polling_time, config.rpc_control_latency, logger); + timer_resource_.caller_busy_ = false; + break; + } + case GraphState::INIT_START: + if (IsTimerThreadRunning()) { + timer_->AbortTimer(); + } else { + status = SetHtpPowerConfigs(config, logger); + } + timer_resource_.caller_busy_ = true; + break; + case GraphState::TIMEOUT: { + if (!timer_resource_.caller_busy_) { + QnnHtpPerfInfrastructure_PowerConfig_t timeout_htp_performance_cfg{}; + SetRelaxedPerfPowerConfig(timeout_htp_performance_cfg, config.htp_power_config_client_id, DcvsState::DCVS_DEFAULT); + status = SetHtpPowerCustomConfigs(config.htp_power_config_client_id, timeout_htp_performance_cfg, config.rpc_polling_time, config.rpc_control_latency, logger); + } + break; + } + default: + ORT_CXX_LOG(logger, 
ORT_LOGGING_LEVEL_VERBOSE, "Invalid graph state"); + break; + } + return status; +} + +Ort::Status HtpPowerConfigManager::SetPerformance(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger) { + std::lock_guard lk(perf_mutex_); + Ort::Status status = Ort::Status(); + switch (state) { + case GraphState::RUN_DONE: + case GraphState::INIT_DONE: + switch (config.perf_mode) { + case qnn::HtpPerformanceMode::kHtpLowBalanced: + case qnn::HtpPerformanceMode::kHtpBalanced: + case qnn::HtpPerformanceMode::kHtpHighPerformance: { + QnnHtpPerfInfrastructure_PowerConfig_t relaxed_htp_performance_cfg{}; + SetRelaxedPerfPowerConfig(relaxed_htp_performance_cfg, config.htp_power_config_client_id, DcvsState::DCVS_DEFAULT); + status = SetHtpPowerCustomConfigs(config.htp_power_config_client_id, relaxed_htp_performance_cfg, config.rpc_polling_time, config.rpc_control_latency, logger); + break; + } + case qnn::HtpPerformanceMode::kHtpExtremePowerSaver: { + QnnHtpPerfInfrastructure_PowerConfig_t extreme_power_saver_htp_performance_cfg{}; + SetExtremeLowPerfPowerConfig(extreme_power_saver_htp_performance_cfg, config.htp_power_config_client_id); + status = SetHtpPowerCustomConfigs(config.htp_power_config_client_id, extreme_power_saver_htp_performance_cfg, config.rpc_polling_time, config.rpc_control_latency, logger); + break; + } + case qnn::HtpPerformanceMode::kHtpLowPowerSaver: + case qnn::HtpPerformanceMode::kHtpHighPowerSaver: + case qnn::HtpPerformanceMode::kHtpPowerSaver: { + QnnHtpPerfInfrastructure_PowerConfig_t released_htp_performance_cfg{}; + SetReleasedPerfPowerConfig(released_htp_performance_cfg, config.htp_power_config_client_id, DcvsState::DCVS_DEFAULT); + status = SetHtpPowerCustomConfigs(config.htp_power_config_client_id, released_htp_performance_cfg, config.rpc_polling_time, config.rpc_control_latency, logger); + break; + } + default: + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "Invalid performance mode"); + break; + } + break; + case 
GraphState::RUN_START: + case GraphState::INIT_START: + status = SetHtpPowerConfigs(config, logger); + break; + default: + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "Invalid graph state"); + break; + } + return status; +} + +Ort::Status HtpPowerConfigManager::SetState(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger) { + { + std::lock_guard lk(state_mutex_); + if (state != graph_state_) { + graph_state_ = state; + } else { + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "State is the same as current. Ignoring request."); + return Ort::Status(); + } + if (config.perf_mode == qnn::HtpPerformanceMode::kHtpSustainedHighPerformance || config.perf_mode == qnn::HtpPerformanceMode::kHtpBurst) { + RETURN_IF(timer_resource_.timer_active_ == false, "Timer is not active. Cannot set state."); + RETURN_IF(timer_ == nullptr, "timer is not started"); + } + } + + // Dispatch to performance setters outside state_mutex_ to avoid deadlock: + // AbortTimer() blocks until the timer thread is idle, but the timer thread + // (inside TimerCallback) calls SetState() which acquires state_mutex_. + // Holding state_mutex_ across AbortTimer() would therefore deadlock. + // The same pattern is already applied in ReleaseTimerThread(). 
+ Ort::Status status; + if (config.perf_mode == qnn::HtpPerformanceMode::kHtpSustainedHighPerformance || config.perf_mode == qnn::HtpPerformanceMode::kHtpBurst) { + status = SetSustainedPerformance(state, config, logger); + } else if (config.perf_mode == qnn::HtpPerformanceMode::kHtpDefault) { + if (timer_ && timer_->TimerInUse()) { + timer_->AbortTimer(); + } + status = Ort::Status(); + } else { + if (timer_ && timer_->TimerInUse()) { + timer_->AbortTimer(); + } + status = SetPerformance(state, config, logger); + } + + // Update graph_state_ to NONE after performance functions complete + { + std::lock_guard lk(state_mutex_); + graph_state_ = GraphState::NONE; + } + + return status; +} + +void HtpPowerConfigManager::TimerCallback(void* user_data) { + TimerCallbackArg* args = static_cast(user_data); + if (args == nullptr) { + return; + } + HtpPowerConfigManager* instance = args->instance_; + if (instance == nullptr) { + return; + } + if (instance->timer_resource_.timer_active_) { + const Ort::Logger& logger = OrtLoggingManager::GetDefaultLogger(); + auto rt = instance->SetState(GraphState::TIMEOUT, {args->power_config_id_, qnn::HtpPerformanceMode::kHtpSustainedHighPerformance, 0, 0}, logger); + if (!rt.IsOK()) { + ORT_CXX_LOG(logger, ORT_LOGGING_LEVEL_VERBOSE, "State update failed"); + } + } +} + +bool HtpPowerConfigManager::IsTimerThreadRunning() { + std::chrono::microseconds remainUs = std::chrono::microseconds::zero(); + uint64_t remaining_duration = 0; + if (timer_ && timer_->TimerInUse() && timer_->RemainingDuration(remainUs)) { + remaining_duration = static_cast(remainUs.count()); + return remaining_duration > 0 && remaining_duration < timer_resource_.sustained_timer_duration_; + } + return false; +} + +Ort::Status HtpPowerConfigManager::SetHtpPowerConfigs(const HtpPerfConfig_t& config, const Ort::Logger& logger) { + RETURN_IF(qnn_interface_ == nullptr, "QNN interface is not initialized. 
Call Init() first."); + RETURN_IF_ERROR(AddRpcPollingTime(config.rpc_polling_time, logger)); + RETURN_IF_ERROR(AddRpcControlLatency(config.rpc_control_latency, logger)); + RETURN_IF_ERROR(AddHtpPerformanceMode(config.perf_mode, + config.htp_power_config_client_id, logger)); + RETURN_IF_ERROR(SetPowerConfig(config.htp_power_config_client_id, + *qnn_interface_, logger)); + + return Ort::Status(); +} + +Ort::Status HtpPowerConfigManager::SetHtpPowerCustomConfigs(uint32_t htp_power_config_client_id, + const QnnHtpPerfInfrastructure_PowerConfig_t& power_config, + uint32_t rpc_polling_time, + uint32_t rpc_control_latency, + const Ort::Logger& logger) { + RETURN_IF(qnn_interface_ == nullptr, "QNN interface is not initialized. Call Init() first."); + RETURN_IF_ERROR(AddRpcPollingTime(rpc_polling_time, logger)); + RETURN_IF_ERROR(AddRpcControlLatency(rpc_control_latency, logger)); + RETURN_IF_ERROR(AddHtpPerformanceConfig(power_config)); + RETURN_IF_ERROR(SetPowerConfig(htp_power_config_client_id, *qnn_interface_, logger)); + + return Ort::Status(); +} + } // namespace power } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h index eed403bb40..26c816013d 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h @@ -11,13 +11,42 @@ #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/timer.h" namespace onnxruntime { namespace qnn { namespace power { +// Graph states to tune the power/performance configurations +enum class GraphState { + INIT_START, + INIT_DONE, + RUN_START, + RUN_DONE, + TIMEOUT, + NONE +}; + +typedef struct HtpPerfConfig { + uint32_t htp_power_config_client_id; + HtpPerformanceMode perf_mode; + uint32_t rpc_polling_time; + uint32_t 
rpc_control_latency; +} HtpPerfConfig_t; + +enum class DcvsState { + DCVS_DEFAULT = 0, + DCVS_DISABLE = 1, + DCVS_ENABLE = 2 +}; + // Manages staging of any new power configurations and -// updates power configurations for the HTP backend +// updates power configurations for the HTP backend. +// +// IMPORTANT: Init() must be called before any other methods that access +// the QNN interface (SetState, SetPowerConfig, etc.), typically during +// backend initialization. Failure to call Init() will result in errors +// when attempting to set power configurations. class HtpPowerConfigManager { public: HtpPowerConfigManager(); @@ -34,12 +63,14 @@ class HtpPowerConfigManager { Ort::Status AddRpcControlLatency(uint32_t rpc_control_latency, const Ort::Logger& logger); // Stages a new performance mode for next power config update - // If the value is the same as the last previously set, then - // there will be no new performance mode staged Ort::Status AddHtpPerformanceMode(HtpPerformanceMode htp_performance_mode, uint32_t htp_power_config_client_id, const Ort::Logger& logger); + // Stages a new HTP power configuration for next power config update + // performance mode is set to default after setting the power config + Ort::Status AddHtpPerformanceConfig(QnnHtpPerfInfrastructure_PowerConfig_t); + // Takes all configs staged for update and attempts to update // the HTP power configurations. If there is nothing staged, // then no attempt will be made. 
@@ -47,22 +78,76 @@ class HtpPowerConfigManager { const QNN_INTERFACE_VER_TYPE& qnn_interface, const Ort::Logger& logger); + void CreateTimerThread(uint32_t htp_power_config_client_id); + + void ReleaseTimerThread(); + + Ort::Status SetState(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger); + + void Init(const QNN_INTERFACE_VER_TYPE& qnn_interface) { qnn_interface_ = &qnn_interface; } + private: + ORT_DISALLOW_COPY_AND_ASSIGNMENT(HtpPowerConfigManager); // Sets voltage corner votes for HTP based on the given performance mode Ort::Status SetHtpPerformancePowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id, const HtpPerformanceMode& htp_performance_mode); + Ort::Status SetSustainedPerformance(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger); + + Ort::Status SetPerformance(GraphState state, const HtpPerfConfig_t& config, const Ort::Logger& logger); + + static void TimerCallback(void* user_data); + + bool IsTimerThreadRunning(); + + Ort::Status SetHtpPowerConfigs(const HtpPerfConfig_t& config, const Ort::Logger& logger); + + Ort::Status SetHtpPowerCustomConfigs(uint32_t htp_power_config_client_id, const QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t rpc_polling_time, uint32_t rpc_control_latency, const Ort::Logger& logger); + + // Sets power config for relaxed performance mode based on DCVS state + void SetRelaxedPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, + uint32_t htp_power_config_client_id, + DcvsState dcvsState); + + // Sets power config for released performance mode based on DCVS state + void SetReleasedPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id, DcvsState dcvsState); + + // Sets power config for extreme low performance mode + void SetExtremeLowPerfPowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, uint32_t htp_power_config_client_id); + 
uint32_t last_set_rpc_polling_time_ = kDisableRpcPolling; uint32_t last_set_rpc_control_latency_ = kDisableRpcControlLatency; - HtpPerformanceMode last_set_htp_performance_mode_ = HtpPerformanceMode::kHtpDefault; bool rpc_polling_time_set_ = false; bool rpc_control_latency_set_ = false; bool htp_performance_mode_set_ = false; std::vector power_configs_; + + const QNN_INTERFACE_VER_TYPE* qnn_interface_ = nullptr; + + // Lock acquisition order: state_mutex_ must always be acquired before perf_mutex_ + // to prevent deadlocks. Never acquire state_mutex_ while already holding perf_mutex_. + std::mutex perf_mutex_; + std::mutex state_mutex_; + std::unique_ptr timer_; + struct TimerResource { + static constexpr uint64_t sustained_timer_duration_ = kDefaultTimerTimeoutUs; // in microseconds + std::atomic caller_busy_ = false; + std::atomic timer_active_ = false; + }; + TimerResource timer_resource_; + GraphState graph_state_ = GraphState::NONE; + struct TimerCallbackArg { + uint32_t power_config_id_; + HtpPowerConfigManager* instance_; + TimerCallbackArg(uint32_t id, HtpPowerConfigManager* manager) + : power_config_id_(id), instance_(manager) {} + }; + std::unique_ptr timer_callback_arg_; }; + } // namespace power } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_state_guard.h b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_state_guard.h new file mode 100644 index 0000000000..4d84f57b95 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_state_guard.h @@ -0,0 +1,86 @@ +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h" + +namespace onnxruntime { +namespace qnn { +// RAII guard for HtpPowerConfigManager::SetState. +// +// Calls SetState(start_state, ...) when SetPreRunHtpPerfStatus() is invoked and +// SetState(done_state, ...) 
on destruction, ensuring the done state is always reached +// even on early returns. +// +// Typical usage (INIT_START / INIT_DONE pair): +// +// power::HtpPowerConfigManager* power_manager = ...; +// bool valid_power_config_id = ...; // determined by caller based on whether power config id was successfully created +// power::HtpPerfConfig_t config = ...; // configured as needed for the operation +// HtpPowerStateGuard power_guard(power_manager, valid_power_config_id, power::GraphState::INIT_START, power::GraphState::INIT_DONE, +// config, logger); +// // ... optional setup work ... +// RETURN_IF_NOT_OK(power_guard.SetPreRunHtpPerfStatus()); // Sets the pre-run state here +// auto status = DoWork(...); +// RETURN_IF_NOT_OK(power_guard.SetPostRunHtpPerf()); // optional: capture post-run perf error +// return status; +// +// Passing nullptr as power_manager, or false as valid_power_config_id, creates a no-op guard (all calls succeed immediately). +class HtpPowerStateGuard { + public: + HtpPowerStateGuard(power::HtpPowerConfigManager* power_manager, + bool valid_power_config_id, + power::GraphState start_state, + power::GraphState done_state, + const power::HtpPerfConfig_t& config, + const Ort::Logger& logger) + : power_manager_(power_manager), + valid_power_config_id_(valid_power_config_id), + start_state_(start_state), + done_state_(done_state), + config_(config), + logger_(logger), + pre_run_called_(false), + finalized_(false) { + } + ~HtpPowerStateGuard() { + if (pre_run_called_ && !finalized_ && power_manager_ && valid_power_config_id_) { + // Error cannot be propagated from a destructor; silently ignore. + power_manager_->SetState(done_state_, config_, logger_); + } + } + // Sets HTP performance state before work begins and returns the status. + // Should be called after construction and before the actual work starts. + // This provides flexibility to perform other setup between construction and state setting.
+ Ort::Status SetPreRunHtpPerfStatus() { + pre_run_called_ = true; + if (power_manager_ && valid_power_config_id_) { + return power_manager_->SetState(start_state_, config_, logger_); + } + return Ort::Status(); + } + // Explicitly sets HTP performance after work is done and returns its status. + // After this call the destructor will not invoke SetState again. + Ort::Status SetPostRunHtpPerf() { + finalized_ = true; + if (power_manager_ && valid_power_config_id_) { + return power_manager_->SetState(done_state_, config_, logger_); + } + return Ort::Status(); + } + HtpPowerStateGuard(const HtpPowerStateGuard&) = delete; + HtpPowerStateGuard& operator=(const HtpPowerStateGuard&) = delete; + + private: + power::HtpPowerConfigManager* power_manager_; + bool valid_power_config_id_; + power::GraphState start_state_; + power::GraphState done_state_; + power::HtpPerfConfig_t config_; + const Ort::Logger& logger_; + bool pre_run_called_; + bool finalized_; +}; +} // namespace qnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/timer.cc b/onnxruntime/core/providers/qnn/builder/timer.cc new file mode 100644 index 0000000000..c339f6dd55 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/timer.cc @@ -0,0 +1,95 @@ +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
+// SPDX-License-Identifier: MIT + +#include "timer.h" + +namespace onnxruntime { +namespace qnn { +namespace power { + +void Timer::DeInitialize() { + std::unique_lock lk(mtx_); + is_timer_deinit_ = true; + cv_.notify_all(); + lk.unlock(); + if (bkg_thread_.joinable()) { + bkg_thread_.join(); + } +} + +Timer::~Timer() { this->DeInitialize(); } + +void Timer::BkgTimer() { + { + std::unique_lock lk(mtx_); + thread_status_ = threadState::IDLE; + cv_.notify_all(); + } + while (true) { + std::unique_lock lk(mtx_); + + if (thread_status_ == threadState::IDLE) { + cv_.wait(lk, [&]() { + return is_timer_launched_ || is_timer_stopped_ || is_timer_deinit_; + }); + } + + if (is_timer_deinit_) { + thread_status_ = threadState::DEINIT; + is_timer_deinit_ = false; + return; + } + + if (is_timer_stopped_) { + thread_status_ = threadState::IDLE; + is_timer_stopped_ = false; + cv_.notify_all(); + } + + if (thread_status_ == threadState::LAUNCH) { + bool isElapsed = !cv_.wait_until(lk, end_time_, [&]() { + return is_timer_stopped_ || is_timer_deinit_; + }); + if (isElapsed) { + thread_status_ = threadState::CALLING; + lk.unlock(); + timeout_fn_(timeout_arg_); + lk.lock(); + thread_status_ = threadState::IDLE; + cv_.notify_all(); + } + is_timer_launched_ = false; + } + } +} + +bool Timer::Initialize(std::function callbackFn, void* callbackArg) { + std::unique_lock lk(mtx_); + timeout_arg_ = callbackArg; + timeout_fn_ = callbackFn; + try { + bkg_thread_ = std::thread(&Timer::BkgTimer, this); + } catch (const std::system_error& e) { + ORT_UNUSED_PARAMETER(e); + thread_status_ = threadState::FAILED; + return false; + } + cv_.wait(lk, [&] { return thread_status_ == threadState::IDLE; }); + return true; +} + +void Timer::AbortTimer() { + std::unique_lock lk(mtx_); + is_timer_stopped_ = true; + cv_.notify_all(); + cv_.wait(lk, [&] { return thread_status_ == threadState::IDLE; }); +} + +bool Timer::TimerInUse() { + std::unique_lock lk(mtx_); + return thread_status_ == threadState::LAUNCH 
|| thread_status_ == threadState::CALLING; +} + +} // namespace power +} // namespace qnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/timer.h b/onnxruntime/core/providers/qnn/builder/timer.h new file mode 100644 index 0000000000..48f1c921ad --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/timer.h @@ -0,0 +1,83 @@ +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include +#include "core/providers/qnn/ort_api.h" + +namespace onnxruntime { +namespace qnn { +namespace power { + +class Timer { + public: + enum class threadState { + IDLE, // Timer thread is running and waiting for a launch, stop or deinit request + LAUNCH, // Timer is counting down + CALLING, // Callback function is executing + DEINIT, // Timer is deinitialized (also the initial state) + FAILED // Timer thread failed to start + }; + // constructor + Timer() = default; + // destructor + ~Timer(); + + template + bool RemainingDuration(std::chrono::duration& duration) { + std::unique_lock lk(mtx_); + if (thread_status_ == threadState::LAUNCH) { + duration = std::chrono::duration_cast>(end_time_ - std::chrono::steady_clock::now()); + return true; + } else if (thread_status_ == threadState::CALLING || thread_status_ == threadState::IDLE) { + duration = std::chrono::duration::zero(); + return true; + } else { + duration = std::chrono::duration::zero(); + return false; + } + } + + template + bool Launch(const std::chrono::duration& timeoutVal) { + std::unique_lock lk(mtx_); + if (thread_status_ != threadState::IDLE) { + return false; + } + end_time_ = std::chrono::steady_clock::now() + timeoutVal; + thread_status_ = threadState::LAUNCH; + is_timer_launched_ = true; + cv_.notify_all(); + return true; + } + + bool Initialize(std::function callbackFn, void* callbackArg); + void DeInitialize(); + void AbortTimer(); + + bool TimerInUse(); + + private: + ORT_DISALLOW_COPY_AND_ASSIGNMENT(Timer); + + std::thread bkg_thread_; + void BkgTimer(); +
std::mutex mtx_; + std::condition_variable cv_; + std::function timeout_fn_; + void* timeout_arg_{nullptr}; + std::atomic thread_status_{threadState::DEINIT}; + std::chrono::time_point end_time_; + std::atomic is_timer_stopped_ = false; + std::atomic is_timer_deinit_ = false; + std::atomic is_timer_launched_ = false; +}; + +} // namespace power +} // namespace qnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 731339cc5a..ab885af4dd 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -1103,10 +1103,10 @@ QnnEp::~QnnEp() { if (qnn_backend_manager_) { auto thread_id = std::this_thread::get_id(); qnn_backend_manager_->RemovePerThreadHtpPowerConfigMapping(thread_id); - + qnn_backend_manager_->DeInitializePerfTimer(); std::lock_guard lock(config_id_mutex_); if (htp_power_config_id_.has_value()) { - qnn_backend_manager_->DestroyHTPPowerConfigID(*htp_power_config_id_); + qnn_backend_manager_->DestroyHtpPowerConfigId(*htp_power_config_id_); } } @@ -2068,9 +2068,33 @@ OrtStatus* ORT_API_CALL QnnEp::CompileImpl(_In_ OrtEp* this_ptr, QnnEp* ep = static_cast(this_ptr); if (qnn::IsOrtGraphHasCtxNode(graphs, count, ep->ort_api)) { - return ep->CompileContextModel(graphs, fused_nodes, count, node_compute_infos); + uint32_t htp_power_config_id = 0; + bool power_config_valid = ep->GetHtpPowerConfigId(htp_power_config_id); + qnn::power::HtpPerfConfig_t perf_config{htp_power_config_id, ep->default_htp_performance_mode_, ep->default_rpc_polling_time_, ep->default_rpc_control_latency_}; + qnn::HtpPowerStateGuard power_guard( + &ep->qnn_backend_manager_->GetHtpPowerConfigManager(), + power_config_valid, + qnn::power::GraphState::INIT_START, qnn::power::GraphState::INIT_DONE, + perf_config, + ep->logger_); + RETURN_IF_NOT_OK(power_guard.SetPreRunHtpPerfStatus()); + auto status = 
ep->CompileContextModel(graphs, fused_nodes, count, node_compute_infos); + RETURN_IF_NOT_OK(power_guard.SetPostRunHtpPerf()); + return status; } else if (qnn::IsOrtGraphHasDlcCtxNode(graphs, count, ep->ort_api)) { - return ep->CompileDlcContextModel(this_ptr, graphs, fused_nodes, count, node_compute_infos); + uint32_t htp_power_config_id = 0; + bool power_config_valid = ep->GetHtpPowerConfigId(htp_power_config_id); + qnn::power::HtpPerfConfig_t perf_config{htp_power_config_id, ep->default_htp_performance_mode_, ep->default_rpc_polling_time_, ep->default_rpc_control_latency_}; + qnn::HtpPowerStateGuard power_guard( + &ep->qnn_backend_manager_->GetHtpPowerConfigManager(), + power_config_valid, + qnn::power::GraphState::INIT_START, qnn::power::GraphState::INIT_DONE, + perf_config, + ep->logger_); + RETURN_IF_NOT_OK(power_guard.SetPreRunHtpPerfStatus()); + auto status = ep->CompileDlcContextModel(this_ptr, graphs, fused_nodes, count, node_compute_infos); + RETURN_IF_NOT_OK(power_guard.SetPostRunHtpPerf()); + return status; } #if defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) @@ -2182,7 +2206,21 @@ OrtStatus* ORT_API_CALL QnnEp::CompileImpl(_In_ OrtEp* this_ptr, } else { finalize_start = std::chrono::steady_clock::now(); #endif - RETURN_IF_NOT_OK(qnn_model->FinalizeGraphs(ep->logger_)); + uint32_t htp_power_config_id = 0; + bool valid_power_config_id = ep->GetHtpPowerConfigId(htp_power_config_id); + qnn::power::HtpPerfConfig_t perf_config{htp_power_config_id, ep->default_htp_performance_mode_, ep->default_rpc_polling_time_, ep->default_rpc_control_latency_}; + qnn::HtpPowerStateGuard power_guard( + &ep->qnn_backend_manager_->GetHtpPowerConfigManager(), + valid_power_config_id, + qnn::power::GraphState::INIT_START, qnn::power::GraphState::INIT_DONE, + perf_config, + ep->logger_); + RETURN_IF_NOT_OK(power_guard.SetPreRunHtpPerfStatus()); + auto finalize_status = qnn_model->FinalizeGraphs(ep->logger_); + RETURN_IF_NOT_OK(power_guard.SetPostRunHtpPerf()); + 
if (!finalize_status.IsOK()) { + return finalize_status.release(); + } #if defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) end = std::chrono::steady_clock::now(); total_finalize_time += std::chrono::duration_cast(end - finalize_start); @@ -2207,6 +2245,16 @@ OrtStatus* ORT_API_CALL QnnEp::CompileImpl(_In_ OrtEp* this_ptr, if (use_multithreaded_prepare) { qnn::thread::QnnJobThreadPool tp(ep->num_graph_prepare_threads_); tp.Start(); + uint32_t htp_power_config_id = 0; + bool valid_power_config_id = ep->GetHtpPowerConfigId(htp_power_config_id); + qnn::power::HtpPerfConfig_t perf_config{htp_power_config_id, ep->default_htp_performance_mode_, ep->default_rpc_polling_time_, ep->default_rpc_control_latency_}; + qnn::HtpPowerStateGuard power_guard( + &ep->qnn_backend_manager_->GetHtpPowerConfigManager(), + valid_power_config_id, + qnn::power::GraphState::INIT_START, qnn::power::GraphState::INIT_DONE, + perf_config, + ep->logger_); + RETURN_IF_NOT_OK(power_guard.SetPreRunHtpPerfStatus()); finalize_start = std::chrono::steady_clock::now(); for (auto& model_info : model_infos) { tp.SubmitJob([qnn_model = model_info.model.get(), &logger = ep->logger_, res = &model_info.result] { @@ -2216,7 +2264,7 @@ OrtStatus* ORT_API_CALL QnnEp::CompileImpl(_In_ OrtEp* this_ptr, tp.WaitForQueuedJobsToFinish(); end = std::chrono::steady_clock::now(); total_finalize_time = std::chrono::duration_cast(end - finalize_start); - + RETURN_IF_NOT_OK(power_guard.SetPostRunHtpPerf()); for (auto& model_info : model_infos) { RETURN_IF_NOT_OK(std::move(model_info.result)); @@ -2328,13 +2376,11 @@ OrtStatus* ORT_API_CALL QnnEp::ShouldConvertDataLayoutForOpImpl(_In_ OrtEp* this return nullptr; } -bool QnnEp::GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thread_htp_power_configs, +void QnnEp::GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thread_htp_power_configs, const ::OrtRunOptions* run_options) { qnn::HtpPerformanceMode pre_run_htp_performance_mode = 
qnn::HtpPerformanceMode::kHtpDefault; qnn::HtpPerformanceMode post_run_htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - bool configs_set = false; - const char* htp_perf_mode = nullptr; htp_perf_mode = ort_api.GetRunConfigEntry(run_options, kOrtRunOptionsConfigQnnPerfMode); if (htp_perf_mode != nullptr) { @@ -2353,29 +2399,35 @@ bool QnnEp::GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thr if (rpc_latency != nullptr) { rpc_control_latency = static_cast(std::stoul(rpc_latency)); per_thread_htp_power_configs.rpc_control_latency = rpc_control_latency; - configs_set = true; ORT_CXX_LOG(logger_, ORT_LOGGING_LEVEL_VERBOSE, (std::string("rpc_control_latency: ") + rpc_latency).c_str()); + } else { + per_thread_htp_power_configs.rpc_control_latency = default_rpc_control_latency_; } - uint32_t rpc_polling_time = 0; - if (qnn::HtpPerformanceMode::kHtpBurst == pre_run_htp_performance_mode) { - rpc_polling_time = 9999; + // This ensures that rpc polling time is always set to a value + per_thread_htp_power_configs.rpc_polling_time = qnn::kDisableRpcPolling; + + if (qnn::HtpPerformanceMode::kHtpDefault != dynamic_htp_performance_mode_) { + // reset perf mode, rpc control latency and rpc polling time to dynamic perf mode values + per_thread_htp_power_configs.default_perf_mode = dynamic_htp_performance_mode_; + per_thread_htp_power_configs.rpc_polling_time = dynamic_rpc_polling_time_; + } else if (qnn::HtpPerformanceMode::kHtpDefault != default_htp_performance_mode_) { + per_thread_htp_power_configs.default_perf_mode = default_htp_performance_mode_; + per_thread_htp_power_configs.rpc_polling_time = default_rpc_polling_time_; } if (qnn::HtpPerformanceMode::kHtpDefault != pre_run_htp_performance_mode) { per_thread_htp_power_configs.pre_run_perf_mode = pre_run_htp_performance_mode; // rpc polling time will only be updated with perf mode changes - per_thread_htp_power_configs.rpc_polling_time = rpc_polling_time; - configs_set = true; + if 
(qnn::HtpPerformanceMode::kHtpBurst == pre_run_htp_performance_mode) { + per_thread_htp_power_configs.rpc_polling_time = 9999; + } } if (qnn::HtpPerformanceMode::kHtpDefault != post_run_htp_performance_mode) { per_thread_htp_power_configs.post_run_perf_mode = post_run_htp_performance_mode; - configs_set = true; } - - return configs_set; } OrtStatus* ORT_API_CALL QnnEp::OnRunStartImpl(_In_ OrtEp* this_ptr, _In_ const ::OrtRunOptions* run_options) noexcept { @@ -2396,11 +2448,10 @@ OrtStatus* ORT_API_CALL QnnEp::OnRunStartImpl(_In_ OrtEp* this_ptr, _In_ const : if (ep->GetHtpPowerConfigId(htp_power_config_id)) { auto thread_id = std::this_thread::get_id(); qnn::PerThreadHtpPowerConfigs_t per_thread_htp_power_configs; - if (ep->GetPerThreadHtpPowerConfigs(per_thread_htp_power_configs, run_options)) { - per_thread_htp_power_configs.power_config_id = htp_power_config_id; - RETURN_IF_ERROR(ep->qnn_backend_manager_->AddPerThreadHtpPowerConfigMapping(thread_id, - per_thread_htp_power_configs)); - } + ep->GetPerThreadHtpPowerConfigs(per_thread_htp_power_configs, run_options); + per_thread_htp_power_configs.power_config_id = htp_power_config_id; + RETURN_IF_ERROR(ep->qnn_backend_manager_->AddPerThreadHtpPowerConfigMapping(thread_id, + per_thread_htp_power_configs)); } const char* lora_config = nullptr; @@ -2492,18 +2543,14 @@ OrtStatus* ORT_API_CALL QnnEp::SetDynamicOptionsImpl(_In_ OrtEp* this_ptr, } qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; ParseHtpPerformanceMode(value, htp_performance_mode, ep->logger_); - - uint32_t rpc_polling_time = 0; - if (htp_performance_mode == qnn::HtpPerformanceMode::kHtpBurst) { - rpc_polling_time = 9999; - } - - uint32_t htp_power_config_id = 0; - if (ep->GetHtpPowerConfigId(htp_power_config_id)) { - RETURN_IF_NOT_OK(ep->qnn_backend_manager_->SetHtpPowerConfigs(htp_power_config_id, - htp_performance_mode, - rpc_polling_time, - ep->default_rpc_control_latency_)); + // Dynamic HTP performance mode is 
used for performance setting for execute so it will be set in OnRunStart. + if (htp_performance_mode != qnn::HtpPerformanceMode::kHtpDefault) { + ep->dynamic_htp_performance_mode_ = htp_performance_mode; + if (htp_performance_mode == qnn::HtpPerformanceMode::kHtpBurst) { + ep->dynamic_rpc_polling_time_ = 9999; + } else { + ep->dynamic_rpc_polling_time_ = 0; + } } } else { ORT_CXX_LOG(ep->logger_, @@ -2677,19 +2724,10 @@ void QnnEp::CreateHtpPowerConfigId() const { constexpr uint32_t core_id = 0; uint32_t htp_power_config_id; - Ort::Status rt = qnn_backend_manager_->CreateHtpPowerCfgId(device_id_, core_id, htp_power_config_id); + Ort::Status rt = qnn_backend_manager_->InitializePowerCfgId(device_id_, core_id, htp_power_config_id); if (rt.IsOK()) { htp_power_config_id_ = htp_power_config_id; - - rt = qnn_backend_manager_->SetHtpPowerConfigs(htp_power_config_id, - default_htp_performance_mode_, - default_rpc_polling_time_, - default_rpc_control_latency_); - - if (!rt.IsOK()) { - ORT_CXX_LOG(logger_, ORT_LOGGING_LEVEL_ERROR, "Unable to set HTP power configurations."); - } } else { ORT_CXX_LOG(logger_, ORT_LOGGING_LEVEL_ERROR, "Failed to create HTP power config id."); } diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index c90b7844f7..bcb2734fa4 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -26,6 +26,7 @@ #include "core/providers/qnn/genie/genie_api_loader.h" #include "core/providers/qnn/genie/genie_node.h" #include "core/providers/qnn/genie/genie_node_compute_info.h" +#include "core/providers/qnn/builder/qnn_htp_power_state_guard.h" namespace onnxruntime { class QnnEpFactory; @@ -157,8 +158,8 @@ class QnnEp : public OrtEp, public ApiPtrs { } GraphFinalizationInfo_t; - // Will return true if any power config options need to be updated - bool GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& 
per_thread_htp_power_configs, + // Retrieves per-thread HTP power configurations from run options + void GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thread_htp_power_configs, const ::OrtRunOptions* run_options); void CreateHtpPowerConfigId() const; @@ -203,8 +204,10 @@ class QnnEp : public OrtEp, public ApiPtrs { // Configurations for HTP backend. uint32_t device_id_{0}; qnn::HtpPerformanceMode default_htp_performance_mode_{qnn::HtpPerformanceMode::kHtpDefault}; + qnn::HtpPerformanceMode dynamic_htp_performance_mode_{qnn::HtpPerformanceMode::kHtpDefault}; uint32_t default_rpc_control_latency_ = 0; uint32_t default_rpc_polling_time_ = 0; + uint32_t dynamic_rpc_polling_time_ = 0; qnn::ModelSettings model_settings_ = {}; qnn::HtpGraphFinalizationOptimizationMode htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault; int32_t vtcm_size_in_mb_ = 0; diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index 65664c47ba..8381090310 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -983,6 +983,56 @@ TEST_F(QnnHTPBackendTests, MultithreadDefaultHtpPowerCfgFromEpOption) { } } +// Tests running a single session in multiple threads on the HTP backend with EP option to set default power config to sustained high performance +TEST_F(QnnHTPBackendTests, MultithreadSustainedHighPowerCfgFromEpOption) { + std::unique_ptr model; + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector shape = {1, 3, 2}; + std::vector> output_shapes = {shape}; + std::vector> output_values = {{3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}}; + + CreateModelInMemory(model, + QDQBuildAdd3Tensors(TestInputDef(shape, false, input_data), + TestInputDef(shape, false, input_data), + TestInputDef(shape, false, input_data))); + + ProviderOptions options; +#if defined(_WIN32) + options["backend_path"] = "QnnHtp.dll"; 
+#else + options["backend_path"] = "libQnnHtp.so"; +#endif + options["offload_graph_io_quantization"] = "0"; + options["htp_performance_mode"] = "sustained_high_performance"; + +#if defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) + // By default, 8 is used, which will impact time to run all + // unit tests due to overhead of thread creation/destruction + options["num_graph_prepare_threads"] = "1"; +#endif + + Ort::SessionOptions session_opts; + session_opts.SetLogId("logger0"); + + RegisteredEpDeviceUniquePtr registered_ep_device; + RegisterQnnEpLibrary(registered_ep_device, session_opts, kQnnExecutionProvider, options); + + ScopedOrtSession scoped(std::move(registered_ep_device), + Ort::Session(*ort_env, model->model_data.data(), model->model_data.size(), session_opts)); + + std::vector threads; + constexpr int num_threads = 5; + constexpr int loop_count = 10; + for (int i = 0; i < num_threads; i++) { + threads.push_back(std::thread(RunSessionAndVerify, std::ref(scoped.session()), Ort::RunOptions{nullptr}, + std::ref(model->builder.feeds_), output_shapes, output_values, loop_count)); + } + + for (auto& th : threads) { + th.join(); + } +} + // Tests running a single session in multiple threads on the HTP backend with // EP option to set default power config + run option to set power config for each run TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgDefaultAndRunOption) {