Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
9b67a85
rebased pr
qti-monumeen Jun 5, 2026
78d9f1e
Added the review changes
qti-monumeen Apr 14, 2026
8c60d7f
Added the review changes
qti-monumeen Apr 14, 2026
3e08f23
fixing test failure
qti-monumeen Apr 15, 2026
7d28442
addressing reviews
qti-monumeen Apr 16, 2026
6603640
addressing reviews
qti-monumeen Apr 16, 2026
303b8ea
addressing reviews
qti-monumeen Apr 16, 2026
0079f47
fix test
qti-monumeen Apr 16, 2026
f27f082
addressing test
qti-monumeen Apr 17, 2026
1af97b8
addressing test
qti-monumeen Apr 17, 2026
59b1e42
addressing test
qti-monumeen Apr 17, 2026
5bba691
fixing build failure
qti-monumeen Apr 18, 2026
5e0ed87
Refactoring perf changes
qti-monumeen Apr 20, 2026
8558120
fix fail test
qti-monumeen Apr 20, 2026
345627f
removing redundant code
qti-monumeen Apr 22, 2026
be289fe
refactoring code
qti-monumeen Apr 22, 2026
78e777b
refactoring code
qti-monumeen Apr 22, 2026
6c8a707
addressing comments
qti-monumeen Apr 22, 2026
0d4188a
addressed review comments
qti-monumeen Apr 22, 2026
1f8c8c3
fix build failure
qti-monumeen Apr 22, 2026
8081636
addressing comments
qti-monumeen Apr 23, 2026
33eda5b
adressing new comments
qti-monumeen Apr 24, 2026
6c7015f
changing implementation of release timer
qti-monumeen Apr 26, 2026
791b5a8
moving raii wrapper from qnn execution provider
qti-monumeen Apr 28, 2026
8f70095
fix for test failure
qti-monumeen Apr 28, 2026
bd73eae
addressing comments
qti-monumeen Apr 28, 2026
39b1bc2
addressing comments
qti-monumeen Apr 29, 2026
35a2903
improving logger functionality of htp_power_config_manager
qti-monumeen Apr 29, 2026
9c7020e
addressing comments
qti-monumeen Apr 29, 2026
13cf72e
addressing comments
qti-monumeen Jun 5, 2026
74379c5
fix for test failure
qti-monumeen Jun 8, 2026
4646461
fix for test failure
qti-monumeen Jun 8, 2026
12ee3a9
added changes to improve code
qti-monumeen Jun 8, 2026
be1e054
adding changes for review comment
qti-monumeen Jun 9, 2026
a786dd6
resolving review comments
qti-monumeen Jun 15, 2026
f0f5fc8
addressing reviews
qti-monumeen Jun 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 27 additions & 43 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ Ort::Status QnnBackendManager::LoadBackend() {
QNN_API_VERSION_PATCH},
&backend_interface_provider)));
qnn_interface_ = backend_interface_provider->QNN_INTERFACE_VER_NAME;
htp_power_config_manager_.Init(qnn_interface_);
backend_id_ = backend_interface_provider->backendId;
backend_api_version_ = backend_interface_provider->apiVersion.backendApiVersion;
SetQnnBackendType(backend_id_);
Expand Down Expand Up @@ -394,6 +395,7 @@ Ort::Status QnnBackendManager::LoadQnnSerializerBackend() {
QNN_API_VERSION_PATCH},
&serializer_interface_provider)));
qnn_interface_ = serializer_interface_provider->QNN_INTERFACE_VER_NAME; // NOTE: QnnSaver/Ir will provide the interfaces
htp_power_config_manager_.Init(qnn_interface_);
Comment thread
huaychou marked this conversation as resolved.

Qnn_Version_t backend_interface_version = GetQnnInterfaceApiVersion(backend_interface_provider);
Qnn_Version_t serializer_interface_version = GetQnnInterfaceApiVersion(serializer_interface_provider);
Expand Down Expand Up @@ -2001,9 +2003,17 @@ Ort::Status QnnBackendManager::SetupBackend(
return status;
}

Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id,
uint32_t core_id,
uint32_t& htp_power_config_id) {
// Creates an HTP power config ID and then starts the power config manager's timer thread for it.
//
// device_id, core_id: forwarded unchanged to CreateHtpPowerCfgId().
// htp_power_config_id: passed by reference to CreateHtpPowerCfgId() (presumably filled in there) and then
//                      handed to htp_power_config_manager_.CreateTimerThread().
//
// Returns the failure status from CreateHtpPowerCfgId() (propagated by RETURN_IF_ERROR, in which case the
// timer thread is not created); otherwise a default-constructed Ort::Status.
Ort::Status QnnBackendManager::InitializePowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
RETURN_IF_ERROR(CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id));
// Only reached when the ID was created successfully.
htp_power_config_manager_.CreateTimerThread(htp_power_config_id);
return Ort::Status();
}

// Asks the HTP power config manager to release its timer thread (the counterpart of the
// CreateTimerThread() call made in InitializePowerCfgId()).
// NOTE(review): whether this is safe to call when no timer thread was created depends on
// ReleaseTimerThread(), which is not visible here -- confirm.
void QnnBackendManager::DeInitializePerfTimer() {
htp_power_config_manager_.ReleaseTimerThread();
}

Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
// create an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
Expand All @@ -2023,26 +2033,6 @@ Ort::Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id,
return Ort::Status();
}

Ort::Status QnnBackendManager::SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
// set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");
RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time, *logger_ptr_));
RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency, *logger_ptr_));
RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode,
htp_power_config_client_id,
*logger_ptr_));
RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id,
GetQnnInterface(),
*logger_ptr_));

return Ort::Status();
}

Ort::Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run) {
PerThreadHtpPowerConfigs_t htp_power_configs;
if (!GetPerThreadHtpPowerConfigMapping(thread_id, htp_power_configs)) {
Expand All @@ -2051,29 +2041,23 @@ Ort::Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id

auto htp_power_config_id = htp_power_configs.power_config_id;
if (pre_run) {
// add in htp_power_configs the default power config id also so to run when we execute
if (htp_power_configs.pre_run_perf_mode.has_value()) {
RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.pre_run_perf_mode,
htp_power_config_id,
*logger_ptr_));
}

if (htp_power_configs.rpc_control_latency.has_value()) {
RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(*htp_power_configs.rpc_control_latency,
*logger_ptr_));
power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.pre_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency};
Comment thread
huaychou marked this conversation as resolved.
RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_START, config, *logger_ptr_));
} else if (htp_power_configs.default_perf_mode.has_value()) {
Comment thread
huaychou marked this conversation as resolved.
power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency};
RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_START, config, *logger_ptr_));
}

if (htp_power_configs.rpc_polling_time.has_value()) {
RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(*htp_power_configs.rpc_polling_time,
*logger_ptr_));
} else {
if (htp_power_configs.post_run_perf_mode.has_value()) {
power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.post_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency};
RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_DONE, config, *logger_ptr_));
} else if (htp_power_configs.default_perf_mode.has_value()) {
power::HtpPerfConfig_t config{htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency};
RETURN_IF_ERROR(htp_power_config_manager_.SetState(power::GraphState::RUN_DONE, config, *logger_ptr_));
}
} else if (htp_power_configs.post_run_perf_mode.has_value()) {
RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.post_run_perf_mode,
htp_power_config_id,
*logger_ptr_));
}

RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_id, GetQnnInterface(), *logger_ptr_));

return Ort::Status();
}

Expand Down Expand Up @@ -2108,7 +2092,7 @@ void QnnBackendManager::RemovePerThreadHtpPowerConfigMapping(const std::thread::
per_thread_power_configs_.erase(thread_id);
}

Ort::Status QnnBackendManager::DestroyHTPPowerConfigID(uint32_t htp_power_config_id) {
Ort::Status QnnBackendManager::DestroyHtpPowerConfigId(uint32_t htp_power_config_id) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");
Expand Down
21 changes: 13 additions & 8 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,11 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
bool enable_htp_extended_udma_mode = false,
bool enable_htp_prepare_only = false);

Ort::Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);
Ort::Status InitializePowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);

void DeInitializePerfTimer();

Ort::Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency);
Ort::Status DestroyHtpPowerConfigId(uint32_t htp_power_config_id);

Ort::Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run);

Expand Down Expand Up @@ -264,8 +263,6 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
: backend_path.parent_path().string();
}

Ort::Status DestroyHTPPowerConfigID(uint32_t htp_power_config_id);

Ort::Status GetMaxSpillFillBufferSize(unsigned char* buffer,
uint64_t buffer_length,
uint64_t& max_spill_fill_buffer_size);
Expand Down Expand Up @@ -326,7 +323,13 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
} FileMappingCallbackInfo_t;
#endif

void ResetLogger(const Ort::Logger& logger) { logger_ptr_ = &logger; }
// Points logger_ptr_ at `logger`. Only the address is stored (no copy is made), so the referenced
// logger presumably has to outlive every later use of logger_ptr_ -- confirm with callers.
void ResetLogger(const Ort::Logger& logger) {
logger_ptr_ = &logger;
}

// Returns a mutable reference to this backend manager's HTP power config manager member.
// The reference refers to htp_power_config_manager_ itself, not a copy.
power::HtpPowerConfigManager& GetHtpPowerConfigManager() {
return htp_power_config_manager_;
}

private:
Ort::Status LoadBackend();
Expand Down Expand Up @@ -439,6 +442,8 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

void* LibFunction(void* handle, const char* symbol, std::string& error_msg);

Ort::Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);

template <class T>
inline T ResolveSymbol(void* lib_handle, const char* sym, const Ort::Logger& logger) {
std::string error_msg = "";
Expand Down
15 changes: 14 additions & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,18 @@ enum class HtpPerformanceMode : uint8_t {
kHtpExtremePowerSaver,
};

// Per-thread HTP power settings.
//
// Performance mode precedence:
//  - pre_run_perf_mode and post_run_perf_mode take precedence over default_perf_mode.
//  - If pre_run_perf_mode is set, it is used for the performance setting in OnRunStart().
//  - If post_run_perf_mode is set, it is used for the performance setting in OnRunDone().
//  - If default_perf_mode is set and pre_run_perf_mode or post_run_perf_mode is not set,
//    default_perf_mode is used in place of the missing one, in OnRunStart() and/or OnRunDone().
// rpc_control_latency and rpc_polling_time are set beforehand in OnRunStart() because they depend on the
// performance mode set in OnRunStart().
typedef struct PerThreadHtpPowerConfigs {
// Performance mode to apply at run start; empty means "not specified".
std::optional<HtpPerformanceMode> pre_run_perf_mode;
// Performance mode to apply at run done; empty means "not specified".
std::optional<HtpPerformanceMode> post_run_perf_mode;
// Optional RPC control latency setting. NOTE(review): units are not stated here -- confirm.
std::optional<uint32_t> rpc_control_latency;
// Optional RPC polling time setting. NOTE(review): units are not stated here -- confirm.
std::optional<uint32_t> rpc_polling_time;
// Fallback performance mode used when the pre-run or post-run mode is not set.
std::optional<HtpPerformanceMode> default_perf_mode;

// HTP power config ID these settings are applied with; defaults to 0.
uint32_t power_config_id = 0;
} PerThreadHtpPowerConfigs_t;
Expand Down Expand Up @@ -130,17 +137,23 @@ bool IsQpuBackend(QnnBackendType backend_type);

std::string QnnBackendTypeToString(QnnBackendType backend_type);

// Sleep latency values, in microseconds.
constexpr int kSleepMinLatency = 40;
constexpr int kSleepLowLatency = 100;
constexpr int kSleepMediumLatency = 1000;
constexpr int kSleepHighLatency = 2000;
constexpr int kSleepHigherLatency = 65535;

// Other compile-time config values (DCVS disable/enable, RPC polling and RPC control latency).
constexpr int kDcvsDisable = 0;
constexpr int kDcvsEnable = 1;
constexpr uint32_t kDisableRpcPolling = 0;
constexpr uint32_t kDisableRpcControlLatency = 0;
constexpr uint32_t kMaxRpcPolling = 9999;

// Timer timeout for sustained high performance mode, in microseconds (300000 us = 300 ms).
constexpr uint64_t kDefaultTimerTimeoutUs = 300000;

struct OnnxTensorInfo {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTensorInfo);
OnnxTensorInfo(size_t index, int32_t data_type, std::vector<int64_t>&& shape) : index_(index), data_type_(data_type), shape_(std::move(shape)) {}
Expand Down
Loading