diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 882e2b5c9c6eea..772a2015217835 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -467,6 +467,50 @@ struct BATCH_MODE final : OptionBase { } }; +struct COMMANDLIST_MODE final : OptionBase { + static std::string_view key() { + return ov::intel_npu::commandlist_mode.name(); + } + + static constexpr std::string_view getTypeName() { + return "ov::intel_npu::CommandListMode"; + } + + static ov::intel_npu::CommandListMode defaultValue() { + return ov::intel_npu::CommandListMode::DEFAULT; + } + + static bool isPublic() { + return false; + } + + static OptionMode mode() { + return OptionMode::RunTime; + } + + static ov::intel_npu::CommandListMode parse(std::string_view val) { + if (val == "DEFAULT") { + return ov::intel_npu::CommandListMode::DEFAULT; + } else if (val == "ENABLE_MUTABLE_COMMANDLIST") { + return ov::intel_npu::CommandListMode::ENABLE_MUTABLE_COMMANDLIST; + } else if (val == "FORCE_COMMANDLIST_RECORDING_ONLY") { + return ov::intel_npu::CommandListMode::FORCE_COMMANDLIST_RECORDING_ONLY; + } else if (val == "FORCE_UPDATE_MUTABLE_COMMANDLIST") { + return ov::intel_npu::CommandListMode::FORCE_UPDATE_MUTABLE_COMMANDLIST; + } + + OPENVINO_THROW("Value '", val, "' is not a valid COMMANDLIST_MODE option"); + } + + static std::string toString(const ov::intel_npu::CommandListMode& val) { + std::stringstream strStream; + + strStream << val; + + return strStream.str(); + } +}; + struct PROFILING_TYPE final : OptionBase { static std::string_view key() { return ov::intel_npu::profiling_type.name(); diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index d39e98b22b1089..173d97f0b37f4f 100644 --- 
a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -122,6 +122,47 @@ inline std::ostream& operator<<(std::ostream& out, const BatchMode& fmt) { return out; } +/** + * @brief [Only for NPU Plugin] + * Type: String. Default is "DEFAULT". + * Selects the command list update strategy used by the plugin. + * Possible values: "DEFAULT", "ENABLE_MUTABLE_COMMANDLIST", "FORCE_UPDATE_MUTABLE_COMMANDLIST", + * "FORCE_COMMANDLIST_RECORDING_ONLY" + */ +enum class CommandListMode { + DEFAULT = 0, + ENABLE_MUTABLE_COMMANDLIST = 1, + FORCE_UPDATE_MUTABLE_COMMANDLIST = 2, + FORCE_COMMANDLIST_RECORDING_ONLY = 3, +}; + +/** + * @brief Prints a string representation of ov::intel_npu::CommandListMode to a stream + * @param out An output stream to send to + * @param fmt A command list mode value to print to a stream + * @return A reference to the `out` stream + * @note Configuration API v 2.0 + */ +inline std::ostream& operator<<(std::ostream& out, const CommandListMode& fmt) { + switch (fmt) { + case CommandListMode::DEFAULT: { + out << "DEFAULT"; + } break; + case CommandListMode::ENABLE_MUTABLE_COMMANDLIST: { + out << "ENABLE_MUTABLE_COMMANDLIST"; + } break; + case CommandListMode::FORCE_UPDATE_MUTABLE_COMMANDLIST: { + out << "FORCE_UPDATE_MUTABLE_COMMANDLIST"; + } break; + case CommandListMode::FORCE_COMMANDLIST_RECORDING_ONLY: { + out << "FORCE_COMMANDLIST_RECORDING_ONLY"; + } break; + default: // Print the underlying integer: streaming `fmt` itself would re-enter this operator and recurse + OPENVINO_THROW("Unsupported value for the command list mode:", static_cast<int>(fmt)); + } + return out; +} + /** * @brief [Only for NPU Plugin] * Default is "ITERATIVE". @@ -328,6 +369,14 @@ static constexpr ov::Property profiling_type{"NPU_PROFILING_TYPE" */ static constexpr ov::Property batch_mode{"NPU_BATCH_MODE"}; +/** + * @brief [Only for NPU Plugin] + * Type: String. Default is "DEFAULT". + * Selects the command list update strategy used by the plugin. 
+ * Possible values: the enumerator names of ov::intel_npu::CommandListMode, e.g. "ENABLE_MUTABLE_COMMANDLIST". + */ +static constexpr ov::Property commandlist_mode{"NPU_COMMANDLIST_MODE"}; + /** * @brief [Experimental, only for NPU Plugin] * Type: enum. Default is "ITERATIVE". If the compiler-in-plugin is used (intel_npu::compiler_type = @@ -448,6 +497,5 @@ static constexpr ov::Property export_raw_blob{"NPU_EXPORT_RAW_BLOB"}; * models from each other, which can be required for some use cases. */ static constexpr ov::Property shared_common_queue{"NPU_SHARED_COMMON_QUEUE"}; - } // namespace intel_npu } // namespace ov diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp index 871316ab5c11e5..99cc2df75a22ca 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp @@ -132,6 +132,7 @@ class DynamicGraph final : public IDynamicGraph { virtual void setArgumentValueWithStrides(uint32_t argi, const void* argv, const std::vector& strides) = 0; + virtual void setOptimizedDynamicStridesMode(bool enabled) = 0; virtual uint64_t getNumSubgraphs() = 0; virtual void getBinding(GraphArguments& binding) = 0; virtual void executeGraph(const std::shared_ptr& zeroInitStruct, diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 0dd6d4df3089d8..31546086e28743 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -90,6 +90,10 @@ class ZeGraphExtWrappers { void evict_memory(const GraphDescriptor& graphDescriptor) const; + bool isOptimizedDynamicStridesSupported() const { + return _isOptimizedDynamicStridesSupported; + } + private: void 
getMetadata(ze_graph_handle_t graphHandle, uint32_t indexUsedByDriver, @@ -103,6 +107,7 @@ class ZeGraphExtWrappers { std::shared_ptr _zeroInitStruct; uint32_t _graphExtVersion; bool _isCompilerOptionQuerySupported; + bool _isOptimizedDynamicStridesSupported = false; Logger _logger; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp index 5dd5fca726a098..15b895388fe7da 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp @@ -10,12 +10,14 @@ #include "compiler_impl.hpp" #include "intel_npu/common/compiler_adapter_factory.hpp" #include "intel_npu/config/options.hpp" +#include "intel_npu/npu_private_properties.hpp" #include "intel_npu/prefix.hpp" #include "intel_npu/utils/utils.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "intel_npu/utils/zero/zero_cmd_queue_pool.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "openvino/runtime/make_tensor.hpp" +#include "ze_graph_ext_wrappers.hpp" namespace intel_npu { @@ -24,13 +26,19 @@ class DynamicGraphImpl : public DynamicGraph::Impl { using MemRefType = DynamicGraph::MemRefType; public: - DynamicGraphImpl() : _engineProperties{}, _logger("DynamicGraphImpl", Logger::global().level()) {} + DynamicGraphImpl(const FilteredConfig& config) + : _engineProperties{}, + _bindingCommandListMode(config.get()), + _logger("DynamicGraphImpl", Logger::global().level()) {} void initialize(std::optional& blob, NetworkMetadata& metadata) override; void createExecutionEngine(std::optional& blob); void prepareMetadata(NetworkMetadata& metadata); void initializeDynamicGraphExecution(std::optional& blob, NetworkMetadata& metadata); void setArgumentValue(uint32_t argi, const void* argv) override; void setArgumentValueWithStrides(uint32_t argi, const void* argv, const std::vector& strides) override; + void 
setOptimizedDynamicStridesMode(bool enabled) override { + _optimizedDynamicStridesMode = enabled; + } uint64_t getNumSubgraphs() override { return _engineProperties.numOfSubGraphs; } @@ -61,6 +69,8 @@ class DynamicGraphImpl : public DynamicGraph::Impl { npu_vm_runtime_handle_t _engine = nullptr; npu_vm_runtime_properties_t _engineProperties; DynamicGraph::GraphArguments _binding; + ov::intel_npu::CommandListMode _bindingCommandListMode; + bool _optimizedDynamicStridesMode = false; bool _initialized = false; Logger _logger; }; @@ -305,13 +315,21 @@ void DynamicGraphImpl::executeGraph(const std::shared_ptr ze_event_handle_t event, ze_graph_profiling_pool_handle_t profiling) { _logger.debug("Start to execute graph with runtime engine"); + std::shared_ptr argsImpl = args._impl ? std::static_pointer_cast(args._impl) : std::make_shared(); - bool noTensorChange = true; + // Force record commandlist for first execution or the mode is set to FORCE_COMMANDLIST_RECORDING_ONLY + bool commandListRecordingRequired = + (args._impl == nullptr) || + _bindingCommandListMode == ov::intel_npu::CommandListMode::FORCE_COMMANDLIST_RECORDING_ONLY; + std::vector commandListIndexArray; + npu_vm_runtime_execute_params_t* params = &argsImpl->_executeParams; - for (auto& in : args._inputs) { + auto inputSize = args._inputs.size(); + for (size_t i = 0; i < inputSize; ++i) { + auto& in = args._inputs[i]; std::shared_ptr inImpl = std::static_pointer_cast(in._impl); if (inImpl == nullptr) { @@ -320,12 +338,37 @@ void DynamicGraphImpl::executeGraph(const std::shared_ptr } inImpl->UpdateMemRefHandleStatus(in); if (args._impl == nullptr) { + // First execution argsImpl->_inputMemRefs.push_back(inImpl->_memRef); - } else if (inImpl->_ptrUpdated || inImpl->_shapeUpdated || inImpl->_strideUpdated) { - noTensorChange = false; + } else if (_bindingCommandListMode == ov::intel_npu::CommandListMode::FORCE_UPDATE_MUTABLE_COMMANDLIST) { + if (!commandListRecordingRequired) { + if (inImpl->_shapeUpdated || 
inImpl->_strideUpdated) { + // If shape or stride change, need recording commandlist + commandListRecordingRequired = true; + } else { + // If force update commandlist, then pass all index info + commandListIndexArray.push_back(i); + } + } + } else if (!commandListRecordingRequired && + (inImpl->_ptrUpdated || inImpl->_shapeUpdated || inImpl->_strideUpdated)) { + if (inImpl->_ptrUpdated && _optimizedDynamicStridesMode && + _bindingCommandListMode == ov::intel_npu::CommandListMode::ENABLE_MUTABLE_COMMANDLIST) { + _logger.debug( + "Input tensor pointer change detected for index %d, and optimized dynamic stride is supported, " + "which can be updated with UpdateMutableCommandList API without recording a new command list.", + static_cast(i)); + commandListIndexArray.push_back(i); + } else { + // For shape change, stride change, ptr change without optimized dynamic stride supported, need record + // commandlist + _logger.debug("Input tensor %d trigger command list recording", static_cast(i)); + commandListRecordingRequired = true; + } } } - for (auto& out : args._outputs) { + for (size_t i = 0; i < args._outputs.size(); ++i) { + auto& out = args._outputs[i]; std::shared_ptr outImpl = std::static_pointer_cast(out._impl); if (outImpl == nullptr) { @@ -334,13 +377,36 @@ void DynamicGraphImpl::executeGraph(const std::shared_ptr } outImpl->UpdateMemRefHandleStatus(out); if (args._impl == nullptr) { + // First execution argsImpl->_outputMemRefs.push_back(outImpl->_memRef); - } else if (outImpl->_ptrUpdated || outImpl->_shapeUpdated || outImpl->_strideUpdated) { - noTensorChange = false; + } else if (_bindingCommandListMode == ov::intel_npu::CommandListMode::FORCE_UPDATE_MUTABLE_COMMANDLIST) { + if (!commandListRecordingRequired) { + if (outImpl->_shapeUpdated || outImpl->_strideUpdated) { + // If shape or stride change, need recording commandlist + commandListRecordingRequired = true; + } else { + // If force update commandlist, then pass all index info + 
commandListIndexArray.push_back(inputSize + i); + } + } + } else if (!commandListRecordingRequired && + (outImpl->_ptrUpdated || outImpl->_shapeUpdated || outImpl->_strideUpdated)) { + if (outImpl->_ptrUpdated && _optimizedDynamicStridesMode && + _bindingCommandListMode == ov::intel_npu::CommandListMode::ENABLE_MUTABLE_COMMANDLIST) { + _logger.debug( + "Output tensor pointer change detected for index %d, and optimized dynamic stride is supported, " + "which can be updated with UpdateMutableCommandList API without recording a new command list.", + static_cast(i)); + commandListIndexArray.push_back(inputSize + i); + } else { + _logger.debug("Output tensor %d trigger command list recording", static_cast(i)); + // For shape change, need record commandlist + commandListRecordingRequired = true; + } } } - if (args._impl == nullptr || !noTensorChange) { + if (args._impl == nullptr || commandListRecordingRequired) { _logger.debug("Reset command list to run with runtime"); // Reset commandLists since there are tensor with new shapes or it is the first execution, can not reuse command // list with update @@ -348,14 +414,29 @@ void DynamicGraphImpl::executeGraph(const std::shared_ptr zeCommandListReset(cmdList); } } else { - _logger.debug("Reuse command list without update since no tensor change detected"); - - auto result = zeCommandQueueExecuteCommandLists(commandQueue, - static_cast(commandLists.size()), - commandLists.data(), - fence); - if (result != ZE_RESULT_SUCCESS) { - OPENVINO_THROW("Failed to submit command lists"); + if (!commandListIndexArray.empty() || + _bindingCommandListMode == ov::intel_npu::CommandListMode::FORCE_UPDATE_MUTABLE_COMMANDLIST) { + _logger.debug("Update command list and execute directly"); + if (params->executionContext == nullptr) { + OPENVINO_THROW( + "Execution context is not created, can not reuse command list with UpdateMutableCommandList API"); + } + + if (npuVMRuntimeUpdateMutableCommandList(_engine, + params, + 
const_cast(commandListIndexArray.data()), + commandListIndexArray.size()) != NPU_VM_RUNTIME_RESULT_SUCCESS) { + OPENVINO_THROW("Failed to execute VM runtime engine to update commandlist"); + } + } else { + _logger.debug("Reuse command list without update since no tensor change detected"); + auto result = zeCommandQueueExecuteCommandLists(commandQueue, + static_cast(commandLists.size()), + commandLists.data(), + fence); + if (result != ZE_RESULT_SUCCESS) { + OPENVINO_THROW("Failed to submit command lists"); + } } return; } @@ -451,7 +532,7 @@ DynamicGraph::DynamicGraph(const std::shared_ptr& zeroIni return; } - _impl = std::make_unique(); + _impl = std::make_unique(config); // TODO: metadata needs to be parsed even when CREATE_EXECUTOR is 0 or DEFER_WEIGHTS_LOAD is YES, keep here to // support pure compilation without vm runtime initialize VM execution engine, metadata, input&output @@ -583,7 +664,7 @@ void DynamicGraph::initialize_impl(const FilteredConfig& config) { _logger.debug("Graph initialize start"); if (!_impl) { - _impl = std::make_unique(); + _impl = std::make_unique(config); // initialize VM execution engine, metadata, input&output descriptors _impl->initialize(_blob, _metadata); _num_of_subgraphs = _impl->getNumSubgraphs(); @@ -596,6 +677,8 @@ void DynamicGraph::initialize_impl(const FilteredConfig& config) { _logger.debug("Graph initialize without graph handle"); + _impl->setOptimizedDynamicStridesMode(ZeGraphExtWrappers(_zeroInitStruct).isOptimizedDynamicStridesSupported()); + uint32_t commandQueueOptions = 0; if (config.has() && config.get()) { OPENVINO_ASSERT(_zeroInitStruct->getCommandQueueDdiTable().version() >= ZE_MAKE_VERSION(1, 0), diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index a408bbd07498a8..e5b89d9242d49f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -146,6 +146,15 @@ ZeGraphExtWrappers::ZeGraphExtWrappers(const std::shared_ptrgetGraphDdiTable().pfnCompilerIsOptionSupported(_zeroInitStruct->getDevice(), + ZE_NPU_DRIVER_OPTIONS, + "OPTIMIZED_DYNAMIC_STRIDE", + nullptr) == ZE_RESULT_SUCCESS; + _logger.debug("OPTIMIZED_DYNAMIC_STRIDE compiler option is %s", + _isOptimizedDynamicStridesSupported ? "supported" : "not supported"); + } } ZeGraphExtWrappers::~ZeGraphExtWrappers() { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 286dff6c64586b..2f155fb7c3ee0e 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -247,6 +247,7 @@ void init_config(const IEngineBackend* backend, OptionsDesc& options, FilteredCo REGISTER_OPTION(PROFILING_TYPE); REGISTER_OPTION(BACKEND_COMPILATION_PARAMS); REGISTER_OPTION(BATCH_MODE); + REGISTER_OPTION(COMMANDLIST_MODE); REGISTER_OPTION(BYPASS_UMD_CACHING); REGISTER_OPTION(DEFER_WEIGHTS_LOAD); REGISTER_OPTION(WEIGHTS_PATH); diff --git a/src/plugins/intel_npu/src/plugin/src/properties.cpp b/src/plugins/intel_npu/src/plugin/src/properties.cpp index c971685c747c35..bff1005064924d 100644 --- a/src/plugins/intel_npu/src/plugin/src/properties.cpp +++ b/src/plugins/intel_npu/src/plugin/src/properties.cpp @@ -23,7 +23,7 @@ std::map any_copy(const ov::AnyMap& params) { inline bool isSpecialBothProperty(const std::string& key) { return key == ov::hint::performance_mode.name() || key == ov::enable_profiling.name() || - key == ov::log::level.name(); + key == ov::log::level.name() || key == ov::intel_npu::commandlist_mode.name(); } inline void logCpuPinningDeprecationWarning(intel_npu::Logger& logger) { diff --git a/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.cpp 
b/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.cpp index 4a1796b61fecf5..6c242516b935a9 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.cpp @@ -15,10 +15,31 @@ const std::vector configs = { {{"NPU_COMPILER_TYPE", "PLUGIN"}, {"NPU_COMPILATION_MODE", "HostCompile"}, {"NPU_CREATE_EXECUTOR", "0"}, - // After HostCompile default params were changed to a more performant configuration, these tests fail - // under the new defaults and need investigation before they can be re-enabled with the new configuration - // Untill then set old defaults explicitly to keep the tests running. + // After HostCompile default params were changed to a more performant configuration, + // these tests fail under the new defaults and need investigation before they can be + // re-enabled with the new configuration. Until then, set old defaults explicitly to + // keep the tests running. 
// Track: E#218923 + {"NPU_COMPILATION_MODE_PARAMS", "dynamic-dim-alignment=false enable-auto-unrolling=false"}}, + {{"NPU_COMPILER_TYPE", "PLUGIN"}, + {"NPU_COMPILATION_MODE", "HostCompile"}, + {"NPU_CREATE_EXECUTOR", "0"}, + {"NPU_COMMANDLIST_MODE", "DEFAULT"}, + {"NPU_COMPILATION_MODE_PARAMS", "dynamic-dim-alignment=false enable-auto-unrolling=false"}}, + {{"NPU_COMPILER_TYPE", "PLUGIN"}, + {"NPU_COMPILATION_MODE", "HostCompile"}, + {"NPU_CREATE_EXECUTOR", "0"}, + {"NPU_COMMANDLIST_MODE", "ENABLE_MUTABLE_COMMANDLIST"}, + {"NPU_COMPILATION_MODE_PARAMS", "dynamic-dim-alignment=false enable-auto-unrolling=false"}}, + {{"NPU_COMPILER_TYPE", "PLUGIN"}, + {"NPU_COMPILATION_MODE", "HostCompile"}, + {"NPU_CREATE_EXECUTOR", "0"}, + {"NPU_COMMANDLIST_MODE", "FORCE_COMMANDLIST_RECORDING_ONLY"}, + {"NPU_COMPILATION_MODE_PARAMS", "dynamic-dim-alignment=false enable-auto-unrolling=false"}}, + {{"NPU_COMPILER_TYPE", "PLUGIN"}, + {"NPU_COMPILATION_MODE", "HostCompile"}, + {"NPU_CREATE_EXECUTOR", "0"}, + {"NPU_COMMANDLIST_MODE", "FORCE_UPDATE_MUTABLE_COMMANDLIST"}, {"NPU_COMPILATION_MODE_PARAMS", "dynamic-dim-alignment=false enable-auto-unrolling=false"}}}; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, diff --git a/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.hpp b/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.hpp index 9a8c879b59a759..773d10740aca00 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/dynamic_host_pipeline/infer_with_host_compile.hpp @@ -10,6 +10,7 @@ #include #include +#include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/openvino.hpp" #include "openvino/opsets/opset6.hpp" #include "openvino/pass/manager.hpp" @@ -21,8 +22,13 @@ namespace ov { namespace test { namespace behavior { +// These are debug logs during execution, help to detect the 
execute flow +inline constexpr const char* kLogResetCommandList = "Reset command list to run with runtime"; +inline constexpr const char* kLogUpdateCommandList = "Update command list and execute directly"; +inline constexpr const char* kLogReuseCommandList = "Reuse command list without update since no tensor change detected"; + inline std::shared_ptr createMaxPoolModel() { - auto input = std::make_shared(ov::element::f32, + auto input = std::make_shared(ov::element::f16, ov::PartialShape{1, 16, 720, ov::Dimension(10, 1280)}); input->set_friendly_name("input1"); @@ -56,6 +62,13 @@ class InferWithHostCompileTests : public testing::WithParamInterface& model); @@ -188,7 +201,7 @@ void InferWithHostCompileTests::inferAndCompare(const std::shared_ptr OV_ASSERT_NO_THROW(reqReference.infer()); try { compareInferenceResult(model, reqDynamic, reqReference); - } catch (const ov::Exception& e) { + } catch (const std::exception& e) { FAIL() << "Inference result comparison failed at stage " << stage << ": " << e.what(); } } @@ -203,8 +216,44 @@ void InferWithHostCompileTests::setInputInferAndCompare(const std::shared_ptrsecond.as(); + if (mode == "FORCE_COMMANDLIST_RECORDING_ONLY") { + // In this mode, command list will always be reset and recorded for each inference, so we check reset log only. + return logCapture.str().find(kLogResetCommandList) != std::string::npos; + } else if (mode == "FORCE_UPDATE_MUTABLE_COMMANDLIST") { + if (BindingStatus::shape_changed == status) { + // If shape changed, command list needs to be reset since the old one is not valid anymore. So we check + // reset log. + return logCapture.str().find(kLogResetCommandList) != std::string::npos; + } + // If ptr changed or nothing changed, force call runtime update mutable commandlist API to check performance. 
+ return logCapture.str().find(kLogUpdateCommandList) != std::string::npos; + } else { + // DEFAULT mode + if (BindingStatus::ptr_changed == status) { + if (mode == "ENABLE_MUTABLE_COMMANDLIST") { + // If ptr changed and mutable command list is enabled, update command list is expected. + return logCapture.str().find(kLogUpdateCommandList) != std::string::npos; + } + // If ptr_changed, command list reset is expected if mutable command list is not enabled + return logCapture.str().find(kLogResetCommandList) != std::string::npos; + } else if (BindingStatus::unchanged == status) { + // If nothing changed, command list reuse is expected + return logCapture.str().find(kLogReuseCommandList) != std::string::npos; + } else if (BindingStatus::shape_changed == status) { + // If shape changed, command list reset is expected since the old one is not valid anymore. + return logCapture.str().find(kLogResetCommandList) != std::string::npos; + } + } + return false; } InferWithHostCompileTests::RuntimeCompareSetupResult InferWithHostCompileTests::prepareRuntimeCompareContext( @@ -247,6 +296,7 @@ InferWithHostCompileTests::RuntimeCompareSetupResult InferWithHostCompileTests:: return result; } +// Basic test of flow TEST_P(InferWithHostCompileTests, CompileAndImportAndInfer) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() @@ -308,16 +358,14 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithDecreasedSize) { inTensor, "CompileAndInferWithDecreasedSize_first"); // The first run materializes runtime state for the initial shape. 
- ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime', but got: " << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed, got: " << logCapture.str(); logCapture.clear(); inferAndCompare(model, testContext.reqDynamic, testContext.reqReference, "CompileAndInferWithDecreasedSize_second"); // Reusing the same input should keep the existing command list intact. - ASSERT_TRUE(logContains(logCapture, "Reuse command list without update since no tensor change detected")) - << "Expected log to contain 'Reuse command list without update since no tensor change detected' for second " - "inference, but got: " - << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for second inference, got: " << logCapture.str(); logCapture.clear(); ov::Tensor inTensor1 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); @@ -326,11 +374,9 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithDecreasedSize) { testContext.reqReference, inTensor1, "CompileAndInferWithDecreasedSize_third"); - // A new host tensor with the same shape should still reuse the command list. - ASSERT_TRUE(logContains(logCapture, "Reuse command list without update since no tensor change detected")) - << "Expected log to contain 'Reuse command list without update since no tensor change detected' for third " - "inference, but got: " - << logCapture.str(); + // A new host tensor with the same shape should still reuse the command list. No binding changed. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for third inference, got: " << logCapture.str(); logCapture.clear(); ov::Shape shape2 = {1, 16, 720, 720}; @@ -341,10 +387,8 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithDecreasedSize) { inTensor3, "CompileAndInferWithDecreasedSize_fourth"); // Shrinking the shape should force runtime reconfiguration for the new tensor layout. - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for fourth inference with new shape, but " - "got: " - << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::shape_changed)) + << "Log content validation failed for fourth inference with new shape, got: " << logCapture.str(); } // Compile, infer with a small shape, then grow the input shape and verify both output correctness and command-list @@ -379,16 +423,14 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithIncreasedSize) { inTensor, "CompileAndInferWithIncreasedSize_first"); // The first run materializes runtime state for the initial shape. - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime', but got: " << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference, got: " << logCapture.str(); logCapture.clear(); inferAndCompare(model, testContext.reqDynamic, testContext.reqReference, "CompileAndInferWithIncreasedSize_second"); // Reusing the same input should keep the existing command list intact. 
- ASSERT_TRUE(logContains(logCapture, "Reuse command list without update since no tensor change detected")) - << "Expected log to contain 'Reuse command list without update since no tensor change detected' for second " - "inference, but got: " - << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for second inference, got: " << logCapture.str(); logCapture.clear(); ov::Tensor inTensor1 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); @@ -398,10 +440,8 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithIncreasedSize) { inTensor1, "CompileAndInferWithIncreasedSize_third"); // A new host tensor with the same shape should still reuse the command list. - ASSERT_TRUE(logContains(logCapture, "Reuse command list without update since no tensor change detected")) - << "Expected log to contain 'Reuse command list without update since no tensor change detected' for third " - "inference, but got: " - << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for third inference, got: " << logCapture.str(); logCapture.clear(); ov::Shape shape2 = {1, 16, 720, 1280}; @@ -412,10 +452,8 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithIncreasedSize) { inTensor3, "CompileAndInferWithIncreasedSize_fourth"); // Growing the shape should force runtime reconfiguration for the new tensor layout. - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for fourth inference with new shape, but " - "got: " - << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::shape_changed)) + << "Log content validation failed for fourth inference with new shape, got: " << logCapture.str(); } // Exercise imported Level Zero tensors and verify both output correctness and command-list pointer updates. 
@@ -449,16 +487,16 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensor) { "CompileAndInferWithZeroTensor_first"); // The first run materializes runtime state for the initial shape. - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime', but got: " << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference, got: " << logCapture.str(); logCapture.clear(); ov::InferRequest reqDynamic1 = testContext.compiledModel.create_infer_request(); ov::InferRequest reqReference1 = testContext.referenceCompiledModel.create_infer_request(); setInputInferAndCompare(model, reqDynamic1, reqReference1, inTensor, "CompileAndInferWithZeroTensor_second"); // A fresh infer request rebuilds runtime state on its first execution. - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime', but got: " << logCapture.str(); + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for second inference, got: " << logCapture.str(); logCapture.clear(); auto outputTensorFromReq = testContext.reqDynamic.get_tensor(model->output()); @@ -467,11 +505,9 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensor) { reqReference1, outputTensorFromReq, "CompileAndInferWithZeroTensor_third"); - // Feeding an imported output tensor, ptr change detected and rebuild runtime - // TODO: Update commandlist once dynamic stride supported - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for third inference, but got: " - << logCapture.str(); + // Feeding a zero tensor should update the command list to the new pointer. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for third inference, got: " << logCapture.str(); logCapture.clear(); auto zeroContext = core->get_default_context(target_device); @@ -488,11 +524,9 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensor) { reqReference1, inputHostTensorForForthInfer, "CompileAndInferWithZeroTensor_fourth"); - // Feeding a context-allocated host tensor, ptr change detected and rebuild runtime - // TODO: Update commandlist once dynamic stride supported - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for fourth inference, but got: " - << logCapture.str(); + // Feeding a context-allocated host tensor should also update the command list to the new pointer. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for fourth inference, got: " << logCapture.str(); logCapture.clear(); auto outputShape = reqDynamic1.get_tensor(model->output()).get_shape(); @@ -506,11 +540,9 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensor) { hostTensorSourceForOutputForFifthInfer.get_byte_size()); OV_ASSERT_NO_THROW(reqDynamic1.set_tensor(model->output(), zeroOutputTensorForFifthInfer)); inferAndCompare(model, reqDynamic1, reqReference1, "CompileAndInferWithZeroTensor_fifth"); - // Feeding a context-allocated host tensor as output, ptr change detected and rebuild runtime - // TODO: Update commandlist once dynamic stride supported - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for fifth inference, but got: " - << logCapture.str(); + // Feeding a context-allocated host tensor should also update the command list to the new pointer. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for fifth inference, got: " << logCapture.str(); logCapture.clear(); auto inputTensorForSixthInfer = @@ -535,11 +567,304 @@ TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensor) { reqReference1, inputTensorForSixthInfer, "CompileAndInferWithZeroTensor_sixth"); - // Feeding a context-allocated host tensor, ptr change detected and rebuild runtime - // TODO: Update commandlist once dynamic stride supported - ASSERT_TRUE(logContains(logCapture, "Reset command list to run with runtime")) - << "Expected log to contain 'Reset command list to run with runtime' for sixth inference, but got: " - << logCapture.str(); + // Feeding a context-allocated host tensor should also update the command list to the new pointer. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for sixth inference, got: " << logCapture.str(); +} + +// Compare HostCompile inference results against the Template plugin while also checking command-list reuse behavior. +TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensorCompareWithReference) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!isTargetDevice) { + GTEST_SKIP() << "Skip test for current device"; + } + + auto model = createMaxPoolModel(); + ScopedLogCapture logCapture; + + core->set_property("NPU", ov::log::level(ov::log::Level::DEBUG)); + auto setupResult = prepareRuntimeCompareContext(model); + if (setupResult.status == RuntimeCompareStatus::fail) { + FAIL() << setupResult.message; + } + if (setupResult.status == RuntimeCompareStatus::skip) { + GTEST_SKIP() << setupResult.message; + } + auto& testContext = setupResult.context; + + // Use a regular host tensor for the initial comparison against the Template plugin. 
+ ov::Shape shape = {1, 16, 720, 1280}; + ov::Tensor inTensor = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + setInputInferAndCompare(model, + testContext.reqDynamic, + testContext.reqReference, + inTensor, + "CompileAndInferWithZeroTensorCompareWithReference_first"); + + // The first run materializes runtime state for the initial shape. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference, got: " << logCapture.str(); + + logCapture.clear(); + inferAndCompare(model, + testContext.reqDynamic, + testContext.reqReference, + "CompileAndInferWithZeroTensorCompareWithReference_second"); + // Reusing the same input should keep the existing command list intact. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for second inference, got: " << logCapture.str(); + + auto npuOutputTensorSecondRun = testContext.reqDynamic.get_tensor(model->output()); + + logCapture.clear(); + ov::InferRequest reqDynamic1 = testContext.compiledModel.create_infer_request(); + OV_ASSERT_NO_THROW(reqDynamic1.infer()); + // A fresh infer request rebuilds runtime state on its first execution. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for third inference, got: " << logCapture.str(); + + logCapture.clear(); + ov::InferRequest reqReference1 = testContext.referenceCompiledModel.create_infer_request(); + setInputInferAndCompare(model, + reqDynamic1, + reqReference1, + npuOutputTensorSecondRun, + "CompileAndInferWithZeroTensorCompareWithReference_fourth"); + + // Feeding an imported output tensor should update the command list to the new pointer. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for fourth inference, got: " << logCapture.str(); +} + +// Exercise page-aligned external memory and verify both output correctness and command-list pointer updates. +TEST_P(InferWithHostCompileTests, CompileAndInferWithAlignedTensor) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!isTargetDevice) { + GTEST_SKIP() << "Skip test for current device"; + } + + auto model = createMaxPoolModel(); + ScopedLogCapture logCapture; + + core->set_property("NPU", ov::log::level(ov::log::Level::DEBUG)); + auto setupResult = prepareRuntimeCompareContext(model); + if (setupResult.status == RuntimeCompareStatus::fail) { + FAIL() << setupResult.message; + } + if (setupResult.status == RuntimeCompareStatus::skip) { + GTEST_SKIP() << setupResult.message; + } + auto& testContext = setupResult.context; + + // Start from a regular host tensor. + ov::Shape shape = {1, 16, 720, 768}; + ov::Tensor inTensor = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + setInputInferAndCompare(model, + testContext.reqDynamic, + testContext.reqReference, + inTensor, + "CompileAndInferWithAlignedTensor_first"); + + // The first run materializes runtime state for the initial shape. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference, got: " << logCapture.str(); + + logCapture.clear(); + // Allocate page-aligned external memory so the import path can be exercised. 
+ auto alignedData = std::unique_ptr( + static_cast( + ::operator new(ov::shape_size(shape) * model->input().get_element_type().size(), std::align_val_t(4096))), + [](float* ptr) { + ::operator delete(ptr, std::align_val_t(4096)); + }); + ov::Tensor inTensor1(model->input().get_element_type(), shape, alignedData.get()); + ASSERT_EQ(inTensor.get_byte_size(), inTensor1.get_byte_size()) + << "Source and destination tensors must have identical byte sizes for copy"; + std::memcpy(inTensor1.data(), inTensor.data(), inTensor.get_byte_size()); + + setInputInferAndCompare(model, + testContext.reqDynamic, + testContext.reqReference, + inTensor1, + "CompileAndInferWithAlignedTensor_second"); + + if (::intel_npu::ZeroInitStructsHolder::getInstance()->isExternalMemoryStandardAllocationSupported()) { + // Importable external memory should switch execution to the new tensor pointer. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for second inference, got: " << logCapture.str(); + } else { + // Without import support, execution falls back to copying into the existing internal allocation. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for second inference, got: " << logCapture.str(); + } +} + +TEST_P(InferWithHostCompileTests, CompileAndInferWithRandomSize) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!isTargetDevice) { + GTEST_SKIP() << "Skip test for current device"; + } + + auto model = createMaxPoolModel(); + ov::CompiledModel compiledModel; + + // Create a log callback that stores the log in a string, then set it on ov + ScopedLogCapture logCapture; + + core->set_property("NPU", ov::log::level(ov::log::Level::DEBUG)); + + OV_ASSERT_NO_THROW(compiledModel = core->compile_model(model, target_device, configuration)); + + ov::InferRequest reqDynamic; + try { + reqDynamic = compiledModel.create_infer_request(); + } catch (const ov::Exception& e) { + ASSERT_TRUE(std::string(e.what()).find("Cannot load library") != std::string::npos) + << "Expected exception message to contain 'Cannot load library', but got: " << e.what(); + GTEST_SKIP() << "Cannot load library, skip test."; + } + + // Create an input tensor that matches the customized model + ov::Shape shape = {1, 16, 720, 720}; + ov::Tensor inTensor = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inTensor)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // First inference: this tensor cannot be used by the runtime directly, so the local LevelZero tensor is used. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference, got: " << logCapture.str(); + + logCapture.clear(); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Reusing the same input should keep the existing command list intact. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for second inference, got: " << logCapture.str(); + + logCapture.clear(); + ov::Tensor inTensor1 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inTensor1)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Tensor with same shape should reuse commandlist. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::unchanged)) + << "Log content validation failed for third inference, got: " << logCapture.str(); + + logCapture.clear(); + ov::Shape shape2 = {1, 16, 720, 1024}; + ov::Tensor inTensor2 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape2, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inTensor2)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Shape change should force runtime reconfiguration for the new tensor layout. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::shape_changed)) + << "Log content validation failed for fourth inference with new shape, got: " << logCapture.str(); + + logCapture.clear(); + ov::Shape shape3 = {1, 16, 720, 360}; + ov::Tensor inTensor3 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape3, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inTensor3)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Shape change should force runtime reconfiguration for the new tensor layout. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::shape_changed)) + << "Log content validation failed for fifth inference with new shape, got: " << logCapture.str(); + + logCapture.clear(); + ov::Shape shape4 = {1, 16, 720, 1280}; + ov::Tensor inTensor4 = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape4, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inTensor4)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Shape change should force runtime reconfiguration for the new tensor layout. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::shape_changed)) + << "Log content validation failed for sixth inference with new shape, got: " << logCapture.str(); +} + +// Exercise a context-allocated host tensor without a reference comparison and verify command-list pointer updates. +TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensorWithoutReference) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!isTargetDevice) { + GTEST_SKIP() << "Skip test for current device"; + } + + auto model = createMaxPoolModel(); + ScopedLogCapture logCapture; + + core->set_property("NPU", ov::log::level(ov::log::Level::DEBUG)); + ov::CompiledModel compiledModel; + OV_ASSERT_NO_THROW(compiledModel = core->compile_model(model, target_device, configuration)); + + ov::InferRequest reqDynamic; + try { + reqDynamic = compiledModel.create_infer_request(); + } catch (const ov::Exception& e) { + ASSERT_TRUE(std::string(e.what()).find("Cannot load library") != std::string::npos) + << "Expected exception message to contain 'Cannot load library', but got: " << e.what(); + GTEST_SKIP() << "Cannot load library, skip test."; + } + + // Create an input tensor that matches the customized model + ov::Shape shape = {1, 16, 720, 720}; + ov::Tensor inTensor = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, 
inTensor)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // First inference: this tensor cannot be used by the runtime directly, so the local LevelZero tensor is used. + ASSERT_TRUE(logCheck(logCapture, BindingStatus::initial)) + << "Log content validation failed for first inference with new shape, got: " << logCapture.str(); + + logCapture.clear(); + auto zeroContext = core->get_default_context(target_device); + auto inputHostTensor = zeroContext.create_host_tensor(model->input().get_element_type(), shape); + auto hostTensorSource = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + ASSERT_EQ(hostTensorSource.get_byte_size(), inputHostTensor.get_byte_size()) + << "Source and destination tensors must have identical byte sizes for copy"; + std::memcpy(inputHostTensor.data(), hostTensorSource.data(), hostTensorSource.get_byte_size()); + OV_ASSERT_NO_THROW(reqDynamic.set_input_tensor(0, inputHostTensor)); + OV_ASSERT_NO_THROW(reqDynamic.infer()); + // Feeding a context-allocated host tensor should also update the command list to the new pointer. 
+ ASSERT_TRUE(logCheck(logCapture, BindingStatus::ptr_changed)) + << "Log content validation failed for second inference with new pointer, got: " << logCapture.str(); +} + +// Update output tensor to check result +TEST_P(InferWithHostCompileTests, CompileAndInferWithZeroTensorAsOutput) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!isTargetDevice) { + GTEST_SKIP() << "Skip test for current device"; + } + + auto model = createMaxPoolModel(); + + core->set_property("NPU", ov::log::level(ov::log::Level::DEBUG)); + auto setupResult = prepareRuntimeCompareContext(model); + if (setupResult.status == RuntimeCompareStatus::fail) { + FAIL() << setupResult.message; + } + if (setupResult.status == RuntimeCompareStatus::skip) { + GTEST_SKIP() << setupResult.message; + } + auto& testContext = setupResult.context; + + // Start from a regular host tensor. + ov::Shape shape = {1, 16, 720, 1280}; + ov::Tensor inTensor = ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), shape, 100, 0); + ov::InferRequest reqDynamic1 = testContext.compiledModel.create_infer_request(); + ov::InferRequest reqReference1 = testContext.referenceCompiledModel.create_infer_request(); + setInputInferAndCompare(model, reqDynamic1, reqReference1, inTensor, "CompileAndInferWithZeroTensor_first"); + + auto zeroContext = core->get_default_context(target_device); + auto outputShape = reqDynamic1.get_tensor(model->output()).get_shape(); + auto zeroOutputTensorForSecondInfer = + zeroContext.create_host_tensor(model->input().get_element_type(), outputShape); + auto hostTensorSourceForOutputForSecondInfer = + ov::test::utils::create_and_fill_tensor(model->input().get_element_type(), outputShape, 100, 0); + ASSERT_EQ(hostTensorSourceForOutputForSecondInfer.get_byte_size(), zeroOutputTensorForSecondInfer.get_byte_size()) + << "Source and destination tensors must have identical byte sizes for copy"; + 
std::memcpy(zeroOutputTensorForSecondInfer.data(), + hostTensorSourceForOutputForSecondInfer.data(), + hostTensorSourceForOutputForSecondInfer.get_byte_size()); + OV_ASSERT_NO_THROW(reqDynamic1.set_tensor(model->output(), zeroOutputTensorForSecondInfer)); + inferAndCompare(model, reqDynamic1, reqReference1, "CompileAndInferWithZeroTensor_second"); } } // namespace behavior diff --git a/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp b/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp index c3e0afc72f0e89..fc6db4860856a7 100644 --- a/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp @@ -118,6 +118,7 @@ class PropertiesManagerTests : public ov::test::behavior::OVPluginTestBase, REGISTER_OPTION(PROFILING_TYPE); REGISTER_OPTION(BACKEND_COMPILATION_PARAMS); REGISTER_OPTION(BATCH_MODE); + REGISTER_OPTION(COMMANDLIST_MODE); REGISTER_OPTION(BYPASS_UMD_CACHING); REGISTER_OPTION(DEFER_WEIGHTS_LOAD); REGISTER_OPTION(WEIGHTS_PATH);