-
Notifications
You must be signed in to change notification settings - Fork 3.2k
[GPU] tensor from nthandle #35543
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
[GPU] tensor from nthandle #35543
Changes from all commits
68d68c1
3a92b0c
17b5f13
603c4fa
2ebfdd3
58ad113
70604fb
e3025dc
fb20b2c
526e80e
c962e4b
903fc7d
68e500b
ad3e5f6
434293f
1b3dec9
d6af4b9
7ca34c8
eed1327
ec48e76
629c5d2
1b98154
20c11a2
c239ab5
5462dec
56c2108
9e6bdff
b5f52f1
060f076
3d6f997
47fd2d0
7ace878
1991726
0833756
3a35dd0
56248c2
67c5f96
b4b57e0
a7eb0b5
7152ce6
d402d76
94f33d4
0ff1e89
0add3e3
0c7eff6
ee6533f
0cf9f43
802c5ee
18a7c18
806e501
082175a
744f6d3
30e5bb5
cb070a9
2609a52
55ee151
8bf92fe
79b1a12
4a68bd3
ecd3967
94d9819
f93a237
2b1130e
f814f50
a5453ca
83c675e
2ad311a
850f127
8c1af47
9b64a51
19f8fcb
ed41e78
f20aaf2
7e87eb8
1cd0009
0046453
bfcfa60
db6d80c
39aa251
7447945
41c91f9
d37555d
0e8fff7
9685a7d
568e042
825024d
e1df435
99b0578
4f5cc5f
4fe5523
e541e85
23edd7c
6e37a77
7a1c41c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -38,8 +38,19 @@ namespace ocl { | |||||||
| * @brief Shortcut for defining a handle parameter | ||||||||
| * @ingroup ov_runtime_ocl_gpu_cpp_api | ||||||||
| */ | ||||||||
|
|
||||||||
| using gpu_handle_param = void*; | ||||||||
|
|
||||||||
| /** | ||||||||
| * @brief Shortcut for defining a HANDLE on Windows or a file descriptor on Linux | ||||||||
| * @ingroup ov_runtime_ocl_gpu_cpp_api | ||||||||
| */ | ||||||||
| #ifdef __linux__ | ||||||||
| using os_handle_param = int; | ||||||||
| #else | ||||||||
| using os_handle_param = void*; | ||||||||
| #endif | ||||||||
|
|
||||||||
| /** | ||||||||
| * @brief This class represents an abstraction for GPU plugin remote tensor | ||||||||
| * which can be shared with user-supplied OpenCL buffer. | ||||||||
|
|
@@ -58,6 +69,7 @@ class ClBufferTensor : public RemoteTensor { | |||||||
| {{std::string(ov::intel_gpu::mem_handle.name()), {}}, | ||||||||
| {std::string(ov::intel_gpu::shared_mem_type.name()), | ||||||||
| {ov::Any(ov::intel_gpu::SharedMemType::OCL_BUFFER).as<std::string>(), | ||||||||
| ov::Any(ov::intel_gpu::SharedMemType::BUFFER_FROM_HANDLE).as<std::string>(), | ||||||||
| ov::Any(ov::intel_gpu::SharedMemType::DX_BUFFER).as<std::string>()}}}); | ||||||||
| } | ||||||||
|
|
||||||||
|
|
@@ -307,6 +319,30 @@ class ClContext : public RemoteContext { | |||||||
| return create_tensor(type, shape, params).as<ClImage2DTensor>(); | ||||||||
| } | ||||||||
|
|
||||||||
| /** | ||||||||
| * @brief This function is used to obtain a remote tensor object from a user-supplied external memory handle. | ||||||||
| * The API mirrors the NPU pointer-based create_tensor form. | ||||||||
| * The handle is stored in an AnyMap under ov::intel_gpu::mem_handle, together with | ||||||||
| * ov::intel_gpu::shared_mem_type set to SharedMemType::BUFFER_FROM_HANDLE, and the call is forwarded to | ||||||||
| * create_tensor(type, shape, params); the result is converted with as<ClBufferTensor>(). | ||||||||
| * @param type Tensor element type | ||||||||
| * @param shape Tensor shape | ||||||||
| * @param shared_buffer External memory handle from another API (DX12 shared NT handle on Windows passed as void*, | ||||||||
| * DMA-BUF fd on Linux passed as int) | ||||||||
| * @param memory_type Memory type to use; only MemType::SHARED_BUF is currently supported | ||||||||
| * @return A remote tensor instance | ||||||||
| * @note Preconditions are checked with OPENVINO_ASSERT: memory_type must equal MemType::SHARED_BUF and, on | ||||||||
| * non-Linux builds only, shared_buffer must not be nullptr. On Linux the descriptor value is not validated here. | ||||||||
| */ | ||||||||
| ClBufferTensor create_tensor(const element::Type type, | ||||||||
| const Shape& shape, | ||||||||
| os_handle_param shared_buffer, | ||||||||
| const MemType memory_type) { | ||||||||
| #ifndef __linux__ /* os_handle_param is a pointer type only outside Linux, so the null check is compiled only there */ | ||||||||
| OPENVINO_ASSERT(shared_buffer != nullptr, "shared_buffer must not be nullptr for SHARED_BUF memory type"); | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
At first glance this check is useless as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you update documentation as well? docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin.rst |
||||||||
| #endif | ||||||||
| OPENVINO_ASSERT(memory_type == MemType::SHARED_BUF, | ||||||||
| "Only SHARED_BUF memory type is supported for raw buffer pointer or NT handle"); | ||||||||
| AnyMap params = {{ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::BUFFER_FROM_HANDLE}, | ||||||||
| {ov::intel_gpu::mem_handle.name(), reinterpret_cast<void*>(shared_buffer)}}; /* NOTE(review): on Linux this converts the int descriptor to void*, so descriptor 0 becomes a null pointer - confirm the consumer of mem_handle accepts that */ | ||||||||
| return create_tensor(type, shape, params).as<ClBufferTensor>(); /* delegates to the AnyMap-based create_tensor */ | ||||||||
| } | ||||||||
|
|
||||||||
| /** | ||||||||
| * @brief This function is used to obtain remote tensor object from user-supplied USM pointer | ||||||||
| * @param type Tensor element type | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,6 @@ | |
| #include <string> | ||
| #include <vector> | ||
| #include <memory> | ||
| #include <set> | ||
| #include <stdexcept> | ||
|
|
||
| // NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation | ||
|
|
@@ -46,6 +45,16 @@ ocl_error::ocl_error(cl::Error const& err) | |
| : ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {} | ||
| OPENVINO_SUPPRESS_DEPRECATED_END | ||
|
|
||
| namespace { | ||
| /** | ||
| * @brief Queries the OpenCL platform id associated with the given device. | ||
| * @param device Device whose CL_DEVICE_PLATFORM property is read through clGetDeviceInfo. | ||
| * @return The platform id written by clGetDeviceInfo. A null result fails the OPENVINO_ASSERT below, so callers | ||
| * should not see nullptr (assuming OPENVINO_ASSERT does not return on failure). | ||
| */ cl_platform_id get_platform_id_for_device(const cl::Device& device) { | ||
| cl_platform_id platform = nullptr; | ||
| cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); | ||
| OPENVINO_ASSERT(err == CL_SUCCESS && platform != nullptr, /* a call that reports success but leaves platform null is rejected as well */ | ||
| "[GPU] Failed to retrieve CL_DEVICE_PLATFORM, error: ", err); | ||
| return platform; | ||
| } | ||
| } // namespace | ||
|
|
||
| ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type) | ||
| : engine(dev) { | ||
| OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. Only OCL runtime is supported"); | ||
|
|
@@ -96,6 +105,63 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const | |
| : allocation_type::unknown; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Imports an externally created memory handle as an OpenCL buffer and wraps it in ocl::gpu_buffer_from_handle. | ||
| * The handle is passed to clCreateBufferWithProperties as CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR when _WIN32 is | ||
| * defined, or as CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR when __linux__ is defined. The created buffer is then | ||
| * acquired on the service stream queue through cl::ExternalMemoryHelper::acquire, and clFinish is called on that | ||
| * queue before the memory object is built and returned. | ||
| * @param layout Layout of the imported memory; layout.bytes_count() is used as the buffer size. | ||
| * @param external_handle Handle to import; must not be null. | ||
| * @return Memory object constructed from the imported cl::Buffer. | ||
| * @note Failures are reported through OPENVINO_ASSERT / OPENVINO_THROW: null handle, cl_khr_external_memory not | ||
| * reported by extension_supported, headers without CL_VERSION_3_0, unsupported platform, null context, buffer | ||
| * creation failure, or a failed acquire (the imported cl_mem is released before throwing in that case). | ||
| */ memory::ptr ocl_engine::import_buffer(const layout& layout, shared_handle external_handle) { | ||
| OPENVINO_ASSERT(external_handle != nullptr, "[GPU] External memory handle must not be null"); | ||
| OPENVINO_ASSERT(extension_supported("cl_khr_external_memory"), | ||
| "[GPU] Selected OpenCL device does not advertise cl_khr_external_memory; " | ||
| "external memory import is not supported"); | ||
|
|
||
| #ifndef CL_VERSION_3_0 /* without OpenCL 3.0 headers the whole import path below is compiled out */ | ||
| OPENVINO_THROW("[GPU] External memory import is not supported on this platform"); | ||
| #else | ||
| #ifdef _WIN32 | ||
| constexpr auto handle_type_token = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; | ||
| #elif defined(__linux__) | ||
| constexpr auto handle_type_token = CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this PR support dmabuf as well? if not, I think we need to add an assert, instead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. supports but tests from vulkan was deleted |
||
| #else | ||
| OPENVINO_THROW("[GPU] External memory import is not supported on this platform"); /* NOTE(review): the code after this #if chain still uses handle_type_token, which this branch does not define - confirm this branch can build */ | ||
| #endif | ||
|
|
||
| cl_mem_properties props[] = { | ||
| static_cast<cl_mem_properties>(handle_type_token), | ||
| static_cast<cl_mem_properties>(reinterpret_cast<intptr_t>(external_handle)), /* the handle value itself is stored as the property value */ | ||
| 0, /* zero ends the property list */ | ||
| }; | ||
|
|
||
| cl_int errcode = CL_SUCCESS; | ||
| auto cl_ctx = static_cast<cl_context>(get_user_context()); | ||
| OPENVINO_ASSERT(cl_ctx != nullptr, "[GPU] OpenCL context is null while importing external buffer"); | ||
| const auto byte_size = layout.bytes_count(); | ||
| cl_mem imported = clCreateBufferWithProperties(cl_ctx, props, CL_MEM_READ_WRITE, byte_size, nullptr, &errcode); | ||
|
Lyamin-Roman marked this conversation as resolved.
|
||
| OPENVINO_ASSERT(errcode == CL_SUCCESS && imported != nullptr, | ||
| "[GPU] Failed to import external memory handle via clCreateBufferWithProperties, error: ", | ||
| errcode); | ||
|
|
||
| cl_platform_id platform = get_platform_id_for_device(get_cl_device()); | ||
| auto& svc_stream = downcast<ocl_stream>(get_service_stream()); | ||
| cl_command_queue q = svc_stream.get_cl_queue().get(); | ||
| cl_int acquire_err = cl::ExternalMemoryHelper::acquire(platform, q, imported); | ||
| if (acquire_err != CL_SUCCESS) { | ||
| clReleaseMemObject(imported); /* release the buffer before throwing so it is not left referenced on this path */ | ||
| OPENVINO_THROW("[GPU] clEnqueueAcquireExternalMemObjectsKHR failed or unavailable, error: ", acquire_err); | ||
| } | ||
| clFinish(q); /* wait for the queue to drain after the acquire; the return value is not checked */ | ||
| cl::Buffer buf(imported, true); /* presumably `true` is the wrapper's retain flag, so buf holds its own reference - verify against the cl::Buffer constructor */ | ||
| auto memory = std::make_shared<ocl::gpu_buffer_from_handle>(this, layout, buf, nullptr); /* NOTE(review): if this throws, the clReleaseMemObject below is skipped - check whether the creation reference leaks */ | ||
| clReleaseMemObject(imported); /* drops the reference obtained from clCreateBufferWithProperties */ | ||
| return memory; | ||
| #endif | ||
| } | ||
|
|
||
| /** | ||
| * @brief Hands an external memory object back through cl::ExternalMemoryHelper::release on the service stream queue. | ||
| * The helper is called with the platform of this engine's device, the service stream's command queue and mem; | ||
| * clFinish is then called on the same queue. | ||
| * @param mem OpenCL memory object to release; presumably one created by import_buffer - confirm against callers. | ||
| * @note Neither the result of the release helper nor the result of clFinish is inspected here. | ||
| */ void ocl_engine::release_external_memory(cl_mem mem) const { | ||
| cl_platform_id platform = get_platform_id_for_device(get_cl_device()); | ||
| auto& opencl_stream = downcast<ocl_stream>(get_service_stream()); | ||
| cl_command_queue q = opencl_stream.get_cl_queue().get(); | ||
| // If the extension entrypoint is missing, the cl_mem refcount drop on dtor will still proceed. | ||
| cl::ExternalMemoryHelper::release(platform, q, mem); | ||
| clFinish(q); /* wait for the queue to drain before returning */ | ||
| } | ||
|
|
||
| memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { | ||
| OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout"); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should support importing USM as well. Its first three arguments are identical to those of the USM version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that, at least on Windows, creating handles is supported only for allocations on the GPU.