Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class SDPAOptImpl : public SDPAImplBase {
};

bool SDPAOpt::supports_micro_sdpa(const RuntimeParams& params) {
#ifdef OV_GPU_WITH_ZE_RT
std::cout << "Level Zero: not supporting SDPA" << std::endl;
return false;
#endif
#ifdef ENABLE_ONEDNN_FOR_GPU
auto& engine = params.get_program().get_engine();
const auto& device_info = engine.get_device_info();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ KernelsPriority ConvolutionKernel_b_fs_yx_fsv16::GetKernelsPriority(const Params
}

bool ConvolutionKernel_b_fs_yx_fsv16::Validate(const Params& p) const {

#if OV_GPU_WITH_ZE_RT
DO_NOT_USE_THIS_KERNEL(p.layerID);
#endif

if (!ConvolutionKernelBase::Validate(p) || !ConvolutionCheckInput(p)) {
DO_NOT_USE_THIS_KERNEL(p.layerID);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ KernelsPriority ConvolutionKernel_b_fs_yx_fsv16_1x1::GetKernelsPriority(const Pa
}

bool ConvolutionKernel_b_fs_yx_fsv16_1x1::Validate(const Params& p) const {

#if OV_GPU_WITH_ZE_RT
DO_NOT_USE_THIS_KERNEL(p.layerID)
#endif

if (!ConvolutionKernelBase::Validate(p)) {
DO_NOT_USE_THIS_KERNEL(p.layerID);
}
Expand Down Expand Up @@ -273,10 +278,10 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut
DimensionAccessHelperJit output_dims(params.outputs[0]);
DimensionAccessHelperJit output_padded_dims(params.outputs[0], true);

const auto padded_input = "(" + input0_padded_dims.x_pad().first + "+" + input0_padded_dims.x_pad().first + ") != 0";
const auto padded_input = "(" + input0_padded_dims.x_pad().first + "+" + input0_padded_dims.x_pad().second + ") != 0";
jit.AddConstant(MakeJitConstant("PADDED_INPUT", padded_input));

const auto padded_output = "(" + output_padded_dims.x_pad().first + "+" + output_padded_dims.x_pad().first + ") != 0";
const auto padded_output = "(" + output_padded_dims.x_pad().first + "+" + output_padded_dims.x_pad().second + ") != 0";
jit.AddConstant(MakeJitConstant("PADDED_OUTPUT", padded_output));

// In shape agnostic kernel, the fused shape cannot be specified at build time or run time.
Expand Down
28 changes: 28 additions & 0 deletions src/plugins/intel_gpu/src/runtime/ze/ze_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,22 @@ namespace cldnn {
namespace ze {

namespace {
// Per-thread record of the id of the kernel most recently handed to
// ze_stream::enqueue_kernel (assigned at the top of that function).
// NOTE(review): no reader is visible in this chunk - presumably kept for
// failure diagnostics; confirm it is actually consumed somewhere.
thread_local std::string last_enqueued_kernel_id;

/// Reports whether the OV_GPU_ZE_SYNC_EACH_KERNEL debug switch is on.
///
/// The environment variable is read once, on the first call; the result is
/// cached in a function-local static, so later changes to the environment are
/// not observed.
///
/// @return true when the variable is set to a non-empty value other than "0";
///         false when it is unset, empty, or exactly "0".
bool sync_after_each_kernel_enabled() {
    static const bool enabled = [] {
        const char* value = std::getenv("OV_GPU_ZE_SYNC_EACH_KERNEL");
        if (value == nullptr || value[0] == '\0') {
            // Unset and empty (e.g. `OV_GPU_ZE_SYNC_EACH_KERNEL=`) both mean "not requested".
            return false;
        }
        // Exactly "0" disables the mode; compared in place, no temporary std::string.
        const bool is_zero = value[0] == '0' && value[1] == '\0';
        return !is_zero;
    }();
    return enabled;
}

// Formats a Level Zero result code for log/error messages: the literal "0x"
// followed by the code, converted to uint32_t, in uppercase hexadecimal
// without padding.
std::string ze_result_to_hex(ze_result_t status) {
    const auto raw_code = static_cast<uint32_t>(status);
    std::ostringstream text;
    // Set the numeric formatting flags first; they do not affect the "0x" literal.
    text << std::uppercase << std::hex;
    text << "0x" << raw_code;
    return text.str();
}

inline ze_group_count_t to_group_count(const std::vector<size_t>& v) {
switch (v.size()) {
case 1:
Expand Down Expand Up @@ -259,6 +275,8 @@ event::ptr ze_stream::enqueue_kernel(kernel& kernel,
const kernel_arguments_data& /* args */,
std::vector<event::ptr> const& deps,
bool is_output) {
last_enqueued_kernel_id = kernel.get_id();

auto& ze_kernel = downcast<ze::ze_kernel>(kernel);

auto kern = ze_kernel.get_kernel_handle();
Expand Down Expand Up @@ -290,6 +308,16 @@ event::ptr ze_stream::enqueue_kernel(kernel& kernel,
dep_events_ptr == nullptr ? 0 : static_cast<uint32_t>(dep_events_ptr->size()),
dep_events_ptr == nullptr ? 0 : &dep_events_ptr->front()));

if (sync_after_each_kernel_enabled()) {
const auto status = ze::zeCommandListHostSynchronize(m_command_list, endless_wait);
if (status != ZE_RESULT_SUCCESS) {
OPENVINO_THROW("[GPU] Kernel failed during OV_GPU_ZE_SYNC_EACH_KERNEL mode: ",
kernel.get_id(),
", zeCommandListHostSynchronize status=",
ze_result_to_hex(status));
}
}

return ev;
}

Expand Down
Loading