Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ struct DispatchDataFunc {
explicit DispatchDataFunc(std::nullptr_t) {}

/// Invokes the wrapped dispatch-data callback exactly once, if one is set.
///
/// @param params    Runtime parameters forwarded unchanged to the callback.
/// @param kd        Kernel data forwarded to the callback.
/// @param rt_params Optional implementation runtime parameters forwarded to
///                  the callback; defaults to nullptr.
///
/// When no callback is stored (e.g. the object was constructed from nullptr)
/// the call is a no-op instead of throwing std::bad_function_call.
void operator()(const RuntimeParams& params, KernelData& kd, ImplRuntimeParams* rt_params = nullptr) const {
    // Only the guarded call may remain: an additional unguarded call would
    // throw on an empty callback and run a non-empty callback twice.
    if (m_dispatch_data_func) {
        m_dispatch_data_func(params, kd, rt_params);
    }
}
Comment thread
xzhan34 marked this conversation as resolved.
};

Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/kv_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ struct kv_cache_impl : multi_stage_primitive<kv_cache> {
auto scale_zp_concat_kernel_impl = scale_zp_concat_kernel_selector.GetImplementation(_kernels_data[*scale_concat_stage].kernelName);
scale_zp_concat_kernel_impl->GetUpdateDispatchDataFunc(_kernels_data[*scale_concat_stage]);
}

if (const auto zp_concat_stage = stages.try_get_index(kv_stage::zp_concat)) {
auto& zp_concat_kernel_selector = kernel_selector_t::Instance();
auto zp_concat_kernel_impl = zp_concat_kernel_selector.GetImplementation(_kernels_data[*zp_concat_stage].kernelName);
zp_concat_kernel_impl->GetUpdateDispatchDataFunc(_kernels_data[*zp_concat_stage]);
}
}
}
// No-op override: the body is empty, so nothing is done with `instance` here.
void set_arguments_impl(kv_cache_inst& instance) override {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1031,8 +1031,20 @@ class moe_3gemm_swiglu_opt_impl : public PrimitiveImplOCL {
_shared_down_proj->forward(stream, batch, gate_mem_dnnl, output_dnnl, scalar_gate_dnnl);
}

// Serializes this implementation into `ob`: the base-class state first, then
// the three prefill flags. load() reads the flags back in the same order.
void save(BinaryOutputBuffer& ob) const override {
PrimitiveImplOCL::save(ob);
// Keep this write order in sync with the read order in load().
ob << use_micro_gemm_prefill;
ob << use_gpu_mask_gen_prefill;
ob << use_grouped_gemm_prefill;
}

// Restores this implementation from `ib`: the base-class state, then the three
// prefill flags in the order save() wrote them, and finally re-runs init()
// with the descriptor obtained from the buffer's kernel impl params.
void load(BinaryInputBuffer& ib) override {
PrimitiveImplOCL::load(ib);
// Read execution-path flags before init() so any future init() logic
// that depends on them sees the deserialized (not default) values.
ib >> use_micro_gemm_prefill;
ib >> use_gpu_mask_gen_prefill;
ib >> use_grouped_gemm_prefill;
// NOTE(review): the pointer returned by getKernelImplParams() is dereferenced
// without a null check — presumably the loader always sets it; confirm.
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernelImplParams());
init(impl_params->typed_desc<moe_3gemm_fused_compressed>());
}
Comment thread
xzhan34 marked this conversation as resolved.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -567,8 +567,9 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
const bool disable_moe_opt = GPU_DEBUG_VALUE_OR(config.get_disable_moe_opt(), false);

// MOE: TiledMoeBlock -> GatherMatmuls(compressed) -> MoeOp(compressed) -> MoeOpWithRouting(compressed).
// Gated on supports_immad: GatherMatmul backend is systolic-only.
if (device_info.supports_immad) {
// Gated on supports_immad (systolic-only) and oneDNN (required for expert GEMM dispatch).
// Note: even though we are already inside `if (supports_immad)`, oneDNN can still be explicitly disabled by the user.
if (device_info.supports_immad && config.get_use_onednn()) {
Comment thread
xzhan34 marked this conversation as resolved.
Comment thread
xzhan34 marked this conversation as resolved.
manager.register_pass<ov::pass::ConvertTiledMoeBlockToGatherMatmuls>();

// f32 listed because this pass runs before ConvertPrecision (line ~588);
Expand All @@ -583,6 +584,10 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
const bool has_batch_dim = !is_pa;
manager.register_pass<ov::pass::MoeOpFusion>(has_batch_dim);
manager.register_pass<ov::intel_gpu::FuseMOESharedExpert>();
// MOE3GemmFusedCompressed kernel dispatches expert GEMMs through
// oneDNN, which requires an in-order OCL queue. If oneDNN is
// disabled (e.g. via OV_GPU_USE_ONEDNN=0 on an IMMAD GPU), the
// queue stays out-of-order and the oneDNN stream creation may assert.
manager.register_pass<ov::intel_gpu::FuseMOE3GemmCompressed>();
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// Regression tests for the null-guard added to DispatchDataFunc::operator().
// Before the fix, calling a DispatchDataFunc constructed from nullptr would
// invoke a null std::function and throw std::bad_function_call.

#include "test_utils.h"
#include "common_utils/kernel_generator_base.hpp"

using namespace ov::intel_gpu;

// A DispatchDataFunc built from nullptr must be callable without throwing.
TEST(dispatch_data_func, null_func_does_not_crash) {
    DispatchDataFunc empty_dispatch{nullptr};
    KernelData kernel_data;
    RuntimeParams runtime_params;

    // Invoking the empty wrapper is expected to be a silent no-op.
    ASSERT_NO_THROW(empty_dispatch(runtime_params, kernel_data, nullptr));
}

// A DispatchDataFunc wrapping a real callable must forward the call to it.
TEST(dispatch_data_func, valid_func_is_called) {
    bool was_invoked = false;
    DispatchDataFunc recording_dispatch{
        [&was_invoked](const RuntimeParams&, KernelData&, ImplRuntimeParams*) { was_invoked = true; }};
    KernelData kernel_data;
    RuntimeParams runtime_params;

    recording_dispatch(runtime_params, kernel_data, nullptr);

    // The lambda flips the flag only if the wrapper actually called it.
    ASSERT_TRUE(was_invoked);
}

// The dispatch functor held by a default-constructed KernelData must be safe
// to invoke.
TEST(dispatch_data_func, default_constructed_is_null_safe) {
    // Per the original test's note, a default-constructed KernelData carries
    // update_dispatch_data_func{nullptr}.
    KernelData kernel_data;
    RuntimeParams runtime_params;

    ASSERT_NO_THROW(kernel_data.update_dispatch_data_func(runtime_params, kernel_data, nullptr));
}
Loading
Loading