Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/ap/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
# import test_binary_trivial_reduce
import test_matmul_binary
import test_matmul_epilogue
import test_zip_binary
2 changes: 2 additions & 0 deletions tests/ap/make_axpr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ FILENAMES_ARRAY=(
"matmul_epilogue_pass"
"test_matmul_binary"
"test_matmul_epilogue"
"zip_variadic_tpl"
"test_zip_binary"
)
for filename in "${FILENAMES_ARRAY[@]}"
do
Expand Down
163 changes: 163 additions & 0 deletions tests/ap/paddle-tests/test_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from os.path import dirname

sys.path.append(dirname(__file__))

import unittest
import utils

import paddle
from paddle.static import InputSpec


def moe_zip(
    unzipped_tokens,
    zipped_expertwise_rowmap,
    expert_routemap_topk,
    unzipped_token_probs,
):
    """Forward the four inputs to ``paddle._C_ops._moe_zip``.

    The arguments are passed through positionally and unchanged.

    Returns:
        A 2-tuple ``(zipped_tokens, zipped_prob_topk)`` holding the two
        outputs produced by the underlying op.
    """
    op_outputs = paddle._C_ops._moe_zip(
        unzipped_tokens,
        zipped_expertwise_rowmap,
        expert_routemap_topk,
        unzipped_token_probs,
    )
    # The op is expected to yield exactly two results; unpacking enforces it.
    tokens, prob_topk = op_outputs
    return tokens, prob_topk


class CINNSubGraphNet(paddle.nn.Layer):
    """Layer that wraps a four-input, two-output callable.

    The callable is stored at construction time and invoked from
    ``forward`` so that it can be run through the layer machinery.
    """

    def __init__(self, fn):
        super().__init__()
        # Callable invoked by forward(); must accept four positional inputs.
        self.fn = fn

    def forward(self, x1, x2, x3, x4):
        """Call the wrapped callable and return its two results as a tuple."""
        first_out, second_out = self.fn(x1, x2, x3, x4)
        return first_out, second_out


class TestAPZip(unittest.TestCase):
    """
    Compare the ``moe_zip`` outputs with and without CINN enabled
    (Pir API + @to_static + CINN).
    """

    def setUp(self):
        paddle.seed(2022)
        self.prepare_data()

    @staticmethod
    def _as_input(data, dtype):
        """Build a tensor from ``data`` and clear its ``stop_gradient`` flag."""
        tensor = paddle.to_tensor(data, dtype=dtype)
        tensor.stop_gradient = False
        return tensor

    def prepare_data(self):
        """Create the four fixed input tensors plus their shape/dtype metadata."""
        u_seqlen, token_len = 4, 8
        seqlen, num_experts, topk = 3, 4, 8

        # One row per unzipped token; every element of a row has the same value.
        self.unzipped_tokens_shape = [u_seqlen, token_len]
        self.unzipped_tokens_dtype = "bfloat16"
        self.unzipped_tokens = self._as_input(
            [[fill] * token_len for fill in (1.0, 2.0, 3.0, 1.0)],
            self.unzipped_tokens_dtype,
        )

        # NOTE(review): -1 presumably marks "no row mapped" -- confirm
        # against the _moe_zip op definition.
        self.zipped_expertwise_rowmap_shape = [seqlen, num_experts]
        self.zipped_expertwise_rowmap_dtype = "int32"
        self.zipped_expertwise_rowmap = self._as_input(
            [
                [0, 3, -1, -1],
                [-1, 1, -1, -1],
                [2, -1, -1, -1],
            ],
            self.zipped_expertwise_rowmap_dtype,
        )

        self.expert_routemap_topk_shape = [seqlen, topk]
        self.expert_routemap_topk_dtype = "int32"
        self.expert_routemap_topk = self._as_input(
            [
                [-1, -1, 0, 1, -1, -1, -1, -1],
                [1, -1, -1, -1, -1, -1, -1, -1],
                [-1, 0, -1, -1, -1, -1, -1, -1],
            ],
            self.expert_routemap_topk_dtype,
        )

        self.unzipped_token_probs_shape = [u_seqlen, 1]
        self.unzipped_token_probs_dtype = "float32"
        self.unzipped_token_probs = self._as_input(
            [[0.50000000], [1.0], [1.0], [0.50000000]],
            self.unzipped_token_probs_dtype,
        )

        # Dtype labels handed to utils.check_result for each output.
        self.zipped_tokens_type = "bfloat16"
        self.zipped_prob_topk_type = "float32"

    def eval_symbolic(self, net, use_cinn, profile):
        """Convert ``net`` with ``utils.apply_to_static`` and run it once.

        Args:
            net: Layer to convert; it is fed the four prepared inputs.
            use_cinn: Forwarded to ``utils.apply_to_static``.
            profile: Forwarded to ``utils.run_with_profile``.

        Returns:
            The 2-tuple ``(zipped_tokens, zipped_prob_topk)`` produced by
            the converted layer.
        """
        shape_dtype_pairs = (
            (self.unzipped_tokens_shape, self.unzipped_tokens_dtype),
            (
                self.zipped_expertwise_rowmap_shape,
                self.zipped_expertwise_rowmap_dtype,
            ),
            (self.expert_routemap_topk_shape, self.expert_routemap_topk_dtype),
            (self.unzipped_token_probs_shape, self.unzipped_token_probs_dtype),
        )
        input_spec = [
            InputSpec(shape=shape, dtype=dtype)
            for shape, dtype in shape_dtype_pairs
        ]

        static_net = utils.apply_to_static(net, use_cinn, input_spec)
        static_net.eval()
        zipped_tokens, zipped_prob_topk = utils.run_with_profile(
            profile,
            static_net,
            self.unzipped_tokens,
            self.zipped_expertwise_rowmap,
            self.expert_routemap_topk,
            self.unzipped_token_probs,
        )
        return zipped_tokens, zipped_prob_topk

    def test_pure_zip(self):
        """Run ``moe_zip`` with and without CINN and compare both outputs."""
        profile = False
        net = CINNSubGraphNet(moe_zip)
        cinn_out = self.eval_symbolic(net, use_cinn=True, profile=profile)
        dy2st_out = self.eval_symbolic(net, use_cinn=False, profile=profile)
        if profile:
            # Profiling runs only measure; results are not compared.
            return

        output_dtypes = (self.zipped_tokens_type, self.zipped_prob_topk_type)
        for dtype, cinn_res, dy2st_res in zip(output_dtypes, cinn_out, dy2st_out):
            utils.check_result(dtype, cinn_res.numpy(), dy2st_res.numpy(), True)


if __name__ == "__main__":
    # Run the test cases in this module when executed as a script.
    unittest.main()
25 changes: 25 additions & 0 deletions tests/ap/test_zip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
#
# Regenerates the axpr artifacts and runs paddle-tests/test_zip.py with the
# AP and CINN flags enabled.
#
# Run this from the directory that contains make_axpr.sh and paddle-tests/:
# every path below is resolved against the current working directory.

# GPU selection: honour a value exported by the caller, otherwise fall back
# to device 2 (the previously hard-coded choice).
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"
# Turn off TF32 math in NVIDIA libraries.
export NVIDIA_TF32_OVERRIDE=0

# make_axpr.sh uses bash arrays, which a POSIX `sh` (e.g. dash) rejects,
# so it must be run with bash explicitly.
bash make_axpr.sh

# AP specific settings
export FLAGS_enable_ap=1
# Quoted so a working directory containing spaces does not split the value.
export AP_WORKSPACE_DIR="$(pwd)/ap_workspace"
export AP_PATH="$(pwd)/"

# CINN related settings
export FLAGS_check_infer_symbolic=1
export FLAGS_enable_pir_api=1
export FLAGS_cinn_bucket_compile=True
export FLAGS_prim_enable_dynamic=true
export FLAGS_prim_all=True
export FLAGS_pir_apply_shape_optimization_pass=1
export FLAGS_group_schedule_tiling_first=1
export FLAGS_cinn_new_group_scheduler=1

# Verbose (level 6) glog output for the ap_generic_drr_pass module.
export GLOG_vmodule=ap_generic_drr_pass=6

python "$(pwd)/paddle-tests/test_zip.py"
79 changes: 79 additions & 0 deletions tests/ap/test_zip_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import pir
import abstract_drr

import zip_variadic_tpl
import kernel_arg_id_util
import program_translator_util
import op_compute_translator_util


@abstract_drr.register_drr_pass("pure_zip_fuse", nice=0)
class PureZipFuse(abstract_drr.DrrPass):
    """DRR pass registered as ``pure_zip_fuse``.

    Matches a single ``pd_op._moe_zip`` native op and rewrites it to an AP
    pattern fusion op whose kernel is produced by :meth:`code_gen`.
    """

    def source_pattern(self, o, t):
        """Declare the matched op: ``pd_op._moe_zip`` with 4 inputs, 2 outputs."""
        print("in source pattern")
        o.moe_zip_op = o.ap_native_op("pd_op._moe_zip")
        matched_inputs = [
            t.unzipped_tokens,
            t.zipped_expertwise_rowmap,
            t.expert_routemap_topk,
            t.unzipped_token_probs,
        ]
        matched_outputs = [t.zipped_tokens, t.zipped_probs_topk]
        o.moe_zip_op(matched_inputs, matched_outputs)

    def constraint(self, o, t):
        """Accept every match; no additional condition is imposed."""
        return True

    def result_pattern(self, o, t):
        """Declare the replacement: a fusion op over the same tensors."""
        o.fustion_op = o.ap_pattern_fusion_op(self.code_gen)
        fused_inputs = [
            t.unzipped_tokens,
            t.zipped_expertwise_rowmap,
            t.expert_routemap_topk,
            t.unzipped_token_probs,
        ]
        fused_outputs = [t.zipped_tokens, t.zipped_probs_topk]
        o.fustion_op(fused_inputs, fused_outputs)

    def code_gen(self, ctx, o, t):
        """Compile the zip kernel template for the matched tensors.

        Kernel-argument ids are requested from ``ctx`` in a fixed order
        (input data pointers, output data pointers, then dimension
        expressions) and handed to ``ZipVariadicTemplate.compile``.

        Returns:
            Whatever ``ZipVariadicTemplate.compile`` returns.
        """
        registry = kernel_arg_id_util.KernelArgIdNameRegistry(
            code_gen_ctx=ctx, tensor_match_ctx=t, name_prefix=""
        )
        template = zip_variadic_tpl.ZipVariadicTemplate(
            mut_kernel_arg_id_registry=registry
        )

        # Input tensor data pointers.
        tokens_in = ctx.in_tensor_data_ptr_kernel_arg_id(t.unzipped_tokens)
        rowmap_in = ctx.in_tensor_data_ptr_kernel_arg_id(
            t.zipped_expertwise_rowmap
        )
        routemap_in = ctx.in_tensor_data_ptr_kernel_arg_id(
            t.expert_routemap_topk
        )
        probs_in = ctx.in_tensor_data_ptr_kernel_arg_id(t.unzipped_token_probs)

        # Output tensor data pointers.
        tokens_out = ctx.out_tensor_data_ptr_kernel_arg_id(t.zipped_tokens)
        probs_out = ctx.out_tensor_data_ptr_kernel_arg_id(t.zipped_probs_topk)

        # Dimension expressions read from the symbolic shapes.
        topk_dim = ctx.dim_expr_kernel_arg_id(
            t.expert_routemap_topk.symbolic_shape_to_list()[1]
        )
        num_experts_dim = ctx.dim_expr_kernel_arg_id(
            t.zipped_expertwise_rowmap.symbolic_shape_to_list()[1]
        )
        token_length_dim = ctx.dim_expr_kernel_arg_id(
            t.unzipped_tokens.symbolic_shape_to_list()[1]
        )
        total_zipped_dim = ctx.dim_expr_kernel_arg_id(
            t.zipped_tokens.symbolic_shape_to_list()[0]
        )

        return template.compile(
            unzipped_tokens_in_karg=tokens_in,
            zipped_expertwise_rowmap_in_karg=rowmap_in,
            expert_routemap_topk_in_karg=routemap_in,
            unzipped_token_probs_in_karg=probs_in,
            zipped_tokens_out_karg=tokens_out,
            zipped_probs_topk_out_karg=probs_out,
            topk_kargs=topk_dim,
            num_experts_kargs=num_experts_dim,
            token_length_kargs=token_length_dim,
            total_zipped_tokens_num_kargs=total_zipped_dim,
        )
Loading