From 79f265bd6614190a1a6d18469cf1c976a39dfde2 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Tue, 9 Jun 2026 17:15:10 +0200 Subject: [PATCH 01/26] Native: add tensor bucketize operator --- kernels/portable/cpu/op_bucketize.cpp | 135 ++++++++++++++++++++ kernels/portable/functions.yaml | 10 ++ kernels/test/CMakeLists.txt | 1 + kernels/test/op_bucketize_test.cpp | 174 ++++++++++++++++++++++++++ 4 files changed, 320 insertions(+) create mode 100644 kernels/portable/cpu/op_bucketize.cpp create mode 100644 kernels/test/op_bucketize_test.cpp diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp new file mode 100644 index 00000000000..abe536b5b90 --- /dev/null +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include + +namespace torch { +namespace executor { +namespace native { + +namespace { + +template +int64_t +cus_lower_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { + while (start < end) { + const int64_t mid = start + ((end - start) >> 1); + if (bd[mid] < val) { + start = mid + 1; + } else { + end = mid; + } + } + return start; +} + +template +int64_t +cus_upper_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { + while (start < end) { + const int64_t mid = start + ((end - start) >> 1); + if (bd[mid] <= val) { + start = mid + 1; + } else { + end = mid; + } + } + return start; +} + +template +void searchsorted_cpu( + KernelRuntimeContext& context, + const Tensor& input, + const Tensor& boundaries, + const bool& right, + Tensor& out) { + const auto bd_data = boundaries.const_data_ptr(); + const auto in_data = input.const_data_ptr(); + OUT_CTYPE* out_data = out.mutable_data_ptr(); + int64_t end_bd = boundaries.sizes().back(); + + const bool success = parallel_for( + 0, input.numel(), 200, [&](const auto begin, const auto end) { + for (const auto out_i : c10::irange(begin, end)) { + int64_t pos = right + ? 
cus_upper_bound(0, end_bd, in_data[out_i], bd_data) + : cus_lower_bound(0, end_bd, in_data[out_i], bd_data); + out_data[out_i] = pos; + } + }); + ET_KERNEL_CHECK_MSG(context, success, Internal, , "parallel_for failed"); +} + +void bucketize_pre_check( + KernelRuntimeContext& context, + const Tensor& input, + const Tensor& boundaries, + bool out_int32, + Tensor& out) { + ET_KERNEL_CHECK_MSG( + context, + boundaries.dim() == 1, + InvalidArgument, + , + "boundaries tensor must be 1 dimension, but got dim(", + boundaries.dim(), + ")"); + + ScalarType out_dtype = out.scalar_type(); + ET_KERNEL_CHECK_MSG( + context, + (out_dtype == ScalarType::Long && !out_int32) || + (out_dtype == ScalarType::Int && out_int32), + InvalidArgument, + , + "torch.bucketize(): output tensor's dtype is wrong, it can only be Int(int32) or Long(int64) depending on ", + "whether out_int32 flag is True, but we got output tensor's dtype ", + out_dtype, + " and out_int32 flag is ", + (out_int32 ? "True" : "False")); + + ET_KERNEL_CHECK( + context, tensors_have_same_shape(input, out), InvalidArgument, ); +} + +} // namespace + +Tensor& bucketize_tensor_out( + KernelRuntimeContext& context, + const Tensor& self, + const Tensor& boundaries, + bool out_int32, + bool right, + Tensor& out) { + bucketize_pre_check(context, self, boundaries, out_int32, out); + + ScalarType common_type = + promoteTypes(self.scalar_type(), boundaries.scalar_type()); + + ET_SWITCH_REALHBF16_TYPES( + common_type, context, "bucketize.Tensor_out", CTYPE, [&]() { + if (out_int32) { + searchsorted_cpu( + context, self, boundaries, right, out); + } else { + searchsorted_cpu( + context, self, boundaries, right, out); + } + }); + return out; +} +Tensor& bucketize_scalar_out( + KernelRuntimeContext& context, + const Scalar& self, + const Tensor& boundaries, + bool out_int32, + bool right, + Tensor& out) { + return out; + +} // namespace +} // namespace native +} // namespace executor +} // namespace torch \ No newline at end of file 
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml index ecf62ee3606..8f4dcf6e4bd 100644 --- a/kernels/portable/functions.yaml +++ b/kernels/portable/functions.yaml @@ -242,6 +242,16 @@ - arg_meta: null kernel_name: torch::executor::bmm_out +- op: bucketize.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::bucketize_tensor_out + +- op: bucketize.Scalar_out + kernels: + - arg_meta: null + kernel_name: torch::executor::bucketize_scalar_out + - op: cat.out kernels: - arg_meta: null diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index 2707ba5db71..e45fed272ef 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -184,6 +184,7 @@ set(all_test_sources "op_bitwise_right_shift_test.cpp" "op_bitwise_xor_test.cpp" "op_bmm_test.cpp" + "op_bucketize_test.cpp" "op_cat_test.cpp" "op_cdist_forward_test.cpp" "op_ceil_test.cpp" diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp new file mode 100644 index 00000000000..4efe63d1ba8 --- /dev/null +++ b/kernels/test/op_bucketize_test.cpp @@ -0,0 +1,174 @@ +#include // Declares the operator +#include +#include +#include +#include + +#include + +using namespace ::testing; +using executorch::aten::ScalarType; +using executorch::aten::Tensor; +using torch::executor::testing::TensorFactory; + +class OpBucketizeTest : public OperatorTest { + protected: + Tensor& op_bucketize_out( + const Tensor& in, + const Tensor& boundaries, + bool out_int32, + bool right, + Tensor& out) { + return torch::executor::aten::bucketize_outf( + context_, in, boundaries, out_int32, right, out); + } + + template + void run_smoke_test_int64() { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = 
op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); + } + + template + void run_smoke_test_int32() { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, true, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); + } + + template + void run_smoke_test_non_int_out() { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1.5, 2.5, 3.5, 4.5}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor expected = tf_dtype.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 2}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, true, false, out)); + } +}; + +TEST_F(OpBucketizeTest, SmokeTestInt64) { +#define RUN_SMOKE_TEST(ctype, dtype) run_smoke_test_int64(); + ET_FORALL_REALHBF16_TYPES(RUN_SMOKE_TEST); +#undef RUN_SMOKE_TEST +} + +TEST_F(OpBucketizeTest, SmokeTestInt32) { +#define RUN_SMOKE_TEST(ctype, dtype) run_smoke_test_int32(); + ET_FORALL_REALHBF16_TYPES(RUN_SMOKE_TEST); +#undef RUN_SMOKE_TEST +} + +TEST_F(OpBucketizeTest, RightTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 2, 3, 4}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, LeftTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 2, 3, 4}); + Tensor boundaries = tf_dtype.make({5}, 
{1, 2, 3, 4, 5}); + Tensor expected = tf_out.make({2, 2}, {0, 1, 2, 3}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, OutOfBoundaryTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor expected = tf_out.make({2, 2}, {0, 0, 5, 5}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, Boundaries1DTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor expected = tf_out.make({2, 2}, {0, 0, 5, 5}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, BoundariesNDimTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({3, 2}, {1, 2, 3, 4, 5, 6}); + Tensor out = tf_out.zeros({2, 2}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, false, false, out)); +} + +TEST_F(OpBucketizeTest, MismatchingInOutTest) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor out = tf_out.zeros({2, 3}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, false, false, out)); +} + +TEST_F(OpBucketizeTest, NonIntOutTest) { +#define RUN_SMOKE_TEST(ctype, dtype) \ + 
run_smoke_test_non_int_out(); + ET_FORALL_FLOAT_TYPES(RUN_SMOKE_TEST); +#undef RUN_SMOKE_TEST +} \ No newline at end of file From ef17bca0ed373f4c851944d3dd49c90499ff91e1 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 10 Jun 2026 22:28:17 +0200 Subject: [PATCH 02/26] Bucketize: handle input and boundaries with different types --- kernels/portable/cpu/op_bucketize.cpp | 100 +++++++++++++++++--------- 1 file changed, 68 insertions(+), 32 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index abe536b5b90..284446a9338 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -1,8 +1,7 @@ +#include #include #include #include -#include -#include namespace torch { namespace executor { @@ -10,12 +9,23 @@ namespace native { namespace { +using namespace torch::executor::native::utils::internal; +using namespace torch::executor::native::utils; + template -int64_t -cus_lower_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { +int64_t cus_lower_bound( + int64_t end, + const CTYPE val, + const char* bd, + load_to_compute_fn bd_load_fn, + ssize_t bd_elem_size) { + int64_t start = 0; + while (start < end) { const int64_t mid = start + ((end - start) >> 1); - if (bd[mid] < val) { + CTYPE mid_bd = bd_load_fn(&bd[mid * bd_elem_size]); + + if (mid_bd < val) { start = mid + 1; } else { end = mid; @@ -25,11 +35,19 @@ cus_lower_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { } template -int64_t -cus_upper_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { +int64_t cus_upper_bound( + int64_t end, + const CTYPE val, + const char* bd, + load_to_compute_fn bd_load_fn, + ssize_t bd_elem_size) { + ino64_t start = 0; + while (start < end) { const int64_t mid = start + ((end - start) >> 1); - if (bd[mid] <= val) { + CTYPE mid_bd = bd_load_fn(&bd[mid * bd_elem_size]); + + if (mid_bd <= val) { start = mid + 1; } else { end = mid; @@ -38,33 
+56,44 @@ cus_upper_bound(int64_t start, int64_t end, const CTYPE val, const CTYPE* bd) { return start; } -template -void searchsorted_cpu( +template +void bucketize_tensor( KernelRuntimeContext& context, - const Tensor& input, + const Tensor& self, const Tensor& boundaries, const bool& right, Tensor& out) { - const auto bd_data = boundaries.const_data_ptr(); - const auto in_data = input.const_data_ptr(); - OUT_CTYPE* out_data = out.mutable_data_ptr(); - int64_t end_bd = boundaries.sizes().back(); - - const bool success = parallel_for( - 0, input.numel(), 200, [&](const auto begin, const auto end) { - for (const auto out_i : c10::irange(begin, end)) { + auto in_load_fn = get_load_to_compute_fn( + context, self, SupportedTensorDtypes::REALHBF16); + const ssize_t in_size = self.element_size(); + auto in_data = reinterpret_cast(self.const_data_ptr()); + + auto bd_load_fn = get_load_to_compute_fn( + context, boundaries, SupportedTensorDtypes::REALHBF16); + const ssize_t bd_elem_size = boundaries.element_size(); + auto bd_data = reinterpret_cast(boundaries.const_data_ptr()); + int64_t bd_end = boundaries.sizes().back(); + + auto out_data = out.mutable_data_ptr(); + + const bool success = + parallel_for(0, self.numel(), 200, [&](const auto begin, const auto end) { + for (const auto i : c10::irange(begin, end)) { + auto compute_val = in_load_fn(&in_data[i * in_size]); int64_t pos = right - ? cus_upper_bound(0, end_bd, in_data[out_i], bd_data) - : cus_lower_bound(0, end_bd, in_data[out_i], bd_data); - out_data[out_i] = pos; + ? 
cus_upper_bound( + bd_end, compute_val, bd_data, bd_load_fn, bd_elem_size) + : cus_lower_bound( + bd_end, compute_val, bd_data, bd_load_fn, bd_elem_size); + out_data[i] = pos; } }); + ET_KERNEL_CHECK_MSG(context, success, Internal, , "parallel_for failed"); } -void bucketize_pre_check( +void bucketize_common_pre_checks( KernelRuntimeContext& context, - const Tensor& input, const Tensor& boundaries, bool out_int32, Tensor& out) { @@ -89,9 +118,6 @@ void bucketize_pre_check( out_dtype, " and out_int32 flag is ", (out_int32 ? "True" : "False")); - - ET_KERNEL_CHECK( - context, tensors_have_same_shape(input, out), InvalidArgument, ); } } // namespace @@ -103,23 +129,33 @@ Tensor& bucketize_tensor_out( bool out_int32, bool right, Tensor& out) { - bucketize_pre_check(context, self, boundaries, out_int32, out); + bucketize_common_pre_checks(context, boundaries, out_int32, out); + // Check manually as bucketize_common_pre_checks do not return + if (context.failure_state() != Error::Ok) { + return out; + } + ET_KERNEL_CHECK( + context, tensors_have_same_shape(self, out), InvalidArgument, out); ScalarType common_type = promoteTypes(self.scalar_type(), boundaries.scalar_type()); + ScalarType compute_type = utils::get_compute_type(common_type); + + static constexpr const char op_name[] = "bucketize.Tensor_out"; ET_SWITCH_REALHBF16_TYPES( - common_type, context, "bucketize.Tensor_out", CTYPE, [&]() { + compute_type, context, op_name, CTYPE_COMPUTE, [&]() { if (out_int32) { - searchsorted_cpu( + bucketize_tensor( context, self, boundaries, right, out); } else { - searchsorted_cpu( + bucketize_tensor( context, self, boundaries, right, out); } }); return out; } + Tensor& bucketize_scalar_out( KernelRuntimeContext& context, const Scalar& self, @@ -128,8 +164,8 @@ Tensor& bucketize_scalar_out( bool right, Tensor& out) { return out; +} -} // namespace } // namespace native } // namespace executor } // namespace torch \ No newline at end of file From 
9fe3c3fa16c0d51ecbba693d8507141ee4242af0 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 10 Jun 2026 22:31:43 +0200 Subject: [PATCH 03/26] Bucketize: add scalar implementation --- kernels/portable/cpu/op_bucketize.cpp | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 284446a9338..e8d60fa4dbd 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -92,6 +92,29 @@ void bucketize_tensor( ET_KERNEL_CHECK_MSG(context, success, Internal, , "parallel_for failed"); } +template +void bucketize_scalar( + KernelRuntimeContext& context, + const Scalar self, + const Tensor& boundaries, + const bool& right, + Tensor& out) { + CTYPE_COMPUTE compute_val = utils::scalar_to(self); + + auto bd_load_fn = get_load_to_compute_fn( + context, boundaries, SupportedTensorDtypes::REALHBF16); + const ssize_t bd_elem_size = boundaries.element_size(); + auto bd_data = reinterpret_cast(boundaries.const_data_ptr()); + int64_t bd_end = boundaries.sizes().back(); + + auto out_data = out.mutable_data_ptr(); + + int64_t pos = right + ? 
cus_upper_bound(bd_end, compute_val, bd_data, bd_load_fn, bd_elem_size) + : cus_lower_bound(bd_end, compute_val, bd_data, bd_load_fn, bd_elem_size); + out_data[0] = pos; +} + void bucketize_common_pre_checks( KernelRuntimeContext& context, const Tensor& boundaries, @@ -163,6 +186,30 @@ Tensor& bucketize_scalar_out( bool out_int32, bool right, Tensor& out) { + bucketize_common_pre_checks(context, boundaries, out_int32, out); + // Check manually as bucketize_common_pre_checks do not return + if (context.failure_state() != Error::Ok) { + return out; + } + ET_KERNEL_CHECK(context, out.sizes().back() == 1, InvalidArgument, out); + + ScalarType common_type = + utils::promote_type_with_scalar(boundaries.scalar_type(), self); + ScalarType compute_type = utils::get_compute_type(common_type); + + static constexpr const char op_name[] = "bucketize.Scalar_out"; + + ET_SWITCH_REALHBF16_TYPES( + compute_type, context, op_name, CTYPE_COMPUTE, [&]() { + if (out_int32) { + bucketize_scalar( + context, self, boundaries, right, out); + } else { + bucketize_scalar( + context, self, boundaries, right, out); + } + }); + return out; } From 6f204306cfcc86e5a5150a84a22fc3ca8d685871 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 10 Jun 2026 22:32:56 +0200 Subject: [PATCH 04/26] Bucketize: add scalar, types and mismatching out_int32 tests --- kernels/test/op_bucketize_test.cpp | 227 +++++++++++++++++++++++------ 1 file changed, 179 insertions(+), 48 deletions(-) diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 4efe63d1ba8..f0da3c3b916 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -1,3 +1,4 @@ +#include #include // Declares the operator #include #include @@ -7,82 +8,193 @@ #include using namespace ::testing; +using executorch::aten::Scalar; using executorch::aten::ScalarType; using executorch::aten::Tensor; using torch::executor::testing::TensorFactory; -class OpBucketizeTest : public OperatorTest { 
+class OpBucketizeScalarTest : public OperatorTest { protected: Tensor& op_bucketize_out( - const Tensor& in, + const Scalar& self, const Tensor& boundaries, bool out_int32, bool right, Tensor& out) { return torch::executor::aten::bucketize_outf( - context_, in, boundaries, out_int32, right, out); + context_, self, boundaries, out_int32, right, out); } - template - void run_smoke_test_int64() { + template + void test_bucketize_types() { TensorFactory tf_out; - TensorFactory tf_dtype; + TensorFactory tf_bound; - Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); - Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); - Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); - Tensor out = tf_out.zeros({2, 2}); + Scalar value = 2; + Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({1}, {1}); + Tensor out = tf_out.zeros({1}); - Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + Tensor ret = op_bucketize_out(value, boundaries, false, true, out); EXPECT_TENSOR_EQ(ret, expected); EXPECT_TENSOR_EQ(out, expected); } - template - void run_smoke_test_int32() { - TensorFactory tf_out; - TensorFactory tf_dtype; + void test_bucketize_bound_types() { +#define RUN_TEST(ctype, dtype) test_bucketize_types(); + ET_FORALL_REALHBF16_TYPES(RUN_TEST) +#undef RUN_TEST + } +}; + +TEST_F(OpBucketizeScalarTest, SanityCheck) { + TensorFactory tf_out; + TensorFactory tf_bound; + + Scalar value = 2.5; + Tensor boundaries = tf_bound.make({10}, {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}); + Tensor expected = tf_out.make({1}, {2}); + Tensor out = tf_out.zeros({1}); + + Tensor ret = op_bucketize_out(value, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeScalarTest, ScalarBoundaryTypes) { + test_bucketize_bound_types(); +} + +TEST_F(OpBucketizeScalarTest, ScalarOut1DFails) { + TensorFactory tf_out; + TensorFactory tf_bound; + + Scalar value = 2; + Tensor boundaries = 
tf_bound.make({5}, {0, 3, 5, 7, 9}); + Tensor out = tf_out.zeros({5}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(value, boundaries, false, true, out)); +} + +TEST_F(OpBucketizeScalarTest, ScalarOutNDFails) { + TensorFactory tf_out; + TensorFactory tf_bound; - Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); - Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Scalar value = 2; + Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); + Tensor out = tf_out.zeros({5, 5}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(value, boundaries, false, true, out)); +} + +class OpBucketizeTest : public OperatorTest { + protected: + Tensor& op_bucketize_out( + const Tensor& in, + const Tensor& boundaries, + bool out_int32, + bool right, + Tensor& out) { + return torch::executor::aten::bucketize_outf( + context_, in, boundaries, out_int32, right, out); + } + + template + void test_bucketize_types() { + TensorFactory tf_out; + TensorFactory tf_in; + TensorFactory tf_bound; + + Tensor values = tf_in.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); Tensor out = tf_out.zeros({2, 2}); - Tensor ret = op_bucketize_out(values, boundaries, true, true, out); + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); EXPECT_TENSOR_EQ(ret, expected); EXPECT_TENSOR_EQ(out, expected); } - template - void run_smoke_test_non_int_out() { - TensorFactory tf_out; - TensorFactory tf_dtype; - - Tensor values = tf_dtype.make({2, 2}, {1.5, 2.5, 3.5, 4.5}); - Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); - Tensor expected = tf_dtype.make({2, 2}, {1, 2, 3, 4}); - Tensor out = tf_out.zeros({2, 2}); + template + void test_bucketize_bound_types() { +#define RUN_TEST(ctype, dtype) \ + test_bucketize_types(); + ET_FORALL_REALHBF16_TYPES(RUN_TEST) +#undef RUN_TEST + } - ET_EXPECT_KERNEL_FAILURE( - context_, op_bucketize_out(values, boundaries, true, false, 
out)); + void test_bucketize_in_types() { +#define RUN_TEST(ctype, dtype) test_bucketize_bound_types(); + ET_FORALL_REALHBF16_TYPES(RUN_TEST) +#undef RUN_TEST } }; -TEST_F(OpBucketizeTest, SmokeTestInt64) { -#define RUN_SMOKE_TEST(ctype, dtype) run_smoke_test_int64(); - ET_FORALL_REALHBF16_TYPES(RUN_SMOKE_TEST); -#undef RUN_SMOKE_TEST +TEST_F(OpBucketizeTest, SanityCheck) { + TensorFactory tf_out; + TensorFactory tf_comp; + + Tensor values = + tf_comp.make({2, 4, 4}, {1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, + + 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8}); + + Tensor boundaries = tf_comp.make({5}, {0, 3, 5, 7, 9}); + + Tensor expected = + tf_out.make({2, 4, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, + + 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}); + + Tensor out = tf_out.zeros({2, 4, 4}); + + // The execution of the operator + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, InAndBoundaryTypes) { + test_bucketize_in_types(); } -TEST_F(OpBucketizeTest, SmokeTestInt32) { -#define RUN_SMOKE_TEST(ctype, dtype) run_smoke_test_int32(); - ET_FORALL_REALHBF16_TYPES(RUN_SMOKE_TEST); -#undef RUN_SMOKE_TEST +TEST_F(OpBucketizeTest, Int64Out) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, Int32Out) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({2, 2}, {1, 2, 3, 4}); + Tensor out = tf_out.zeros({2, 
2}); + + Tensor ret = op_bucketize_out(values, boundaries, true, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, RightTest) { +TEST_F(OpBucketizeTest, BoundariesRight) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -97,7 +209,7 @@ TEST_F(OpBucketizeTest, RightTest) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, LeftTest) { +TEST_F(OpBucketizeTest, BoundariesLeft) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -112,7 +224,7 @@ TEST_F(OpBucketizeTest, LeftTest) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, OutOfBoundaryTest) { +TEST_F(OpBucketizeTest, OutOfBoundary) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -127,7 +239,7 @@ TEST_F(OpBucketizeTest, OutOfBoundaryTest) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, Boundaries1DTest) { +TEST_F(OpBucketizeTest, Boundaries1D) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -142,7 +254,9 @@ TEST_F(OpBucketizeTest, Boundaries1DTest) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, BoundariesNDimTest) { +TEST_F(OpBucketizeTest, BoundaryTypeNonRealHBF16Fails) {} + +TEST_F(OpBucketizeTest, BoundariesNDFails) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -154,7 +268,7 @@ TEST_F(OpBucketizeTest, BoundariesNDimTest) { context_, op_bucketize_out(values, boundaries, false, false, out)); } -TEST_F(OpBucketizeTest, MismatchingInOutTest) { +TEST_F(OpBucketizeTest, MismatchingInOutDimsFails) { TensorFactory tf_out; TensorFactory tf_dtype; @@ -166,9 +280,26 @@ TEST_F(OpBucketizeTest, MismatchingInOutTest) { context_, op_bucketize_out(values, boundaries, false, false, out)); } -TEST_F(OpBucketizeTest, NonIntOutTest) { -#define RUN_SMOKE_TEST(ctype, dtype) \ - run_smoke_test_non_int_out(); - ET_FORALL_FLOAT_TYPES(RUN_SMOKE_TEST); -#undef RUN_SMOKE_TEST +TEST_F(OpBucketizeTest, MismatchingIntArg32Fails) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, 
{-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor out = tf_out.zeros({2, 2}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, true, false, out)); +} + +TEST_F(OpBucketizeTest, MismatchingIntArg64Fails) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor out = tf_out.zeros({2, 2}); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, false, false, out)); } \ No newline at end of file From c7095ebfbc47b2f2594c1f4bb45de5636244a3ea Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 10 Jun 2026 23:08:16 +0200 Subject: [PATCH 05/26] Bucketize: improve error messages and pre check flow --- kernels/portable/cpu/op_bucketize.cpp | 45 ++++++++++++--------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index e8d60fa4dbd..f7ba43bf50b 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -115,32 +116,26 @@ void bucketize_scalar( out_data[0] = pos; } -void bucketize_common_pre_checks( - KernelRuntimeContext& context, +Error bucketize_common_pre_checks( const Tensor& boundaries, bool out_int32, Tensor& out) { - ET_KERNEL_CHECK_MSG( - context, + ET_CHECK_OR_RETURN_ERROR( boundaries.dim() == 1, InvalidArgument, - , - "boundaries tensor must be 1 dimension, but got dim(", - boundaries.dim(), - ")"); + "boundaries tensor must be 1 dimension, but got dim(%zu)", + boundaries.dim()); ScalarType out_dtype = out.scalar_type(); - ET_KERNEL_CHECK_MSG( - context, + ET_CHECK_OR_RETURN_ERROR( (out_dtype == ScalarType::Long && !out_int32) || (out_dtype == ScalarType::Int && out_int32), InvalidArgument, - , - "torch.bucketize(): output tensor's dtype is wrong, it can only 
be Int(int32) or Long(int64) depending on ", - "whether out_int32 flag is True, but we got output tensor's dtype ", - out_dtype, - " and out_int32 flag is ", + "torch.bucketize(): output tensor's dtype is wrong, it can only be Int(int32) or Long(int64) depending on whether out_int32 flag is True, but we got output tensor dtype %s and out_int32 flag is %s", + toString(out_dtype), (out_int32 ? "True" : "False")); + + return Error::Ok; } } // namespace @@ -152,11 +147,11 @@ Tensor& bucketize_tensor_out( bool out_int32, bool right, Tensor& out) { - bucketize_common_pre_checks(context, boundaries, out_int32, out); - // Check manually as bucketize_common_pre_checks do not return - if (context.failure_state() != Error::Ok) { - return out; - } + ET_KERNEL_CHECK( + context, + bucketize_common_pre_checks(boundaries, out_int32, out) == Error::Ok, + InvalidArgument, + out); ET_KERNEL_CHECK( context, tensors_have_same_shape(self, out), InvalidArgument, out); @@ -186,11 +181,11 @@ Tensor& bucketize_scalar_out( bool out_int32, bool right, Tensor& out) { - bucketize_common_pre_checks(context, boundaries, out_int32, out); - // Check manually as bucketize_common_pre_checks do not return - if (context.failure_state() != Error::Ok) { - return out; - } + ET_KERNEL_CHECK( + context, + bucketize_common_pre_checks(boundaries, out_int32, out) == Error::Ok, + InvalidArgument, + out); ET_KERNEL_CHECK(context, out.sizes().back() == 1, InvalidArgument, out); ScalarType common_type = From 97f19bcb455029f0dd2699c8baca32f7dcf15143 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 13:06:23 +0200 Subject: [PATCH 06/26] Bucketize: expect 0 dimensional output in scalar version --- kernels/portable/cpu/op_bucketize.cpp | 2 +- kernels/test/op_bucketize_test.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index f7ba43bf50b..17492a541b7 100644 --- 
a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -186,7 +186,7 @@ Tensor& bucketize_scalar_out( bucketize_common_pre_checks(boundaries, out_int32, out) == Error::Ok, InvalidArgument, out); - ET_KERNEL_CHECK(context, out.sizes().back() == 1, InvalidArgument, out); + ET_KERNEL_CHECK(context, out.dim() == 0, InvalidArgument, out); ScalarType common_type = utils::promote_type_with_scalar(boundaries.scalar_type(), self); diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index f0da3c3b916..3f6dedebeca 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -32,8 +32,8 @@ class OpBucketizeScalarTest : public OperatorTest { Scalar value = 2; Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); - Tensor expected = tf_out.make({1}, {1}); - Tensor out = tf_out.zeros({1}); + Tensor expected = tf_out.make({}, {1}); + Tensor out = tf_out.zeros({}); Tensor ret = op_bucketize_out(value, boundaries, false, true, out); @@ -54,8 +54,8 @@ TEST_F(OpBucketizeScalarTest, SanityCheck) { Scalar value = 2.5; Tensor boundaries = tf_bound.make({10}, {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}); - Tensor expected = tf_out.make({1}, {2}); - Tensor out = tf_out.zeros({1}); + Tensor expected = tf_out.make({}, {2}); + Tensor out = tf_out.zeros({}); Tensor ret = op_bucketize_out(value, boundaries, false, true, out); From 8c26f1e146d6af074993f14be80f122adb1ab2b9 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 13:24:29 +0200 Subject: [PATCH 07/26] Bucketize: add input and boundaries realhbf16 dtypes check --- kernels/portable/cpu/op_bucketize.cpp | 12 +++++++ kernels/test/op_bucketize_test.cpp | 46 +++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 17492a541b7..ee35eecf421 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -1,6 
+1,7 @@ #include #include #include +#include #include #include @@ -8,6 +9,8 @@ namespace torch { namespace executor { namespace native { +using namespace executorch::runtime; + namespace { using namespace torch::executor::native::utils::internal; @@ -135,6 +138,13 @@ Error bucketize_common_pre_checks( toString(out_dtype), (out_int32 ? "True" : "False")); + ScalarType bound_dtype = boundaries.scalar_type(); + ET_CHECK_OR_RETURN_ERROR( + isRealHBF16Type(bound_dtype), + InvalidArgument, + "boundaries tensor of type %s is not supported", + toString(bound_dtype)); + return Error::Ok; } @@ -154,6 +164,8 @@ Tensor& bucketize_tensor_out( out); ET_KERNEL_CHECK( context, tensors_have_same_shape(self, out), InvalidArgument, out); + ET_KERNEL_CHECK( + context, tensor_is_realhbf16_type(self), InvalidArgument, out); ScalarType common_type = promoteTypes(self.scalar_type(), boundaries.scalar_type()); diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 3f6dedebeca..8d3c314bc99 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -120,6 +120,38 @@ class OpBucketizeTest : public OperatorTest { EXPECT_TENSOR_EQ(out, expected); } + template + void test_bucketize_complex_boundary() { + TensorFactory tf_out; + TensorFactory tf_in; + TensorFactory tf_bound; + + Tensor values = tf_in.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_bound.make({1}, {CTYPE(0, 1)}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, false, false, out)); + } + + template + void test_bucketize_complex_input() { + TensorFactory tf_out; + TensorFactory tf_in; + TensorFactory tf_bound; + + Tensor values = tf_in.make({1}, {CTYPE(0, 1)}); + Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, 
out); + + ET_EXPECT_KERNEL_FAILURE( + context_, op_bucketize_out(values, boundaries, false, false, out)); + } + template void test_bucketize_bound_types() { #define RUN_TEST(ctype, dtype) \ @@ -302,4 +334,18 @@ TEST_F(OpBucketizeTest, MismatchingIntArg64Fails) { ET_EXPECT_KERNEL_FAILURE( context_, op_bucketize_out(values, boundaries, false, false, out)); +} + +TEST_F(OpBucketizeTest, ComplexBoundaryTypesFails) { +#define RUN_TEST(ctype, dtype) \ + test_bucketize_complex_boundary(); + ET_FORALL_COMPLEXH_TYPES(RUN_TEST) +#undef RUN_TEST +} + +TEST_F(OpBucketizeTest, ComplexInputTypesFails) { +#define RUN_TEST(ctype, dtype) \ + test_bucketize_complex_input(); + ET_FORALL_COMPLEXH_TYPES(RUN_TEST) +#undef RUN_TEST } \ No newline at end of file From b89f3eb8e2027237e887a3cdc722e3ae0e12552d Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 14:17:17 +0200 Subject: [PATCH 08/26] Bucketize: fix typo --- kernels/portable/cpu/op_bucketize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index ee35eecf421..ad259badc40 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -45,7 +45,7 @@ int64_t cus_upper_bound( const char* bd, load_to_compute_fn bd_load_fn, ssize_t bd_elem_size) { - ino64_t start = 0; + int64_t start = 0; while (start < end) { const int64_t mid = start + ((end - start) >> 1); From 394f463ae59ea0a90ca2f100f29ca12c7d9c2bef Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 14:37:16 +0200 Subject: [PATCH 09/26] Bucketize: add python tests --- kernels/test/targets.bzl | 1 + kernels/test/test_bucketize.py | 135 +++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 kernels/test/test_bucketize.py diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl index 5212d691c5b..93bc17c036d 100644 --- a/kernels/test/targets.bzl +++ b/kernels/test/targets.bzl @@ 
-210,6 +210,7 @@ def define_common_targets(): _common_op_test("op_bitwise_or_test", ["aten", "portable"]) _common_op_test("op_bitwise_right_shift_test", ["portable"]) _common_op_test("op_bitwise_xor_test", ["aten", "portable"]) + _common_op_test("op_bucketize_test", ["portable"]) _common_op_test("op_bmm_test", ["aten", "portable", "optimized"]) _common_op_test("op_cat_test", ["aten", "portable"]) _common_op_test("op_cdist_forward_test", ["aten", "portable"]) diff --git a/kernels/test/test_bucketize.py b/kernels/test/test_bucketize.py new file mode 100644 index 00000000000..50f6f307406 --- /dev/null +++ b/kernels/test/test_bucketize.py @@ -0,0 +1,135 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Test for bucketize operations in ExecuTorch. + +This test validates that the bucketize operator work correctly +by creating simple models that use the operation and running inference. 
+""" + +import tempfile +import unittest +from pathlib import Path + +import torch +from executorch.exir import ( + EdgeCompileConfig, + ExecutorchBackendConfig, + to_edge_transform_and_lower, +) +from executorch.extension.export_util.utils import save_pte_program +from executorch.runtime import Runtime + + +class BucketizeModule(torch.nn.Module): + """Module that uses bucketize""" + + def __init__(self, out_int32: bool, right: bool): + super().__init__() + self.out_int32 = out_int32 + self.right = right + + def forward(self, x, bounds: torch.Tensor) -> torch.Tensor: + return torch.bucketize(x, bounds, out_int32=self.out_int32, right=self.right) + + +def export_and_generate_pte(model, example_inputs, output_path): + """Export a model and generate a .pte file.""" + exported_program = torch.export.export(model, example_inputs) + edge_program_manager = to_edge_transform_and_lower( + exported_program, + partitioner=None, + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=[ + torch.ops.aten.bucketize.Tensor, + torch.ops.aten.bucketize.Scalar, + ] + ), + ) + executorch_program_manager = edge_program_manager.to_executorch( + config=ExecutorchBackendConfig(extract_delegate_segments=False) + ) + save_pte_program(executorch_program_manager, str(output_path)) + + +class TestBucketizeOperator(unittest.TestCase): + """Test bucketize operator in ExecuTorch.""" + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.temp_path = Path(self.temp_dir.name) + + def tearDown(self): + self.temp_dir.cleanup() + + def _run_and_compare(self, model, inputs, pte_name): + """Helper to export, run, and compare outputs.""" + model.eval() + expected = model(*inputs) + + pte_path = self.temp_path / pte_name + export_and_generate_pte(model, inputs, pte_path) + + runtime = Runtime.get() + method = runtime.load_program(pte_path).load_method("forward") + outputs = method.execute(list(inputs)) + + self.assertEqual(len(outputs), 1) + print(outputs[0]) + 
print(expected) + torch.testing.assert_close(outputs[0], expected) + return outputs[0] + + # ========================================================================== + # Core tests: one per operator signature + # ========================================================================== + + def test_bucketize_tensor_out_int64(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = torch.tensor([[1, 4, 6, 8]], dtype=torch.float) + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_out_int64.pte") + + def test_bucketize_tensor_out_int32(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(True, False) + x = torch.tensor([[1, 4, 6, 8]], dtype=torch.float) + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_out_int32.pte") + + def test_bucketize_tensor_right(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, True) + x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) + bounds = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_right.pte") + + def test_bucketize_tensor_left(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) + bounds = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") + + def test_bucketize_scalar_out_int64(self): + """Test bucketize.Tensor_out: (Scalar, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = 1 + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), 
"test_bucketize_scalar_out_int64.pte") + + def test_bucketize_scalar_out_int32(self): + """Test bucketize.Tensor_out: (Scalar, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = 1 + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_scalar_out_int32.pte") + + +if __name__ == "__main__": + unittest.main() From 6b8b5148159201cf465bef7ca59ff05bd0a7cf19 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 14:44:04 +0200 Subject: [PATCH 10/26] Bucketize: add python edge cases tests --- kernels/test/test_bucketize.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernels/test/test_bucketize.py b/kernels/test/test_bucketize.py index 50f6f307406..8c7115bfb65 100644 --- a/kernels/test/test_bucketize.py +++ b/kernels/test/test_bucketize.py @@ -130,6 +130,24 @@ def test_bucketize_scalar_out_int32(self): bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_scalar_out_int32.pte") + # ========================================================================== + # Edge cases tests + # ========================================================================== + + def test_bucketize_tensor_empty_boundary(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) + bounds = torch.tensor([], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") + + def test_bucketize_tensor_empty_input(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(False, False) + x = torch.tensor([[]], dtype=torch.float) + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") + if __name__ == "__main__": unittest.main() From 
7a18241ff5ca07a5fa1fda7708fb7b992763469b Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 21:02:52 +0200 Subject: [PATCH 11/26] Bucketize: add empty inputs tests --- kernels/test/op_bucketize_test.cpp | 62 +++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 8d3c314bc99..64263a682a0 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -48,7 +48,7 @@ class OpBucketizeScalarTest : public OperatorTest { } }; -TEST_F(OpBucketizeScalarTest, SanityCheck) { +TEST_F(OpBucketizeScalarTest, ScalarEmptyBoundaries) { TensorFactory tf_out; TensorFactory tf_bound; @@ -63,6 +63,21 @@ TEST_F(OpBucketizeScalarTest, SanityCheck) { EXPECT_TENSOR_EQ(out, expected); } +TEST_F(OpBucketizeScalarTest, SanityCheck) { + TensorFactory tf_out; + TensorFactory tf_bound; + + Scalar value = 2.5; + Tensor boundaries = tf_bound.make({0}, {}); + Tensor expected = tf_out.make({}, {0}); + Tensor out = tf_out.zeros({}); + + Tensor ret = op_bucketize_out(value, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + TEST_F(OpBucketizeScalarTest, ScalarBoundaryTypes) { test_bucketize_bound_types(); } @@ -286,6 +301,51 @@ TEST_F(OpBucketizeTest, Boundaries1D) { EXPECT_TENSOR_EQ(out, expected); } +TEST_F(OpBucketizeTest, EmptyBoundaries) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor boundaries = tf_dtype.make({0}, {}); + Tensor expected = tf_out.make({2, 2}, {0, 0, 0, 0}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, EmptyInput) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({0}, {}); + Tensor boundaries = 
tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor expected = tf_out.make({0}, {}); + Tensor out = tf_out.zeros({0}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, EmptyAll) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({0}, {}); + Tensor boundaries = tf_dtype.make({0}, {}); + Tensor expected = tf_out.make({0}, {}); + Tensor out = tf_out.zeros({0}); + + Tensor ret = op_bucketize_out(values, boundaries, false, false, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + TEST_F(OpBucketizeTest, BoundaryTypeNonRealHBF16Fails) {} TEST_F(OpBucketizeTest, BoundariesNDFails) { From 52b347f98d63d9f4bf2e994cd6716aee95b6cf23 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 21:08:20 +0200 Subject: [PATCH 12/26] Bucketize: replace using directives with using declarations --- kernels/portable/cpu/op_bucketize.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index ad259badc40..2de8d2e0146 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -8,13 +8,12 @@ namespace torch { namespace executor { namespace native { - -using namespace executorch::runtime; - namespace { -using namespace torch::executor::native::utils::internal; -using namespace torch::executor::native::utils; +using executorch::runtime::isRealHBF16Type; +using torch::executor::native::utils::SupportedTensorDtypes; +using torch::executor::native::utils::internal::get_load_to_compute_fn; +using torch::executor::native::utils::internal::load_to_compute_fn; template int64_t cus_lower_bound( @@ -150,6 +149,8 @@ Error bucketize_common_pre_checks( } // namespace +using executorch::runtime::tensor_is_realhbf16_type; + Tensor& bucketize_tensor_out( 
KernelRuntimeContext& context, const Tensor& self, From de0b4d244e240803ff85bb1c345991974d90fb39 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sat, 13 Jun 2026 21:09:32 +0200 Subject: [PATCH 13/26] =?UTF-8?q?Bucketize:=20rename=20bucketize=5Fscalar?= =?UTF-8?q?=20and=20bucketize=5Ftensor=20to=20bc=C3=ACucketize=5Fscalar=5F?= =?UTF-8?q?impl=20and=20buccketize=5Ftensor=5Fimpl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernels/portable/cpu/op_bucketize.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 2de8d2e0146..4c58d8da66a 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -60,7 +60,7 @@ int64_t cus_upper_bound( } template -void bucketize_tensor( +void bucketize_tensor_impl( KernelRuntimeContext& context, const Tensor& self, const Tensor& boundaries, @@ -96,7 +96,7 @@ void bucketize_tensor( } template -void bucketize_scalar( +void bucketize_scalar_impl( KernelRuntimeContext& context, const Scalar self, const Tensor& boundaries, @@ -177,10 +177,10 @@ Tensor& bucketize_tensor_out( ET_SWITCH_REALHBF16_TYPES( compute_type, context, op_name, CTYPE_COMPUTE, [&]() { if (out_int32) { - bucketize_tensor( + bucketize_tensor_impl( context, self, boundaries, right, out); } else { - bucketize_tensor( + bucketize_tensor_impl( context, self, boundaries, right, out); } }); @@ -210,10 +210,10 @@ Tensor& bucketize_scalar_out( ET_SWITCH_REALHBF16_TYPES( compute_type, context, op_name, CTYPE_COMPUTE, [&]() { if (out_int32) { - bucketize_scalar( + bucketize_scalar_impl( context, self, boundaries, right, out); } else { - bucketize_scalar( + bucketize_scalar_impl( context, self, boundaries, right, out); } }); From 76641c89970468916482deb0fa5fd22fc7af256f Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sun, 14 Jun 2026 15:10:35 +0200 Subject: [PATCH 14/26] Bucketize: remove 
Boundaries1D test, reformat, improve test values --- kernels/test/op_bucketize_test.cpp | 57 ++++++++++-------------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 64263a682a0..327365cd867 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -48,12 +48,12 @@ class OpBucketizeScalarTest : public OperatorTest { } }; -TEST_F(OpBucketizeScalarTest, ScalarEmptyBoundaries) { +TEST_F(OpBucketizeScalarTest, SanityCheck) { TensorFactory tf_out; TensorFactory tf_bound; Scalar value = 2.5; - Tensor boundaries = tf_bound.make({10}, {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}); + Tensor boundaries = tf_bound.make({5}, {0, 2, 4, 6, 8}); Tensor expected = tf_out.make({}, {2}); Tensor out = tf_out.zeros({}); @@ -63,7 +63,7 @@ TEST_F(OpBucketizeScalarTest, ScalarEmptyBoundaries) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeScalarTest, SanityCheck) { +TEST_F(OpBucketizeScalarTest, ScalarEmptyBoundaries) { TensorFactory tf_out; TensorFactory tf_bound; @@ -145,8 +145,6 @@ class OpBucketizeTest : public OperatorTest { Tensor boundaries = tf_bound.make({1}, {CTYPE(0, 1)}); Tensor out = tf_out.zeros({2, 2}); - Tensor ret = op_bucketize_out(values, boundaries, false, true, out); - ET_EXPECT_KERNEL_FAILURE( context_, op_bucketize_out(values, boundaries, false, false, out)); } @@ -161,8 +159,6 @@ class OpBucketizeTest : public OperatorTest { Tensor boundaries = tf_bound.make({5}, {0, 3, 5, 7, 9}); Tensor out = tf_out.zeros({2, 2}); - Tensor ret = op_bucketize_out(values, boundaries, false, true, out); - ET_EXPECT_KERNEL_FAILURE( context_, op_bucketize_out(values, boundaries, false, false, out)); } @@ -186,17 +182,17 @@ TEST_F(OpBucketizeTest, SanityCheck) { TensorFactory tf_out; TensorFactory tf_comp; - Tensor values = - tf_comp.make({2, 4, 4}, {1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, + Tensor values = tf_comp.make( + {2, 4, 4}, {0, 4, 6, 8, 
1, 4, 5, 8, 1, 5, 6, 8, -1, 4, 6, 9, - 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8, 1, 4, 6, 8}); + 1, 4, 6, 8, 1, 4, 7, 8, -2, 4, 6, 8, 1, 4, 6, 8}); Tensor boundaries = tf_comp.make({5}, {0, 3, 5, 7, 9}); Tensor expected = - tf_out.make({2, 4, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, + tf_out.make({2, 4, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 3, 3, 4, 0, 2, 3, 5, - 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}); + 1, 2, 3, 4, 1, 2, 4, 4, 0, 2, 3, 4, 1, 2, 3, 4}); Tensor out = tf_out.zeros({2, 4, 4}); @@ -275,22 +271,7 @@ TEST_F(OpBucketizeTest, OutOfBoundary) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); - Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); - Tensor expected = tf_out.make({2, 2}, {0, 0, 5, 5}); - Tensor out = tf_out.zeros({2, 2}); - - Tensor ret = op_bucketize_out(values, boundaries, false, false, out); - - EXPECT_TENSOR_EQ(ret, expected); - EXPECT_TENSOR_EQ(out, expected); -} - -TEST_F(OpBucketizeTest, Boundaries1D) { - TensorFactory tf_out; - TensorFactory tf_dtype; - - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor values = tf_dtype.make({2, 2}, {-1, -2, 6, 40}); Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); Tensor expected = tf_out.make({2, 2}, {0, 0, 5, 5}); Tensor out = tf_out.zeros({2, 2}); @@ -305,7 +286,7 @@ TEST_F(OpBucketizeTest, EmptyBoundaries) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); Tensor boundaries = tf_dtype.make({0}, {}); Tensor expected = tf_out.make({2, 2}, {0, 0, 0, 0}); Tensor out = tf_out.zeros({2, 2}); @@ -346,14 +327,12 @@ TEST_F(OpBucketizeTest, EmptyAll) { EXPECT_TENSOR_EQ(out, expected); } -TEST_F(OpBucketizeTest, BoundaryTypeNonRealHBF16Fails) {} - TEST_F(OpBucketizeTest, BoundariesNDFails) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); - Tensor 
boundaries = tf_dtype.make({3, 2}, {1, 2, 3, 4, 5, 6}); + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({3, 2}, {0, 3, 5, 7, 9, 11}); Tensor out = tf_out.zeros({2, 2}); ET_EXPECT_KERNEL_FAILURE( @@ -364,8 +343,8 @@ TEST_F(OpBucketizeTest, MismatchingInOutDimsFails) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); - Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); Tensor out = tf_out.zeros({2, 3}); ET_EXPECT_KERNEL_FAILURE( @@ -376,8 +355,8 @@ TEST_F(OpBucketizeTest, MismatchingIntArg32Fails) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); - Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); Tensor out = tf_out.zeros({2, 2}); ET_EXPECT_KERNEL_FAILURE( @@ -388,8 +367,8 @@ TEST_F(OpBucketizeTest, MismatchingIntArg64Fails) { TensorFactory tf_out; TensorFactory tf_dtype; - Tensor values = tf_dtype.make({2, 2}, {-1, -2, 30, 40}); - Tensor boundaries = tf_dtype.make({5}, {1, 2, 3, 4, 5}); + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); Tensor out = tf_out.zeros({2, 2}); ET_EXPECT_KERNEL_FAILURE( From 0e78a6c68eab7203cf4ac56a3303c8bfb1aec233 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sun, 14 Jun 2026 15:28:10 +0200 Subject: [PATCH 15/26] Bucketize: use keyword arguments in model creation, fix typos --- kernels/test/test_bucketize.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernels/test/test_bucketize.py b/kernels/test/test_bucketize.py index 8c7115bfb65..037411bed8d 100644 --- a/kernels/test/test_bucketize.py +++ b/kernels/test/test_bucketize.py @@ -7,7 
+7,7 @@ """ Test for bucketize operations in ExecuTorch. -This test validates that the bucketize operator work correctly +This test validates that the bucketize operator works correctly by creating simple models that use the operation and running inference. """ @@ -90,42 +90,42 @@ def _run_and_compare(self, model, inputs, pte_name): def test_bucketize_tensor_out_int64(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) + model = BucketizeModule(out_int32=False, right=False) x = torch.tensor([[1, 4, 6, 8]], dtype=torch.float) bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_out_int64.pte") def test_bucketize_tensor_out_int32(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(True, False) + model = BucketizeModule(out_int32=True, right=False) x = torch.tensor([[1, 4, 6, 8]], dtype=torch.float) bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_out_int32.pte") def test_bucketize_tensor_right(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, True) + model = BucketizeModule(out_int32=False, right=True) x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) bounds = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_right.pte") def test_bucketize_tensor_left(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) + model = BucketizeModule(out_int32=False, right=False) x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) bounds = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") def test_bucketize_scalar_out_int64(self): """Test bucketize.Tensor_out: 
(Scalar, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) + model = BucketizeModule(out_int32=False, right=False) x = 1 bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_scalar_out_int64.pte") def test_bucketize_scalar_out_int32(self): """Test bucketize.Tensor_out: (Scalar, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) + model = BucketizeModule(out_int32=False, right=False) x = 1 bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) self._run_and_compare(model, (x, bounds), "test_bucketize_scalar_out_int32.pte") @@ -136,17 +136,21 @@ def test_bucketize_scalar_out_int32(self): def test_bucketize_tensor_empty_boundary(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) + model = BucketizeModule(out_int32=False, right=False) x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) bounds = torch.tensor([], dtype=torch.float) - self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") + self._run_and_compare( + model, (x, bounds), "test_bucketize_tensor_empty_boundary.pte" + ) def test_bucketize_tensor_empty_input(self): """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" - model = BucketizeModule(False, False) - x = torch.tensor([[]], dtype=torch.float) + model = BucketizeModule(out_int32=False, right=False) + x = torch.tensor([], dtype=torch.float) bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) - self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_left.pte") + self._run_and_compare( + model, (x, bounds), "test_bucketize_tensor_empty_input.pte" + ) if __name__ == "__main__": From c6626b945f15e046e5282b0531d0cea56521b808 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sun, 14 Jun 2026 15:33:28 +0200 Subject: [PATCH 16/26] Bucketize; extract paraller for grain size to variable --- kernels/portable/cpu/op_bucketize.cpp | 9 
+++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 4c58d8da66a..76fa5330d15 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -15,6 +15,8 @@ using torch::executor::native::utils::SupportedTensorDtypes; using torch::executor::native::utils::internal::get_load_to_compute_fn; using torch::executor::native::utils::internal::load_to_compute_fn; +constexpr int64_t BUCKETIZE_GRAIN_SIZE = 200; + template int64_t cus_lower_bound( int64_t end, @@ -79,8 +81,11 @@ void bucketize_tensor_impl( auto out_data = out.mutable_data_ptr(); - const bool success = - parallel_for(0, self.numel(), 200, [&](const auto begin, const auto end) { + const bool success = parallel_for( + 0, + self.numel(), + BUCKETIZE_GRAIN_SIZE, + [&](const auto begin, const auto end) { for (const auto i : c10::irange(begin, end)) { auto compute_val = in_load_fn(&in_data[i * in_size]); int64_t pos = right From 711d9378b1cad848665b4f694b35c1d31ed52412 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sun, 14 Jun 2026 15:59:48 +0200 Subject: [PATCH 17/26] Bucketize: add comments to pre checks --- kernels/portable/cpu/op_bucketize.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 76fa5330d15..db89b0d02a6 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -123,6 +123,10 @@ void bucketize_scalar_impl( out_data[0] = pos; } +// Performs check which are common to both tensor and scalar implementations: +// - Boundaries must be 1D +// - Out type must be consistent with out_int32 parameter +// - Boundaries type must be realhbf16 Error bucketize_common_pre_checks( const Tensor& boundaries, bool out_int32, From 989caa3833776e4841094dd3884c9b3ce37a169f Mon Sep 17 00:00:00 2001 From: Gallinator Date: Sun, 14 Jun 2026 16:05:06 +0200 Subject: 
[PATCH 18/26] Bucketize: comment on out type missing check --- kernels/portable/cpu/op_bucketize.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index db89b0d02a6..51dd14b99a2 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -127,6 +127,8 @@ void bucketize_scalar_impl( // - Boundaries must be 1D // - Out type must be consistent with out_int32 parameter // - Boundaries type must be realhbf16 +// Boundaries size is not checked against out type as SizesType always fits into +// an int32_t. Error bucketize_common_pre_checks( const Tensor& boundaries, bool out_int32, From afb0d909b1b7e34eee8ec1e11153b08743abae46 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Tue, 16 Jun 2026 12:43:12 +0200 Subject: [PATCH 19/26] Bucketize: add tests for inf inputs and boundaries --- kernels/test/op_bucketize_test.cpp | 53 +++++++++++++++++++++++++++++- kernels/test/test_bucketize.py | 23 +++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 327365cd867..5ae4d29cd05 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -4,8 +4,8 @@ #include #include #include - #include +#include using namespace ::testing; using executorch::aten::Scalar; @@ -78,6 +78,21 @@ TEST_F(OpBucketizeScalarTest, ScalarEmptyBoundaries) { EXPECT_TENSOR_EQ(out, expected); } +TEST_F(OpBucketizeScalarTest, ScalarInfInput) { + TensorFactory tf_out; + TensorFactory tf_bound; + + Scalar value = std::numeric_limits::infinity(); + Tensor boundaries = tf_bound.make({5}, {0, 2, 4, 6, 8}); + Tensor expected = tf_out.make({}, {5}); + Tensor out = tf_out.zeros({}); + + Tensor ret = op_bucketize_out(value, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + TEST_F(OpBucketizeScalarTest, ScalarBoundaryTypes) 
{ test_bucketize_bound_types(); } @@ -327,6 +342,42 @@ TEST_F(OpBucketizeTest, EmptyAll) { EXPECT_TENSOR_EQ(out, expected); } +TEST_F(OpBucketizeTest, InfInput) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make( + {2}, + {-std::numeric_limits::infinity(), + std::numeric_limits::infinity()}); + Tensor boundaries = tf_dtype.make({5}, {0, 3, 5, 7, 9}); + Tensor expected = tf_out.make({2}, {0, 5}); + Tensor out = tf_out.zeros({2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + +TEST_F(OpBucketizeTest, InfBoundaries) { + TensorFactory tf_out; + TensorFactory tf_dtype; + + Tensor values = tf_dtype.make({2, 2}, {1, 4, 6, 8}); + Tensor boundaries = tf_dtype.make( + {2}, + {-std::numeric_limits::infinity(), + std::numeric_limits::infinity()}); + Tensor expected = tf_out.ones({2, 2}); + Tensor out = tf_out.zeros({2, 2}); + + Tensor ret = op_bucketize_out(values, boundaries, false, true, out); + + EXPECT_TENSOR_EQ(ret, expected); + EXPECT_TENSOR_EQ(out, expected); +} + TEST_F(OpBucketizeTest, BoundariesNDFails) { TensorFactory tf_out; TensorFactory tf_dtype; diff --git a/kernels/test/test_bucketize.py b/kernels/test/test_bucketize.py index 037411bed8d..c4ecc97afcd 100644 --- a/kernels/test/test_bucketize.py +++ b/kernels/test/test_bucketize.py @@ -152,6 +152,29 @@ def test_bucketize_tensor_empty_input(self): model, (x, bounds), "test_bucketize_tensor_empty_input.pte" ) + def test_bucketize_tensor_inf_input(self): + """Test bucketize.Tensor_out: (Tensor, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(out_int32=False, right=False) + x = torch.tensor([-torch.inf, torch.inf], dtype=torch.float) + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_tensor_inf_input.pte") + + def test_bucketize_tensor_inf_boundary(self): + """Test bucketize.Tensor_out: (Tensor, 
Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(out_int32=False, right=False) + x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float) + bounds = torch.tensor([-torch.inf, torch.inf], dtype=torch.float) + self._run_and_compare( + model, (x, bounds), "test_bucketize_tensor_inf_boundary.pte" + ) + + def test_bucketize_scalar_inf_input(self): + """Test bucketize.Scalar_out: (Scalar, Tensor, bool, bool) -> Tensor.""" + model = BucketizeModule(out_int32=False, right=False) + x = torch.inf + bounds = torch.tensor([0, 3, 5, 7, 9], dtype=torch.float) + self._run_and_compare(model, (x, bounds), "test_bucketize_scalar_inf_input.pte") + if __name__ == "__main__": unittest.main() From 0744f2f7d42b946c8fc78d114c5032cfaf612969 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 17 Jun 2026 13:21:53 +0200 Subject: [PATCH 20/26] Bucketize: check input and output dim order match --- kernels/portable/cpu/op_bucketize.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 51dd14b99a2..dbfbe955ef2 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -176,6 +176,8 @@ Tensor& bucketize_tensor_out( out); ET_KERNEL_CHECK( context, tensors_have_same_shape(self, out), InvalidArgument, out); + ET_KERNEL_CHECK( + context, tensors_have_same_dim_order(self, out), InvalidArgument, out); ET_KERNEL_CHECK( context, tensor_is_realhbf16_type(self), InvalidArgument, out); From 30576429f55392b723119d46680e4d4c97d2e267 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Wed, 17 Jun 2026 13:22:33 +0200 Subject: [PATCH 21/26] Bucketize: use 4D tensors in sanity check test --- kernels/test/op_bucketize_test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 5ae4d29cd05..82591e1f3cd 100644 --- a/kernels/test/op_bucketize_test.cpp +++
b/kernels/test/op_bucketize_test.cpp @@ -198,18 +198,18 @@ TEST_F(OpBucketizeTest, SanityCheck) { TensorFactory tf_comp; Tensor values = tf_comp.make( - {2, 4, 4}, {0, 4, 6, 8, 1, 4, 5, 8, 1, 5, 6, 8, -1, 4, 6, 9, + {2, 2, 2, 4}, {0, 4, 6, 8, 1, 4, 5, 8, 1, 5, 6, 8, -1, 4, 6, 9, - 1, 4, 6, 8, 1, 4, 7, 8, -2, 4, 6, 8, 1, 4, 6, 8}); + 1, 4, 6, 8, 1, 4, 7, 8, -2, 4, 6, 8, 1, 4, 6, 8}); Tensor boundaries = tf_comp.make({5}, {0, 3, 5, 7, 9}); - Tensor expected = - tf_out.make({2, 4, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 3, 3, 4, 0, 2, 3, 5, + Tensor expected = tf_out.make( + {2, 2, 2, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 3, 3, 4, 0, 2, 3, 5, - 1, 2, 3, 4, 1, 2, 4, 4, 0, 2, 3, 4, 1, 2, 3, 4}); + 1, 2, 3, 4, 1, 2, 4, 4, 0, 2, 3, 4, 1, 2, 3, 4}); - Tensor out = tf_out.zeros({2, 4, 4}); + Tensor out = tf_out.zeros({2, 2, 2, 4}); // The execution of the operator Tensor ret = op_bucketize_out(values, boundaries, false, true, out); From bcff767f79c1f6c01197a79c90db4aaf0e8afe29 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Mon, 22 Jun 2026 19:52:36 +0200 Subject: [PATCH 22/26] Bucketize: add missing license headers --- kernels/portable/cpu/op_bucketize.cpp | 8 ++++++++ kernels/test/op_bucketize_test.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index dbfbe955ef2..51a12a98841 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -1,3 +1,11 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + #include #include #include diff --git a/kernels/test/op_bucketize_test.cpp b/kernels/test/op_bucketize_test.cpp index 82591e1f3cd..3d3e6df881c 100644 --- a/kernels/test/op_bucketize_test.cpp +++ b/kernels/test/op_bucketize_test.cpp @@ -1,3 +1,11 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + #include #include // Declares the operator #include From 081b2ae733bad8d93d2bc6fbc4cb27afaa104e2d Mon Sep 17 00:00:00 2001 From: Gallinator Date: Mon, 22 Jun 2026 20:00:44 +0200 Subject: [PATCH 23/26] Bucketize: pass right by value in bucketize_scalar_impl and bucketize_tensor_impl --- kernels/portable/cpu/op_bucketize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 51a12a98841..478baa3145f 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -74,7 +74,7 @@ void bucketize_tensor_impl( KernelRuntimeContext& context, const Tensor& self, const Tensor& boundaries, - const bool& right, + bool right, Tensor& out) { auto in_load_fn = get_load_to_compute_fn( context, self, SupportedTensorDtypes::REALHBF16); @@ -113,7 +113,7 @@ void bucketize_scalar_impl( KernelRuntimeContext& context, const Scalar self, const Tensor& boundaries, - const bool& right, + bool right, Tensor& out) { CTYPE_COMPUTE compute_val = utils::scalar_to(self); From ab1697dd779b18dc2ead8222e9e319e7e445eb0c Mon Sep 17 00:00:00 2001 From: Gallinator Date: Mon, 22 Jun 2026 20:04:28 +0200 Subject: [PATCH 24/26] Bucketize: pass scalar by reference in bucketize_scalar_impl --- kernels/portable/cpu/op_bucketize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 
478baa3145f..75f2f37ecf1 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -111,7 +111,7 @@ void bucketize_tensor_impl( template void bucketize_scalar_impl( KernelRuntimeContext& context, - const Scalar self, + const Scalar& self, const Tensor& boundaries, bool right, Tensor& out) { From 7431b290d999bb995b5bd9bccd82c44a93a978d4 Mon Sep 17 00:00:00 2001 From: Gallinator Date: Mon, 22 Jun 2026 20:10:31 +0200 Subject: [PATCH 25/26] Bucketize: refactor --- kernels/portable/cpu/op_bucketize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 75f2f37ecf1..3c496d0b36f 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ b/kernels/portable/cpu/op_bucketize.cpp @@ -85,7 +85,7 @@ void bucketize_tensor_impl( context, boundaries, SupportedTensorDtypes::REALHBF16); const ssize_t bd_elem_size = boundaries.element_size(); auto bd_data = reinterpret_cast(boundaries.const_data_ptr()); - int64_t bd_end = boundaries.sizes().back(); + int64_t bd_end = boundaries.numel(); auto out_data = out.mutable_data_ptr(); @@ -121,7 +121,7 @@ void bucketize_scalar_impl( context, boundaries, SupportedTensorDtypes::REALHBF16); const ssize_t bd_elem_size = boundaries.element_size(); auto bd_data = reinterpret_cast(boundaries.const_data_ptr()); - int64_t bd_end = boundaries.sizes().back(); + int64_t bd_end = boundaries.numel(); auto out_data = out.mutable_data_ptr(); From 4f7a6551632a251a202963dea8f837ce370f46ce Mon Sep 17 00:00:00 2001 From: Gallinator Date: Mon, 22 Jun 2026 20:26:53 +0200 Subject: [PATCH 26/26] Bucketize: resize the out tensor instead of checking its shape --- kernels/portable/cpu/op_bucketize.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernels/portable/cpu/op_bucketize.cpp b/kernels/portable/cpu/op_bucketize.cpp index 3c496d0b36f..5da44c7ed1d 100644 --- a/kernels/portable/cpu/op_bucketize.cpp +++ 
b/kernels/portable/cpu/op_bucketize.cpp @@ -183,7 +183,10 @@ Tensor& bucketize_tensor_out( InvalidArgument, out); ET_KERNEL_CHECK( - context, tensors_have_same_shape(self, out), InvalidArgument, out); + context, + resize_tensor(out, self.sizes()) == Error::Ok, + InvalidArgument, + out); ET_KERNEL_CHECK( context, tensors_have_same_dim_order(self, out), InvalidArgument, out); ET_KERNEL_CHECK(