diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index feaae336166..139f441f8c6 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -401,8 +401,13 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, auto output_type_id = output->Type()->id(); if (output_type_id == arrow::Type::BOOL) { SetPackedBitValue(output_ref, loop_var, output_value->data()); - } else if (arrow::is_primitive(output_type_id) || - output_type_id == arrow::Type::DECIMAL) { + } else if (output_type_id == arrow::Type::DECIMAL) { + // Arrow decimal128 data is only 8-byte aligned, not 16-byte aligned. + // Use CreateAlignedStore with 8-byte alignment to match Arrow's actual alignment. + auto slot_offset = + builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); + builder->CreateAlignedStore(output_value->data(), slot_offset, llvm::MaybeAlign(8)); + } else if (arrow::is_primitive(output_type_id)) { auto slot_offset = builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); builder->CreateStore(output_value->data(), slot_offset); @@ -643,7 +648,12 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueDex& dex) { case arrow::Type::DECIMAL: { auto slot_offset = builder->CreateGEP(types->i128_type(), slot_ref, slot_index); - slot_value = builder->CreateLoad(types->i128_type(), slot_offset, dex.FieldName()); + // Arrow decimal128 data is only 8-byte aligned, not 16-byte aligned. + // Using CreateLoad with default alignment (16 for i128) causes crashes on + // misaligned data. Use CreateAlignedLoad with 8-byte alignment to match Arrow's + // actual alignment. + slot_value = builder->CreateAlignedLoad( + types->i128_type(), slot_offset, llvm::MaybeAlign(8), false, dex.FieldName()); lvalue = generator_->BuildDecimalLValue(slot_value, dex.FieldType()); break; } diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt index 690609146ae..09428a87056 100644 --- a/cpp/src/gandiva/tests/CMakeLists.txt +++ b/cpp/src/gandiva/tests/CMakeLists.txt @@ -21,6 +21,7 @@ add_gandiva_test(projector-test binary_test.cc boolean_expr_test.cc date_time_test.cc + decimal_alignment_test.cc decimal_single_test.cc decimal_test.cc filter_project_test.cc diff --git a/cpp/src/gandiva/tests/decimal_alignment_test.cc b/cpp/src/gandiva/tests/decimal_alignment_test.cc new file mode 100644 index 00000000000..3028ec81f31 --- /dev/null +++ b/cpp/src/gandiva/tests/decimal_alignment_test.cc @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Test for decimal128 alignment issue fix. +// Arrow decimal128 data may be 8-byte aligned but not 16-byte aligned. +// This test verifies that Gandiva handles such data correctly. + +#include + +#include "arrow/array/array_decimal.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/util/decimal.h" + +#include "gandiva/decimal_type_util.h" +#include "gandiva/projector.h" +#include "gandiva/tests/test_util.h" +#include "gandiva/tree_expr_builder.h" + +using arrow::Decimal128; + +namespace gandiva { + +class TestDecimalAlignment : public ::testing::Test { + public: + void SetUp() { pool_ = arrow::default_memory_pool(); } + + protected: + arrow::MemoryPool* pool_; +}; + +// Create a decimal128 array with data at a specific alignment offset +// This simulates the real-world scenario where Arrow data from external sources +// (like JNI/Java) may not be 16-byte aligned. +std::shared_ptr MakeMisalignedDecimalArray( + const std::shared_ptr& type, + const std::vector& values, int alignment_offset) { + // Allocate buffer with extra space for misalignment + int64_t data_size = values.size() * 16; // 16 bytes per Decimal128 + int64_t buffer_size = data_size + 16; // Extra space for offset + + std::shared_ptr buffer; + ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer)); + + // Calculate the starting offset to achieve desired alignment + // We want the data to be 8-byte aligned but NOT 16-byte aligned + uint8_t* raw_data = buffer->mutable_data(); + uintptr_t addr = reinterpret_cast(raw_data); + + // Find offset to get to 8-byte aligned but not 16-byte aligned address + int offset_to_8 = (8 - (addr % 8)) % 8; + int current_16_alignment = (addr + offset_to_8) % 16; + + int final_offset; + if (alignment_offset == 8) { + // Want 8-byte aligned but NOT 16-byte aligned + if (current_16_alignment == 0) { + final_offset = offset_to_8 + 8; // Add 8 to break 16-byte alignment + } else { + final_offset = offset_to_8; + } + } else { + // Want 16-byte aligned + final_offset = (16 - (addr % 16)) % 16; + } + + // Copy decimal values to the offset location + uint8_t* data_start = raw_data + final_offset; + for (size_t i = 0; i < values.size(); i++) { + memcpy(data_start + i * 16, values[i].ToBytes().data(), 16); + } + + // Verify alignment + uintptr_t data_addr = reinterpret_cast(data_start); + EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned"; + if (alignment_offset == 8) { + EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned"; + } + + // Create a sliced buffer starting at our offset + auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size); + + // Create validity buffer (all valid) + std::shared_ptr validity_buffer; + ARROW_EXPECT_OK(arrow::AllocateBuffer((values.size() + 7) / 8).Value(&validity_buffer)); + memset(validity_buffer->mutable_data(), 0xFF, validity_buffer->size()); + + // Create the array with our misaligned data buffer + auto array_data = arrow::ArrayData::Make(type, static_cast(values.size()), + {validity_buffer, sliced_buffer}); + + return std::make_shared(array_data); +} + +// Test that decimal operations work correctly with 8-byte aligned (but not 16-byte +// aligned) data +TEST_F(TestDecimalAlignment, TestMisalignedDecimalSubtract) { + constexpr int32_t precision = 38; + constexpr int32_t scale = 17; + auto decimal_type = std::make_shared(precision, scale); + auto field_a = arrow::field("a", decimal_type); + auto field_b = arrow::field("b", decimal_type); + auto schema = arrow::schema({field_a, field_b}); + + Decimal128TypePtr output_type; + auto status = DecimalTypeUtil::GetResultType( + DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type); + ASSERT_OK(status); + + auto res = arrow::field("res", output_type); + auto node_a = TreeExprBuilder::MakeField(field_a); + auto node_b = TreeExprBuilder::MakeField(field_b); + auto subtract = + TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type); + auto expr = TreeExprBuilder::MakeExpression(subtract, res); + + std::shared_ptr projector; + status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + ASSERT_OK(status); + + // Create test data + std::vector values_a = {Decimal128(100), Decimal128(200), Decimal128(300)}; + std::vector values_b = {Decimal128(10), Decimal128(20), Decimal128(30)}; + + // Create arrays with 8-byte alignment (but NOT 16-byte aligned) + auto array_a = MakeMisalignedDecimalArray(decimal_type, values_a, 8); + auto array_b = MakeMisalignedDecimalArray(decimal_type, values_b, 8); + + auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b}); + + // This should NOT crash even with misaligned data + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + ASSERT_OK(status); + + // Verify results: 100-10=90, 200-20=180, 300-30=270 + auto result = std::dynamic_pointer_cast(outputs[0]); + ASSERT_NE(result, nullptr); + EXPECT_EQ(result->length(), 3); +} + +// Create a misaligned output buffer for decimal128 +std::shared_ptr MakeMisalignedDecimalOutput( + const std::shared_ptr& type, int64_t num_records, + int alignment_offset) { + // Allocate data buffer with extra space for misalignment + int64_t data_size = num_records * 16; // 16 bytes per Decimal128 + int64_t buffer_size = data_size + 16; // Extra space for offset + + std::shared_ptr buffer; + ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer)); + + uint8_t* raw_data = const_cast(buffer->data()); + uintptr_t addr = reinterpret_cast(raw_data); + + // Find offset to get to 8-byte aligned but not 16-byte aligned address + int offset_to_8 = (8 - (addr % 8)) % 8; + int current_16_alignment = (addr + offset_to_8) % 16; + + int final_offset; + if (alignment_offset == 8) { + if (current_16_alignment == 0) { + final_offset = offset_to_8 + 8; + } else { + final_offset = offset_to_8; + } + } else { + final_offset = (16 - (addr % 16)) % 16; + } + + // Verify alignment + uintptr_t data_addr = reinterpret_cast(raw_data + final_offset); + EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned"; + if (alignment_offset == 8) { + EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned"; + } + + auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size); + + // Create validity buffer + int64_t bitmap_size = (num_records + 7) / 8; + std::shared_ptr validity_buffer; + ARROW_EXPECT_OK(arrow::AllocateBuffer(bitmap_size).Value(&validity_buffer)); + memset(const_cast(validity_buffer->data()), 0xFF, validity_buffer->size()); + + return arrow::ArrayData::Make(type, num_records, {validity_buffer, sliced_buffer}); +} + +// Test that decimal STORES work correctly with 8-byte aligned (but not 16-byte aligned) +// output +TEST_F(TestDecimalAlignment, TestMisalignedDecimalStore) { + constexpr int32_t precision = 38; + constexpr int32_t scale = 17; + auto decimal_type = std::make_shared(precision, scale); + auto field_a = arrow::field("a", decimal_type); + auto field_b = arrow::field("b", decimal_type); + auto schema = arrow::schema({field_a, field_b}); + + Decimal128TypePtr output_type; + auto status = DecimalTypeUtil::GetResultType( + DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type); + ASSERT_OK(status); + + auto res = arrow::field("res", output_type); + auto node_a = TreeExprBuilder::MakeField(field_a); + auto node_b = TreeExprBuilder::MakeField(field_b); + auto subtract = + TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type); + auto expr = TreeExprBuilder::MakeExpression(subtract, res); + + std::shared_ptr projector; + status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + ASSERT_OK(status); + + // Create ALIGNED input arrays (using standard Arrow allocation) + auto array_a = MakeArrowArrayDecimal( + decimal_type, {Decimal128(100), Decimal128(200), Decimal128(300)}, + {true, true, true}); + auto array_b = MakeArrowArrayDecimal( + decimal_type, {Decimal128(10), Decimal128(20), Decimal128(30)}, {true, true, true}); + + auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b}); + + // Create MISALIGNED output buffer (8-byte aligned but NOT 16-byte aligned) + auto output_data = MakeMisalignedDecimalOutput(output_type, 3, 8); + + // This should NOT crash even with misaligned output buffer + status = projector->Evaluate(*in_batch, {output_data}); + ASSERT_OK(status); + + // Verify the output was written correctly + auto result = std::make_shared(output_data); + EXPECT_EQ(result->length(), 3); +} + +} // namespace gandiva