From 9ea2b4c257a9389a0ba087dc02b5d0776aaff77c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 8 May 2026 11:22:11 -0700 Subject: [PATCH 1/3] Use leastRestrictive for mvappend element-type widening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `MVAppendFunctionImpl.updateMostGeneralType` used strict {@code Object.equals} to compare each operand's component type against the running "most general" type, falling back to Calcite's {@code ANY} on any mismatch. That's too aggressive: {@code Object.equals} returns false for type pairs that differ only in nullability tag (e.g. {@code array(1, 2)} synthesizes INTEGER NULLABLE for its component while literal {@code 3} is INTEGER NOT NULL), and for straightforwardly-widenable numerics like INTEGER + DOUBLE. The PPL UDF result would then be {@code ARRAY}. The Calcite engine's enumerable runtime tolerates {@code ANY} because {@code MVAppendImplementor.eval} processes elements through {@code Object} — the declared element type is unused at execution time. The analytics-engine route is stricter: substrait can't serialize {@code ANY}, so isthmus throws {@code UnsupportedOperationException: Unable to convert the type ANY} during the substrait conversion phase. Widen with {@link RelDataTypeFactory#leastRestrictive} — the same routine {@code SqlLibraryOperators.ARRAY} uses for its return-type inference. Falls back to ANY only when {@code leastRestrictive} returns null (genuinely incompatible operand types like INT + VARCHAR), preserving the original behavior on those queries. # Test plan * {@code :core:test --tests "*MVAppend*"} — passes (no existing test asserted on the {@code ANY} fallback). * Companion to opensearch-project/OpenSearch#21554 — unblocks 8+ tests in {@code CalciteMVAppendFunctionIT} force-routed through the analytics-engine path that previously failed with "Unable to convert the type ANY". Signed-off-by: Kai Huang --- .../CollectionUDF/MVAppendFunctionImpl.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java index 107df5eea4e..921e60c6105 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java @@ -78,12 +78,18 @@ private static RelDataType updateMostGeneralType( if (current == null) { return candidate; } - - if (!current.equals(candidate)) { - return typeFactory.createSqlType(SqlTypeName.ANY); - } else { - return current; - } + // Widen via Calcite's leastRestrictive (the same routine SqlLibraryOperators.ARRAY uses + // for its return-type inference) instead of strict Object.equals. {@code Object.equals} + // returns false for type pairs that only differ in nullability tag — e.g. {@code array(1, 2)} + // synthesizes INTEGER NULLABLE for its component while a literal {@code 3} is INTEGER NOT + // NULL — which then fell into the {@code ANY} fallback. Calcite's {@code ANY} type isn't + // substrait-serializable, so any analytics-engine query passing through {@code mvappend} + // with mixed-nullability operands or with widenable numerics (INT + DOUBLE) would fail + // substrait conversion with "Unable to convert the type ANY". {@code leastRestrictive} + // returns null when no common type exists (genuinely incompatible types like INT + VARCHAR); + // fall back to ANY in that case to preserve the original behavior on those queries. + RelDataType least = typeFactory.leastRestrictive(java.util.List.of(current, candidate)); + return least != null ? least : typeFactory.createSqlType(SqlTypeName.ANY); } public static class MVAppendImplementor implements NotNullImplementor { From c691e6092908b89c39cfaa12050d027696ec12fc Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 8 May 2026 12:22:49 -0700 Subject: [PATCH 2/3] Restrict mvappend widening to nullability-only mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling Calcite's leastRestrictive widens mixed numerics like INT + DECIMAL to a single common numeric type (e.g. DECIMAL(11,1)). The Calcite engine then casts each operand to that type at codegen — Integer(1) becomes BigDecimal with scale 1, which renders as 0.1 (or 0 after JSON round-trip), breaking testMvappendWithIntAndDouble that expects mvappend(1, 2.5) to return [1, 2.5]. The original goal was just to bridge the nullability-tag mismatch that synthesizes an array's component as INTEGER NULLABLE versus a bare literal's INTEGER NOT NULL. Limit the widening to that case via equalSansNullability and keep the ANY fallback for genuinely different types — preserving the Calcite engine's heterogeneous-Object[] runtime semantics that pre-existing tests rely on. Signed-off-by: Kai Huang Signed-off-by: Kai Huang --- .../CollectionUDF/MVAppendFunctionImpl.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java index 921e60c6105..0797c8b0e52 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java @@ -20,6 +20,7 @@ import org.apache.calcite.sql.SqlOperatorBinding; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -78,18 +79,23 @@ private static RelDataType updateMostGeneralType( if (current == null) { return candidate; } - // Widen via Calcite's leastRestrictive (the same routine SqlLibraryOperators.ARRAY uses - // for its return-type inference) instead of strict Object.equals. {@code Object.equals} - // returns false for type pairs that only differ in nullability tag — e.g. {@code array(1, 2)} - // synthesizes INTEGER NULLABLE for its component while a literal {@code 3} is INTEGER NOT - // NULL — which then fell into the {@code ANY} fallback. Calcite's {@code ANY} type isn't - // substrait-serializable, so any analytics-engine query passing through {@code mvappend} - // with mixed-nullability operands or with widenable numerics (INT + DOUBLE) would fail - // substrait conversion with "Unable to convert the type ANY". {@code leastRestrictive} - // returns null when no common type exists (genuinely incompatible types like INT + VARCHAR); - // fall back to ANY in that case to preserve the original behavior on those queries. - RelDataType least = typeFactory.leastRestrictive(java.util.List.of(current, candidate)); - return least != null ? least : typeFactory.createSqlType(SqlTypeName.ANY); + if (current.equals(candidate)) { + return current; + } + // Tolerate a nullability-only mismatch by widening to the nullable form. {@code Object.equals} + // distinguishes INTEGER NULLABLE from INTEGER NOT NULL — e.g. {@code array(1, 2)} synthesizes + // an INTEGER NULLABLE component while a bare literal {@code 3} is INTEGER NOT NULL — which + // would otherwise fall into the {@code ANY} fallback. Calcite's {@code ANY} type isn't + // substrait-serializable, so any analytics-engine query passing through {@code mvappend} with + // mixed-nullability operands would fail substrait conversion with "Unable to convert the type + // ANY". For genuinely different types (INT + DOUBLE, INT + VARCHAR, …) keep the {@code ANY} + // fallback unchanged: the Calcite engine relies on {@code Object[]} runtime semantics to + // preserve heterogeneous values as-is, and widening to a single numeric type would corrupt + // values at codegen time (e.g. casting integer 1 to {@code DECIMAL(11,1)}). + if (SqlTypeUtil.equalSansNullability(typeFactory, current, candidate)) { + return typeFactory.createTypeWithNullability(current, true); + } + return typeFactory.createSqlType(SqlTypeName.ANY); } public static class MVAppendImplementor implements NotNullImplementor { From 1906994c6ec5b808e25cc4fc82c3775ef4a7eb2e Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 8 May 2026 13:52:31 -0700 Subject: [PATCH 3/3] Widen mvappend element type via leastRestrictive + pre-cast operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous nullability-only bridge fixed `array(1, 2) + literal 3` but left `mvappend(1, 2.5)` falling back to ARRAY. ARRAY is not substrait-serializable, so any analytics-engine query through that call fails at substrait conversion. Aggressive `leastRestrictive` widening was the obvious next step but earlier triggered a runtime corruption — Integer 1 showed up as 0 in the response — because the Avatica result-set's ArrayAccessor uses element-type-specific accessors (e.g. `DoubleAccessor.getDouble` does `(Double) value`), and an Integer cell in a declared-DOUBLE list triggered a ClassCastException that the error path masked as `[0, 2.5]`. Fix the corruption by pre-casting each scalar operand to the call's element Java class in `MVAppendImplementor` via `EnumUtils.convert`. The result list is now homogeneously typed at codegen, so Avatica's per-element cast succeeds. Promote DECIMAL → DOUBLE on the way through `updateMostGeneralType` because `RowResponseCodec` maps DECIMAL cells to FloatingPoint(DOUBLE) anyway; an explicit DECIMAL element type triggers Calcite's element coercion to BigDecimal, which the JSON formatter renders inconsistently across paths. For genuinely incompatible operand pairs (INT + VARCHAR, …) `leastRestrictive` returns null and the existing `ANY` fallback stands — heterogeneous mvappend output stays on the Calcite engine path; only the analytics-engine route can't emit substrait for those. Local verification: - :core:test --tests *MVAppend* — green - :integ-test:integTest --tests CalciteMVAppendFunctionIT — 15/15 - :integ-test:integTest --tests CalciteArrayFunctionIT — 60/60 Signed-off-by: Kai Huang Signed-off-by: Kai Huang --- .../CollectionUDF/MVAppendFunctionImpl.java | 70 +++++++++++++++---- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java index 0797c8b0e52..bafbeb09c43 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java @@ -7,7 +7,10 @@ import static org.apache.calcite.sql.type.SqlTypeUtil.createArrayType; +import java.math.BigDecimal; +import java.util.ArrayList; import java.util.List; +import org.apache.calcite.adapter.enumerable.EnumUtils; import org.apache.calcite.adapter.enumerable.NotNullImplementor; import org.apache.calcite.adapter.enumerable.NullPolicy; import org.apache.calcite.adapter.enumerable.RexToLixTranslator; @@ -20,7 +23,6 @@ import org.apache.calcite.sql.SqlOperatorBinding; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql.type.SqlTypeUtil; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -82,33 +84,71 @@ private static RelDataType updateMostGeneralType( if (current.equals(candidate)) { return current; } - // Tolerate a nullability-only mismatch by widening to the nullable form. {@code Object.equals} - // distinguishes INTEGER NULLABLE from INTEGER NOT NULL — e.g. {@code array(1, 2)} synthesizes - // an INTEGER NULLABLE component while a bare literal {@code 3} is INTEGER NOT NULL — which - // would otherwise fall into the {@code ANY} fallback. Calcite's {@code ANY} type isn't - // substrait-serializable, so any analytics-engine query passing through {@code mvappend} with - // mixed-nullability operands would fail substrait conversion with "Unable to convert the type - // ANY". For genuinely different types (INT + DOUBLE, INT + VARCHAR, …) keep the {@code ANY} - // fallback unchanged: the Calcite engine relies on {@code Object[]} runtime semantics to - // preserve heterogeneous values as-is, and widening to a single numeric type would corrupt - // values at codegen time (e.g. casting integer 1 to {@code DECIMAL(11,1)}). - if (SqlTypeUtil.equalSansNullability(typeFactory, current, candidate)) { - return typeFactory.createTypeWithNullability(current, true); + // Widen via Calcite's {@code leastRestrictive} — the same routine + // {@code SqlLibraryOperators.ARRAY} uses for its return-type inference. For genuinely + // incompatible operand types (INT + VARCHAR, …) it returns null; fall back to {@code ANY} + // there to preserve the in-process Calcite engine's {@code Object[]} runtime semantics + // that pre-existing tests rely on. Promote DECIMAL → DOUBLE on the way through: the row + // codec on the analytics-engine route maps DECIMAL cells to {@code FloatingPoint(DOUBLE)} + // anyway, and an explicit DECIMAL element type triggers Calcite's element coercion to + // BigDecimal, which downstream Avatica array accessors and the JSON formatter render + // inconsistently across paths. + RelDataType least = typeFactory.leastRestrictive(java.util.List.of(current, candidate)); + if (least == null) { + return typeFactory.createSqlType(SqlTypeName.ANY); } - return typeFactory.createSqlType(SqlTypeName.ANY); + if (least.getSqlTypeName() == SqlTypeName.DECIMAL) { + return typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.DOUBLE), true); + } + return least; } public static class MVAppendImplementor implements NotNullImplementor { @Override public Expression implement( RexToLixTranslator translator, RexCall call, List translatedOperands) { + // Pre-cast each scalar operand to the call's element Java class so the result list is + // homogeneously typed. Avatica's {@code AbstractCursor.ArrayAccessor} dispatches the + // per-element accessor by the declared SQL type — e.g. {@code DoubleAccessor.getDouble} + // does {@code (Double) value} — and would throw a runtime ClassCastException on an + // {@code Integer} cell when the call's element type widens to DOUBLE. Array operands + // pass through; their element-type alignment is the planner's responsibility. + RelDataType elementType = call.getType().getComponentType(); + Class elementClass = + elementType == null ? Object.class : boxedJavaClass(elementType.getSqlTypeName()); + List coerced = new ArrayList<>(translatedOperands.size()); + for (int i = 0; i < translatedOperands.size(); i++) { + Expression op = translatedOperands.get(i); + RelDataType opType = call.getOperands().get(i).getType(); + if (opType.getComponentType() != null || elementClass == Object.class) { + coerced.add(op); + } else { + coerced.add(EnumUtils.convert(op, elementClass)); + } + } return Expressions.call( Types.lookupMethod(MVAppendFunctionImpl.class, "mvappend", Object[].class), - Expressions.newArrayInit(Object.class, translatedOperands)); + Expressions.newArrayInit(Object.class, coerced)); } } public static Object mvappend(Object... args) { return MVAppendCore.collectElements(args); } + + private static Class boxedJavaClass(SqlTypeName sqlType) { + return switch (sqlType) { + case BOOLEAN -> Boolean.class; + case TINYINT -> Byte.class; + case SMALLINT -> Short.class; + case INTEGER -> Integer.class; + case BIGINT -> Long.class; + case FLOAT, REAL -> Float.class; + case DOUBLE -> Double.class; + case DECIMAL -> BigDecimal.class; + case CHAR, VARCHAR -> String.class; + default -> Object.class; + }; + } }