From 972d63276a87433aa7eb3ef395c969502b402192 Mon Sep 17 00:00:00 2001 From: shee <13843187+qzsee@users.noreply.github.com> Date: Thu, 28 May 2026 15:22:44 +0800 Subject: [PATCH] [BUG](exec) fix coalesce function output null (#63092) ### What problem does this PR solve? Issue Number: close #xxx Example: COALESCE(same_department_income_amount, 0) ==> outputs NULL (where same_department_income_amount is of type double). When assigning the value to the result column in the computation, the assignment is done unconditionally (forced), as in: ```cpp result_raw_data[row] += column_raw_data[row] * typename ColumnType::value_type(!(null_map_data[row] | filled_flag[row])); ``` If the argument column column_raw_data's null_map[row] is 1, then the value stored in column_raw_data[row] is garbage data. This garbage may contain values such as NaN. If a preceding argument of COALESCE happens to be assigned NaN, then during subsequent assignments we run into cases like: 0 * NaN = NaN num + NaN = NaN so the assigned result also becomes NaN, which causes value pollution. By rights the final output should also be NaN, but what is actually returned is NULL. The reason is that during result serialization/output, NaN values are emitted as NULL. ```cpp tatus DataTypeNumberSerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int row_idx, bool col_const, const FormatOptions& options) const { //... else if constexpr (std::is_same_v) { if (std::isnan(data[col_index])) { // Handle NaN for float, we should push null value buf_ret = result.push_null(); } else { buf_ret = result.push_float(data[col_index]); } } //... } ``` Co-authored-by: garenshi --- be/src/vec/exprs/vcondition_expr.cpp | 6 + be/test/exprs/vcondition_expr_test.cpp | 358 +++++++++++++++++++++++++ 2 files changed, 364 insertions(+) create mode 100644 be/test/exprs/vcondition_expr_test.cpp diff --git a/be/src/vec/exprs/vcondition_expr.cpp b/be/src/vec/exprs/vcondition_expr.cpp index fca4b0876e284c..646e83e4e4a0c7 100644 --- a/be/src/vec/exprs/vcondition_expr.cpp +++ b/be/src/vec/exprs/vcondition_expr.cpp @@ -583,6 +583,12 @@ void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_col binary_cast(result_raw_data[row]) + binary_cast(column_raw_data[row]) * int64_t(!(null_map_data[row] | filled_flag[row]))); + } else if constexpr (std::is_same_v || + std::is_same_v) { + auto flag = (!(null_map_data[row] | filled_flag[row])); + result_raw_data[row] += std::isfinite(column_raw_data[row]) + ? column_raw_data[row] * flag + : (flag ? column_raw_data[row] : 0); } else { result_raw_data[row] += column_raw_data[row] * diff --git a/be/test/exprs/vcondition_expr_test.cpp b/be/test/exprs/vcondition_expr_test.cpp new file mode 100644 index 00000000000000..b26f01cc2664c5 --- /dev/null +++ b/be/test/exprs/vcondition_expr_test.cpp @@ -0,0 +1,358 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/vcondition_expr.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" + +namespace doris { + +// Build a minimal TExprNode as the input of VectorizedCoalesceExpr. +// Only fields required by the VExpr base ctor (so that create_data_type works) are set. +static TExprNode make_coalesce_node(TPrimitiveType::type ptype, bool is_nullable) { + TExprNode node; + node.node_type = TExprNodeType::FUNCTION_CALL; + node.num_children = 0; + node.__set_is_nullable(is_nullable); + + TTypeDesc type_desc; + TTypeNode type_node; + type_node.type = TTypeNodeType::SCALAR; + TScalarType scalar_type; + scalar_type.__set_type(ptype); + type_node.__set_scalar_type(scalar_type); + type_desc.types.push_back(type_node); + node.__set_type(type_desc); + + TFunction fn; + TFunctionName fn_name; + fn_name.function_name = "coalesce"; + fn.name = fn_name; + node.__set_fn(fn); + + return node; +} + +// Mock child expression: returns the pre-injected ColumnPtr / DataTypePtr to the parent expr. +// Behavior is modeled after MockVExprForTryCast in try_cast_expr_test.cpp. +class MockChildVExpr : public VExpr { +public: + MockChildVExpr(ColumnPtr column, DataTypePtr type) + : _column(std::move(column)), _type(std::move(type)) {} + + MOCK_CONST_METHOD0(clone, VExprSPtr()); + + const std::string& expr_name() const override { return _name; } + + Status execute(VExprContext* context, Block* block, int* result_column_id) const override { + block->insert({_column, _type, "mock_child"}); + *result_column_id = static_cast(block->columns()) - 1; + return Status::OK(); + } + + Status execute_column_impl(VExprContext* /*context*/, const Block* /*block*/, + const Selector* /*selector*/, size_t /*count*/, + ColumnPtr& result_column) const override { + result_column = _column; + return Status::OK(); + } + + DataTypePtr execute_type(const Block* /*block*/) const override { return _type; } + +private: + ColumnPtr _column; + DataTypePtr _type; + std::string _name = "mock_child"; +}; + +// Helper: build a nullable Float64 column from a list of (value, is_null) pairs. +static ColumnPtr make_nullable_float64_column(const std::vector>& values) { + auto nested = ColumnFloat64::create(); + auto null_map = ColumnUInt8::create(); + for (auto& [v, is_null] : values) { + nested->insert_value(v); + null_map->insert_value(is_null ? 1 : 0); + } + return ColumnNullable::create(std::move(nested), std::move(null_map)); +} + +// Helper: build a non-nullable Float64 column from a list of doubles. +static ColumnPtr make_float64_column(const std::vector& values) { + auto col = ColumnFloat64::create(); + for (auto v : values) { + col->insert_value(v); + } + return col; +} + +// Helper: extract the Float64 value at `row` from the result column, +// handling both nullable and non-nullable cases. +static double get_float64_value(const ColumnPtr& column, size_t row, bool* is_null = nullptr) { + if (const auto* nullable = check_and_get_column(column.get())) { + if (is_null) { + *is_null = nullable->is_null_at(row); + } + const auto& nested = + assert_cast(nullable->get_nested_column()).get_data(); + return nested[row]; + } + if (is_null) { + *is_null = false; + } + return assert_cast(column.get())->get_data()[row]; +} + +class VConditionExprCoalesceTest : public ::testing::Test {}; + +// Targets the fix: after some rows of col0 are filled, NaN/Inf values in the +// same rows of later columns must not pollute the already-filled result via +// "value * 0" arithmetic. +// Input: +// col0 (nullable, double): [1.0, NULL] +// col1 (non-nullable, double): [NaN, 100.0] +// Expected coalesce result: [1.0, 100.0] +TEST_F(VConditionExprCoalesceTest, Float64_NaN_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + + // _data_type is already set to Nullable(Float64) by the base ctor; reassert it explicitly + // through the public data_type() accessor (the underlying field is protected). + coalesce_expr->data_type() = + std::make_shared(std::make_shared()); + + const double kNaN = std::numeric_limits::quiet_NaN(); + auto col0 = make_nullable_float64_column({{1.0, false}, {0.0, true}}); + auto col1 = make_float64_column({kNaN, 100.0}); + + auto child0 = std::make_shared( + col0, std::make_shared(std::make_shared())); + auto child1 = std::make_shared(col1, std::make_shared()); + coalesce_expr->add_child(child0); + coalesce_expr->add_child(child1); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_TRUE(result.get() != nullptr); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0; + EXPECT_DOUBLE_EQ(v0, 1.0); + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 100.0); +} + +// Same shape as above, but the polluting value in the later column is +Inf: +// col0 (nullable, double): [2.0, NULL] +// col1 (non-nullable, double): [+Inf, 200.0] +// Expected coalesce result: [2.0, 200.0] +TEST_F(VConditionExprCoalesceTest, Float64_Inf_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->data_type() = + std::make_shared(std::make_shared()); + + const double kInf = std::numeric_limits::infinity(); + auto col0 = make_nullable_float64_column({{2.0, false}, {0.0, true}}); + auto col1 = make_float64_column({kInf, 200.0}); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by Inf*0, got " << v0; + EXPECT_TRUE(std::isfinite(v0)); + EXPECT_DOUBLE_EQ(v0, 2.0); + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 200.0); +} + +// Verify that when NaN itself is the valid non-null value picked by coalesce, +// the result must preserve the NaN (the fix should not over-correct). +// col0 (nullable, double): [NaN, NULL] +// col1 (non-nullable, double): [10.0, 20.0] +// Expected: row0 takes col0's NaN (non-null, value is NaN); row1 takes col1's 20.0. +TEST_F(VConditionExprCoalesceTest, Float64_NaN_PreservedWhenSelected) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->data_type() = + std::make_shared(std::make_shared()); + + const double kNaN = std::numeric_limits::quiet_NaN(); + auto col0 = make_nullable_float64_column({{kNaN, false}, {0.0, true}}); + auto col1 = make_float64_column({10.0, 20.0}); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_TRUE(std::isnan(v0)) << "row0 should preserve NaN from col0"; + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 20.0); +} + +// Float32 variant: same shape as the Float64 NaN case. +TEST_F(VConditionExprCoalesceTest, Float32_NaN_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::FLOAT, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->data_type() = + std::make_shared(std::make_shared()); + + const float kNaN = std::numeric_limits::quiet_NaN(); + + auto nested0 = ColumnFloat32::create(); + nested0->insert_value(1.5f); + nested0->insert_value(0.0f); + auto null_map0 = ColumnUInt8::create(); + null_map0->insert_value(0); + null_map0->insert_value(1); + ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0)); + + auto nested1 = ColumnFloat32::create(); + nested1->insert_value(kNaN); + nested1->insert_value(7.5f); + ColumnPtr col1 = std::move(nested1); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + auto get_f32 = [&](size_t row, bool* is_null) -> float { + if (const auto* nullable = check_and_get_column(result.get())) { + *is_null = nullable->is_null_at(row); + return assert_cast(nullable->get_nested_column()).get_data()[row]; + } + *is_null = false; + return assert_cast(result.get())->get_data()[row]; + }; + + bool is_null = false; + float v0 = get_f32(0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0; + EXPECT_FLOAT_EQ(v0, 1.5f); + + float v1 = get_f32(1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FLOAT_EQ(v1, 7.5f); +} + +// Regression: verify that the non-floating (integer) path is unaffected by the Float branch change. +TEST_F(VConditionExprCoalesceTest, Int32_NormalPathStillWorks) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::INT, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->data_type() = + std::make_shared(std::make_shared()); + + auto nested0 = ColumnInt32::create(); + nested0->insert_value(11); + nested0->insert_value(0); + auto null_map0 = ColumnUInt8::create(); + null_map0->insert_value(0); + null_map0->insert_value(1); + ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0)); + + auto nested1 = ColumnInt32::create(); + nested1->insert_value(99); + nested1->insert_value(22); + ColumnPtr col1 = std::move(nested1); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + auto get_int = [&](size_t row, bool* is_null) -> int32_t { + if (const auto* nullable = check_and_get_column(result.get())) { + *is_null = nullable->is_null_at(row); + return assert_cast(nullable->get_nested_column()).get_data()[row]; + } + *is_null = false; + return assert_cast(result.get())->get_data()[row]; + }; + + bool is_null = false; + EXPECT_EQ(get_int(0, &is_null), 11); + EXPECT_FALSE(is_null); + EXPECT_EQ(get_int(1, &is_null), 22); + EXPECT_FALSE(is_null); +} + +} // namespace doris