From 632671a40a3ea4c22ee6a48aa703e7e04b6dbebc Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Wed, 4 Feb 2026 08:51:51 +0000 Subject: [PATCH 1/2] fix: Ensure columns are casted to the correct names with Unions --- datafusion/expr/src/expr_rewriter/mod.rs | 11 ++++++++--- datafusion/optimizer/tests/optimizer_integration.rs | 6 +++--- datafusion/substrait/tests/cases/logical_plans.rs | 2 -- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index a0faca76e91e4..51d5ce2e1223a 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -261,9 +261,14 @@ fn coerce_exprs_for_schema( #[expect(deprecated)] Expr::Wildcard { .. } => Ok(expr), _ => { - // maintain the original name when casting - let name = dst_schema.field(idx).name(); - Ok(expr.cast_to(new_type, src_schema)?.alias(name)) + match expr { + // maintain the original name when casting a column + Expr::Column(ref column) => { + let name = column.name().to_owned(); + Ok(expr.cast_to(new_type, src_schema)?.alias(name)) + } + _ => Ok(expr.cast_to(new_type, src_schema)?), + } } } } else { diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 36a6df54ddaf0..fd4991c24413f 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -543,7 +543,7 @@ fn recursive_cte_projection_pushdown() -> Result<()> { RecursiveQuery: is_distinct=false Projection: test.col_int32 AS id TableScan: test projection=[col_int32] - Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id + Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) Filter: nodes.id < Int32(3) TableScan: nodes projection=[id] " @@ -567,7 +567,7 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> { RecursiveQuery: is_distinct=false Projection: test.col_int32 AS id 
TableScan: test projection=[col_int32] - Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id + Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) SubqueryAlias: child Filter: nodes.id < Int32(3) TableScan: nodes projection=[id] @@ -630,7 +630,7 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> { Projection: test.col_int32 AS n Filter: test.col_int32 = Int32(5) TableScan: test projection=[col_int32] - Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n + Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) Filter: countdown.n > Int32(1) TableScan: countdown projection=[n] " diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs index 115c5984301d7..79a19dc92b452 100644 --- a/datafusion/substrait/tests/cases/logical_plans.rs +++ b/datafusion/substrait/tests/cases/logical_plans.rs @@ -231,8 +231,6 @@ mod tests { } #[tokio::test] - // Test still failing, issue tracked in "https://github.com/apache/datafusion/issues/20123". - #[ignore] async fn duplicate_name_in_union() -> Result<()> { let proto_plan = read_json("tests/testdata/test_plans/duplicate_name_in_union.substrait.json"); From 5d1ca1f222b9dfeb8594a61349f7c5629b528f2d Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Thu, 5 Feb 2026 08:30:23 +0000 Subject: [PATCH 2/2] Add context about aliasing casted columns --- datafusion/expr/src/expr_rewriter/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 51d5ce2e1223a..32a88ab8cf310 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -262,7 +262,9 @@ fn coerce_exprs_for_schema( Expr::Wildcard { .. 
} => Ok(expr), _ => { match expr { - // maintain the original name when casting a column + // maintain the original name when casting a column, to avoid the + // table name being added to it when not explicitly set by the query + // (see: https://github.com/apache/datafusion/issues/18818) Expr::Column(ref column) => { let name = column.name().to_owned(); Ok(expr.cast_to(new_type, src_schema)?.alias(name))