From 4c117bc04dc4b2b868de8f10aee08659be3399cf Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Mon, 1 Dec 2025 13:59:51 +0000 Subject: [PATCH 1/3] fix: Ensure column names do not change with expand_views_at_output --- datafusion/expr/src/expr_rewriter/mod.rs | 6 +++- datafusion/sqllogictest/test_files/cast.slt | 36 +++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 31759f1cc9cfe..9b62245754c00 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -260,7 +260,11 @@ fn coerce_exprs_for_schema( } #[expect(deprecated)] Expr::Wildcard { .. } => Ok(expr), - _ => expr.cast_to(new_type, src_schema), + _ => { + // maintain the original name when casting + let name = dst_schema.field(idx).name(); + Ok(expr.cast_to(new_type, src_schema)?.alias(name)) + } } } else { Ok(expr) diff --git a/datafusion/sqllogictest/test_files/cast.slt b/datafusion/sqllogictest/test_files/cast.slt index 3466354e54d71..916895b8be1eb 100644 --- a/datafusion/sqllogictest/test_files/cast.slt +++ b/datafusion/sqllogictest/test_files/cast.slt @@ -89,3 +89,39 @@ select * from t0 where v0<1e100; statement ok drop table t0; + + +# ensure that automatically casting with "datafusion.optimizer.expand_views_at_output" does not +# change the column name + +statement ok +create table t(a int, b varchar); + +statement ok +set datafusion.optimizer.expand_views_at_output = true; + +query TT +explain select * from t; +---- +logical_plan +01)Projection: t.a, CAST(t.b AS LargeUtf8) AS b +02)--TableScan: t projection=[a, b] +physical_plan +01)ProjectionExec: expr=[a@0 as a, CAST(b@1 AS LargeUtf8) as b] +02)--DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain select b from t; +---- +logical_plan +01)Projection: CAST(t.b AS LargeUtf8) AS b +02)--TableScan: t projection=[b] +physical_plan +01)ProjectionExec: expr=[CAST(b@0 AS LargeUtf8) as b] +02)--DataSourceExec: partitions=1, partition_sizes=[0] + +statement ok +set datafusion.optimizer.expand_views_at_output = false; + +statement ok +drop table t; From cccf9aa81c33fa1412f795bc27a014d34e2b6598 Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Mon, 1 Dec 2025 14:40:37 +0000 Subject: [PATCH 2/3] Fix type coercion test --- datafusion/optimizer/src/analyzer/type_coercion.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index a557d3356dba0..ba65451f2549b 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1305,7 +1305,7 @@ mod test { true, plan.clone(), @r" - Projection: CAST(a AS LargeUtf8) + Projection: CAST(a AS LargeUtf8) AS a EmptyRelation: rows=0 " )?; @@ -1436,7 +1436,7 @@ mod test { true, plan.clone(), @r" - Projection: CAST(a AS LargeBinary) + Projection: CAST(a AS LargeBinary) AS a EmptyRelation: rows=0 " )?; From 9344f442cbead165c7dba27ad766d7184f52ad3c Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Mon, 1 Dec 2025 15:10:31 +0000 Subject: [PATCH 3/3] Fix more tests --- .../optimizer/src/analyzer/type_coercion.rs | 10 ++-- .../optimizer/tests/optimizer_integration.rs | 54 ++++++++++--------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index ba65451f2549b..85751fd70c05e 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1341,7 +1341,7 @@ mod test { true, plan.clone(), @r" - Projection: CAST(a AS LargeUtf8) + Projection: CAST(a AS LargeUtf8) AS a EmptyRelation: rows=0 " )?; @@ -1371,7 +1371,7 @@ mod test { true, sort_plan.clone(), @r" - Projection: CAST(a AS LargeUtf8) + Projection: CAST(a AS LargeUtf8) AS a Sort: a ASC NULLS FIRST Projection: a EmptyRelation: rows=0 @@ -1400,7 +1400,7 @@ mod test { true, plan.clone(), @r" - Projection: CAST(a AS LargeUtf8) + Projection: CAST(a AS LargeUtf8) AS a Sort: a ASC NULLS FIRST Projection: a EmptyRelation: rows=0 @@ -1493,7 +1493,7 @@ mod test { true, sort_plan.clone(), @r" - Projection: CAST(a AS LargeBinary) + Projection: CAST(a AS LargeBinary) AS a Sort: a ASC NULLS FIRST Projection: a EmptyRelation: rows=0 @@ -1524,7 +1524,7 @@ mod test { true, plan.clone(), @r" - Projection: CAST(a AS LargeBinary) + Projection: CAST(a AS LargeBinary) AS a Sort: a ASC NULLS FIRST Projection: a EmptyRelation: rows=0 diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index c0f48b8ebfc40..6576298e22e42 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -536,14 +536,15 @@ fn recursive_cte_projection_pushdown() -> Result<()> { // columns from the base table and recursive table, eliminating unused columns assert_snapshot!( format!("{plan}"), - @r#"SubqueryAlias: nodes - RecursiveQuery: is_distinct=false - Projection: test.col_int32 AS id - TableScan: test projection=[col_int32] - Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) - Filter: nodes.id < Int32(3) - TableScan: nodes projection=[id] -"# + @r" + SubqueryAlias: nodes + RecursiveQuery: is_distinct=false + Projection: test.col_int32 AS id + TableScan: test projection=[col_int32] + Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id + Filter: nodes.id < Int32(3) + TableScan: nodes projection=[id] + " ); Ok(()) } @@ -559,14 +560,16 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> { assert_snapshot!( format!("{plan}"), - @r#"SubqueryAlias: nodes - RecursiveQuery: is_distinct=false - Projection: test.col_int32 AS id - TableScan: test projection=[col_int32] - Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) - SubqueryAlias: child - Filter: nodes.id < Int32(3) - TableScan: nodes projection=[id]"#, + @r" + SubqueryAlias: nodes + RecursiveQuery: is_distinct=false + Projection: test.col_int32 AS id + TableScan: test projection=[col_int32] + Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id + SubqueryAlias: child + Filter: nodes.id < Int32(3) + TableScan: nodes projection=[id] + ", ); Ok(()) } @@ -618,15 +621,16 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> { // and only the needed column is selected from the recursive table assert_snapshot!( format!("{plan}"), - @r#"SubqueryAlias: countdown - RecursiveQuery: is_distinct=false - Projection: test.col_int32 AS n - Filter: test.col_int32 = Int32(5) - TableScan: test projection=[col_int32] - Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) - Filter: countdown.n > Int32(1) - TableScan: countdown projection=[n] -"# + @r" + SubqueryAlias: countdown + RecursiveQuery: is_distinct=false + Projection: test.col_int32 AS n + Filter: test.col_int32 = Int32(5) + TableScan: test projection=[col_int32] + Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n + Filter: countdown.n > Int32(1) + TableScan: countdown projection=[n] + " ); Ok(()) }