From e7060bf2aa883fc6d4d1b28ad50c05b6fc0733de Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 3 Feb 2026 13:34:09 +0000 Subject: [PATCH 1/6] Some stuff Signed-off-by: Adam Gutglick --- Cargo.lock | 139 ++++++++++++++++++++++++++++-- Cargo.toml | 2 + vortex-sqllogictest/Cargo.toml | 29 +++++++ vortex-sqllogictest/src/duckdb.rs | 39 +++++++++ vortex-sqllogictest/src/error.rs | 20 +++++ vortex-sqllogictest/src/lib.rs | 2 + vortex-sqllogictest/src/main.rs | 3 + 7 files changed, 225 insertions(+), 9 deletions(-) create mode 100644 vortex-sqllogictest/Cargo.toml create mode 100644 vortex-sqllogictest/src/duckdb.rs create mode 100644 vortex-sqllogictest/src/error.rs create mode 100644 vortex-sqllogictest/src/lib.rs create mode 100644 vortex-sqllogictest/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index d44bf39ab22..60404bcb4c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1815,7 +1815,7 @@ checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1939,7 +1939,7 @@ checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ "strum 0.26.3", "strum_macros 0.26.4", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -2032,7 +2032,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width", + "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -3887,6 +3887,18 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "either" version = "1.15.0" @@ -3928,6 +3940,26 @@ dependencies = [ 
"syn 2.0.114", ] +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "env_filter" version = "0.1.4" @@ -3973,6 +4005,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "ethnum" version = "1.5.2" @@ -4201,6 +4239,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "3.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" +dependencies = [ + "autocfg", +] + [[package]] name = "fs4" version = "0.8.4" @@ -4918,7 +4965,7 @@ dependencies = [ "console 0.16.2", "futures-core", "portable-atomic", - "unicode-width", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] @@ -5864,6 +5911,18 @@ dependencies = [ "redox_syscall 0.7.0", ] +[[package]] +name = "libtest-mimic" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +dependencies = [ + "anstream", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "line-clipping" version = "0.3.5" @@ -6860,6 +6919,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "owo-colors" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" + [[package]] name = "papergrid" version = "0.17.0" @@ -6868,7 +6933,7 @@ checksum = "6978128c8b51d8f4080631ceb2302ab51e32cc6e8615f735ee2f83fd269ae3f1" dependencies = [ "bytecount", "fnv", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7978,7 +8043,7 @@ dependencies = [ "thiserror 2.0.18", "unicode-segmentation", "unicode-truncate", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -8029,7 +8094,7 @@ dependencies = [ "strum 0.27.2", "time", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -8975,6 +9040,31 @@ dependencies = [ "der", ] +[[package]] +name = "sqllogictest" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffbf03091090a9330529c3926313be0a0570f036edfd490b11db39eea4b7118" +dependencies = [ + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "sqlparser" version = "0.58.0" @@ -9089,6 +9179,16 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "subst" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a9a86e5144f63c2d18334698269a8bfae6eece345c70b64821ea5b35054ec99" +dependencies = [ + "memchr", + "unicode-width 0.1.14", +] + [[package]] name = "subtle" version = "2.6.1" @@ -9523,7 +9623,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8daae29995a24f65619e19d8d31dea5b389f3d853d8bf297bbf607cd0014cc" dependencies = [ - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -10047,9 +10147,15 @@ checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5" dependencies = [ "itertools 0.14.0", "unicode-segmentation", - "unicode-width", + 
"unicode-width 0.2.2", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.2" @@ -11111,6 +11217,21 @@ dependencies = [ "vortex-session", ] +[[package]] +name = "vortex-sqllogictest" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "datafusion 52.1.0", + "sqllogictest", + "thiserror 2.0.18", + "tokio", + "vortex", + "vortex-datafusion", + "vortex-duckdb", +] + [[package]] name = "vortex-test-e2e" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index ba9a0268b87..f56511ffdc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ members = [ "benchmarks/datafusion-bench", "benchmarks/duckdb-bench", "benchmarks/random-access-bench", + "vortex-sqllogictest", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" @@ -134,6 +135,7 @@ datafusion-physical-expr-adapter = { version = "52" } datafusion-physical-expr-common = { version = "52" } datafusion-physical-plan = { version = "52" } datafusion-pruning = { version = "52" } +datafusion-sqllogictest = { version = "52" } dirs = "6.0.0" divan = { package = "codspeed-divan-compat", version = "4.0.4" } enum-iterator = "2.0.0" diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml new file mode 100644 index 00000000000..7ddf64ad2db --- /dev/null +++ b/vortex-sqllogictest/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "vortex-sqllogictest" +authors = { workspace = true } +description = "Test runner for SQL integrations" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +publish = false +readme = "README.md" +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[dependencies] +anyhow = { workspace = true } 
+async-trait = { workspace = true } +datafusion = { workspace = true } +datafusion-sqllogictest = { workspace = true } +sqllogictest = "0.29" +thiserror = { workspace = true } +tokio = { workspace = true, features = ["full"] } +vortex = { workspace = true } +vortex-datafusion = { workspace = true } +vortex-duckdb = { workspace = true } + +[lints] +workspace = true diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs new file mode 100644 index 00000000000..2056e17870d --- /dev/null +++ b/vortex-sqllogictest/src/duckdb.rs @@ -0,0 +1,39 @@ +use std::{error::Error, process::Command, time::Duration}; + +use async_trait::async_trait; +use sqllogictest::{DBOutput, DefaultColumnType, runner::AsyncDB}; + +use crate::error::TestError; + +struct DuckDB {} + +#[async_trait] +impl AsyncDB for DuckDB { + type Error = TestError; + type ColumnType = DefaultColumnType; + + async fn run(&mut self, sql: &str) -> Result<DBOutput<Self::ColumnType>, Self::Error> { + todo!() + } + + async fn shutdown(&mut self) { + todo!() + } + + fn engine_name(&self) -> &str { + "DuckDB" + } + + async fn sleep(dur: Duration) { + tokio::time::sleep(dur).await + } + + /// [`Runner`] calls this function to run a system command. + /// + /// The default implementation is `std::process::Command::output`, which is universal to any + /// async runtime but would block the current thread. If you are running in tokio runtime, you + /// should override this by `tokio::process::Command::output`. 
+ async fn run_command(command: Command) -> std::io::Result<std::process::Output> { + tokio::process::Command::from(command).output().await + } +} diff --git a/vortex-sqllogictest/src/error.rs b/vortex-sqllogictest/src/error.rs new file mode 100644 index 00000000000..2e089531314 --- /dev/null +++ b/vortex-sqllogictest/src/error.rs @@ -0,0 +1,20 @@ +use datafusion_sqllogictest::DFSqlLogicTestError; + +#[derive(Debug, thiserror::Error)] +pub enum TestError { + Other(String), +} + +impl std::fmt::Display for TestError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TestError::Other(msg) => write!(f, "Other: {msg}"), + } + } +} + +impl From<DFSqlLogicTestError> for TestError { + fn from(value: DFSqlLogicTestError) -> Self { + match value {} + } +} diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs new file mode 100644 index 00000000000..031655b658b --- /dev/null +++ b/vortex-sqllogictest/src/lib.rs @@ -0,0 +1,2 @@ +pub mod duckdb; +pub mod error; diff --git a/vortex-sqllogictest/src/main.rs b/vortex-sqllogictest/src/main.rs new file mode 100644 index 00000000000..e7a11a969c0 --- /dev/null +++ b/vortex-sqllogictest/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} From ae134bd95a3ba7449f2e93c7bbfbd219d83c5a6a Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 3 Feb 2026 13:39:51 +0000 Subject: [PATCH 2/6] license header Signed-off-by: Adam Gutglick --- vortex-sqllogictest/src/duckdb.rs | 3 +++ vortex-sqllogictest/src/error.rs | 3 +++ vortex-sqllogictest/src/lib.rs | 3 +++ vortex-sqllogictest/src/main.rs | 3 +++ 4 files changed, 12 insertions(+) diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs index 2056e17870d..eec949f8447 100644 --- a/vortex-sqllogictest/src/duckdb.rs +++ b/vortex-sqllogictest/src/duckdb.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use std::{error::Error, process::Command, time::Duration}; 
use async_trait::async_trait; diff --git a/vortex-sqllogictest/src/error.rs b/vortex-sqllogictest/src/error.rs index 2e089531314..cf1477b78ba 100644 --- a/vortex-sqllogictest/src/error.rs +++ b/vortex-sqllogictest/src/error.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use datafusion_sqllogictest::DFSqlLogicTestError; #[derive(Debug, thiserror::Error)] diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs index 031655b658b..7d70d3e8585 100644 --- a/vortex-sqllogictest/src/lib.rs +++ b/vortex-sqllogictest/src/lib.rs @@ -1,2 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + pub mod duckdb; pub mod error; diff --git a/vortex-sqllogictest/src/main.rs b/vortex-sqllogictest/src/main.rs index e7a11a969c0..bae792e3f1f 100644 --- a/vortex-sqllogictest/src/main.rs +++ b/vortex-sqllogictest/src/main.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + fn main() { println!("Hello, world!"); } From f8342c85b76a578469550d31bf21426151419d54 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 3 Feb 2026 14:10:52 +0000 Subject: [PATCH 3/6] more work Signed-off-by: Adam Gutglick --- Cargo.lock | 446 +++++++++++++++++- vortex-sqllogictest/Cargo.toml | 6 + vortex-sqllogictest/src/args.rs | 17 + .../{main.rs => bin/sqllogictests-runner.rs} | 5 +- vortex-sqllogictest/src/error.rs | 8 +- vortex-sqllogictest/src/lib.rs | 1 + 6 files changed, 479 insertions(+), 4 deletions(-) create mode 100644 vortex-sqllogictest/src/args.rs rename vortex-sqllogictest/src/{main.rs => bin/sqllogictests-runner.rs} (66%) diff --git a/Cargo.lock b/Cargo.lock index 60404bcb4c5..dd1816ad137 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,6 +145,35 @@ version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +[[package]] +name = "apache-avro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "bzip2", + "crc32fast", + "digest", + "liblzma", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", + "uuid", + "zstd", +] + [[package]] name = "approx" version = "0.5.1" @@ -154,6 +183,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -446,6 +484,7 @@ dependencies = [ "arrow-select 57.2.0", "flatbuffers", "lz4_flex 0.12.0", + "zstd", ] [[package]] @@ -1280,6 +1319,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", + "serde", ] [[package]] @@ -1527,7 +1567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c6d47a4e2961fb8721bcfc54feae6455f2f64e7054f9bc67e875f0e77f4c58d" dependencies = [ "rust_decimal", - "schemars", + "schemars 1.2.1", "serde", "utf8-width", ] @@ -1985,9 +2025,13 @@ version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ + "bzip2", "compression-core", "flate2", + "liblzma", "memchr", + "zstd", + "zstd-safe", ] [[package]] @@ -2495,6 +2539,7 @@ dependencies = [ "arrow-schema 57.2.0", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-catalog 52.1.0", "datafusion-catalog-listing 52.1.0", @@ -2502,6 +2547,7 @@ dependencies = [ 
"datafusion-common-runtime 52.1.0", "datafusion-datasource 52.1.0", "datafusion-datasource-arrow", + "datafusion-datasource-avro", "datafusion-datasource-csv 52.1.0", "datafusion-datasource-json 52.1.0", "datafusion-datasource-parquet", @@ -2521,8 +2567,10 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", "datafusion-sql 52.1.0", + "flate2", "futures", "itertools 0.14.0", + "liblzma", "log", "object_store", "parking_lot", @@ -2534,6 +2582,7 @@ dependencies = [ "tokio", "url", "uuid", + "zstd", ] [[package]] @@ -2690,6 +2739,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash 0.8.12", + "apache-avro", "arrow 57.2.0", "arrow-ipc 57.2.0", "chrono", @@ -2701,6 +2751,7 @@ dependencies = [ "object_store", "parquet 57.2.0", "paste", + "recursive", "sqlparser 0.59.0", "tokio", "web-time", @@ -2764,8 +2815,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow 57.2.0", + "async-compression", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-common 52.1.0", "datafusion-common-runtime 52.1.0", @@ -2776,14 +2829,18 @@ dependencies = [ "datafusion-physical-expr-common 52.1.0", "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", + "flate2", "futures", "glob", "itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", "tokio", + "tokio-util", "url", + "zstd", ] [[package]] @@ -2810,6 +2867,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource-avro" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" +dependencies = [ + "apache-avro", + "arrow 57.2.0", + "async-trait", + "bytes", + "datafusion-common 52.1.0", + "datafusion-datasource 52.1.0", + 
"datafusion-physical-expr-common 52.1.0", + "datafusion-physical-plan 52.1.0", + "datafusion-session 52.1.0", + "futures", + "num-traits", + "object_store", +] + [[package]] name = "datafusion-datasource-csv" version = "50.3.0" @@ -3027,6 +3104,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "paste", + "recursive", "serde_json", "sqlparser 0.59.0", ] @@ -3095,6 +3173,8 @@ dependencies = [ "arrow 57.2.0", "arrow-buffer 57.2.0", "base64", + "blake2", + "blake3", "chrono", "chrono-tz", "datafusion-common 52.1.0", @@ -3106,9 +3186,11 @@ dependencies = [ "hex", "itertools 0.14.0", "log", + "md-5", "num-traits", "rand 0.9.2", "regex", + "sha2", "unicode-segmentation", "uuid", ] @@ -3370,6 +3452,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -3417,6 +3500,7 @@ dependencies = [ "parking_lot", "paste", "petgraph 0.8.3", + "recursive", "tokio", ] @@ -3516,6 +3600,7 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-pruning 52.1.0", "itertools 0.14.0", + "recursive", ] [[package]] @@ -3653,6 +3738,29 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "datafusion-spark" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556c431f5f2259620c8223254c0ef57aa9a85c576d4da0166157260f71eb0e25" +dependencies = [ + "arrow 57.2.0", + "bigdecimal", + "chrono", + "crc32fast", + "datafusion-catalog 52.1.0", + "datafusion-common 52.1.0", + "datafusion-execution 52.1.0", + "datafusion-expr 52.1.0", + "datafusion-functions 52.1.0", + "datafusion-functions-nested 52.1.0", + "log", + "percent-encoding", + "rand 0.9.2", + "sha1", + "url", +] + [[package]] name = "datafusion-sql" version = "50.3.0" @@ -3682,10 +3790,58 @@ dependencies = [ "datafusion-expr 52.1.0", "indexmap", "log", + "recursive", "regex", "sqlparser 0.59.0", ] +[[package]] +name = "datafusion-sqllogictest" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9d388fec80647198ae041d314dd7d9e2305207836ecec3ad48908eac6844cdef" +dependencies = [ + "arrow 57.2.0", + "async-trait", + "bigdecimal", + "clap", + "datafusion 52.1.0", + "datafusion-spark", + "datafusion-substrait", + "futures", + "half", + "indicatif", + "itertools 0.14.0", + "log", + "object_store", + "sqllogictest 0.28.4", + "sqlparser 0.59.0", + "tempfile", + "thiserror 2.0.18", + "tokio", +] + +[[package]] +name = "datafusion-substrait" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6042adacd0bd64e56c22f6a7f9ce0ce1793dd367c899d868179d029f110d9215" +dependencies = [ + "async-recursion", + "async-trait", + "chrono", + "datafusion 52.1.0", + "half", + "itertools 0.14.0", + "object_store", + "pbjson-types", + "prost 0.14.3", + "substrait", + "tokio", + "url", + "uuid", +] + [[package]] name = "deepsize" version = "0.2.0" @@ -6486,6 +6642,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -6630,6 +6787,15 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.5" @@ -7081,6 +7247,43 @@ dependencies = [ "stfu8", ] +[[package]] +name = "pbjson" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" +dependencies = [ + "base64", + "serde", +] + +[[package]] +name = "pbjson-build" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" +dependencies = [ + "heck", + "itertools 0.14.0", + "prost 0.14.3", + "prost-types 
0.14.3", +] + +[[package]] +name = "pbjson-types" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" +dependencies = [ + "bytes", + "chrono", + "pbjson", + "pbjson-build", + "prost 0.14.3", + "prost-build 0.14.3", + "serde", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -7647,6 +7850,16 @@ dependencies = [ "prost 0.14.3", ] +[[package]] +name = "psm" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -7787,6 +8000,12 @@ dependencies = [ "url", ] +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + [[package]] name = "quick-xml" version = "0.37.5" @@ -8123,6 +8342,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.114", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -8207,6 +8446,16 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "regress" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" +dependencies = [ + "hashbrown 0.16.1", + "memchr", +] + [[package]] name = "relative-path" version = "1.9.3" @@ -8614,6 +8863,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "1.2.1" @@ -8626,6 +8887,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.114", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -8702,6 +8975,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "seq-macro" @@ -8719,6 +8996,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -8739,6 +9026,17 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "serde_json" version = 
"1.0.149" @@ -8781,6 +9079,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.114", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8793,6 +9103,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -9040,6 +9363,31 @@ dependencies = [ "der", ] +[[package]] +name = "sqllogictest" +version = "0.28.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" +dependencies = [ + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "sqllogictest" version = "0.29.0" @@ -9082,6 +9430,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -9102,6 +9451,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + 
"cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -9189,6 +9551,31 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "substrait" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +dependencies = [ + "heck", + "pbjson", + "pbjson-build", + "pbjson-types", + "prettyplease", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.114", + "typify", + "walkdir", +] + [[package]] name = "subtle" version = "2.6.1" @@ -10102,6 +10489,53 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typify" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" +dependencies = [ + "heck", + "log", + "proc-macro2", + "quote", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "syn 2.0.114", + "thiserror 2.0.18", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" +dependencies = [ + "proc-macro2", + "quote", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.114", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.7" @@ -10180,6 +10614,12 
@@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -11223,8 +11663,10 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "clap", "datafusion 52.1.0", - "sqllogictest", + "datafusion-sqllogictest", + "sqllogictest 0.29.0", "thiserror 2.0.18", "tokio", "vortex", diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml index 7ddf64ad2db..bca339d80ba 100644 --- a/vortex-sqllogictest/Cargo.toml +++ b/vortex-sqllogictest/Cargo.toml @@ -16,6 +16,7 @@ version = { workspace = true } [dependencies] anyhow = { workspace = true } async-trait = { workspace = true } +clap = { workspace = true, features = ["derive"] } datafusion = { workspace = true } datafusion-sqllogictest = { workspace = true } sqllogictest = "0.29" @@ -27,3 +28,8 @@ vortex-duckdb = { workspace = true } [lints] workspace = true + +[[test]] +harness = false +name = "sqllogictests" +path = "bin/sqllogictests-runner.rs" diff --git a/vortex-sqllogictest/src/args.rs b/vortex-sqllogictest/src/args.rs new file mode 100644 index 00000000000..2c52f39dd54 --- /dev/null +++ b/vortex-sqllogictest/src/args.rs @@ -0,0 +1,17 @@ +use clap::Parser; + +#[derive(clap::ValueEnum, Clone, Copy)] +pub enum Engine { + #[clap(name = "datafusion")] + DataFusion, + #[clap(name = "duckdb")] + DuckDB, +} + +#[derive(Parser)] +pub struct Args { + #[arg(short, long, value_enum, value_delimiter = ',')] + engine: Option>, + #[arg(action)] + filter: Option, +} diff --git a/vortex-sqllogictest/src/main.rs b/vortex-sqllogictest/src/bin/sqllogictests-runner.rs similarity index 66% rename from vortex-sqllogictest/src/main.rs rename to 
vortex-sqllogictest/src/bin/sqllogictests-runner.rs index bae792e3f1f..2e5f42d79c0 100644 --- a/vortex-sqllogictest/src/main.rs +++ b/vortex-sqllogictest/src/bin/sqllogictests-runner.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -fn main() { +#[tokio::main] +async fn main() -> anyhow::Result<()> { println!("Hello, world!"); + + Ok(()) } diff --git a/vortex-sqllogictest/src/error.rs b/vortex-sqllogictest/src/error.rs index cf1477b78ba..9957103e062 100644 --- a/vortex-sqllogictest/src/error.rs +++ b/vortex-sqllogictest/src/error.rs @@ -18,6 +18,12 @@ impl std::fmt::Display for TestError { impl From for TestError { fn from(value: DFSqlLogicTestError) -> Self { - match value {} + match value { + DFSqlLogicTestError::SqlLogicTest(test_error) => todo!(), + DFSqlLogicTestError::DataFusion(data_fusion_error) => todo!(), + DFSqlLogicTestError::Sql(parser_error) => todo!(), + DFSqlLogicTestError::Arrow(arrow_error) => todo!(), + DFSqlLogicTestError::Other(_) => todo!(), + } } } diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs index 7d70d3e8585..e6adcb5ef0b 100644 --- a/vortex-sqllogictest/src/lib.rs +++ b/vortex-sqllogictest/src/lib.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +pub mod args; pub mod duckdb; pub mod error; From 834aef4c23cbdb46a543e59236a7a9f262b222e6 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 3 Feb 2026 14:15:07 +0000 Subject: [PATCH 4/6] . 
Signed-off-by: Adam Gutglick --- vortex-sqllogictest/{src => }/bin/sqllogictests-runner.rs | 5 +++++ vortex-sqllogictest/src/args.rs | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) rename vortex-sqllogictest/{src => }/bin/sqllogictests-runner.rs (63%) diff --git a/vortex-sqllogictest/src/bin/sqllogictests-runner.rs b/vortex-sqllogictest/bin/sqllogictests-runner.rs similarity index 63% rename from vortex-sqllogictest/src/bin/sqllogictests-runner.rs rename to vortex-sqllogictest/bin/sqllogictests-runner.rs index 2e5f42d79c0..8b129ee9e11 100644 --- a/vortex-sqllogictest/src/bin/sqllogictests-runner.rs +++ b/vortex-sqllogictest/bin/sqllogictests-runner.rs @@ -1,9 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use clap::Parser; +use vortex_sqllogictest::args::Args; + #[tokio::main] async fn main() -> anyhow::Result<()> { + let args = Args::parse(); println!("Hello, world!"); + println!("Args: {args:?}"); Ok(()) } diff --git a/vortex-sqllogictest/src/args.rs b/vortex-sqllogictest/src/args.rs index 2c52f39dd54..ee4e29e1106 100644 --- a/vortex-sqllogictest/src/args.rs +++ b/vortex-sqllogictest/src/args.rs @@ -1,6 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use clap::Parser; -#[derive(clap::ValueEnum, Clone, Copy)] +#[derive(clap::ValueEnum, Clone, Copy, Debug)] pub enum Engine { #[clap(name = "datafusion")] DataFusion, @@ -8,7 +11,7 @@ pub enum Engine { DuckDB, } -#[derive(Parser)] +#[derive(Parser, Debug)] pub struct Args { #[arg(short, long, value_enum, value_delimiter = ',')] engine: Option>, From f2331b294175f478e3c9dba20fee9f9beac75dff Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 3 Feb 2026 14:52:42 +0000 Subject: [PATCH 5/6] More work Signed-off-by: Adam Gutglick --- Cargo.lock | 29 +--------- vortex-duckdb/src/duckdb/connection.rs | 10 +++- vortex-duckdb/src/duckdb/logical_type.rs | 16 ++++-- 
vortex-duckdb/src/duckdb/query_result.rs | 6 ++- vortex-duckdb/src/lib.rs | 2 +- vortex-sqllogictest/Cargo.toml | 2 +- vortex-sqllogictest/src/duckdb.rs | 67 ++++++++++++++++++++++-- 7 files changed, 90 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd1816ad137..7c8a1b8dc6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3814,7 +3814,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "sqllogictest 0.28.4", + "sqllogictest", "sqlparser 0.59.0", "tempfile", "thiserror 2.0.18", @@ -9388,31 +9388,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "sqllogictest" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dffbf03091090a9330529c3926313be0a0570f036edfd490b11db39eea4b7118" -dependencies = [ - "async-trait", - "educe", - "fs-err", - "futures", - "glob", - "humantime", - "itertools 0.13.0", - "libtest-mimic", - "md-5", - "owo-colors", - "rand 0.8.5", - "regex", - "similar", - "subst", - "tempfile", - "thiserror 2.0.18", - "tracing", -] - [[package]] name = "sqlparser" version = "0.58.0" @@ -11666,7 +11641,7 @@ dependencies = [ "clap", "datafusion 52.1.0", "datafusion-sqllogictest", - "sqllogictest 0.29.0", + "sqllogictest", "thiserror 2.0.18", "tokio", "vortex", diff --git a/vortex-duckdb/src/duckdb/connection.rs b/vortex-duckdb/src/duckdb/connection.rs index af712acd09d..4670f79724d 100644 --- a/vortex-duckdb/src/duckdb/connection.rs +++ b/vortex-duckdb/src/duckdb/connection.rs @@ -200,8 +200,14 @@ mod tests { .query("SELECT 1 as int_col, 'text' as str_col") .unwrap(); - assert_eq!(result.column_type(0), cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - assert_eq!(result.column_type(1), cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + assert_eq!( + result.column_type(0).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER + ); + assert_eq!( + result.column_type(1).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR + ); } #[test] diff --git a/vortex-duckdb/src/duckdb/logical_type.rs 
b/vortex-duckdb/src/duckdb/logical_type.rs index 61d393a1a9d..59604fa7b66 100644 --- a/vortex-duckdb/src/duckdb/logical_type.rs +++ b/vortex-duckdb/src/duckdb/logical_type.rs @@ -134,10 +134,6 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BLOB) } - pub fn int64() -> Self { - Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) - } - pub fn uint64() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT) } @@ -146,10 +142,22 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER) } + pub fn int64() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) + } + pub fn bool() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN) } + pub fn float32() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_FLOAT) + } + + pub fn float64() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE) + } + pub fn as_decimal(&self) -> (u8, u8) { unsafe { ( diff --git a/vortex-duckdb/src/duckdb/query_result.rs b/vortex-duckdb/src/duckdb/query_result.rs index ff285f9ff84..bad1941e10e 100644 --- a/vortex-duckdb/src/duckdb/query_result.rs +++ b/vortex-duckdb/src/duckdb/query_result.rs @@ -7,6 +7,7 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; +use crate::LogicalType; use crate::cpp; use crate::duckdb::DataChunk; use crate::wrapper; @@ -67,8 +68,9 @@ impl QueryResult { } /// Get the type of a column by index. 
- pub fn column_type(&self, col_idx: usize) -> cpp::DUCKDB_TYPE { - unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) } + pub fn column_type(&self, col_idx: usize) -> LogicalType { + let dtype = unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) }; + LogicalType::new(dtype) } } diff --git a/vortex-duckdb/src/lib.rs b/vortex-duckdb/src/lib.rs index d4ffaf90eb0..9f85fae772c 100644 --- a/vortex-duckdb/src/lib.rs +++ b/vortex-duckdb/src/lib.rs @@ -23,7 +23,7 @@ pub use crate::duckdb::LogicalType; pub use crate::duckdb::Value; use crate::scan::VortexTableFunction; -mod convert; +pub mod convert; pub mod duckdb; pub mod exporter; mod scan; diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml index bca339d80ba..bbea16dba29 100644 --- a/vortex-sqllogictest/Cargo.toml +++ b/vortex-sqllogictest/Cargo.toml @@ -19,7 +19,7 @@ async-trait = { workspace = true } clap = { workspace = true, features = ["derive"] } datafusion = { workspace = true } datafusion-sqllogictest = { workspace = true } -sqllogictest = "0.29" +sqllogictest = "0.28" thiserror = { workspace = true } tokio = { workspace = true, features = ["full"] } vortex = { workspace = true } diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs index eec949f8447..9432332a551 100644 --- a/vortex-sqllogictest/src/duckdb.rs +++ b/vortex-sqllogictest/src/duckdb.rs @@ -1,22 +1,79 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::{error::Error, process::Command, time::Duration}; +use std::sync::Arc; +use std::{process::Command, time::Duration}; use async_trait::async_trait; -use sqllogictest::{DBOutput, DefaultColumnType, runner::AsyncDB}; +use datafusion_sqllogictest::DFColumnType; +use sqllogictest::{DBOutput, runner::AsyncDB}; + +use vortex_duckdb::LogicalType; +use vortex_duckdb::duckdb::Connection; +use vortex_duckdb::duckdb::Database; +use vortex_duckdb::duckdb::{Config, DuckDBType}; 
use crate::error::TestError; -struct DuckDB {} +struct Inner { + conn: Connection, + db: Database, +} + +unsafe impl Send for Inner {} +unsafe impl Sync for Inner {} + +struct DuckDB { + inner: Arc, +} + +impl DuckDB { + fn normalize_column_type(dtype: LogicalType) -> DFColumnType { + let type_id = dtype.as_type_id(); + if type_id == LogicalType::int32().as_type_id() + || type_id == LogicalType::int64().as_type_id() + || type_id == LogicalType::uint64().as_type_id() + { + DFColumnType::Integer + } else if type_id == LogicalType::varchar().as_type_id() { + DFColumnType::Text + } else if type_id == LogicalType::bool().as_type_id() { + DFColumnType::Boolean + } else if type_id == LogicalType::float32().as_type_id() + || type_id == LogicalType::float64().as_type_id() + { + DFColumnType::Float + } else { + DFColumnType::Another + } + } +} #[async_trait] impl AsyncDB for DuckDB { type Error = TestError; - type ColumnType = DefaultColumnType; + type ColumnType = DFColumnType; async fn run(&mut self, sql: &str) -> Result, Self::Error> { - todo!() + let r = self + .inner + .conn + .query(sql) + .map_err(|e| TestError::Other(e.to_string()))?; + + if r.column_count() == 0 && r.row_count() == 0 { + Ok(DBOutput::StatementComplete(0)) + } else { + let mut types = Vec::default(); + let rows = Vec::default(); + + for col_idx in 0..r.column_count() as usize { + let dtype = r.column_type(col_idx); + types.push(Self::normalize_column_type(dtype)); + } + + Ok(DBOutput::Rows { types, rows }) + } } async fn shutdown(&mut self) { From f2f1faeb8515915ec0f6911c64157057c1224925 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 4 Feb 2026 12:36:41 +0000 Subject: [PATCH 6/6] save some work Signed-off-by: Adam Gutglick --- Cargo.lock | 2 + vortex-sqllogictest/Cargo.toml | 4 +- .../bin/sqllogictests-runner.rs | 71 ++++++++++++++++++- vortex-sqllogictest/build.rs | 5 ++ vortex-sqllogictest/slt/create.slt | 2 + vortex-sqllogictest/src/args.rs | 48 ++++++++++++- 
vortex-sqllogictest/src/duckdb.rs | 4 +- vortex-sqllogictest/src/lib.rs | 1 + vortex-sqllogictest/src/utils.rs | 27 +++++++ 9 files changed, 155 insertions(+), 9 deletions(-) create mode 100644 vortex-sqllogictest/build.rs create mode 100644 vortex-sqllogictest/slt/create.slt create mode 100644 vortex-sqllogictest/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 7c8a1b8dc6f..3675701dc3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11641,6 +11641,8 @@ dependencies = [ "clap", "datafusion 52.1.0", "datafusion-sqllogictest", + "futures", + "indicatif", "sqllogictest", "thiserror 2.0.18", "tokio", diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml index bbea16dba29..04492cb55cf 100644 --- a/vortex-sqllogictest/Cargo.toml +++ b/vortex-sqllogictest/Cargo.toml @@ -19,10 +19,12 @@ async-trait = { workspace = true } clap = { workspace = true, features = ["derive"] } datafusion = { workspace = true } datafusion-sqllogictest = { workspace = true } +futures.workspace = true +indicatif.workspace = true sqllogictest = "0.28" thiserror = { workspace = true } tokio = { workspace = true, features = ["full"] } -vortex = { workspace = true } +vortex = { workspace = true, features = ["tokio"] } vortex-datafusion = { workspace = true } vortex-duckdb = { workspace = true } diff --git a/vortex-sqllogictest/bin/sqllogictests-runner.rs b/vortex-sqllogictest/bin/sqllogictests-runner.rs index 8b129ee9e11..ca1778b023f 100644 --- a/vortex-sqllogictest/bin/sqllogictests-runner.rs +++ b/vortex-sqllogictest/bin/sqllogictests-runner.rs @@ -1,14 +1,79 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::sync::Arc; + use clap::Parser; -use vortex_sqllogictest::args::Args; +use datafusion::common::GetExt; +use datafusion::{ + datasource::provider::DefaultTableFactory, execution::SessionStateBuilder, + prelude::SessionContext, +}; +use datafusion_sqllogictest::DataFusion; +use futures::{StreamExt, 
TryStreamExt}; +use indicatif::ProgressBar; +use sqllogictest::parse_file; +use vortex_datafusion::VortexFormatFactory; +use vortex_sqllogictest::{args::Args, utils::list_files}; + +fn main() -> anyhow::Result<()> { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()? + .block_on(run_all())?; + Ok(()) +} -#[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn run_all() -> anyhow::Result<()> { let args = Args::parse(); println!("Hello, world!"); println!("Args: {args:?}"); + if args.list { + eprintln!("Ignoring `--list` which is unsupported by `sqlogictests-runner`"); + + return Ok(()); + } + + let pb = ProgressBar::new_spinner(); + + let all_errors = futures::stream::iter(list_files("../slt")?) + .map(|path| { + let pb = pb.clone(); + + async move { + let mut errors = vec![]; + let factory = Arc::new(VortexFormatFactory::new()); + let session_state_builder = SessionStateBuilder::new() + .with_default_features() + .with_table_factory( + factory.get_ext().to_uppercase(), + Arc::new(DefaultTableFactory::new()), + ) + .with_file_formats(vec![factory]); + + let session = SessionContext::new_with_state(session_state_builder.build()) + .enable_url_table(); + + let mut runner = sqllogictest::Runner::new(|| async { + Ok(DataFusion::new(session.clone(), path.clone(), pb.clone())) + }); + + for record in parse_file(path.canonicalize()?)? 
{ + if let Err(e) = runner.run_async(record).await { + errors.push(e.to_string()); + } + } + + anyhow::Ok(errors) + } + }) + .buffer_unordered(args.test_threads) + .flat_map(|errs| { + let errs = errs?; + Ok(errs) + }) + .try_collect::>>(); + Ok(()) } diff --git a/vortex-sqllogictest/build.rs b/vortex-sqllogictest/build.rs new file mode 100644 index 00000000000..f8eca277962 --- /dev/null +++ b/vortex-sqllogictest/build.rs @@ -0,0 +1,5 @@ +fn main() { + // Propagate DuckDB rpath from vortex-duckdb + let duckdb_lib = std::env::var("DEP_DUCKDB_LIB_DIR").unwrap(); + println!("cargo:rustc-link-arg=-Wl,-rpath,{duckdb_lib}"); +} diff --git a/vortex-sqllogictest/slt/create.slt b/vortex-sqllogictest/slt/create.slt new file mode 100644 index 00000000000..677b893a205 --- /dev/null +++ b/vortex-sqllogictest/slt/create.slt @@ -0,0 +1,2 @@ +statement ok +CREATE TABLE foo AS VALUES(1,2),(2,3); \ No newline at end of file diff --git a/vortex-sqllogictest/src/args.rs b/vortex-sqllogictest/src/args.rs index ee4e29e1106..0d8cc311688 100644 --- a/vortex-sqllogictest/src/args.rs +++ b/vortex-sqllogictest/src/args.rs @@ -14,7 +14,51 @@ pub enum Engine { #[derive(Parser, Debug)] pub struct Args { #[arg(short, long, value_enum, value_delimiter = ',')] - engine: Option>, + pub engine: Option>, #[arg(action)] - filter: Option, + pub filter: Option, + + #[clap( + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub format: Option, + + #[clap( + short = 'Z', + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub z_options: Option, + + #[clap( + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub show_output: bool, + + #[clap( + long, + help = "Quits immediately, not listing anything (for compatibility with built-in rust test runner)" + )] + pub list: bool, + + #[clap( + long, + help = "IGNORED (for compatibility with built-in rust test runner)" + )] + pub ignored: bool, + + #[clap( + long, + 
help = "IGNORED (for compatibility with built-in rust test runner)" + )] + pub nocapture: bool, + + #[clap( + long, + help = "Number of threads used for running tests in parallel", + default_value_t = 16 + )] + pub test_threads: usize, } diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs index 9432332a551..53599f30d59 100644 --- a/vortex-sqllogictest/src/duckdb.rs +++ b/vortex-sqllogictest/src/duckdb.rs @@ -76,9 +76,7 @@ impl AsyncDB for DuckDB { } } - async fn shutdown(&mut self) { - todo!() - } + async fn shutdown(&mut self) {} fn engine_name(&self) -> &str { "DuckDB" diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs index e6adcb5ef0b..fb4e37445e7 100644 --- a/vortex-sqllogictest/src/lib.rs +++ b/vortex-sqllogictest/src/lib.rs @@ -4,3 +4,4 @@ pub mod args; pub mod duckdb; pub mod error; +pub mod utils; diff --git a/vortex-sqllogictest/src/utils.rs b/vortex-sqllogictest/src/utils.rs new file mode 100644 index 00000000000..4abb4e74b32 --- /dev/null +++ b/vortex-sqllogictest/src/utils.rs @@ -0,0 +1,27 @@ +use std::path::{Path, PathBuf}; + +pub fn list_files(path: impl AsRef) -> anyhow::Result> { + let mut file_paths = vec![]; + + list_files_impl(&mut file_paths, path)?; + + Ok(file_paths) +} + +fn list_files_impl(file_paths: &mut Vec, path: impl AsRef) -> anyhow::Result<()> { + let read_dir = std::fs::read_dir(path)?; + for entry in read_dir { + let entry = entry?; + + if entry.metadata()?.is_dir() { + list_files_impl(file_paths, entry.path())?; + } else { + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "slt") { + file_paths.push(entry.path()); + } + } + } + + Ok(()) +}