diff --git a/encodings/fastlanes/benches/bitpacking_take.rs b/encodings/fastlanes/benches/bitpacking_take.rs index 3c688598721..a71c5655b3f 100644 --- a/encodings/fastlanes/benches/bitpacking_take.rs +++ b/encodings/fastlanes/benches/bitpacking_take.rs @@ -11,6 +11,9 @@ use rand::distr::Uniform; use rand::prelude::StdRng; use vortex_array::Array; use vortex_array::IntoArray as _; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::warm_up_vtables; use vortex_array::validity::Validity; @@ -31,8 +34,14 @@ fn take_10_stratified(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..10).map(|i| i * 10_000)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -43,8 +52,14 @@ fn take_10_contiguous(bencher: Bencher) { let indices = buffer![0..10].into_array(); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -58,8 +73,14 @@ fn take_10k_random(bencher: Bencher) { let indices = PrimitiveArray::from_iter(rng.sample_iter(range).take(10_000).map(|i| i as u32)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -70,8 +91,14 @@ fn take_10k_contiguous(bencher: Bencher) { let indices = PrimitiveArray::from_iter(0..10_000); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -82,8 +109,14 @@ fn take_200k_dispersed(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..200_000).map(|i| (i * 42) % values.len() as u64)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -94,8 +127,14 @@ fn take_200k_first_chunk_only(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..200_000).map(|i| ((i * 42) % 1024) as u64)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } fn fixture(len: usize, bits: usize) -> Buffer { @@ -133,8 +172,14 @@ fn patched_take_10_stratified(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..10).map(|i| i * 10_000)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -152,8 +197,14 @@ fn patched_take_10_contiguous(bencher: Bencher) { let indices = buffer![0..10].into_array(); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -167,8 +218,14 @@ fn patched_take_10k_random(bencher: Bencher) { let indices = PrimitiveArray::from_iter(rng.sample_iter(range).take(10_000).map(|i| i as u32)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -179,8 +236,14 @@ fn patched_take_10k_contiguous_not_patches(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0u32..NUM_EXCEPTIONS).cycle().take(10000)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -199,8 +262,14 @@ fn patched_take_10k_contiguous_patches(bencher: Bencher) { PrimitiveArray::from_iter((BIG_BASE2..BIG_BASE2 + NUM_EXCEPTIONS).cycle().take(10000)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -211,8 +280,14 @@ fn patched_take_200k_dispersed(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..200_000).map(|i| (i * 42) % values.len() as u64)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -223,8 +298,14 @@ fn patched_take_200k_first_chunk_only(bencher: Bencher) { let indices = PrimitiveArray::from_iter((0..200_000).map(|i| ((i * 42) % 1024) as u64)); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } #[divan::bench] @@ -242,6 +323,12 @@ fn patched_take_10k_adversarial(bencher: Bencher) { ); bencher - .with_inputs(|| (&packed, &indices)) - .bench_refs(|(packed, indices)| packed.take(indices.to_array()).unwrap()) + .with_inputs(|| (&packed, &indices, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(packed, indices, execution_ctx)| { + packed + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }) } diff --git a/encodings/runend/benches/run_end_compress.rs b/encodings/runend/benches/run_end_compress.rs index 24cabc6f155..ebf898fd8b9 100644 --- a/encodings/runend/benches/run_end_compress.rs +++ b/encodings/runend/benches/run_end_compress.rs @@ -7,6 +7,9 @@ use divan::Bencher; use itertools::repeat_n; use vortex_array::Array; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::warm_up_vtables; use vortex_array::validity::Validity; @@ -95,6 +98,18 @@ fn take_indices(bencher: Bencher, (length, run_step): (usize, usize)) { .to_array(); bencher - .with_inputs(|| (&source_array, &runend_array)) - .bench_refs(|(array, indices)| array.take(indices.to_array()).unwrap()); + .with_inputs(|| { + ( + &source_array, + &runend_array, + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(array, indices, execution_ctx)| { + array + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }); } diff --git a/java/testfiles/Cargo.lock b/java/testfiles/Cargo.lock index 63a35967386..e5c8d3b8f19 100644 --- a/java/testfiles/Cargo.lock +++ b/java/testfiles/Cargo.lock @@ -387,6 +387,24 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef0a3155e943e341e557863e69a708999c94ede624e37865c8e2a91b94efa78f" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.114", +] + [[package]] name = "bit-vec" version = "0.8.0" @@ -443,9 +461,9 @@ checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" @@ -459,6 +477,15 @@ dependencies = [ "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -476,6 +503,17 @@ dependencies = [ "windows-link", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -559,6 +597,18 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "custom-labels" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a750ea4bdb7dbf9584b5d5c668bfa3835f88275781a947b5ea0212945bbdd41f" +dependencies = [ + "bindgen", + "cc", + "libc", + "pin-project-lite", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -659,16 +709,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "exponential-decay-histogram" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7962d7e9baab6ea05af175491fa6a8441f3bf461558037622142573d98dd6d6" -dependencies = [ - "ordered-float 5.1.0", - "rand 0.9.2", -] - [[package]] name = "ext-trait" version = "1.0.1" @@ -1106,6 +1146,15 @@ dependencies = [ "rustversion", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1274,6 +1323,16 @@ version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.16" @@ -1329,6 +1388,12 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "moka" version = "0.12.13" @@ -1377,6 +1442,16 @@ version = "6.6.666" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf5a574dadd7941adeaa71823ecba5e28331b8313fb2e1c6a5c7e5981ea53ad6" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nougat" version = "0.2.4" @@ -1472,24 +1547,6 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - [[package]] name = "parking" version = "2.2.1" @@ -1660,7 +1717,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.114", @@ -1916,16 +1973,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" -dependencies = [ - "ordered-float 2.10.1", - "serde", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -2275,7 +2322,7 @@ dependencies = [ name = "vortex-alp" version = "0.1.0" dependencies = [ - "itertools", + "itertools 0.14.0", "num-traits", "prost", "rustc-hash", @@ -2312,7 +2359,7 @@ dependencies = [ "goldenfile", "humansize", "inventory", - "itertools", + "itertools 0.14.0", "multiversion", "num-traits", "num_enum", @@ -2344,9 +2391,11 @@ dependencies = [ name = "vortex-btrblocks" version = "0.1.0" dependencies = [ + "enum-iterator", "getrandom 0.3.4", - "itertools", + "itertools 0.14.0", "num-traits", + "pco", "rand 0.9.2", "rustc-hash", "tracing", @@ -2360,12 +2409,14 @@ dependencies = [ "vortex-fastlanes", "vortex-fsst", "vortex-mask", + "vortex-pco", "vortex-runend", "vortex-scalar", "vortex-sequence", "vortex-sparse", "vortex-utils", "vortex-zigzag", + "vortex-zstd", ] [[package]] @@ -2375,7 +2426,7 @@ dependencies = [ "arrow-buffer", "bitvec", "bytes", - "itertools", + "itertools 0.14.0", "simdutf8", "vortex-error", ] @@ -2391,6 +2442,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", ] [[package]] @@ -2401,7 +2453,7 @@ dependencies = [ "arrow-buffer", "arrow-schema", "half", - "itertools", + "itertools 0.14.0", "multiversion", "num-traits", "paste", @@ -2434,6 +2486,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", ] [[package]] @@ -2448,6 +2501,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", ] [[package]] @@ -2459,7 +2513,7 @@ dependencies = [ "arrow-schema", "flatbuffers", "half", - "itertools", + "itertools 0.14.0", "jiff", "num-traits", "num_enum", @@ -2492,7 +2546,7 @@ version = "0.1.0" dependencies = [ "arrayref", "fastlanes", - "itertools", + "itertools 0.14.0", "lending-iterator", "num-traits", "prost", @@ -2515,7 +2569,7 @@ dependencies = [ "flatbuffers", "futures", "getrandom 0.3.4", - "itertools", + "itertools 0.14.0", "kanal", "oneshot", "parking_lot", @@ -2524,6 +2578,7 @@ dependencies = [ "uuid", "vortex-alp", "vortex-array", + "vortex-btrblocks", "vortex-buffer", "vortex-bytebool", "vortex-datetime-parts", @@ -2582,6 +2637,7 @@ dependencies = [ "async-stream", "async-trait", "bytes", + "custom-labels", "futures", "getrandom 0.3.4", "handle", @@ -2607,7 +2663,7 @@ dependencies = [ "bytes", "flatbuffers", "futures", - "itertools", + "itertools 0.14.0", "pin-project-lite", "vortex-array", "vortex-buffer", @@ -2626,14 +2682,13 @@ dependencies = [ "async-trait", "flatbuffers", "futures", - "itertools", + "itertools 0.14.0", "kanal", "moka", "once_cell", "oneshot", "parking_lot", "paste", - "pco", "pin-project-lite", "prost", "rustc-hash", @@ -2650,12 +2705,10 @@ dependencies = [ "vortex-io", "vortex-mask", "vortex-metrics", - "vortex-pco", "vortex-scalar", "vortex-sequence", "vortex-session", "vortex-utils", - "vortex-zstd", ] [[package]] @@ -2663,7 +2716,7 @@ name = "vortex-mask" version = "0.1.0" dependencies = [ "arrow-buffer", - "itertools", + "itertools 0.14.0", "vortex-buffer", "vortex-error", ] @@ -2674,8 +2727,8 @@ version = "0.1.0" dependencies = [ "getrandom 0.3.4", "parking_lot", + "sketches-ddsketch", "vortex-session", - "witchcraft-metrics", ] [[package]] @@ -2706,7 +2759,7 @@ name = "vortex-runend" version = "0.1.0" dependencies = [ "arrow-array", - "itertools", + "itertools 0.14.0", "num-traits", "prost", "vortex-array", @@ -2724,7 +2777,7 @@ version = "0.1.0" dependencies = [ "arrow-array", "bytes", - "itertools", + "itertools 0.14.0", "num-traits", "paste", "prost", @@ -2746,7 +2799,7 @@ dependencies = [ "async-trait", "bit-vec", "futures", - "itertools", + "itertools 0.14.0", "parking_lot", "sketches-ddsketch", "tracing", @@ -2775,6 +2828,7 @@ dependencies = [ "vortex-proto", "vortex-runend", "vortex-scalar", + "vortex-session", ] [[package]] @@ -2792,7 +2846,7 @@ dependencies = [ name = "vortex-sparse" version = "0.1.0" dependencies = [ - "itertools", + "itertools 0.14.0", "num-traits", "prost", "vortex-array", @@ -2801,6 +2855,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", ] [[package]] @@ -2835,6 +2890,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", "zigzag", ] @@ -2842,7 +2898,7 @@ dependencies = [ name = "vortex-zstd" version = "0.1.0" dependencies = [ - "itertools", + "itertools 0.14.0", "prost", "vortex-array", "vortex-buffer", @@ -2850,6 +2906,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-session", "zstd", ] @@ -3093,19 +3150,6 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -[[package]] -name = "witchcraft-metrics" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "867a74dec702d742179279ab0b5bcab72ca5858c0c3ccf870bdb5c99f54d675b" -dependencies = [ - "exponential-decay-histogram", - "once_cell", - "parking_lot", - "serde", - "serde-value", -] - [[package]] name = "writeable" version = "0.6.2" diff --git a/vortex-array/benches/take_fsl.rs b/vortex-array/benches/take_fsl.rs index cd5a7233196..c4eea1890e8 100644 --- a/vortex-array/benches/take_fsl.rs +++ b/vortex-array/benches/take_fsl.rs @@ -16,6 +16,9 @@ use rand::SeedableRng; use rand::rngs::StdRng; use vortex_array::Array; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::FixedSizeListArray; use vortex_array::validity::Validity; use vortex_buffer::Buffer; @@ -60,8 +63,14 @@ fn take_fsl_random(bencher: Bencher, num_indices: usize) let indices_array = indices.into_array(); bencher - .with_inputs(|| (&fsl, &indices_array)) - .bench_refs(|(array, indices)| array.take(indices.to_array()).unwrap()); + .with_inputs(|| (&fsl, &indices_array, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(array, indices, execution_ctx)| { + array + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }); } #[divan::bench(args = NUM_INDICES, consts = LIST_SIZES)] @@ -79,6 +88,12 @@ fn take_fsl_nullable_random(bencher: Bencher, num_indice let indices_array = indices.into_array(); bencher - .with_inputs(|| (&fsl, &indices_array)) - .bench_refs(|(array, indices)| array.take(indices.to_array()).unwrap()); + .with_inputs(|| (&fsl, &indices_array, LEGACY_SESSION.create_execution_ctx())) + .bench_refs(|(array, indices, execution_ctx)| { + array + .take(indices.to_array()) + .unwrap() + .execute::(execution_ctx) + .unwrap() + }); } diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index 16efa3a688d..cbc8ddfeecf 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -60,7 +60,6 @@ pub struct BoolArray { } pub struct BoolArrayParts { - pub dtype: DType, pub bits: BufferHandle, pub offset: usize, pub len: usize, @@ -196,7 +195,6 @@ impl BoolArray { #[inline] pub fn into_parts(self) -> BoolArrayParts { BoolArrayParts { - dtype: self.dtype, bits: self.bits, offset: self.offset, len: self.len, diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index b3352444e6c..ab3c2f127a0 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -12,7 +12,6 @@ use vortex_dtype::DecimalDType; use vortex_dtype::DecimalType; use vortex_dtype::IntegerPType; use vortex_dtype::NativeDecimalType; -use vortex_dtype::Nullability; use vortex_dtype::match_each_decimal_value_type; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; @@ -95,7 +94,6 @@ pub struct DecimalArray { pub struct DecimalArrayParts { pub decimal_dtype: DecimalDType, - pub nullability: Nullability, pub values: BufferHandle, pub values_type: DecimalType, pub validity: Validity, @@ -279,12 +277,10 @@ impl DecimalArray { } pub fn into_parts(self) -> DecimalArrayParts { - let nullability = self.dtype.nullability(); let decimal_dtype = self.dtype.into_decimal_opt().vortex_expect("cannot fail"); DecimalArrayParts { decimal_dtype, - nullability, values: self.values, values_type: self.values_type, validity: self.validity, diff --git a/vortex-array/src/arrays/listview/array.rs b/vortex-array/src/arrays/listview/array.rs index 64cafe65d2d..418307fd0a7 100644 --- a/vortex-array/src/arrays/listview/array.rs +++ b/vortex-array/src/arrays/listview/array.rs @@ -6,7 +6,6 @@ use std::sync::Arc; use num_traits::AsPrimitive; use vortex_dtype::DType; use vortex_dtype::IntegerPType; -use vortex_dtype::Nullability; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -129,8 +128,6 @@ pub struct ListViewArray { } pub struct ListViewArrayParts { - pub nullability: Nullability, - pub elements_dtype: Arc, /// See `ListViewArray::elements` @@ -345,10 +342,8 @@ impl ListViewArray { } pub fn into_parts(self) -> ListViewArrayParts { - let nullability = self.dtype.nullability(); let dtype = self.dtype.into_list_element_opt().vortex_expect("is list"); ListViewArrayParts { - nullability, elements_dtype: dtype, elements: self.elements, offsets: self.offsets, diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index 8eda66cc255..bff84cb3ddd 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -77,7 +77,6 @@ pub struct PrimitiveArray { pub struct PrimitiveArrayParts { pub ptype: PType, - pub nullability: Nullability, pub buffer: BufferHandle, pub validity: Validity, } @@ -182,10 +181,8 @@ impl PrimitiveArray { /// Consume the primitive array and returns its component parts. pub fn into_parts(self) -> PrimitiveArrayParts { let ptype = self.ptype(); - let nullability = self.dtype.nullability(); PrimitiveArrayParts { ptype, - nullability, buffer: self.buffer, validity: self.validity, } diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 4cbeb38bbbb..cfe758c3f0f 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -8,7 +8,6 @@ use std::sync::Arc; use vortex_dtype::DType; use vortex_dtype::FieldName; use vortex_dtype::FieldNames; -use vortex_dtype::Nullability; use vortex_dtype::StructFields; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -151,7 +150,6 @@ pub struct StructArray { pub struct StructArrayParts { pub struct_fields: StructFields, - pub nullability: Nullability, pub fields: Arc<[ArrayRef]>, pub validity: Validity, } @@ -356,11 +354,9 @@ impl StructArray { } pub fn into_parts(self) -> StructArrayParts { - let nullability = self.dtype.nullability(); let struct_fields = self.dtype.into_struct_fields(); StructArrayParts { struct_fields, - nullability, fields: self.fields, validity: self.validity, } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 20707fb2e2b..ca9a9ca1ac9 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -3,6 +3,8 @@ //! Encodings that enable zero-copy sharing of data with Arrow. +use std::sync::Arc; + use vortex_buffer::Buffer; use vortex_dtype::DType; use vortex_dtype::NativePType; @@ -18,23 +20,29 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::BoolArray; +use crate::arrays::BoolArrayParts; use crate::arrays::BoolVTable; use crate::arrays::DecimalArray; +use crate::arrays::DecimalArrayParts; use crate::arrays::DecimalVTable; use crate::arrays::ExtensionArray; use crate::arrays::ExtensionVTable; use crate::arrays::FixedSizeListArray; use crate::arrays::FixedSizeListVTable; use crate::arrays::ListViewArray; +use crate::arrays::ListViewArrayParts; use crate::arrays::ListViewRebuildMode; use crate::arrays::ListViewVTable; use crate::arrays::NullArray; use crate::arrays::NullVTable; use crate::arrays::PrimitiveArray; +use crate::arrays::PrimitiveArrayParts; use crate::arrays::PrimitiveVTable; use crate::arrays::StructArray; +use crate::arrays::StructArrayParts; use crate::arrays::StructVTable; use crate::arrays::VarBinViewArray; +use crate::arrays::VarBinViewArrayParts; use crate::arrays::VarBinViewVTable; use crate::arrays::constant_canonicalize; use crate::builders::builder_with_capacity; @@ -498,6 +506,242 @@ impl Executable for Canonical { } } +/// Recursively execute the array until it reaches canonical form along with its validity. +/// +/// Callers should prefer to execute into `Columnar` instead of this specific target. +/// This target is useful when preparing arrays for writing. +pub struct CanonicalValidity(pub Canonical); + +impl Executable for CanonicalValidity { + fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { + match array.execute::(ctx)? { + n @ Canonical::Null(_) => Ok(CanonicalValidity(n)), + Canonical::Bool(b) => { + let BoolArrayParts { + bits, + offset, + len, + validity, + } = b.into_parts(); + Ok(CanonicalValidity(Canonical::Bool( + BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?, + ))) + } + Canonical::Primitive(p) => { + let PrimitiveArrayParts { + ptype, + buffer, + validity, + } = p.into_parts(); + Ok(CanonicalValidity(Canonical::Primitive(unsafe { + PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?) + }))) + } + Canonical::Decimal(d) => { + let DecimalArrayParts { + decimal_dtype, + values, + values_type, + validity, + } = d.into_parts(); + Ok(CanonicalValidity(Canonical::Decimal(unsafe { + DecimalArray::new_unchecked_handle( + values, + values_type, + decimal_dtype, + validity.execute(ctx)?, + ) + }))) + } + Canonical::VarBinView(vbv) => { + let VarBinViewArrayParts { + dtype, + buffers, + views, + validity, + } = vbv.into_parts(); + Ok(CanonicalValidity(Canonical::VarBinView(unsafe { + VarBinViewArray::new_handle_unchecked( + views, + buffers, + dtype, + validity.execute(ctx)?, + ) + }))) + } + Canonical::List(l) => { + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = l.into_parts(); + Ok(CanonicalValidity(Canonical::List(unsafe { + ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?) + }))) + } + Canonical::FixedSizeList(fsl) => { + let list_size = fsl.list_size(); + let len = fsl.len(); + let (elements, validity, _) = fsl.into_parts(); + Ok(CanonicalValidity(Canonical::FixedSizeList( + FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len), + ))) + } + Canonical::Struct(st) => { + let len = st.len(); + let StructArrayParts { + struct_fields, + fields, + validity, + } = st.into_parts(); + Ok(CanonicalValidity(Canonical::Struct(unsafe { + StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?) + }))) + } + Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension( + ExtensionArray::new( + ext.ext_dtype().clone(), + ext.storage() + .clone() + .execute::(ctx)? + .0 + .into_array(), + ), + ))), + } + } +} + +/// Recursively execute the array until all of its children are canonical. +/// +/// This method is useful to guarantee that all operators are fully executed, +/// callers should prefer an execution target that's suitable for their use case instead of this one. +pub struct RecursiveCanonical(pub Canonical); + +impl Executable for RecursiveCanonical { + fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { + match array.execute::(ctx)? { + n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)), + Canonical::Bool(b) => { + let BoolArrayParts { + bits, + offset, + len, + validity, + } = b.into_parts(); + Ok(RecursiveCanonical(Canonical::Bool( + BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?, + ))) + } + Canonical::Primitive(p) => { + let PrimitiveArrayParts { + ptype, + buffer, + validity, + } = p.into_parts(); + Ok(RecursiveCanonical(Canonical::Primitive(unsafe { + PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?) + }))) + } + Canonical::Decimal(d) => { + let DecimalArrayParts { + decimal_dtype, + values, + values_type, + validity, + } = d.into_parts(); + Ok(RecursiveCanonical(Canonical::Decimal(unsafe { + DecimalArray::new_unchecked_handle( + values, + values_type, + decimal_dtype, + validity.execute(ctx)?, + ) + }))) + } + Canonical::VarBinView(vbv) => { + let VarBinViewArrayParts { + dtype, + buffers, + views, + validity, + } = vbv.into_parts(); + Ok(RecursiveCanonical(Canonical::VarBinView(unsafe { + VarBinViewArray::new_handle_unchecked( + views, + buffers, + dtype, + validity.execute(ctx)?, + ) + }))) + } + Canonical::List(l) => { + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = l.into_parts(); + Ok(RecursiveCanonical(Canonical::List(unsafe { + ListViewArray::new_unchecked( + elements.execute::(ctx)?.0.into_array(), + offsets.execute::(ctx)?.0.into_array(), + sizes.execute::(ctx)?.0.into_array(), + validity.execute(ctx)?, + ) + }))) + } + Canonical::FixedSizeList(fsl) => { + let list_size = fsl.list_size(); + let len = fsl.len(); + let (elements, validity, _) = fsl.into_parts(); + Ok(RecursiveCanonical(Canonical::FixedSizeList( + FixedSizeListArray::new( + elements.execute::(ctx)?.0.into_array(), + list_size, + validity.execute(ctx)?, + len, + ), + ))) + } + Canonical::Struct(st) => { + let len = st.len(); + let StructArrayParts { + struct_fields, + fields, + validity, + } = st.into_parts(); + let executed_fields = fields + .iter() + .map(|f| Ok(f.clone().execute::(ctx)?.0.into_array())) + .collect::>>()?; + + Ok(RecursiveCanonical(Canonical::Struct(unsafe { + StructArray::new_unchecked( + executed_fields, + struct_fields, + len, + validity.execute(ctx)?, + ) + }))) + } + Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension( + ExtensionArray::new( + ext.ext_dtype().clone(), + ext.storage() + .clone() + .execute::(ctx)? + .0 + .into_array(), + ), + ))), + } + } +} + /// Execute a primitive typed array into a buffer of native values, assuming all values are valid. /// /// # Errors diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 8a800a90abe..3d2e611b317 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -22,6 +22,8 @@ use vortex_scalar::Scalar; use crate::Array; use crate::ArrayRef; +use crate::Canonical; +use crate::ExecutionCtx; use crate::IntoArray; use crate::ToCanonical; use crate::arrays::BoolArray; @@ -45,6 +47,16 @@ pub enum Validity { Array(ArrayRef), } +impl Validity { + /// Make a step towards canonicalising validity if necessary + pub fn execute(self, ctx: &mut ExecutionCtx) -> VortexResult { + match self { + v @ Validity::NonNullable | v @ Validity::AllValid | v @ Validity::AllInvalid => Ok(v), + Validity::Array(a) => Ok(Validity::Array(a.execute::(ctx)?.into_array())), + } + } +} + impl Validity { /// The [`DType`] of the underlying validity array (if it exists). pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index cb71089cc15..b2bfa1e977d 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -47,9 +47,9 @@ pub(crate) fn new_exporter( decimal_dtype, values_type, values, - nullability, } = array.into_parts(); let dest_values_type = precision_to_duckdb_storage_size(&decimal_dtype)?; + let nullability = validity.nullability(); let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index d529df82eaa..3a39705475d 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -53,10 +53,10 @@ pub(crate) fn new_exporter( offsets, sizes, validity, - nullability, } = array.into_parts(); // Cache an `elements` vector up front so that future exports can reference it. let num_elements = elements.len(); + let nullability = validity.nullability(); let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() {