Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 16 additions & 4 deletions benchmarks-website-v2/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,21 @@ async function refresh() {
}
if (!groups[group]) continue;

const [query, series] = b.name.split("/");
const seriesName = rename(series || "default");
const chartName = formatQuery(query);
// Random access names have the form: random-access/{dataset}/{pattern}/{format}
// Historical random access names: random-access/{format}
// Other benchmarks use: {query}/{series}
let seriesName, chartName;
const parts = b.name.split("/");
if (group === "Random Access" && parts.length === 4) {
chartName = `${parts[1]}/${parts[2]}`.toUpperCase().replace(/[_-]/g, " ");
seriesName = rename(parts[3] || "default");
} else if (group === "Random Access" && parts.length === 2) {
chartName = "RANDOM ACCESS";
seriesName = rename(parts[1] || "default");
} else {
seriesName = rename(parts[1] || "default");
chartName = formatQuery(parts[0]);
}
if (chartName.includes("PARQUET-UNC")) continue;

// Skip throughput metrics (keep only time/size)
Expand All @@ -286,7 +298,7 @@ async function refresh() {
else unit = "ns";
}

const sortPos = query.match(/q(\d+)$/i)?.[1]
const sortPos = parts[0].match(/q(\d+)$/i)?.[1]
? parseInt(RegExp.$1, 10)
: 0;
const idx = commitIdx.get(commit.id);
Expand Down
3 changes: 3 additions & 0 deletions benchmarks-website-v2/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ export const ENGINE_RENAMES = {
"duckdb:duckdb": "duckdb:duckdb",
"duckdb:vortex-compact": "duckdb:vortex-compact",
"vortex-tokio-local-disk": "vortex-nvme",
"vortex-compact-tokio-local-disk": "vortex-compact-nvme",
"lance-tokio-local-disk": "lance-nvme",
"parquet-tokio-local-disk": "parquet-nvme",
lance: "lance",
Expand All @@ -89,6 +90,7 @@ const BESPOKE_CONFIGS = [
name: "Random Access",
renamedDatasets: {
"vortex-tokio-local-disk": "vortex-nvme",
"vortex-compact-tokio-local-disk": "vortex-compact-nvme",
"lance-tokio-local-disk": "lance-nvme",
"parquet-tokio-local-disk": "parquet-nvme",
},
Expand Down Expand Up @@ -242,6 +244,7 @@ export const ENGINE_LABELS = {
// Series color map
export const SERIES_COLOR_MAP = {
"vortex-nvme": "#19a508",
"vortex-compact-nvme": "#15850a",
"parquet-nvme": "#ef7f1d",
"lance-nvme": "#3B82F6",
"datafusion:arrow": "#7a27b1",
Expand Down
84 changes: 62 additions & 22 deletions benchmarks/lance-bench/src/random_access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,19 @@ use lance::dataset::WriteParams;
use lance_encoding::version::LanceFileVersion;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex_bench::Format;
use vortex_bench::datasets::taxi_data::taxi_data_parquet;
use vortex_bench::datasets::feature_vectors;
use vortex_bench::datasets::nested_lists;
use vortex_bench::datasets::nested_structs;
use vortex_bench::datasets::taxi_data;
use vortex_bench::idempotent_async;
use vortex_bench::random_access::RandomAccessor;
use vortex_bench::random_access::data_path;

pub async fn taxi_data_lance() -> anyhow::Result<PathBuf> {
idempotent_async("taxi/taxi.lance", |output_fname| async move {
let parquet_path = taxi_data_parquet().await?;

/// Convert a parquet file to lance format.
///
/// Uses `idempotent_async` to skip conversion if the output already exists.
async fn parquet_to_lance_file(parquet_path: PathBuf, lance_path: &str) -> anyhow::Result<PathBuf> {
idempotent_async(lance_path, |output_fname| async move {
let file = File::open(&parquet_path)?;
let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
let reader = builder.build()?;
Expand All @@ -39,13 +44,58 @@ pub async fn taxi_data_lance() -> anyhow::Result<PathBuf> {
.await
}

/// Return the path of the taxi dataset in Lance format.
///
/// Resolves the taxi parquet file via `taxi_data::taxi_data_parquet`, then hands it to
/// `parquet_to_lance_file` together with the Lance path computed by `data_path`.
///
/// # Errors
///
/// Propagates any error from locating the parquet file or from the conversion helper.
pub async fn taxi_data_lance() -> anyhow::Result<PathBuf> {
    let source = taxi_data::taxi_data_parquet().await?;
    let target = data_path(taxi_data::DATASET, Format::Lance);
    parquet_to_lance_file(source, &target).await
}

/// Return the path of the feature-vectors dataset in Lance format.
///
/// Resolves the parquet file via `feature_vectors::feature_vectors_parquet`, then hands it
/// to `parquet_to_lance_file` together with the Lance path computed by `data_path`.
///
/// # Errors
///
/// Propagates any error from locating the parquet file or from the conversion helper.
pub async fn feature_vectors_lance() -> anyhow::Result<PathBuf> {
    let source = feature_vectors::feature_vectors_parquet().await?;
    let target = data_path(feature_vectors::DATASET, Format::Lance);
    parquet_to_lance_file(source, &target).await
}

/// Return the path of the nested-lists dataset in Lance format.
///
/// Resolves the parquet file via `nested_lists::nested_lists_parquet`, then hands it to
/// `parquet_to_lance_file` together with the Lance path computed by `data_path`.
///
/// # Errors
///
/// Propagates any error from locating the parquet file or from the conversion helper.
pub async fn nested_lists_lance() -> anyhow::Result<PathBuf> {
    let source = nested_lists::nested_lists_parquet().await?;
    let target = data_path(nested_lists::DATASET, Format::Lance);
    parquet_to_lance_file(source, &target).await
}

/// Return the path of the nested-structs dataset in Lance format.
///
/// Resolves the parquet file via `nested_structs::nested_structs_parquet`, then hands it to
/// `parquet_to_lance_file` together with the Lance path computed by `data_path`.
///
/// # Errors
///
/// Propagates any error from locating the parquet file or from the conversion helper.
pub async fn nested_structs_lance() -> anyhow::Result<PathBuf> {
    let source = nested_structs::nested_structs_parquet().await?;
    let target = data_path(nested_structs::DATASET, Format::Lance);
    parquet_to_lance_file(source, &target).await
}

/// Random accessor for Lance format files.
///
/// The dataset handle is opened at construction time and reused across `take()` calls.
pub struct LanceRandomAccessor {
path: PathBuf,
name: String,
dataset: Dataset,
}

impl LanceRandomAccessor {
pub fn new(path: PathBuf) -> Self {
Self { path }
/// Open a Lance dataset and return a ready-to-use accessor.
pub async fn open(path: PathBuf, name: impl Into<String>) -> anyhow::Result<Self> {
let dataset = Dataset::open(
path.to_str()
.ok_or_else(|| anyhow!("Invalid dataset path"))?,
)
.await?;
Ok(Self {
name: name.into(),
dataset,
})
}
}

Expand All @@ -56,22 +106,12 @@ impl RandomAccessor for LanceRandomAccessor {
}

fn name(&self) -> &str {
"random-access/lance-tokio-local-disk"
}

fn path(&self) -> &PathBuf {
&self.path
&self.name
}

async fn take(&self, indices: Vec<u64>) -> anyhow::Result<usize> {
let dataset = Dataset::open(
self.path
.to_str()
.ok_or_else(|| anyhow!("Invalid dataset path"))?,
)
.await?;
let projection = ProjectionRequest::from_schema(dataset.schema().clone()); // All columns.
let result = dataset.take(indices.as_slice(), projection).await?;
async fn take(&self, indices: &[u64]) -> anyhow::Result<usize> {
let projection = ProjectionRequest::from_schema(self.dataset.schema().clone());
let result = self.dataset.take(indices, projection).await?;
Ok(result.num_rows())
}
}
3 changes: 3 additions & 0 deletions benchmarks/random-access-bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ publish = false

[dependencies]
anyhow = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true, features = ["derive"] }
indicatif = { workspace = true }
lance-bench = { path = "../lance-bench", optional = true }
rand = { workspace = true }
rand_distr = { workspace = true }
tokio = { workspace = true, features = ["full"] }
vortex = { workspace = true }
vortex-bench = { workspace = true }
Expand Down
Loading
Loading