From a8eceb9b8df8b5cdc62a967d017c986270140e95 Mon Sep 17 00:00:00 2001
From: Karol Gotkowski
Date: Thu, 19 Feb 2026 10:57:00 +0100
Subject: [PATCH 1/4] Adds mmap support for store containers

Enables memory mapping for DictStore and EmbedStore containers to
improve read performance. Store container files (b2z, b2d, b2e) can now
be opened in read-only mode using memory mapping, potentially reducing
memory usage and speeding up reads. The feature is exposed through an
optional `mmap_mode` parameter, with "r" as the only supported value;
validation ensures that mmap_mode is either "r" or None, and that it is
only used when mode is "r".
---
 src/blosc2/dict_store.py  | 69 ++++++++++++++++++++++++++++++++++-----
 src/blosc2/embed_store.py | 12 ++++++-
 tests/test_dict_store.py  | 27 ++++++++++++++-
 tests/test_embed_store.py | 21 +++++++++++-
 tests/test_tree_store.py  | 23 ++++++++++++-
 5 files changed, 140 insertions(+), 12 deletions(-)

diff --git a/src/blosc2/dict_store.py b/src/blosc2/dict_store.py
index b4281615c..8f3244be1 100644
--- a/src/blosc2/dict_store.py
+++ b/src/blosc2/dict_store.py
@@ -44,6 +44,9 @@ class DictStore:
         will be treated as a Blosc2 zip format (B2ZIP).
     mode : str, optional
         File mode ('r', 'w', 'a'). Default is 'a'.
+    mmap_mode : str or None, optional
+        Memory mapping mode for read access. For now, only ``"r"`` is supported,
+        and only when ``mode="r"``. Default is None.
     tmpdir : str or None, optional
         Temporary directory to use when working with ".b2z" files. If None,
         a system temporary directory will be managed. Default is None.
@@ -97,6 +100,8 @@ def __init__(
         dparams: blosc2.DParams | None = None,
         storage: blosc2.Storage | None = None,
         threshold: int | None = 2**13,
+        *,
+        mmap_mode: str | None = None,
     ):
         """
         See :class:`DictStore` for full documentation of parameters.
@@ -106,8 +111,13 @@ def __init__( raise ValueError(f"localpath must have a .b2z or .b2d extension; you passed: {self.localpath}") if mode not in ("r", "w", "a"): raise ValueError("For DictStore containers, mode must be 'r', 'w', or 'a'") + if mmap_mode not in (None, "r"): + raise ValueError("For DictStore containers, mmap_mode must be None or 'r'") + if mmap_mode == "r" and mode != "r": + raise ValueError("For DictStore containers, mmap_mode='r' requires mode='r'") self.mode = mode + self.mmap_mode = mmap_mode self.threshold = threshold self.cparams = cparams or blosc2.CParams() self.dparams = dparams or blosc2.DParams() @@ -153,7 +163,13 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None): if "embed.b2e" not in self.offsets: raise FileNotFoundError("Embed file embed.b2e not found in store.") estore_offset = self.offsets["embed.b2e"]["offset"] - schunk = blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams) + schunk = blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=estore_offset, + mmap_mode=self.mmap_mode, + dparams=dparams, + ) for filepath in self.offsets: if filepath.endswith((".b2nd", ".b2f")): key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4] @@ -161,7 +177,13 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None): else: # .b2d if not os.path.isdir(self.localpath): raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.") - schunk = blosc2.blosc2_ext.open(self.estore_path, mode="r", offset=0, dparams=dparams) + schunk = blosc2.blosc2_ext.open( + self.estore_path, + mode="r", + offset=0, + mmap_mode=self.mmap_mode, + dparams=dparams, + ) self._update_map_tree() self._estore = EmbedStore(_from_schunk=schunk) @@ -267,11 +289,22 @@ def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array: filepath = self.map_tree[key] if filepath in self.offsets: offset = self.offsets[filepath]["offset"] - return blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams) + return blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=offset, + mmap_mode=self.mmap_mode, + dparams=self.dparams, + ) else: urlpath = os.path.join(self.working_dir, filepath) if os.path.exists(urlpath): - return blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams) + return blosc2.open( + urlpath, + mode="r" if self.mode == "r" else "a", + mmap_mode=self.mmap_mode if self.mode == "r" else None, + dparams=self.dparams, + ) else: raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.") @@ -332,11 +365,20 @@ def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]: if filepath in self.offsets: offset = self.offsets[filepath]["offset"] yield blosc2.blosc2_ext.open( - self.b2z_path, mode="r", offset=offset, dparams=self.dparams + self.b2z_path, + mode="r", + offset=offset, + mmap_mode=self.mmap_mode, + dparams=self.dparams, ) else: urlpath = os.path.join(self.working_dir, filepath) - yield blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams) + yield blosc2.open( + urlpath, + mode="r" if self.mode == "r" else "a", + mmap_mode=self.mmap_mode if self.mode == "r" else None, + dparams=self.dparams, + ) elif key in self._estore: yield self._estore[key] @@ -352,10 +394,21 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]: if self.is_zip_store: if filepath in self.offsets: offset = self.offsets[filepath]["offset"] - yield key, blosc2.blosc2_ext.open(self.b2z_path, 
mode="r", offset=offset) + yield key, blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=offset, + mmap_mode=self.mmap_mode, + dparams=self.dparams, + ) else: urlpath = os.path.join(self.working_dir, filepath) - yield key, blosc2.open(urlpath, mode="r" if self.mode == "r" else "a") + yield key, blosc2.open( + urlpath, + mode="r" if self.mode == "r" else "a", + mmap_mode=self.mmap_mode if self.mode == "r" else None, + dparams=self.dparams, + ) elif key in self._estore: yield key, self._estore[key] diff --git a/src/blosc2/embed_store.py b/src/blosc2/embed_store.py index 7d6316fe7..757c02455 100644 --- a/src/blosc2/embed_store.py +++ b/src/blosc2/embed_store.py @@ -36,6 +36,9 @@ class EmbedStore: deserialized later using the :func:`blosc2.from_cframe` function. mode : str, optional File mode ('r', 'w', 'a'). Default is 'w'. + mmap_mode : str or None, optional + Memory mapping mode for read access. For now, only ``"r"`` is supported, + and only when ``mode="r"``. Default is None. cparams : dict or None, optional Compression parameters for nodes and the embed store itself. Default is None, which uses the default Blosc2 parameters. @@ -76,6 +79,8 @@ def __init__( storage: blosc2.Storage | None = None, chunksize: int | None = 2**13, _from_schunk: SChunk | None = None, + *, + mmap_mode: str | None = None, ): """Initialize EmbedStore.""" @@ -84,6 +89,11 @@ def __init__( # Let's use the SChunk store by default and continue experimenting. self._schunk_store = True # put this to False to use an NDArray instead of a SChunk self.urlpath = urlpath + if mmap_mode not in (None, "r"): + raise ValueError("For EmbedStore containers, mmap_mode must be None or 'r'") + if mmap_mode == "r" and mode != "r": + raise ValueError("For EmbedStore containers, mmap_mode='r' requires mode='r'") + self.mmap_mode = mmap_mode if _from_schunk is not None: self.cparams = _from_schunk.cparams @@ -108,7 +118,7 @@ def __init__( self.storage = storage if mode in ("r", "a") and urlpath: - self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0) + self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0, mmap_mode=mmap_mode) self._load_metadata() return diff --git a/tests/test_dict_store.py b/tests/test_dict_store.py index 8ba844b7e..2510194db 100644 --- a/tests/test_dict_store.py +++ b/tests/test_dict_store.py @@ -445,7 +445,32 @@ def test_open_context_manager(populated_dict_store): dstore_fixture.close() # Test opening via blosc2.open as a context manager - with blosc2.open(path, mode="r") as dstore: + with blosc2.open(path, mode="r", mmap_mode="r") as dstore: assert isinstance(dstore, DictStore) assert "/node1" in dstore assert np.array_equal(dstore["/node1"][:], np.array([1, 2, 3])) + + +@pytest.mark.parametrize("storage_type", ["b2d", "b2z"]) +def test_mmap_mode_read_access(storage_type, tmp_path): + path = tmp_path / f"test_mmap_dstore.{storage_type}" + external_path = tmp_path / "external_node.b2nd" + + with DictStore(str(path), mode="w", threshold=None) as dstore: + dstore["/embedded"] = np.arange(16) + external = blosc2.arange(32, urlpath=str(external_path), mode="w") + dstore["/external"] = external + + with DictStore(str(path), mode="r", mmap_mode="r") as dstore: + assert np.array_equal(dstore["/embedded"][3:7], np.arange(3, 7)) + assert np.array_equal(dstore["/external"][11:15], np.arange(11, 15)) + + +def test_mmap_mode_validation(tmp_path): + path = tmp_path / "test_mmap_validation.b2z" + + with pytest.raises(ValueError, match="mmap_mode must be None or 'r'"): + DictStore(str(path), 
mode="r", mmap_mode="r+")
+
+    with pytest.raises(ValueError, match="mmap_mode='r' requires mode='r'"):
+        DictStore(str(path), mode="a", mmap_mode="r")
diff --git a/tests/test_embed_store.py b/tests/test_embed_store.py
index a29d5b20b..719505a51 100644
--- a/tests/test_embed_store.py
+++ b/tests/test_embed_store.py
@@ -213,7 +213,26 @@ def test_open_context_manager(cleanup_files):
     estore["/node1"] = np.arange(10)

     # Test opening via blosc2.open as a context manager
-    with blosc2.open(path, mode="r") as estore_read:
+    with blosc2.open(path, mode="r", mmap_mode="r") as estore_read:
         assert isinstance(estore_read, blosc2.EmbedStore)
         assert "/node1" in estore_read
         assert np.array_equal(estore_read["/node1"][:], np.arange(10))
+
+
+def test_mmap_mode_read_access(cleanup_files):
+    path = "test_embed_mmap_read.b2e"
+    cleanup_files.append(path)
+
+    estore = blosc2.EmbedStore(path, mode="w")
+    estore["/node1"] = np.arange(20)
+
+    estore_read = blosc2.EmbedStore(path, mode="r", mmap_mode="r")
+    assert np.array_equal(estore_read["/node1"][5:11], np.arange(5, 11))
+
+
+def test_mmap_mode_validation():
+    with pytest.raises(ValueError, match="mmap_mode must be None or 'r'"):
+        blosc2.EmbedStore(urlpath="test_invalid.b2e", mode="r", mmap_mode="r+")
+
+    with pytest.raises(ValueError, match="mmap_mode='r' requires mode='r'"):
+        blosc2.EmbedStore(urlpath="test_invalid.b2e", mode="a", mmap_mode="r")
diff --git a/tests/test_tree_store.py b/tests/test_tree_store.py
index 09f86b172..9f101594c 100644
--- a/tests/test_tree_store.py
+++ b/tests/test_tree_store.py
@@ -930,7 +930,28 @@ def test_open_context_manager(populated_tree_store):
     tstore_fixture.close()

     # Test opening via blosc2.open as a context manager
-    with blosc2.open(path, mode="r") as tstore:
+    with blosc2.open(path, mode="r", mmap_mode="r") as tstore:
         assert isinstance(tstore, TreeStore)
         assert "/child0/data" in tstore
         assert np.array_equal(tstore["/child0/data"][:], np.array([1, 2, 3]))
+
+
+@pytest.mark.parametrize("storage_type", ["b2d", "b2z"])
+def test_mmap_mode_read_access(storage_type, tmp_path):
+    path = tmp_path / f"test_tstore_mmap.{storage_type}"
+
+    with TreeStore(str(path), mode="w") as tstore:
+        tstore["/group/node"] = np.arange(12)
+
+    with TreeStore(str(path), mode="r", mmap_mode="r") as tstore:
+        assert np.array_equal(tstore["/group/node"][4:9], np.arange(4, 9))
+
+
+def test_mmap_mode_validation(tmp_path):
+    path = tmp_path / "test_tstore_mmap_validation.b2z"
+
+    with pytest.raises(ValueError, match="mmap_mode must be None or 'r'"):
+        TreeStore(str(path), mode="r", mmap_mode="c")
+
+    with pytest.raises(ValueError, match="mmap_mode='r' requires mode='r'"):
+        TreeStore(str(path), mode="a", mmap_mode="r")

From ec5ea9efd47ad701ca262424584953ae06dd9b53 Mon Sep 17 00:00:00 2001
From: Karol Gotkowski
Date: Thu, 19 Feb 2026 11:10:25 +0100
Subject: [PATCH 2/4] Document initial store-container mmap support

Update the DictStore, TreeStore, and EmbedStore docs, note the current
limits (only "r", and only together with mode="r"), and add a
release-notes entry.
---
 RELEASE_NOTES.md              |  5 +++++
 doc/reference/dict_store.rst  | 12 ++++++++++++
 doc/reference/embed_store.rst |  8 ++++++++
 doc/reference/tree_store.rst  | 11 +++++++++++
 4 files changed, 36 insertions(+)

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index d3d0c9645..8eb1cc65a 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -4,6 +4,11 @@

 XXX version-specific blurb XXX

+- Add initial read-only mmap support for store containers:
+  `DictStore`, `TreeStore`, and `EmbedStore` now accept `mmap_mode="r"`
+  when opened with
`mode="r"` (including via `blosc2.open` for `.b2d`, + `.b2z`, and `.b2e`). + ## Changes from 4.0.0-b1 to 4.0.0 - On Windows, miniexpr is temporarily disabled for integral outputs and mixed-dtype expressions. diff --git a/doc/reference/dict_store.rst b/doc/reference/dict_store.rst index dd0ba6cc1..612d53e5d 100644 --- a/doc/reference/dict_store.rst +++ b/doc/reference/dict_store.rst @@ -14,6 +14,10 @@ DictStore lets you store and retrieve arrays by string keys (paths like ``"/dir/ Supported values include ``blosc2.NDArray``, ``blosc2.SChunk`` and ``blosc2.C2Array`` (as well as ``numpy.ndarray``, which is converted to NDArray). Small arrays (below a configurable compression‑size threshold) and in‑memory objects are kept inside the embedded store; larger or explicitly external arrays live as regular ``.b2nd`` (NDArray) or ``.b2f`` (SChunk) files. ``C2Array`` objects are always stored in the embedded store. You can mix all types seamlessly and use the usual mapping methods (``__getitem__``, ``__setitem__``, ``keys()``, ``items()``...). +For read-only workloads, DictStore supports memory-mapped opens via +``mmap_mode="r"`` (constructor or :func:`blosc2.open`). This works for both +``.b2d`` and ``.b2z`` containers. + Quick example ------------- @@ -34,6 +38,14 @@ Quick example print(sorted(dstore.keys())) # ['/dir1/node3', '/node1', '/node2'] print(dstore["/node1"][:]) # [1 2 3] + # Reopen in read-only mmap mode + with blosc2.open("my_dstore.b2z", mode="r", mmap_mode="r") as dstore_mmap: + print(dstore_mmap["/dir1/node3"][1:3]) + +.. note:: + For store containers, only ``mmap_mode="r"`` is currently supported, and it + requires ``mode="r"``. + .. currentmodule:: blosc2 .. autoclass:: DictStore diff --git a/doc/reference/embed_store.rst b/doc/reference/embed_store.rst index 261541a51..46f8709b3 100644 --- a/doc/reference/embed_store.rst +++ b/doc/reference/embed_store.rst @@ -16,6 +16,9 @@ Important: Only remote ``C2Array`` objects are stored as lightweight references Typical use cases include bundling several small/medium arrays together, shipping datasets as one file, or creating a simple keyed store for heterogeneous array sources. +For read-only access, EmbedStore supports memory-mapped opens via +``mmap_mode="r"`` (constructor or :func:`blosc2.open`) for ``.b2e`` files. + Quickstart ---------- @@ -45,10 +48,15 @@ Quickstart estore = blosc2.open("example_estore.b2e", mode="r") print(list(estore.keys())) + # Reopen in read-only mmap mode + estore_mmap = blosc2.open("example_estore.b2e", mode="r", mmap_mode="r") + print(estore_mmap["/node2"][:]) + .. note:: - Embedded arrays (NumPy, NDArray, and SChunk) increase the size of the ``.b2e`` container. - Remote ``C2Array`` nodes only store lightweight references; reading them requires access to the remote source. NDArrays coming from external ``.b2nd`` files are embedded into the store. - When retrieving, ``estore[key]`` may return either an ``NDArray`` or an ``SChunk`` depending on what was originally stored; deserialization uses :func:`blosc2.from_cframe`. + - For store containers, only ``mmap_mode="r"`` is currently supported, and it requires ``mode="r"``. .. currentmodule:: blosc2 diff --git a/doc/reference/tree_store.rst b/doc/reference/tree_store.rst index 29ac41aea..82e31058e 100644 --- a/doc/reference/tree_store.rst +++ b/doc/reference/tree_store.rst @@ -23,6 +23,9 @@ store, while larger arrays or explicitly external arrays are stored as separate ``.b2nd`` files. 
You can traverse your dataset hierarchically with ``walk()``, query children/descendants, or focus on a subtree view with ``get_subtree()``. +TreeStore also supports read-only memory-mapped opens via ``mmap_mode="r"`` +(constructor or :func:`blosc2.open`) for both ``.b2d`` and ``.b2z`` formats. + Quick example ------------- @@ -51,6 +54,14 @@ Quick example with blosc2.open("my_tree.b2z", mode="r") as tstore: print(sorted(tstore.keys())) + # Reopen in read-only mmap mode + with blosc2.open("my_tree.b2z", mode="r", mmap_mode="r") as tstore_mmap: + print(tstore_mmap["/child0/leaf1"][0:2]) + +.. note:: + For store containers, only ``mmap_mode="r"`` is currently supported, and it + requires ``mode="r"``. + .. currentmodule:: blosc2 .. autoclass:: TreeStore From 091f3407e6cd5208b1ad7d547214f7869caa662b Mon Sep 17 00:00:00 2001 From: Karol Gotkowski Date: Thu, 19 Feb 2026 11:58:55 +0100 Subject: [PATCH 3/4] Apply ruff-format output after store mmap changes Accept formatter-only tuple-yield rewrite in DictStore.items(); no functional change. --- src/blosc2/dict_store.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/blosc2/dict_store.py b/src/blosc2/dict_store.py index 8f3244be1..b1c7ad9e8 100644 --- a/src/blosc2/dict_store.py +++ b/src/blosc2/dict_store.py @@ -394,20 +394,26 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]: if self.is_zip_store: if filepath in self.offsets: offset = self.offsets[filepath]["offset"] - yield key, blosc2.blosc2_ext.open( - self.b2z_path, - mode="r", - offset=offset, - mmap_mode=self.mmap_mode, - dparams=self.dparams, + yield ( + key, + blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=offset, + mmap_mode=self.mmap_mode, + dparams=self.dparams, + ), ) else: urlpath = os.path.join(self.working_dir, filepath) - yield key, blosc2.open( - urlpath, - mode="r" if self.mode == "r" else "a", - mmap_mode=self.mmap_mode if self.mode == "r" else None, - dparams=self.dparams, + yield ( + key, + blosc2.open( + urlpath, + mode="r" if self.mode == "r" else "a", + mmap_mode=self.mmap_mode if self.mode == "r" else None, + dparams=self.dparams, + ), ) elif key in self._estore: yield key, self._estore[key] From 7e4ff8483f0b62faa0f69aeedb8b4283db90b639 Mon Sep 17 00:00:00 2001 From: Karol Gotkowski Date: Fri, 20 Feb 2026 10:42:02 +0100 Subject: [PATCH 4/4] Adds mmap read benchmark Introduces a benchmark script to compare read performance between regular and memory-mapped read paths for different store containers (EmbedStore, DictStore, TreeStore). This allows for evaluating the impact of mmap on read throughput and latency under various scenarios, including warm and cold cache conditions. The benchmark supports different data layouts (embedded, external, mixed) and generates detailed metrics such as open time, read time, throughput, and speedup ratios. 
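
In essence, every scenario reduces to timing the same read workload
under both open modes. A minimal sketch of that comparison (the
"data.b2z" path and the single full-scan pass are illustrative; the
actual script adds dataset generation, cache dropping, repeated runs,
and percentile reporting):

    import time

    import blosc2

    def timed_full_scan(mmap_mode):
        # Open the same container with or without mmap and read every node.
        t0 = time.perf_counter()
        with blosc2.DictStore("data.b2z", mode="r", mmap_mode=mmap_mode) as store:
            for key in list(store.keys()):
                store[key][:]  # full read of the node payload
        return time.perf_counter() - t0

    regular = timed_full_scan(None)
    mmapped = timed_full_scan("r")
    print(f"speedup: {regular / mmapped:.3f}x")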
--- bench/README_mmap_store_read.md | 119 +++++++ bench/mmap_store_read.py | 579 ++++++++++++++++++++++++++++++++ 2 files changed, 698 insertions(+) create mode 100644 bench/README_mmap_store_read.md create mode 100644 bench/mmap_store_read.py diff --git a/bench/README_mmap_store_read.md b/bench/README_mmap_store_read.md new file mode 100644 index 000000000..07354b595 --- /dev/null +++ b/bench/README_mmap_store_read.md @@ -0,0 +1,119 @@ +# mmap Store Read Benchmark + +This benchmark compares read performance between: + +- `mode="r", mmap_mode=None` (regular read path) +- `mode="r", mmap_mode="r"` (memory-mapped read path) + +for: + +- `EmbedStore` (`.b2e`) +- `DictStore` (`.b2d`, `.b2z`) +- `TreeStore` (`.b2d`, `.b2z`) + +Script: `bench/mmap_store_read.py` + +## What It Measures + +For each selected combination of container/storage/layout/scenario, it reports: + +- open time median +- read time median +- total time median +- total p10 / p90 +- effective throughput (MiB/s) +- speedup ratio (`regular / mmap`) printed to stdout + +## Scenarios + +### 1. `warm_full_scan` +Reads every node completely (`node[:]`) in-process, repeatedly. + +Use this for steady-state throughput after the OS cache is warm. + +### 2. `warm_random_slices` +Reads random slices from each node (`node[start:start+slice_len]`) in-process, repeatedly. + +Use this for latency-sensitive random access when files are likely warm. + +### 3. `cold_full_scan_drop_caches` +Before each run: calls Linux `drop_caches`, then reads every node completely. + +Use this for first-touch behavior with minimal page-cache carryover. + +### 4. `cold_random_slices_drop_caches` +Before each run: calls Linux `drop_caches`, then performs random-slice reads. + +Use this for first-touch random-access behavior. + +## Dataset Layouts + +- `embedded`: all values stored inside the container payload. +- `external`: for `DictStore` / `TreeStore`, all values are written as external `.b2nd` nodes and referenced. +- `mixed`: alternating embedded/external nodes (for `DictStore` / `TreeStore`). + +`EmbedStore` is benchmarked with `embedded` layout only (by design of this benchmark). + +## Usage Examples + +### Quick warm benchmark across all containers + +```bash +python bench/mmap_store_read.py \ + --scenario warm_full_scan warm_random_slices \ + --runs 7 --n-nodes 128 --node-len 100000 +``` + +### Cold benchmark (Linux + root required) + +```bash +sudo python bench/mmap_store_read.py \ + --scenario cold_full_scan_drop_caches cold_random_slices_drop_caches \ + --runs 5 --drop-caches-value 3 +``` + +### Focused TreeStore benchmark with JSON output + +```bash +sudo python bench/mmap_store_read.py \ + --container tree --storage b2d b2z --layout embedded external mixed \ + --scenario warm_full_scan cold_full_scan_drop_caches \ + --runs 9 --json-out bench_mmap_tree_results.json +``` + +## Important Caveats + +1. Linux root-only for cold scenarios +- Cold scenarios use `/proc/sys/vm/drop_caches` and require root. +- They are intentionally disabled on non-Linux platforms. + +2. `drop_caches` is system-wide and intrusive +- It affects the whole machine, not only this benchmark process. +- Avoid running it on shared or production systems. + +3. mmap is still page-cache based +- `mmap` does not bypass OS caching; it changes IO path and access behavior. +- Warm-cache and cold-cache results can differ substantially. + +4. Compression/decompression may dominate +- If decompression CPU cost is dominant, mmap gains can be small even when IO path improves. 
+- Compare both full-scan and random-slice scenarios before concluding. + +5. Storage format impacts behavior +- `.b2d` and `.b2z` can behave differently due to layout and access locality. +- Keep format fixed when making A/B claims. + +6. Benchmark realism +- Use representative node sizes, counts, and slice patterns from your target workload. +- Defaults are useful for quick checks, not a universal production proxy. + +7. Variance is expected +- Background IO, filesystem state, and thermal/power conditions affect runs. +- Use medians and p10/p90 (already reported) rather than single-run minima. + +## Recommended Evaluation Workflow + +1. Run warm scenarios first to estimate steady-state behavior. +2. Run cold scenarios (as root) to estimate first-touch behavior. +3. Compare by container + storage format + layout separately. +4. Validate improvements hold for your real slice patterns and dataset scales. diff --git a/bench/mmap_store_read.py b/bench/mmap_store_read.py new file mode 100644 index 000000000..50a30b15d --- /dev/null +++ b/bench/mmap_store_read.py @@ -0,0 +1,579 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +""" +Benchmark mmap read-mode vs regular read-mode for EmbedStore, DictStore and TreeStore. + +This script creates deterministic datasets, then compares: + - mode="r", mmap_mode=None + - mode="r", mmap_mode="r" + +It supports multiple read scenarios: + * warm_full_scan: full reads with warm OS cache. + * warm_random_slices: random small slices with warm OS cache. + * cold_full_scan_drop_caches: full reads after dropping Linux page cache. + * cold_random_slices_drop_caches: random small slices after dropping Linux page cache. + +For cold scenarios, the cache drop mechanism relies on Linux +(/proc/sys/vm/drop_caches) and root privileges. 
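+
+A quick warm-cache check (the flags are those defined in build_parser()
+below; cold scenarios additionally require Linux and root):
+
+    python bench/mmap_store_read.py --scenario warm_full_scan --runs 3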
+""" + +from __future__ import annotations + +import argparse +import json +import math +import os +import platform +import time +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + +import numpy as np + +import blosc2 + +SCENARIOS = { + "warm_full_scan": { + "pattern": "full", + "drop_caches": False, + "description": "Read all nodes completely in-process with warm cache.", + }, + "warm_random_slices": { + "pattern": "random", + "drop_caches": False, + "description": "Read random slices from each node in-process with warm cache.", + }, + "cold_full_scan_drop_caches": { + "pattern": "full", + "drop_caches": True, + "description": "Read all nodes after dropping Linux page cache before each run.", + }, + "cold_random_slices_drop_caches": { + "pattern": "random", + "drop_caches": True, + "description": "Read random slices after dropping Linux page cache before each run.", + }, +} + + +@dataclass +class RunMetrics: + open_s: float + read_s: float + total_s: float + bytes_read: int + sink: float + + +@dataclass +class SummaryMetrics: + open_median_s: float + read_median_s: float + total_median_s: float + total_p10_s: float + total_p90_s: float + throughput_mib_s: float + + +@dataclass +class ResultRow: + container: str + storage: str + layout: str + scenario: str + mode: str + open_median_s: float + read_median_s: float + total_median_s: float + total_p10_s: float + total_p90_s: float + throughput_mib_s: float + + +def q(values: list[float], quantile: float) -> float: + return float(np.quantile(np.asarray(values, dtype=np.float64), quantile)) + + +def summarize(metrics: list[RunMetrics]) -> SummaryMetrics: + open_vals = [m.open_s for m in metrics] + read_vals = [m.read_s for m in metrics] + total_vals = [m.total_s for m in metrics] + bytes_read = metrics[0].bytes_read if metrics else 0 + total_median = float(np.median(total_vals)) + mib = bytes_read / 2**20 + throughput = mib / total_median if total_median > 0 else math.inf + return SummaryMetrics( + open_median_s=float(np.median(open_vals)), + read_median_s=float(np.median(read_vals)), + total_median_s=total_median, + total_p10_s=q(total_vals, 0.1), + total_p90_s=q(total_vals, 0.9), + throughput_mib_s=throughput, + ) + + +def drop_linux_page_cache(drop_caches_value: int = 3) -> None: + if platform.system() != "Linux": + raise RuntimeError("drop_caches is supported only on Linux") + if os.geteuid() != 0: + raise PermissionError("drop_caches requires root privileges") + if drop_caches_value not in (1, 2, 3): + raise ValueError("drop_caches_value must be 1, 2, or 3") + + os.sync() + with open("/proc/sys/vm/drop_caches", "w", encoding="ascii") as f: + f.write(str(drop_caches_value)) + + +def container_cls(container: str): + if container == "embed": + return blosc2.EmbedStore + if container == "dict": + return blosc2.DictStore + if container == "tree": + return blosc2.TreeStore + raise ValueError(f"Unknown container: {container}") + + +def valid_storage_values(container: str) -> tuple[str, ...]: + if container == "embed": + return ("b2e",) + return ("b2d", "b2z") + + +def store_path(dataset_root: Path, container: str, storage: str, layout: str) -> Path: + base = dataset_root / f"{container}_{layout}" + if storage == "b2e": + return base.with_suffix(".b2e") + if storage == "b2d": + return base.with_suffix(".b2d") + if storage == "b2z": + return base.with_suffix(".b2z") + raise ValueError(f"Unknown storage format: {storage}") + + +def external_data_dir(dataset_root: Path, container: str, storage: str, layout: 
str) -> Path: + return dataset_root / f"external_{container}_{storage}_{layout}" + + +def node_key(i: int) -> str: + return f"/group_{i % 8:02d}/node_{i:05d}" + + +def node_array(i: int, node_len: int, dtype: np.dtype) -> np.ndarray: + start = i * node_len + stop = start + node_len + return np.arange(start, stop, dtype=dtype) + + +def is_external_node(i: int, layout: str) -> bool: + if layout == "embedded": + return False + if layout == "external": + return True + if layout == "mixed": + return (i % 2) == 1 + raise ValueError(f"Unknown layout: {layout}") + + +def cleanup_path(path: Path) -> None: + blosc2.remove_urlpath(str(path)) + + +def create_dataset( + *, + dataset_root: Path, + container: str, + storage: str, + layout: str, + n_nodes: int, + node_len: int, + dtype: np.dtype, + clevel: int, + codec: blosc2.Codec, +) -> Path: + if container == "embed" and layout != "embedded": + raise ValueError("EmbedStore supports only layout=embedded in this benchmark") + + s_path = store_path(dataset_root, container, storage, layout) + ext_dir = external_data_dir(dataset_root, container, storage, layout) + + cleanup_path(s_path) + cleanup_path(ext_dir) + dataset_root.mkdir(parents=True, exist_ok=True) + ext_dir.mkdir(parents=True, exist_ok=True) + + cparams = blosc2.CParams(clevel=clevel, codec=codec) + dparams = blosc2.DParams(nthreads=blosc2.nthreads) + + if container == "embed": + with blosc2.EmbedStore(urlpath=str(s_path), mode="w", cparams=cparams, dparams=dparams) as store: + for i in range(n_nodes): + store[node_key(i)] = node_array(i, node_len, dtype) + return s_path + + cls = container_cls(container) + with cls(str(s_path), mode="w", threshold=None, cparams=cparams, dparams=dparams) as store: + for i in range(n_nodes): + key = node_key(i) + if is_external_node(i, layout): + epath = ext_dir / f"node_{i:05d}.b2nd" + arr = blosc2.asarray( + node_array(i, node_len, dtype), + urlpath=str(epath), + mode="w", + cparams=cparams, + dparams=dparams, + ) + store[key] = arr + else: + store[key] = node_array(i, node_len, dtype) + + return s_path + + +def open_store(container: str, path: Path, mmap_enabled: bool): + mmap_mode = "r" if mmap_enabled else None + dparams = blosc2.DParams(nthreads=blosc2.nthreads) + + if container == "embed": + return blosc2.EmbedStore(urlpath=str(path), mode="r", mmap_mode=mmap_mode, dparams=dparams) + if container == "dict": + return blosc2.DictStore(str(path), mode="r", mmap_mode=mmap_mode, dparams=dparams) + if container == "tree": + return blosc2.TreeStore(str(path), mode="r", mmap_mode=mmap_mode, dparams=dparams) + raise ValueError(f"Unknown container: {container}") + + +def workload_full_scan(store: Any, keys: list[str]) -> tuple[int, float]: + bytes_read = 0 + sink = 0.0 + for key in keys: + arr = store[key] + data = arr[:] + bytes_read += int(np.asarray(data).nbytes) + if len(data) > 0: + sink += float(np.asarray(data).reshape(-1)[0]) + return bytes_read, sink + + +def workload_random_slices( + store: Any, + keys: list[str], + *, + slice_len: int, + reads_per_node: int, + rng: np.random.Generator, +) -> tuple[int, float]: + bytes_read = 0 + sink = 0.0 + for key in keys: + arr = store[key] + n = len(arr) + if n <= 0: + continue + width = min(slice_len, n) + hi = n - width + for _ in range(reads_per_node): + start = int(rng.integers(0, hi + 1)) if hi > 0 else 0 + data = arr[start : start + width] + data_np = np.asarray(data) + bytes_read += int(data_np.nbytes) + sink += float(data_np.reshape(-1)[0]) + return bytes_read, sink + + +def run_once( + *, + container: str, + 
path: Path, + data_keys: list[str], + mmap_enabled: bool, + pattern: str, + slice_len: int, + reads_per_node: int, + seed: int, +) -> RunMetrics: + t0 = time.perf_counter() + store = open_store(container, path, mmap_enabled=mmap_enabled) + t1 = time.perf_counter() + + try: + # Use the known data-node keys generated during dataset creation. + # This avoids structural subtree keys in TreeStore (e.g. "/group_xx"), + # which are valid keys but do not represent leaf array payloads. + keys = data_keys + if pattern == "full": + bytes_read, sink = workload_full_scan(store, keys) + elif pattern == "random": + rng = np.random.default_rng(seed) + bytes_read, sink = workload_random_slices( + store, + keys, + slice_len=slice_len, + reads_per_node=reads_per_node, + rng=rng, + ) + else: + raise ValueError(f"Unknown pattern: {pattern}") + finally: + # DictStore/TreeStore expose close(); EmbedStore currently does not. + # Use close() when available and otherwise just release the reference. + close = getattr(store, "close", None) + if callable(close): + close() + del store + + t2 = time.perf_counter() + return RunMetrics(open_s=t1 - t0, read_s=t2 - t1, total_s=t2 - t0, bytes_read=bytes_read, sink=sink) + + +def run_mode( + *, + container: str, + path: Path, + data_keys: list[str], + mmap_enabled: bool, + pattern: str, + drop_caches: bool, + runs: int, + slice_len: int, + reads_per_node: int, + seed: int, + drop_caches_value: int, +) -> list[RunMetrics]: + metrics: list[RunMetrics] = [] + for i in range(runs): + if drop_caches: + drop_linux_page_cache(drop_caches_value=drop_caches_value) + metrics.append( + run_once( + container=container, + path=path, + data_keys=data_keys, + mmap_enabled=mmap_enabled, + pattern=pattern, + slice_len=slice_len, + reads_per_node=reads_per_node, + seed=seed + i, + ) + ) + return metrics + + +def print_scenario_header(name: str) -> None: + meta = SCENARIOS[name] + print(f" Scenario: {name}") + print(f" Description: {meta['description']}") + + +def print_compare(a: SummaryMetrics, b: SummaryMetrics) -> None: + speedup = a.total_median_s / b.total_median_s if b.total_median_s > 0 else math.inf + print( + " regular median={:.4f}s ({:.1f} MiB/s) | mmap median={:.4f}s ({:.1f} MiB/s) | speedup={:.3f}x".format( + a.total_median_s, + a.throughput_mib_s, + b.total_median_s, + b.throughput_mib_s, + speedup, + ) + ) + + +def parse_combinations( + containers: list[str], + storages: list[str], + layouts: list[str], +) -> list[tuple[str, str, str]]: + combos: list[tuple[str, str, str]] = [] + for container in containers: + valid_storages = valid_storage_values(container) + for storage in storages: + if storage not in valid_storages: + continue + for layout in layouts: + if container == "embed" and layout != "embedded": + continue + combos.append((container, storage, layout)) + return combos + + +def validate_args(args: argparse.Namespace) -> None: + if args.drop_caches_value not in (1, 2, 3): + raise ValueError("--drop-caches-value must be 1, 2, or 3") + + cold_selected = any(SCENARIOS[s]["drop_caches"] for s in args.scenarios) + if cold_selected: + if platform.system() != "Linux": + raise RuntimeError("cold/drop-cache scenarios are supported only on Linux") + if os.geteuid() != 0: + raise PermissionError("cold/drop-cache scenarios require root") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Benchmark mmap read mode for EmbedStore/DictStore/TreeStore", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + 
parser.add_argument("--dataset-root", type=Path, default=Path("bench_mmap_store_data")) + parser.add_argument("--container", nargs="+", default=["embed", "dict", "tree"], choices=["embed", "dict", "tree"]) + parser.add_argument("--storage", nargs="+", default=["b2e", "b2d", "b2z"], choices=["b2e", "b2d", "b2z"]) + parser.add_argument("--layout", nargs="+", default=["embedded", "external", "mixed"], choices=["embedded", "external", "mixed"]) + parser.add_argument("--scenario", nargs="+", dest="scenarios", default=list(SCENARIOS), choices=list(SCENARIOS)) + + parser.add_argument("--n-nodes", type=int, default=128) + parser.add_argument("--node-len", type=int, default=100_000) + parser.add_argument("--dtype", type=str, default="float64") + parser.add_argument("--clevel", type=int, default=5) + parser.add_argument("--codec", type=str, default="ZSTD", choices=[c.name for c in blosc2.Codec]) + + parser.add_argument("--runs", type=int, default=7) + parser.add_argument("--slice-len", type=int, default=4096) + parser.add_argument("--reads-per-node", type=int, default=8) + parser.add_argument("--seed", type=int, default=12345) + parser.add_argument("--drop-caches-value", type=int, default=3) + + parser.add_argument("--json-out", type=Path, default=None, help="Optional JSON output path") + parser.add_argument("--keep-dataset", action="store_true", help="Keep generated benchmark files") + return parser + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + validate_args(args) + + dtype = np.dtype(args.dtype) + codec = blosc2.Codec[args.codec] + + combos = parse_combinations(args.container, args.storage, args.layout) + if not combos: + raise ValueError("No valid (container, storage, layout) combinations selected") + + rows: list[ResultRow] = [] + + print("mmap read benchmark") + print(f" dataset_root={args.dataset_root}") + print(f" runs={args.runs}, n_nodes={args.n_nodes}, node_len={args.node_len}, dtype={dtype}") + + for container, storage, layout in combos: + print(f"\nDataset: container={container}, storage={storage}, layout={layout}") + data_keys = [node_key(i) for i in range(args.n_nodes)] + s_path = create_dataset( + dataset_root=args.dataset_root, + container=container, + storage=storage, + layout=layout, + n_nodes=args.n_nodes, + node_len=args.node_len, + dtype=dtype, + clevel=args.clevel, + codec=codec, + ) + + for scenario in args.scenarios: + print_scenario_header(scenario) + meta = SCENARIOS[scenario] + regular_runs = run_mode( + container=container, + path=s_path, + data_keys=data_keys, + mmap_enabled=False, + pattern=meta["pattern"], + drop_caches=meta["drop_caches"], + runs=args.runs, + slice_len=args.slice_len, + reads_per_node=args.reads_per_node, + seed=args.seed, + drop_caches_value=args.drop_caches_value, + ) + mmap_runs = run_mode( + container=container, + path=s_path, + data_keys=data_keys, + mmap_enabled=True, + pattern=meta["pattern"], + drop_caches=meta["drop_caches"], + runs=args.runs, + slice_len=args.slice_len, + reads_per_node=args.reads_per_node, + seed=args.seed, + drop_caches_value=args.drop_caches_value, + ) + + regular_summary = summarize(regular_runs) + mmap_summary = summarize(mmap_runs) + print_compare(regular_summary, mmap_summary) + + rows.append( + ResultRow( + container=container, + storage=storage, + layout=layout, + scenario=scenario, + mode="regular", + open_median_s=regular_summary.open_median_s, + read_median_s=regular_summary.read_median_s, + total_median_s=regular_summary.total_median_s, + 
total_p10_s=regular_summary.total_p10_s, + total_p90_s=regular_summary.total_p90_s, + throughput_mib_s=regular_summary.throughput_mib_s, + ) + ) + rows.append( + ResultRow( + container=container, + storage=storage, + layout=layout, + scenario=scenario, + mode="mmap", + open_median_s=mmap_summary.open_median_s, + read_median_s=mmap_summary.read_median_s, + total_median_s=mmap_summary.total_median_s, + total_p10_s=mmap_summary.total_p10_s, + total_p90_s=mmap_summary.total_p90_s, + throughput_mib_s=mmap_summary.throughput_mib_s, + ) + ) + + if not args.keep_dataset: + cleanup_path(s_path) + cleanup_path(external_data_dir(args.dataset_root, container, storage, layout)) + + if args.json_out is not None: + args.json_out.parent.mkdir(parents=True, exist_ok=True) + payload = { + "config": { + "dataset_root": str(args.dataset_root), + "container": args.container, + "storage": args.storage, + "layout": args.layout, + "scenarios": args.scenarios, + "n_nodes": args.n_nodes, + "node_len": args.node_len, + "dtype": str(dtype), + "clevel": args.clevel, + "codec": args.codec, + "runs": args.runs, + "slice_len": args.slice_len, + "reads_per_node": args.reads_per_node, + "seed": args.seed, + "drop_caches_value": args.drop_caches_value, + }, + "results": [asdict(r) for r in rows], + } + with open(args.json_out, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2) + print(f"\nSaved JSON results to: {args.json_out}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())