From 26e38d45382ef22d981c52c079f12d83cf1c1702 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Thu, 18 Dec 2025 15:01:45 -0500 Subject: [PATCH] Cache the result of the zero-check Closes https://github.com/zarr-developers/zarr-python/issues/3627 --- changes/3628.misc.md | 1 + src/zarr/core/codec_pipeline.py | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 changes/3628.misc.md diff --git a/changes/3628.misc.md b/changes/3628.misc.md new file mode 100644 index 0000000000..0aa706e5cd --- /dev/null +++ b/changes/3628.misc.md @@ -0,0 +1 @@ +Avoid reading lazy arrays or on-device arrays twice when comparing them to the fill value during the writing process. diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index fd557ac43e..e77526e5b8 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -413,6 +413,12 @@ async def _read_key( if chunk_array is None: chunk_array_batch.append(None) # type: ignore[unreachable] else: + # The all_equal operation below will effectively force the array + # into memory. + # If the result is useful, we want to avoid reading it twice + # from a potentially lazy operation. So we cache it here. + # If the result is not useful, we leave it for the garbage collector. + chunk_array._data = chunk_array.as_numpy_array() if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal( fill_value_or_default(chunk_spec) ):