diff --git a/changes/3628.misc.md b/changes/3628.misc.md new file mode 100644 index 0000000000..0aa706e5cd --- /dev/null +++ b/changes/3628.misc.md @@ -0,0 +1 @@ +Avoid reading lazy arrays or on-device arrays twice when comparing them to the fill value during the writing process. diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index fd557ac43e..e77526e5b8 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -413,6 +413,12 @@ async def _read_key( if chunk_array is None: chunk_array_batch.append(None) # type: ignore[unreachable] else: + # The all_equal check below effectively forces the array + # into memory. + # If the result is useful, we want to avoid reading it twice + # from a potentially lazy operation, so we cache it here. + # If the result is not useful, we leave it for the garbage collector. + chunk_array._data = chunk_array.as_numpy_array() if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal( fill_value_or_default(chunk_spec) ):