diff --git a/.gitignore b/.gitignore index 3284865d6c..59b6632a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -92,5 +92,9 @@ tests/.hypothesis zarr/version.py zarr.egg-info/ +# Local agent / planning notes (not versioned) +.claude/ +CLAUDE.md +docs/superpowers/ # zarr-metadata package lockfile (a library, not an app) packages/zarr-metadata/uv.lock diff --git a/changes/3885.feature.1.md b/changes/3885.feature.1.md new file mode 100644 index 0000000000..0a51ba6844 --- /dev/null +++ b/changes/3885.feature.1.md @@ -0,0 +1 @@ +Added `SyncByteGetter` and `SyncByteSetter` runtime-checkable protocols and a `get_ranges_sync` method on the `Store` ABC. These let custom byte getters/setters opt into the synchronous codec pipeline's fast path for in-memory IO, which the sharding codec uses for its inner chunks. diff --git a/changes/3885.feature.md b/changes/3885.feature.md new file mode 100644 index 0000000000..9f4cd5cfbc --- /dev/null +++ b/changes/3885.feature.md @@ -0,0 +1 @@ +`FusedCodecPipeline` is now the default codec pipeline. It runs codec compute synchronously and in bulk (avoiding the per-chunk async scheduling overhead of `BatchedCodecPipeline`), giving large speedups for sharded arrays (up to ~24x writes / ~14x reads on many-chunks-per-shard layouts, more with compression) and no regressions on compute-bound workloads. The previous behavior is available by setting `zarr.config.set({"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"})`. diff --git a/pyproject.toml b/pyproject.toml index 9f6005f981..c7f99a454e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -445,6 +445,18 @@ filterwarnings = [ # collected without being entered. This is a known issue in s3fs/aiobotocore, and pytest # per-test filterwarnings markers can't catch it (https://github.com/pytest-dev/pytest/issues/14096). "ignore:Exception ignored ((on calling weakref callback)|(in[\\s\\S]*Session was never entered)):pytest.PytestUnraisableExceptionWarning", + # pytest-asyncio implicitly creates an event loop in _get_event_loop_no_warn during + # fixture setup/teardown and never closes it (allocation site verified with + # PYTHONTRACEMALLOC: pytest_asyncio/plugin.py). When the garbage collector reclaims + # that loop (and its self-pipe socketpair: AF_UNIX family=1 on POSIX, emulated with + # AF_INET family=2 on Windows) mid-test, the unraisable hook fails whichever unrelated + # test happens to be running — the long-standing "random cross-file failure" in the + # pipeline suites. The message contains only the __del__ repr, so these patterns cannot + # scope to pytest-asyncio specifically: a loop/socketpair leak in zarr's own sync + # machinery would also be silenced. Accepted tradeoff — revisit if zarr.core.sync grows + # loop-lifecycle changes. + "ignore:Exception ignored in[\\s\\S]* Sequence[tuple[int, Buffer | None]]: + """Synchronous, coalescing counterpart of `get_ranges`. + + Plans merged fetches with the same `coalesce_ranges` policy as the async + path, then issues one synchronous `get_sync` per merged group (or per + uncoalescable request) and slices results back into per-input buffers. + Used by the sync codec pipeline's partial-shard reads so they get the + same byte-range coalescing as the async path, without an event loop. + + Returns a list of `(input_index, Buffer | None)`. Raises + `BaseExceptionGroup` containing a `FileNotFoundError` if the key is + absent (matching `get_ranges`), so callers can handle a deleted shard + uniformly across the sync and async paths. + + Requires the store to implement `get_sync` (`SupportsGetSync`). + """ + from zarr.core._coalesce import coalesce_ranges + + if not isinstance(self, SupportsGetSync): + raise TypeError(f"{type(self).__name__} does not support synchronous reads") + + groups, uncoalescable = coalesce_ranges( + byte_ranges, max_gap_bytes=max_gap_bytes, max_coalesced_bytes=max_coalesced_bytes + ) + results: list[tuple[int, Buffer | None]] = [] + errors: list[BaseException] = [] + + def _get(req: ByteRequest | None) -> Buffer | None: + return self.get_sync(key, prototype=prototype, byte_range=req) + + for idx, req in uncoalescable: + buf = _get(req) + if buf is None: + errors.append(FileNotFoundError(key)) + else: + results.append((idx, buf)) + + for members in groups: + if len(members) == 1: + solo_idx, solo_req = members[0] + buf = _get(solo_req) + if buf is None: + errors.append(FileNotFoundError(key)) + else: + results.append((solo_idx, buf)) + continue + start = members[0][1].start + end = max(r.end for _, r in members) + big = _get(RangeByteRequest(start, end)) + if big is None: + errors.append(FileNotFoundError(key)) + continue + for member_idx, r in members: + results.append((member_idx, big[r.start - start : r.end - start])) + + if errors: + raise BaseExceptionGroup("chunk read failed", errors) + return results + async def getsize(self, key: str) -> int: """ Return the size, in bytes, of a value in a Store. @@ -562,6 +631,33 @@ async def delete(self) -> None: ... async def set_if_not_exists(self, default: Buffer) -> None: ... +@runtime_checkable +class SyncByteGetter(Protocol): + """A `ByteGetter` that can also fetch synchronously, without an event loop. + + Non-StorePath byte getters (e.g. the sharding codec's in-memory + `_ShardingByteGetter`) implement this so a synchronous codec pipeline can + take its sync fast path on them instead of scheduling one coroutine per + chunk. Note that `StorePath` also *has* a `get_sync` method (so it matches + this protocol structurally) but it only works when its store supports + synchronous IO — callers gate `StorePath` on the store's `SupportsGetSync` + instead of on this protocol. + """ + + def get_sync( + self, prototype: BufferPrototype | None = None, byte_range: ByteRequest | None = None + ) -> Buffer | None: ... + + +@runtime_checkable +class SyncByteSetter(SyncByteGetter, Protocol): + """A `ByteSetter` that can also write synchronously. See `SyncByteGetter`.""" + + def set_sync(self, value: Buffer) -> None: ... + + def delete_sync(self) -> None: ... + + @runtime_checkable class SupportsGetSync(Protocol): def get_sync( diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index 3c6c99c21c..7fdf408d1d 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -23,7 +23,7 @@ class V2Codec(ArrayBytesCodec): is_fixed_size = False - async def _decode_single( + def _decode_sync( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, @@ -31,14 +31,14 @@ async def _decode_single( cdata = chunk_bytes.as_array_like() # decompress if self.compressor: - chunk = await asyncio.to_thread(self.compressor.decode, cdata) + chunk = self.compressor.decode(cdata) else: chunk = cdata # apply filters if self.filters: for f in reversed(self.filters): - chunk = await asyncio.to_thread(f.decode, chunk) + chunk = f.decode(chunk) # view as numpy array with correct dtype chunk = ensure_ndarray_like(chunk) @@ -70,7 +70,7 @@ async def _decode_single( return get_ndbuffer_class().from_ndarray_like(chunk) - async def _encode_single( + def _encode_sync( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, @@ -83,18 +83,32 @@ async def _encode_single( # apply filters if self.filters: for f in self.filters: - chunk = await asyncio.to_thread(f.encode, chunk) + chunk = f.encode(chunk) # check object encoding if ensure_ndarray_like(chunk).dtype == object: raise RuntimeError("cannot write object array without object codec") # compress if self.compressor: - cdata = await asyncio.to_thread(self.compressor.encode, chunk) + cdata = self.compressor.encode(chunk) else: cdata = chunk cdata = ensure_bytes(cdata) return chunk_spec.prototype.buffer.from_bytes(cdata) + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + return await asyncio.to_thread(self._decode_sync, chunk_bytes, chunk_spec) + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + return await asyncio.to_thread(self._encode_sync, chunk_array, chunk_spec) + def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py index 1be1381a08..8b84ce538a 100644 --- a/src/zarr/codecs/numcodecs/_codecs.py +++ b/src/zarr/codecs/numcodecs/_codecs.py @@ -47,7 +47,7 @@ if TYPE_CHECKING: from zarr.abc.numcodec import Numcodec from zarr.core.array_spec import ArraySpec - from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer + from zarr.core.buffer import Buffer, NDBuffer CODEC_PREFIX = "numcodecs." @@ -134,53 +134,63 @@ class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) - async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: - return await asyncio.to_thread( - as_numpy_array_wrapper, - self._codec.decode, - chunk_data, - chunk_spec.prototype, - ) + def _decode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: + return as_numpy_array_wrapper(self._codec.decode, chunk_data, chunk_spec.prototype) - def _encode(self, chunk_data: Buffer, prototype: BufferPrototype) -> Buffer: + def _encode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: encoded = self._codec.encode(chunk_data.as_array_like()) if isinstance(encoded, np.ndarray): # Required for checksum codecs - return prototype.buffer.from_bytes(encoded.tobytes()) - return prototype.buffer.from_bytes(encoded) + return chunk_spec.prototype.buffer.from_bytes(encoded.tobytes()) + return chunk_spec.prototype.buffer.from_bytes(encoded) + + async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: + return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec) async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: - return await asyncio.to_thread(self._encode, chunk_data, chunk_spec.prototype) + return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec) class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) - async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: + def _decode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_ndarray = chunk_data.as_ndarray_like() - out = await asyncio.to_thread(self._codec.decode, chunk_ndarray) + out = self._codec.decode(chunk_ndarray) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) - async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: + def _encode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_ndarray = chunk_data.as_ndarray_like() - out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) + out = self._codec.encode(chunk_ndarray) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out) + async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: + return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec) + + async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: + return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec) + class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) - async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer: + def _decode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_bytes = chunk_data.to_bytes() - out = await asyncio.to_thread(self._codec.decode, chunk_bytes) + out = self._codec.decode(chunk_bytes) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) - async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer: + def _encode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer: chunk_ndarray = chunk_data.as_ndarray_like() - out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) + out = self._codec.encode(chunk_ndarray) return chunk_spec.prototype.buffer.from_bytes(out) + async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer: + return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec) + + async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer: + return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec) + # bytes-to-bytes codecs class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 657484e9af..ab8282338e 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, replace from enum import Enum from functools import lru_cache -from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast +from typing import TYPE_CHECKING, Any, Literal, NamedTuple import numpy as np import numpy.typing as npt @@ -15,13 +15,16 @@ ArrayBytesCodecPartialEncodeMixin, Codec, CodecPipeline, + SupportsSyncCodec, ) from zarr.abc.store import ( ByteGetter, ByteRequest, ByteSetter, RangeByteRequest, + Store, SuffixByteRequest, + SupportsGetSync, ) from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec @@ -41,6 +44,8 @@ parse_shapelike, product, ) +from zarr.core.config import config as zarr_config +from zarr.core.dtype.common import HasEndianness from zarr.core.dtype.npy.int import UInt64 from zarr.core.indexing import ( BasicIndexer, @@ -96,13 +101,21 @@ def parse_index_location(data: object) -> ShardingCodecIndexLocation: @dataclass(frozen=True) class _ShardingByteGetter(ByteGetter): + """In-memory byte getter for one inner chunk of a shard. + + Implements `SyncByteGetter` (dict access needs no event loop), so the + synchronous codec pipeline takes its sync fast path on inner chunks + instead of scheduling one coroutine per chunk; the async `get` simply + delegates to `get_sync`. + """ + shard_dict: ShardMapping chunk_coords: tuple[int, ...] - async def get( - self, prototype: BufferPrototype, byte_range: ByteRequest | None = None + def get_sync( + self, prototype: BufferPrototype | None = None, byte_range: ByteRequest | None = None ) -> Buffer | None: - assert prototype == default_buffer_prototype(), ( + assert prototype is None or prototype == default_buffer_prototype(), ( f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}" ) value = self.shard_dict.get(self.chunk_coords) @@ -113,17 +126,33 @@ async def get( start, stop = _normalize_byte_range_index(value, byte_range) return value[start:stop] + async def get( + self, prototype: BufferPrototype, byte_range: ByteRequest | None = None + ) -> Buffer | None: + return self.get_sync(prototype, byte_range) + @dataclass(frozen=True) class _ShardingByteSetter(_ShardingByteGetter, ByteSetter): + """In-memory byte setter for one inner chunk of a shard. + + Implements `SyncByteSetter`; the async methods delegate to the sync ones. + """ + shard_dict: ShardMutableMapping + def set_sync(self, value: Buffer) -> None: + self.shard_dict[self.chunk_coords] = value + + def delete_sync(self) -> None: + del self.shard_dict[self.chunk_coords] + async def set(self, value: Buffer, byte_range: ByteRequest | None = None) -> None: assert byte_range is None, "byte_range is not supported within shards" - self.shard_dict[self.chunk_coords] = value + self.set_sync(value) async def delete(self) -> None: - del self.shard_dict[self.chunk_coords] + self.delete_sync() async def set_if_not_exists(self, default: Buffer) -> None: self.shard_dict.setdefault(self.chunk_coords, default) @@ -147,6 +176,24 @@ def is_all_empty(self) -> bool: def get_full_chunk_map(self) -> npt.NDArray[np.bool_]: return np.not_equal(self.offsets_and_lengths[..., 0], MAX_UINT_64) + def is_dense(self, chunk_byte_length: int) -> bool: + """True when every chunk is present, fixed-length, and uniquely placed. + + Used to gate the vectorized whole-shard decode: a dense fixed-size shard + is a regular grid of equal-length payloads, so it can be reshaped/scattered + in bulk rather than decoded chunk-by-chunk. + """ + offsets = self.offsets_and_lengths[..., 0].reshape(-1) + lengths = self.offsets_and_lengths[..., 1].reshape(-1) + # all present + if bool(np.any(offsets == MAX_UINT_64)): + return False + # all the same fixed length + if not bool(np.all(lengths == chunk_byte_length)): + return False + # offsets unique (no two chunks share a slot) + return int(np.unique(offsets).size) == int(offsets.size) + def get_chunk_slice(self, chunk_coords: tuple[int, ...]) -> tuple[int, int] | None: localized_chunk = self._localize_chunk(chunk_coords) chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk] @@ -301,6 +348,8 @@ class ShardingCodec( does not recover it (the setting reverts to the `morton` default per codec instance). """ + is_fixed_size = False + chunk_shape: tuple[int, ...] codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] @@ -339,6 +388,13 @@ def __init__( # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + object.__setattr__(self, "_shard_index_size", lru_cache()(self._shard_index_size)) + object.__setattr__( + self, "_get_inner_chunk_transform", lru_cache()(self._get_inner_chunk_transform) + ) + object.__setattr__( + self, "_get_index_chunk_transform", lru_cache()(self._get_index_chunk_transform) + ) # todo: typedict return type def __getstate__(self) -> dict[str, Any]: @@ -358,6 +414,13 @@ def __setstate__(self, state: dict[str, Any]) -> None: # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + object.__setattr__(self, "_shard_index_size", lru_cache()(self._shard_index_size)) + object.__setattr__( + self, "_get_inner_chunk_transform", lru_cache()(self._get_inner_chunk_transform) + ) + object.__setattr__( + self, "_get_index_chunk_transform", lru_cache()(self._get_index_chunk_transform) + ) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: @@ -366,8 +429,48 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: @property def codec_pipeline(self) -> CodecPipeline: + # Resolve against the configured pipeline (registry default), matching the + # rest of this module's use of get_pipeline_class — NOT a hard-coded + # BatchedCodecPipeline. This restores main's behavior (#2179) that the + # branch had reverted: the inner sub-chunk pipeline follows the same + # codec_pipeline.path config as the outer array. return get_pipeline_class().from_codecs(self.codecs) + def _get_inner_pipeline(self, shard_spec: ArraySpec) -> CodecPipeline: + """The nested pipeline for inner-chunk IO, evolved against the inner + chunk spec. + + Evolving matters for two reasons: it threads the spec through the inner + codec chain (spec-changing codecs see the spec they will actually + operate on), and — for a synchronous pipeline — it builds the sync + transform, so inner-chunk IO over the (sync-capable) sharding byte + getters takes the pipeline's sync fast path instead of scheduling one + coroutine per inner chunk. The bare `codec_pipeline` property returns an + unevolved pipeline, which a synchronous pipeline can only run through + its async fallback. + + Memoized per (pipeline class, batch size, shard_spec): evolving builds + a ChunkTransform, which is wasteful to redo on every shard operation. + The pipeline class and batch size participate in the key so the + `codec_pipeline.path` and `codec_pipeline.batch_size` configs are still + honored after the first use (`from_codecs` captures batch_size at + construction). A benign construction race between threads is possible + (last writer wins) — same as the other caches here. + """ + cache: dict[tuple[type[CodecPipeline], int, ArraySpec], CodecPipeline] | None = getattr( + self, "_inner_pipeline_cache", None + ) + if cache is None: + cache = {} + object.__setattr__(self, "_inner_pipeline_cache", cache) + key = (get_pipeline_class(), zarr_config.get("codec_pipeline.batch_size"), shard_spec) + pipeline = cache.get(key) + if pipeline is None: + chunk_spec = self._get_chunk_spec(shard_spec) + pipeline = self.codec_pipeline.evolve_from_array_spec(chunk_spec) + cache[key] = pipeline + return pipeline + def to_dict(self) -> dict[str, JSON]: return { "name": "sharding_indexed", @@ -380,8 +483,16 @@ def to_dict(self) -> dict[str, JSON]: } def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + # Thread the spec through the inner chain (`evolve_codecs`): each codec + # is evolved against the spec produced by the previous one. Evolving + # every codec against the same unthreaded spec is the bug shape that + # strips `BytesCodec.endian` behind a dtype-changing codec — and this + # method runs on the real array-creation path, baking the damaged chain + # into the evolved instance before the transform builders ever run. + from zarr.core.codec_pipeline import evolve_codecs + shard_spec = self._get_chunk_spec(array_spec) - evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs) + evolved_codecs = evolve_codecs(self.codecs, shard_spec) if evolved_codecs != self.codecs: return replace(self, codecs=evolved_codecs) return self @@ -416,6 +527,395 @@ def validate( f"divisible by the shard's inner chunk size {inner}." ) + def _get_inner_chunk_transform(self, shard_spec: ArraySpec) -> Any: + """The synchronous transform for the inner codec chain. + + Memoized by the instance-local `lru_cache` wrapping installed in + `__init__`/`__setstate__` (the single cache mechanism for these + builders — do not add another layer inside the body). + + Codecs are evolved with the spec THREADED forward (`evolve_codecs`): + each inner codec is evolved against the spec produced by the previous + one, not the original chunk spec. Evolving every codec against the same + unthreaded spec is the bug shape that stripped `BytesCodec.endian` at + the pipeline level (see `evolve_codecs`) — the inner chain must use the + same single source of truth. + """ + from zarr.core.codec_pipeline import ChunkTransform, evolve_codecs + + chunk_spec = self._get_chunk_spec(shard_spec) + return ChunkTransform(codecs=evolve_codecs(self.codecs, chunk_spec)) + + def _get_index_chunk_transform(self, chunks_per_shard: tuple[int, ...]) -> Any: + """The synchronous transform for the index codec chain. + + Memoized via instance-local `lru_cache` and spec-threaded for the same + reasons as `_get_inner_chunk_transform`. + """ + from zarr.core.codec_pipeline import ChunkTransform, evolve_codecs + + index_spec = self._get_index_chunk_spec(chunks_per_shard) + return ChunkTransform(codecs=evolve_codecs(self.index_codecs, index_spec)) + + def _decode_shard_index_sync( + self, index_bytes: Buffer, chunks_per_shard: tuple[int, ...] + ) -> _ShardIndex: + """Decode shard index synchronously using ChunkTransform.""" + index_transform = self._get_index_chunk_transform(chunks_per_shard) + index_spec = self._get_index_chunk_spec(chunks_per_shard) + index_array = index_transform.decode_chunk(index_bytes, index_spec) + return _ShardIndex(chunks_per_shard, index_array.as_numpy_array()) + + def _encode_shard_index_sync(self, index: _ShardIndex) -> Buffer: + """Encode shard index synchronously using ChunkTransform.""" + index_transform = self._get_index_chunk_transform(index.chunks_per_shard) + index_spec = self._get_index_chunk_spec(index.chunks_per_shard) + index_nd = get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths) + result: Buffer | None = index_transform.encode_chunk(index_nd, index_spec) + assert result is not None + return result + + def _shard_reader_from_bytes_sync( + self, buf: Buffer, chunks_per_shard: tuple[int, ...] + ) -> _ShardReader: + """Sync version of _ShardReader.from_bytes.""" + shard_index_size = self._shard_index_size(chunks_per_shard) + if self.index_location == ShardingCodecIndexLocation.start: + shard_index_bytes = buf[:shard_index_size] + else: + shard_index_bytes = buf[-shard_index_size:] + index = self._decode_shard_index_sync(shard_index_bytes, chunks_per_shard) + reader = _ShardReader() + reader.buf = buf + reader.index = index + return reader + + def _decode_sync( + self, + shard_bytes: Buffer, + shard_spec: ArraySpec, + ) -> NDBuffer: + """Decode a full shard synchronously. + + Sync counterpart to `_decode_single`. Same semantics (decode every + inner chunk and assemble the full shard array) but routes through + `ChunkTransform` instead of the async codec pipeline, so it can + run on the sync codec-pipeline fast path without an event loop. + + For a partial read where the caller only needs a slice of the shard, + use `_decode_partial_sync` instead — it fetches only the byte + ranges that overlap the selection. + + This method does not parallelize decompression, but should. + See TODO: make issue for handling subchunk parallelism + """ + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + inner_transform = self._get_inner_chunk_transform(shard_spec) + + indexer = BasicIndexer( + tuple(slice(0, s) for s in shard_shape), + shape=shard_shape, + chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape), + ) + + out = chunk_spec.prototype.nd_buffer.empty( + shape=shard_shape, + dtype=shard_spec.dtype.to_native_dtype(), + order=shard_spec.order, + ) + + shard_dict = self._shard_reader_from_bytes_sync(shard_bytes, chunks_per_shard) + + if shard_dict.index.is_all_empty(): + out.fill(shard_spec.fill_value) + return out + + from zarr.core.codec_pipeline import decode_and_scatter_chunk + + for chunk_coords, chunk_selection, out_selection, _ in indexer: + # the GetResult status is discarded: missing INNER chunks of a + # present shard always fill (read_missing_chunks is a store-key + # level promise, applied to top-level statuses at the array layer) + decode_and_scatter_chunk( + shard_dict.get(chunk_coords), + out, + chunk_spec=chunk_spec, + chunk_selection=chunk_selection, + out_selection=out_selection, + drop_axes=(), + decode=inner_transform.decode_chunk, + ) + + return out + + def _encode_sync( + self, + shard_array: NDBuffer, + shard_spec: ArraySpec, + ) -> Buffer | None: + """Encode a full shard synchronously. + + Sync counterpart to `_encode_single`. This is reached when a + `ShardingCodec` is an *inner* codec of another sharding codec (nested + sharding): the outer codec encodes each inner chunk through its + `ChunkTransform`, which calls this method on the inner `ShardingCodec`. + + Each inner chunk is encoded through the inner `ChunkTransform` and + collected into an intermediate `dict`. The dict's key order is + immaterial — the physical on-disk layout is decided downstream by the + `subchunk_write_order` loop in `_encode_shard_dict_sync` (this method + does NOT impose a layout). Empty inner chunks become `None` entries when + `write_empty_chunks` is False, signalling `_encode_shard_dict_sync` to + elide them from the data section and mark them empty in the shard index. + + Returns `None` if every inner chunk was elided (an all-empty shard) — + callers treat that as "delete the shard key". + + This method does not parallelize compression, but should. + See TODO: make issue for handling subchunk parallelism + + For a partial write that only touches some inner chunks, use + `_encode_partial_sync` instead. + """ + shard_shape = shard_spec.shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + inner_transform = self._get_inner_chunk_transform(shard_spec) + + indexer = BasicIndexer( + tuple(slice(0, s) for s in shard_shape), + shape=shard_shape, + chunk_grid=ChunkGrid.from_sizes(shard_shape, self.chunk_shape), + ) + + from zarr.core.codec_pipeline import encode_or_elide_chunk + + # Key order here is immaterial; _encode_shard_dict_sync lays the present + # chunks out in subchunk_write_order. + shard_builder: dict[tuple[int, ...], Buffer | None] = dict.fromkeys( + morton_order_iter(chunks_per_shard) + ) + + for chunk_coords, _chunk_selection, out_selection, _ in indexer: + # None = chunk normalized to missing (see encode_or_elide_chunk) + shard_builder[chunk_coords] = encode_or_elide_chunk( + shard_array[out_selection], chunk_spec, inner_transform.encode_chunk + ) + + return self._encode_shard_dict_sync( + shard_builder, + chunks_per_shard=chunks_per_shard, + buffer_prototype=default_buffer_prototype(), + ) + + def _encode_partial_sync( + self, + byte_setter: Any, + value: NDBuffer, + selection: SelectorTuple, + shard_spec: ArraySpec, + ) -> None: + """Sync equivalent of `_encode_partial_single`. + + Receives the source data for the written region (not a pre-merged + shard array) and the selection within the shard, matching the + calling convention of the async partial-encode path used by + `BatchedCodecPipeline`. + + This method does not parallelize compression, but should. + See TODO: make issue for handling subchunk parallelism + + Loads the existing shard, merges the written region into the affected + inner chunks, and rewrites the whole shard. + """ + shard_shape = shard_spec.shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + inner_transform = self._get_inner_chunk_transform(shard_spec) + + indexer = list( + get_indexer( + selection, + shape=shard_shape, + chunk_grid=ChunkGrid.from_sizes(shard_shape, self.chunk_shape), + ) + ) + + is_complete = self._is_complete_shard_write(indexer, chunks_per_shard) + + is_scalar = len(value.shape) == 0 + + # Load existing inner-chunk bytes into a dict (same structure as + # the async path's shard_dict). + if is_complete: + shard_dict: dict[tuple[int, ...], Buffer | None] = dict.fromkeys( + morton_order_iter(chunks_per_shard) + ) + else: + existing_bytes = byte_setter.get_sync(prototype=shard_spec.prototype) + if existing_bytes is not None: + shard_reader_fb = self._shard_reader_from_bytes_sync( + existing_bytes, chunks_per_shard + ) + # Build the dict with one vectorized index lookup over all chunks, + # matching the async _encode_partial_single path. A per-coordinate + # __getitem__ loop here is O(n_chunks) Python overhead that dominates + # partial writes into shards with many inner chunks. + shard_dict = shard_reader_fb.to_dict_vectorized( + np.indices(chunks_per_shard).reshape(len(chunks_per_shard), -1).T + ) + else: + shard_dict = dict.fromkeys(morton_order_iter(chunks_per_shard)) + + from zarr.core.codec_pipeline import merge_and_encode_chunk + + # Merge, encode, and store each affected inner chunk into shard_dict via + # the canonical merge_and_encode_chunk (None = normalized to missing). + # + # Scalar fast path: when the written value is a scalar broadcast, every + # *complete* inner chunk is byte-for-byte identical — same fill, same + # empty-check, same encoded bytes. Compute that outcome once and reuse it + # for all complete chunks instead of re-merging, re-checking, and + # re-encoding tens of thousands of identical chunks. Incomplete (edge) + # chunks still merge against their own existing data individually. + # `_sentinel` distinguishes "not computed yet" from a memoized `None` + # (an empty chunk). + _sentinel = object() + scalar_complete_result: Buffer | None | object = _sentinel + + for chunk_coords, chunk_sel, out_sel, is_complete_chunk in indexer: + if is_scalar and is_complete_chunk: + if scalar_complete_result is _sentinel: + scalar_complete_result = merge_and_encode_chunk( + None, + value, + chunk_spec=chunk_spec, + chunk_selection=chunk_sel, + out_selection=out_sel, + is_complete=is_complete_chunk, + drop_axes=(), + decode=inner_transform.decode_chunk, + encode=inner_transform.encode_chunk, + ) + shard_dict[chunk_coords] = scalar_complete_result # type: ignore[assignment] + continue + + # A complete chunk fully overwrites: skip decoding what it replaces. + existing_raw = None if is_complete_chunk else shard_dict.get(chunk_coords) + shard_dict[chunk_coords] = merge_and_encode_chunk( + existing_raw, + value, + chunk_spec=chunk_spec, + chunk_selection=chunk_sel, + out_selection=out_sel, + is_complete=is_complete_chunk, + drop_axes=(), + decode=inner_transform.decode_chunk, + encode=inner_transform.encode_chunk, + ) + + blob = self._encode_shard_dict_sync( + shard_dict, + chunks_per_shard=chunks_per_shard, + buffer_prototype=default_buffer_prototype(), + ) + if blob is None: + byte_setter.delete_sync() + else: + byte_setter.set_sync(blob) + + def _build_shard_layout( + self, + shard_dict: ShardMapping, + chunks_per_shard: tuple[int, ...], + ) -> tuple[_ShardIndex, list[Buffer]] | None: + """Lay out the present inner chunks of a shard. Pure compute, no IO. + + Packs the encoded inner chunks (in the codec's `subchunk_write_order`) + into a contiguous data section and builds a shard index pointing each + present chunk at its ABSOLUTE byte offset within the final blob: when + the index is stored at the start, offsets are pre-shifted by the index + size (known without encoding — the index codecs are fixed-size, which + every index read path already relies on), so the index can be encoded + exactly once by the caller. + + Returns `(index, data_buffers)`, or `None` for an all-empty shard (no + chunks present). Shared by the sync and async `_encode_shard_dict*` so + the layout/offset logic cannot drift between them. + """ + index = _ShardIndex.create_empty(chunks_per_shard) + buffers: list[Buffer] = [] + chunk_start = ( + self._shard_index_size(chunks_per_shard) + if self.index_location == ShardingCodecIndexLocation.start + else 0 + ) + + for chunk_coords in self._subchunk_order_iter(chunks_per_shard, self.subchunk_write_order): + value = shard_dict.get(chunk_coords) + if value is None or len(value) == 0: + continue + chunk_length = len(value) + buffers.append(value) + index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) + chunk_start += chunk_length + + if len(buffers) == 0: + return None + return index, buffers + + def _assemble_shard( + self, + index_bytes: Buffer, + buffers: list[Buffer], + buffer_prototype: BufferPrototype, + *, + chunks_per_shard: tuple[int, ...], + ) -> Buffer: + """Concatenate the encoded index and data buffers into the shard blob. + + The layout from `_build_shard_layout` already assumes the index size, so + the encoded index length must match `_shard_index_size` exactly — guard + that assumption rather than silently corrupt offsets. The guard lives + here, once, for both the sync and async encode paths. + """ + if len(index_bytes) != self._shard_index_size(chunks_per_shard): + raise RuntimeError( + "encoded shard index size does not match _shard_index_size; " + "variable-size index codecs are not supported" + ) + if self.index_location == ShardingCodecIndexLocation.start: + buffers.insert(0, index_bytes) + else: + buffers.append(index_bytes) + template = buffer_prototype.buffer.create_zero_length() + return template.combine(buffers) + + def _encode_shard_dict_sync( + self, + shard_dict: ShardMapping, + chunks_per_shard: tuple[int, ...], + buffer_prototype: BufferPrototype, + ) -> Buffer | None: + """Sync version of _encode_shard_dict. + + Layout via the shared `_build_shard_layout` (offsets already absolute), + then a single index encode and concatenation. + + Returns `None` for an all-empty shard (no chunks present). + """ + layout = self._build_shard_layout(shard_dict, chunks_per_shard) + if layout is None: + return None + index, buffers = layout + index_bytes = self._encode_shard_index_sync(index) + return self._assemble_shard( + index_bytes, buffers, buffer_prototype, chunks_per_shard=chunks_per_shard + ) + async def _decode_single( self, shard_bytes: Buffer, @@ -445,7 +945,7 @@ async def _decode_single( return out # decoding chunks and writing them into the output buffer - await self.codec_pipeline.read( + await self._get_inner_pipeline(shard_spec).read( [ ( _ShardingByteGetter(shard_dict, chunk_coords), @@ -511,7 +1011,7 @@ async def _decode_partial_single( shard_dict = shard_dict_maybe # decoding chunks and writing them into the output buffer - await self.codec_pipeline.read( + await self._get_inner_pipeline(shard_spec).read( [ ( _ShardingByteGetter(shard_dict, chunk_coords), @@ -548,6 +1048,192 @@ def _subchunk_order_iter( raise ValueError(f"Unrecognized subchunk write order: {subchunk_write_order!r}.") return subchunk_iter + def _decode_full_shard_bulk( + self, + shard_bytes: Buffer, + shard_spec: ArraySpec, + indexer: Any, + ) -> NDBuffer | None: + """Vectorized whole-shard decode for dense, fixed-size, uncompressed shards. + + Returns the assembled shard array, or None if the fast path does not + apply (so the caller falls back to the per-chunk loop). Conditions: + - inner codec chain is fixed-size (no compression / variable-length); + - the inner codec chain is exactly a single BytesCodec — decode is a + dtype/endian view with no reordering. A trailing crc32c is NOT accepted + (the bulk path can't verify per-chunk checksums, so crc shards keep the + per-chunk path's corruption detection); + - the stored index is dense (every chunk present, equal fixed length, + contiguous) so the data section is a regular grid of chunk payloads. + + Chunk positions are read from the stored index, so this is correct for + any `subchunk_write_order` (morton / lexicographic / colexicographic / + unordered). The on-disk byte order is taken from the BytesCodec's + `endian`, so big- and little-endian shards both decode correctly. + """ + # --- gate on a trivial, fixed-size inner codec chain --- + if not self._inner_codecs_fixed_size: + return None + # The inner chain must be exactly a single BytesCodec (a dtype/endian + # view, no reordering). A trailing Crc32cCodec is excluded on purpose: + # the bulk path would have to strip-and-discard the per-chunk checksum + # bytes, silently dropping the corruption detection the per-chunk path + # enforces (Crc32cCodec._decode_sync raises on mismatch). crc-protected + # shards therefore fall through to the per-chunk path. + if len(self.codecs) != 1 or not isinstance(self.codecs[0], BytesCodec): + return None + ab_codec = self.codecs[0] + + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + n_chunks = product(chunks_per_shard) + if n_chunks == 0: + return None + + # Only valid when the read is the ENTIRE shard contiguously (output shape + # equals the shard shape). A strided/fancy read may touch all chunks but + # not want the whole grid laid out densely — those must use the per-chunk + # path so chunk_selection / out_selection are honored. + if tuple(indexer.shape) != tuple(shard_spec.shape): + return None + chunk_byte_length = self._inner_chunk_byte_length(chunk_spec) + + shard_index_size = self._shard_index_size(chunks_per_shard) + if len(shard_bytes) != n_chunks * chunk_byte_length + shard_index_size: + return None # not a dense fixed-size shard + + # --- decode the index; require dense layout --- + if self.index_location == ShardingCodecIndexLocation.start: + index_bytes = shard_bytes[:shard_index_size] + else: + index_bytes = shard_bytes[-shard_index_size:] + index = self._decode_shard_index_sync(index_bytes, chunks_per_shard) + if not index.is_dense(chunk_byte_length): + return None + + # --- bulk reconstruct --- + # The index gives each chunk's absolute byte offset within the blob; with + # a dense, crc-free, fixed-size layout the payload length is exactly the + # encoded item-bytes of one chunk. + native_dtype = shard_spec.dtype.to_native_dtype() + raw = shard_bytes.as_numpy_array().view(np.uint8) + payload = chunk_byte_length + cs = self.chunk_shape + + # On-disk byte order is carried by the BytesCodec's `endian`, NOT by the + # data type (zarr v3). Build the read-view dtype from the codec's endian + # exactly as BytesCodec._decode_sync does, so a big-endian shard read on a + # little-endian host (or vice versa) is interpreted correctly. Assigning + # into the native-dtype `out` then performs any needed byteswap. + endian_str = ab_codec.endian.value if ab_codec.endian is not None else None + if isinstance(chunk_spec.dtype, HasEndianness): + stored_dtype = replace(chunk_spec.dtype, endianness=endian_str).to_native_dtype() # type: ignore[call-arg] + else: + stored_dtype = chunk_spec.dtype.to_native_dtype() + + offsets = index.offsets_and_lengths[..., 0].reshape(-1) # localized coords, C-order + coords_c = list(np.ndindex(chunks_per_shard)) + out = shard_spec.prototype.nd_buffer.empty( + shape=indexer.shape, dtype=native_dtype, order=shard_spec.order + ) + for flat, coord in enumerate(coords_c): + start = int(offsets[flat]) + chunk = raw[start : start + payload].view(stored_dtype).reshape(cs) + sel = tuple(slice(c * s, c * s + s) for c, s in zip(coord, cs, strict=True)) + out[sel] = chunk + return out + + def _decode_partial_sync( + self, + byte_getter: Any, + selection: SelectorTuple, + shard_spec: ArraySpec, + ) -> NDBuffer | None: + """Sync equivalent of `_decode_partial_single`. + + Reads only the inner-chunk byte ranges that overlap `selection` + (plus the shard index) and decodes them through the inner codec + chain. The store must support `get_sync` with byte ranges. + + This method does not parallelize decompression, but should. + See TODO: make issue for handling subchunk parallelism + + Two sub-paths: + - If `selection` covers the entire shard, just fetch the whole + blob — that's strictly cheaper than two round trips (index, then + data) plus the per-chunk overhead of partial fetches. + - Otherwise fetch the index alone, look up only the byte slices of + the inner chunks the selection touches, fetch those, and decode. + """ + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + inner_transform = self._get_inner_chunk_transform(shard_spec) + + indexer = get_indexer( + selection, + shape=shard_shape, + chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape), + ) + + out = shard_spec.prototype.nd_buffer.empty( + shape=indexer.shape, + dtype=shard_spec.dtype.to_native_dtype(), + order=shard_spec.order, + ) + + indexed_chunks = list(indexer) + all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks} + + # Read just the inner chunks we need. + if self._is_total_shard(all_chunk_coords, chunks_per_shard): + shard_bytes = byte_getter.get_sync(prototype=chunk_spec.prototype) + if shard_bytes is None: + return None + # Bulk fast path: a whole-shard read of a dense, fixed-size shard + # (no compression/filters) is just the data section reshaped and + # reordered into the grid -- no per-chunk decode/scatter loop. + # Order-agnostic: chunk positions come from the stored index, so it + # is correct for any subchunk_write_order. Falls through on any + # mismatch (compression, partial shard, non-trivial inner codec). + bulk = self._decode_full_shard_bulk(shard_bytes, shard_spec, indexer) + if bulk is not None: + if hasattr(indexer, "sel_shape"): + return bulk.reshape(indexer.sel_shape) + return bulk + shard_reader = self._shard_reader_from_bytes_sync(shard_bytes, chunks_per_shard) + shard_dict: ShardMapping = shard_reader + else: + # Partial read: fetch only the touched inner chunks, coalescing + # adjacent byte ranges (mirrors the async _load_partial_shard_maybe + # / #3004). Returns None if the shard is absent. + partial = self._load_partial_shard_maybe_sync( + byte_getter, chunk_spec.prototype, chunks_per_shard, all_chunk_coords + ) + if partial is None: + return None + shard_dict = partial + + from zarr.core.codec_pipeline import decode_and_scatter_chunk + + # Decode each needed inner chunk and scatter into out (statuses + # discarded: missing inner chunks fill, see _decode_sync). + for chunk_coords, chunk_selection, out_selection, _ in indexed_chunks: + decode_and_scatter_chunk( + shard_dict.get(chunk_coords), + out, + chunk_spec=chunk_spec, + chunk_selection=chunk_selection, + out_selection=out_selection, + drop_axes=(), + decode=inner_transform.decode_chunk, + ) + + if hasattr(indexer, "sel_shape"): + return out.reshape(indexer.sel_shape) + return out + async def _encode_single( self, shard_array: NDBuffer, @@ -569,7 +1255,7 @@ async def _encode_single( # decided later by the `subchunk_write_order` loop in `_encode_shard_dict`. shard_builder = dict.fromkeys(np.ndindex(chunks_per_shard)) - await self.codec_pipeline.write( + await self._get_inner_pipeline(shard_spec).write( [ ( _ShardingByteSetter(shard_builder, chunk_coords), @@ -621,10 +1307,10 @@ async def _encode_partial_single( shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) # Use vectorized lookup for better performance shard_dict = shard_reader.to_dict_vectorized( - np.array(list(np.ndindex(chunks_per_shard))) + np.indices(chunks_per_shard).reshape(len(chunks_per_shard), -1).T ) - await self.codec_pipeline.write( + await self._get_inner_pipeline(shard_spec).write( [ ( _ShardingByteSetter(shard_dict, chunk_coords), @@ -654,40 +1340,17 @@ async def _encode_shard_dict( chunks_per_shard: tuple[int, ...], buffer_prototype: BufferPrototype, ) -> Buffer | None: - index = _ShardIndex.create_empty(chunks_per_shard) - - buffers = [] - - template = buffer_prototype.buffer.create_zero_length() - chunk_start = 0 - for chunk_coords in self._subchunk_order_iter(chunks_per_shard, self.subchunk_write_order): - value = map.get(chunk_coords) - if value is None: - continue - - if len(value) == 0: - continue - - chunk_length = len(value) - buffers.append(value) - index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) - chunk_start += chunk_length - - if len(buffers) == 0: + """Layout via the shared `_build_shard_layout` (offsets already + absolute), then a single index encode and concatenation. Async twin of + `_encode_shard_dict_sync`.""" + layout = self._build_shard_layout(map, chunks_per_shard) + if layout is None: return None - + index, buffers = layout index_bytes = await self._encode_shard_index(index) - if self.index_location == ShardingCodecIndexLocation.start: - empty_chunks_mask = index.offsets_and_lengths[..., 0] == MAX_UINT_64 - index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes) - index_bytes = await self._encode_shard_index( - index - ) # encode again with corrected offsets - buffers.insert(0, index_bytes) - else: - buffers.append(index_bytes) - - return template.combine(buffers) + return self._assemble_shard( + index_bytes, buffers, buffer_prototype, chunks_per_shard=chunks_per_shard + ) def _is_total_shard( self, all_chunk_coords: set[tuple[int, ...]], chunks_per_shard: tuple[int, ...] @@ -706,23 +1369,35 @@ def _is_complete_shard_write( is_complete_chunk for *_, is_complete_chunk in indexed_chunks ) + def _index_codecs_sync_capable(self) -> bool: + return all(isinstance(c, SupportsSyncCodec) for c in self.index_codecs) + async def _decode_shard_index( self, index_bytes: Buffer, chunks_per_shard: tuple[int, ...] ) -> _ShardIndex: + # Pure compute (the bytes are already in hand): delegate to the sync + # implementation instead of spinning up a pipeline + per-call + # AsyncChunkTransform for a tiny fixed-size decode. The default + # (bytes + crc32c) index chain is sync-capable; an async-only + # third-party index codec falls back to the full async pipeline, which + # the synchronous read paths cannot use but this async path still can. + if self._index_codecs_sync_capable(): + return self._decode_shard_index_sync(index_bytes, chunks_per_shard) index_array = next( iter( await get_pipeline_class() .from_codecs(self.index_codecs) - .decode( - [(index_bytes, self._get_index_chunk_spec(chunks_per_shard))], - ) + .decode([(index_bytes, self._get_index_chunk_spec(chunks_per_shard))]) ) ) - # This cannot be None because we have the bytes already - index_array = cast(NDBuffer, index_array) + assert index_array is not None # the bytes are already in hand return _ShardIndex(chunks_per_shard, index_array.as_numpy_array()) async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: + # Pure compute: delegate to the sync implementation, with the same + # async-pipeline fallback as _decode_shard_index. + if self._index_codecs_sync_capable(): + return self._encode_shard_index_sync(index) index_bytes = next( iter( await get_pipeline_class() @@ -733,12 +1408,11 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), ) - ], + ] ) ) ) assert index_bytes is not None - assert isinstance(index_bytes, Buffer) return index_bytes def _shard_index_size(self, chunks_per_shard: tuple[int, ...]) -> int: @@ -780,30 +1454,48 @@ def _get_chunks_per_shard(self, shard_spec: ArraySpec) -> tuple[int, ...]: ) ) + def _shard_index_byte_range( + self, chunks_per_shard: tuple[int, ...] + ) -> RangeByteRequest | SuffixByteRequest: + """Byte range of the shard index within the shard blob. + + Single source of truth for the index-location arithmetic, shared by the + sync and async index loaders so they cannot drift. + """ + shard_index_size = self._shard_index_size(chunks_per_shard) + if self.index_location == ShardingCodecIndexLocation.start: + return RangeByteRequest(0, shard_index_size) + return SuffixByteRequest(shard_index_size) + + @staticmethod + def _pair_chunks_with_byte_ranges( + shard_index: _ShardIndex, all_chunk_coords: set[tuple[int, ...]] + ) -> list[tuple[tuple[int, ...], RangeByteRequest]]: + """Pair each requested chunk coord with its byte range in the shard. + + Coords whose chunk is absent from the index are omitted. Shared by the + sync and async partial-shard loaders. + """ + chunk_coord_byte_ranges: list[tuple[tuple[int, ...], RangeByteRequest]] = [] + for chunk_coord in all_chunk_coords: + chunk_byte_slice = shard_index.get_chunk_slice(chunk_coord) + if chunk_byte_slice is not None: + chunk_coord_byte_ranges.append( + (chunk_coord, RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1])) + ) + return chunk_coord_byte_ranges + async def _load_shard_index_maybe( self, byte_getter: ByteGetter, chunks_per_shard: tuple[int, ...] ) -> _ShardIndex | None: - shard_index_size = self._shard_index_size(chunks_per_shard) - if self.index_location == ShardingCodecIndexLocation.start: - index_bytes = await byte_getter.get( - prototype=numpy_buffer_prototype(), - byte_range=RangeByteRequest(0, shard_index_size), - ) - else: - index_bytes = await byte_getter.get( - prototype=numpy_buffer_prototype(), byte_range=SuffixByteRequest(shard_index_size) - ) + index_bytes = await byte_getter.get( + prototype=numpy_buffer_prototype(), + byte_range=self._shard_index_byte_range(chunks_per_shard), + ) if index_bytes is not None: return await self._decode_shard_index(index_bytes, chunks_per_shard) return None - async def _load_shard_index( - self, byte_getter: ByteGetter, chunks_per_shard: tuple[int, ...] - ) -> _ShardIndex: - return ( - await self._load_shard_index_maybe(byte_getter, chunks_per_shard) - ) or _ShardIndex.create_empty(chunks_per_shard) - async def _load_full_shard_maybe( self, byte_getter: ByteGetter, prototype: BufferPrototype, chunks_per_shard: tuple[int, ...] ) -> _ShardReader | None: @@ -815,6 +1507,19 @@ async def _load_full_shard_maybe( else None ) + @property + def _inner_codecs_fixed_size(self) -> bool: + """True when all inner codecs produce fixed-size output (no compression).""" + return all(c.is_fixed_size for c in self.codecs) + + def _inner_chunk_byte_length(self, chunk_spec: ArraySpec) -> int: + """Encoded byte length of a single inner chunk. Only valid when _inner_codecs_fixed_size.""" + raw_byte_length = 1 + for s in self.chunk_shape: + raw_byte_length *= s + raw_byte_length *= chunk_spec.dtype.item_size # type: ignore[attr-defined] + return int(self.codec_pipeline.compute_encoded_size(raw_byte_length, chunk_spec)) + async def _load_partial_shard_maybe( self, byte_getter: ByteGetter, @@ -830,14 +1535,7 @@ async def _load_partial_shard_maybe( if shard_index is None: return None - # Pair up chunks and their byte ranges as list[tuple[chunk_coord, byte_range]] - chunk_coord_byte_ranges: list[tuple[tuple[int, ...], RangeByteRequest]] = [] - for chunk_coord in all_chunk_coords: - chunk_byte_slice = shard_index.get_chunk_slice(chunk_coord) - if chunk_byte_slice is not None: - chunk_coord_byte_ranges.append( - (chunk_coord, RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1])) - ) + chunk_coord_byte_ranges = self._pair_chunks_with_byte_ranges(shard_index, all_chunk_coords) if not chunk_coord_byte_ranges: return {} @@ -875,6 +1573,68 @@ async def _load_partial_shard_maybe( return shard_dict + def _load_shard_index_maybe_sync( + self, byte_getter: Any, chunks_per_shard: tuple[int, ...] + ) -> _ShardIndex | None: + """Sync counterpart of `_load_shard_index_maybe`.""" + index_bytes = byte_getter.get_sync( + prototype=numpy_buffer_prototype(), + byte_range=self._shard_index_byte_range(chunks_per_shard), + ) + if index_bytes is not None: + return self._decode_shard_index_sync(index_bytes, chunks_per_shard) + return None + + def _load_partial_shard_maybe_sync( + self, + byte_getter: Any, + prototype: BufferPrototype, + chunks_per_shard: tuple[int, ...], + all_chunk_coords: set[tuple[int, ...]], + ) -> ShardMapping | None: + """Sync counterpart of `_load_partial_shard_maybe` (the #3004 read path). + + Reads the shard index, then fetches only the touched inner chunks via the + store's coalescing `get_ranges_sync` (merging adjacent ranges into fewer + reads), matching the async path's IO shape without an event loop. + """ + shard_index = self._load_shard_index_maybe_sync(byte_getter, chunks_per_shard) + if shard_index is None: + return None + + chunk_coord_byte_ranges = self._pair_chunks_with_byte_ranges(shard_index, all_chunk_coords) + + if not chunk_coord_byte_ranges: + return {} + + shard_dict: ShardMutableMapping = {} + store = byte_getter.store if hasattr(byte_getter, "store") else None + if isinstance(store, Store) and isinstance(store, SupportsGetSync): + # External store: coalesce via get_ranges_sync (mirrors get_ranges). + byte_ranges = [byte_range for _, byte_range in chunk_coord_byte_ranges] + try: + for idx, buf in store.get_ranges_sync( + byte_getter.path, byte_ranges, prototype=prototype + ): + if buf is not None: + chunk_coord, _ = chunk_coord_byte_ranges[idx] + shard_dict[chunk_coord] = buf + except BaseExceptionGroup as eg: + # Mirror the async path: a FileNotFoundError means the shard was + # deleted mid-read -> treat as "gone" (None). Re-raise anything else. + _, rest = eg.split(FileNotFoundError) + if rest is not None: + raise rest from None + return None + else: + # Nested sharding: an in-memory _ShardingByteGetter, no IO to coalesce. + for chunk_coord, byte_range in chunk_coord_byte_ranges: + buf = byte_getter.get_sync(prototype=prototype, byte_range=byte_range) + if buf is not None: + shard_dict[chunk_coord] = buf + + return shard_dict + def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int: chunks_per_shard = self._get_chunks_per_shard(shard_spec) return input_byte_length + self._shard_index_size(chunks_per_shard) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index d15c70064b..bdf0bbe639 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -227,10 +227,35 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None pass if isinstance(metadata, ArrayV3Metadata): - return get_pipeline_class().from_codecs(metadata.codecs) + pipeline = get_pipeline_class().from_codecs(metadata.codecs) + from zarr.core.metadata.v3 import RegularChunkGridMetadata + + # Use the regular chunk shape if available, otherwise use a + # placeholder. The ChunkTransform is shape-agnostic — the actual + # chunk shape is passed per-call at decode/encode time. + if isinstance(metadata.chunk_grid, RegularChunkGridMetadata): + chunk_shape = metadata.chunk_grid.chunk_shape + else: + chunk_shape = (1,) * len(metadata.shape) + chunk_spec = ArraySpec( + shape=chunk_shape, + dtype=metadata.data_type, + fill_value=metadata.fill_value, + config=ArrayConfig.from_dict({}), + prototype=default_buffer_prototype(), + ) + return pipeline.evolve_from_array_spec(chunk_spec) elif isinstance(metadata, ArrayV2Metadata): v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor) - return get_pipeline_class().from_codecs([v2_codec]) + pipeline = get_pipeline_class().from_codecs([v2_codec]) + chunk_spec = ArraySpec( + shape=metadata.chunks, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=ArrayConfig.from_dict({"order": metadata.order}), + prototype=default_buffer_prototype(), + ) + return pipeline.evolve_from_array_spec(chunk_spec) raise TypeError # pragma: no cover diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 5c26681d6b..b8cff3c55c 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -1,8 +1,11 @@ from __future__ import annotations +import asyncio +import threading +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field from itertools import islice, pairwise -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from warnings import warn from zarr.abc.codec import ( @@ -23,7 +26,7 @@ from zarr.registry import register_pipeline if TYPE_CHECKING: - from collections.abc import Iterable, Iterator + from collections.abc import Callable, Iterable, Iterator, Sequence from typing import Self from zarr.abc.store import ByteGetter, ByteSetter @@ -33,6 +36,73 @@ from zarr.core.metadata.v3 import ChunkGridMetadata +_pool: ThreadPoolExecutor | None = None +_pool_size: int = 0 +_pool_lock = threading.Lock() + + +def _resolve_max_workers() -> int: + """Resolve `codec_pipeline.max_workers` config to an effective worker count. + + The default is `1` (sequential, no thread pool). `None` means "auto" → + `os.cpu_count()` (or 1 if unavailable). Values < 1 are clamped to 1. + + Notes + ----- + Threading is opt-in. The default is sequential because parallelism here is + not universally a win and carries downstream risk: enabling it runs custom + stores/codecs concurrently, and on many-core nodes a pool sized to + `cpu_count` can oversubscribe workloads that already parallelize at a higher + level (dask, MPI). It also slows small chunks (≲ 64 KB) by 1.5-3x, where the + per-task pool overhead (≈ 30-50 µs submit + worker handoff) outweighs the + work. + + For large chunks (≳ 1 MB encoded) where per-chunk decode + scatter is real + work, threading helps; opt in with an explicit positive count, or `None` for + auto (`os.cpu_count()`): + + zarr.config.set({"codec_pipeline.max_workers": 8}) + zarr.config.set({"codec_pipeline.max_workers": None}) # auto -> cpu_count + + Approximate breakeven on uncompressed reads: + 256-512 KB per chunk; compressed chunks shift it lower because decode is real + CPU work that benefits from parallelism. + """ + import os as _os + + cfg = config.get("codec_pipeline.max_workers", default=None) + if cfg is None: + return _os.cpu_count() or 1 + return max(1, int(cfg)) + + +def _get_pool(max_workers: int) -> ThreadPoolExecutor: + """Get or create the module-level thread pool, sized to `max_workers`. + + The pool grows on demand — if a request arrives for more workers than the + current pool has, it is replaced with a larger one. The previous pool is NOT + shut down here: another thread may be holding a reference to it and about to + submit (`shutdown` would make its `pool.map` raise "cannot schedule new + futures after shutdown"). The orphaned pool finishes its in-flight tasks and + is garbage-collected once no caller references it. The pool only grows, never + shrinks (a shrink request reuses the larger pool, leaving workers idle). + + Callers that want sequential execution should not call this — they + should run the task list inline. `max_workers` must be >= 1. + """ + global _pool, _pool_size + if max_workers < 1: + raise ValueError(f"max_workers must be >= 1, got {max_workers}") + if _pool is None or _pool_size < max_workers: + with _pool_lock: + if _pool is None or _pool_size < max_workers: + # Replace without shutting down the old pool (see docstring): + # avoids a race with a concurrent in-flight pool.map on it. + _pool = ThreadPoolExecutor(max_workers=max_workers) + _pool_size = max_workers + return _pool + + def _unzip2[T, U](iterable: Iterable[tuple[T, U]]) -> tuple[list[T], list[U]]: out0: list[T] = [] out1: list[U] = [] @@ -54,6 +124,101 @@ def resolve_batched(codec: Codec, chunk_specs: Iterable[ArraySpec]) -> Iterable[ return [codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] +def resolve_aa_specs( + aa_codecs: tuple[Codec, ...], chunk_spec: ArraySpec +) -> tuple[tuple[ArraySpec, ...], ArraySpec]: + """Resolve the per-stage chunk specs for a single chunk's codec chain. + + Threads `chunk_spec` forward through the array->array codecs via + `resolve_metadata` (each codec sees the spec produced by the previous one), + returning `(aa_specs, ab_spec)`: + + * `aa_specs[i]` is the spec the i-th AA codec operates on (its *input* on + encode / *output* on decode); + * `ab_spec` is the spec after all AA codecs — what the array->bytes codec + and the bytes->bytes codecs operate on. + + This is the single source of truth for per-stage spec evolution, shared by + the synchronous `ChunkTransform` and the asynchronous + `AsyncChunkTransform`. It is pure metadata (only `resolve_metadata`), so + it places no synchronous-codec requirement on the codecs. + """ + aa_specs: list[ArraySpec] = [] + spec = chunk_spec + for aa_codec in aa_codecs: + aa_specs.append(spec) + spec = aa_codec.resolve_metadata(spec) + return tuple(aa_specs), spec + + +def evolve_codecs(codecs: Iterable[Codec], array_spec: ArraySpec) -> tuple[Codec, ...]: + """Evolve a codec chain against ``array_spec``, threading the spec forward. + + Each codec is evolved against the spec produced by the previous one — NOT + the original ``array_spec`` — because earlier array->array codecs may + transform the chunk spec (e.g. ``cast_value`` widening int8 -> int16). A + later codec (notably the array->bytes serializer) must be evolved against + the spec it will actually see at run time; evolving every codec against the + unthreaded original spec would, for example, strip a ``BytesCodec``'s + ``endian`` (it sees the single-byte source dtype) and then fail at decode + time on the multi-byte target. + + This is the single source of truth for pipeline-construction-time codec + evolution, shared by every ``CodecPipeline.evolve_from_array_spec``. (The + per-chunk decode/encode counterpart is ``resolve_aa_specs``.) + """ + evolved: list[Codec] = [] + spec = array_spec + for codec in codecs: + evolved_codec = codec.evolve_from_array_spec(array_spec=spec) + evolved.append(evolved_codec) + spec = evolved_codec.resolve_metadata(spec) + return tuple(evolved) + + +def pipeline_supports_partial_decode( + array_bytes_codec: ArrayBytesCodec, + *, + array_array_codecs: tuple[ArrayArrayCodec, ...], + bytes_bytes_codecs: tuple[BytesBytesCodec, ...], + require_no_aa_bb: bool, +) -> bool: + """Whether a codec pipeline can decode a partial selection without a full read. + + Requires the array->bytes codec to implement + ``ArrayBytesCodecPartialDecodeMixin``. When ``require_no_aa_bb`` is True it + additionally requires no array->array / bytes->bytes codecs, because those + can change the slice<->byte-range correspondence (an AA codec can make the + selection non-contiguous, a BB codec can rewrite the bytes), making partial + decode infeasible. + + NOTE: the two pipelines currently pass different ``require_no_aa_bb`` values + (Batched: True; Fused: False). That divergence is intentional-for-now and + tracked separately; this function centralizes the predicate without changing + either pipeline's behavior. + """ + if require_no_aa_bb and (len(array_array_codecs) + len(bytes_bytes_codecs)) != 0: + return False + return isinstance(array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) + + +def pipeline_supports_partial_encode( + array_bytes_codec: ArrayBytesCodec, + *, + array_array_codecs: tuple[ArrayArrayCodec, ...], + bytes_bytes_codecs: tuple[BytesBytesCodec, ...], + require_no_aa_bb: bool, +) -> bool: + """Whether a codec pipeline can encode a partial selection without a full rewrite. + + Mirror of ``pipeline_supports_partial_decode`` for encoding. See its note re: + the per-pipeline ``require_no_aa_bb`` divergence. + """ + if require_no_aa_bb and (len(array_array_codecs) + len(bytes_bytes_codecs)) != 0: + return False + return isinstance(array_bytes_codec, ArrayBytesCodecPartialEncodeMixin) + + def fill_value_or_default(chunk_spec: ArraySpec) -> Any: fill_value = chunk_spec.fill_value if fill_value is None: @@ -67,26 +232,355 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: return fill_value +def _merge_chunk_array( + existing_chunk_array: NDBuffer | None, + value: NDBuffer, + out_selection: SelectorTuple, + chunk_spec: ArraySpec, + chunk_selection: SelectorTuple, + is_complete_chunk: bool, + drop_axes: tuple[int, ...], +) -> NDBuffer: + """Merge `value` into a full-chunk-shaped NDBuffer at `chunk_selection`. + + If `is_complete_chunk` and `value[out_selection]` is exactly chunk-shaped, + that VIEW of the caller's `value` is returned without copying — callers + (and the codecs they pass it to) must treat it as read-only, since + mutating it would corrupt the user's source array. Otherwise, a writable + buffer is materialized — either from `existing_chunk_array.copy()` if + one was read from the store, or freshly allocated and filled with the + chunk's fill value — and the relevant slice of `value` is written into it. + """ + if is_complete_chunk and value.shape != (): + selected = value[out_selection] + # The shape check guards against a partial edge chunk arriving with + # is_complete_chunk=True, and against dropped axes (size-1 integer + # dims), where the selection is not exactly chunk-shaped. + if selected.shape == chunk_spec.shape: + return selected + if existing_chunk_array is None: + chunk_array = chunk_spec.prototype.nd_buffer.create( + shape=chunk_spec.shape, + dtype=chunk_spec.dtype.to_native_dtype(), + order=chunk_spec.order, + fill_value=fill_value_or_default(chunk_spec), + ) + else: + chunk_array = existing_chunk_array.copy() + if chunk_selection == () or is_scalar( + value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype() + ): + chunk_value = value + else: + chunk_value = value[out_selection] + if drop_axes: + item = tuple( + None if idx in drop_axes else slice(None) for idx in range(chunk_spec.ndim) + ) + chunk_value = chunk_value[item] + chunk_array[chunk_selection] = chunk_value + return chunk_array + + +def chunk_is_empty(chunk_array: NDBuffer, chunk_spec: ArraySpec) -> bool: + """THE empty-chunk normalization rule, in one place. + + With ``write_empty_chunks=False`` (the default), a chunk whose decoded + content equals the fill value normalizes to *missing*: it must not be + stored, and readers reconstruct it from the fill value. Every write path + (fused, async fallback, shard inner chunks) must apply this same rule — + scattering inline ``all_equal`` checks is how the rule drifts. + """ + return not chunk_spec.config.write_empty_chunks and chunk_array.all_equal( + fill_value_or_default(chunk_spec) + ) + + +def encode_or_elide_chunk( + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + encode: Callable[[NDBuffer, ArraySpec], Buffer | None], +) -> Buffer | None: + """Encode a merged chunk, normalizing empties to missing. + + Returns the bytes to store, or ``None`` meaning the chunk must NOT be + stored (either it normalized to empty per `chunk_is_empty`, or the codec + chain elided it). ``None`` is the single "missing" convention shared by + the chunk write paths and the shard dicts. + """ + if chunk_is_empty(chunk_array, chunk_spec): + return None + return encode(chunk_array, chunk_spec) + + +def merge_and_encode_chunk( + existing_bytes: Buffer | None, + value: NDBuffer, + *, + chunk_spec: ArraySpec, + chunk_selection: SelectorTuple, + out_selection: SelectorTuple, + is_complete: bool, + drop_axes: tuple[int, ...], + decode: Callable[[Buffer, ArraySpec], NDBuffer], + encode: Callable[[NDBuffer, ArraySpec], Buffer | None], +) -> Buffer | None: + """The canonical single-chunk write: merge, normalize, encode. + + decode existing (``None`` = chunk currently missing) -> merge ``value`` at + ``chunk_selection`` -> normalize empties to missing -> encode. Returns the + bytes to store or ``None`` = do not store / delete. This is the one + state-transition every per-chunk write path expresses; only the IO around + it (where ``existing_bytes`` comes from, where the result goes) differs. + """ + existing_array = decode(existing_bytes, chunk_spec) if existing_bytes is not None else None + merged = _merge_chunk_array( + existing_array, value, out_selection, chunk_spec, chunk_selection, is_complete, drop_axes + ) + return encode_or_elide_chunk(merged, chunk_spec, encode) + + +def scatter_chunk( + selected: NDBuffer | None, + out: NDBuffer, + *, + chunk_spec: ArraySpec, + out_selection: SelectorTuple, + drop_axes: tuple[int, ...], +) -> GetResult: + """Scatter one chunk's (already-selected) decoded region into ``out``. + + ``None`` = the chunk is missing: the fill value is scattered instead and a + ``missing`` status is returned. POLICY-FREE by design: whether a missing + chunk is an error (``read_missing_chunks=False``) is decided by the array + layer from the returned statuses — which is also what makes missing INNER + chunks of a present shard fill rather than raise (the sharding codec + discards the nested read's statuses; only top-level statuses reach the + array layer). + """ + if selected is None: + out[out_selection] = fill_value_or_default(chunk_spec) + return GetResult(status="missing") + if drop_axes: + selected = selected.squeeze(axis=drop_axes) + out[out_selection] = selected + return GetResult(status="present") + + +def decode_and_scatter_chunk( + chunk_bytes: Buffer | None, + out: NDBuffer, + *, + chunk_spec: ArraySpec, + chunk_selection: SelectorTuple, + out_selection: SelectorTuple, + drop_axes: tuple[int, ...], + decode: Callable[[Buffer, ArraySpec], NDBuffer], +) -> GetResult: + """The canonical single-chunk read: decode stored bytes (``None`` = + missing), select, and scatter into ``out`` via `scatter_chunk`. The read + twin of `merge_and_encode_chunk`. + """ + if chunk_bytes is None: + return scatter_chunk( + None, out, chunk_spec=chunk_spec, out_selection=out_selection, drop_axes=drop_axes + ) + selected = decode(chunk_bytes, chunk_spec)[chunk_selection] + return scatter_chunk( + selected, out, chunk_spec=chunk_spec, out_selection=out_selection, drop_axes=drop_axes + ) + + +async def _async_read_fallback( + pipeline: CodecPipeline, + batch: list[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + out: NDBuffer, + drop_axes: tuple[int, ...], +) -> tuple[GetResult, ...]: + """Async fallback read used when no fast-path is available. + + Fetches every chunk's bytes via `concurrent_map` (sized by + `async.concurrency`), decodes the batch through `pipeline.decode`, + then scatters each decoded chunk into `out` at its `out_selection`. + + Used by both `BatchedCodecPipeline.read_batch` (non-partial-decode + branch) and `FusedCodecPipeline.read` (when the store is not a + `SupportsGetSync` / sync transform is unavailable). + """ + chunk_bytes_batch = await concurrent_map( + [(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch], + lambda byte_getter, prototype: byte_getter.get(prototype), + config.get("async.concurrency"), + ) + if isinstance(pipeline, FusedCodecPipeline) and pipeline.sync_transform is not None: + chunk_array_batch = await asyncio.to_thread( + pipeline.decode_sync, + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, *_) in zip(chunk_bytes_batch, batch, strict=False) + ], + ) + else: + chunk_array_batch = await pipeline.decode( + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, *_) in zip(chunk_bytes_batch, batch, strict=False) + ], + ) + results: list[GetResult] = [] + for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip( + chunk_array_batch, batch, strict=False + ): + selected = None if chunk_array is None else chunk_array[chunk_selection] + results.append( + scatter_chunk( + selected, + out, + chunk_spec=chunk_spec, + out_selection=out_selection, + drop_axes=drop_axes, + ) + ) + return tuple(results) + + +async def _async_write_fallback( + pipeline: CodecPipeline, + batch: list[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + value: NDBuffer, + drop_axes: tuple[int, ...], +) -> None: + """Async fallback write used when no fast-path is available. + + For each chunk in `batch`: read its existing bytes from the store + (skipping the read for complete chunks), decode the batch via + `pipeline.decode`, merge `value` into each decoded chunk via + `_merge_chunk_array`, drop chunks that are all-fill when + `write_empty_chunks` is False, encode the surviving chunks via + `pipeline.encode`, then `set` the encoded bytes (or `delete` + if encoding produced `None` or the chunk dropped). + + Used by both `BatchedCodecPipeline.write_batch` (non-partial-encode + branch) and `FusedCodecPipeline.write` (when the store is not a + `SupportsSetSync` / sync transform is unavailable). + """ + + async def _read_key( + byte_setter: ByteSetter | None, prototype: BufferPrototype + ) -> Buffer | None: + if byte_setter is None: + return None + return await byte_setter.get(prototype=prototype) + + chunk_bytes_batch: Iterable[Buffer | None] + chunk_bytes_batch = await concurrent_map( + [ + ( + None if is_complete_chunk else byte_setter, + chunk_spec.prototype, + ) + for byte_setter, chunk_spec, chunk_selection, _, is_complete_chunk in batch + ], + _read_key, + config.get("async.concurrency"), + ) + + if use_sync := ( + isinstance(pipeline, FusedCodecPipeline) and pipeline.sync_transform is not None + ): + chunk_array_decoded = await asyncio.to_thread( + pipeline.decode_sync, + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, *_) in zip(chunk_bytes_batch, batch, strict=False) + ], + ) + else: + chunk_array_decoded = await pipeline.decode( + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, *_) in zip(chunk_bytes_batch, batch, strict=False) + ], + ) + chunk_array_merged = [ + _merge_chunk_array( + chunk_array, + value, + out_selection, + chunk_spec, + chunk_selection, + is_complete_chunk, + drop_axes, + ) + for chunk_array, ( + _, + chunk_spec, + chunk_selection, + out_selection, + is_complete_chunk, + ) in zip(chunk_array_decoded, batch, strict=False) + ] + chunk_array_batch: list[NDBuffer | None] = [] + for chunk_array, (_, chunk_spec, *_) in zip(chunk_array_merged, batch, strict=False): + if chunk_array is None: + chunk_array_batch.append(None) # type: ignore[unreachable] + else: + if chunk_is_empty(chunk_array, chunk_spec): + chunk_array_batch.append(None) + else: + chunk_array_batch.append(chunk_array) + + if use_sync: + chunk_bytes_batch = await asyncio.to_thread( + cast(FusedCodecPipeline, pipeline).encode_sync, + [ + (chunk_array, chunk_spec) + for chunk_array, (_, chunk_spec, *_) in zip(chunk_array_batch, batch, strict=False) + ], + ) + else: + chunk_bytes_batch = await pipeline.encode( + [ + (chunk_array, chunk_spec) + for chunk_array, (_, chunk_spec, *_) in zip(chunk_array_batch, batch, strict=False) + ], + ) + + async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> None: + if chunk_bytes is None: + await byte_setter.delete() + else: + await byte_setter.set(chunk_bytes) + + await concurrent_map( + [ + (byte_setter, chunk_bytes) + for chunk_bytes, (byte_setter, *_) in zip(chunk_bytes_batch, batch, strict=False) + ], + _write_key, + config.get("async.concurrency"), + ) + + @dataclass(slots=True, kw_only=True) class ChunkTransform: - """A synchronous codec chain bound to an ArraySpec. + """A synchronous codec chain. - Provides `encode` and `decode` for pure-compute codec operations - (no IO, no threading, no batching). + Provides `encode_chunk` and `decode_chunk` for pure-compute codec + operations (no IO, no threading, no batching). The `chunk_spec` is + supplied per call so the same transform can be reused across chunks + with different shapes, prototypes, etc. All codecs must implement `SupportsSyncCodec`. Construction will raise `TypeError` if any codec does not. """ codecs: tuple[Codec, ...] - array_spec: ArraySpec - # (sync codec, input_spec) pairs in pipeline order. - _aa_codecs: tuple[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec], ...] = field( + _aa_codecs: tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ...] = field( init=False, repr=False, compare=False ) _ab_codec: SupportsSyncCodec[NDBuffer, Buffer] = field(init=False, repr=False, compare=False) - _ab_spec: ArraySpec = field(init=False, repr=False, compare=False) _bb_codecs: tuple[SupportsSyncCodec[Buffer, Buffer], ...] = field( init=False, repr=False, compare=False ) @@ -100,65 +594,96 @@ def __post_init__(self) -> None: ) aa, ab, bb = codecs_from_list(list(self.codecs)) + # SupportsSyncCodec was verified above; the cast is purely for mypy. + self._aa_codecs = cast("tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ...]", tuple(aa)) + self._ab_codec = cast("SupportsSyncCodec[NDBuffer, Buffer]", ab) + self._bb_codecs = cast("tuple[SupportsSyncCodec[Buffer, Buffer], ...]", tuple(bb)) + + _cached_key: ArraySpec | None = field(init=False, repr=False, compare=False, default=None) + _cached_aa_specs: tuple[ArraySpec, ...] | None = field( + init=False, repr=False, compare=False, default=None + ) + _cached_ab_spec: ArraySpec | None = field(init=False, repr=False, compare=False, default=None) - aa_codecs: list[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec]] = [] - spec = self.array_spec - for aa_codec in aa: - assert isinstance(aa_codec, SupportsSyncCodec) - aa_codecs.append((aa_codec, spec)) - spec = aa_codec.resolve_metadata(spec) + def _resolve_specs(self, chunk_spec: ArraySpec) -> tuple[tuple[ArraySpec, ...], ArraySpec]: + """Return per-AA-codec input specs and the AB spec for `chunk_spec`. - self._aa_codecs = tuple(aa_codecs) - assert isinstance(ab, SupportsSyncCodec) - self._ab_codec = ab - self._ab_spec = spec - bb_sync: list[SupportsSyncCodec[Buffer, Buffer]] = [] - for bb_codec in bb: - assert isinstance(bb_codec, SupportsSyncCodec) - bb_sync.append(bb_codec) - self._bb_codecs = tuple(bb_sync) - - def decode( - self, - chunk_bytes: Buffer, - ) -> NDBuffer: + The resolved chain depends only on the value of `chunk_spec`, so we cache + it keyed on `chunk_spec` itself (ArraySpec is a frozen, hashable dataclass + — value identity). Keying on `id(chunk_spec)` would be unsafe: ids are + recycled after garbage collection, so a freed spec's id reused by a + different spec (same shape, different prototype/dtype/config) could yield + a stale hit. Value identity avoids that entirely. + """ + if not self._aa_codecs: + return (), chunk_spec + key = chunk_spec + if self._cached_key == key: + assert self._cached_aa_specs is not None + assert self._cached_ab_spec is not None + return self._cached_aa_specs, self._cached_ab_spec + + aa_specs_t, spec = resolve_aa_specs(cast("tuple[Codec, ...]", self._aa_codecs), chunk_spec) + self._cached_key = key + self._cached_aa_specs = aa_specs_t + self._cached_ab_spec = spec + return aa_specs_t, spec + + def decode_chunk(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> NDBuffer: """Decode a single chunk through the full codec chain, synchronously. Pure compute -- no IO. + + Parameters + ---------- + chunk_bytes : Buffer + The encoded chunk bytes. + chunk_spec : ArraySpec + The array spec describing shape, dtype, fill value, and codec + configuration for this chunk. """ + aa_specs, ab_spec = self._resolve_specs(chunk_spec) + data: Buffer = chunk_bytes for bb_codec in reversed(self._bb_codecs): - data = bb_codec._decode_sync(data, self._ab_spec) + data = bb_codec._decode_sync(data, ab_spec) - chunk_array: NDBuffer = self._ab_codec._decode_sync(data, self._ab_spec) + chunk_array: NDBuffer = self._ab_codec._decode_sync(data, ab_spec) - for aa_codec, spec in reversed(self._aa_codecs): - chunk_array = aa_codec._decode_sync(chunk_array, spec) + for aa_codec, aa_spec in zip(reversed(self._aa_codecs), reversed(aa_specs), strict=True): + chunk_array = aa_codec._decode_sync(chunk_array, aa_spec) return chunk_array - def encode( - self, - chunk_array: NDBuffer, - ) -> Buffer | None: + def encode_chunk(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> Buffer | None: """Encode a single chunk through the full codec chain, synchronously. Pure compute -- no IO. + + Parameters + ---------- + chunk_array : NDBuffer + The chunk data to encode. + chunk_spec : ArraySpec + The array spec describing shape, dtype, fill value, and codec + configuration for this chunk. """ + aa_specs, ab_spec = self._resolve_specs(chunk_spec) + aa_data: NDBuffer = chunk_array - for aa_codec, spec in self._aa_codecs: - aa_result = aa_codec._encode_sync(aa_data, spec) + for aa_codec, aa_spec in zip(self._aa_codecs, aa_specs, strict=True): + aa_result = aa_codec._encode_sync(aa_data, aa_spec) if aa_result is None: return None aa_data = aa_result - ab_result = self._ab_codec._encode_sync(aa_data, self._ab_spec) + ab_result = self._ab_codec._encode_sync(aa_data, ab_spec) if ab_result is None: return None bb_data: Buffer = ab_result for bb_codec in self._bb_codecs: - bb_result = bb_codec._encode_sync(bb_data, self._ab_spec) + bb_result = bb_codec._encode_sync(bb_data, ab_spec) if bb_result is None: return None bb_data = bb_result @@ -172,6 +697,76 @@ def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: return byte_length +@dataclass(slots=True, kw_only=True) +class AsyncChunkTransform: + """A per-chunk asynchronous codec chain — the async mirror of ChunkTransform. + + Decodes/encodes a SINGLE chunk through the full codec chain, awaiting each + codec's per-chunk async method (`_decode_single`/`_encode_single`) with the + correctly-evolved per-stage spec (via the shared `resolve_aa_specs`). + + Unlike ChunkTransform it places no `SupportsSyncCodec` requirement on the + codecs, so it works for async-only codecs. Unlike the batched codec API it + operates on one chunk at a time — the mini-batch fan-out is a + BatchedCodecPipeline concern and deliberately not reintroduced here. + """ + + codecs: tuple[Codec, ...] + + _aa_codecs: tuple[ArrayArrayCodec, ...] = field(init=False, repr=False, compare=False) + _ab_codec: ArrayBytesCodec = field(init=False, repr=False, compare=False) + _bb_codecs: tuple[BytesBytesCodec, ...] = field(init=False, repr=False, compare=False) + + def __post_init__(self) -> None: + aa, ab, bb = codecs_from_list(list(self.codecs)) + self._aa_codecs = aa + self._ab_codec = ab + self._bb_codecs = bb + + async def decode_chunk(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> NDBuffer: + """Decode one chunk through the chain (bb -> ab -> aa), async.""" + aa_specs, ab_spec = resolve_aa_specs(self._aa_codecs, chunk_spec) + + data: Buffer = chunk_bytes + for bb_codec in reversed(self._bb_codecs): + data = await bb_codec._decode_single(data, ab_spec) + + chunk_array: NDBuffer = await self._ab_codec._decode_single(data, ab_spec) + + for aa_codec, aa_spec in zip(reversed(self._aa_codecs), reversed(aa_specs), strict=True): + chunk_array = await aa_codec._decode_single(chunk_array, aa_spec) + + return chunk_array + + async def encode_chunk(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> Buffer | None: + """Encode one chunk through the chain (aa -> ab -> bb), async. + + Returns None if any stage drops the chunk (e.g. an all-fill chunk under + write_empty_chunks=False), matching ChunkTransform.encode_chunk. + """ + aa_specs, ab_spec = resolve_aa_specs(self._aa_codecs, chunk_spec) + + aa_data: NDBuffer = chunk_array + for aa_codec, aa_spec in zip(self._aa_codecs, aa_specs, strict=True): + aa_result = await aa_codec._encode_single(aa_data, aa_spec) + if aa_result is None: + return None + aa_data = aa_result + + ab_result = await self._ab_codec._encode_single(aa_data, ab_spec) + if ab_result is None: + return None + + bb_data: Buffer = ab_result + for bb_codec in self._bb_codecs: + bb_result = await bb_codec._encode_single(bb_data, ab_spec) + if bb_result is None: + return None + bb_data = bb_result + + return bb_data + + @dataclass(frozen=True) class BatchedCodecPipeline(CodecPipeline): """Default codec pipeline. @@ -187,18 +782,7 @@ class BatchedCodecPipeline(CodecPipeline): batch_size: int def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - # Each codec must be evolved against the spec it will actually see - # at run-time, not the original array spec. Earlier array->array - # codecs may transform the dtype (e.g. cast_value), so the spec - # threaded into later codecs (the array->bytes serializer and any - # bytes->bytes filters) must reflect those transformations. - evolved: list[Codec] = [] - spec = array_spec - for codec in self: - evolved_codec = codec.evolve_from_array_spec(array_spec=spec) - evolved.append(evolved_codec) - spec = evolved_codec.resolve_metadata(spec) - return type(self).from_codecs(evolved) + return type(self).from_codecs(evolve_codecs(self, array_spec)) @classmethod def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self: @@ -213,34 +797,22 @@ def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) @property def supports_partial_decode(self) -> bool: - """Determines whether the codec pipeline supports partial decoding. - - Currently, only codec pipelines with a single ArrayBytesCodec that supports - partial decoding can support partial decoding. This limitation is due to the fact - that ArrayArrayCodecs can change the slice selection leading to non-contiguous - slices and BytesBytesCodecs can change the chunk bytes in a way that slice - selections cannot be attributed to byte ranges anymore which renders partial - decoding infeasible. - - This limitation may softened in the future.""" - return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( - self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin + # Only a single ArrayBytesCodec that supports partial decoding, and no + # AA/BB codecs (they break the slice<->byte-range correspondence). + return pipeline_supports_partial_decode( + self.array_bytes_codec, + array_array_codecs=self.array_array_codecs, + bytes_bytes_codecs=self.bytes_bytes_codecs, + require_no_aa_bb=True, ) @property def supports_partial_encode(self) -> bool: - """Determines whether the codec pipeline supports partial encoding. - - Currently, only codec pipelines with a single ArrayBytesCodec that supports - partial encoding can support partial encoding. This limitation is due to the fact - that ArrayArrayCodecs can change the slice selection leading to non-contiguous - slices and BytesBytesCodecs can change the chunk bytes in a way that slice - selections cannot be attributed to byte ranges anymore which renders partial - encoding infeasible. - - This limitation may softened in the future.""" - return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( - self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin + return pipeline_supports_partial_encode( + self.array_bytes_codec, + array_array_codecs=self.array_array_codecs, + bytes_bytes_codecs=self.bytes_bytes_codecs, + require_no_aa_bb=True, ) def __iter__(self) -> Iterator[Codec]: @@ -367,8 +939,8 @@ async def read_batch( out: NDBuffer, drop_axes: tuple[int, ...] = (), ) -> tuple[GetResult, ...]: - results: list[GetResult] = [] if self.supports_partial_decode: + results: list[GetResult] = [] batch_info_list = list(batch_info) chunk_array_batch = await self.decode_partial_batch( [ @@ -387,81 +959,8 @@ async def read_batch( else: out[out_selection] = fill_value_or_default(chunk_spec) results.append(GetResult(status="missing")) - else: - batch_info_list = list(batch_info) - chunk_bytes_batch = await concurrent_map( - [ - (byte_getter, array_spec.prototype) - for byte_getter, array_spec, *_ in batch_info_list - ], - lambda byte_getter, prototype: byte_getter.get(prototype), - config.get("async.concurrency"), - ) - chunk_array_batch = await self.decode_batch( - [ - (chunk_bytes, chunk_spec) - for chunk_bytes, (_, chunk_spec, *_) in zip( - chunk_bytes_batch, batch_info_list, strict=False - ) - ], - ) - for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip( - chunk_array_batch, batch_info_list, strict=False - ): - if chunk_array is not None: - tmp = chunk_array[chunk_selection] - if drop_axes: - tmp = tmp.squeeze(axis=drop_axes) - out[out_selection] = tmp - results.append(GetResult(status="present")) - else: - out[out_selection] = fill_value_or_default(chunk_spec) - results.append(GetResult(status="missing")) - return tuple(results) - - def _merge_chunk_array( - self, - existing_chunk_array: NDBuffer | None, - value: NDBuffer, - out_selection: SelectorTuple, - chunk_spec: ArraySpec, - chunk_selection: SelectorTuple, - is_complete_chunk: bool, - drop_axes: tuple[int, ...], - ) -> NDBuffer: - if ( - is_complete_chunk - and value.shape == chunk_spec.shape - # Guard that this is not a partial chunk at the end with is_complete_chunk=True - and value[out_selection].shape == chunk_spec.shape - ): - return value - if existing_chunk_array is None: - chunk_array = chunk_spec.prototype.nd_buffer.create( - shape=chunk_spec.shape, - dtype=chunk_spec.dtype.to_native_dtype(), - order=chunk_spec.order, - fill_value=fill_value_or_default(chunk_spec), - ) - else: - chunk_array = existing_chunk_array.copy() # make a writable copy - if chunk_selection == () or is_scalar( - value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype() - ): - chunk_value = value - else: - chunk_value = value[out_selection] - # handle missing singleton dimensions - if drop_axes: - item = tuple( - None # equivalent to np.newaxis - if idx in drop_axes - else slice(None) - for idx in range(chunk_spec.ndim) - ) - chunk_value = chunk_value[item] - chunk_array[chunk_selection] = chunk_value - return chunk_array + return tuple(results) + return await _async_read_fallback(self, list(batch_info), out, drop_axes) async def write_batch( self, @@ -486,93 +985,8 @@ async def write_batch( ], ) - else: - # Read existing bytes if not total slice - async def _read_key( - byte_setter: ByteSetter | None, prototype: BufferPrototype - ) -> Buffer | None: - if byte_setter is None: - return None - return await byte_setter.get(prototype=prototype) - - chunk_bytes_batch: Iterable[Buffer | None] - chunk_bytes_batch = await concurrent_map( - [ - ( - None if is_complete_chunk else byte_setter, - chunk_spec.prototype, - ) - for byte_setter, chunk_spec, chunk_selection, _, is_complete_chunk in batch_info - ], - _read_key, - config.get("async.concurrency"), - ) - chunk_array_decoded = await self.decode_batch( - [ - (chunk_bytes, chunk_spec) - for chunk_bytes, (_, chunk_spec, *_) in zip( - chunk_bytes_batch, batch_info, strict=False - ) - ], - ) - - chunk_array_merged = [ - self._merge_chunk_array( - chunk_array, - value, - out_selection, - chunk_spec, - chunk_selection, - is_complete_chunk, - drop_axes, - ) - for chunk_array, ( - _, - chunk_spec, - chunk_selection, - out_selection, - is_complete_chunk, - ) in zip(chunk_array_decoded, batch_info, strict=False) - ] - chunk_array_batch: list[NDBuffer | None] = [] - for chunk_array, (_, chunk_spec, *_) in zip( - chunk_array_merged, batch_info, strict=False - ): - if chunk_array is None: - chunk_array_batch.append(None) # type: ignore[unreachable] - else: - if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal( - fill_value_or_default(chunk_spec) - ): - chunk_array_batch.append(None) - else: - chunk_array_batch.append(chunk_array) - - chunk_bytes_batch = await self.encode_batch( - [ - (chunk_array, chunk_spec) - for chunk_array, (_, chunk_spec, *_) in zip( - chunk_array_batch, batch_info, strict=False - ) - ], - ) - - async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> None: - if chunk_bytes is None: - await byte_setter.delete() - else: - await byte_setter.set(chunk_bytes) - - await concurrent_map( - [ - (byte_setter, chunk_bytes) - for chunk_bytes, (byte_setter, *_) in zip( - chunk_bytes_batch, batch_info, strict=False - ) - ], - _write_key, - config.get("async.concurrency"), - ) + return + await _async_write_fallback(self, list(batch_info), value, drop_axes) async def decode( self, @@ -632,11 +1046,13 @@ def codecs_from_list( ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: from zarr.codecs.sharding import ShardingCodec + codecs = tuple(codecs) # materialize to avoid generator consumption issues + array_array: tuple[ArrayArrayCodec, ...] = () array_bytes_maybe: ArrayBytesCodec | None = None bytes_bytes: tuple[BytesBytesCodec, ...] = () - if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(tuple(codecs)) > 1: + if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(codecs) > 1: warn( "Combining a `sharding_indexed` codec disables partial reads and " "writes, which may lead to inefficient performance.", @@ -690,3 +1106,466 @@ def codecs_from_list( register_pipeline(BatchedCodecPipeline) + + +@dataclass(frozen=True) +class FusedCodecPipeline(CodecPipeline): + """Codec pipeline that runs codec compute synchronously, in bulk. + + This is an opt-in alternative to `BatchedCodecPipeline`. The win is NOT + "separating IO from compute" — the codecs (notably `ShardingCodec`) still + perform their own storage IO. The win is replacing the batched pipeline's + per-chunk *async scheduling* (≈one coroutine per chunk, which dominates real + codec work) with synchronous, batched/coalesced execution: + + 1. When every codec implements `SupportsSyncCodec`, a `ChunkTransform` + runs the codec chain synchronously (no event loop, no per-chunk coroutine) + — optionally across a thread pool for CPU-heavy decode/encode. + 2. Sharded reads use the codec's synchronous IO methods: byte-range reads + coalesced via `Store.get_ranges_sync`, and a vectorized whole-shard bulk + decode for dense, fixed-size, uncompressed shards. Sharded writes go + through the codec's synchronous full-shard-rewrite path. + 3. When the store lacks synchronous IO (e.g. ZipStore) the pipeline falls + back to the async path, equivalent to `BatchedCodecPipeline`. + + IO ownership: the sharding codec holds the byte getter/setter and reads/ + writes storage directly (the same model as zarrs; unlike tensorstore, which + keeps codecs storage-free). A storage-free codec is a possible future + direction (see the pure-codec design notes) but is explicitly NOT what this + pipeline does. + """ + + codecs: tuple[Codec, ...] + array_array_codecs: tuple[ArrayArrayCodec, ...] + array_bytes_codec: ArrayBytesCodec + bytes_bytes_codecs: tuple[BytesBytesCodec, ...] + sync_transform: ChunkTransform | None + batch_size: int + + @classmethod + def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self: + codec_list = tuple(codecs) + aa, ab, bb = codecs_from_list(codec_list) + + if batch_size is None: + batch_size = config.get("codec_pipeline.batch_size") + + return cls( + codecs=codec_list, + array_array_codecs=aa, + array_bytes_codec=ab, + bytes_bytes_codecs=bb, + sync_transform=None, + batch_size=batch_size, + ) + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + evolved_codecs = evolve_codecs(self.codecs, array_spec) + aa, ab, bb = codecs_from_list(evolved_codecs) + + try: + sync_transform: ChunkTransform | None = ChunkTransform(codecs=evolved_codecs) + except TypeError: + sync_transform = None + + return type(self)( + codecs=evolved_codecs, + array_array_codecs=aa, + array_bytes_codec=ab, + bytes_bytes_codecs=bb, + sync_transform=sync_transform, + batch_size=self.batch_size, + ) + + def __iter__(self) -> Iterator[Codec]: + return iter(self.codecs) + + @property + def supports_partial_decode(self) -> bool: + # NOTE: unlike BatchedCodecPipeline this does NOT require the AA/BB codec + # lists to be empty (require_no_aa_bb=False). That divergence is tracked + # separately; see pipeline_supports_partial_decode. + return pipeline_supports_partial_decode( + self.array_bytes_codec, + array_array_codecs=self.array_array_codecs, + bytes_bytes_codecs=self.bytes_bytes_codecs, + require_no_aa_bb=False, + ) + + @property + def supports_partial_encode(self) -> bool: + return pipeline_supports_partial_encode( + self.array_bytes_codec, + array_array_codecs=self.array_array_codecs, + bytes_bytes_codecs=self.bytes_bytes_codecs, + require_no_aa_bb=False, + ) + + def validate( + self, + *, + shape: tuple[int, ...], + dtype: ZDType[TBaseDType, TBaseScalar], + chunk_grid: ChunkGridMetadata, + ) -> None: + for codec in self.codecs: + codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) + + def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: + for codec in self: + byte_length = codec.compute_encoded_size(byte_length, array_spec) + array_spec = codec.resolve_metadata(array_spec) + return byte_length + + # -- async decode/encode (required by ABC) and sync versions -- + + async def decode( + self, + chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + # Decode each chunk through AsyncChunkTransform, which threads the + # per-stage spec correctly (via resolve_aa_specs). This is the single + # source of truth for async per-chunk decode; earlier this method + # reused one flat `chunk_specs` across every codec stage, which silently + # corrupted/crashed spec-changing codecs (transpose/cast/scale_offset) + # on the async fallback path. + async_transform = AsyncChunkTransform(codecs=self.codecs) + out: list[NDBuffer | None] = [] + for chunk_bytes, chunk_spec in chunk_bytes_and_specs: + if chunk_bytes is None: + out.append(None) + else: + out.append(await async_transform.decode_chunk(chunk_bytes, chunk_spec)) + return out + + def decode_sync( + self, + chunk_bytes_and_specs: Sequence[tuple[Buffer | None, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + """A sync version of `decode`. + + Parameters + ---------- + chunk_bytes_and_specs + The chunks and their descriptions (i.e., shape, fill value etc.) + + Returns + ------- + An iterator over decoded chunks + + Raises + ------ + RuntimeError + If there is no sync transform + """ + transform = self.sync_transform + if transform is None: + raise RuntimeError("Do not call this method without a sync transform") + max_workers = _resolve_max_workers() + + def _decode(item: tuple[Buffer | None, ArraySpec]) -> NDBuffer | None: + return None if item[0] is None else transform.decode_chunk(item[0], item[1]) + + if max_workers > 1 and len(chunk_bytes_and_specs) > 1: + pool = _get_pool(max_workers) + return list( + pool.map( + _decode, + chunk_bytes_and_specs, + ) + ) + else: + return [_decode(item) for item in chunk_bytes_and_specs] + + async def encode( + self, + chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], + ) -> Iterable[Buffer | None]: + async_transform = AsyncChunkTransform(codecs=self.codecs) + out: list[Buffer | None] = [] + for chunk_array, chunk_spec in chunk_arrays_and_specs: + if chunk_array is None: + out.append(None) + else: + out.append(await async_transform.encode_chunk(chunk_array, chunk_spec)) + return out + + def encode_sync( + self, + chunk_arrays_and_specs: Sequence[tuple[NDBuffer | None, ArraySpec]], + ) -> Iterable[Buffer | None]: + """A sync version of `encode`. + + Parameters + ---------- + chunk_arrays_and_specs + The chunks and their descriptions (i.e., shape, fill value etc.) + + Returns + ------- + An iterator over encoded chunks + + Raises + ------ + RuntimeError + If there is no sync transform + """ + transform = self.sync_transform + if transform is None: + raise RuntimeError("Do not call this method without a sync transform") + + max_workers = _resolve_max_workers() + + def _encode(item: tuple[NDBuffer | None, ArraySpec]) -> Buffer | None: + return None if item[0] is None else transform.encode_chunk(item[0], item[1]) + + if max_workers > 1 and len(chunk_arrays_and_specs) > 1: + pool = _get_pool(max_workers) + return list( + pool.map( + _encode, + chunk_arrays_and_specs, + ) + ) + else: + return [_encode(item) for item in chunk_arrays_and_specs] + + # -- sync read/write -- + + def read_sync( + self, + batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + out: NDBuffer, + drop_axes: tuple[int, ...] = (), + max_workers: int = 1, + ) -> tuple[GetResult, ...]: + """Synchronous read: fetch -> decode -> scatter, per chunk. + + When `max_workers > 1` and there are multiple chunks, each + chunk's full lifecycle (fetch + decode + scatter) runs as one + task on a thread pool sized to `max_workers` — overlapping IO + of one chunk with decode/scatter of another. Scatter is + thread-safe because the chunks have non-overlapping output + selections. + + `max_workers=1` runs everything sequentially in the calling + thread (no pool involvement). + + Mirrors `BatchedCodecPipeline.read_batch`: when the AB codec + supports partial decoding (e.g. sharding), the codec handles its + own IO and only fetches the inner-chunk byte ranges that overlap + the read selection. Otherwise the pipeline fetches the full + blob and decodes the whole chunk. + """ + assert self.sync_transform is not None + transform = self.sync_transform + + batch = list(batch_info) + if not batch: + return () + + # Partial-decode fast path: the AB codec owns IO (read only the + # byte ranges needed for the requested selection). Same condition + # and dispatch as BatchedCodecPipeline.read_batch. + if self.supports_partial_decode: + codec = self.array_bytes_codec + assert hasattr(codec, "_decode_partial_sync") + + def _read_one( + item: tuple[Any, ArraySpec, SelectorTuple, SelectorTuple, bool], + ) -> GetResult: + byte_getter, chunk_spec, chunk_selection, out_selection, _ = item + # the partial decode returns the already-selected region + decoded = codec._decode_partial_sync(byte_getter, chunk_selection, chunk_spec) + return scatter_chunk( + decoded, + out, + chunk_spec=chunk_spec, + out_selection=out_selection, + drop_axes=drop_axes, + ) + + else: + # Per-chunk fused path: fetch + decode + scatter as one task. + def _read_one( + item: tuple[Any, ArraySpec, SelectorTuple, SelectorTuple, bool], + ) -> GetResult: + byte_getter, chunk_spec, chunk_selection, out_selection, _ = item + raw = byte_getter.get_sync(prototype=chunk_spec.prototype) + return decode_and_scatter_chunk( + raw, + out, + chunk_spec=chunk_spec, + chunk_selection=chunk_selection, + out_selection=out_selection, + drop_axes=drop_axes, + decode=transform.decode_chunk, + ) + + if max_workers > 1 and len(batch) > 1: + pool = _get_pool(max_workers) + return tuple(pool.map(_read_one, batch)) + return tuple(_read_one(item) for item in batch) + + def write_sync( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + max_workers: int = 1, + ) -> None: + """Synchronous write: fetch existing -> merge+encode -> store. + + When `max_workers > 1` and there are multiple chunks, each + chunk's full lifecycle (get-existing + merge + encode + set/delete) + runs as one task on a thread pool sized to `max_workers` — + overlapping IO of one chunk with compute of another. + + `max_workers=1` runs everything sequentially in the calling + thread (no pool involvement). + + When the codec pipeline supports partial encoding (e.g. a + sharding codec with no outer AA/BB codecs), the AB codec handles + the full write cycle — reading existing data, merging, encoding, + and writing — matching the async `BatchedCodecPipeline` path. + """ + assert self.sync_transform is not None + transform = self.sync_transform + + batch = list(batch_info) + if not batch: + return + + # Partial-encode path: the AB codec owns IO (read, merge, encode, + # write). Same condition and calling convention as + # BatchedCodecPipeline.write_batch. + if self.supports_partial_encode: + codec = self.array_bytes_codec + assert hasattr(codec, "_encode_partial_sync") + scalar = len(value.shape) == 0 + + def _write_one( + item: tuple[Any, ArraySpec, SelectorTuple, SelectorTuple, bool], + ) -> None: + bs, chunk_spec, chunk_selection, out_selection, _is_complete = item + chunk_value = value if scalar else value[out_selection] + codec._encode_partial_sync(bs, chunk_value, chunk_selection, chunk_spec) + + else: + # Per-chunk fused path: get-existing + merge + encode + set/delete as one task. + def _write_one( + item: tuple[Any, ArraySpec, SelectorTuple, SelectorTuple, bool], + ) -> None: + bs, chunk_spec, chunk_selection, out_selection, is_complete = item + existing_bytes: Buffer | None = None + if not is_complete: + existing_bytes = bs.get_sync(prototype=chunk_spec.prototype) + + encoded = merge_and_encode_chunk( + existing_bytes, + value, + chunk_spec=chunk_spec, + chunk_selection=chunk_selection, + out_selection=out_selection, + is_complete=is_complete, + drop_axes=drop_axes, + decode=transform.decode_chunk, + encode=transform.encode_chunk, + ) + if encoded is None: + bs.delete_sync() + else: + bs.set_sync(encoded) + + if max_workers > 1 and len(batch) > 1: + pool = _get_pool(max_workers) + list(pool.map(_write_one, batch)) + else: + for item in batch: + _write_one(item) + + # -- async read/write -- + + async def read( + self, + batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + out: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> tuple[GetResult, ...]: + batch = list(batch_info) + if not batch: + return () + + # Fast path: sync transform plus synchronous IO. For StorePath the gate + # is on the STORE's sync support (StorePath always has a get_sync + # method, but it only works when its store does); for other byte + # getters (e.g. the sharding codec's in-memory _ShardingByteGetter) the + # SyncByteGetter protocol is the gate. + from zarr.abc.store import SupportsGetSync, SyncByteGetter + from zarr.storage._common import StorePath + + first_bg = batch[0][0] + if self.sync_transform is not None and ( + (isinstance(first_bg, StorePath) and isinstance(first_bg.store, SupportsGetSync)) + or (not isinstance(first_bg, StorePath) and isinstance(first_bg, SyncByteGetter)) + ): + return self.read_sync(batch, out, drop_axes, max_workers=_resolve_max_workers()) + + # Non-sync store (e.g. ZipStore): can't use the sync fast path. But if the + # array-bytes codec supports partial decoding (sharding), still route + # through the async partial-decode path — it fetches only the needed + # inner-chunk byte ranges (coalesced via get_ranges), matching + # BatchedCodecPipeline. Without this, the whole-shard _async_read_fallback + # below would over-read and diverge from the batched pipeline's IO. + if self.supports_partial_decode: + assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) + chunk_array_batch = await self.array_bytes_codec.decode_partial( + [ + (byte_getter, chunk_selection, chunk_spec) + for byte_getter, chunk_spec, chunk_selection, *_ in batch + ] + ) + results: list[GetResult] = [] + for chunk_array, (_, chunk_spec, _, out_selection, _) in zip( + chunk_array_batch, batch, strict=False + ): + if chunk_array is not None: + if drop_axes: + chunk_array = chunk_array.squeeze(axis=drop_axes) + out[out_selection] = chunk_array + results.append(GetResult(status="present")) + else: + out[out_selection] = fill_value_or_default(chunk_spec) + results.append(GetResult(status="missing")) + return tuple(results) + + return await _async_read_fallback(self, batch, out, drop_axes) + + async def write( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + batch = list(batch_info) + if not batch: + return + + # Fast path: sync transform plus synchronous IO. Mirrors `read`: gate + # StorePath on the store's sync support, other byte setters (e.g. the + # sharding codec's in-memory _ShardingByteSetter) on SyncByteSetter. + from zarr.abc.store import SupportsSetSync, SyncByteSetter + from zarr.storage._common import StorePath + + first_bs = batch[0][0] + if self.sync_transform is not None and ( + (isinstance(first_bs, StorePath) and isinstance(first_bs.store, SupportsSetSync)) + or (not isinstance(first_bs, StorePath) and isinstance(first_bs, SyncByteSetter)) + ): + self.write_sync(batch, value, drop_axes, max_workers=_resolve_max_workers()) + return + + await _async_write_fallback(self, batch, value, drop_axes) + + +register_pipeline(FusedCodecPipeline) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 7dcbc78e31..746f22ff66 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -104,8 +104,12 @@ def enable_gpu(self) -> ConfigSet: "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { - "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", + "path": "zarr.core.codec_pipeline.FusedCodecPipeline", "batch_size": 1, + # Default to sequential (no thread pool). Threading-by-default is a + # separate, larger change (downstream thread-safety, oversubscription + # on many-core nodes); opt in with codec_pipeline.max_workers > 1. + "max_workers": 1, }, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", diff --git a/src/zarr/storage/_fsspec.py b/src/zarr/storage/_fsspec.py index 29201a6fee..90e809cd60 100644 --- a/src/zarr/storage/_fsspec.py +++ b/src/zarr/storage/_fsspec.py @@ -42,13 +42,13 @@ async def _close_fs(fs: AsyncFileSystem) -> None: """ Best-effort async close of an fsspec async filesystem owned by FsspecStore. - For filesystems that expose ``set_session()`` (e.g. s3fs) the underlying - aiohttp ``ClientSession`` is closed explicitly, which prevents - "Unclosed client session" ``ResourceWarning``s from aiohttp. For all + For filesystems that expose `set_session()` (e.g. s3fs) the underlying + aiohttp `ClientSession` is closed explicitly, which prevents + "Unclosed client session" `ResourceWarning`s from aiohttp. For all other filesystem types the call is a no-op (not every implementation manages an HTTP session directly). - Note that ``set_session()`` lazily creates a session if none exists yet, so + Note that `set_session()` lazily creates a session if none exists yet, so closing a store that never performed any I/O may instantiate a session purely to close it. This is accepted best-effort behavior; fsspec does not expose a stable, cross-implementation way to test for an existing session. @@ -286,7 +286,7 @@ def with_read_only(self, read_only: bool = False) -> FsspecStore: ) # The derived store shares the same fs. Transfer ownership so the # surviving store closes it, and clear ours to avoid a double-close. - # Otherwise the common ``from_url(...).with_read_only()`` chain would + # Otherwise the common `from_url(...).with_read_only()` chain would # drop the only owner (the unreferenced source) and leak the session. new_store._owns_fs = self._owns_fs self._owns_fs = False diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index 6096ece2f8..f666801694 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -13,6 +13,8 @@ from collections.abc import Iterable from typing import Self + from zarr.abc.store import ByteRequest + __all__ = [ "NDBufferUsingTestNDArrayLike", @@ -72,6 +74,13 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None assert isinstance(value, TestBuffer) await super().set(key, value, byte_range) + def set_sync(self, key: str, value: Buffer) -> None: + # Synchronous counterpart of `set`, used by FusedCodecPipeline. Mirror the + # same buffer-type guard so the invariant holds whichever pipeline writes. + if "json" not in key: + assert isinstance(value, TestBuffer) + super().set_sync(key, value) + async def get( self, key: str, @@ -84,3 +93,18 @@ async def get( if ret is not None: assert isinstance(ret, prototype.buffer) return ret + + def get_sync( + self, + key: str, + *, + prototype: BufferPrototype | None = None, + byte_range: ByteRequest | None = None, + ) -> Buffer | None: + # Synchronous counterpart of `get`, used by FusedCodecPipeline. + if "json" not in key and prototype is not None: + assert prototype.buffer is TestBuffer + ret = super().get_sync(key=key, prototype=prototype, byte_range=byte_range) + if ret is not None and prototype is not None: + assert isinstance(ret, prototype.buffer) + return ret diff --git a/tests/benchmarks/test_e2e.py b/tests/benchmarks/test_e2e.py index 65d0e65ac9..0e5e88b72c 100644 --- a/tests/benchmarks/test_e2e.py +++ b/tests/benchmarks/test_e2e.py @@ -9,16 +9,21 @@ from tests.benchmarks.common import Layout if TYPE_CHECKING: + from collections.abc import Iterator + from pytest_benchmark.fixture import BenchmarkFixture from zarr.abc.store import Store from zarr.core.common import NamedConfig + from operator import getitem, setitem from typing import Any, Literal import pytest from zarr import create_array +from zarr.core.config import config as zarr_config +from zarr.testing.store import LatencyStore CompressorName = Literal["gzip"] | None @@ -37,18 +42,63 @@ Layout(shape=(1_000_000,), chunks=(100,), shards=(10000 * 100,)), ) +_PIPELINE_PATHS = { + "batched": "zarr.core.codec_pipeline.BatchedCodecPipeline", + "fused": "zarr.core.codec_pipeline.FusedCodecPipeline", +} + +_LATENCY_VALUES = (0.0, 0.001, 0.05, 0.2) + + +@pytest.fixture(params=_LATENCY_VALUES, ids=lambda v: f"latency={v}") +def latency(request: pytest.FixtureRequest) -> float: + return request.param # type: ignore[no-any-return] + + +@pytest.fixture +def bench_store(store: Store, latency: float, request: pytest.FixtureRequest) -> Store: + """Wraps the underlying store in LatencyStore when latency > 0. + + Local-store cases skip nonzero latency — synthetic latency on top of + a real LocalStore is double-counting; latency simulation only applies + to the in-process memory store. + """ + callspec = getattr(request.node, "callspec", None) + store_kind = callspec.params.get("store", "memory") if callspec is not None else "memory" + if latency > 0: + if store_kind == "local": + pytest.skip("latency injection only applies to in-memory store") + return LatencyStore(store, get_latency=latency, set_latency=latency) + return store + + +@pytest.fixture(params=["batched", "fused"]) +def pipeline(request: pytest.FixtureRequest) -> Iterator[str]: + """Set ``codec_pipeline.path`` for the duration of the benchmark. + + Yields the pipeline name so each parametrize cell has a distinct + benchmark id. + """ + name = request.param + with zarr_config.set({"codec_pipeline.path": _PIPELINE_PATHS[name]}): + yield name + @pytest.mark.parametrize("compression_name", [None, "gzip"]) @pytest.mark.parametrize("layout", layouts, ids=str) @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) def test_write_array( - store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture + bench_store: Store, + layout: Layout, + compression_name: CompressorName, + pipeline: str, + benchmark: BenchmarkFixture, ) -> None: """ Test the time required to fill an array with a single value """ arr = create_array( - store, + bench_store, dtype="uint8", shape=layout.shape, chunks=layout.chunks, @@ -64,13 +114,17 @@ def test_write_array( @pytest.mark.parametrize("layout", layouts, ids=str) @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) def test_read_array( - store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture + bench_store: Store, + layout: Layout, + compression_name: CompressorName, + pipeline: str, + benchmark: BenchmarkFixture, ) -> None: """ Test the time required to fill an array with a single value """ arr = create_array( - store, + bench_store, dtype="uint8", shape=layout.shape, chunks=layout.chunks, diff --git a/tests/test_sync_codec_pipeline.py b/tests/test_chunk_transform.py similarity index 83% rename from tests/test_sync_codec_pipeline.py rename to tests/test_chunk_transform.py index 1bfde7c837..7be9d1ce9c 100644 --- a/tests/test_sync_codec_pipeline.py +++ b/tests/test_chunk_transform.py @@ -1,3 +1,13 @@ +"""Unit tests for ChunkTransform -- the per-chunk synchronous codec chain. + +ChunkTransform is the data structure FusedCodecPipeline uses to encode/decode a +single chunk through a sequence of codecs synchronously. These tests exercise it +directly (no pipeline, no store): construction and its rejection of codecs that +lack a synchronous implementation, encode/decode roundtrips across codec chains, +compute_encoded_size, and None short-circuiting when an array->array codec +returns None. End-to-end pipeline behavior lives in the pipeline test modules. +""" + from __future__ import annotations from typing import Any @@ -58,8 +68,8 @@ def _make_nd_buffer(arr: np.ndarray[Any, np.dtype[Any]]) -> NDBuffer: ) def test_construction(shape: tuple[int, ...], codecs: tuple[Codec, ...]) -> None: """Construction succeeds when all codecs implement SupportsSyncCodec.""" - spec = _make_array_spec(shape, np.dtype("float64")) - ChunkTransform(codecs=codecs, array_spec=spec) + _ = _make_array_spec(shape, np.dtype("float64")) + ChunkTransform(codecs=codecs) @pytest.mark.parametrize( @@ -72,9 +82,9 @@ def test_construction(shape: tuple[int, ...], codecs: tuple[Codec, ...]) -> None ) def test_construction_rejects_non_sync(shape: tuple[int, ...], codecs: tuple[Codec, ...]) -> None: """Construction raises TypeError when any codec lacks SupportsSyncCodec.""" - spec = _make_array_spec(shape, np.dtype("float64")) + _ = _make_array_spec(shape, np.dtype("float64")) with pytest.raises(TypeError, match="AsyncOnlyCodec"): - ChunkTransform(codecs=codecs, array_spec=spec) + ChunkTransform(codecs=codecs) @pytest.mark.parametrize( @@ -96,12 +106,12 @@ def test_encode_decode_roundtrip( ) -> None: """Data survives a full encode/decode cycle.""" spec = _make_array_spec(arr.shape, arr.dtype) - chain = ChunkTransform(codecs=codecs, array_spec=spec) + chain = ChunkTransform(codecs=codecs) nd_buf = _make_nd_buffer(arr) - encoded = chain.encode(nd_buf) + encoded = chain.encode_chunk(nd_buf, spec) assert encoded is not None - decoded = chain.decode(encoded) + decoded = chain.decode_chunk(encoded, spec) np.testing.assert_array_equal(arr, decoded.as_numpy_array()) @@ -122,7 +132,7 @@ def test_compute_encoded_size( ) -> None: """compute_encoded_size returns the correct byte length.""" spec = _make_array_spec(shape, np.dtype("float64")) - chain = ChunkTransform(codecs=codecs, array_spec=spec) + chain = ChunkTransform(codecs=codecs) assert chain.compute_encoded_size(input_size, spec) == expected_size @@ -138,8 +148,7 @@ def _encode_sync(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer spec = _make_array_spec((3, 4), np.dtype("float64")) chain = ChunkTransform( codecs=(NoneReturningAACodec(order=(1, 0)), BytesCodec()), - array_spec=spec, ) arr = np.arange(12, dtype="float64").reshape(3, 4) nd_buf = _make_nd_buffer(arr) - assert chain.encode(nd_buf) is None + assert chain.encode_chunk(nd_buf, spec) is None diff --git a/tests/test_codec_pipeline.py b/tests/test_codec_pipeline.py index fa41c2867b..656969b3a3 100644 --- a/tests/test_codec_pipeline.py +++ b/tests/test_codec_pipeline.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Any + import numpy as np import pytest @@ -7,29 +9,55 @@ from zarr.codecs import BytesCodec, CastValue from zarr.core.array import _get_chunk_spec from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.config import config as zarr_config from zarr.core.indexing import BasicIndexer from zarr.storage import MemoryStore +if TYPE_CHECKING: + from collections.abc import Generator + + +@pytest.fixture(autouse=True) +def _enable_rectilinear_chunks() -> Generator[None]: + """Enable rectilinear chunks for all tests in this module.""" + with zarr_config.set({"array.rectilinear_chunks": True}): + yield + + +pipeline_paths = [ + "zarr.core.codec_pipeline.BatchedCodecPipeline", + "zarr.core.codec_pipeline.FusedCodecPipeline", +] + + +@pytest.fixture(params=pipeline_paths, ids=["batched", "sync"]) +def pipeline_class(request: pytest.FixtureRequest) -> Generator[str]: + """Temporarily set the codec pipeline class for the test.""" + path = request.param + with zarr_config.set({"codec_pipeline.path": path}): + yield path + + +# --------------------------------------------------------------------------- +# GetResult status tests (low-level pipeline API) +# --------------------------------------------------------------------------- + @pytest.mark.parametrize( ("write_slice", "read_slice", "expected_statuses"), [ - # Write all chunks, read all — all present (slice(None), slice(None), ("present", "present", "present")), - # Write first chunk only, read all — first present, rest missing (slice(0, 2), slice(None), ("present", "missing", "missing")), - # Write nothing, read all — all missing (None, slice(None), ("missing", "missing", "missing")), ], ) async def test_read_returns_get_results( + pipeline_class: str, write_slice: slice | None, read_slice: slice, expected_statuses: tuple[str, ...], ) -> None: - """ - Test that CodecPipeline.read returns a tuple of GetResult with correct statuses. - """ + """CodecPipeline.read returns GetResult with correct statuses.""" store = MemoryStore() arr = zarr.open_array(store, mode="w", shape=(6,), chunks=(2,), dtype="int64", fill_value=-1) @@ -74,6 +102,34 @@ async def test_read_returns_get_results( assert result["status"] == expected_status +# --------------------------------------------------------------------------- +# write_empty_chunks / read_missing_chunks config tests +# --------------------------------------------------------------------------- + + +async def test_write_empty_chunks_false_no_store(pipeline_class: str) -> None: + """With write_empty_chunks=False, fill_value-only chunks should not be stored.""" + store: dict[str, Any] = {} + arr = zarr.create_array( + store=store, + shape=(20,), + dtype="float64", + chunks=(10,), + shards=None, + compressors=None, + fill_value=0.0, + config={"write_empty_chunks": False}, + ) + arr[:] = 0.0 # all fill_value + + # Chunks should NOT be persisted + assert "c/0" not in store + assert "c/1" not in store + + # But reading should still return fill values + np.testing.assert_array_equal(arr[:], np.zeros(20, dtype="float64")) + + try: import cast_value_rs # noqa: F401 @@ -120,3 +176,78 @@ def test_codec_pipeline_threads_dtype_through_evolve(source_dtype: str, target_d ) arr[:] = np.asarray([0, 1, 2, 3], dtype=source_dtype) np.testing.assert_array_equal(arr[:], np.asarray([0, 1, 2, 3], dtype=source_dtype)) + + +def test_evolve_threads_spec_preserving_serializer_endian(pipeline_class: str) -> None: + """Regression for #3937, dependency-free variant. + + `evolve_from_array_spec` must thread the spec FORWARD through the codec chain: + each codec is evolved against the spec produced by the previous one, not the + original array spec. An array->array codec that widens the dtype from a + single-byte type (no endianness) to a multi-byte type means the BytesCodec + serializer must be evolved against the *widened* dtype — otherwise it sees + the single-byte source, strips its `endian` to None, and later fails to + decode the multi-byte data. + + The original regression test for this needs `cast_value_rs` (so it only runs + in the optional-deps CI job). This variant uses a minimal dtype-widening AA + codec stub, so it runs everywhere and on both pipelines via `pipeline_class`. + """ + from dataclasses import dataclass + + from zarr.abc.codec import ArrayArrayCodec + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.dtype import get_data_type_from_native_dtype + from zarr.registry import get_pipeline_class + + @dataclass(frozen=True) + class _WidenToInt16(ArrayArrayCodec): + """Test-only AA codec: reports the encoded dtype as int16 (no real encode).""" + + is_fixed_size = True + + def to_dict(self) -> dict[str, Any]: + return {"name": "_widen_to_int16"} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> _WidenToInt16: + return cls() + + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: + from dataclasses import replace + + return replace(chunk_spec, dtype=get_data_type_from_native_dtype(np.dtype("int16"))) + + def compute_encoded_size(self, input_byte_length: int, _spec: ArraySpec) -> int: + return input_byte_length + + async def _decode_single(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + async def _encode_single(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + zdtype = get_data_type_from_native_dtype(np.dtype("int8")) # single-byte source + spec = ArraySpec( + shape=(4,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=default_buffer_prototype(), + ) + + from zarr.core.codec_pipeline import BatchedCodecPipeline, FusedCodecPipeline + + pipeline = get_pipeline_class().from_codecs((_WidenToInt16(), BytesCodec(endian="little"))) + evolved = pipeline.evolve_from_array_spec(spec) + # Both concrete pipelines expose `array_bytes_codec`; narrow off the ABC. + assert isinstance(evolved, (BatchedCodecPipeline, FusedCodecPipeline)) + serializer = evolved.array_bytes_codec + + # The serializer must keep its little-endian setting: it is evolved against + # the widened (int16) dtype, not the single-byte source. + assert isinstance(serializer, BytesCodec) + assert serializer.endian is not None, ( + "BytesCodec serializer lost its `endian` — evolve_from_array_spec did not " + "thread the dtype-widening AA codec's spec into the serializer" + ) diff --git a/tests/test_codec_pipeline_suite.py b/tests/test_codec_pipeline_suite.py new file mode 100644 index 0000000000..07e1aa2ec4 --- /dev/null +++ b/tests/test_codec_pipeline_suite.py @@ -0,0 +1,582 @@ +"""Shared codec-pipeline behavior suite, run against EVERY codec pipeline. + +The defining property of a codec pipeline is that the array semantics it +produces must be identical no matter which pipeline is configured. To make +"one pipeline diverges from the others" structurally hard to ship, every +pipeline-agnostic behavior test lives as a method on ``CodecPipelineTests`` and +is instantiated once per pipeline (``TestBatchedPipeline`` / ``TestFusedPipeline``). + +Each test also runs over a *store axis* that exercises both code paths the +synchronous pipelines branch on: + +* ``sync`` -> ``MemoryStore`` (supports ``get_sync``/``set_sync``: fast path) +* ``async`` -> ``LatencyStore(MemoryStore())`` (NOT sync-capable: async fallback) + +The async axis is deliberate: a regression that only affects the async fallback +of the default pipeline (e.g. a codec-spec-evolution bug that surfaces only on +remote stores) is invisible if every test runs on MemoryStore. Running the same +battery over a non-sync store closes that gap. + +Pipeline-specific tests (construction, ``from_codecs``, the byte-range write +fast path, etc.) stay in their own modules; only behavior that ALL pipelines +must share belongs here. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +import numcodecs +import numpy as np +import pytest + +import zarr +from zarr.codecs import BytesCodec, GzipCodec, ShardingCodec, TransposeCodec +from zarr.core.config import config as zarr_config +from zarr.errors import ChunkNotFoundError +from zarr.storage import MemoryStore +from zarr.testing.store import LatencyStore + +if TYPE_CHECKING: + from collections.abc import Iterator + + from zarr.abc.store import Store + from zarr.codecs.sharding import SubchunkWriteOrder + + +# --- store axis: a sync store and a non-sync (async-fallback) store ---------- + +STORE_KINDS = ["sync", "async"] + + +def _make_store(kind: str) -> Store: + if kind == "sync": + # MemoryStore supports get_sync/set_sync -> synchronous fast path. + return MemoryStore() + if kind == "async": + # LatencyStore is NOT SupportsGetSync/SupportsSetSync, so a synchronous + # pipeline must fall back to its async path. Zero latency keeps it fast. + return LatencyStore(MemoryStore(), get_latency=0.0, set_latency=0.0) + raise AssertionError(kind) + + +# --- scenario model ---------------------------------------------------------- +# +# Most pipeline behavior tests have one shape: +# create an array, apply some writes, (optionally) assert which chunk keys +# exist, then assert reads come back correct. A Scenario captures exactly those +# variables so one parametrized test covers them all. Correctness is checked +# against a numpy reference array that the scenario mutates in lock-step with +# the zarr array, so cases don't hand-maintain expected values. + + +@dataclass(frozen=True) +class Scenario: + id: str + array_kwargs: dict[str, Any] + # (selection, value) writes applied in order. value may be a scalar or array. + writes: tuple[tuple[Any, Any], ...] = () + # selections to read back and check against the reference. () means "read all". + reads: tuple[Any, ...] = (slice(None),) + # substrings of chunk keys that must be present / absent after the writes. + # Only checked on the sync store (key layout is identical across stores, but + # we keep it to one axis to avoid asserting store internals twice). + keys_present: tuple[str, ...] = () + keys_absent: tuple[str, ...] = () + + def reference(self) -> np.ndarray: + """The numpy array the scenario's writes should produce, starting from + the array's fill value.""" + kw = self.array_kwargs + shape = kw["shape"] + dtype = np.dtype(kw["dtype"]) + fill = kw.get("fill_value", 0) + ref = np.full(shape, fill, dtype=dtype) + for sel, value in self.writes: + ref[sel] = value + return ref + + +def _val(n: int, dtype: str, offset: int = 1) -> np.ndarray: + return np.arange(offset, offset + n, dtype=dtype) + + +# Common dtype/chunk presets reused below. +_F64 = {"dtype": "float64", "fill_value": 0.0} +_I32 = {"dtype": "int32", "fill_value": -1} + +SCENARIOS: tuple[Scenario, ...] = ( + # --- full-array roundtrips across layouts/codecs ------------------------ + Scenario( + "1d-unsharded-roundtrip", + {"shape": (100,), "chunks": (10,), "shards": None, "compressors": None, **_F64}, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "1d-sharded-roundtrip", + {"shape": (100,), "chunks": (10,), "shards": (100,), "compressors": None, **_F64}, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "1d-multi-chunk-shard-roundtrip", + {"shape": (100,), "chunks": (10,), "shards": (50,), "compressors": None, **_F64}, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "2d-unsharded-roundtrip", + {"shape": (10, 20), "chunks": (5, 10), "shards": None, "compressors": None, **_I32}, + writes=((slice(None), np.arange(200, dtype="int32").reshape(10, 20)),), + ), + Scenario( + "2d-sharded-roundtrip", + {"shape": (20, 20), "chunks": (5, 5), "shards": (10, 10), "compressors": None, **_I32}, + writes=((slice(None), np.arange(400, dtype="int32").reshape(20, 20)),), + ), + Scenario( + "1d-gzip-roundtrip", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "compressors": {"name": "gzip", "configuration": {"level": 1}}, + **_F64, + }, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "1d-zstd-roundtrip", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "compressors": {"name": "zstd", "configuration": {"level": 1}}, + **_F64, + }, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "1d-float32-roundtrip", + { + "shape": (50,), + "chunks": (10,), + "shards": None, + "compressors": None, + "dtype": "float32", + "fill_value": 0.0, + }, + writes=((slice(None), _val(50, "float32")),), + ), + # zarr v2 goes through the V2Codec wrapper (filters + compressor), a + # different codec path than the v3 AA/AB/BB chain — and a different sync + # implementation under FusedCodecPipeline. Without these scenarios, v2 was + # only exercised implicitly via whichever pipeline is the global default. + Scenario( + "v2-roundtrip", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "compressors": None, + "zarr_format": 2, + **_F64, + }, + writes=((slice(None), _val(100, "float64")),), + ), + Scenario( + "v2-gzip-roundtrip", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "compressors": numcodecs.GZip(level=1), + "zarr_format": 2, + **_F64, + }, + writes=((slice(None), _val(100, "float64")),), + ), + # v2 filters are the other half of the V2Codec wrapper (numcodecs + # array->array filters, a distinct branch from the compressor in + # _encode_sync/_decode_sync). + Scenario( + "v2-filter-gzip-roundtrip", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "filters": numcodecs.Delta(dtype="float64"), + "compressors": numcodecs.GZip(level=1), + "zarr_format": 2, + **_F64, + }, + writes=((slice(None), _val(100, "float64")),), + ), + # --- read unwritten chunks -> fill value -------------------------------- + Scenario( + "missing-chunks-fill", + { + "shape": (100,), + "chunks": (10,), + "shards": None, + "compressors": None, + "dtype": "float64", + "fill_value": -7.0, + }, + writes=(), + ), + Scenario( + "missing-chunks-fill-sharded", + { + "shape": (100,), + "chunks": (10,), + "shards": (100,), + "compressors": None, + "dtype": "float64", + "fill_value": -7.0, + }, + writes=(), + ), + # --- partial write, varied read selections ------------------------------ + Scenario( + "partial-write-full-read", + {"shape": (100,), "chunks": (10,), "shards": None, "compressors": None, **_F64}, + writes=((slice(5, 15), _val(10, "float64")),), + reads=(slice(None),), + ), + Scenario( + "full-write-strided-read", + {"shape": (100,), "chunks": (10,), "shards": None, "compressors": None, **_F64}, + writes=((slice(None), _val(100, "float64")),), + reads=(np.s_[::3], np.s_[10:20]), + ), + Scenario( + "partial-write-partial-read-sharded", + {"shape": (100,), "chunks": (10,), "shards": (100,), "compressors": None, **_F64}, + writes=((slice(20, 70), _val(50, "float64")),), + reads=(np.s_[30:60], slice(None)), + ), + # scalar single-element reads from a sharded array hit the sharding codec's + # partial-decode path (_decode_partial_single), distinct from slice reads. + Scenario( + "sharded-scalar-reads-1d", + {"shape": (100,), "chunks": (10,), "shards": (50,), "compressors": None, **_F64}, + writes=((slice(None), _val(100, "float64")),), + reads=(np.s_[0], np.s_[50], np.s_[99], np.s_[::3]), + ), + Scenario( + "sharded-scalar-reads-2d", + {"shape": (20, 20), "chunks": (5, 5), "shards": (10, 10), "compressors": None, **_I32}, + writes=((slice(None), np.arange(400, dtype="int32").reshape(20, 20)),), + reads=(np.s_[0, 0], np.s_[10, 10], np.s_[19, 19]), + ), + # --- spec-changing codec (transpose): the async-spec-evolution guard ---- + Scenario( + "transpose", + { + "shape": (8, 12), + "chunks": (2, 4), + "shards": None, + "filters": [TransposeCodec(order=(1, 0))], + "serializer": BytesCodec(), + **_I32, + }, + writes=((slice(None), np.arange(96, dtype="int32").reshape(8, 12)),), + reads=(slice(None), np.s_[1:7, 2:10]), + ), + Scenario( + "transpose-gzip", + { + "shape": (8, 12), + "chunks": (2, 4), + "shards": None, + "filters": [TransposeCodec(order=(1, 0))], + "serializer": BytesCodec(), + "compressors": GzipCodec(level=1), + **_I32, + }, + writes=((slice(None), np.arange(96, dtype="int32").reshape(8, 12)),), + reads=(slice(None), np.s_[1:7, 2:10]), + ), + # --- nested sharding ---------------------------------------------------- + Scenario( + "nested-sharding", + { + "shape": (20, 20), + "chunks": (10, 10), + "shards": None, + "compressors": None, + **_I32, + "fill_value": 0, + "serializer": ShardingCodec( + chunk_shape=(10, 10), codecs=[ShardingCodec(chunk_shape=(5, 5))] + ), + }, + writes=((slice(None), np.arange(400, dtype="int32").reshape(20, 20)),), + ), + # --- partial overwrite of an existing shard (merge) --------------------- + Scenario( + "partial-shard-overwrite", + { + "shape": (40,), + "chunks": (4,), + "shards": (40,), + "compressors": None, + **_I32, + "config": {"write_empty_chunks": True}, + }, + writes=( + (slice(None), np.arange(40, dtype="int32")), + (slice(7, 18), _val(11, "int32", 700)), + ), + ), + # --- write_empty_chunks: storage-key presence/absence ------------------- + Scenario( + "write-empty-false-omits-fill-chunk", + { + "shape": (20,), + "chunks": (10,), + "shards": None, + "compressors": None, + **_F64, + "config": {"write_empty_chunks": False}, + }, + writes=((slice(0, 10), _val(10, "float64")), (slice(10, 20), np.zeros(10, "float64"))), + keys_present=("c/0",), + keys_absent=("c/1",), + ), + Scenario( + "write-empty-true-persists-fill-chunk", + { + "shape": (20,), + "chunks": (10,), + "shards": None, + "compressors": None, + **_F64, + "config": {"write_empty_chunks": True}, + }, + writes=((slice(None), np.zeros(20, "float64")),), + keys_present=("c/0", "c/1"), + ), + # default config (no explicit write_empty_chunks) must still skip fill chunks + Scenario( + "default-config-omits-fill-chunk", + {"shape": (20,), "chunks": (10,), "shards": None, "compressors": None, **_F64}, + writes=((slice(10, 20), np.zeros(10, "float64")),), + keys_absent=("c/1",), + ), +) + + +class CodecPipelineTests: + """Behavior every codec pipeline must satisfy, on sync and async stores. + + Subclasses set ``pipeline_path`` to the fully-qualified pipeline class. + """ + + pipeline_path: str + + @pytest.fixture(autouse=True) + def _use_pipeline(self) -> Iterator[None]: + with zarr_config.set({"codec_pipeline.path": self.pipeline_path}): + yield + + @pytest.fixture(params=STORE_KINDS) + def store(self, request: pytest.FixtureRequest) -> Store: + return _make_store(request.param) + + @staticmethod + def _chunk_keys(store: Store) -> set[str]: + """All non-metadata keys currently in the store (v3 and v2 metadata).""" + import asyncio + + def _is_metadata(key: str) -> bool: + tail = key.rsplit("/", 1)[-1] + return tail in ("zarr.json", ".zarray", ".zattrs", ".zgroup", ".zmetadata") + + async def _list() -> set[str]: + return {k async for k in store.list() if not _is_metadata(k)} + + return asyncio.run(_list()) + + # -- the common shape: create -> write -> [assert keys] -> assert reads ---- + + @pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) + def test_scenario(self, store: Store, scenario: Scenario) -> None: + """Create an array, apply the scenario's writes, optionally assert which + chunk keys exist, then assert each read selection matches a numpy + reference. Run against every pipeline (subclass) and store kind (fixture). + """ + arr = zarr.create_array(store=store, **scenario.array_kwargs) + for sel, value in scenario.writes: + arr[sel] = value + + ref = scenario.reference() + for sel in scenario.reads: + np.testing.assert_array_equal( + arr[sel], ref[sel], err_msg=f"{scenario.id}: read {sel!r} mismatch" + ) + + if scenario.keys_present or scenario.keys_absent: + keys = self._chunk_keys(store) + for present in scenario.keys_present: + assert any(present in k for k in keys), (present, keys) + for absent in scenario.keys_absent: + assert not any(absent in k for k in keys), (absent, keys) + + # -- outliers that don't fit the create/write/read scenario shape ---------- + + def test_read_missing_chunks_false_raises(self, store: Store) -> None: + """read_missing_chunks=False makes reading an unwritten chunk an error, + not a fill — a different assertion (raises) than the scenario shape.""" + arr = zarr.create_array( + store=store, + shape=(20,), + dtype="float64", + chunks=(10,), + shards=None, + compressors=None, + fill_value=0.0, + config={"read_missing_chunks": False}, + ) + with pytest.raises(ChunkNotFoundError): + arr[:] + + def test_read_missing_chunks_false_sharded_semantics(self, store: Store) -> None: + """read_missing_chunks=False is a STORE-KEY-level promise on sharded arrays. + + The config exists to help consumers distinguish a transport error from a + truly missing chunk. That distinction applies to store keys: a missing + SHARD key raises ChunkNotFoundError. It does not cleanly apply to inner + subchunks of a shard that was fetched successfully — there is no + transport ambiguity there, the shard index simply records the subchunk + as absent — so missing inner subchunks fill with the fill value rather + than raising. This pins that asymmetry as intentional. + """ + arr = zarr.create_array( + store=store, + shape=(100,), + dtype="float64", + chunks=(10,), + shards=(50,), + compressors=None, + fill_value=-1.0, + config={"read_missing_chunks": False}, + ) + # No shard key exists yet: reading is a missing-store-key error. + with pytest.raises(ChunkNotFoundError): + arr[:] + + # Write one inner chunk of the first shard. The shard key now exists, + # but most inner subchunks are absent from its index. + arr[20:30] = np.arange(10, dtype="float64") + + # Reading across written + absent inner subchunks of the EXISTING shard + # fills rather than raises. + out = arr[15:35] + expected = np.full(20, -1.0) + expected[5:15] = np.arange(10, dtype="float64") + np.testing.assert_array_equal(out, expected) + + # Both halves of the asymmetry in ONE read against the SAME partially + # written array: shard 0 exists (absent subchunks fill), shard 1 has no + # store key (raises) — pins that the raise still fires once some shard + # exists, and not only on a fully-empty array. + with pytest.raises(ChunkNotFoundError): + arr[:] + with pytest.raises(ChunkNotFoundError): + arr[45:55] # spans the existing and the missing shard + + @pytest.mark.parametrize("subchunk_write_order", ["morton", "lexicographic", "colexicographic"]) + def test_partial_write_after_reopen_is_correct( + self, store: Store, subchunk_write_order: SubchunkWriteOrder + ) -> None: + """Has an extra step the scenario shape lacks — a REOPEN between writes. + + Reopening a sharded array and partially overwriting it must read back + correctly regardless of the original subchunk_write_order. subchunk_write_ + order is intentionally NOT recoverable on reopen, so chunk locations on a + write to an existing shard must come from the STORED shard index, not the + (now-default) live order. A non-square inner grid makes the orders + physically distinct, so a wrong offset would corrupt data and fail here. + """ + shape, shard, inner = (6, 4), (6, 4), (2, 2) + arr = zarr.create_array( + store=store, + shape=shape, + dtype="int32", + chunks=shard, + fill_value=-1, + compressors=None, + config={"write_empty_chunks": True}, + serializer=ShardingCodec( + chunk_shape=inner, codecs=[BytesCodec()], subchunk_write_order=subchunk_write_order + ), + ) + ref = np.arange(24, dtype="int32").reshape(shape) + arr[:] = ref + + reopened = zarr.open_array(store=store, mode="r+") + reopened[1:5, 0:3] = 777 # partial overwrite into the existing shard + ref[1:5, 0:3] = 777 + np.testing.assert_array_equal(reopened[:], ref) + + def test_empty_shard_deleted_after_overwrite_to_fill(self, store: Store) -> None: + """A shard written with real data and then fully overwritten back to the + fill value must have its store key deleted, not left as a stale blob. + + This has a mid-sequence key assertion (present after write 1, absent + after write 2) that the create/write/read scenario shape can't express. + """ + arr = zarr.create_array( + store=store, + shape=(16,), + chunks=(4,), + shards=(8,), + dtype="float64", + compressors=None, + fill_value=0.0, + ) + arr[0:8] = np.arange(8, dtype="float64") + 1 + assert any("c/0" in k for k in self._chunk_keys(store)) + arr[0:8] = 0.0 + assert not any("c/0" in k for k in self._chunk_keys(store)), ( + "shard should be deleted when fully overwritten to fill value" + ) + + def test_read_write_methods_do_not_branch_on_sharding_codec_type(self) -> None: + """Pipeline read/write must dispatch on supports_partial_encode/decode, + not isinstance(ShardingCodec) — a static guard against type-branching. + + Scoped to this pipeline's own read/write methods (other helpers, e.g. + metadata validation, may legitimately isinstance-check ShardingCodec). + """ + import inspect + import re + + from zarr.registry import get_pipeline_class + + # The autouse _use_pipeline fixture has set codec_pipeline.path to this + # subclass's pipeline; resolve the class it points at and guard that. + # reload_config=False so the fixture's config override is honored + # (reload_config=True re-reads the base config, ignoring the override). + cls = get_pipeline_class(reload_config=False) + + pattern = re.compile(r"isinstance\s*\([^)]*ShardingCodec[^)]*\)") + for method_name in ("read", "write", "read_sync", "write_sync"): + method = getattr(cls, method_name, None) + if method is None: + continue + matches = pattern.findall(inspect.getsource(method)) + assert not matches, ( + f"{cls.__name__}.{method_name} contains an isinstance check on " + f"ShardingCodec; use supports_partial_encode/decode instead. " + f"Matches: {matches}" + ) + + +class TestBatchedPipeline(CodecPipelineTests): + pipeline_path = "zarr.core.codec_pipeline.BatchedCodecPipeline" + + +class TestFusedPipeline(CodecPipelineTests): + pipeline_path = "zarr.core.codec_pipeline.FusedCodecPipeline" diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 6e3e3f6d28..e1177b087e 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -402,3 +402,41 @@ async def test_resize(store: Store) -> None: assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None + + +def _resolve_metadata_codecs() -> list[Codec]: + from zarr.codecs.crc32c_ import Crc32cCodec + from zarr.codecs.zstd import ZstdCodec + + return [ + BytesCodec(), + GzipCodec(level=1), + TransposeCodec(order=(0,)), + Crc32cCodec(), + ZstdCodec(level=1), + ] + + +@pytest.mark.parametrize("codec", _resolve_metadata_codecs(), ids=lambda c: type(c).__name__) +def test_resolve_metadata_only_mutates_shape(codec: Codec) -> None: + """A codec's resolve_metadata may change a chunk's `shape` but must leave the + prototype, dtype, fill_value, and config untouched -- the pipeline relies on + those being stable across the codec chain. + """ + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.dtype import get_data_type_from_native_dtype + + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec_in = ArraySpec( + shape=(10,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0.0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=default_buffer_prototype(), + ) + spec_out = codec.resolve_metadata(spec_in) + name = type(codec).__name__ + assert spec_out.prototype is spec_in.prototype, f"{name} changed prototype" + assert spec_out.dtype == spec_in.dtype, f"{name} changed dtype" + assert spec_out.fill_value == spec_in.fill_value, f"{name} changed fill_value" + assert spec_out.config == spec_in.config, f"{name} changed config" diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 856d29ef7a..c65bf3de43 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -28,6 +28,64 @@ from .test_codecs import _AsyncArrayProxy, order_from_dim +def _reads_are_sync(store_mock: AsyncMock) -> bool: + """True when the partial-shard read for this store+pipeline goes through the + synchronous methods (get_sync / get_ranges_sync). That requires BOTH the + configured pipeline to be the sync (Fused) one AND the store to support sync + reads — a Fused read against a non-sync store (e.g. ZipStore) falls back to + the async path. Lets the partial-shard-read tests assert the same intent + against whichever method family is actually exercised.""" + from zarr.abc.store import SupportsGetSync + from zarr.core.config import config + + pipeline_is_sync = "Fused" in config.get("codec_pipeline.path") + # store_mock wraps the real store; check the wrapped class for sync support. + wrapped = getattr(store_mock, "_mock_wraps", store_mock) + return pipeline_is_sync and isinstance(wrapped, SupportsGetSync) + + +def _index_read_count(store_mock: AsyncMock) -> int: + """Number of shard-index reads, regardless of sync/async pipeline.""" + method = store_mock.get_sync if _reads_are_sync(store_mock) else store_mock.get + return int(method.call_count) + + +def _range_read_count(store_mock: AsyncMock) -> int: + """Number of coalesced chunk-data reads, regardless of sync/async pipeline.""" + method = store_mock.get_ranges_sync if _reads_are_sync(store_mock) else store_mock.get_ranges + return int(method.call_count) + + +def _fail_index_read(store_mock: AsyncMock) -> None: + """Simulate the shard-index load returning nothing, for the active path.""" + if _reads_are_sync(store_mock): + store_mock.get_sync.return_value = None + else: + store_mock.get.return_value = None + + +def _fail_chunk_reads( + store_mock: AsyncMock, key_absent_exc: type[Exception] = FileNotFoundError +) -> None: + """Simulate chunk-data loads failing (key absent), for the active path. + + Async get_ranges raises a BaseExceptionGroup; the sync get_ranges_sync mirrors + that contract, so both inject a FileNotFoundError-bearing group.""" + if _reads_are_sync(store_mock): + + def fail_sync(key: str, byte_ranges: Any, **kwargs: Any) -> Any: + raise BaseExceptionGroup("chunk read failed", [key_absent_exc(key)]) + + store_mock.get_ranges_sync = fail_sync + else: + + async def fail_async(key: str, byte_ranges: Any, **kwargs: Any) -> Any: + raise BaseExceptionGroup("chunk read failed", [key_absent_exc(key)]) + yield # type: ignore[unreachable] # marks this as an async generator + + store_mock.get_ranges = fail_async + + @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize( @@ -231,18 +289,18 @@ def test_sharding_multiple_chunks_partial_shard_read( # for a total of 6 chunks accessed assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32")) - # 2 shard index reads via store.get() + 2 get_ranges calls (one per shard) - assert store_mock.get.call_count == 2 - assert store_mock.get_ranges.call_count == 2 + # 2 shard index reads + 2 coalesced chunk-data reads (one per shard) + assert _index_read_count(store_mock) == 2 + assert _range_read_count(store_mock) == 2 store_mock.reset_mock() # Reads 4 chunks from both shards along dimension 0 for a total of 8 chunks accessed assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32")) - # 2 shard index reads via store.get() + 2 get_ranges calls (one per shard) - assert store_mock.get.call_count == 2 - assert store_mock.get_ranges.call_count == 2 + # 2 shard index reads + 2 coalesced chunk-data reads (one per shard) + assert _index_read_count(store_mock) == 2 + assert _range_read_count(store_mock) == 2 @pytest.mark.parametrize("index_location", ["start", "end"]) @@ -278,9 +336,9 @@ def test_sharding_duplicate_read_indexes( indexer = [8, 8, 12, 12] assert np.array_equal(a[indexer], data[indexer]) - # 1 shard index read via store.get() + 1 get_ranges call - assert store_mock.get.call_count == 1 - assert store_mock.get_ranges.call_count == 1 + # 1 shard index read + 1 coalesced chunk-data read + assert _index_read_count(store_mock) == 1 + assert _range_read_count(store_mock) == 1 @pytest.mark.parametrize("index_location", ["start", "end"]) @@ -369,8 +427,6 @@ def test_sharding_partial_shard_read__index_load_fails( fill_value = -999 store_mock = AsyncMock(wraps=store, spec=store.__class__) - # loading the index is the first call to .get() so returning None will simulate an index load failure - store_mock.get.return_value = None a = zarr.create_array( StorePath(store_mock), @@ -383,6 +439,10 @@ def test_sharding_partial_shard_read__index_load_fails( ) a[:] = data + # Loading the index returns None -> simulate an index load failure, on + # whichever read method the active pipeline uses (get / get_sync). + _fail_index_read(store_mock) + # Read from one of two chunks in a shard to test the partial shard read path assert a[0] == fill_value assert a[0] != data[0] @@ -449,16 +509,12 @@ def test_sharding_partial_shard_read__chunk_load_fails( a[:] = data # Set up store mock after array creation to simulate chunk load failure. - # Index loads still succeed (via store.get), but chunk-byte loads fail - # (via store.get_ranges raising BaseExceptionGroup containing FileNotFoundError — - # the same shape Store.get_ranges produces when a key is absent). + # Index loads still succeed, but chunk-byte loads fail (the coalesced range + # read raises a BaseExceptionGroup containing FileNotFoundError — the same + # shape produced when a key is absent), on whichever read method the active + # pipeline uses (get_ranges / get_ranges_sync). store_mock.reset_mock() - - async def fail_chunk_reads(key: str, byte_ranges: Any, **kwargs: Any) -> Any: - raise BaseExceptionGroup("chunk read failed", [FileNotFoundError(key)]) - yield # type: ignore[unreachable] # marks this as an async generator - - store_mock.get_ranges = fail_chunk_reads + _fail_chunk_reads(store_mock) # Read from one of two chunks in a shard to test the partial shard read path assert a[0] == fill_value diff --git a/tests/test_codecs/test_sharding_unit.py b/tests/test_codecs/test_sharding_unit.py index 6e022ed9fa..6736b1047c 100644 --- a/tests/test_codecs/test_sharding_unit.py +++ b/tests/test_codecs/test_sharding_unit.py @@ -1,6 +1,14 @@ +import asyncio +from dataclasses import dataclass, replace +from typing import Any + import numpy as np import pytest +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.gzip import GzipCodec from zarr.codecs.sharding import ( MAX_UINT_64, ShardingCodec, @@ -8,8 +16,12 @@ _ShardingByteGetter, _ShardReader, ) -from zarr.core.buffer import default_buffer_prototype +from zarr.core.array_spec import ArrayConfig, ArraySpec +from zarr.core.buffer import Buffer as ABCBuffer +from zarr.core.buffer import NDBuffer, default_buffer_prototype from zarr.core.buffer.cpu import Buffer +from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.storage._common import StorePath from zarr.storage._memory import MemoryStore @@ -486,3 +498,254 @@ def test_is_total_shard_1d() -> None: # Partial partial_coords: set[tuple[int, ...]] = {(0,), (2,)} assert codec._is_total_shard(partial_coords, chunks_per_shard) is False + + +# ============================================================================ +# _inner_codecs_fixed_size tests +# ============================================================================ + + +def test_inner_codecs_fixed_size_no_compression() -> None: + """Inner codecs without compression should be fixed-size.""" + codec = ShardingCodec(chunk_shape=(10,), codecs=[BytesCodec()]) + assert codec._inner_codecs_fixed_size is True + + +def test_inner_codecs_fixed_size_with_compression() -> None: + """Inner codecs with compression should NOT be fixed-size.""" + codec = ShardingCodec(chunk_shape=(10,), codecs=[BytesCodec(), GzipCodec()]) + assert codec._inner_codecs_fixed_size is False + + +# ============================================================================ +# inner-chain spec threading +# ============================================================================ + + +@dataclass(frozen=True) +class _WidenToInt16(ArrayArrayCodec): + """Test-only sync-capable AA codec that reports its output dtype as int16.""" + + is_fixed_size = True + + def to_dict(self) -> dict[str, Any]: + return {"name": "_widen_to_int16"} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "_WidenToInt16": + return cls() + + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: + return replace(chunk_spec, dtype=get_data_type_from_native_dtype(np.dtype("int16"))) + + def compute_encoded_size(self, input_byte_length: int, _spec: ArraySpec) -> int: + return input_byte_length + + def _encode_sync(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + def _decode_sync(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + async def _encode_single(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + async def _decode_single(self, chunk_array: Any, chunk_spec: ArraySpec) -> Any: + return chunk_array # pragma: no cover + + +def _int8_spec(shape: tuple[int, ...]) -> ArraySpec: + zdtype = get_data_type_from_native_dtype(np.dtype("int8")) # single-byte source + return ArraySpec( + shape=shape, + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=default_buffer_prototype(), + ) + + +def test_inner_chunk_transform_threads_spec() -> None: + """The inner codec chain must be evolved with the spec threaded forward. + + A dtype-widening inner array->array codec means the BytesCodec serializer + is evolved against the WIDENED dtype, not the single-byte source — + otherwise it strips its `endian` to None and fails to decode multi-byte + inner chunks. Same contract as the pipeline-level `evolve_codecs` + regression test, applied to `_get_inner_chunk_transform`. + """ + codec = ShardingCodec(chunk_shape=(4,), codecs=[_WidenToInt16(), BytesCodec(endian="little")]) + shard_spec = _int8_spec((8,)) + + transform = codec._get_inner_chunk_transform(shard_spec) + serializer = transform._ab_codec + assert isinstance(serializer, BytesCodec) + assert serializer.endian is not None, ( + "inner BytesCodec lost its `endian` — _get_inner_chunk_transform did not " + "thread the dtype-widening codec's spec into the serializer" + ) + + +def test_evolve_from_array_spec_threads_spec() -> None: + """`ShardingCodec.evolve_from_array_spec` must thread the spec through the + inner chain, like `_get_inner_chunk_transform` does. + + This method runs EARLIER, on the real array-creation path (the outer + pipeline evolves the sharding codec itself), so an unthreaded evolve here + bakes an endian-stripped BytesCodec into the evolved instance's `codecs` + before the transform builders ever run — and the later threaded evolve then + raises instead of recovering. Calling `_get_inner_chunk_transform` on the + EVOLVED instance pins the full real path. + """ + codec = ShardingCodec(chunk_shape=(4,), codecs=[_WidenToInt16(), BytesCodec(endian="little")]) + # the array spec the OUTER pipeline evolves the sharding codec against + array_spec = _int8_spec((8,)) + + evolved = codec.evolve_from_array_spec(array_spec) + inner_serializer = next(c for c in evolved.codecs if isinstance(c, BytesCodec)) + assert inner_serializer.endian is not None, ( + "evolve_from_array_spec evolved the inner BytesCodec against the " + "un-widened spec, stripping its `endian`" + ) + + # and the evolved instance must still build a working inner transform + transform = evolved._get_inner_chunk_transform(array_spec) + serializer = transform._ab_codec + assert isinstance(serializer, BytesCodec) + assert serializer.endian is not None + + +# ============================================================================ +# async whole-shard codec methods +# +# `ShardingCodec` advertises partial decode/encode, so the codec pipeline +# always routes sharded reads/writes through `_decode_partial_single` / +# `_encode_partial_single`. The whole-shard async methods `_decode_single` / +# `_encode_single` are reached only via the direct `ArrayBytesCodec` API (e.g. +# a consumer that calls the codec outside a pipeline), so they get no coverage +# from end-to-end array tests. Pin them with a direct round-trip. +# ============================================================================ + + +@pytest.mark.parametrize("write_empty_chunks", [True, False]) +def test_decode_single_encode_single_roundtrip(write_empty_chunks: bool) -> None: + """`ShardingCodec._encode_single` then `_decode_single` round-trips a whole + shard. Covers the async whole-shard path the pipeline bypasses in favor of + the partial methods.""" + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(50,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=write_empty_chunks), + prototype=default_buffer_prototype(), + ) + codec = ShardingCodec(chunk_shape=(10,), codecs=[BytesCodec()]) + data = np.arange(50, dtype="float64") + value = CPUNDBuffer.from_numpy_array(data) + + encoded = asyncio.run(codec._encode_single(value, spec)) + assert encoded is not None # data is non-empty -> a shard is always produced + decoded = asyncio.run(codec._decode_single(encoded, spec)) + np.testing.assert_array_equal(decoded.as_numpy_array(), data) + + +def test_encode_single_all_empty_returns_none() -> None: + """`_encode_single` of an all-fill shard under write_empty_chunks=False + elides every inner chunk and returns None (the all-empty branch).""" + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(50,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=default_buffer_prototype(), + ) + codec = ShardingCodec(chunk_shape=(10,), codecs=[BytesCodec()]) + fill = CPUNDBuffer.from_numpy_array(np.zeros(50, dtype="float64")) + + assert asyncio.run(codec._encode_single(fill, spec)) is None + + +def test_decode_single_all_empty_fills() -> None: + """`_decode_single` of a shard whose index is all-empty fills the output + with the fill value (the is_all_empty fast path).""" + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(50,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(-1.0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=default_buffer_prototype(), + ) + codec = ShardingCodec(chunk_shape=(10,), codecs=[BytesCodec()]) + # an empty shard is just the encoded empty index + empty_index = asyncio.run(codec._encode_shard_index(_ShardIndex.create_empty((5,)))) + decoded = asyncio.run(codec._decode_single(empty_index, spec)) + np.testing.assert_array_equal(decoded.as_numpy_array(), np.full(50, -1.0)) + + +# ============================================================================ +# async-only index codec fallback (#269) +# +# `_decode_shard_index` / `_encode_shard_index` delegate to their sync twins +# when every index codec is sync-capable, and otherwise fall back to the async +# pipeline. The default index chain (bytes + crc32c) is sync-capable, so the +# fallback is exercised only by an async-only index codec. +# ============================================================================ + + +class _AsyncOnlyBytesCodec(ArrayBytesCodec): + """An array<->bytes codec that implements ONLY the async per-chunk methods. + + Wraps a real `BytesCodec` for the actual conversion but deliberately omits + `_encode_sync`/`_decode_sync`, so it is NOT a `SupportsSyncCodec`. Used as + an index codec to force the async-pipeline fallback in + `_decode_shard_index`/`_encode_shard_index`. + """ + + _inner = BytesCodec() + + def to_dict(self) -> dict[str, Any]: + return {"name": "_async_only_bytes"} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "_AsyncOnlyBytesCodec": + return cls() + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> "_AsyncOnlyBytesCodec": + return self + + def compute_encoded_size(self, input_byte_length: int, _spec: ArraySpec) -> int: + return input_byte_length + + async def _decode_single(self, chunk_bytes: ABCBuffer, chunk_spec: ArraySpec) -> NDBuffer: + return await self._inner._decode_single(chunk_bytes, chunk_spec) + + async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> ABCBuffer: + result = await self._inner._encode_single(chunk_array, chunk_spec) + assert result is not None + return result + + +def test_shard_index_async_fallback_for_async_only_index_codec() -> None: + """An async-only index codec is not sync-capable, so `_encode_shard_index` + and `_decode_shard_index` must take the async-pipeline fallback (#269) + instead of the sync twins — and still round-trip.""" + from zarr.abc.codec import SupportsSyncCodec + + codec = ShardingCodec( + chunk_shape=(10,), + codecs=[BytesCodec()], + index_codecs=[_AsyncOnlyBytesCodec(), Crc32cCodec()], + ) + assert not codec._index_codecs_sync_capable() + assert not isinstance(_AsyncOnlyBytesCodec(), SupportsSyncCodec) + + chunks_per_shard = (5,) + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 42)) + + encoded = asyncio.run(codec._encode_shard_index(index)) + decoded = asyncio.run(codec._decode_shard_index(encoded, chunks_per_shard)) + np.testing.assert_array_equal(decoded.offsets_and_lengths, index.offsets_and_lengths) diff --git a/tests/test_config.py b/tests/test_config.py index 4e293e968f..c0cb0a30bb 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -61,8 +61,9 @@ def test_config_defaults_set() -> None: "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { - "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", + "path": "zarr.core.codec_pipeline.FusedCodecPipeline", "batch_size": 1, + "max_workers": 1, }, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", @@ -134,7 +135,7 @@ def test_config_codec_pipeline_class(store: Store) -> None: # has default value assert get_pipeline_class().__name__ != "" - config.set({"codec_pipeline.name": "zarr.core.codec_pipeline.BatchedCodecPipeline"}) + config.set({"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"}) assert get_pipeline_class() == zarr.core.codec_pipeline.BatchedCodecPipeline _mock = Mock() @@ -189,10 +190,19 @@ def test_config_codec_implementation(store: Store) -> None: _mock = Mock() class MockBloscCodec(BloscCodec): + # Record a call from whichever encode entry point the active codec + # pipeline uses: the async `_encode_single` (BatchedCodecPipeline) or the + # synchronous `_encode_sync` (FusedCodecPipeline, the default). Overriding + # both keeps this test ("the configured codec is actually used") + # independent of which pipeline is the default. async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer | None: _mock.call() return None + def _encode_sync(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer | None: + _mock.call() + return None + register_codec("blosc", MockBloscCodec) with config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}): assert get_codec_class("blosc") == MockBloscCodec diff --git a/tests/test_fastpath_equivalence.py b/tests/test_fastpath_equivalence.py new file mode 100644 index 0000000000..e822ca6e68 --- /dev/null +++ b/tests/test_fastpath_equivalence.py @@ -0,0 +1,309 @@ +"""Property tests: every fast path must equal the general path. + +The codec pipelines contain fast paths that skip work whose result is known — +the complete-chunk merge view, the vectorized whole-shard bulk decode, the +scalar-broadcast write memoization, byte-range coalescing. Each is only safe if +it produces results identical to the general path it bypasses. These tests pin +that equivalence on randomized inputs, so a fast path that silently diverges +(the bug class behind the bulk-decode endianness fix) fails here instead of +corrupting data downstream. + +Convention for new fast paths: a fast path is "skip work whose result is +known", never "a different algorithm" — and it ships with a property test in +this module asserting equality with the general path. +""" + +from __future__ import annotations + +from typing import Any + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +import numpy as np +from hypothesis import given, settings + +import zarr +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.core.array_spec import ArrayConfig, ArraySpec +from zarr.core.buffer import default_buffer_prototype +from zarr.core.buffer.cpu import Buffer as CPUBuffer +from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer +from zarr.core.chunk_grids import ChunkGrid +from zarr.core.codec_pipeline import _merge_chunk_array +from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.indexing import BasicIndexer +from zarr.storage import MemoryStore + +_DTYPES = st.sampled_from(["uint8", "int16", "float32"]) + + +def _spec(shape: tuple[int, ...], dtype: str, *, write_empty_chunks: bool = True) -> ArraySpec: + zdtype = get_data_type_from_native_dtype(np.dtype(dtype)) + return ArraySpec( + shape=shape, + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=write_empty_chunks), + prototype=default_buffer_prototype(), + ) + + +# --------------------------------------------------------------------------- +# _merge_chunk_array: the complete-chunk early return (a view of +# value[out_selection]) must equal the general create/copy + setitem path. +# --------------------------------------------------------------------------- + + +@st.composite +def _merge_cases(draw: st.DrawFn) -> tuple[np.ndarray, tuple[int, ...], tuple[slice, ...]]: + ndim = draw(st.integers(1, 3)) + chunk_shape = tuple(draw(st.integers(1, 5)) for _ in range(ndim)) + n_blocks = draw(st.integers(1, 3)) + dtype = draw(_DTYPES) + # value spans n_blocks chunk-sized blocks along axis 0; out_selection picks one + value_shape = (chunk_shape[0] * n_blocks, *chunk_shape[1:]) + value = draw(npst.arrays(dtype=np.dtype(dtype), shape=value_shape)) + block = draw(st.integers(0, n_blocks - 1)) + out_selection = ( + slice(block * chunk_shape[0], (block + 1) * chunk_shape[0]), + *(slice(0, s) for s in chunk_shape[1:]), + ) + return value, chunk_shape, out_selection + + +@settings(max_examples=200, deadline=None) +@given(case=_merge_cases(), with_existing=st.booleans()) +def test_merge_complete_chunk_equals_general_path( + case: tuple[np.ndarray, tuple[int, ...], tuple[slice, ...]], with_existing: bool +) -> None: + """The is_complete_chunk fast path (return a view of value[out_selection]) + must produce exactly what the general merge path produces — and a complete + write must be independent of any existing chunk content.""" + value_np, chunk_shape, out_selection = case + spec = _spec(chunk_shape, str(value_np.dtype)) + value = CPUNDBuffer.from_numpy_array(value_np) + chunk_selection = tuple(slice(0, s) for s in chunk_shape) + + existing = None + if with_existing: + existing = CPUNDBuffer.from_numpy_array(np.full(chunk_shape, 7, dtype=value_np.dtype)) + + fast = _merge_chunk_array(existing, value, out_selection, spec, chunk_selection, True, ()) + general = _merge_chunk_array(existing, value, out_selection, spec, chunk_selection, False, ()) + np.testing.assert_array_equal(fast.as_numpy_array(), general.as_numpy_array()) + np.testing.assert_array_equal(fast.as_numpy_array(), value_np[out_selection]) + + +# --------------------------------------------------------------------------- +# _decode_full_shard_bulk: the vectorized dense-shard decode must equal the +# general per-chunk decode (_decode_sync), across dtypes, endianness, write +# orders, and index locations. This is the bug class of the historical +# bulk-decode endianness fix. +# --------------------------------------------------------------------------- + + +@st.composite +def _shard_cases(draw: st.DrawFn) -> dict[str, Any]: + ndim = draw(st.integers(1, 2)) + chunk_shape = tuple(draw(st.integers(1, 4)) for _ in range(ndim)) + grid = tuple(draw(st.integers(1, 3)) for _ in range(ndim)) + shard_shape = tuple(c * g for c, g in zip(chunk_shape, grid, strict=True)) + dtype = draw(_DTYPES) + data = draw(npst.arrays(dtype=np.dtype(dtype), shape=shard_shape)) + return { + "chunk_shape": chunk_shape, + "shard_shape": shard_shape, + "data": data, + "endian": draw(st.sampled_from(["little", "big"])), + "index_location": draw(st.sampled_from(list(ShardingCodecIndexLocation))), + "subchunk_write_order": draw( + st.sampled_from(["morton", "lexicographic", "colexicographic", "unordered"]) + ), + } + + +@settings(max_examples=100, deadline=None) +@given(case=_shard_cases()) +def test_bulk_shard_decode_equals_general_decode(case: dict[str, Any]) -> None: + """For dense fixed-size uncompressed shards, the vectorized bulk decode must + reproduce the general per-chunk decode exactly, whatever the endianness, + subchunk write order, or index location.""" + codec = ShardingCodec( + chunk_shape=case["chunk_shape"], + codecs=[BytesCodec(endian=case["endian"])], + index_location=case["index_location"], + subchunk_write_order=case["subchunk_write_order"], + ) + spec = _spec(case["shard_shape"], str(case["data"].dtype), write_empty_chunks=True) + blob = codec._encode_sync(CPUNDBuffer.from_numpy_array(case["data"]), spec) + assert blob is not None # write_empty_chunks=True -> dense, never elided + + general = codec._decode_sync(blob, spec) + indexer = BasicIndexer( + tuple(slice(0, s) for s in case["shard_shape"]), + shape=case["shard_shape"], + chunk_grid=ChunkGrid.from_sizes(case["shard_shape"], case["chunk_shape"]), + ) + bulk = codec._decode_full_shard_bulk(blob, spec, indexer) + # the fast path must APPLY for this dense uncompressed configuration — + # a vacuous None would silently stop testing the equivalence + assert bulk is not None + np.testing.assert_array_equal(bulk.as_numpy_array(), general.as_numpy_array()) + np.testing.assert_array_equal(general.as_numpy_array(), case["data"]) + + +def test_merge_complete_chunk_returns_view_and_write_does_not_mutate_source() -> None: + """The complete-chunk merge fast path returns a VIEW of the caller's value + (no copy — that is the perf win), and a multi-chunk write through either + pipeline leaves the user's source array untouched. + + Pins both halves of the aliasing contract: a future "defensive copy" + refactor that silently reintroduces the per-chunk create/fill/copy breaks + the first assertion, and an in-place-mutating codec that corrupts the + user's array through the shared view breaks the second. + """ + # the fast path must return a view aliasing `value`, not a copy + value_np = np.arange(30, dtype="uint16") + spec = _spec((10,), "uint16") + value = CPUNDBuffer.from_numpy_array(value_np) + merged = _merge_chunk_array(None, value, (slice(10, 20),), spec, (slice(0, 10),), True, ()) + assert np.shares_memory(merged.as_numpy_array(), value_np), ( + "complete-chunk merge no longer returns a view of the caller's value" + ) + + # end-to-end: the source array is byte-identical after a multi-chunk write + for pipeline_path in ( + "zarr.core.codec_pipeline.FusedCodecPipeline", + "zarr.core.codec_pipeline.BatchedCodecPipeline", + ): + with zarr.config.set({"codec_pipeline.path": pipeline_path}): + arr = zarr.create_array( + store=MemoryStore(), + shape=(30,), + chunks=(10,), + dtype="uint16", + compressors=None, + fill_value=0, + ) + source = np.arange(30, dtype="uint16") + snapshot = source.copy() + arr[:] = source + np.testing.assert_array_equal(source, snapshot, err_msg=pipeline_path) + np.testing.assert_array_equal(arr[:], snapshot, err_msg=pipeline_path) + + +# --------------------------------------------------------------------------- +# Scalar-broadcast write memoization: writing a scalar must produce the same +# STORED BYTES as writing the equivalent broadcast array. +# --------------------------------------------------------------------------- + + +@st.composite +def _scalar_cases(draw: st.DrawFn) -> dict[str, Any]: + n_chunks = draw(st.integers(2, 6)) + chunk = draw(st.integers(2, 6)) + shape = n_chunks * chunk + start = draw(st.integers(0, shape - 1)) + stop = draw(st.integers(start + 1, shape)) + return { + "shape": shape, + "chunk": chunk, + "sel": slice(start, stop), + "scalar": draw(st.integers(0, 255)), + "write_empty_chunks": draw(st.booleans()), + "sharded": draw(st.booleans()), + } + + +@settings(max_examples=100, deadline=None) +@given(case=_scalar_cases()) +def test_scalar_write_equals_broadcast_write(case: dict[str, Any]) -> None: + """arr[sel] = scalar and arr[sel] = full(sel_shape, scalar) must leave the + store byte-identical (pins the scalar-broadcast memoization in the sharded + partial-write path, incl. its empty-chunk normalization).""" + + def build() -> tuple[MemoryStore, zarr.Array[Any]]: + store = MemoryStore() + arr = zarr.create_array( + store=store, + shape=(case["shape"],), + chunks=(case["chunk"],), + shards=(case["shape"],) if case["sharded"] else None, + dtype="uint8", + compressors=None, + fill_value=0, + config={"write_empty_chunks": case["write_empty_chunks"]}, + ) + return store, arr + + store_a, arr_a = build() + arr_a[case["sel"]] = case["scalar"] + + store_b, arr_b = build() + n = case["sel"].stop - case["sel"].start + arr_b[case["sel"]] = np.full(n, case["scalar"], dtype="uint8") + + keys_a = {k: bytes(v.to_bytes()) for k, v in store_a._store_dict.items()} + keys_b = {k: bytes(v.to_bytes()) for k, v in store_b._store_dict.items()} + assert keys_a == keys_b + + +# --------------------------------------------------------------------------- +# get_ranges_sync coalescing: merged fetches must return exactly what +# individual per-range gets return, for any gap/coalesce limits. +# --------------------------------------------------------------------------- + + +@st.composite +def _range_cases(draw: st.DrawFn) -> dict[str, Any]: + blob_len = draw(st.integers(1, 200)) + n = draw(st.integers(1, 8)) + ranges: list[RangeByteRequest | OffsetByteRequest | SuffixByteRequest | None] = [] + for _ in range(n): + kind = draw(st.sampled_from(["range", "offset", "suffix", "none"])) + if kind == "range": + start = draw(st.integers(0, blob_len - 1)) + end = draw(st.integers(start + 1, blob_len)) + ranges.append(RangeByteRequest(start, end)) + elif kind == "offset": + ranges.append(OffsetByteRequest(draw(st.integers(0, blob_len - 1)))) + elif kind == "suffix": + ranges.append(SuffixByteRequest(draw(st.integers(1, blob_len)))) + else: + ranges.append(None) + return { + "blob": draw(st.binary(min_size=blob_len, max_size=blob_len)), + "ranges": ranges, + "max_gap": draw(st.integers(0, 64)), + "max_coalesced": draw(st.integers(1, 512)), + } + + +@settings(max_examples=200, deadline=None) +@given(case=_range_cases()) +def test_get_ranges_sync_equals_individual_gets(case: dict[str, Any]) -> None: + """Coalesced byte-range reads must return exactly what one get_sync per + range returns — for any gap/coalesce limits (the offset re-slicing math is + where a coalescing bug would corrupt data).""" + store = MemoryStore() + store._is_open = True + proto = default_buffer_prototype() + store._store_dict["k"] = CPUBuffer.from_bytes(case["blob"]) + + expected = [store.get_sync("k", prototype=proto, byte_range=r) for r in case["ranges"]] + + got: dict[int, bytes | None] = {} + for idx, buf in store.get_ranges_sync( + "k", + case["ranges"], + prototype=proto, + max_gap_bytes=case["max_gap"], + max_coalesced_bytes=case["max_coalesced"], + ): + got[idx] = None if buf is None else bytes(buf.to_bytes()) + + for i, exp in enumerate(expected): + exp_bytes = None if exp is None else bytes(exp.to_bytes()) + assert got.get(i) == exp_bytes, f"range {i} ({case['ranges'][i]!r}) mismatch" diff --git a/tests/test_fused_pipeline.py b/tests/test_fused_pipeline.py new file mode 100644 index 0000000000..db3a1b1f89 --- /dev/null +++ b/tests/test_fused_pipeline.py @@ -0,0 +1,784 @@ +"""Tests for FusedCodecPipeline -- the per-chunk-fused codec pipeline.""" + +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +import zarr +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.gzip import GzipCodec +from zarr.codecs.transpose import TransposeCodec +from zarr.codecs.zstd import ZstdCodec +from zarr.core.codec_pipeline import FusedCodecPipeline +from zarr.core.config import config as zarr_config +from zarr.storage import MemoryStore, StorePath + + +@pytest.mark.parametrize( + "codecs", + [ + (BytesCodec(),), + (BytesCodec(), GzipCodec(level=1)), + (BytesCodec(), ZstdCodec(level=1)), + (TransposeCodec(order=(1, 0)), BytesCodec()), + (TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec(level=1)), + ], + ids=["bytes-only", "gzip", "zstd", "transpose", "transpose+zstd"], +) +def test_construction(codecs: tuple[Any, ...]) -> None: + """FusedCodecPipeline can be constructed from valid codec combinations.""" + pipeline = FusedCodecPipeline.from_codecs(codecs) + assert pipeline.codecs == codecs + + +def test_evolve_from_array_spec() -> None: + """evolve_from_array_spec creates a sync transform.""" + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.dtype import get_data_type_from_native_dtype + + pipeline = FusedCodecPipeline.from_codecs((BytesCodec(),)) + assert pipeline.sync_transform is None + + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(100,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + evolved = pipeline.evolve_from_array_spec(spec) + assert evolved.sync_transform is not None + + +# --------------------------------------------------------------------------- +# Sync path tests +# +# These exercise FusedCodecPipeline's synchronous API (write_sync / read_sync / +# sync_transform), which has no equivalent on BatchedCodecPipeline -- so they +# cannot live in the pipeline-agnostic CodecPipelineTests suite. The async +# roundtrip / fill-value behaviour is covered there (test_scenario) across both +# pipelines and sync/async stores. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + ("dtype", "shape"), + [ + ("float64", (100,)), + ("float32", (50,)), + ("int32", (200,)), + ("float64", (10, 10)), + ], + ids=["f64-1d", "f32-1d", "i32-1d", "f64-2d"], +) +def test_read_write_sync_roundtrip(dtype: str, shape: tuple[int, ...]) -> None: + """Data written via write_sync can be read back via read_sync.""" + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer + from zarr.core.dtype import get_data_type_from_native_dtype + + store = MemoryStore() + zdtype = get_data_type_from_native_dtype(np.dtype(dtype)) + spec = ArraySpec( + shape=shape, + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + + pipeline = FusedCodecPipeline.from_codecs((BytesCodec(),)) + pipeline = pipeline.evolve_from_array_spec(spec) + + data = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) + value = CPUNDBuffer.from_numpy_array(data) + chunk_selection = tuple(slice(0, s) for s in shape) + out_selection = chunk_selection + store_path = StorePath(store, "c/0") + + # Write sync + pipeline.write_sync( + [(store_path, spec, chunk_selection, out_selection, True)], + value, + ) + + # Read sync + out = CPUNDBuffer.from_numpy_array(np.zeros(shape, dtype=dtype)) + pipeline.read_sync( + [(store_path, spec, chunk_selection, out_selection, True)], + out, + ) + + np.testing.assert_array_equal(data, out.as_numpy_array()) + + +def test_read_sync_missing_chunk_fills() -> None: + """Sync read of a missing chunk fills with the fill value.""" + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer + from zarr.core.dtype import get_data_type_from_native_dtype + + store = MemoryStore() + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(10,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(42.0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + + pipeline = FusedCodecPipeline.from_codecs((BytesCodec(),)) + pipeline = pipeline.evolve_from_array_spec(spec) + + out = CPUNDBuffer.from_numpy_array(np.zeros(10, dtype="float64")) + store_path = StorePath(store, "c/0") + chunk_sel = (slice(0, 10),) + + pipeline.read_sync( + [(store_path, spec, chunk_sel, chunk_sel, True)], + out, + ) + + np.testing.assert_array_equal(out.as_numpy_array(), np.full(10, 42.0)) + + +def test_sync_write_async_read_roundtrip() -> None: + """Data written via write_sync can be read back via async read.""" + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer + from zarr.core.dtype import get_data_type_from_native_dtype + from zarr.core.sync import sync + + store = MemoryStore() + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + spec = ArraySpec( + shape=(100,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + + pipeline = FusedCodecPipeline.from_codecs((BytesCodec(),)) + pipeline = pipeline.evolve_from_array_spec(spec) + + data = np.arange(100, dtype="float64") + value = CPUNDBuffer.from_numpy_array(data) + chunk_sel = (slice(0, 100),) + store_path = StorePath(store, "c/0") + + # Write sync + pipeline.write_sync( + [(store_path, spec, chunk_sel, chunk_sel, True)], + value, + ) + + # Read async + out = CPUNDBuffer.from_numpy_array(np.zeros(100, dtype="float64")) + sync( + pipeline.read( + [(store_path, spec, chunk_sel, chunk_sel, True)], + out, + ) + ) + + +# --- byte-range-write fast-path tests: disabled --- +# The sharding codec's byte-range-write fast path (set_range_sync) was removed +# from this PR pending a decision on the store interface; partial shard writes +# now always take the full-shard-rewrite path. These tests are known-good and +# kept commented out to restore once the store byte-range-write design lands. +# def test_partial_shard_write_uses_set_range() -> None: +# """Partial shard writes with fixed-size codecs should use set_range_sync. +# +# Only the FusedCodecPipeline uses byte-range writes for partial shard +# updates; skipped under other pipelines. +# """ +# from unittest.mock import patch +# +# store = zarr.storage.MemoryStore() +# # write_empty_chunks=True keeps a fixed-size dense layout, which is +# # required for the byte-range fast path (chunks never transition +# # present <-> absent). +# arr = zarr.create_array( +# store=store, +# shape=(100,), +# dtype="float64", +# chunks=(10,), +# shards=(100,), +# compressors=None, +# fill_value=0.0, +# config={"write_empty_chunks": True}, +# ) +# if not isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline): +# pytest.skip("byte-range write optimization is specific to FusedCodecPipeline") +# +# # Initial full write to create the shard blob +# arr[:] = np.arange(100, dtype="float64") +# +# # Partial write — should use set_range_sync, not set_sync +# with patch.object(type(store), "set_range_sync", wraps=store.set_range_sync) as mock_set_range: +# arr[5] = 999.0 +# +# # set_range_sync should be called: once for the chunk data, once for the index +# assert mock_set_range.call_count >= 1, ( +# "Expected set_range_sync to be called for partial shard write" +# ) +# +# # Verify correctness +# expected = np.arange(100, dtype="float64") +# expected[5] = 999.0 +# np.testing.assert_array_equal(arr[:], expected) +# +# +# def test_partial_shard_write_falls_back_for_compressed() -> None: +# """Partial shard writes with compressed inner codecs should NOT use set_range. +# +# Only meaningful under FusedCodecPipeline (which can use byte-range writes +# for fixed-size inner codecs). Other pipelines never use set_range_sync, +# so the assertion is trivially true and the test is uninformative. +# """ +# from unittest.mock import patch +# +# store = zarr.storage.MemoryStore() +# arr = zarr.create_array( +# store=store, +# shape=(100,), +# dtype="float64", +# chunks=(10,), +# shards=(100,), +# compressors=GzipCodec(), +# fill_value=0.0, +# ) +# if not isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline): +# pytest.skip("byte-range write optimization is specific to FusedCodecPipeline") +# arr[:] = np.arange(100, dtype="float64") +# +# with patch.object(type(store), "set_range_sync", wraps=store.set_range_sync) as mock_set_range: +# arr[5] = 999.0 +# +# # With compression, set_range_sync should NOT be used +# assert mock_set_range.call_count == 0, ( +# "set_range_sync should not be used with compressed inner codecs" +# ) +# +# expected = np.arange(100, dtype="float64") +# expected[5] = 999.0 +# np.testing.assert_array_equal(arr[:], expected) +# +# +# def test_partial_shard_write_skips_set_range_when_write_empty_chunks_false() -> None: +# """The byte-range fast path must NOT fire under the default write_empty_chunks=False. +# +# The fast path assumes a fixed, dense shard layout. With empty-chunk skipping +# (the default) a chunk can transition present<->absent, so an in-place +# byte-range overwrite would corrupt the layout. The complement of +# test_partial_shard_write_uses_set_range (which uses write_empty_chunks=True). +# """ +# from unittest.mock import patch +# +# store = zarr.storage.MemoryStore() +# arr = zarr.create_array( +# store=store, +# shape=(100,), +# dtype="float64", +# chunks=(10,), +# shards=(100,), +# compressors=None, +# fill_value=0.0, +# # default config: write_empty_chunks=False +# ) +# if not isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline): +# pytest.skip("byte-range write optimization is specific to FusedCodecPipeline") +# arr[:] = np.arange(100, dtype="float64") +# +# with patch.object(type(store), "set_range_sync", wraps=store.set_range_sync) as mock_set_range: +# arr[5] = 999.0 +# +# assert mock_set_range.call_count == 0, ( +# "byte-range fast path was taken with write_empty_chunks=False; " +# "this would produce a dense layout incompatible with empty-chunk skipping" +# ) +# +# expected = np.arange(100, dtype="float64") +# expected[5] = 999.0 +# np.testing.assert_array_equal(arr[:], expected) +# +# +# def test_partial_shard_write_handles_readonly_store_buffers(tmp_path: Path) -> None: +# """The byte-range path decodes the shard index from a store buffer and mutates +# it; LocalStore returns read-only buffers, so the path must copy before writing. +# +# Without the copy, the partial write raises +# ``ValueError: assignment destination is read-only``. Fused-only because only +# the Fused byte-range path decodes+mutates a shard index in place. +# """ +# store = zarr.storage.LocalStore(tmp_path / "data.zarr") +# arr = zarr.create_array( +# store=store, +# shape=(16,), +# chunks=(4,), +# shards=(8,), +# dtype="float64", +# compressors=None, +# fill_value=0.0, +# config={"write_empty_chunks": True}, +# ) +# if not isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline): +# pytest.skip("byte-range write optimization is specific to FusedCodecPipeline") +# arr[:] = np.arange(16, dtype="float64") +# arr[2] = 42.0 # triggers the byte-range path against a read-only store buffer +# assert arr[2] == 42.0 + + +def test_chunk_transform_uses_runtime_prototype() -> None: + """ChunkTransform must pass each codec the prototype from the runtime chunk_spec, + not one captured at evolve time. Constructs ChunkTransform directly (a + Fused-internal data structure with no BatchedCodecPipeline equivalent). + """ + from zarr.abc.codec import BytesBytesCodec + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import Buffer, BufferPrototype, default_buffer_prototype + from zarr.core.codec_pipeline import ChunkTransform + from zarr.core.dtype import get_data_type_from_native_dtype + + class _PrototypeRecordingCodec(BytesBytesCodec): # type: ignore[misc,unused-ignore] + """A no-op BB codec that records the prototype it was called with.""" + + is_fixed_size = True + seen_prototypes: list[object] + + def __init__(self) -> None: + object.__setattr__(self, "seen_prototypes", []) + + def to_dict(self) -> dict[str, Any]: + return {"name": "_prototype_recording", "configuration": {}} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> _PrototypeRecordingCodec: + return cls() + + def compute_encoded_size(self, input_byte_length: int, _spec: ArraySpec) -> int: + return input_byte_length + + def _encode_sync(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer | None: + self.seen_prototypes.append(chunk_spec.prototype) + return chunk_bytes + + def _decode_sync(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer: + self.seen_prototypes.append(chunk_spec.prototype) + return chunk_bytes + + async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer | None: + return self._encode_sync(chunk_bytes, chunk_spec) + + async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer: + return self._decode_sync(chunk_bytes, chunk_spec) + + recording = _PrototypeRecordingCodec() + transform = ChunkTransform(codecs=(BytesCodec(), recording)) + + zdtype = get_data_type_from_native_dtype(np.dtype("float64")) + + def _spec(prototype: BufferPrototype) -> ArraySpec: + return ArraySpec( + shape=(10,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0.0), + config=ArrayConfig(order="C", write_empty_chunks=False), + prototype=prototype, + ) + + proto_default = default_buffer_prototype() + # A distinct BufferPrototype instance with the same buffer/nd_buffer types -- + # fails an identity check but works at runtime. + proto_other = BufferPrototype(buffer=proto_default.buffer, nd_buffer=proto_default.nd_buffer) + assert proto_other is not proto_default + + arr = proto_default.nd_buffer.from_numpy_array(np.arange(10, dtype="float64")) + transform.encode_chunk(arr, _spec(proto_default)) + transform.encode_chunk(arr, _spec(proto_other)) + + assert recording.seen_prototypes[0] is proto_default + assert recording.seen_prototypes[1] is proto_other, ( + "ChunkTransform did not pass the runtime prototype to the codec" + ) + + +# --------------------------------------------------------------------------- +# Thread-pool (max_workers > 1) tests +# +# The pool dispatch in read_sync/write_sync is Fused-only and off by default +# (codec_pipeline.max_workers defaults to 1 == sequential). These tests opt in +# and exercise the pool path end-to-end, exception propagation from workers, +# and concurrent decode through the shared ChunkTransform. +# --------------------------------------------------------------------------- + +_FUSED_POOL_CONFIG = { + "codec_pipeline.path": "zarr.core.codec_pipeline.FusedCodecPipeline", + "codec_pipeline.max_workers": 4, +} + + +def test_read_write_with_thread_pool() -> None: + """With max_workers > 1, multi-chunk reads and writes dispatch through the + thread pool (pool.map in read_sync/write_sync) and produce the same results + as sequential execution. + + The `_get_pool` spy pins that the pool branch actually fires: without it, + a config-resolution regression (renamed key, `_resolve_max_workers` + returning 1) would silently degrade all the pool tests into re-testing the + sequential branch while staying green. + """ + from unittest.mock import patch + + import zarr.core.codec_pipeline as cp_mod + + with zarr_config.set(_FUSED_POOL_CONFIG): + assert cp_mod._resolve_max_workers() == 4, ( + "codec_pipeline.max_workers config did not reach _resolve_max_workers" + ) + store = MemoryStore() + arr = zarr.create_array( + store=store, + shape=(100,), + chunks=(10,), + dtype="float64", + compressors=None, + fill_value=0.0, + ) + assert isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline) + data = np.arange(100, dtype="float64") + with patch.object(cp_mod, "_get_pool", wraps=cp_mod._get_pool) as pool_spy: + arr[:] = data # 10 chunks -> pool dispatch in write_sync + assert pool_spy.call_count >= 1, "multi-chunk write did not take the pool branch" + writes = pool_spy.call_count + np.testing.assert_array_equal(arr[:], data) # pool dispatch in read_sync + assert pool_spy.call_count > writes, "multi-chunk read did not take the pool branch" + arr[5:25] = 7.0 # partial write: merge path through the pool + data[5:25] = 7.0 + np.testing.assert_array_equal(arr[:], data) + + +def test_thread_pool_write_worker_exception_propagates() -> None: + """A store error raised inside a pool worker during write_sync surfaces to + the caller (write_sync consumes pool.map, so worker exceptions re-raise).""" + from unittest.mock import patch + + with zarr_config.set(_FUSED_POOL_CONFIG): + store = MemoryStore() + arr = zarr.create_array( + store=store, + shape=(100,), + chunks=(10,), + dtype="float64", + compressors=None, + fill_value=0.0, + ) + with ( + patch.object(store, "set_sync", side_effect=RuntimeError("simulated store error")), + pytest.raises(RuntimeError, match="simulated store error"), + ): + arr[:] = np.arange(100, dtype="float64") + + +def test_thread_pool_read_worker_exception_propagates() -> None: + """A store error raised inside a pool worker during read_sync surfaces to + the caller (read_sync consumes pool.map into a tuple).""" + from unittest.mock import patch + + with zarr_config.set(_FUSED_POOL_CONFIG): + store = MemoryStore() + arr = zarr.create_array( + store=store, + shape=(100,), + chunks=(10,), + dtype="float64", + compressors=None, + fill_value=0.0, + ) + arr[:] = np.arange(100, dtype="float64") + with ( + patch.object(store, "get_sync", side_effect=RuntimeError("simulated store error")), + pytest.raises(RuntimeError, match="simulated store error"), + ): + arr[:] + + +def test_concurrent_reads_shared_transform_with_pool() -> None: + """Concurrent decode through the shared ChunkTransform produces correct data. + + The transform's `_resolve_specs` cache is shared mutable state. With no + array->array codecs the cache is bypassed entirely, so this uses a transpose + filter to force cache traffic, max_workers=4 so pool workers decode chunks + concurrently, and an outer thread pool so multiple reads are in flight at + once. Each round RE-OPENS the array so the shared transform starts with a + cold spec cache and the concurrent readers race the non-atomic first fill — + reading through a single pre-warmed handle would only ever exercise cache + hits. This pins correctness under concurrency (it cannot prove the absence + of a race, but a torn cache would corrupt results here). + """ + from concurrent.futures import ThreadPoolExecutor + + with zarr_config.set(_FUSED_POOL_CONFIG): + store = MemoryStore() + arr = zarr.create_array( + store=store, + shape=(40, 40), + chunks=(5, 5), + dtype="int32", + filters=[TransposeCodec(order=(1, 0))], + serializer=BytesCodec(), + compressors=None, + fill_value=-1, + ) + data = np.arange(1600, dtype="int32").reshape(40, 40) + arr[:] = data + + for _ in range(5): # several rounds, each racing a cold cache + fresh = zarr.open_array(store=store, mode="r") + + def read_row_block(i: int, handle: zarr.Array[Any] = fresh) -> np.ndarray: + return np.asarray(handle[i * 4 : (i + 1) * 4, :]) + + with ThreadPoolExecutor(max_workers=8) as ex: + futures = {ex.submit(read_row_block, i): i for i in range(10)} + for fut, i in futures.items(): + np.testing.assert_array_equal(fut.result(), data[i * 4 : (i + 1) * 4, :]) + + +def test_sharded_fallback_inner_chunks_avoid_async_transform() -> None: + """Inner chunks of a shard on a NON-sync store decode through the sync + ChunkTransform, not per-chunk AsyncChunkTransform coroutines. + + The sharding byte getters are in-memory dict wrappers; they implement + SyncByteGetter/SyncByteSetter, and the nested inner pipeline is evolved + (so its sync transform exists), letting the nested read/write take the + sync fast path. Without this, every inner chunk pays a coroutine for a + dict lookup plus an async per-chunk transform — measured at 1.5x (raw) to + 3.6x (gzip) of sharded fallback read time. + """ + from unittest.mock import patch + + from zarr.core.codec_pipeline import AsyncChunkTransform + from zarr.testing.store import LatencyStore + + calls = {"decode": 0, "encode": 0} + orig_decode = AsyncChunkTransform.decode_chunk + orig_encode = AsyncChunkTransform.encode_chunk + + async def spy_decode(self: Any, *args: Any, **kwargs: Any) -> Any: + calls["decode"] += 1 + return await orig_decode(self, *args, **kwargs) + + async def spy_encode(self: Any, *args: Any, **kwargs: Any) -> Any: + calls["encode"] += 1 + return await orig_encode(self, *args, **kwargs) + + # LatencyStore is not sync-capable -> the OUTER pipeline takes the async + # fallback; the INNER chunks go over the sharding byte getters. + store = LatencyStore(MemoryStore(), get_latency=0.0, set_latency=0.0) + arr = zarr.create_array( + store=store, + shape=(100,), + chunks=(10,), + shards=(50,), + dtype="uint8", + compressors=None, + fill_value=0, + ) + if not isinstance(arr._async_array.codec_pipeline, FusedCodecPipeline): + pytest.skip("sync fast path for inner chunks is specific to FusedCodecPipeline") + + data = np.arange(100, dtype="uint8") + sync_calls = {"read_sync": 0} + orig_read_sync = FusedCodecPipeline.read_sync + + def spy_read_sync(self: Any, *args: Any, **kwargs: Any) -> Any: + sync_calls["read_sync"] += 1 + return orig_read_sync(self, *args, **kwargs) + + with ( + patch.object(AsyncChunkTransform, "decode_chunk", spy_decode), + patch.object(AsyncChunkTransform, "encode_chunk", spy_encode), + patch.object(FusedCodecPipeline, "read_sync", spy_read_sync), + ): + arr[:] = data + out = np.asarray(arr[:]) + + np.testing.assert_array_equal(out, data) + assert calls == {"decode": 0, "encode": 0}, ( + f"inner chunks went through per-chunk AsyncChunkTransform coroutines: {calls}" + ) + # The outer store is not sync-capable, so any read_sync calls are the + # NESTED pipeline taking the sync fast path over the sharding byte getters + # (the SyncByteGetter gate). Without the gate, inner chunks go through + # concurrent_map with one coroutine per chunk. (Writes don't appear here: + # the fallback write encodes whole shards through the outer sync transform + # -> ShardingCodec._encode_sync, never touching the nested byte setters.) + assert sync_calls["read_sync"] >= 1, "nested read did not take the sync fast path" + + +def test_write_over_sync_byte_setter_takes_sync_path() -> None: + """`FusedCodecPipeline.write` routes a non-StorePath `SyncByteSetter` (the + sharding codec's `_ShardingByteSetter`) through `write_sync`. + + This is the write-side twin of the SyncByteGetter gate: the read test + above cannot guard it because fallback whole-array writes encode shards + via `_encode_sync` and never touch the nested byte setters. The nested + `write` over `_ShardingByteSetter` is reached from the async shard encode + paths (`_encode_single`/`_encode_partial_single`), so pin the gate + directly: without it, this write degrades to the async fallback (one + coroutine per inner chunk for an in-memory dict store). + """ + import asyncio + from unittest.mock import patch + + from zarr.codecs.sharding import _ShardingByteSetter + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.dtype import get_data_type_from_native_dtype + + zdtype = get_data_type_from_native_dtype(np.dtype("uint8")) + spec = ArraySpec( + shape=(10,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + pipeline = FusedCodecPipeline.from_codecs([BytesCodec()]).evolve_from_array_spec(spec) + assert pipeline.sync_transform is not None + + shard_dict: dict[tuple[int, ...], Any] = {} + setter = _ShardingByteSetter(shard_dict, (0,)) + value = default_buffer_prototype().nd_buffer.from_numpy_array(np.arange(10, dtype="uint8")) + + sync_calls = {"write_sync": 0} + orig_write_sync = FusedCodecPipeline.write_sync + + def spy_write_sync(self: Any, *args: Any, **kwargs: Any) -> Any: + sync_calls["write_sync"] += 1 + return orig_write_sync(self, *args, **kwargs) + + sel = (slice(0, 10),) + with patch.object(FusedCodecPipeline, "write_sync", spy_write_sync): + asyncio.run(pipeline.write([(setter, spec, sel, sel, True)], value)) + + assert sync_calls["write_sync"] >= 1, ( + "write over a SyncByteSetter did not take the sync fast path" + ) + written = shard_dict[(0,)] + np.testing.assert_array_equal( + np.frombuffer(written.to_bytes(), dtype="uint8"), np.arange(10, dtype="uint8") + ) + + +# --------------------------------------------------------------------------- +# AsyncChunkTransform: the async per-chunk codec chain used on the async +# fallback path. It is the async mirror of ChunkTransform, so it must produce +# identical bytes/arrays. The default (Fused, sync-store) path never uses it; +# these tests drive it directly over multi-codec chains so the aa/bb loops and +# the all-fill drop branch are exercised. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "codecs", + [ + (BytesCodec(),), + (BytesCodec(), GzipCodec(level=1)), + (TransposeCodec(order=(1, 0)), BytesCodec()), + (TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec(level=1)), + ], + ids=["bytes-only", "bb", "aa", "aa+ab+bb"], +) +def test_async_chunk_transform_matches_sync(codecs: tuple[Any, ...]) -> None: + """`AsyncChunkTransform.decode_chunk`/`encode_chunk` must round-trip and + produce exactly what the synchronous `ChunkTransform` produces, across + array->array, array->bytes, and bytes->bytes codec combinations. + + This is the async mirror of the codecs the default pipeline runs + synchronously; a divergence here corrupts data only on the async fallback + path (remote stores), which no end-to-end test of the default pipeline + touches. + """ + import asyncio + + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer + from zarr.core.codec_pipeline import AsyncChunkTransform, ChunkTransform, evolve_codecs + from zarr.core.dtype import get_data_type_from_native_dtype + + shape = (4, 4) + zdtype = get_data_type_from_native_dtype(np.dtype("int32")) + spec = ArraySpec( + shape=shape, + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + evolved = evolve_codecs(codecs, spec) + sync_t = ChunkTransform(codecs=evolved) + async_t = AsyncChunkTransform(codecs=evolved) + + data = np.arange(16, dtype="int32").reshape(shape) + value = CPUNDBuffer.from_numpy_array(data) + + sync_bytes = sync_t.encode_chunk(value, spec) + async_bytes = asyncio.run(async_t.encode_chunk(value, spec)) + assert sync_bytes is not None + assert async_bytes is not None + np.testing.assert_array_equal(async_bytes.to_bytes(), sync_bytes.to_bytes()) + + sync_arr = sync_t.decode_chunk(async_bytes, spec) + async_arr = asyncio.run(async_t.decode_chunk(async_bytes, spec)) + np.testing.assert_array_equal(async_arr.as_numpy_array(), sync_arr.as_numpy_array()) + np.testing.assert_array_equal(async_arr.as_numpy_array(), data) + + +def test_async_decode_encode_passes_through_none_chunks() -> None: + """`FusedCodecPipeline.decode`/`encode` (the async batch entry points used + on the fallback path) map a None chunk to None and leave real chunks + untouched — pins the None-passthrough branch the default sync path skips.""" + import asyncio + + from zarr.core.array_spec import ArrayConfig, ArraySpec + from zarr.core.buffer import default_buffer_prototype + from zarr.core.buffer.cpu import NDBuffer as CPUNDBuffer + from zarr.core.dtype import get_data_type_from_native_dtype + + zdtype = get_data_type_from_native_dtype(np.dtype("int32")) + spec = ArraySpec( + shape=(4,), + dtype=zdtype, + fill_value=zdtype.cast_scalar(0), + config=ArrayConfig(order="C", write_empty_chunks=True), + prototype=default_buffer_prototype(), + ) + pipeline = FusedCodecPipeline.from_codecs([BytesCodec()]).evolve_from_array_spec(spec) + + data = np.arange(4, dtype="int32") + value = CPUNDBuffer.from_numpy_array(data) + + # encode a real chunk and a None chunk together + encoded = list(asyncio.run(pipeline.encode([(value, spec), (None, spec)]))) + assert encoded[1] is None + assert encoded[0] is not None + + # decode the real chunk and a None chunk together + decoded = list(asyncio.run(pipeline.decode([(encoded[0], spec), (None, spec)]))) + assert decoded[1] is None + assert decoded[0] is not None + np.testing.assert_array_equal(decoded[0].as_numpy_array(), data) diff --git a/tests/test_pipeline_parity.py b/tests/test_pipeline_parity.py new file mode 100644 index 0000000000..49bf25af69 --- /dev/null +++ b/tests/test_pipeline_parity.py @@ -0,0 +1,415 @@ +"""Pipeline parity test — exhaustive matrix of read/write scenarios. + +For every cell of the matrix (codec config x layout x operation +sequence x runtime config), assert that ``FusedCodecPipeline`` and +``BatchedCodecPipeline`` produce semantically identical results: + + * Same returned array contents on read. + * Same set of store keys after writes (catches divergent empty-shard + handling: one pipeline deletes, the other writes an empty blob). + * Reading each pipeline's store contents through the *other* pipeline + yields the same array (catches "wrote a layout that only one + pipeline can read" bugs). + +Pipeline-divergence bugs (e.g. one pipeline writes a dense shard +layout while the other writes a compact layout) fail this test +loudly with a clear diff, instead of waiting for a downstream +test to trip over the symptom. + +Byte-for-byte equality of store contents is intentionally NOT +checked: codecs like gzip embed the wall-clock timestamp in their +output, so two compressions of the same data done at different +seconds produce different bytes despite being semantically +identical. + +The matrix axes are: + + * codec chain — bytes-only, gzip, with/without sharding + * layout — chunk_shape, shard_shape (None for no sharding) + * write sequence — full overwrite, partial in middle, scalar to one + cell, multiple overlapping writes, sequence ending in fill values + * runtime config — write_empty_chunks True/False +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.gzip import GzipCodec +from zarr.codecs.sharding import SUBCHUNK_WRITE_ORDER, ShardingCodec, SubchunkWriteOrder +from zarr.core.config import config as zarr_config +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + +# --------------------------------------------------------------------------- +# Reference helpers +# --------------------------------------------------------------------------- + + +def _store_snapshot(store: MemoryStore) -> dict[str, bytes]: + """Return {key: bytes} for every entry in the store.""" + return {k: bytes(v.to_bytes()) for k, v in store._store_dict.items()} + + +# --------------------------------------------------------------------------- +# Matrix definitions +# --------------------------------------------------------------------------- + + +# Each codec config is (filters, serializer, compressors). We only vary the +# pieces that actually affect the pipeline. compressors=None means a +# fixed-size chain (the byte-range fast path is eligible when sharded). +CodecConfig = dict[str, Any] + +CODEC_CONFIGS: list[tuple[str, CodecConfig]] = [ + ("bytes-only", {"compressors": None}), + ("gzip", {"compressors": GzipCodec(level=1)}), + # Big-endian serializer: the on-disk byte order is carried by the BytesCodec, + # not the dtype. Guards the bulk whole-shard decode against ignoring endian + # (it would otherwise reinterpret big-endian bytes as native — silent + # corruption). dtype is int32 so endianness is observable. + ( + "bytes-big-endian", + {"compressors": None, "serializer": BytesCodec(endian="big"), "dtype": "int32"}, + ), + # crc32c as a bytes->bytes codec after the serializer: the bulk fast path + # must NOT silently drop checksum verification (it falls through to the + # per-chunk path). Parity still requires identical bytes + contents across + # pipelines. (crc32c is a BytesBytesCodec, so it goes in `compressors`.) + ( + "bytes-crc32c", + {"compressors": [Crc32cCodec()], "serializer": BytesCodec(), "dtype": "int32"}, + ), +] + + +# (id, kwargs) — chunks/shards layout. kwargs are passed to create_array. +LayoutConfig = dict[str, Any] + +LAYOUT_CONFIGS: list[tuple[str, LayoutConfig]] = [ + ("1d-unsharded", {"shape": (100,), "chunks": (10,), "shards": None}), + ("1d-1chunk-per-shard", {"shape": (100,), "chunks": (10,), "shards": (10,)}), + ("1d-multi-chunk-per-shard", {"shape": (100,), "chunks": (10,), "shards": (50,)}), + ("2d-unsharded", {"shape": (20, 20), "chunks": (5, 5), "shards": None}), + ("2d-sharded", {"shape": (20, 20), "chunks": (5, 5), "shards": (10, 10)}), + # Nested sharding: outer chunk (10,10) sharded into inner chunks (5,5). + # Restricted to bytes-only codec because combining an outer ShardingCodec + # with a compressor (gzip) triggers a ZarrUserWarning and results in a + # checksum mismatch inside the inner shard index — a known limitation, not + # a pipeline-parity bug. The bytes-only path still exercises the full + # two-level shard encoding/decoding in both pipelines. + ( + "2d-nested-sharded", + { + "shape": (20, 20), + "chunks": (10, 10), + "shards": None, + "serializer": ShardingCodec( + chunk_shape=(10, 10), + codecs=[ShardingCodec(chunk_shape=(5, 5))], + ), + # Only run with the bytes-only codec config; gzip is incompatible + # with nested sharding (see comment above). + "_codec_ids": {"bytes-only"}, + }, + ), +] + + +WriteOp = tuple[Any, Any] # (selection, value) +WriteSequence = tuple[str, list[WriteOp]] + + +def _full_overwrite(shape: tuple[int, ...]) -> list[WriteOp]: + return [((slice(None),) * len(shape), np.arange(int(np.prod(shape))).reshape(shape) + 1)] + + +def _partial_middle(shape: tuple[int, ...]) -> list[WriteOp]: + if len(shape) == 1: + n = shape[0] + return [((slice(n // 4, 3 * n // 4),), 7)] + # 2D: write a centered block + rs = slice(shape[0] // 4, 3 * shape[0] // 4) + cs = slice(shape[1] // 4, 3 * shape[1] // 4) + return [((rs, cs), 7)] + + +def _scalar_one_cell(shape: tuple[int, ...]) -> list[WriteOp]: + if len(shape) == 1: + return [((shape[0] // 2,), 99)] + return [((shape[0] // 2, shape[1] // 2), 99)] + + +def _overlapping(shape: tuple[int, ...]) -> list[WriteOp]: + if len(shape) == 1: + n = shape[0] + return [ + ((slice(0, n // 2),), 1), + ((slice(n // 4, 3 * n // 4),), 2), + ((slice(n // 2, n),), 3), + ] + rs1, cs1 = slice(0, shape[0] // 2), slice(0, shape[1] // 2) + rs2, cs2 = slice(shape[0] // 4, 3 * shape[0] // 4), slice(shape[1] // 4, 3 * shape[1] // 4) + return [((rs1, cs1), 1), ((rs2, cs2), 2)] + + +def _ends_in_fill(shape: tuple[int, ...]) -> list[WriteOp]: + """Write something then overwrite it with fill — exercises empty-chunk handling.""" + full = (slice(None),) * len(shape) + return [(full, 5), (full, 0)] + + +def _ends_in_partial_fill(shape: tuple[int, ...]) -> list[WriteOp]: + """Write data, then overwrite half with fill — some chunks become empty.""" + full: tuple[slice, ...] + half: tuple[slice, ...] + if len(shape) == 1: + full = (slice(None),) + half = (slice(0, shape[0] // 2),) + else: + full = (slice(None), slice(None)) + half = (slice(0, shape[0] // 2), slice(None)) + return [(full, 5), (half, 0)] + + +SEQUENCES: list[tuple[str, Callable[[tuple[int, ...]], list[WriteOp]]]] = [ + ("full-overwrite", _full_overwrite), + ("partial-middle", _partial_middle), + ("scalar-one-cell", _scalar_one_cell), + ("overlapping", _overlapping), + ("ends-in-fill", _ends_in_fill), + ("ends-in-partial-fill", _ends_in_partial_fill), +] + + +WRITE_EMPTY_CHUNKS = [False, True] + + +# --------------------------------------------------------------------------- +# Matrix iteration (pruned) +# --------------------------------------------------------------------------- + + +def _matrix() -> Iterator[Any]: + for codec_id, codec_kwargs in CODEC_CONFIGS: + for layout_id, layout in LAYOUT_CONFIGS: + allowed = layout.get("_codec_ids") + if allowed is not None and codec_id not in allowed: + continue + for seq_id, seq_fn in SEQUENCES: + for wec in WRITE_EMPTY_CHUNKS: + yield pytest.param( + codec_kwargs, + layout, + seq_fn, + wec, + id=f"{layout_id}-{codec_id}-{seq_id}-wec{wec}", + ) + + +# --------------------------------------------------------------------------- +# The parity test +# --------------------------------------------------------------------------- + + +def _write_under_pipeline( + pipeline_path: str, + codec_kwargs: CodecConfig, + layout: LayoutConfig, + sequence: list[WriteOp], + write_empty_chunks: bool, +) -> tuple[MemoryStore, Any]: + """Apply a sequence of writes via the chosen pipeline. + + Returns (store with the written data, final array contents read back). + """ + # Strip private metadata keys (e.g. "_codec_ids") before passing to create_array. + array_layout = {k: v for k, v in layout.items() if not k.startswith("_")} + # dtype defaults to float64 but a codec config may override it (e.g. an + # endian-sensitive int dtype). Merge so the override wins without a dup kwarg. + create_kwargs = {"dtype": "float64", **array_layout, **codec_kwargs} + store = MemoryStore() + with zarr_config.set({"codec_pipeline.path": pipeline_path}): + arr = zarr.create_array( + store=store, + fill_value=0, + config={"write_empty_chunks": write_empty_chunks}, + **create_kwargs, + ) + for sel, val in sequence: + arr[sel] = val + contents = arr[...] + return store, contents + + +def _read_under_pipeline(pipeline_path: str, store: MemoryStore) -> Any: + """Re-open an existing store under the chosen pipeline and read it whole.""" + with zarr_config.set({"codec_pipeline.path": pipeline_path}): + arr = zarr.open_array(store=store, mode="r") + return arr[...] + + +_BATCHED = "zarr.core.codec_pipeline.BatchedCodecPipeline" +_FUSED = "zarr.core.codec_pipeline.FusedCodecPipeline" + + +@pytest.mark.parametrize( + ("codec_kwargs", "layout", "sequence_fn", "write_empty_chunks"), + list(_matrix()), +) +def test_pipeline_parity( + codec_kwargs: CodecConfig, + layout: LayoutConfig, + sequence_fn: Callable[[tuple[int, ...]], list[WriteOp]], + write_empty_chunks: bool, +) -> None: + """FusedCodecPipeline must be semantically identical to BatchedCodecPipeline. + + Three checks, in order of decreasing diagnostic value: + + 1. Both pipelines return the same array contents after the same + write sequence (catches semantic correctness bugs). + 2. Both pipelines produce the same set of store keys (catches + empty-shard divergence: one deletes, the other doesn't). + 3. Each pipeline can correctly read the *other* pipeline's + output (catches layout-divergence bugs that would prevent + interop, e.g. dense vs compact shard layouts). + + Byte-for-byte store equality is intentionally not checked: codecs + like gzip embed wall-clock timestamps that vary between runs. + """ + sequence = sequence_fn(layout["shape"]) + + batched_store, batched_arr = _write_under_pipeline( + _BATCHED, codec_kwargs, layout, sequence, write_empty_chunks + ) + sync_store, sync_arr = _write_under_pipeline( + _FUSED, codec_kwargs, layout, sequence, write_empty_chunks + ) + + # 1. Array contents must agree. + np.testing.assert_array_equal( + sync_arr, + batched_arr, + err_msg="FusedCodecPipeline returned different array contents than BatchedCodecPipeline", + ) + + # 2. Store key sets must agree. + batched_keys = set(batched_store._store_dict) - {"zarr.json"} + sync_keys = set(sync_store._store_dict) - {"zarr.json"} + assert sync_keys == batched_keys, ( + f"Pipelines disagree on which store keys exist.\n" + f" only in batched: {sorted(batched_keys - sync_keys)}\n" + f" only in sync: {sorted(sync_keys - batched_keys)}" + ) + + # 3. Cross-read: each pipeline must correctly read the other's output. + sync_reads_batched = _read_under_pipeline(_FUSED, batched_store) + batched_reads_sync = _read_under_pipeline(_BATCHED, sync_store) + np.testing.assert_array_equal( + sync_reads_batched, + batched_arr, + err_msg="FusedCodecPipeline could not correctly read BatchedCodecPipeline's output", + ) + np.testing.assert_array_equal( + batched_reads_sync, + sync_arr, + err_msg="BatchedCodecPipeline could not correctly read FusedCodecPipeline's output", + ) + + +# --------------------------------------------------------------------------- +# Partial-read parity across subchunk write orders +# --------------------------------------------------------------------------- +# +# Note: general partial-read coverage (scalar single-element and strided reads +# from sharded arrays, which hit the sharding codec's partial-decode path) lives +# in tests/test_codec_pipeline_suite.py as Scenarios. Those run each pipeline +# against a numpy reference -- strictly stronger than checking the two pipelines +# only against each other, and they cover both the sync (_decode_partial_sync) +# and async (_decode_partial_single) partial-decode variants. What remains here +# is the cross-pipeline byte-identical-layout check, which the per-pipeline +# suite structurally cannot express. + + +@pytest.mark.parametrize("subchunk_write_order", SUBCHUNK_WRITE_ORDER) +@pytest.mark.parametrize("index_location", ["start", "end"]) +def test_pipeline_parity_subchunk_write_order( + subchunk_write_order: SubchunkWriteOrder, index_location: str +) -> None: + """Both pipelines must agree across every subchunk_write_order, including a + PARTIAL write into an already-dense fixed-size shard. + + This is the regression net for the byte-range write fast path, which derives + each chunk's physical slot from its rank in subchunk_write_order. A wrong + (e.g. hardcoded morton) assumption corrupts non-default orders silently, so + we assert both identical contents AND identical stored bytes across pipelines. + write_empty_chunks=True keeps every slot present, making the shard dense and + the byte-range write path eligible. + """ + # 2D, fixed-size (no compression). The shard (array `chunks`) must hold + # MULTIPLE inner chunks, and be non-square, so morton / lexicographic / + # colexicographic produce physically DIFFERENT layouts — with one inner + # chunk per shard all orders coincide and a wrong-order bug is invisible. + # inner chunk = (2, 2); shard = (6, 4) -> a 3x2 grid of inner chunks. + shape, shard_shape, inner_chunk = (12, 8), (6, 4), (2, 2) + serializer = ShardingCodec( + chunk_shape=inner_chunk, + codecs=[BytesCodec()], + index_location=index_location, + subchunk_write_order=subchunk_write_order, + ) + ref = np.arange(int(np.prod(shape)), dtype="int32").reshape(shape) + + def run(pipeline_path: str) -> tuple[dict[str, bytes], Any]: + store = MemoryStore() + with zarr_config.set({"codec_pipeline.path": pipeline_path}): + arr = zarr.create_array( + store=store, + shape=shape, + chunks=shard_shape, # array "chunks" == shard size for a ShardingCodec serializer + dtype="int32", + fill_value=-1, + serializer=serializer, + compressors=None, + config={"write_empty_chunks": True}, + ) + arr[:] = ref # dense full write + arr[3:9, 1:6] = 777 # partial write INTO the dense shard + contents = arr[...] + return _store_snapshot(store), contents + + batched_bytes, batched_contents = run(_BATCHED) + sync_bytes, sync_contents = run(_FUSED) + + # Contents must always match across pipelines and equal the reference — + # this catches a wrong-order byte-range write (it corrupts the data). + expected = ref.copy() + expected[3:9, 1:6] = 777 + np.testing.assert_array_equal(batched_contents, expected) + np.testing.assert_array_equal( + sync_contents, + batched_contents, + err_msg=f"pipeline contents diverged for subchunk_write_order={subchunk_write_order!r}", + ) + # The two pipelines must also produce byte-identical shards — a stronger + # check that they agree on physical layout. This holds for EVERY order + # (without special-casing any by name): both pipelines lay chunks out via + # the same `_subchunk_order_iter`, so for a given codec instance they must + # land on the same bytes whatever that order resolves to. We make no + # assumption here about what any particular order "means" — only that the + # two implementations agree. + assert sync_bytes == batched_bytes, ( + f"pipelines wrote different bytes for subchunk_write_order={subchunk_write_order!r} " + f"(index_location={index_location!r}) — byte-range write fast path likely assumed " + f"the wrong physical chunk order" + ) diff --git a/tests/test_store/test_get_ranges.py b/tests/test_store/test_get_ranges.py index 8f0c6a4814..f04251adf4 100644 --- a/tests/test_store/test_get_ranges.py +++ b/tests/test_store/test_get_ranges.py @@ -18,6 +18,7 @@ from zarr.core.buffer import default_buffer_prototype from zarr.storage import MemoryStore from zarr.storage._wrapper import WrapperStore +from zarr.testing.store import LatencyStore if TYPE_CHECKING: from collections.abc import AsyncIterator, Sequence @@ -58,6 +59,46 @@ async def test_memory_store_get_ranges_missing_key_raises() -> None: await anext(agen) +def test_get_ranges_sync_reads_multiple_ranges() -> None: + """The synchronous `get_ranges_sync` on a sync-capable store returns each + requested range, mirroring the async `get_ranges` happy path.""" + import asyncio + + store = MemoryStore() + blob = bytes(i % 256 for i in range(512)) + asyncio.run(_write(store, "blob", blob)) + + ranges = [RangeByteRequest(0, 10), RangeByteRequest(100, 110)] + proto = default_buffer_prototype() + flat: dict[int, bytes] = {} + for idx, buf in store.get_ranges_sync("blob", ranges, prototype=proto): + assert buf is not None + flat[idx] = buf.to_bytes() + + assert flat[0] == blob[0:10] + assert flat[1] == blob[100:110] + + +def test_get_ranges_sync_missing_key_raises() -> None: + """A missing key makes `get_ranges_sync` raise a BaseExceptionGroup + containing FileNotFoundError — the same contract as async `get_ranges`, so + callers handle a deleted shard uniformly across sync and async paths.""" + store = MemoryStore() + proto = default_buffer_prototype() + with pytest.RaisesGroup(pytest.RaisesExc(FileNotFoundError)): + store.get_ranges_sync("does-not-exist", [RangeByteRequest(0, 10)], prototype=proto) + + +def test_get_ranges_sync_on_non_sync_store_raises_type_error() -> None: + """`get_ranges_sync` requires the store to support synchronous reads + (`SupportsGetSync`); a non-sync store raises TypeError rather than silently + falling back.""" + store = LatencyStore(MemoryStore(), get_latency=0.0, set_latency=0.0) + proto = default_buffer_prototype() + with pytest.raises(TypeError, match="does not support synchronous reads"): + store.get_ranges_sync("k", [RangeByteRequest(0, 10)], prototype=proto) + + async def test_wrapper_store_delegates_get_ranges() -> None: """WrapperStore.get_ranges must delegate to the wrapped store, not fall back to the default.""" diff --git a/tests/test_store/test_local.py b/tests/test_store/test_local.py index 6756bc83d9..72b30e0ac2 100644 --- a/tests/test_store/test_local.py +++ b/tests/test_store/test_local.py @@ -8,6 +8,10 @@ import zarr from zarr import create_array + +# SupportsSetRange import disabled with the byte-range-write tests below +# (removed from this PR pending a store-interface decision). +# from zarr.abc.store import SupportsSetRange from zarr.core.buffer import Buffer, cpu from zarr.storage import LocalStore from zarr.storage._local import _atomic_write @@ -108,6 +112,58 @@ async def test_move( ): await store2.move(destination) + # --- byte-range-write tests: disabled --- + # Byte-range-write support (set_range / set_range_sync / SupportsSetRange) + # was removed from this PR pending a decision on the store interface. These + # tests are known-good and kept commented out to restore once that lands. + # def test_supports_set_range(self, store: LocalStore) -> None: + # """LocalStore should implement SupportsSetRange.""" + # assert isinstance(store, SupportsSetRange) + # + # @pytest.mark.parametrize( + # ("start", "patch", "expected"), + # [ + # (0, b"XX", b"XXAAAAAAAA"), + # (3, b"XX", b"AAAXXAAAAA"), + # (8, b"XX", b"AAAAAAAAXX"), + # (0, b"ZZZZZZZZZZ", b"ZZZZZZZZZZ"), + # (5, b"B", b"AAAAABAAAA"), + # (0, b"BCDE", b"BCDEAAAAAA"), + # ], + # ids=["start", "middle", "end", "full-overwrite", "single-byte", "multi-byte-start"], + # ) + # async def test_set_range( + # self, store: LocalStore, start: int, patch: bytes, expected: bytes + # ) -> None: + # """set_range should overwrite bytes at the given offset.""" + # await store.set("test/key", cpu.Buffer.from_bytes(b"AAAAAAAAAA")) + # await store.set_range("test/key", cpu.Buffer.from_bytes(patch), start=start) + # result = await store.get("test/key", prototype=cpu.buffer_prototype) + # assert result is not None + # assert result.to_bytes() == expected + # + # @pytest.mark.parametrize( + # ("start", "patch", "expected"), + # [ + # (0, b"XX", b"XXAAAAAAAA"), + # (3, b"XX", b"AAAXXAAAAA"), + # (8, b"XX", b"AAAAAAAAXX"), + # (0, b"ZZZZZZZZZZ", b"ZZZZZZZZZZ"), + # (5, b"B", b"AAAAABAAAA"), + # (0, b"BCDE", b"BCDEAAAAAA"), + # ], + # ids=["start", "middle", "end", "full-overwrite", "single-byte", "multi-byte-start"], + # ) + # def test_set_range_sync( + # self, store: LocalStore, start: int, patch: bytes, expected: bytes + # ) -> None: + # """set_range_sync should overwrite bytes at the given offset.""" + # sync(store.set("test/key", cpu.Buffer.from_bytes(b"AAAAAAAAAA"))) + # store.set_range_sync("test/key", cpu.Buffer.from_bytes(patch), start=start) + # result = store.get_sync(key="test/key", prototype=cpu.buffer_prototype) + # assert result is not None + # assert result.to_bytes() == expected + @pytest.mark.parametrize("exclusive", [True, False]) def test_atomic_write_successful(tmp_path: pathlib.Path, exclusive: bool) -> None: diff --git a/tests/test_store/test_memory.py b/tests/test_store/test_memory.py index 1e3ee89e92..a3c0509519 100644 --- a/tests/test_store/test_memory.py +++ b/tests/test_store/test_memory.py @@ -8,6 +8,10 @@ import pytest import zarr + +# SupportsSetRange import disabled with the byte-range-write tests below +# (removed from this PR pending a store-interface decision). +# from zarr.abc.store import SupportsSetRange from zarr.core.buffer import Buffer, cpu, gpu from zarr.errors import ZarrUserWarning from zarr.storage import GpuMemoryStore, ManagedMemoryStore, MemoryStore @@ -76,6 +80,59 @@ async def test_deterministic_size( np.testing.assert_array_equal(a[:3], 1) np.testing.assert_array_equal(a[3:], 0) + # --- byte-range-write tests: disabled --- + # Byte-range-write support (set_range / set_range_sync / SupportsSetRange) + # was removed from this PR pending a decision on the store interface. These + # tests are known-good and kept commented out to restore once that lands. + # def test_supports_set_range(self, store: MemoryStore) -> None: + # """MemoryStore should implement SupportsSetRange.""" + # assert isinstance(store, SupportsSetRange) + # + # @pytest.mark.parametrize( + # ("start", "patch", "expected"), + # [ + # (0, b"XX", b"XXAAAAAAAA"), + # (3, b"XX", b"AAAXXAAAAA"), + # (8, b"XX", b"AAAAAAAAXX"), + # (0, b"ZZZZZZZZZZ", b"ZZZZZZZZZZ"), + # (5, b"B", b"AAAAABAAAA"), + # (0, b"BCDE", b"BCDEAAAAAA"), + # ], + # ids=["start", "middle", "end", "full-overwrite", "single-byte", "multi-byte-start"], + # ) + # async def test_set_range( + # self, store: MemoryStore, start: int, patch: bytes, expected: bytes + # ) -> None: + # """set_range should overwrite bytes at the given offset.""" + # await store.set("test/key", cpu.Buffer.from_bytes(b"AAAAAAAAAA")) + # await store.set_range("test/key", cpu.Buffer.from_bytes(patch), start=start) + # result = await store.get("test/key", prototype=cpu.buffer_prototype) + # assert result is not None + # assert result.to_bytes() == expected + # + # @pytest.mark.parametrize( + # ("start", "patch", "expected"), + # [ + # (0, b"XX", b"XXAAAAAAAA"), + # (3, b"XX", b"AAAXXAAAAA"), + # (8, b"XX", b"AAAAAAAAXX"), + # (0, b"ZZZZZZZZZZ", b"ZZZZZZZZZZ"), + # (5, b"B", b"AAAAABAAAA"), + # (0, b"BCDE", b"BCDEAAAAAA"), + # ], + # ids=["start", "middle", "end", "full-overwrite", "single-byte", "multi-byte-start"], + # ) + # def test_set_range_sync( + # self, store: MemoryStore, start: int, patch: bytes, expected: bytes + # ) -> None: + # """set_range_sync should overwrite bytes at the given offset.""" + # store._is_open = True + # store._store_dict["test/key"] = cpu.Buffer.from_bytes(b"AAAAAAAAAA") + # store.set_range_sync("test/key", cpu.Buffer.from_bytes(patch), start=start) + # result = store.get_sync(key="test/key", prototype=cpu.buffer_prototype) + # assert result is not None + # assert result.to_bytes() == expected + # TODO: fix this warning @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning")