From ec99c0250b4e3a3ec34ecf3f9a9cd8d4f5a76d6b Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 4 May 2026 16:07:09 -0400 Subject: [PATCH 01/20] start to migrate to zarr 3 --- scallops/stitch/fuse.py | 26 +++-- scallops/tests/test_io.py | 2 + scallops/utils.py | 2 +- scallops/zarr_io.py | 200 +++++++++++++++++--------------------- 4 files changed, 112 insertions(+), 118 deletions(-) diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index 640c27f..0ff7ae3 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -22,12 +22,14 @@ from scallops.stitch._radial import radial_correct from scallops.stitch.utils import _crop_image, dtype_convert from scallops.utils import _cpu_count, _dask_from_array_no_copy +from scallops.zarr_io import _current_format logger = logging.getLogger("scallops") def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: str): - return { + fmt = _current_format() + d = { "multiscales": [ { "axes": [ @@ -38,10 +40,10 @@ def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: s { "coordinateTransformations": [ { - "scale": [ + "scale": ( float(image_spacing[0]), float(image_spacing[1]), - ], + ), "type": "scale", } ], @@ -49,10 +51,14 @@ def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: s } ], "name": f"/labels/{label_name}", - "version": "0.4", + "version": fmt.version, } ] } + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + return d + + return {"ome": d} def _create_ome_metadata( @@ -64,9 +70,10 @@ def _create_ome_metadata( metadata = {} metadata.update(**kwargs) metadata["stitch_coords"] = dict() + fmt = _current_format() for c in stitch_coords: # convert to dict metadata["stitch_coords"][c] = stitch_coords[c].to_list() - return { + d = { "multiscales": [ { "metadata": metadata, @@ -79,11 +86,11 @@ def _create_ome_metadata( { "coordinateTransformations": [ { - "scale": [ + "scale": ( 1.0, float(image_spacing[0]), float(image_spacing[1]), - ], + ), "type": "scale", } ], @@ -91,10 +98,13 @@ def _create_ome_metadata( } ], "name": f"/images/{image_key}", - "version": "0.4", + "version": fmt.version, } ] } + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + return d + return {"ome": d} def _fuse( diff --git a/scallops/tests/test_io.py b/scallops/tests/test_io.py index 05f0985..009a96b 100644 --- a/scallops/tests/test_io.py +++ b/scallops/tests/test_io.py @@ -222,7 +222,9 @@ def test_write_non_ome_zarr_image(tmp_path, dask): data_zarr = read_image(f"{zarr_path}/images/foo", dask=False) data_ome_zarr = read_image(f"{zarr_path}/images/foo2", dask=False) + xr.testing.assert_equal(data_zarr, data_ome_zarr) + xr.testing.assert_equal(image, data_ome_zarr) @pytest.mark.io diff --git a/scallops/utils.py b/scallops/utils.py index 35f13d8..df876d6 100644 --- a/scallops/utils.py +++ b/scallops/utils.py @@ -64,7 +64,7 @@ def _tqdm_shim(iterator, *args, **kwargs): return iterator -def _fix_json(d): +def _fix_json(d: dict) -> dict: """Attempts to serialize and deserialize a dictionary to ensure it can be safely converted to JSON. diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 792949b..413b8c8 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -24,7 +24,7 @@ from dask.delayed import Delayed from dask.graph_manipulation import bind from ome_zarr.axes import KNOWN_AXES -from ome_zarr.format import CurrentFormat +from ome_zarr.format import FormatV04 from ome_zarr.io import parse_url from ome_zarr.scale import Scaler from ome_zarr.types import JSONDict @@ -38,6 +38,18 @@ logger = logging.getLogger("scallops") +def _current_format(): + return FormatV04() + + +def _get_store_path(group: zarr.Group): + if hasattr(group.store, "root"): + return str(group.store.root) + if hasattr(group.store, "path"): + return group.store.path + return "" + + def is_anndata_zarr(store: StoreLike) -> bool: """Determines whether store is an AnnData Zarr . @@ -76,7 +88,7 @@ def is_ome_zarr_array(node: zarr.Group) -> bool: result = is_ome_zarr_array(root) print(result) # Output: True """ - return node is not None and "multiscales" in node.attrs + return node is not None and ("multiscales" in node.attrs or "ome" in node.attrs) def _get_fs(group: zarr.Group): @@ -134,7 +146,7 @@ def _create_omero_metadata( # Napari requires that colors are specified if channel names are specified channels = ( [ - dict(label=channel_names[i], color=colors[i % len(colors)]) + dict(label=str(channel_names[i]), color=colors[i % len(colors)]) for i in range(len(channel_names)) ] if not np.isscalar(channel_names) @@ -210,33 +222,8 @@ def _attrs_axes_coordinates( - Updated image attributes dictionary. - List of axes dictionaries. - List of coordinate transformations dictionaries or None. - - :example: - - .. code-block:: python - - import xarray as xr - import numpy as np - from scallops.zarr_io import _attrs_axes_coordinates - - data = np.random.rand(5, 10, 512, 512) - dims = ("c", "z", "y", "x") - coords = {"c": ["DAPI", "FITC", "TRITC", "Cy5", "Cy7"]} - array = xr.DataArray(data, dims=dims, coords=coords) - image_attrs = { - "physical_pixel_sizes": [0.1, 0.1, 0.5], - "physical_pixel_units": ["um", "um", "um"], - } - - # Prepare attributes, axes, and coordinate transformations - updated_attrs, axes, coord_transformations = _attrs_axes_coordinates( - image_attrs, array.coords, array.dims - ) - print(updated_attrs) - print(axes) - print(coord_transformations) """ - image_attrs = _fix_json(image_attrs) + omero = _create_omero_metadata(coords, dims) if omero is not None: image_attrs["omero"] = omero @@ -270,6 +257,8 @@ def _attrs_axes_coordinates( space_index = space_index + 1 axes.append(axis) + _fix_attrs(image_attrs) + image_attrs = _fix_json(image_attrs) return image_attrs, axes, coordinate_transformations @@ -399,18 +388,17 @@ def write_zarr( data = data.data if isinstance(data, da.Array): data = rechunk(data) - axes = None - coordinate_transformations = None - if image_attrs is not None: - # Metadata can't be numpy arrays or python classes so do a round trip - # conversion to convert to JSON serializable - _fix_attrs(image_attrs) - if metadata is not None: - image_attrs.update(metadata) - image_attrs, axes, coordinate_transformations = _attrs_axes_coordinates( - image_attrs, coords, dims - ) + + if image_attrs is None: + image_attrs = {} + + if metadata is not None: + image_attrs.update(metadata) + image_attrs, axes, coordinate_transformations = _attrs_axes_coordinates( + image_attrs, coords, dims + ) dask_delayed = [] + fmt = _current_format() if zarr_format == "zarr": # No axis validation if isinstance(data, da.Array): d = da.to_zarr( @@ -428,28 +416,38 @@ def write_zarr( datasets = [{"path": "0"}] if coordinate_transformations is not None: datasets[0]["coordinateTransformations"] = coordinate_transformations - multiscales = [ - dict(version=CurrentFormat().version, datasets=datasets, name=grp.name) - ] - d = {"multiscales": multiscales} + + multiscales = [dict(version=fmt.version, datasets=datasets, name=grp.name)] + zarr_attrs = {"multiscales": multiscales} if axes is not None: multiscales[0]["axes"] = axes - if image_attrs is not None: - multiscales[0]["metadata"] = image_attrs - if "omero" in image_attrs: - d["omero"] = image_attrs["omero"] + + if fmt.version in fmt.version in ("0.5"): + omero = zarr_attrs["ome"].get("omero", {}) + omero.update(image_attrs.pop("omero")) + zarr_attrs["ome"]["omero"] = omero + zarr_attrs = {"ome": zarr_attrs} + else: + omero = zarr_attrs.get("omero", {}) + omero.update(image_attrs.pop("omero")) + zarr_attrs["omero"] = omero + multiscales[0]["metadata"] = image_attrs + if len(dask_delayed) > 0: @dask.delayed def _write_metadata_delayed(grp, d): grp.attrs.update(d) - return dask_delayed + [bind(_write_metadata_delayed, dask_delayed)(grp, d)] + return dask_delayed + [ + bind(_write_metadata_delayed, dask_delayed)(grp, zarr_attrs) + ] else: - grp.attrs.update(d) + grp.attrs.update(zarr_attrs) return dask_delayed else: return write_image( + fmt=fmt, image=data, group=grp, scaler=scaler, @@ -565,49 +563,42 @@ def _write_zarr_labels( ) -def _read_zarr_attrs(multiscale0: zarr.Group) -> tuple[dict, dict, list[str]]: - """Read attributes from a Zarr multiscale group. +def _read_zarr_attrs(attrs) -> tuple[dict, dict, list[str]]: + """Read attributes from Zarr. This function reads and processes the attributes, coordinates, and dimensions from the first multiscale dataset in a Zarr group. It also handles physical pixel sizes and units if available. - :param multiscale0: The Zarr group containing the multiscale dataset. + :param attrs: Zarr attributes. :return: A tuple containing: - coords: Dictionary of coordinates. - attrs: Dictionary of attributes. - dims: List of dimension names. - - :example: - - .. code-block:: python - - import zarr - from scallops.zarr_io import _read_zarr_attrs - - # Create a Zarr group with multiscale attributes - store = zarr.DirectoryStore("example.zarr") - root = zarr.group(store=store) - multiscale0 = root.create_group("multiscales") - multiscale0.attrs["axes"] = [{"name": "x"}, {"name": "y"}, {"name": "z"}] - multiscale0.attrs["datasets"] = [ - {"coordinateTransformations": [{"scale": [1.0, 0.5, 0.5]}]} - ] - - # Read attributes from the multiscale group - coords, attrs, dims = _read_zarr_attrs(multiscale0) - print(coords) - print(attrs) - print(dims) """ - attrs = multiscale0.get("metadata") - if attrs is None: - attrs = {} + # v3 + # ome/omero for channel metadata + # ome/multiscales[0]/metadata for other metadata + + # v2: + # omero for channel metadata + # multiscales[0]/metadata for other metadata + + if "ome" in attrs: + attrs = attrs["ome"] + multiscales = attrs["multiscales"] + if len(multiscales) > 0: + multiscale0 = multiscales[0] + else: + return None, None, None + axes = multiscale0["axes"] dims = [axis["name"] for axis in axes] - - coords = {d: attrs[d] for d in dims if d in attrs and d != "c"} - if "omero" in attrs and "c" in dims: + metadata = multiscale0.get("metadata") + if metadata is None: + metadata = {} + coords = {d: metadata[d] for d in dims if d in metadata and d != "c"} + if "c" in dims and "omero" in attrs: channel_names = attrs["omero"].get("channels") if channel_names is not None: coords["c"] = [c["label"] for c in channel_names] @@ -624,26 +615,9 @@ def _read_zarr_attrs(multiscale0: zarr.Group) -> tuple[dict, dict, list[str]]: if len(space_indices_with_units) > 0: scale = multiscale0["datasets"][0]["coordinateTransformations"][0]["scale"] physical_pixel_sizes = tuple([scale[d] for d in space_indices_with_units]) - attrs["physical_pixel_sizes"] = physical_pixel_sizes - attrs["physical_pixel_units"] = tuple(units) - return coords, attrs, dims - - -def _get_multiscales_path(node: zarr.Group) -> str | None: - if "multiscales" in node.attrs: - multiscales = node.attrs["multiscales"] - key = "0" - - if len(multiscales) > 0: - multiscale0 = multiscales[0] - if "datasets" in multiscale0: - tmp = multiscale0["datasets"] - if len(tmp) > 0: - tmp = tmp[0] - if "path" in tmp: - key = tmp["path"] - return key - return None + metadata["physical_pixel_sizes"] = physical_pixel_sizes + metadata["physical_pixel_units"] = tuple(units) + return coords, metadata, dims def _read_ome_zarr_array( @@ -660,16 +634,24 @@ def _read_ome_zarr_array( node = zarr.open(node, mode="r") if node is None: raise ValueError(f"{_node} not found") - if "multiscales" in node.attrs: - dims = None - coords = {} - attrs = {} + if "ome" in node.attrs or "multiscales" in node.attrs: + coords, attrs, dims = _read_zarr_attrs(node.attrs) + + multiscales = ( + node.attrs["multiscales"] + if "multiscales" in node.attrs + else node.attrs["ome"]["multiscales"] + ) + key = "0" - key = _get_multiscales_path(node) - multiscales = node.attrs["multiscales"] if len(multiscales) > 0: multiscale0 = multiscales[0] - coords, attrs, dims = _read_zarr_attrs(multiscale0) + if "datasets" in multiscale0: + tmp = multiscale0["datasets"] + if len(tmp) > 0: + tmp = tmp[0] + if "path" in tmp: + key = tmp["path"] array = node[key] return array, dims, coords, attrs else: # see if user passed test.zarr and zarr file only has one image @@ -678,7 +660,7 @@ def _read_ome_zarr_array( image_keys = list(images.keys()) if len(image_keys) == 1: return _read_ome_zarr_array(images[image_keys[0]]) - logger.warning("multiscales not found in attrs") + logger.warning(f"multiscales not found in attrs for {node} ") def read_ome_zarr_array( From 28d09552597530de1e713d2bcfe353d275d43649 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 08:15:26 -0400 Subject: [PATCH 02/20] Upgraded zarr to version 3 (continue to write ome zarr 0.4 using zarr format 2) --- pyproject.toml | 12 +- requirements.txt | 26 +-- scallops/_bioio_zarr_reader.py | 263 ------------------------ scallops/cli/illumination_correction.py | 3 +- scallops/cli/pooled_if_sbs.py | 15 +- scallops/cli/register.py | 5 +- scallops/io.py | 17 +- scallops/registration/itk.py | 8 +- scallops/stitch/_stitch.py | 34 +-- scallops/stitch/fuse.py | 16 +- scallops/stitch/utils.py | 29 ++- scallops/tests/test_features.py | 11 +- scallops/tests/test_io.py | 48 +++-- scallops/utils.py | 2 +- scallops/visualize/napari.py | 4 +- scallops/zarr_io.py | 83 ++++---- 16 files changed, 180 insertions(+), 396 deletions(-) delete mode 100644 scallops/_bioio_zarr_reader.py diff --git a/pyproject.toml b/pyproject.toml index 03ecbae..8a66f03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,13 +33,15 @@ classifiers = [# https://pypi.python.org/pypi?%3Aaction=list_classifiers dependencies = [ "adjustText", "anndata>=0.12.4", # https://github.com/scverse/anndata/issues/2166 - "bioio<2", + "bioio", "bioio-nd2", + "bioio-ome-tiff", + "bioio-ome-zarr", "bioio-tifffile", "centrosome", "cp-measure>=0.1.16", "dask-image", - "dask<=2025.11.0", + "dask", "decorator", "filelock", "flox", @@ -54,7 +56,7 @@ dependencies = [ "natsort", "numcodecs", "numpy", - "ome-zarr<0.12.0", + "ome-zarr", "pandas", "pint", "psutil", @@ -68,9 +70,9 @@ dependencies = [ "stardist", "statsmodels", "tensorflow", - "tifffile<=2025.5.10", + "tifffile", "xarray", - "zarr<3" + "zarr" ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index c88fdad..d0f18b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,35 @@ -anndata==0.12.12 adjustText==1.3.0 -bioio-nd2==1.3.0 +anndata==0.12.12 +bioio-nd2==1.7.0 +bioio-ome-tiff==1.4.0 +bioio-ome-zarr==3.4.0 bioio-tifffile==1.3.0 -bioio==1.6.1 +bioio==3.3.0 centrosome==1.3.3 cp-measure==0.1.19 cython==3.2.4 dask-image==2025.11.0 -dask==2025.11.0 +dask==2026.3.0 decorator==5.2.1 filelock==3.29.0 flox==0.11.2 -fsspec==2026.3.0 +fsspec==2026.4.0 igraph==1.0.0 -itk-elastix==0.25.2 -itk==5.4.5 +itk-elastix==0.25.3 +itk==5.4.6 joblib==1.5.3 kneed==0.8.6 mahotas==1.4.18 matplotlib==3.10.8 natsort==8.4.0 -numcodecs==0.15.1 +numcodecs==0.16.5 numpy==2.4.4 -ome-zarr==0.11.1 +ome-zarr==0.16.0 pandas==2.3.3 pint==0.25.3 psutil==7.2.2 pyarrow==23.0.1 -pydantic==2.12.5 +pydantic==2.13.4 scikit-image==0.26.0 scikit-learn==1.8.0 scipy==1.17.1 @@ -36,6 +38,6 @@ shapely==2.1.2 stardist==0.9.2 statsmodels==0.14.6 tensorflow==2.21.0 -tifffile==2025.5.10 +tifffile==2026.5.2 xarray==2026.2.0 -zarr==2.18.7 +zarr==3.2.1 diff --git a/scallops/_bioio_zarr_reader.py b/scallops/_bioio_zarr_reader.py deleted file mode 100644 index 6118c46..0000000 --- a/scallops/_bioio_zarr_reader.py +++ /dev/null @@ -1,263 +0,0 @@ -import logging -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -import xarray as xr -from bioio_base import constants, dimensions, exceptions, io, reader, types -from fsspec.spec import AbstractFileSystem -from ome_zarr.io import parse_url -from ome_zarr.reader import Reader as ZarrReader - -logger = logging.getLogger("scallops") - - -# Same as https://github.com/bioio-devs/bioio-ome-zarr/blob/main/bioio_ome_zarr/reader.py but fixes bug in channel names -# Also checks to see if zarr path is {zarr_path}/images/image1 with only 1 image -# See https://github.com/bioio-devs/bioio-ome-zarr/pull/22 -class ScallopsZarrReader(reader.Reader): - """The main class of each reader plugin. This class is subclass of the abstract class reader - (BaseReader) in bioio-base. - - Parameters - ---------- - image: types.PathLike - String or Path to the ZARR root - fs_kwargs: Dict[str, Any] - Ignored - """ - - _xarray_dask_data: Optional["xr.DataArray"] = None - _xarray_data: Optional["xr.DataArray"] = None - _mosaic_xarray_dask_data: Optional["xr.DataArray"] = None - _mosaic_xarray_data: Optional["xr.DataArray"] = None - _dims: Optional[dimensions.Dimensions] = None - _metadata: Optional[Any] = None - _scenes: Optional[Tuple[str, ...]] = None - _current_scene_index: int = 0 - # Do not provide default value because - # they may not need to be used by your reader (i.e. input param is an array) - _fs: "AbstractFileSystem" - _path: str - - # Required Methods - - def __init__( - self, - image: types.PathLike, - fs_kwargs: Dict[str, Any] = {}, - ): - # Expand details of provided image - self._fs, self._path = io.pathlike_to_fs( - image, - enforce_exists=False, - fs_kwargs=fs_kwargs, - ) - - # Enforce valid image - if not self._is_supported_image(self._fs, self._path): - raise exceptions.UnsupportedFileFormatError( - self.__class__.__name__, - self._path, - "Could not find a .zgroup or .zarray file at the provided path.", - ) - - self._zarr = get_zarr_reader(self._fs, self._path).zarr - self._physical_pixel_sizes: Optional[types.PhysicalPixelSizes] = None - self._channel_names: Optional[List[str]] = None - - @staticmethod - def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool: - try: - get_zarr_reader(fs, path) - return True - except AttributeError: - return False - - @classmethod - def is_supported_image( - cls, - image: types.ImageLike, - fs_kwargs: Dict[str, Any] = {}, - **kwargs: Any, - ) -> bool: - if isinstance(image, (str, Path)): - return cls._is_supported_image(None, str(image), **kwargs) - else: - return reader.Reader.is_supported_image( - cls, image, fs_kwargs=fs_kwargs, **kwargs - ) - - @property - def scenes(self) -> Tuple[str, ...]: - if self._scenes is None: - scenes = self._zarr.root_attrs["multiscales"] - - # if (each scene has a name) and (that name is unique) use name. - # otherwise generate scene names. - if all("name" in scene for scene in scenes) and ( - len({scene["name"] for scene in scenes}) == len(scenes) - ): - self._scenes = tuple(str(scene["name"]) for scene in scenes) - else: - self._scenes = tuple( - f"scene_{i}" - for i in range(len(self._zarr.root_attrs["multiscales"])) - ) - return self._scenes - - @property - def resolution_levels(self) -> Tuple[int, ...]: - """ - Returns - ------- - resolution_levels: Tuple[str, ...] - Return the available resolution levels for the current scene. - By default these are ordered from highest resolution to lowest - resolution. - """ - return tuple( - rl - for rl in range( - len( - self._zarr.root_attrs["multiscales"][self.current_scene_index][ - "datasets" - ] - ) - ) - ) - - def _read_delayed(self) -> xr.DataArray: - return self._xarr_format(delayed=True) - - def _read_immediate(self) -> xr.DataArray: - return self._xarr_format(delayed=False) - - def _xarr_format(self, delayed: bool) -> xr.DataArray: - data_path = self._zarr.root_attrs["multiscales"][self.current_scene_index][ - "datasets" - ][self.current_resolution_level]["path"] - image_data = self._zarr.load(data_path) - - axes = self._zarr.root_attrs["multiscales"][self.current_scene_index].get( - "axes" - ) - if axes: - dims = [sub["name"].upper() for sub in axes] - else: - dims = list(reader.Reader._guess_dim_order(image_data.shape)) - - if not delayed: - image_data = image_data.compute() - - coords = self._get_coords( - dims, - image_data.shape, - scene=self.current_scene, - channel_names=self.channel_names, - ) - - return xr.DataArray( - image_data, - dims=dims, - coords=coords, - attrs={constants.METADATA_UNPROCESSED: self._zarr.root_attrs}, - ) - - # Optional Methods - @property - def physical_pixel_sizes(self) -> types.PhysicalPixelSizes: - """Return the physical pixel sizes of the image.""" - if self._physical_pixel_sizes is None: - try: - z_size, y_size, x_size = self._get_pixel_size( - list(self.dims.order), - ) - except Exception as e: - logger.warning(f"Could not parse zarr pixel size: {e}") - z_size, y_size, x_size = None, None, None - - self._physical_pixel_sizes = types.PhysicalPixelSizes( - z_size, y_size, x_size - ) - return self._physical_pixel_sizes - - def _get_pixel_size( - self, - dims: List[str], - ) -> Tuple[Optional[float], Optional[float], Optional[float]]: - # OmeZarr file may contain an additional set of "coordinateTransformations" - # these coefficents are applied to all resolution levels. - if ( - "coordinateTransformations" - in self._zarr.root_attrs["multiscales"][self.current_scene_index] - ): - universal_res_consts = self._zarr.root_attrs["multiscales"][ - self.current_scene_index - ]["coordinateTransformations"][0]["scale"] - else: - universal_res_consts = [1.0 for _ in range(len(dims))] - - coord_transform = self._zarr.root_attrs["multiscales"][ - self.current_scene_index - ]["datasets"][self.current_resolution_level]["coordinateTransformations"] - - spatial_coeffs = {} - - for dim in [ - dimensions.DimensionNames.SpatialX, - dimensions.DimensionNames.SpatialY, - dimensions.DimensionNames.SpatialZ, - ]: - if dim in dims: - dim_index = dims.index(dim) - spatial_coeffs[dim] = ( - coord_transform[0]["scale"][dim_index] - * universal_res_consts[dim_index] - ) - else: - spatial_coeffs[dim] = None - - return ( - spatial_coeffs[dimensions.DimensionNames.SpatialZ], - spatial_coeffs[dimensions.DimensionNames.SpatialY], - spatial_coeffs[dimensions.DimensionNames.SpatialX], - ) - - @property - def channel_names(self) -> Optional[List[str]]: - if self._channel_names is None: - if "omero" in self._zarr.root_attrs: - self._channel_names = [ - str(channel["label"]) - for channel in self._zarr.root_attrs["omero"]["channels"] - ] - return self._channel_names - - @staticmethod - def _get_coords( - dims: List[str], - shape: Tuple[int, ...], - scene: str, - channel_names: Optional[List[str]], - ) -> Dict[str, Any]: - coords: Dict[str, Any] = {} - - # Use dims for coord determination - if dimensions.DimensionNames.Channel in dims: - # Generate channel names if no existing channel names - if channel_names is None: - coords[dimensions.DimensionNames.Channel] = [ - f"channel_{i}" - for i in range(shape[dims.index(dimensions.DimensionNames.Channel)]) - ] - else: - coords[dimensions.DimensionNames.Channel] = channel_names - - return coords - - -def get_zarr_reader(fs: AbstractFileSystem, path: str) -> ZarrReader: - if fs is not None: - path = fs.unstrip_protocol(path) - - return ZarrReader(parse_url(path, mode="r")) diff --git a/scallops/cli/illumination_correction.py b/scallops/cli/illumination_correction.py index 53af81c..4599d70 100644 --- a/scallops/cli/illumination_correction.py +++ b/scallops/cli/illumination_correction.py @@ -43,6 +43,7 @@ from scallops.zarr_io import ( _get_fs, _get_sep, + _get_store_path, _write_zarr_image, is_ome_zarr_array, open_ome_zarr, @@ -108,7 +109,7 @@ def single_agg_illumination_correction( ) if save_z_index: if output_image_format == "zarr": - path = root.store.path.rstrip(_get_sep(root)) + path = _get_store_path(root).rstrip(_get_sep(root)) if path.endswith(".zarr"): path = path[: -len(".zarr")] protocol = _get_fs_protocol(_get_fs(root)) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 3522679..3294f25 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -75,6 +75,7 @@ from scallops.zarr_io import ( _get_fs, _get_sep, + _get_store_path, _write_zarr_image, is_anndata_zarr, open_ome_zarr, @@ -226,7 +227,9 @@ def spot_detection_pipeline( _, file_list, metadata = image_tuple image_key = metadata["id"] if not force: - points_path = f"{root.store.path.rstrip(_get_sep(root))}{_get_sep(root)}points" + points_path = ( + f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" + ) points_protocol = _get_fs_protocol(_get_fs(root)) if points_protocol != "file": points_path = f"{points_protocol}://{points_path}" @@ -334,7 +337,9 @@ def spot_detection_pipeline( else: del maxed if "peaks" in save_keys: - points_path = f"{root.store.path.rstrip(_get_sep(root))}{_get_sep(root)}points" + points_path = ( + f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" + ) protocol = _get_fs_protocol(_get_fs(root)) if protocol != "file": points_path = f"{protocol}://{points_path}" @@ -911,7 +916,7 @@ def reads_pipeline( logger.info(f"Running reads for {image_key}") spots_sep = _get_sep(spots_root) - points_path = f"{spots_root.store.path.rstrip(spots_sep)}{spots_sep}points" + points_path = f"{_get_store_path(spots_root).rstrip(spots_sep)}{spots_sep}points" spots_protocol = _get_fs_protocol(_get_fs(spots_root)) if spots_protocol != "file": points_path = f"{spots_protocol}://{points_path}" @@ -1229,8 +1234,8 @@ def reads_main(arguments: argparse.Namespace): for key in image_keys: reads_pipeline( key, - spots_root=zarr.open(spots, "r"), - labels_root=zarr.open(labels + labels_fs.sep + "labels", "r"), + spots_root=zarr.open(spots, mode="r"), + labels_root=zarr.open(labels + labels_fs.sep + "labels", mode="r"), barcodes_file=barcodes_file, file_separator=output_fs.sep, threshold_peaks=threshold_peaks, diff --git a/scallops/cli/register.py b/scallops/cli/register.py index ed081d9..a0bac15 100644 --- a/scallops/cli/register.py +++ b/scallops/cli/register.py @@ -45,6 +45,7 @@ from scallops.xr import _z_projection from scallops.zarr_io import ( _get_fs, + _get_store_path, _write_zarr_image, is_ome_zarr_array, open_ome_zarr, @@ -447,7 +448,7 @@ def get_matching_names( zarr_dir = "labels" if labels else "images" if isinstance(image_dir, Group): protocol = _get_fs_protocol(_get_fs(image_dir)) - image_dir = f"{image_dir.store.path}{image_dir.name}" + image_dir = f"{_get_store_path(image_dir)}{image_dir.name}" if protocol != "file": image_dir = f"{protocol}://{image_dir}" @@ -464,7 +465,7 @@ def get_matching_names( results = [] for path in paths: name = os.path.basename(path) - if not name.startswith(".") and is_ome_zarr_array(zarr.open(path, "r")): + if not name.startswith(".") and is_ome_zarr_array(zarr.open(path, mode="r")): results.append(path) return results diff --git a/scallops/io.py b/scallops/io.py index 18aabc4..e34e2ec 100644 --- a/scallops/io.py +++ b/scallops/io.py @@ -54,12 +54,11 @@ from xarray.core.utils import equivalent from zarr.storage import StoreLike -from scallops._bioio_zarr_reader import ScallopsZarrReader from scallops.experiment.elements import Experiment, _LazyLoadData from scallops.externals.tifffile2014 import imsave from scallops.utils import forceTCZYX, mlcs from scallops.xr import _crop -from scallops.zarr_io import _read_zarr_experiment, read_ome_zarr_array +from scallops.zarr_io import _get_store_path, _read_zarr_experiment, read_ome_zarr_array logger = logging.getLogger("scallops") @@ -83,7 +82,7 @@ def _add_suffix(path: str, suffix: str) -> str: """ path = path.rstrip("/") - if not path.lower().endswith(".zarr"): + if not path.lower().endswith(suffix): logger.info(f"Added `{suffix}` extension to {path}") path += suffix return path @@ -234,7 +233,9 @@ def _create_image(path: str, **kwargs) -> bioio.BioImage: base_path_lc, ext = os.path.splitext(path_lc) if "reader" not in img_args: if ext in ["", ".zarr", "/", ".zarr/"]: - img_args["reader"] = ScallopsZarrReader + import bioio_ome_zarr + + img_args["reader"] = bioio_ome_zarr.Reader elif ext in [".tiff", ".tif"] and os.path.splitext(base_path_lc)[1] != ".ome": img_args["reader"] = bioio_tifffile.Reader return bioio.BioImage(path, **img_args) @@ -1367,7 +1368,7 @@ def _images2fov( name = ( os.path.basename(file_list[i]) if not isinstance(file_list[i], zarr.Group) - else file_list[i].store.path + else _get_store_path(file_list[i]) ) src_metadata.append(dict(attrs=image_attrs[i], name=name)) @@ -1640,7 +1641,7 @@ def _get_image_key_func(group_by): group_to_matches[group].append((x, d)) else: x = root - name = Path(x.store.path).stem + name = Path(_get_store_path(x)).stem m = file_regex.match(name) if m: d = m.groupdict() @@ -1793,7 +1794,9 @@ def file_sort_key(x): src=file_list, common_src=mlcs( [ - Path(x).stem if not isinstance(x, zarr.Group) else x.store.path + Path(x).stem + if not isinstance(x, zarr.Group) + else _get_store_path(x) for x in file_list ] ), diff --git a/scallops/registration/itk.py b/scallops/registration/itk.py index 205c15b..4b655c2 100644 --- a/scallops/registration/itk.py +++ b/scallops/registration/itk.py @@ -33,7 +33,7 @@ from scallops.registration.landmarks import _get_translation, find_landmarks from scallops.utils import _dask_from_array_no_copy from scallops.xr import _get_dims -from scallops.zarr_io import open_ome_zarr, write_zarr +from scallops.zarr_io import _chunk_key_encoding, open_ome_zarr, write_zarr logger = logging.getLogger("scallops") @@ -331,12 +331,13 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: group = images_group.create_group( image_name.replace("/", "-"), overwrite=True ) - zarr_dataset = group.create_dataset( + zarr_dataset = group.create_array( "0", shape=shape, chunks=(1,) * (len(shape) - 2) + chunk_size, dtype=dtype, overwrite=True, + chunk_key_encoding=_chunk_key_encoding, ) return { @@ -1164,12 +1165,13 @@ def _itk_transform_image_zarr( image_name.replace("/", "-"), overwrite=True ) chunks = (1,) * len(transform_dims) + (chunksize or (1024, 1024)) - data = group.create_dataset( + data = group.create_array( "0", shape=dim_sizes + output_size, chunks=chunks, dtype=image.dtype, overwrite=True, + chunk_key_encoding=_chunk_key_encoding, ) _itk_transform_image( diff --git a/scallops/stitch/_stitch.py b/scallops/stitch/_stitch.py index 212c820..b5b2cab 100644 --- a/scallops/stitch/_stitch.py +++ b/scallops/stitch/_stitch.py @@ -14,7 +14,6 @@ import pyarrow.parquet as pq import zarr from sklearn.cluster import AgglomerativeClustering -from zarr.errors import PathNotFoundError from scallops.cli.util import _get_cli_logger, cli_metadata from scallops.io import is_parquet_file, read_image @@ -32,7 +31,12 @@ tile_source_labels, ) from scallops.utils import _dask_from_array_no_copy -from scallops.zarr_io import is_ome_zarr_array +from scallops.zarr_io import ( + _chunk_key_encoding, + _current_format, + _da_to_zarr_kwargs, + is_ome_zarr_array, +) logger = _get_cli_logger() @@ -82,14 +86,14 @@ def _single_stitch( if is_ome_zarr_array(image_output_root.get(f"images/{image_key}")): logger.info(f"Skipping stitching for {image_key}.") return - except PathNotFoundError: + except: # noqa: E722 pass elif not no_save_labels: try: if is_ome_zarr_array(image_output_root.get(f"labels/{image_key}-mask")): logger.info(f"Skipping stitching for {image_key}.") return - except PathNotFoundError: + except: # noqa: E722 pass elif is_parquet_file(f"{other_output_path}{image_key}-positions.parquet"): logger.info(f"Skipping stitching for {image_key}.") @@ -404,17 +408,18 @@ def _write_arrays( metadata, ): gc.collect() + fmt = _current_format() if not no_save_labels: labels_group = image_output_root.require_group("labels") group = labels_group.create_group(image_key + "-mask", overwrite=True) - array = group.create_dataset( + array = group.create_array( name="0", shape=(fused_y_size, fused_x_size), chunks=chunk_size, dtype=np.uint8, - dimension_separator="/", overwrite=True, + chunk_key_encoding=_chunk_key_encoding, ) da.to_zarr( @@ -428,20 +433,21 @@ def _write_arrays( ), url=array, compute=True, - dimension_separator="/", + **_da_to_zarr_kwargs(fmt), ) group.attrs.update( _create_label_ome_metadata(image_spacing, image_key + "-mask") ) + if blend == "none": group = labels_group.create_group(image_key + "-tile", overwrite=True) - array = group.create_dataset( + array = group.create_array( name="0", shape=(fused_y_size, fused_x_size), chunks=chunk_size, dtype=np.uint16, - dimension_separator="/", overwrite=True, + chunk_key_encoding=_chunk_key_encoding, ) da.to_zarr( @@ -451,12 +457,16 @@ def _write_arrays( ), url=array, compute=True, - dimension_separator="/", + **_da_to_zarr_kwargs(fmt), ) label_metadata = _create_label_ome_metadata( - image_spacing, image_key + "-tile" + image_spacing, image_key + "-tile", fmt=fmt + ) + + multiscales_path = ( + "multiscales" if "multiscales" in label_metadata else "ome/multiscales" ) - label_metadata["multiscales"][0]["metadata"] = { + label_metadata[multiscales_path][0]["metadata"] = { "source": f"../../images/{image_key}" } group.attrs.update(label_metadata) diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index 0ff7ae3..83cdbac 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -15,6 +15,7 @@ import zarr from dask import delayed from dask.diagnostics import ProgressBar +from ome_zarr.format import Format from skimage.util import img_as_float from sklearn.cluster import AgglomerativeClustering @@ -22,13 +23,16 @@ from scallops.stitch._radial import radial_correct from scallops.stitch.utils import _crop_image, dtype_convert from scallops.utils import _cpu_count, _dask_from_array_no_copy -from scallops.zarr_io import _current_format +from scallops.zarr_io import _chunk_key_encoding, _current_format, _da_to_zarr_kwargs logger = logging.getLogger("scallops") -def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: str): - fmt = _current_format() +def _create_label_ome_metadata( + image_spacing: tuple[float, float], label_name: str, fmt: Format = None +): + if fmt is None: + fmt = _current_format() d = { "multiscales": [ { @@ -232,7 +236,7 @@ def _fuse( locks = np.array(locks) partition_tree = shapely.STRtree(partition_boxes) - result = group.create_dataset( + result = group.create_array( shape=( len(output_channels), # c fused_y_size, @@ -241,8 +245,8 @@ def _fuse( dtype=target_dtype, chunks=(1,) + chunk_size, name="0", - dimension_separator="/", overwrite=True, + chunk_key_encoding=_chunk_key_encoding, ) _fuse_image_delayed = delayed(_fuse_image) @@ -382,7 +386,7 @@ def _fuse( url=result, region=(slice(channel_batch, channel_batch + channels_per_batch),), compute=True, - dimension_separator="/", + **_da_to_zarr_kwargs(_current_format()), ) diff --git a/scallops/stitch/utils.py b/scallops/stitch/utils.py index 4754b7b..f8e29c8 100644 --- a/scallops/stitch/utils.py +++ b/scallops/stitch/utils.py @@ -328,19 +328,18 @@ def get_tile_position(image: bioio.BioImage, image_index: int = 0): ome_metadata = _get_ome(image) physical_size_y_unit = None physical_size_x_unit = None - if ome_metadata is not None: - values = [ - ome_metadata.images[image_index].pixels.planes[0].position_y, - ome_metadata.images[image_index].pixels.planes[0].position_x, - ] - physical_size_y_unit = ( - ome_metadata.images[image_index].pixels.planes[0].position_y_unit.value - ) - physical_size_x_unit = ( - ome_metadata.images[image_index].pixels.planes[0].position_x_unit.value - ) - elif "multiscales" in image.metadata: - metadata = image.metadata["multiscales"][0]["metadata"] + values = None + if ome_metadata is not None and image_index < len(ome_metadata.images): + img = ome_metadata.images[image_index] + if len(img.pixels.planes) > 0: + values = [ + img.pixels.planes[0].position_y, + img.pixels.planes[0].position_x, + ] + physical_size_y_unit = img.pixels.planes[0].position_y_unit.value + physical_size_x_unit = img.pixels.planes[0].position_x_unit.value + if values is None and "multiscales" in image.metadata.attributes: + metadata = image.metadata.attributes["multiscales"][0]["metadata"] values = [metadata["position_y"], metadata["position_x"]] physical_size_y_unit = metadata["position_y_unit"] physical_size_x_unit = metadata["position_x_unit"] @@ -420,8 +419,8 @@ def _pixel_size_from_image(image: bioio.BioImage) -> np.array: ] physical_size_y_unit = ome_metadata.images[0].pixels.physical_size_y_unit.value physical_size_x_unit = ome_metadata.images[0].pixels.physical_size_x_unit.value - elif "multiscales" in image.metadata: - metadata = image.metadata["multiscales"][0]["metadata"] + elif "multiscales" in image.metadata.attributes: + metadata = image.metadata.attributes["multiscales"][0]["metadata"] values = [metadata["physical_size_y"], metadata["physical_size_x"]] physical_size_y_unit = metadata["physical_size_y_unit"] physical_size_x_unit = metadata["physical_size_x_unit"] diff --git a/scallops/tests/test_features.py b/scallops/tests/test_features.py index d6f2f27..a864877 100644 --- a/scallops/tests/test_features.py +++ b/scallops/tests/test_features.py @@ -27,6 +27,7 @@ from scallops.features.spots import spot_count from scallops.features.texture import pftas from scallops.io import read_image, to_label_crops +from scallops.zarr_io import _chunk_key_encoding __this__ = Path(__file__).resolve() __tests__ = __this__.parent @@ -59,12 +60,16 @@ def test_to_label_crops(tmp_path, array_A1_102_cells, array_A1_102_alnpheno): assert len(result_index) == 1 and result_index.values[0] == 2603 group = zarr.group() - intensity_image_zarr = group.create_dataset( - name="image", shape=intensity_image.shape + intensity_image_zarr = group.create_array( + name="image", + shape=intensity_image.shape, + chunk_key_encoding=_chunk_key_encoding, ) intensity_image_zarr[:] = intensity_image.compute() - label_image_zarr = group.create_dataset(name="label", shape=label_image.shape) + label_image_zarr = group.create_array( + name="label", shape=label_image.shape, chunk_key_encoding=_chunk_key_encoding + ) label_image_zarr[:] = label_image.compute() to_label_crops( diff --git a/scallops/tests/test_io.py b/scallops/tests/test_io.py index 73d9a5f..cb57781 100644 --- a/scallops/tests/test_io.py +++ b/scallops/tests/test_io.py @@ -31,6 +31,8 @@ to_image_montage, ) from scallops.zarr_io import ( + _current_format, + _da_to_zarr_kwargs, _write_zarr_image, _write_zarr_labels, is_anndata_zarr, @@ -39,11 +41,6 @@ ) -@pytest.fixture(params=[False, True]) -def dask(request): - return request.param - - @pytest.mark.io def test_is_scallops_zarr(tmp_path): data = anndata.AnnData( @@ -165,7 +162,8 @@ def test_read_experiment_multi_scene(scenes): @pytest.mark.io -def test_read_tif(dask): +@pytest.mark.parametrize("use_dask", [True, False]) +def test_read_tif(use_dask): """Ensures that we can read a tif file using bioio. # In older versions of bioio the following was needed: @@ -178,9 +176,10 @@ def test_read_tif(dask): # bioio.formats.FORMAT_IMPLEMENTATIONS["tif"] = ["bioio.readers.tiff_reader.TiffReader"] """ data = read_image( - "scallops/tests/data/tif/10X_c0-DAPI-p65ab_A1_Tile-7.phenotype.tif", dask=dask + "scallops/tests/data/tif/10X_c0-DAPI-p65ab_A1_Tile-7.phenotype.tif", + dask=use_dask, ) - if dask: + if use_dask: data2 = read_image( "scallops/tests/data/tif/10X_c0-DAPI-p65ab_A1_Tile-7.phenotype.tif", dask=False, @@ -210,19 +209,22 @@ def test_write_ome_zarr_image_dask(tmp_path): @pytest.mark.io -def test_write_non_ome_zarr_image(tmp_path, dask): +@pytest.mark.parametrize("use_dask", [True, False]) +def test_write_non_ome_zarr_image(tmp_path, use_dask): image = read_image( - "scallops/tests/data/tif/10X_c0-DAPI-p65ab_A1_Tile-7.phenotype.tif", dask=dask + "scallops/tests/data/tif/10X_c0-DAPI-p65ab_A1_Tile-7.phenotype.tif", + dask=use_dask, ) image.attrs = {"test": "1"} image.attrs["physical_pixel_sizes"] = (1, 1, 1) image.attrs["physical_pixel_units"] = ("mm", "mm", "mm") - zarr_path = str(tmp_path / "test.zarr") - _write_zarr_image("foo", open_ome_zarr(zarr_path), image, zarr_format="zarr") - _write_zarr_image("foo2", open_ome_zarr(zarr_path), image) + zarr_path1 = str(tmp_path / "test1.zarr") + zarr_path2 = str(tmp_path / "test2.zarr") - data_zarr = read_image(f"{zarr_path}/images/foo", dask=False) - data_ome_zarr = read_image(f"{zarr_path}/images/foo2", dask=False) + _write_zarr_image("test", open_ome_zarr(zarr_path1), image, zarr_format="zarr") + _write_zarr_image("test", open_ome_zarr(zarr_path2), image) + data_zarr = read_image(f"{zarr_path1}/images/test", dask=False) + data_ome_zarr = read_image(f"{zarr_path2}/images/test", dask=False) xr.testing.assert_equal(data_zarr, data_ome_zarr) xr.testing.assert_equal(image, data_ome_zarr) @@ -268,7 +270,8 @@ def test_experiment_pattern_prefix(tmp_path): @pytest.mark.io -def test_experiment_separate_t_c(dask, tmp_path): +@pytest.mark.parametrize("use_dask", [True, False]) +def test_experiment_separate_t_c(use_dask, tmp_path): """Test reading in exp where channels and cycles are both stored in separate images.""" ncycles = 4 nchannels = 5 @@ -296,7 +299,7 @@ def test_experiment_separate_t_c(dask, tmp_path): np.testing.assert_equal( image.isel(c=channel, t=cycle).squeeze().data, test_image, - f"dask: {dask}, channel: {channel}, cycle: {cycle}, value: {value}", + f"dask: {use_dask}, channel: {channel}, cycle: {cycle}, value: {value}", ) gen = list(_set_up_experiment(tmp_path, pattern, ("well",))) assert len(gen) == 1 @@ -311,12 +314,13 @@ def test_experiment_separate_t_c(dask, tmp_path): @pytest.mark.io -def test_group_by_one_field(dask): +@pytest.mark.parametrize("use_dask", [True, False]) +def test_group_by_one_field(use_dask): exp = read_experiment( "scallops/tests/data/experimentC/input", "10X_c{t}-SBS-{t}/{mag}X_c{t}-{exp}-{t}_{well}_Tile-102.{datatype}.tif", group_by=("well",), - dask=dask, + dask=use_dask, ) assert len(exp.images) == 1 image = exp.images["A1"] @@ -345,9 +349,9 @@ def test_read_write_labels(tmp_path, array_A1_102_nuclei): nuclei = array_A1_102_nuclei.squeeze().data _write_zarr_labels( - name="test", root=open_ome_zarr(str(tmp_path), "w"), labels=nuclei + name="test", root=open_ome_zarr(str(tmp_path), mode="w"), labels=nuclei ) - test = read_ome_zarr_array(zarr.open(str(tmp_path / "labels" / "test"), "r")) + test = read_ome_zarr_array(zarr.open(str(tmp_path / "labels" / "test"), mode="r")) np.testing.assert_equal(nuclei, test.data) @@ -605,6 +609,7 @@ def test_dask_zarr_io_component(tmp_path, recwarn): url=path, component="0", fill_value=10, + **_da_to_zarr_kwargs(_current_format()), ) assert zarr.open(path, mode="r")["0"].fill_value == 10 assert len(recwarn) == 0, ( @@ -621,6 +626,7 @@ def test_dask_zarr_io(tmp_path, recwarn): url=path, compute=True, fill_value=10, + **_da_to_zarr_kwargs(_current_format()), ) assert zarr.open(path, mode="r").fill_value == 10 assert len(recwarn) == 0, ( diff --git a/scallops/utils.py b/scallops/utils.py index df876d6..ac06ce9 100644 --- a/scallops/utils.py +++ b/scallops/utils.py @@ -250,8 +250,8 @@ def grid_search( write_image( image=image, group=ome_zarr_root.create_group(name), - scaler=None, axes=["t", "c", "z", "y", "x"], + scale_factors=[], storage_options=dict(dimension_separator="/"), ) ome_zarr_root.create_group("OME").attrs["series"] = image_keys diff --git a/scallops/visualize/napari.py b/scallops/visualize/napari.py index 7c9e52d..8101d44 100644 --- a/scallops/visualize/napari.py +++ b/scallops/visualize/napari.py @@ -143,7 +143,7 @@ def _open_name(item=None): for i in range(len(image_keys)): node_name = image_keys[i] - grp = zarr.open(url + fs.sep + "images" + fs.sep + node_name, "r") + grp = zarr.open(url + fs.sep + "images" + fs.sep + node_name, mode="r") channel_axis = None ch_types = [axis["type"] for axis in grp.attrs["multiscales"][0]["axes"]] @@ -193,7 +193,7 @@ def _open_name(item=None): for label_key in label_keys: label_suffix = label_key.split("-")[-1] - grp = zarr.open(url + fs.sep + "labels" + fs.sep + label_key, "r") + grp = zarr.open(url + fs.sep + "labels" + fs.sep + label_key, mode="r") label_data = grp["0"] params = dict(name=label_key, opacity=0.5, visible=label_suffix in visible) if label_suffix == "spots": # view spots as points diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 413b8c8..7647b26 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -23,10 +23,10 @@ from dask.array import from_zarr from dask.delayed import Delayed from dask.graph_manipulation import bind +from ome_zarr import USE_DASK_ARRAY_KWARGS from ome_zarr.axes import KNOWN_AXES -from ome_zarr.format import FormatV04 +from ome_zarr.format import Format, FormatV04 from ome_zarr.io import parse_url -from ome_zarr.scale import Scaler from ome_zarr.types import JSONDict from ome_zarr.writer import write_image from xarray.core.coordinates import DataArrayCoordinates @@ -38,10 +38,25 @@ logger = logging.getLogger("scallops") -def _current_format(): +def _current_format() -> Format: return FormatV04() +_chunk_key_encoding = {"name": "v2", "separator": "/"} + + +def _da_to_zarr_kwargs(fmt: Format): + zarr_array_kwargs = dict() + if USE_DASK_ARRAY_KWARGS: + if fmt.zarr_format == 2: + zarr_array_kwargs["chunk_key_encoding"] = _chunk_key_encoding + elif fmt.zarr_format == 2: + zarr_array_kwargs["dimension_separator"] = "/" + if fmt.zarr_format == 2: + zarr_array_kwargs["zarr_format"] = 2 + return zarr_array_kwargs + + def _get_store_path(group: zarr.Group): if hasattr(group.store, "root"): return str(group.store.root) @@ -94,7 +109,7 @@ def is_ome_zarr_array(node: zarr.Group) -> bool: def _get_fs(group: zarr.Group): if hasattr(group.store, "fs"): return group.store.fs - return fsspec.url_to_fs(group.store.path)[0] + return fsspec.url_to_fs(_get_store_path(group))[0] def _get_sep(group: zarr.Group) -> str: @@ -136,6 +151,8 @@ def _create_omero_metadata( # Output: {'channels': [{'label': 'DAPI', 'color': '00FFFF'}, {'label': 'FITC', 'color': 'FFFF00'}, ...]} """ + if dims is None: + return None channel_names = coords["c"] if "c" in dims and "c" in coords else None if channel_names is not None: if isinstance(channel_names, xr.DataArray): @@ -208,7 +225,7 @@ def _fix_attrs(d: dict) -> None: def _attrs_axes_coordinates( image_attrs: dict, coords: DataArrayCoordinates, dims: tuple[Hashable, ...] -) -> tuple[dict, list[dict], list[dict] | None]: +) -> tuple[dict, list[dict] | None, list[dict] | None]: """Prepare attributes, axes, and coordinate transformations for Zarr storage. Processes the attributes, coordinates, and dimensions of a DataArray to generate @@ -220,7 +237,7 @@ def _attrs_axes_coordinates( :param dims: The dimensions of the DataArray. :return: A tuple containing: - Updated image attributes dictionary. - - List of axes dictionaries. + - List of axes dictionaries or None. - List of coordinate transformations dictionaries or None. """ @@ -248,14 +265,16 @@ def _attrs_axes_coordinates( scale = list((1.0,) * len(non_space_dims)) + list(physical_pixel_sizes) coordinate_transformations = [{"scale": scale, "type": "scale"}] - axes = [] space_index = 0 - for d in dims: - axis = {"name": d, "type": KNOWN_AXES.get(d)} - if physical_pixel_units is not None and axis["type"] == "space": - axis["unit"] = physical_pixel_units[space_index] - space_index = space_index + 1 - axes.append(axis) + axes = None + if dims is not None: + axes = [] + for d in dims: + axis = {"name": d, "type": KNOWN_AXES.get(d)} + if physical_pixel_units is not None and axis["type"] == "space": + axis["unit"] = physical_pixel_units[space_index] + space_index = space_index + 1 + axes.append(axis) _fix_attrs(image_attrs) image_attrs = _fix_json(image_attrs) @@ -266,7 +285,6 @@ def _write_zarr_image( name: str | None, root: zarr.Group | str | Path, image: da.Array | np.ndarray | xr.DataArray, - scaler: Scaler = None, metadata: None | dict[str, Any] = None, group: str | None = "images", zarr_format: Literal["ome_zarr", "zarr"] = "ome_zarr", @@ -279,8 +297,6 @@ def _write_zarr_image( :param image: Image to write. A downsampling of the image will be computed if the scaler argument is non-None. Image.attrs will be stored if image is an instance of xr.DataArray - :param scaler: Scaler implementation for downsampling the image argument. If None, - no downsampling is performed. :param group: Group name under root to write image to :param metadata: Additional metadata to store :param zarr_format: Either ome_zarr or zarr. Use zarr for storing non-ome zarr @@ -290,8 +306,6 @@ def _write_zarr_image( :return: Empty list if the compute flag is True, otherwise it returns a list of :class:`dask.delayed.Delayed` representing the value to be computed by dask. """ - if zarr_format == "zarr" and scaler is not None: - raise NotImplementedError("Scaler not implemented for zarr format") if isinstance(root, (str, Path)): root = open_ome_zarr(root, mode="a") @@ -315,7 +329,6 @@ def _write_zarr_image( image_attrs=image_attrs, coords=coords, dims=dims, - scaler=scaler, metadata=metadata, zarr_format=zarr_format, compute=compute, @@ -328,7 +341,6 @@ def write_zarr( image_attrs: dict | None, coords: dict | None, dims: list[str] | tuple[Hashable, ...] | None, - scaler: Scaler = None, metadata: dict[str, Any] | None = None, zarr_format: Literal["ome_zarr", "zarr"] = "ome_zarr", compute: bool = True, @@ -346,8 +358,6 @@ def write_zarr( metadata. :param coords: Coordinates of the DataArray. :param dims: Dimensions of the DataArray. - :param scaler: Scaler implementation for downsampling the data. If None, - no downsampling is performed. :param metadata: Additional metadata to store. :param zarr_format: Format to use for storing the data. Use "zarr" for non-OME Zarr compliant data with dimensions other than (t, c, z, y, x). Default is @@ -358,9 +368,6 @@ def write_zarr( :return: Empty list if the compute flag is True, otherwise a list of dask.delayed.Delayed objects. - :raises NotImplementedError: - If scaler is provided and zarr_format is "zarr". - :example: .. code-block:: python @@ -388,7 +395,6 @@ def write_zarr( data = data.data if isinstance(data, da.Array): data = rechunk(data) - if image_attrs is None: image_attrs = {} @@ -397,6 +403,7 @@ def write_zarr( image_attrs, axes, coordinate_transformations = _attrs_axes_coordinates( image_attrs, coords, dims ) + dask_delayed = [] fmt = _current_format() if zarr_format == "zarr": # No axis validation @@ -406,12 +413,14 @@ def write_zarr( url=grp.store, component=str(Path(grp.path, "0")), compute=compute, - dimension_separator=grp._store._dimension_separator, + **_da_to_zarr_kwargs(fmt), ) if not compute: dask_delayed.append(d) elif not isinstance(data, zarr.Array): - grp.create_dataset("0", data=data, overwrite=True) + grp.create_array( + "0", data=data, overwrite=True, chunk_key_encoding=_chunk_key_encoding + ) datasets = [{"path": "0"}] if coordinate_transformations is not None: @@ -424,12 +433,12 @@ def write_zarr( if fmt.version in fmt.version in ("0.5"): omero = zarr_attrs["ome"].get("omero", {}) - omero.update(image_attrs.pop("omero")) + omero.update(image_attrs.pop("omero", {})) zarr_attrs["ome"]["omero"] = omero zarr_attrs = {"ome": zarr_attrs} else: omero = zarr_attrs.get("omero", {}) - omero.update(image_attrs.pop("omero")) + omero.update(image_attrs.pop("omero", {})) zarr_attrs["omero"] = omero multiscales[0]["metadata"] = image_attrs @@ -450,7 +459,7 @@ def _write_metadata_delayed(grp, d): fmt=fmt, image=data, group=grp, - scaler=scaler, + scale_factors=[], axes=axes, compute=compute, metadata=image_attrs, @@ -504,7 +513,6 @@ def _write_zarr_labels( labels: np.ndarray | xr.DataArray | da.Array, metadata: dict[str, Any] = None, group_metadata: dict[str, Any] = None, - scaler: Scaler = None, compute: bool = True, storage_options: JSONDict | None = None, ) -> list[Delayed]: @@ -516,8 +524,6 @@ def _write_zarr_labels( the scaler argument is non-None. :param metadata: Optional label metadata. :param group_metadata: Optional group level metadata. - :param scaler: Scaler implementation for downsampling the label argument. - If None, no downsampling will be performed. :param compute: If true compute immediately otherwise a list of :class:`dask.delayed.Delayed` is returned. :param storage_options: Optional storage options. @@ -555,7 +561,7 @@ def _write_zarr_labels( return write_image( labels, grp, - scaler=scaler, + scale_factors=[], axes=label_axes, metadata=metadata, compute=compute, @@ -706,10 +712,11 @@ def open_ome_zarr(url: Path | str, mode: str = "a") -> zarr.Group | None: """ try: - loc = parse_url(url, mode=mode) + fmt = _current_format() + loc = parse_url(url, mode=mode, fmt=fmt) if loc is None: return None - return zarr.open(loc.store, mode=mode) + return zarr.open(loc.store, mode=mode, zarr_format=fmt.zarr_format) except Exception as e: logger.error(f"Failed to open OME-Zarr store: {url}") raise e @@ -798,7 +805,7 @@ class _LazyLoadZarrData(_LazyLoadData): root = open_ome_zarr(store=store) group = root.create_group("test_group") - group.create_dataset("0", data=[1, 2, 3, 4, 5]) + group.create_array("0", data=[1, 2, 3, 4, 5]) # Create a _LazyLoadZarrData instance lazy_data = _LazyLoadZarrData(group, dask=True) From 170ee5afafc00806ed9cb3609e3b88ec9c52d7aa Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 08:20:59 -0400 Subject: [PATCH 03/20] Updated dependencies --- pyproject.toml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8a66f03..071daf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,19 +33,20 @@ classifiers = [# https://pypi.python.org/pypi?%3Aaction=list_classifiers dependencies = [ "adjustText", "anndata>=0.12.4", # https://github.com/scverse/anndata/issues/2166 - "bioio", "bioio-nd2", "bioio-ome-tiff", "bioio-ome-zarr", "bioio-tifffile", - "centrosome", - "cp-measure>=0.1.16", + "bioio", + "centrosome>=0.1.16", + "cp-measure", + "cython", "dask-image", "dask", "decorator", "filelock", "flox", - "fsspec!=2023.9.0", # 2023.9.0 causes ome-zarr write image to fail + "fsspec", "igraph", "itk-elastix", "itk", @@ -72,11 +73,11 @@ dependencies = [ "tensorflow", "tifffile", "xarray", - "zarr" + "zarr", ] [project.optional-dependencies] -# Added pysam as an optional extra for Linux/macOS users + dialout = [ "pysam" ] From 2b7a01a9286a6ccc7337995632caf7163489a087 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 08:58:03 -0400 Subject: [PATCH 04/20] Fixed tests --- scallops/tests/test_features.py | 6 ++---- scallops/tests/test_illumination_correction.py | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scallops/tests/test_features.py b/scallops/tests/test_features.py index a864877..5e310c3 100644 --- a/scallops/tests/test_features.py +++ b/scallops/tests/test_features.py @@ -62,15 +62,13 @@ def test_to_label_crops(tmp_path, array_A1_102_cells, array_A1_102_alnpheno): group = zarr.group() intensity_image_zarr = group.create_array( name="image", - shape=intensity_image.shape, + data=intensity_image.compute(), chunk_key_encoding=_chunk_key_encoding, ) - intensity_image_zarr[:] = intensity_image.compute() label_image_zarr = group.create_array( - name="label", shape=label_image.shape, chunk_key_encoding=_chunk_key_encoding + name="label", data=label_image.compute(), chunk_key_encoding=_chunk_key_encoding ) - label_image_zarr[:] = label_image.compute() to_label_crops( intensity_image=intensity_image_zarr, diff --git a/scallops/tests/test_illumination_correction.py b/scallops/tests/test_illumination_correction.py index d997bc4..9328e69 100644 --- a/scallops/tests/test_illumination_correction.py +++ b/scallops/tests/test_illumination_correction.py @@ -4,6 +4,7 @@ import numpy as np import pytest import zarr +from zarr.storage import ZipStore from scallops.io import read_image @@ -28,7 +29,7 @@ def test_illumination_correction_cli(tmp_path): ] subprocess.check_call(args) - store = zarr.ZipStore("scallops/tests/data/ops-illum-corr.zip", mode="r") + store = ZipStore("scallops/tests/data/ops-illum-corr.zip", mode="r") root = zarr.group(store=store) np.testing.assert_equal( root["data"][...], From 76217918d94d669bf60e9b54ad640721d8af7001 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 09:03:33 -0400 Subject: [PATCH 05/20] Updated dependencies --- pyproject.toml | 4 ++-- requirements.txt | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 071daf2..5118fef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "bioio-ome-tiff", "bioio-ome-zarr", "bioio-tifffile", - "bioio", + "bioio>=3", "centrosome>=0.1.16", "cp-measure", "cython", @@ -73,7 +73,7 @@ dependencies = [ "tensorflow", "tifffile", "xarray", - "zarr", + "zarr>=3", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index d0f18b4..c48ea3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ adjustText==1.3.0 -anndata==0.12.12 +anndata==0.12.13 bioio-nd2==1.7.0 bioio-ome-tiff==1.4.0 bioio-ome-zarr==3.4.0 @@ -20,7 +20,7 @@ itk==5.4.6 joblib==1.5.3 kneed==0.8.6 mahotas==1.4.18 -matplotlib==3.10.8 +matplotlib==3.10.9 natsort==8.4.0 numcodecs==0.16.5 numpy==2.4.4 @@ -28,7 +28,7 @@ ome-zarr==0.16.0 pandas==2.3.3 pint==0.25.3 psutil==7.2.2 -pyarrow==23.0.1 +pyarrow==24.0.0 pydantic==2.13.4 scikit-image==0.26.0 scikit-learn==1.8.0 @@ -39,5 +39,5 @@ stardist==0.9.2 statsmodels==0.14.6 tensorflow==2.21.0 tifffile==2026.5.2 -xarray==2026.2.0 +xarray==2026.4.0 zarr==3.2.1 From 143e434a24a501d37a09bf3fe5e34aa2f23e4a9b Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 09:32:20 -0400 Subject: [PATCH 06/20] fixed test --- scallops/tests/test_illumination_correction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scallops/tests/test_illumination_correction.py b/scallops/tests/test_illumination_correction.py index 9328e69..0532103 100644 --- a/scallops/tests/test_illumination_correction.py +++ b/scallops/tests/test_illumination_correction.py @@ -29,10 +29,10 @@ def test_illumination_correction_cli(tmp_path): ] subprocess.check_call(args) - store = ZipStore("scallops/tests/data/ops-illum-corr.zip", mode="r") - root = zarr.group(store=store) - np.testing.assert_equal( - root["data"][...], - read_image(os.path.join(tmp_path, "images", "A1")).values.squeeze(), - ) - # compare to known good result + with ZipStore("scallops/tests/data/ops-illum-corr.zip", read_only=True) as store: + root = zarr.open(store=store) + # compare to known good result + np.testing.assert_equal( + root["data"][...], + read_image(os.path.join(tmp_path, "images", "A1")).values.squeeze(), + ) From 92a4c70822239f20111fd8c4b97ceb281f3dea8e Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 11:58:59 -0400 Subject: [PATCH 07/20] updated dask from array no copy --- scallops/utils.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/scallops/utils.py b/scallops/utils.py index ac06ce9..dc40bef 100644 --- a/scallops/utils.py +++ b/scallops/utils.py @@ -27,6 +27,7 @@ import numpy as np import pandas as pd import skimage +import xarray as xr from dask import is_dask_collection from dask.array.core import ( getter, @@ -630,7 +631,7 @@ def _dask_from_array_no_copy( chunks="auto", name=None, lock=False, - asarray=False, + asarray=None, fancy=True, getitem=None, meta=None, @@ -643,6 +644,10 @@ def _dask_from_array_no_copy( "Array is already a dask array. Use 'asarray' or 'rechunk' instead." ) + if xr is not None and isinstance(x, xr.DataArray) and x.chunks is not None: + if isinstance(x.data, da.Array): + return x.data + elif is_dask_collection(x): warnings.warn( "Passing an object to dask.array.from_array which is already a " @@ -660,15 +665,27 @@ def _dask_from_array_no_copy( previous_chunks = getattr(x, "chunks", None) + # As of Zarr 3.x, arrays can have a shards attribute. If present, + # this defines the smallest array region that is safe to write, and + # thus this is a better starting point than the chunks attribute. + # We check for chunks AND shards to be somewhat specific to Zarr 3.x arrays + if ( + hasattr(x, "chunks") + and hasattr(x, "shards") + and (x.shards is not None) + and chunks == "auto" + ): + previous_chunks = x.shards + chunks = normalize_chunks( chunks, x.shape, dtype=x.dtype, previous_chunks=previous_chunks ) if name in (None, True): token = tokenize(x, chunks, lock, asarray, fancy, getitem, inline_array) - name = name or "array-" + token + name = name or f"array-{token}" elif name is False: - name = "array-" + str(uuid.uuid1()) + name = f"array-{uuid.uuid1()}" if lock is True: lock = SerializableLock() @@ -764,7 +781,7 @@ def _list_images_wdl( save_group_size: bool = False, expected_cycles_str: int | None = None, ): - """Used by WDL workflow to output info about images""" + """Used by WDL test to output info about images""" from scallops.io import _set_up_experiment batch_size = 0 From b1f2e8874d42611c30edd03c80972b64f16d8005 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 12:05:19 -0400 Subject: [PATCH 08/20] updated dask from array no copy --- scallops/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scallops/utils.py b/scallops/utils.py index dc40bef..865207c 100644 --- a/scallops/utils.py +++ b/scallops/utils.py @@ -657,6 +657,9 @@ def _dask_from_array_no_copy( if isinstance(x, (list, tuple, memoryview) + np.ScalarType): x = np.array(x) + if isinstance(x, np.ndarray): + x = x.view() + x.flags.writeable = False # if is_arraylike(x) and hasattr(x, "copy"): # x = x.copy() From 66c2192b9a4005d2b9c11829b3f5ce33c95dc00f Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 17:21:48 -0400 Subject: [PATCH 09/20] use dask arrays instead of zarr arrays --- scallops/cli/features.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index 5c7b53a..54b3641 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -24,6 +24,7 @@ import zarr from dask.delayed import Delayed from natsort import natsorted +from packaging.version import Version from zarr import Group from scallops.cli.util import ( @@ -197,19 +198,19 @@ def single_feature( output_fs, _ = fsspec.core.url_to_fs(output_dir) join_df = False features_output_suffix = "" if join_df else "-features" - zarr_inputs = True - - for f in file_list: - if not isinstance(f, (zarr.Group, zarr.Array)): - zarr_inputs = False - break + use_zarr_inputs = Version(zarr.__version__).major < 3 + if use_zarr_inputs: + for f in file_list: + if not isinstance(f, (zarr.Group, zarr.Array)): + use_zarr_inputs = False + break - if zarr_inputs and stacked_image_tuple is not None: + if use_zarr_inputs and stacked_image_tuple is not None: for f in stacked_file_list: if not isinstance(f, (zarr.Group, zarr.Array)): - zarr_inputs = False + use_zarr_inputs = False break - if not zarr_inputs: + if not use_zarr_inputs: image = _read_image(file_list, metadata) else: image = [] @@ -218,7 +219,7 @@ def single_feature( image.append(array) n_channels1 = None if stacked_image_tuple is not None: - if not zarr_inputs: + if not use_zarr_inputs: stacked_image = _read_image(stacked_file_list, stacked_metadata) n_channels1 = image.sizes["c"] # clear coords to avoid issues with xr.concat @@ -343,8 +344,8 @@ def single_feature( df = label_features( objects_df=objects_df, - label_image=zarr_labels if zarr_inputs else da.from_zarr(zarr_labels), - intensity_image=image if zarr_inputs else image.data, + label_image=zarr_labels if use_zarr_inputs else da.from_zarr(zarr_labels), + intensity_image=image if use_zarr_inputs else image.data, features=features, normalize=normalize, channel_names=channel_names, From 0dd47113f72e0fd1b0db3036834ed6cdc3a1c42b Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 17:36:35 -0400 Subject: [PATCH 10/20] Fixed errors running on omics --- scallops/cli/pooled_if_sbs.py | 42 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 3294f25..24d3109 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -179,8 +179,7 @@ def _peaks_to_bases( def spot_detection_pipeline( image_tuple: tuple[tuple[str, ...], list[str], dict], iss_channels: list[int], - file_separator: str, - root: zarr.Group | str, + output: str, max_filter_width: int, sigma_log: float | list[float], z_index: int | str, @@ -226,14 +225,16 @@ def spot_detection_pipeline( """ _, file_list, metadata = image_tuple image_key = metadata["id"] + output_fs = fsspec.url_to_fs(output)[0] + output_sep = output_fs.sep + output = output.rstrip(output_sep) + points_path = f"{output}{output_sep}points" + points_protocol = _get_fs_protocol(output_fs) + if points_protocol != "file": + points_path = f"{points_protocol}://{points_path}" + peaks_path = f"{points_path}{output_sep}{image_key}-peaks.parquet" + if not force: - points_path = ( - f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" - ) - points_protocol = _get_fs_protocol(_get_fs(root)) - if points_protocol != "file": - points_path = f"{points_protocol}://{points_path}" - peaks_path = f"{points_path}{_get_sep(root)}{image_key}-peaks.parquet" if is_parquet_file(peaks_path): logger.info(f"Skipping spot detection for {image_key}") return [] @@ -291,6 +292,7 @@ def spot_detection_pipeline( compute = True metadata = cli_metadata() if not no_version else dict() metadata["image_metadata"] = image_metadata + root = open_ome_zarr(output, mode="a") if "log" in save_keys: loged.attrs.update(metadata) dask_delayed.append( @@ -299,7 +301,6 @@ def spot_detection_pipeline( root=root, image=loged, output_format=output_image_format, - file_separator=file_separator, zarr_format="zarr", compute=compute, ) @@ -314,7 +315,6 @@ def spot_detection_pipeline( root=root, image=std_arr, output_format=output_image_format, - file_separator=file_separator, metadata=dict(parent=image_key), compute=compute, ) @@ -329,7 +329,6 @@ def spot_detection_pipeline( root=root, image=maxed, output_format=output_image_format, - file_separator=file_separator, zarr_format="zarr", compute=compute, ) @@ -337,16 +336,10 @@ def spot_detection_pipeline( else: del maxed if "peaks" in save_keys: - points_path = ( - f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" - ) - protocol = _get_fs_protocol(_get_fs(root)) - if protocol != "file": - points_path = f"{protocol}://{points_path}" - _get_fs(root).makedirs(points_path, exist_ok=True) - peaks_path = f"{points_path}{_get_sep(root)}{image_key}-peaks.parquet" - if _get_fs(root).exists(peaks_path): - _get_fs(root).rm(peaks_path, recursive=True) + output_fs.makedirs(points_path, exist_ok=True) + + if output_fs.exists(peaks_path): + output_fs.rm(peaks_path, recursive=True) dask_delayed.append( _to_parquet( @@ -808,7 +801,7 @@ def spot_detect_main(arguments: argparse.Namespace): chunks = (chunks, chunks) output = _add_suffix(output, ".zarr") - root = open_ome_zarr(output, mode="a") + exp_gen = _set_up_experiment(images, image_pattern, group_by, subset=subset) with ( _create_default_dask_config(), @@ -818,9 +811,8 @@ def spot_detect_main(arguments: argparse.Namespace): for img in exp_gen: delayed_results += spot_detection_pipeline( img, + output=output, iss_channels=channels, - file_separator=None, - root=root, z_index=z_index, output_image_format="zarr", max_filter_width=max_filter_width, From 92295ca76961d95a2c52494664eb966559db0aed Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 18:10:37 -0400 Subject: [PATCH 11/20] set default --- scallops/cli/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scallops/cli/util.py b/scallops/cli/util.py index 0d2da08..6782e78 100644 --- a/scallops/cli/util.py +++ b/scallops/cli/util.py @@ -199,7 +199,7 @@ def _write_image( root: zarr.Group | str, image: np.ndarray | xr.DataArray | da.Array, output_format: str, - file_separator: str, + file_separator: str = "/", metadata: dict | None = None, compute: bool = True, **kwargs, From 758fd921355e1e4519f01e65190b574412a39a92 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 7 May 2026 18:27:13 -0400 Subject: [PATCH 12/20] added return value to _write_image --- scallops/cli/util.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scallops/cli/util.py b/scallops/cli/util.py index 6782e78..876176f 100644 --- a/scallops/cli/util.py +++ b/scallops/cli/util.py @@ -26,6 +26,7 @@ import numpy as np import xarray as xr import zarr +from dask.delayed import Delayed from distributed import Client from scallops.io import save_ome_tiff @@ -203,19 +204,21 @@ def _write_image( metadata: dict | None = None, compute: bool = True, **kwargs, -) -> None: +) -> list[Delayed]: """Write image data to Zarr or TIFF format. :param name: Name of the image. :param root: Zarr root or directory path for saving the image. :param image: Image data to be saved. :param output_format: Format for saving the image ('zarr' or 'tiff'). - :param file_separator: Separator used in file paths. + :param file_separator: Separator used in file paths for tiff files. :param metadata: Optional metadata for the image. :param compute: Whether to compute the Dask array before saving. + :return: Empty list if the compute flag is True, otherwise it returns a list of + :class:`dask.delayed.Delayed` representing the value to be computed by dask. """ if output_format == "zarr": - _write_zarr_image( + return _write_zarr_image( name=name, root=root, image=image, @@ -226,14 +229,14 @@ def _write_image( elif output_format == "tiff": image_path = f"{root}{file_separator}{name}.tif" if isinstance(image, xr.DataArray): - save_ome_tiff( + return save_ome_tiff( data=image.data, uri=image_path, channel_names=image.coords.get("c"), dim_order="".join(image.dims).upper(), ) else: - save_ome_tiff(data=image, uri=image_path) + return save_ome_tiff(data=image, uri=image_path) else: raise ValueError(f"Unknown output format: {output_format}") From 74795af4ca6c09ab0b3760b386e7ed1cdbbe1e03 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Fri, 8 May 2026 13:13:33 -0400 Subject: [PATCH 13/20] hide annoying messages --- scallops/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scallops/__init__.py b/scallops/__init__.py index a044d2d..0550040 100644 --- a/scallops/__init__.py +++ b/scallops/__init__.py @@ -1 +1,18 @@ +import logging +import warnings + from .experiment.elements import Experiment # noqa: F401 + +logging.getLogger("asyncio").setLevel(logging.CRITICAL) +warnings.filterwarnings( + "ignore", + message="Unclosed client session", + category=ResourceWarning, + module="aiohttp", +) +warnings.filterwarnings( + "ignore", + message="Writing zarr v2 data will no longer be the default", + category=UserWarning, + module="anndata", +) From 6d8667dd631d2a0343f3616bb193381f326ce5fe Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 11 May 2026 12:38:34 -0400 Subject: [PATCH 14/20] threads per worker --- scallops/cli/features.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index 54b3641..1ead122 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -450,8 +450,16 @@ def run_pipeline_compute_features(arguments: argparse.Namespace) -> None: if features_plot is None: features_plot = [] if dask_server_url is None and arguments.dask_cluster is None: + threads_per_worker = 1 + if "sizeshape" in unique_features: + threads_per_worker = 4 + else: + for feature in unique_features: + if feature.startswith("correlationpearsonbox"): + threads_per_worker = 2 + break dask_cluster_parameters = _dask_workers_threads( - threads_per_worker=4 if "sizeshape" in unique_features else 1 + threads_per_worker=threads_per_worker ) objects_dir_sep = None From acb5399009653c75bc314111ac4cc7234639032a Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 11 May 2026 14:42:54 -0400 Subject: [PATCH 15/20] read zarr v5 test --- scallops/tests/test_io.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scallops/tests/test_io.py b/scallops/tests/test_io.py index cb57781..09c8801 100644 --- a/scallops/tests/test_io.py +++ b/scallops/tests/test_io.py @@ -188,6 +188,22 @@ def test_read_tif(use_dask): assert len(data.shape) == 5 +@pytest.mark.io +def test_read_ome_zarr_v5(tmp_path): + from ome_zarr.writer import write_image as ome_zarr_write_image + + path = tmp_path / "test.ome.zarr" + + size_xy = 128 + size_z = 10 + rng = np.random.default_rng(0) + data = rng.poisson(lam=10, size=(size_z, size_xy, size_xy)).astype(np.uint8) + ome_zarr_write_image(data, str(path), axes="zyx") + data_from_zarr = read_image(path) + assert data_from_zarr.dims == ("z", "y", "x") + np.testing.assert_array_equal(data_from_zarr.data, data) + + @pytest.mark.io def test_write_ome_zarr_image_dask(tmp_path): data = read_image( From ccd2cda4cd61b116beec773ba1b74f23ccaf66d3 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 13 May 2026 08:17:08 -0400 Subject: [PATCH 16/20] anndata==0.12.14 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c48ea3a..1133eb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ adjustText==1.3.0 -anndata==0.12.13 +anndata==0.12.14 bioio-nd2==1.7.0 bioio-ome-tiff==1.4.0 bioio-ome-zarr==3.4.0 From c868ee1eb2748e7de0914b1dd4184507e0a3d772 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 14 May 2026 10:36:14 -0400 Subject: [PATCH 17/20] fixed find objects --- scallops/cli/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index 1ead122..abedee6 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -260,7 +260,7 @@ def single_feature( label_prefix = _label_name_to_prefix[label_name] if objects_path is None: logger.info(f"Find {label_name} objects for {image_key}.") - objects_df = find_objects(zarr_labels) + objects_df = find_objects(da.from_zarr(zarr_labels)) objects_path = f"{output_dir}{output_sep}{label_name}{output_sep}{image_key}-objects.parquet" objects_df.index.name = "label" objects_df.columns = f"{label_prefix}_" + objects_df.columns From ca72832f66cdb5bdfc64aafc389df3c9b36f10f0 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 14 May 2026 13:19:45 -0400 Subject: [PATCH 18/20] convert to dask array --- scallops/cli/find_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scallops/cli/find_objects.py b/scallops/cli/find_objects.py index ae13586..6370499 100644 --- a/scallops/cli/find_objects.py +++ b/scallops/cli/find_objects.py @@ -8,6 +8,7 @@ import argparse import json +import dask.array as da import fsspec import zarr from zarr import Group @@ -51,7 +52,7 @@ def _execute( return logger.info(f"Finding objects for {metadata['id']}.") array = file_list[0][list(file_list[0].keys())[0]] - df = find_objects(array) + df = find_objects(da.from_zarr(array)) df.index.name = "label" df.columns = f"{_label_name_to_prefix[label_name]}_" + df.columns _to_parquet( From cf04d0250714d5c0b97ea00cb7d7d509346e03fe Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 18 May 2026 11:26:16 -0400 Subject: [PATCH 19/20] merge with main --- scallops/zarr_io.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 31a5c8e..21d6cd1 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -592,12 +592,15 @@ def _read_zarr_attrs(attrs) -> tuple[dict, dict, list[str]]: if "ome" in attrs: attrs = attrs["ome"] + multiscales = attrs["multiscales"] if len(multiscales) > 0: multiscale0 = multiscales[0] else: - return None, None, None - + return None, attrs, None + if "axes" not in multiscale0: + # not an ome-zarr + return None, attrs, None axes = multiscale0["axes"] dims = [axis["name"] for axis in axes] metadata = multiscale0.get("metadata") From aa57c473282c1cee83a058d12966cb2b0fdd073e Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 21 May 2026 12:40:34 -0400 Subject: [PATCH 20/20] anndata==0.12.16 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1133eb4..442b60a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ adjustText==1.3.0 -anndata==0.12.14 +anndata==0.12.16 bioio-nd2==1.7.0 bioio-ome-tiff==1.4.0 bioio-ome-zarr==3.4.0