diff --git a/pyproject.toml b/pyproject.toml index 07ea8b8..9ecb7c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,8 +24,8 @@ dependencies = [ "jinja2>=3.1.6", "numpy>=1.26", "pandas>=3.0.2", - "pyarrow>=16", - "scikit-image>=0.24", + "scikit-image>=0.26", + "scipy>=1.17.1", ] scripts.ZedProfiler = "ZedProfiler.cli:trigger" diff --git a/src/zedprofiler/featurization/intensity.py b/src/zedprofiler/featurization/intensity.py index dd65a0e..52e2b7a 100644 --- a/src/zedprofiler/featurization/intensity.py +++ b/src/zedprofiler/featurization/intensity.py @@ -1,10 +1,195 @@ -"""Intensity featurization module scaffold.""" +"""Intensity feature extraction utilities for 3D image objects. -from __future__ import annotations +Provides functions to compute intensity statistics (mean, median, min, max, +standard deviation, quartiles), edge-based measurements, center-of-mass +coordinates, and mass displacement for segmented 3D objects. +""" -from zedprofiler.exceptions import ZedProfilerError +import numpy +import scipy.ndimage +import skimage.segmentation +from zedprofiler.IO.loading_classes import ObjectLoader -def compute() -> dict[str, list[float]]: - """Placeholder for intensity computation implementation.""" - raise ZedProfilerError("intensity.compute is not implemented yet") + +def get_outline(mask: numpy.ndarray) -> numpy.ndarray: + """ + Get the outline of a 3D mask. + + Parameters + ---------- + mask : numpy.ndarray + The input mask. + + Returns + ------- + numpy.ndarray + The outline of the mask. + """ + outline = numpy.zeros_like(mask) + for z in range(mask.shape[0]): + outline[z] = skimage.segmentation.find_boundaries(mask[z]) + return outline + + +def compute_intensity( # noqa: PLR0915 + object_loader: ObjectLoader, +) -> dict: + """ + Measure the intensity of objects in a 3D image. + + Parameters + ---------- + object_loader : ObjectLoader + The object loader containing the image and label image. + + Returns + ------- + dict + A dictionary containing the measurements for each object. + The keys are the measurement names and the values are the corresponding values. + """ + image_object = object_loader.image + label_object = object_loader.label_image + labels = object_loader.object_ids + + output_dict = { + "object_id": [], + "feature_name": [], + "channel": [], + "compartment": [], + "value": [], + } + # loop through each object and calculate measurements + for index, label in enumerate(labels): + selected_label_object = label_object.copy() + selected_image_object = image_object.copy() + + selected_label_object[selected_label_object != label] = 0 + selected_label_object[selected_label_object > 0] = ( + 1 # binarize the label for volume calcs + ) + selected_image_object[selected_label_object != 1] = 0 + non_zero_pixels_object = selected_image_object[selected_image_object > 0] + if non_zero_pixels_object.size == 0: + non_zero_pixels_object = numpy.array([0], dtype=numpy.float32) + mask_outlines = get_outline(selected_label_object) + + # Extract only coordinates where object exists + z_indices, y_indices, x_indices = numpy.where(selected_label_object > 0) + min_z, max_z = numpy.min(z_indices), numpy.max(z_indices) + min_y, max_y = numpy.min(y_indices), numpy.max(y_indices) + min_x, max_x = numpy.min(x_indices), numpy.max(x_indices) + + # Crop to bounding box for efficiency + cropped_label = selected_label_object[ + min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + ] + cropped_image = selected_image_object[ + min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + ] + + # Create coordinate grids for the bounding box + mesh_z, mesh_y, mesh_x = numpy.mgrid[ + min_z : max_z + 1, # + 1 to include the max index + min_y : max_y + 1, + min_x : max_x + 1, + ] + + # calculate the integrated intensity + integrated_intensity = scipy.ndimage.sum( + selected_image_object, + selected_label_object, + ) + # calculate the volume + volume = numpy.sum(selected_label_object) + + # Skip if volume is zero to avoid division by zero + if volume == 0 or integrated_intensity == 0: + continue + + # calculate the mean intensity + mean_intensity = integrated_intensity / volume + # calculate the standard deviation + std_intensity = numpy.std(non_zero_pixels_object) + # min intensity + min_intensity = numpy.min(non_zero_pixels_object) + # max intensity + max_intensity = numpy.max(non_zero_pixels_object) + # lower quartile + lower_quartile_intensity = numpy.percentile(non_zero_pixels_object, 25) + # upper quartile + upper_quartile_intensity = numpy.percentile(non_zero_pixels_object, 75) + # median intensity + median_intensity = numpy.median(non_zero_pixels_object) + # max intensity location + max_z, max_y, max_x = scipy.ndimage.maximum_position( + selected_image_object, + ) # z, y, x + + # Calculate center of mass (geometric center) using cropped arrays + object_mask = cropped_label > 0 + cm_x = numpy.mean(mesh_x[object_mask]) + cm_y = numpy.mean(mesh_y[object_mask]) + cm_z = numpy.mean(mesh_z[object_mask]) + + # Calculate intensity-weighted center of mass using cropped arrays + intensity_x_coord = cropped_image * mesh_x + intensity_y_coord = cropped_image * mesh_y + intensity_z_coord = cropped_image * mesh_z + i_x = numpy.sum(intensity_x_coord[object_mask]) + i_y = numpy.sum(intensity_y_coord[object_mask]) + i_z = numpy.sum(intensity_z_coord[object_mask]) + # calculate the center of mass + cmi_x = i_x / integrated_intensity + cmi_y = i_y / integrated_intensity + cmi_z = i_z / integrated_intensity + # calculate the center of mass distance + diff_x = cm_x - cmi_x + diff_y = cm_y - cmi_y + diff_z = cm_z - cmi_z + # mass displacement + mass_displacement = numpy.sqrt(diff_x**2 + diff_y**2 + diff_z**2) + # mean absolute deviation + mad_intensity = numpy.mean(numpy.abs(non_zero_pixels_object - mean_intensity)) + edge_count = scipy.ndimage.sum(mask_outlines) + integrated_intensity_edge = numpy.sum(selected_image_object[mask_outlines > 0]) + mean_intensity_edge = integrated_intensity_edge / edge_count + std_intensity_edge = numpy.std(selected_image_object[mask_outlines > 0]) + min_intensity_edge = numpy.min(selected_image_object[mask_outlines > 0]) + max_intensity_edge = numpy.max(selected_image_object[mask_outlines > 0]) + measurements_dict = { + "IntegratedIntensity": integrated_intensity, + "MeanIntensity": mean_intensity, + "StdIntensity": std_intensity, + "MinIntensity": min_intensity, + "MaxIntensity": max_intensity, + "LowerQuartileIntensity": lower_quartile_intensity, + "UpperQuartileIntensity": upper_quartile_intensity, + "MedianIntensity": median_intensity, + "MassDisplacement": mass_displacement, + "MeanAbsoluteDeviationIntensity": mad_intensity, + "IntegratedIntensityEdge": integrated_intensity_edge, + "MeanIntensityEdge": mean_intensity_edge, + "StdIntensityEdge": std_intensity_edge, + "MinIntensityEdge": min_intensity_edge, + "MaxIntensityEdge": max_intensity_edge, + "MaxZ": max_z, + "MaxY": max_y, + "MaxX": max_x, + "CMI.X": cmi_x, + "CMI.Y": cmi_y, + "CMI.Z": cmi_z, + } + + for feature_name, measurement_value in measurements_dict.items(): + if measurement_value.dtype != numpy.float32: + coerced_value = numpy.float32(measurement_value) + else: + coerced_value = measurement_value + output_dict["object_id"].append(numpy.int32(label)) + output_dict["feature_name"].append(feature_name) + output_dict["channel"].append(object_loader.channel) + output_dict["compartment"].append(object_loader.compartment) + output_dict["value"].append(coerced_value) + return output_dict diff --git a/tests/featurization/test_intensity.py b/tests/featurization/test_intensity.py new file mode 100644 index 0000000..0391e02 --- /dev/null +++ b/tests/featurization/test_intensity.py @@ -0,0 +1,143 @@ +from types import SimpleNamespace + +import numpy as np + +from zedprofiler.featurization.intensity import compute_intensity, get_outline + +EXPECTED_MEASUREMENT_COUNT = 42 +EXPECTED_OBJECT_ONE_PEAK_COORD = 0.0 +EXPECTED_OBJECT_TWO_PEAK_COORD = 3.0 + + +def test_get_outline_marks_boundaries_per_slice() -> None: + mask = np.zeros((2, 4, 4), dtype=bool) + mask[0, 1:3, 1:3] = True + mask[1, 0:2, 0:2] = True + + outline = get_outline(mask) + + assert outline.shape == mask.shape + assert outline.dtype == bool + assert outline[0].any() + assert outline[1].any() + + +def test_compute_intensity_returns_measurements_for_objects() -> None: + image = np.zeros((3, 3, 3), dtype=float) + image[0, 0, 0] = 1.0 + image[0, 0, 1] = 2.0 + image[0, 1, 0] = 3.0 + image[0, 1, 1] = 4.0 + image[1, 1, 1] = 5.0 + image[1, 1, 2] = 6.0 + + labels = np.zeros((3, 3, 3), dtype=int) + labels[0, 0:2, 0:2] = 1 + labels[1, 1, 1:3] = 2 + + loader = SimpleNamespace( + image=image, + label_image=labels, + object_ids=[1, 2], + channel="channel_a", + compartment="nuclei", + ) + + result = compute_intensity(loader) + + expected_features = { + "IntegratedIntensity", + "MeanIntensity", + "StdIntensity", + "MinIntensity", + "MaxIntensity", + "LowerQuartileIntensity", + "UpperQuartileIntensity", + "MedianIntensity", + "MassDisplacement", + "MeanAbsoluteDeviationIntensity", + "IntegratedIntensityEdge", + "MeanIntensityEdge", + "StdIntensityEdge", + "MinIntensityEdge", + "MaxIntensityEdge", + "MaxZ", + "MaxY", + "MaxX", + "CMI.X", + "CMI.Y", + "CMI.Z", + } + + assert set(result) == { + "object_id", + "feature_name", + "channel", + "compartment", + "value", + } + assert len(result["object_id"]) == EXPECTED_MEASUREMENT_COUNT + assert set(result["feature_name"]) == expected_features + assert result["channel"] == ["channel_a"] * EXPECTED_MEASUREMENT_COUNT + assert result["compartment"] == ["nuclei"] * EXPECTED_MEASUREMENT_COUNT + assert len(result["value"]) == EXPECTED_MEASUREMENT_COUNT + + +def test_compute_intensity_skips_empty_object_without_signal() -> None: + image = np.zeros((2, 2, 2), dtype=float) + labels = np.zeros((2, 2, 2), dtype=int) + labels[0, 0, 0] = 1 + + loader = SimpleNamespace( + image=image, + label_image=labels, + object_ids=[1], + channel="channel_b", + compartment="cell", + ) + + result = compute_intensity(loader) + + assert result == { + "object_id": [], + "feature_name": [], + "channel": [], + "compartment": [], + "value": [], + } + + +def test_compute_intensity_peak_location_is_within_object() -> None: + image = np.zeros((4, 4, 4), dtype=float) + image[0, 0, 0] = 10.0 + image[3, 3, 3] = 100.0 + + labels = np.zeros((4, 4, 4), dtype=int) + labels[0, 0, 0] = 1 + labels[3, 3, 3] = 2 + + loader = SimpleNamespace( + image=image, + label_image=labels, + object_ids=[1, 2], + channel="channel_c", + compartment="nuclei", + ) + + result = compute_intensity(loader) + + def _value_for(object_id: int, feature_name: str) -> float: + for idx, current_object_id in enumerate(result["object_id"]): + if ( + int(current_object_id) == object_id + and result["feature_name"][idx] == feature_name + ): + return float(result["value"][idx]) + raise AssertionError(f"Missing {feature_name} for object {object_id}") + + assert _value_for(1, "MaxZ") == EXPECTED_OBJECT_ONE_PEAK_COORD + assert _value_for(1, "MaxY") == EXPECTED_OBJECT_ONE_PEAK_COORD + assert _value_for(1, "MaxX") == EXPECTED_OBJECT_ONE_PEAK_COORD + assert _value_for(2, "MaxZ") == EXPECTED_OBJECT_TWO_PEAK_COORD + assert _value_for(2, "MaxY") == EXPECTED_OBJECT_TWO_PEAK_COORD + assert _value_for(2, "MaxX") == EXPECTED_OBJECT_TWO_PEAK_COORD diff --git a/uv.lock b/uv.lock index 86807ad..46e0a6a 100644 --- a/uv.lock +++ b/uv.lock @@ -800,6 +800,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646 }, ] +[[package]] +name = "imageio" +version = "2.37.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646 }, +] + [[package]] name = "imagesize" version = "2.0.0" @@ -1191,15 +1205,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/5a/736dd2f4535dbf3bf26523f9158c011389ef88dd06ec2eef67fd744f1c7b/jupytext-1.19.1-py3-none-any.whl", hash = "sha256:d8975035155d034bdfde5c0c37891425314b7ea8d3a6c4b5d18c294348714cd9", size = 170478 }, ] -[[package]] -name = "lark" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151 }, -] - [[package]] name = "lazy-loader" version = "0.5" @@ -2339,7 +2344,8 @@ dependencies = [ { name = "imageio" }, { name = "lazy-loader" }, { name = "networkx" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "packaging" }, { name = "pillow" }, { name = "scipy" }, @@ -2386,7 +2392,8 @@ name = "scipy" version = "1.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822 } wheels = [ @@ -2667,7 +2674,8 @@ name = "tifffile" version = "2026.4.11" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d7/4a/e687f5957fead200faad58dbf9c9431a2bbb118040e96f5fb8a55f7ebc50/tifffile-2026.4.11.tar.gz", hash = "sha256:17758ff0c0d4db385792a083ad3ca51fcb0f4d942642f4d8f8bc1287fdcf17bc", size = 394956 } wheels = [ @@ -2903,8 +2911,8 @@ dependencies = [ { name = "jinja2" }, { name = "numpy" }, { name = "pandas" }, - { name = "pyarrow" }, { name = "scikit-image" }, + { name = "scipy" }, ] [package.dev-dependencies] @@ -2936,8 +2944,8 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "numpy", specifier = ">=1.26" }, { name = "pandas", specifier = ">=3.0.2" }, - { name = "pyarrow", specifier = ">=16.0" }, - { name = "scikit-image", specifier = ">=0.24" }, + { name = "scikit-image", specifier = ">=0.26.0" }, + { name = "scipy", specifier = ">=1.17.1" }, ] [package.metadata.requires-dev]