diff --git a/python/seqpro/rag/_core.py b/python/seqpro/rag/_core.py index 10cb40e..5bdb24b 100644 --- a/python/seqpro/rag/_core.py +++ b/python/seqpro/rag/_core.py @@ -1786,6 +1786,7 @@ def to_numpy( raise ValueError("cannot convert a jagged Ragged to a dense array") packed = self if self.is_base else self.to_packed() row_len = int(lengths.flat[0]) if lengths.size else 0 + n_rows = lengths.size else: # trust the caller: infer row_len from total // n_rows, no uniformity # scan. numpy's reshape still rejects a total-size mismatch for free. @@ -1799,8 +1800,11 @@ def to_numpy( total = packed._rl.data.shape[0] row_len = total // n_rows if n_rows else 0 leading = packed.shape[: packed.rag_dim] + # When fully indexed to a single ragged axis, `leading` is empty; use the + # explicit row count instead of -1 so an empty row (row_len == 0) reshapes + # cleanly (numpy cannot infer -1 against a 0 dimension). See #67. return packed._rl.data.reshape( # pyrefly: ignore[no-matching-overload] - *(leading or (-1,)), row_len, *packed._rl.data.shape[1:] + *(leading or (n_rows,)), row_len, *packed._rl.data.shape[1:] ) def __array__(self, dtype: Any = None) -> NDArray[Any]: diff --git a/tests/test_ragged_core.py b/tests/test_ragged_core.py index 3565a68..fa84088 100644 --- a/tests/test_ragged_core.py +++ b/tests/test_ragged_core.py @@ -330,6 +330,19 @@ def test_to_numpy_jagged_raises(): rag.to_numpy() +@pytest.mark.parametrize("validate", [True, False]) +def test_to_numpy_indexed_empty_row(validate): + # Regression for #67: an empty row (length 0) obtained by indexing a + # multi-dimensional Ragged (is_base=False) must convert to an empty dense + # array instead of raising on the final reshape. + data = np.array([5, 7], np.int32) + rag = Ragged.from_lengths(data, np.array([[[2], [0]]])) + empty = rag[0, 1, 0] + assert not empty.is_base + out = empty.to_numpy(validate=validate) + assert out.shape == (1, 0) + + def test_ingest_from_ak_numeric(): arr = ak.Array([[1, 2, 3], [4, 5]]) rag = Ragged(arr)