Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 1 addition & 69 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,80 +33,12 @@ Finally, a :doc:`cli` allows comparing two files in any of the formats above, or
directory trees full of files, as long as they can be loaded with
:func:`xarray.open_dataset`.

Examples
--------

.. code::

from recursive_diff import recursive_diff

lhs = {
'foo': [1, 2, ('one', 5.2), 4],
'only_lhs': 1
}
rhs = {
'foo': [1, 2, ['two', 5.200001, 3]],
'only_rhs': 1
}

for diff in recursive_diff(lhs, rhs, abs_tol=.1):
print(diff)

Output::

Pair only_lhs:1 is in LHS only
Pair only_rhs:1 is in RHS only
[foo]: LHS has 1 more elements than RHS: [4]
[foo][2]: object type differs: tuple != list
[foo][2]: RHS has 1 more elements than LHS: [3]
[foo][2][0]: one != two


Or as a unit test:

.. code::

from recursive_diff import recursive_eq

def test1():
recursive_eq(lhs, rhs, abs_tol=.1)

py.test output::

==================== FAILURES ===================
E AssertionError: 6 differences found

-------------- Captured stdout call --------------

Pair only_lhs:1 is in LHS only
Pair only_rhs:1 is in RHS only
[foo]: LHS has 1 more elements than RHS: [4]
[foo][2]: object type differs: tuple != list
[foo][2]: RHS has 1 more elements than LHS: [3]
[foo][2][0]: one != two


Compare two nested directory trees that contain ``.json``, ``.jsonl``, ``.yaml``,
``.msgpack``, ``.nc``, or ``.zarr`` files:

.. code::

from recursive_diff import recursive_open, recursive_eq

lhs = recursive_open("baseline")
rhs = recursive_open("new_output")
recursive_eq(lhs, rhs)


Same as above, but from the command line::

$ recursive-diff -r baseline new_output

Index
-----

.. toctree::

quickstart
installing
api
extend
Expand Down
68 changes: 68 additions & 0 deletions doc/quickstart.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
Quick Start
===========

.. code::

from recursive_diff import recursive_diff

lhs = {
'foo': [1, 2, ('one', 5.2), 4],
'only_lhs': 1
}
rhs = {
'foo': [1, 2, ['two', 5.200001, 3]],
'only_rhs': 1
}

for diff in recursive_diff(lhs, rhs, abs_tol=.1):
print(diff)

Output::

Pair only_lhs:1 is in LHS only
Pair only_rhs:1 is in RHS only
[foo]: LHS has 1 more elements than RHS: [4]
[foo][2]: object type differs: tuple != list
[foo][2]: RHS has 1 more elements than LHS: [3]
[foo][2][0]: one != two


Or as a unit test:

.. code::

from recursive_diff import recursive_eq

def test1():
recursive_eq(lhs, rhs, abs_tol=.1)

py.test output::

==================== FAILURES ===================
E AssertionError: Found 6 differences; see stdout

-------------- Captured stdout call --------------

Pair only_lhs:1 is in LHS only
Pair only_rhs:1 is in RHS only
[foo]: LHS has 1 more elements than RHS: [4]
[foo][2]: object type differs: tuple != list
[foo][2]: RHS has 1 more elements than LHS: [3]
[foo][2][0]: one != two


Compare two nested directory trees that contain ``.json``, ``.jsonl``, ``.yaml``,
``.msgpack``, ``.nc``, or ``.zarr`` files:

.. code::

from recursive_diff import recursive_open, recursive_eq

lhs = recursive_open("baseline")
rhs = recursive_open("new_output")
recursive_eq(lhs, rhs)


Same as above, but from the command line::

$ recursive-diff -r baseline new_output
9 changes: 6 additions & 3 deletions recursive_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from recursive_diff.cast import cast
from recursive_diff.files import open, recursive_open
from recursive_diff.public import diff_arrays, display_diffs
from recursive_diff.recursive_diff import recursive_diff
from recursive_diff.recursive_eq import recursive_eq
from recursive_diff.public import (
diff_arrays,
display_diffs,
recursive_diff,
recursive_eq,
)

try:
__version__ = importlib.metadata.version("recursive_diff")
Expand Down
6 changes: 2 additions & 4 deletions recursive_diff/cli/recursive_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@
logger,
recursive_open,
)
from recursive_diff.files import (
open as open_,
)
from recursive_diff.recursive_diff import recursive_diff
from recursive_diff.files import open as open_
from recursive_diff.public import recursive_diff

LOGFORMAT = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

Expand Down
155 changes: 145 additions & 10 deletions recursive_diff/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,147 @@

from __future__ import annotations

from typing import Any, Collection, Hashable, Literal
from collections.abc import Collection, Generator, Hashable
from typing import Any, Literal

import numpy as np
import pandas as pd

from recursive_diff.dask_compat import compute
from recursive_diff.recursive_diff import _recursive_diff
from recursive_diff.dask_compat import Array, Delayed, compute
from recursive_diff.recursive_diff import recursive_diff_impl


def recursive_diff(
    lhs: Any,
    rhs: Any,
    *,
    rel_tol: float = 1e-09,
    abs_tol: float = 0.0,
    brief_dims: Collection[Hashable] | Literal["all"] = (),
) -> Generator[str]:
    """Compare two objects and yield all differences.

    Each of the two objects must be any of:

    - basic types (int, float, complex, bool, str, bytes)
    - basic collections (list, tuple, dict, set, frozenset)
    - numpy scalar types
    - :class:`numpy.ndarray`
    - :class:`pandas.Series`
    - :class:`pandas.DataFrame`
    - :class:`pandas.Index`
    - :class:`xarray.DataArray`
    - :class:`xarray.Dataset`
    - :class:`dask.delayed.Delayed`
    - any recursive combination of the above
    - any other object (compared with ==)

    Special treatment is reserved to different types:

    - floats and ints are compared with tolerance, using :func:`math.isclose`
    - complex numbers are compared with tolerance, using :func:`math.isclose`
      separately on the real and imaginary parts
    - NaN compares equal to NaN
    - floats without decimals compare as equal to ints
    - complex numbers without imaginary part DO NOT compare as equal to floats,
      as they have substantially different behaviour
    - bools are only equal to other bools
    - numpy arrays are compared elementwise and with tolerance,
      also testing the dtype, using :func:`numpy.isclose(lhs, rhs) <numpy.isclose>`
      for numeric arrays and equality for other dtypes.
    - pandas and Xarray objects are compared elementwise, with tolerance, and
      without order. Duplicate indices are not supported.
    - Xarray dimensions and variables are compared without order
    - collections (list, tuple, dict, set, frozenset) are recursively
      descended into
    - generic/unknown objects are compared with ==

    Custom classes can be registered to benefit from the above behaviour;
    see :func:`cast`.

    :param lhs:
        left-hand-side data structure
    :param rhs:
        right-hand-side data structure
    :param float rel_tol:
        relative tolerance when comparing numbers.
        Applies to floats, integers, and all numpy-based data.
    :param float abs_tol:
        absolute tolerance when comparing numbers.
        Applies to floats, integers, and all numpy-based data.
    :param brief_dims:
        One of:

        - collection of strings representing Xarray dimensions. If one or more
          differences are found along one of these dimensions, only one message
          will be reported, stating the differences count.
        - "all", to produce one line only for every Xarray variable that
          differs

        Omit to output a line for every single different cell.

    Yields strings containing difference messages, prepended by the path to
    the point that differs.
    """
    # Streaming strategy: plain string messages are yielded immediately, so that
    # callers can start printing differences before the recursion has finished.
    # This only holds until the first Delayed / dask-backed result shows up. From
    # that point on everything (lazy results and eager messages alike) is queued,
    # in order, and all lazy results are computed together in a single call at the
    # end.
    pending: list[list[str] | Array | Delayed] = []
    messages = recursive_diff_impl(
        lhs,
        rhs,
        rel_tol=rel_tol,
        abs_tol=abs_tol,
        brief_dims=brief_dims,
        as_dataframes=False,
        path=[],
        seen_lhs={},
        seen_rhs={},
    )
    for item in messages:
        if not isinstance(item, str):
            # Comparison of Delayed objects or Dask-backed arrays
            assert isinstance(item, (Delayed, Array))
            pending.append(item)
        elif pending:
            # A lazy result is already queued: wrap the eager message in a
            # one-element batch so it keeps its position relative to it.
            pending.append([item])
        else:
            yield item

    (resolved,) = compute(pending)
    for batch in resolved:
        yield from batch


def recursive_eq(
    lhs: Any,
    rhs: Any,
    rel_tol: float = 1e-09,
    abs_tol: float = 0.0,
    *,  # TODO move before rel_tol (breaking change)
    brief_dims: Collection[Hashable] | Literal["all"] = (),
) -> None:
    """Assert that two objects are equal, as defined by :func:`recursive_diff`.

    Every difference message is printed to stdout as soon as it is produced.
    Once the comparison is over, an exception is raised if at least one
    message was printed. This is meant to be used inside pytest, where stdout
    is captured.

    All parameters are passed through to :func:`recursive_diff`.

    :raises AssertionError:
        if one or more differences were found. The message states how many,
        unless ``brief_dims`` is set.
    """
    count = 0
    for message in recursive_diff(
        lhs, rhs, rel_tol=rel_tol, abs_tol=abs_tol, brief_dims=brief_dims
    ):
        print(message)
        count += 1

    if not count:
        return

    # With brief_dims the number of printed messages is left out of the error;
    # presumably because a single message can stand for several differences.
    msg = (
        "Found differences; see stdout"
        if brief_dims
        else f"Found {count} differences; see stdout"
    )
    raise AssertionError(msg)


def diff_arrays(
Expand All @@ -19,16 +153,17 @@ def diff_arrays(
abs_tol: float = 0.0,
brief_dims: Collection[Hashable] | Literal["all"] = (),
) -> tuple[dict[str, pd.DataFrame], list[str]]:
"""Compare two objects with :func:`~recursive_diff.recursive_diff`.
"""Compare two objects with :func:`recursive_diff`.

Return tuple of:

- {path: dataframe of differences} for all array objects found.
- {path: dataframe of differences} for all NumPy, Pandas, and Xarray objects found.
Arrays with no differences won't be returned.
- List of all other differences found.
- List of all other differences found. This includes differences in metadata,
shape, dtype, and indices in NumPy, Pandas, and Xarray objects.
"""
diffs = list(
_recursive_diff(
recursive_diff_impl(
lhs,
rhs,
rel_tol=rel_tol,
Expand Down Expand Up @@ -74,10 +209,10 @@ def display_diffs(
abs_tol: float = 0.0,
brief_dims: Collection[Hashable] | Literal["all"] = (),
) -> None:
"""Compare two objects with :func:`~recursive_diff.recursive_diff`.
"""Compare two objects with :func:`recursive_diff`.

Display all differences in Jupyter notebook, with diffs in array objects
displayed as tables.
Display all differences in Jupyter notebook, with diffs in NumPy, Pandas, and Xarray
objects displayed as tables.
"""
from IPython.display import HTML, display

Expand Down
Loading