From 0e73573dd3bb8369c78598fcfcb6d93fdbf9e2a3 Mon Sep 17 00:00:00 2001 From: Matt Whiteway Date: Thu, 9 Apr 2026 16:29:54 -0400 Subject: [PATCH 1/4] integration test fixup --- tests/conftest.py | 99 ++++++++++++++++++- tests/scripts/README.md | 83 ++++++++++++++++ .../scripts/test_ibl_paw_multicam_example.py | 17 +++- tests/scripts/test_ibl_pupil_example.py | 12 ++- .../scripts/test_mirrored_multicam_example.py | 14 ++- tests/scripts/test_multicam_example.py | 26 +++-- tests/scripts/test_singlecam_example.py | 14 ++- tests/test_multicam_smoother.py | 3 - 8 files changed, 238 insertions(+), 30 deletions(-) create mode 100644 tests/scripts/README.md diff --git a/tests/conftest.py b/tests/conftest.py index a9a9691..ad7c2dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,19 +1,65 @@ +import io +import shutil import subprocess +import urllib.request +import zipfile +from pathlib import Path from typing import Callable +import pandas as pd import pytest +# URL of the zipped golden files. Update this after uploading a new release to GitHub. 
+GOLDEN_URL = 'https://github.com/paninski-lab/eks-test-fixtures/releases/download/v1/eks_golden.zip' + + +def pytest_addoption(parser): + parser.addoption( + '--generate-golden', + action='store_true', + default=False, + help='Generate golden output files instead of comparing against them.', + ) + parser.addoption( + '--golden-dir', + action='store', + default=None, + help='Directory to write golden files to (used with --generate-golden).', + ) + + +@pytest.fixture(scope='session') +def golden_dir(tmp_path_factory, pytestconfig): + """Return path to golden files directory, downloading and extracting if necessary.""" + if pytestconfig.getoption('--generate-golden'): + golden_dir_opt = pytestconfig.getoption('--golden-dir') + if golden_dir_opt is None: + raise ValueError('--golden-dir must be specified when using --generate-golden') + path = Path(golden_dir_opt) + path.mkdir(parents=True, exist_ok=True) + return path + + if GOLDEN_URL is None: + return None + + cache_dir = tmp_path_factory.mktemp('golden') + with urllib.request.urlopen(GOLDEN_URL) as response: + data = response.read() + with zipfile.ZipFile(io.BytesIO(data)) as zf: + zf.extractall(cache_dir) + return cache_dir + @pytest.fixture def run_script() -> Callable: - def _run_script(script_file, input_dir, output_dir, **kwargs): + def _run_script(script_file, input_dir, output_dir, **kwargs) -> Path: command_str = [ 'python', script_file, '--input-dir', input_dir, - '--save-dir', output_dir, + '--save-dir', str(output_dir), '--verbose', 'True', ] for key, arg in kwargs.items(): @@ -25,5 +71,54 @@ def _run_script(script_file, input_dir, output_dir, **kwargs): process = subprocess.run(command_str) assert process.returncode == 0 + return Path(str(output_dir)) return _run_script + + +@pytest.fixture +def compare_to_golden(golden_dir, pytestconfig): + """Fixture that either saves CSV outputs as golden files, or compares against them. 
+ + In generate mode (--generate-golden), copies all CSVs from output_dir into + golden_dir/<test_name>/. In compare mode, downloads golden files from the URL + and asserts numerical equality against them. + """ + + def _compare(test_name: str, output_dir: Path): + csv_files = sorted(output_dir.glob('*.csv')) + assert len(csv_files) > 0, f'No CSV files found in {output_dir}' + + if pytestconfig.getoption('--generate-golden'): + dest = golden_dir / test_name + dest.mkdir(parents=True, exist_ok=True) + for csv_file in csv_files: + shutil.copy(csv_file, dest / csv_file.name) + return + + if golden_dir is None: + pytest.skip( + 'No golden URL provided (set --golden-url or EKS_GOLDEN_URL); skipping comparison.' + ) + + golden_test_dir = golden_dir / test_name + assert golden_test_dir.exists(), ( + f'Golden directory not found for test "{test_name}": {golden_test_dir}' + ) + + for csv_file in csv_files: + golden_csv = golden_test_dir / csv_file.name + assert golden_csv.exists(), ( + f'Golden file not found: {golden_csv}. ' + f'Run with --generate-golden to regenerate.' + ) + actual = pd.read_csv(csv_file, index_col=0) + expected = pd.read_csv(golden_csv, index_col=0) + pd.testing.assert_frame_equal( + actual, expected, + check_exact=False, + atol=1e-5, + obj=f'{test_name}/{csv_file.name}', + ) + + return _compare diff --git a/tests/scripts/README.md b/tests/scripts/README.md new file mode 100644 index 0000000..f6a3f22 --- /dev/null +++ b/tests/scripts/README.md @@ -0,0 +1,83 @@ +from tests.conftest import GOLDEN_URL + +# Integration Script Tests + +These tests run the example scripts end-to-end and optionally compare their CSV outputs +against a set of **golden files** — a reference snapshot of known-good outputs. + +## How tests work + +- **Without golden files**: tests only verify that the scripts exit without error (original behavior). 
+- **With golden files**: after each script runs, all CSV outputs are compared against the + corresponding golden CSVs using `pandas.testing.assert_frame_equal` with `atol=1e-5`. + +--- + +## Generating new golden files + +Run this whenever you want to establish a new baseline (e.g. after an intentional change +to the algorithm, or when setting up golden files for the first time). + +```bash +pytest tests/scripts/ \ + --generate-golden \ + --golden-dir /tmp/eks_golden +``` + +This runs every integration test and copies the CSV outputs into +`/tmp/eks_golden/<test_name>/`. The directory structure will look like: + +``` +/tmp/eks_golden/ + test_singlecam_example_defaults/ + eks_singlecam.csv + test_singlecam_example_fixed_smooth_param/ + eks_singlecam.csv + test_multicam_example_defaults/ + multicam_top_results.csv + multicam_bot_results.csv + test_multicam_example_defaults_nonlinear/ + multicam_Cam-A_results.csv + multicam_Cam-B_results.csv + multicam_Cam-C_results.csv + multicam_3d_results.csv + ... +``` + +### Zip and upload + +```bash +cd /tmp/eks_golden +zip -r eks_golden.zip . +``` + +Upload `eks_golden.zip` to your hosting location. The zip must have the test-name +folders at its root (no extra top-level wrapper directory) — the `cd` + `.` zip +command above ensures this. + +### Update the URL in conftest.py + +Once uploaded, copy the direct download URL of the zip asset from the GitHub release +and set it as `GOLDEN_URL` near the top of `tests/conftest.py`: + +```python +GOLDEN_URL = 'https://github.com/paninski-lab/eks-test-fixtures/releases/download/vX/eks_golden.zip' +``` + +Commit this change so CI and other contributors pick it up automatically. + +--- + +## Running tests with golden comparison + +```bash +pytest tests/scripts/ +``` + +The golden zip is downloaded once per test session and cached in a temporary directory. +Golden comparison is skipped automatically when `GOLDEN_URL = None` in `conftest.py`. 
+ +### Without golden comparison + +Set `GOLDEN_URL = None` in `tests/conftest.py`. Tests will only verify that the scripts +exit without error (original behavior). diff --git a/tests/scripts/test_ibl_paw_multicam_example.py b/tests/scripts/test_ibl_paw_multicam_example.py index ee42163..e3322a6 100644 --- a/tests/scripts/test_ibl_paw_multicam_example.py +++ b/tests/scripts/test_ibl_paw_multicam_example.py @@ -1,18 +1,25 @@ -def test_ibl_paw_multicam_example_defaults(run_script, tmpdir, pytestconfig): +def test_ibl_paw_multicam_example_defaults( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'ibl_paw_multiview_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'ibl-paw'), output_dir=tmpdir, ) + compare_to_golden(request.node.name, output_dir) -def test_ibl_paw_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): - run_script( +def test_ibl_paw_multicam_example_fixed_smooth_param( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): + + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'ibl_paw_multiview_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'ibl-paw'), output_dir=tmpdir, - s=10 + s=10, ) + compare_to_golden(request.node.name, output_dir) diff --git a/tests/scripts/test_ibl_pupil_example.py b/tests/scripts/test_ibl_pupil_example.py index 458606d..f891af0 100644 --- a/tests/scripts/test_ibl_pupil_example.py +++ b/tests/scripts/test_ibl_pupil_example.py @@ -1,20 +1,24 @@ -def test_ibl_pupil_example_defaults(run_script, tmpdir, pytestconfig): +def test_ibl_pupil_example_defaults(run_script, compare_to_golden, tmpdir, pytestconfig, request): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'ibl_pupil_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'ibl-pupil'), output_dir=tmpdir, ) + 
compare_to_golden(request.node.name, output_dir) -def test_ibl_pupil_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): +def test_ibl_pupil_example_fixed_smooth_param( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'ibl_pupil_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'ibl-pupil'), output_dir=tmpdir, diameter_s=0.99, com_s=0.99, ) + compare_to_golden(request.node.name, output_dir) diff --git a/tests/scripts/test_mirrored_multicam_example.py b/tests/scripts/test_mirrored_multicam_example.py index 24fb938..e4ae7e1 100644 --- a/tests/scripts/test_mirrored_multicam_example.py +++ b/tests/scripts/test_mirrored_multicam_example.py @@ -1,19 +1,24 @@ -def test_mirrored_multicam_example_defaults(run_script, tmpdir, pytestconfig): +def test_mirrored_multicam_example_defaults( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'mirrored_multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse'), output_dir=tmpdir, bodypart_list=['paw1LH', 'paw2LF'], # , 'paw3RF', 'paw4RH'], # unneeded computation camera_names=['top', 'bot'], ) + compare_to_golden(request.node.name, output_dir) -def test_mirrored_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): +def test_mirrored_multicam_example_fixed_smooth_param( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'mirrored_multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse'), output_dir=tmpdir, @@ -21,3 +26,4 @@ def test_mirrored_multicam_example_fixed_smooth_param(run_script, tmpdir, pytest camera_names=['top', 'bot'], s=10 ) + compare_to_golden(request.node.name, output_dir) diff --git 
a/tests/scripts/test_multicam_example.py b/tests/scripts/test_multicam_example.py index 2d41007..7908fc5 100644 --- a/tests/scripts/test_multicam_example.py +++ b/tests/scripts/test_multicam_example.py @@ -1,19 +1,22 @@ -def test_multicam_example_defaults(run_script, tmpdir, pytestconfig): +def test_multicam_example_defaults(run_script, compare_to_golden, tmpdir, pytestconfig, request): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse-separate'), output_dir=tmpdir, bodypart_list=['paw1LH', 'paw2LF'], # , 'paw3RF', 'paw4RH'], # unneeded computation camera_names=['top', 'bot'], ) + compare_to_golden(request.node.name, output_dir) -def test_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): +def test_multicam_example_fixed_smooth_param( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse-separate'), output_dir=tmpdir, @@ -21,11 +24,14 @@ def test_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): camera_names=['top', 'bot'], s=10, ) + compare_to_golden(request.node.name, output_dir) -def test_multicam_example_defaults_nonlinear(run_script, tmpdir, pytestconfig): +def test_multicam_example_defaults_nonlinear( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'fly'), output_dir=tmpdir, @@ -33,11 +39,14 @@ def test_multicam_example_defaults_nonlinear(run_script, tmpdir, pytestconfig): camera_names=['Cam-A', 'Cam-B', 'Cam-C'], calibration=str(pytestconfig.rootpath / 'data' / 'fly' / 'calibration.toml'), ) + 
compare_to_golden(request.node.name, output_dir) -def test_multicam_example_fixed_smooth_param_nonlinear(run_script, tmpdir, pytestconfig): +def test_multicam_example_fixed_smooth_param_nonlinear( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'), input_dir=str(pytestconfig.rootpath / 'data' / 'fly'), output_dir=tmpdir, @@ -46,3 +55,4 @@ def test_multicam_example_fixed_smooth_param_nonlinear(run_script, tmpdir, pytes calibration=str(pytestconfig.rootpath / 'data' / 'fly' / 'calibration.toml'), s=10, ) + compare_to_golden(request.node.name, output_dir) diff --git a/tests/scripts/test_singlecam_example.py b/tests/scripts/test_singlecam_example.py index bb681c1..66c2e52 100644 --- a/tests/scripts/test_singlecam_example.py +++ b/tests/scripts/test_singlecam_example.py @@ -1,19 +1,25 @@ -def test_singlecam_example_defaults(run_script, tmpdir, pytestconfig): +def test_singlecam_example_defaults( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'singlecam_example.py'), input_dir=str(pytestconfig.rootpath / 'data/ibl-pupil'), output_dir=tmpdir, ) + compare_to_golden(request.node.name, output_dir) -def test_singlecam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig): +def test_singlecam_example_fixed_smooth_param( + run_script, compare_to_golden, tmpdir, pytestconfig, request, +): - run_script( + output_dir = run_script( script_file=str(pytestconfig.rootpath / 'scripts' / 'singlecam_example.py'), input_dir=str(pytestconfig.rootpath / 'data/ibl-pupil'), output_dir=tmpdir, s=10, ) + compare_to_golden(request.node.name, output_dir) diff --git a/tests/test_multicam_smoother.py b/tests/test_multicam_smoother.py index bcaf518..5c3f03c 100644 --- a/tests/test_multicam_smoother.py +++ b/tests/test_multicam_smoother.py @@ -1,5 
+1,3 @@ -import os - import cv2 import jax import jax.numpy as jnp @@ -354,7 +352,6 @@ def test_center_predictions_min_frames(): - Covariance projection via Jacobian vs finite differences """ -os.environ.setdefault("JAX_ENABLE_X64", "true") jax.config.update("jax_enable_x64", True) From 52deb748b86fc30f310f7938fb3bc9ad961c01be Mon Sep 17 00:00:00 2001 From: Matt Whiteway Date: Thu, 9 Apr 2026 16:30:58 -0400 Subject: [PATCH 2/4] lint --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index ad7c2dc..aa811e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ import pytest # URL of the zipped golden files. Update this after uploading a new release to GitHub. -GOLDEN_URL = 'https://github.com/paninski-lab/eks-test-fixtures/releases/download/v1/eks_golden.zip' +GOLDEN_URL = 'https://github.com/paninski-lab/eks-test-fixtures/releases/download/v1/eks_golden.zip' # noqa: E501 def pytest_addoption(parser): From 47d88b478f1a15f83899fe48ed4df519f8cd0acb Mon Sep 17 00:00:00 2001 From: Matt Whiteway Date: Thu, 9 Apr 2026 16:46:30 -0400 Subject: [PATCH 3/4] loosen test tolerance --- tests/conftest.py | 6 ++---- tests/scripts/README.md | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index aa811e5..be7f7dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,9 +97,7 @@ def _compare(test_name: str, output_dir: Path): return if golden_dir is None: - pytest.skip( - 'No golden URL provided (set --golden-url or EKS_GOLDEN_URL); skipping comparison.' 
- ) + pytest.skip('GOLDEN_URL is None in conftest.py; skipping golden comparison.') golden_test_dir = golden_dir / test_name assert golden_test_dir.exists(), ( @@ -117,7 +115,7 @@ def _compare(test_name: str, output_dir: Path): pd.testing.assert_frame_equal( actual, expected, check_exact=False, - atol=1e-5, + atol=1e-4, obj=f'{test_name}/{csv_file.name}', ) diff --git a/tests/scripts/README.md b/tests/scripts/README.md index f6a3f22..77554e2 100644 --- a/tests/scripts/README.md +++ b/tests/scripts/README.md @@ -9,7 +9,7 @@ against a set of **golden files** — a reference snapshot of known-good outputs - **Without golden files**: tests only verify that the scripts exit without error (original behavior). - **With golden files**: after each script runs, all CSV outputs are compared against the - corresponding golden CSVs using `pandas.testing.assert_frame_equal` with `atol=1e-5`. + corresponding golden CSVs using `pandas.testing.assert_frame_equal` with `atol=1e-4`. --- From 4b69e389480af0fc4a32c5ec337645230b73b5f8 Mon Sep 17 00:00:00 2001 From: Matt Whiteway Date: Thu, 9 Apr 2026 16:57:49 -0400 Subject: [PATCH 4/4] pandas->np testing --- tests/conftest.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index be7f7dd..4cdb803 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import Callable +import numpy as np import pandas as pd import pytest @@ -112,11 +113,19 @@ def _compare(test_name: str, output_dir: Path): ) actual = pd.read_csv(csv_file, index_col=0) expected = pd.read_csv(golden_csv, index_col=0) - pd.testing.assert_frame_equal( - actual, expected, - check_exact=False, + assert actual.shape == expected.shape, ( + f'{test_name}/{csv_file.name}: shape mismatch ' + f'{actual.shape} != {expected.shape}' + ) + assert list(actual.columns) == list(expected.columns), ( + f'{test_name}/{csv_file.name}: column mismatch' + ) + 
np.testing.assert_allclose( + actual.select_dtypes('number').values, + expected.select_dtypes('number').values, + rtol=0, atol=1e-4, - obj=f'{test_name}/{csv_file.name}', + err_msg=f'{test_name}/{csv_file.name}', ) return _compare