diff --git a/.github/workflows/retile.yml b/.github/workflows/retile.yml index 5d61783..623a318 100644 --- a/.github/workflows/retile.yml +++ b/.github/workflows/retile.yml @@ -7,189 +7,124 @@ permissions: contents: read env: - LD_LIBRARY_PATH: "${{ github.workspace }}/install/lib64" - PYTHONPATH: "${{ github.workspace }}/install/python" + PIP_CACHE_KEY: retile-pip-${{ github.run_id }} PYTHONUNBUFFERED: 1 - RUNTIME_DEPS: "python3 python3-boto3 python3-pillow python3-requests \ - zlib libpng libjpeg-turbo libtiff openjpeg2 gdk-pixbuf2 \ - gdk-pixbuf2-modules-extra libxml2 sqlite cairo glib2" + PYTHON_VER: "3.14t" + PYTHON_DEPS: "boto3 openslide-bin openslide-python requests" jobs: - build: - name: Build releases - runs-on: ubuntu-latest - container: registry.fedoraproject.org/fedora:latest - steps: - - name: Install dependencies - run: | - dnf install -y \ - jq xz \ - python3 python3-devel python3-pip python3-pillow python3-wheel \ - gcc meson pkg-config \ - zlib-devel \ - libpng-devel \ - libjpeg-turbo-devel \ - libtiff-devel \ - openjpeg2-devel \ - gdk-pixbuf2-devel \ - gdk-pixbuf2-modules-extra \ - libxml2-devel \ - sqlite-devel \ - cairo-devel \ - glib2-devel - - name: Download releases - run: | - set -euxo pipefail - get_release() { - # Query GitHub for latest release - curl -s -H "Accept: application/vnd.github.v3+json" \ - "https://api.github.com/repos/openslide/$1/releases/latest" \ - > "$1-release.json" - local env_var=$(echo "$1" | tr a-z- A-Z_)_VERSION - local version=$(jq -r .tag_name < "$1-release.json" | sed s/^v//) - echo "${env_var}=${version}" >> ${GITHUB_ENV} - - # Download release - local url=$(jq -r '.assets | map(select(.content_type == "application/x-xz")) | .[0].browser_download_url' < "$1-release.json") - curl -LO "${url}" - - # Unpack - tar xf "$(echo "$1" | tr - _)-${version}.tar.xz" - } - get_release openslide - get_release openslide-python - - name: Build OpenSlide - working-directory: openslide-${{ env.OPENSLIDE_VERSION }} - run: | - meson setup builddir --prefix=${GITHUB_WORKSPACE}/install - meson compile -C builddir - - trap "cat builddir/meson-logs/testlog.txt" ERR - meson test -C builddir - trap - ERR - - meson install -C builddir - - name: Build OpenSlide Python - working-directory: openslide_python-${{ env.OPENSLIDE_PYTHON_VERSION }} - run: | - pip install -t ${GITHUB_WORKSPACE}/install/python . - - name: Upload build - uses: actions/upload-artifact@v5 - with: - name: build - path: install setup: name: Set up tiling environment: demo-site - needs: build runs-on: ubuntu-latest - container: registry.fedoraproject.org/fedora:latest steps: - name: Check out repo uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 with: - path: website + python-version: ${{ env.PYTHON_VER }} - name: Install dependencies - run: dnf install -y ${RUNTIME_DEPS} - - name: Download build - uses: actions/download-artifact@v6 + run: pip install --break-system-packages ${PYTHON_DEPS} + - name: Cache dependencies + uses: actions/cache/save@v4 with: - name: build - path: install + key: ${{ env.PIP_CACHE_KEY }} + path: /home/runner/.cache/pip - name: Set up tiling id: start-tiling - working-directory: website/demo + working-directory: demo env: AWS_ACCESS_KEY_ID: ${{ secrets.DEMO_TILER_AWS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEMO_TILER_AWS_SECRET_KEY }} run: | - ./_synctiles.py start \ - "${{ vars.DEMO_TILER_BUCKET }}" \ - ${GITHUB_WORKSPACE}/context \ - matrix + ./_synctiles.py start "${{ vars.DEMO_TILER_BUCKET }}" context matrix echo "slide-matrix=$(cat matrix)" >> $GITHUB_OUTPUT - name: Upload context uses: actions/upload-artifact@v5 with: name: context - path: context + path: demo/context outputs: slide-matrix: ${{ steps.start-tiling.outputs.slide-matrix }} + tile: name: Tile environment: demo-site needs: setup runs-on: ubuntu-latest - container: registry.fedoraproject.org/fedora:latest strategy: fail-fast: false matrix: ${{ fromJson(needs.setup.outputs.slide-matrix) }} steps: - name: Check out repo uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 with: - path: website - - name: Install dependencies - run: dnf install -y ${RUNTIME_DEPS} - - name: Download build - uses: actions/download-artifact@v6 + python-version: ${{ env.PYTHON_VER }} + - name: Cache dependencies + uses: actions/cache/restore@v4 with: - name: build - path: install + key: ${{ env.PIP_CACHE_KEY }} + path: /home/runner/.cache/pip + fail-on-cache-miss: true + - name: Install dependencies + run: pip install --break-system-packages ${PYTHON_DEPS} - name: Download context uses: actions/download-artifact@v6 with: name: context + path: demo - name: Tile slide - working-directory: website/demo + working-directory: demo env: AWS_ACCESS_KEY_ID: ${{ secrets.DEMO_TILER_AWS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEMO_TILER_AWS_SECRET_KEY }} run: | - ./_synctiles.py tile \ - ${GITHUB_WORKSPACE}/context \ - "${{ matrix.slide }}" \ - ${GITHUB_WORKSPACE}/summary + ./_synctiles.py tile context "${{ matrix.slide }}" summary echo "ARTIFACT_NAME=summary-$(echo "${{ matrix.slide }}" | tr -c "a-zA-Z0-9\n" _)" >> $GITHUB_ENV - name: Upload summary uses: actions/upload-artifact@v5 with: name: ${{ env.ARTIFACT_NAME }} - path: summary + path: demo/summary + finish: name: Finish tiling environment: demo-site needs: tile runs-on: ubuntu-latest - container: registry.fedoraproject.org/fedora:latest steps: - name: Check out repo uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 with: - path: website - - name: Install dependencies - run: dnf install -y ${RUNTIME_DEPS} - - name: Download build - uses: actions/download-artifact@v6 + python-version: ${{ env.PYTHON_VER }} + - name: Cache dependencies + uses: actions/cache/restore@v4 with: - name: build - path: install + key: ${{ env.PIP_CACHE_KEY }} + path: /home/runner/.cache/pip + fail-on-cache-miss: true + - name: Install dependencies + run: pip install --break-system-packages ${PYTHON_DEPS} - name: Download context uses: actions/download-artifact@v6 with: name: context + path: demo - name: Download summaries uses: actions/download-artifact@v6 with: pattern: "summary-*" - path: summary + path: demo/summary merge-multiple: true - name: Finish tiling - working-directory: website/demo + working-directory: demo env: AWS_ACCESS_KEY_ID: ${{ secrets.DEMO_TILER_AWS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEMO_TILER_AWS_SECRET_KEY }} run: | - ./_synctiles.py finish \ - ${GITHUB_WORKSPACE}/context \ - ${GITHUB_WORKSPACE}/summary + ./_synctiles.py finish context summary diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a7925c..ba05e88 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.21.0 + rev: v3.21.2 hooks: - id: pyupgrade name: Modernize Python code @@ -28,7 +28,7 @@ repos: name: Reorder Python imports with isort - repo: https://github.com/psf/black - rev: 25.9.0 + rev: 25.11.0 hooks: - id: black name: Format Python code with black @@ -47,7 +47,7 @@ repos: additional_dependencies: [flake8-bugbear, Flake8-pyproject] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 + rev: v1.19.0 hooks: - id: mypy name: Check Python types diff --git a/demo/_synctiles.py b/demo/_synctiles.py index 67631da..9c15c49 100755 --- a/demo/_synctiles.py +++ b/demo/_synctiles.py @@ -3,7 +3,7 @@ # _synctiles - Generate and upload Deep Zoom tiles for test slides # # Copyright (c) 2010-2015 Carnegie Mellon University -# Copyright (c) 2016-2023 Benjamin Gilbert +# Copyright (c) 2016-2025 Benjamin Gilbert # # This program is free software; you can redistribute it and/or modify it # under the terms of version 2.1 of the GNU Lesser General Public License @@ -23,12 +23,11 @@ from argparse import ArgumentParser, FileType import base64 from collections.abc import Callable, Iterator +from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from hashlib import md5, sha256 from io import BytesIO import json -from multiprocessing import Pool -from multiprocessing.pool import Pool as PoolType import os from pathlib import Path, PurePath import re @@ -45,14 +44,20 @@ from PIL.ImageCms import ImageCmsProfile import boto3 import openslide -from openslide import AbstractSlide, ImageSlide, OpenSlide, OpenSlideError +from openslide import ( + AbstractSlide, + ImageSlide, + OpenSlide, + OpenSlideCache, + OpenSlideError, +) from openslide.deepzoom import DeepZoomGenerator import requests if TYPE_CHECKING: from mypy_boto3_s3.service_resource import Object -STAMP_VERSION = 'size-510' # change to retile without OpenSlide version bump +STAMP_VERSION = 'threads' # change to retile without OpenSlide version bump CORS_ORIGINS = ['*'] DOWNLOAD_BASE_URL = 'https://openslide.cs.cmu.edu/download/openslide-testdata/' DOWNLOAD_INDEX = 'index.json' @@ -104,9 +109,6 @@ KeyMd5s = dict[PurePath, str] TestDataIndex = dict[str, 'TestDataSlide'] -dz_generators: dict[str | None, Generator] = {} -storage: S3Storage | None = None - class TestDataSlide(TypedDict): """One openslide-testdata slide from index.json.""" @@ -209,7 +211,7 @@ def slugify(text: str) -> str: class Generator: def __init__(self, slide: AbstractSlide): - self._dz = DeepZoomGenerator( + self.dz = DeepZoomGenerator( slide, TILE_SIZE, OVERLAP, limit_bounds=LIMIT_BOUNDS ) self._transform = self._get_transform(slide) @@ -236,7 +238,7 @@ def xfrm(img: Image) -> None: return xfrm def get_tile(self, level: int, address: tuple[int, int]) -> Image: - tile: Image = self._dz.get_tile(level, address) + tile: Image = self.dz.get_tile(level, address) self._transform(tile) return tile @@ -268,66 +270,53 @@ def upload_metadata( ) -def pool_init(bucket_name: str, slide_path: Path) -> None: - global storage, dz_generators - storage = S3Storage(bucket_name) - slide = OpenSlide(slide_path) - dz_generators = {None: Generator(slide)} - for name, image in slide.associated_images.items(): - dz_generators[name] = Generator(ImageSlide(image)) - - @dataclass class Tile: - associated: str | None + storage: S3Storage + generator: Generator level: int address: tuple[int, int] key_name: PurePath cur_md5: str | None - def sync(self) -> PurePath | BaseException: + def sync(self) -> PurePath: """Generate and possibly upload a tile.""" - assert storage is not None - try: - tile = dz_generators[self.associated].get_tile( - self.level, self.address - ) - buf = BytesIO() - tile.save( - buf, - FORMAT, - quality=QUALITY, - icc_profile=tile.info.get('icc_profile'), + tile = self.generator.get_tile(self.level, self.address) + buf = BytesIO() + tile.save( + buf, + FORMAT, + quality=QUALITY, + icc_profile=tile.info.get('icc_profile'), + ) + new_md5 = md5(buf.getbuffer()) + if self.cur_md5 != new_md5.hexdigest(): + self.storage.object(self.key_name).put( + Body=buf.getvalue(), + CacheControl=CACHE_CONTROL_CACHE, + ContentMD5=base64.b64encode(new_md5.digest()).decode(), + ContentType=f'image/{FORMAT}', ) - new_md5 = md5(buf.getbuffer()) - if self.cur_md5 != new_md5.hexdigest(): - storage.object(self.key_name).put( - Body=buf.getvalue(), - CacheControl=CACHE_CONTROL_CACHE, - ContentMD5=base64.b64encode(new_md5.digest()).decode(), - ContentType=f'image/{FORMAT}', - ) - return self.key_name - except BaseException as e: # noqa: B036 - return e + return self.key_name @classmethod def enumerate( cls, - associated: str | None, - dz: DeepZoomGenerator, + storage: S3Storage, + generator: Generator, key_imagepath: PurePath, key_md5sums: KeyMd5s, ) -> Iterator[Self]: """Enumerate tiles in a single image.""" - for level in range(dz.level_count): + for level in range(generator.dz.level_count): key_levelpath = key_imagepath / str(level) - cols, rows = dz.level_tiles[level] + cols, rows = generator.dz.level_tiles[level] for row in range(rows): for col in range(cols): key_name = key_levelpath / f'{col}_{row}.{FORMAT}' yield cls( - associated, + storage, + generator, level, (col, row), key_name, @@ -336,11 +325,11 @@ def enumerate( def sync_image( - pool: PoolType, + exec: ThreadPoolExecutor, storage: S3Storage, + generator: Generator, slide_relpath: PurePath, associated: str | None, - dz: DeepZoomGenerator, key_basepath: PurePath, key_md5sums: KeyMd5s, mpp: float | None = None, @@ -349,10 +338,9 @@ def sync_image( Delete valid tiles from key_md5sums.""" count = 0 - total = dz.tile_count + total = generator.dz.tile_count associated_slug = slugify(associated) if associated else VIEWER_SLIDE_NAME key_imagepath = key_basepath / f'{associated_slug}_files' - iterator = Tile.enumerate(associated, dz, key_imagepath, key_md5sums) def progress() -> None: print( @@ -364,11 +352,13 @@ def progress() -> None: # Sync tiles progress() - for ret in pool.imap_unordered(Tile.sync, iterator, 32): - if isinstance(ret, BaseException): - raise ret - else: - key_md5sums.pop(ret, None) + for future in as_completed( + exec.submit(Tile.sync, tile) + for tile in Tile.enumerate( + storage, generator, key_imagepath, key_md5sums + ) + ): + key_md5sums.pop(future.result(), None) count += 1 if count % 100 == 0: progress() @@ -384,8 +374,8 @@ def progress() -> None: 'TileSize': TILE_SIZE, 'Overlap': OVERLAP, 'Size': { - 'Width': dz.level_dimensions[-1][0], - 'Height': dz.level_dimensions[-1][1], + 'Width': generator.dz.level_dimensions[-1][0], + 'Height': generator.dz.level_dimensions[-1][1], }, } } @@ -486,6 +476,9 @@ def sync_slide( } if slide is not None: + # Configure cache + slide.set_cache(OpenSlideCache(workers << 25)) + # Add slide metadata metadata.update( { @@ -508,21 +501,18 @@ def sync_slide( mpp = None # Start compute pool - pool = Pool(workers, pool_init, (storage.bucket.name, slide_path)) + exec = ThreadPoolExecutor(workers) try: # Tile slide def do_tile( associated: str | None, image: AbstractSlide ) -> ImageInfo: - dz = DeepZoomGenerator( - image, TILE_SIZE, OVERLAP, limit_bounds=LIMIT_BOUNDS - ) return sync_image( - pool, + exec, storage, + Generator(image), slide_relpath, associated, - dz, key_basepath, key_md5sums, mpp if associated is None else None, @@ -537,11 +527,10 @@ def do_tile( cur_props = do_tile(associated, ImageSlide(image)) metadata['associated'].append(cur_props) except BaseException: - pool.terminate() + exec.shutdown(cancel_futures=True) raise finally: - pool.close() - pool.join() + exec.shutdown() # Delete old keys for name in metadata_key_name, properties_key_name: @@ -742,7 +731,7 @@ def finish_retile(ctxfile: TextIO, summarydir: Path) -> None: if __name__ == '__main__': - cpu_count = os.process_cpu_count() + thread_count = 2 * os.process_cpu_count() parser = ArgumentParser() subparsers = parser.add_subparsers(metavar='subcommand', required=True) @@ -780,8 +769,8 @@ def finish_retile(ctxfile: TextIO, summarydir: Path) -> None: metavar='COUNT', dest='workers', type=int, - default=cpu_count, - help=f'number of worker processes to start [{cpu_count}]', + default=thread_count, + help=f'number of threads to start [{thread_count}]', ) parser_tile.set_defaults(cmd='tile')