diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index db65d18..8bc3978 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,6 +14,8 @@ name: CI tags: - "*" pull_request: {} + release: + types: [published] jobs: lint: @@ -26,7 +28,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Run pre-commit uses: pre-commit/action@v3.0.1 @@ -87,7 +89,7 @@ jobs: fetch-depth: 0 # full history for setuptools_scm - name: Build and publish - uses: lsst-sqre/build-and-publish-to-pypi@v2 + uses: lsst-sqre/build-and-publish-to-pypi@v3 with: python-version: "3.12" upload: false @@ -114,6 +116,6 @@ jobs: fetch-depth: 0 # full history for setuptools_scm - name: Build and publish - uses: lsst-sqre/build-and-publish-to-pypi@v2 + uses: lsst-sqre/build-and-publish-to-pypi@v3 with: python-version: "3.12" diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9800f36 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ + + +## 0.6.2 (2024-12-16) + +### Bug fixes + +- Correct typo in pyproject.toml + + +## v6.0.1 (2024-12-16) + +### Bug fixes + +- Make CI attempt to upload on release + + +## v0.6.0 (2024-12-16) + +### New features + +- Support Python 3.13 + +- Work through PyPi publication diff --git a/Dockerfile b/Dockerfile index 6b2f7d8..6f3d32b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,61 +1,99 @@ # Dockerfile for uWSGI wrapped Giftless Git LFS Server +# Shared build ARGs among stages +ARG WORKDIR=/app +ARG VENV="$WORKDIR/.venv" +ARG UV_VERSION=0.5.16 -### --- Build Depdendencies --- - -FROM python:3.12 as builder -MAINTAINER "Shahar Evron " - -# Build wheels for uWSGI and all requirements -RUN DEBIAN_FRONTEND=noninteractive apt-get update \ - && apt-get install -y build-essential libpcre3 libpcre3-dev git -RUN pip install -U pip -RUN mkdir /wheels +### Distroless uv version layer to be copied from (because COPY --from does not interpolate variables) +FROM ghcr.io/astral-sh/uv:$UV_VERSION AS uv +### --- Build Depdendencies --- +FROM python:3.12 AS builder ARG UWSGI_VERSION=2.0.23 -RUN pip wheel -w /wheels uwsgi==$UWSGI_VERSION - -COPY requirements/main.txt /requirements.txt -RUN pip wheel -w /wheels -r /requirements.txt +# Common WSGI middleware modules to be pip-installed +# These are not required in every Giftless installation but are common enough +ARG EXTRA_PACKAGES="wsgi_cors_middleware" +# expose shared ARGs +ARG WORKDIR +ARG VENV + +# Set WORKDIR (also creates the dir) +WORKDIR $WORKDIR + +# Install packages to build wheels for uWSGI and other requirements +RUN set -eux ;\ + export DEBIAN_FRONTEND=noninteractive ;\ + apt-get update ;\ + apt-get install -y --no-install-recommends build-essential libpcre3 libpcre3-dev git ;\ + rm -rf /var/lib/apt/lists/* + +# Install uv to replace pip & friends +COPY --from=uv /uv /uvx /bin/ + +# Set a couple uv-related settings +# Wait a bit longer for slow connections +ENV UV_HTTP_TIMEOUT=100 +# Don't cache packages +ENV UV_NO_CACHE=1 + +# Create virtual env to store dependencies, "activate" it +RUN uv venv "$VENV" +ENV VIRTUAL_ENV="$VENV" PATH="$VENV/bin:$PATH" + +# Install runtime dependencies +RUN --mount=target=/build-ctx \ + uv pip install -r /build-ctx/requirements/main.txt +RUN uv pip install uwsgi==$UWSGI_VERSION +# Install extra packages into the virtual env +RUN uv pip install ${EXTRA_PACKAGES} + +# Copy project contents necessary for an editable install +COPY .git .git/ +COPY giftless giftless/ +COPY pyproject.toml . +# Editable-install the giftless package (add a kind of a project path reference in site-packages) +# To detect the package version dynamically, setuptools-scm needs the git binary +RUN uv pip install -e . ### --- Build Final Image --- - -FROM python:3.12-slim - -RUN DEBIAN_FRONTEND=noninteractive apt-get update \ - && apt-get install -y libpcre3 libxml2 tini git \ - && apt-get clean \ - && apt -y autoremove - -RUN mkdir /app - -# Install dependencies -COPY --from=builder /wheels /wheels -RUN pip install /wheels/*.whl - -# Copy project code -COPY . /app -RUN pip install -e /app +FROM python:3.12-slim AS final +LABEL org.opencontainers.image.authors="Shahar Evron " ARG USER_NAME=giftless +# Writable path for local LFS storage ARG STORAGE_DIR=/lfs-storage -ENV GIFTLESS_TRANSFER_ADAPTERS_basic_options_storage_options_path $STORAGE_DIR - -RUN useradd -d /app $USER_NAME -RUN mkdir $STORAGE_DIR -RUN chown $USER_NAME $STORAGE_DIR - -# Pip-install some common WSGI middleware modules -# These are not required in every Giftless installation but are common enough -ARG EXTRA_PACKAGES="wsgi_cors_middleware" -RUN pip install ${EXTRA_PACKAGES} - +# expose shared ARGs +ARG WORKDIR +ARG VENV + +# Set WORKDIR (also creates the dir) +WORKDIR $WORKDIR + +# Create a user and set local storage write permissions +RUN set -eux ;\ + useradd -d "$WORKDIR" "$USER_NAME" ;\ + mkdir "$STORAGE_DIR" ;\ + chown "$USER_NAME" "$STORAGE_DIR" + +# Install runtime dependencies +RUN set -eux ;\ + export DEBIAN_FRONTEND=noninteractive ;\ + apt-get update ;\ + apt-get install -y libpcre3 libxml2 tini ;\ + rm -rf /var/lib/apt/lists/* + +# Use the virtual env with dependencies from builder stage +COPY --from=builder "$VENV" "$VENV" +ENV VIRTUAL_ENV="$VENV" PATH="$VENV/bin:$PATH" +# Copy project source back into the same path referenced by the editable install +COPY --from=builder "$WORKDIR/giftless" "giftless" + +# Set runtime properties USER $USER_NAME +ENV GIFTLESS_TRANSFER_ADAPTERS_basic_options_storage_options_path="$STORAGE_DIR" +ENV UWSGI_MODULE="giftless.wsgi_entrypoint" -WORKDIR /app - -ENV UWSGI_MODULE "giftless.wsgi_entrypoint" - -ENTRYPOINT ["tini", "uwsgi", "--"] +ENTRYPOINT ["tini", "--", "uwsgi"] CMD ["-s", "127.0.0.1:5000", "-M", "-T", "--threads", "2", "-p", "2", \ "--manage-script-name", "--callable", "app"] diff --git a/Makefile b/Makefile index 86574e2..765a2fa 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,8 @@ TESTS_DIR := tests SHELL := bash PYTHON := python -PIP := pip -PIP_COMPILE := pip-compile +PIP := uv pip +PIP_COMPILE := uv pip compile PYTEST := pytest DOCKER := docker GIT := git @@ -28,6 +28,9 @@ VERSION := $(shell $(PYTHON) -c "from importlib.metadata import version;print(ve default: help +## Install uv (fast pip replacement) +init: $(SENTINELS)/uv + ## Regenerate requirements files requirements: requirements/dev.txt requirements/dev.in requirements/main.txt requirements/main.in @@ -90,6 +93,10 @@ $(SENTINELS)/dev-setup: init requirements/main.txt requirements/dev.txt | $(SENT $(PIP) install -r requirements/dev.txt @touch $@ +$(SENTINELS)/uv: $(SENTINELS) + pip install uv + @touch $@ + # Help related variables and targets GREEN := $(shell tput -Txterm setaf 2) diff --git a/README.md b/README.md index a12bd63..42242cb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Giftless - a Pluggable Git LFS Server [![Maintainability](https://api.codeclimate.com/v1/badges/58f05c5b5842c8bbbdbb/maintainability)](https://codeclimate.com/github/datopian/giftless/maintainability) [![Test Coverage](https://api.codeclimate.com/v1/badges/58f05c5b5842c8bbbdbb/test_coverage)](https://codeclimate.com/github/datopian/giftless/test_coverage) -Giftless a Python implementation of a [Git LFS][1] Server. It is designed +Giftless is a Python implementation of a [Git LFS][1] Server. It is designed with flexibility in mind, to allow pluggable storage backends, transfer methods and authentication methods. @@ -40,7 +40,7 @@ Documentation License ------- -Copyright (C) 2020, Datopian / Viderum, Inc. +Copyright (C) 2020-2024, Datopian / Viderum, Inc. Giftless is free / open source software and is distributed under the terms of the MIT license. See [LICENSE](LICENSE) for details. diff --git a/docs/source/auth-providers.md b/docs/source/auth-providers.md index ce812eb..df4062f 100644 --- a/docs/source/auth-providers.md +++ b/docs/source/auth-providers.md @@ -196,27 +196,44 @@ servers. This authenticator lets you provide a frictionless LFS backend for existing GitHub repositories. It plays nicely with `git` credential helpers and allows you to use GitHub as the single authentication & authorization provider. ### Details -The authenticator uses [GitHub Personal Access Tokens](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens), the same ones used for cloning a GitHub repo over HTTPS. The provided token is used in a couple GitHub API calls that identify the token's identity and [its permissions](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#get-repository-permissions-for-a-user) for the GitHub organization & repository. The token is supposed to be passed in the password part of the `Basic` HTTP auth (username is ignored). `Bearer` token HTTP auth is also supported, although no git client will likely use it. +The authenticator uses GitHub [Personal Access Tokens](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and [App Installation tokens](https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/authenticating-as-a-github-app-installation), the same ones used for cloning a GitHub repo over HTTPS. The provided token is used in a couple GitHub API calls that identify the token's identity and [its permissions](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#get-repository-permissions-for-a-user) for the GitHub organization & repository. -For the authenticator to work properly the token must have the `read:org` for "Classic" or `metadata:read` permission for the fine-grained kind. - - Note: Authentication via SSH that could be used to verify the user is [not possible with GitHub at the time of writing](https://github.com/datopian/giftless/issues/128#issuecomment-2037190728). +Note: Authentication via SSH that could be used to verify the user is [not possible with GitHub at the time of writing](https://github.com/datopian/giftless/issues/128#issuecomment-2037190728). The GitHub repository permissions are mapped to [Giftless permissions](#permissions) in the straightforward sense that those able to write will be able to write, same with read; invalid tokens or identities with no repository access will get rejected. To minimize the traffic to GitHub for each LFS action, most of the auth data is being temporarily cached in memory, which improves performance, but naturally also ignores immediate changes for identities with changed permissions. ### GitHub Auth Flow -Here's a description of the authentication & authorization flow. If any of these steps fails, the request gets rejected. +Here's a description of the authentication & authorization flow. If any of these steps fails, the request gets rejected. As the supported token flavors have very different ways of authentication, they're described separately: + +#### Personal Access Tokens (`ghp_`, `_github_pat_` and likely other [token flavors](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-authentication-to-github#githubs-token-formats) `gho_`, `ghu_`) +These tokens eventually represent a real user. For the authenticator to work properly, the token must have these permissions: +- `read:org` for "Classic" or +- `metadata:read` for the fine-grained kind. +- The user has to be a collaborator on the target repository with an adequate role for reading or writing. -1. The URI of the primary git LFS (HTTP) [`batch` request](https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md) is used (as usual) to determine what GitHub organization and repository is being targeted (e.g. `https:////.git/info/lfs/...`). The request's `Authentication` header is also searched for the required GitHub personal access token. +1. The URI of the primary git LFS (HTTP) [`batch` request](https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md) is used to determine what GitHub organization and repository is being targeted (e.g. `https:////.git/info/lfs/...`). The request's `Authentication` header is searched for the required token in the `password` part of the `Basic` HTTP auth. 2. The token is then used in a [`/user`](https://docs.github.com/en/rest/users/users?apiVersion=2022-11-28#get-the-authenticated-user) GitHub API call to get its identity data. 3. Further on the GitHub API is asked for the [user's permissions](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#get-repository-permissions-for-a-user) to the org/repo in question. 4. Based on the information above the user will be granted or rejected access. +#### App Installation Tokens (`ghs_`) +This token represents a special identity of an "application installation", acting on behalf of an installed GitHub App (likely part of an automation integration). This installation is bound to a user or organization (owner) and gets a set of fine-grained permissions applying to `all` or `selected` repositories of the targeted owner. For the authenticator to work properly, the GitHub App must have these permissions: +- `metadata:read` (default) +- `contents:read|write` (the permission to the repository content) +- `organization_administration:read` (required to [list owner's app installations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-app-installations-for-an-organization)) +- The installed App also has to have access to the target repository. + +1. The URI of the primary git LFS (HTTP) [`batch` request](https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md) is used to determine what GitHub organization and repository is being targeted (e.g. `https:////.git/info/lfs/...`). The request's `Authentication` header is searched for the required token in the `password` part of the `Basic` HTTP auth. **The `user` part must contain some identification of the app** (installation `id`, `app_id`, `client_id` or `app_slug` (its dashed name)). +2. The token is then used in the [`/orgs//installations`](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-app-installations-for-an-organization) GitHub API call to get the list of app installations in the target `org`. This list is then searched for the app identification from the `user` part above. The identified entry contains info about the app permissions and whether the installation targets `all` repositories or just `selected`. At this moment the LFS permissions are inferred from the provided `content` permission. If the repository access is `all`, this is everything the logic needs. +3. If the repository access is just for `selected` ones, the GitHub API is asked for the [`/installation/repositories`](https://docs.github.com/en/rest/apps/installations?apiVersion=2022-11-28#list-repositories-accessible-to-the-app-installation), where it must find the target repository. + ### `giftless.auth.github` Configuration Options * `api_url` (`str` = `"https://api.github.com"`): Base URL for the GitHub API (enterprise servers have API at `"https:///api/v3/"`). +* `api_timeout` (`float | tuple[float, float]` = `(10.0, 20.0)`): Timeout for the GitHub API calls ([details](https://requests.readthedocs.io/en/stable/user/advanced/#timeouts)). * `api_version` (`str | None` = `"2022-11-28"`): Target GitHub API version; set to `None` to use GitHub's latest (rather experimental). +* `restrict_to` (`dict[str, list[str] | None] | None` = `None`): Optional (but highly recommended) dictionary of GitHub organizations/users the authentication is restricted to. Each key (organization name) in the dictionary can contain a list of further restricted repository names. When the list is empty (or null), only the organizations are considered. * `cache` (`dict`): Cache configuration section * `token_max_size` (`int` = `32`): Max number of entries in the token -> user LRU cache. This cache holds the authentication data for a token. Evicted tokens will need to be re-authenticated. * `auth_max_size` (`int` = `32`): Max number of [un]authorized org/repos TTL(LRU) for each user. Evicted repos will need to get re-authorized. diff --git a/docs/source/installation.md b/docs/source/installation.md index 41a2428..bb394c0 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -59,7 +59,7 @@ $ git clone https://github.com/datopian/giftless.git $ cd giftless $ python3 -m venv venv $ source venv/bin/activate -(venv) $ pip install -r requirements.txt +(venv) $ pip install -r requirements/main.txt ``` You can then proceed to run Giftless with a WSGI server as diff --git a/giftless/auth/github.py b/giftless/auth/github.py index 7a4d86e..a570d24 100644 --- a/giftless/auth/github.py +++ b/giftless/auth/github.py @@ -1,4 +1,5 @@ """Objects for GitHub "proxy" authentication.""" +import abc import dataclasses import functools import logging @@ -6,11 +7,19 @@ import os import threading import weakref -from collections.abc import Callable, Mapping, MutableMapping -from contextlib import AbstractContextManager, suppress +from collections.abc import ( + Callable, + Generator, + Iterable, + Mapping, + MutableMapping, +) +from contextlib import AbstractContextManager, ExitStack, suppress from operator import attrgetter, itemgetter from threading import Lock, RLock -from typing import Any, Protocol, TypeVar, cast, overload +from types import TracebackType +from typing import Any, ClassVar, Protocol, TypeVar, cast, overload +from urllib.parse import parse_qs, urlparse import cachetools.keys import flask @@ -164,7 +173,37 @@ def wrapper(self: Any, *args: tuple, **kwargs: dict) -> _RT: return decorator -# AUTH MODULE CONFIGURATION OPTIONS +# AUTH MODULE CONFIGURATION OPTIONS (and their validation) +class RequestsTimeout(ma.fields.Field): + """Marshmallow Field validating a requests library timeout.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + pos_float = ma.fields.Float(validate=ma.validate.Range(min=0)) + self.possible_fields = ( + ma.fields.Tuple((pos_float, pos_float)), + pos_float, + ) + + def _deserialize( + self, + value: Any, + attr: str | None, + data: Mapping[str, Any] | None, + **kwargs: Any, + ) -> Any: # float | tuple[float, float] + errors = {} + for field in self.possible_fields: + try: + return field.deserialize(value, **kwargs) + except ma.ValidationError as error: # noqa: PERF203 + if error.valid_data is not None: + # parsing partially successful, don't bother with the rest + raise + errors.update({field.__class__.__name__: error.messages}) + raise ma.ValidationError(errors) + + @dataclasses.dataclass(frozen=True, kw_only=True) class CacheConfig: """Cache configuration.""" @@ -211,6 +250,10 @@ class Config: api_url: str # GitHub API version to target (set to None for the default latest) api_version: str | None + # GitHub API requests timeout + api_timeout: float | tuple[float, float] + # Orgs and repos this instance is restricted to + restrict_to: dict[str, list[str] | None] | None # cache config above cache: CacheConfig @@ -219,6 +262,13 @@ class Schema(ma.Schema): api_version = ma.fields.String( load_default="2022-11-28", allow_none=True ) + api_timeout = RequestsTimeout(load_default=(5.0, 10.0)) + restrict_to = ma.fields.Dict( + keys=ma.fields.String(), + values=ma.fields.List(ma.fields.String(), allow_none=True), + load_default=None, + allow_none=True, + ) # always provide default CacheConfig when not present in the input cache = ma.fields.Nested( CacheConfig.Schema(), @@ -238,39 +288,160 @@ def from_dict(cls, data: Mapping[str, Any]) -> "Config": # CORE AUTH -@dataclasses.dataclass(frozen=True, slots=True) -class _CoreGithubIdentity: - """Entries uniquely identifying a GitHub user (from a token). - - This serves as a key to mappings/caches of unique users. +@dataclasses.dataclass +class CallContext: + """Helper class for GithubAuthenticator to hold various state variables + bound to a single __call__() execution. + It's also a context manager holding an open requests client session. """ - id: str - github_id: str + # authenticator config + cfg: Config + # original flask request to be authenticated + request: dataclasses.InitVar[flask.Request] + # fields inferred from request + org: str = dataclasses.field(init=False) + repo: str = dataclasses.field(init=False) + user: str | None = dataclasses.field(init=False) + token: str = dataclasses.field(init=False) + # GitHub api call variables + _api_url: str = dataclasses.field(init=False) + _api_headers: dict[str, str] = dataclasses.field( + init=False, + default_factory=lambda: {"Accept": "application/vnd.github+json"}, + ) + # requests session to reuse a connection to GitHub + _session: requests.Session | None = dataclasses.field( + init=False, default=None + ) + _exit_stack: ExitStack = dataclasses.field( + init=False, default_factory=ExitStack + ) - @classmethod - def from_token( - cls, token_data: Mapping[str, Any] - ) -> "_CoreGithubIdentity": - return cls(*itemgetter("login", "id")(token_data)) + def __post_init__(self, request: flask.Request) -> None: + org_repo_getter = itemgetter("organization", "repo") + self.org, self.repo = org_repo_getter(request.view_args or {}) + self.user, self.token = self._extract_auth(request) + self._check_restricted_to() + self._api_url = self.cfg.api_url + self._api_headers["Authorization"] = f"Bearer {self.token}" + if self.cfg.api_version: + self._api_headers["X-GitHub-Api-Version"] = self.cfg.api_version -class GithubIdentity(Identity): - """User identity belonging to an authentication token. + def _check_restricted_to(self) -> None: + restrict_to = self.cfg.restrict_to + if restrict_to: + try: + rest_repos = restrict_to[self.org] + except KeyError: + raise Unauthorized( + f"Unauthorized GitHub organization '{self.org}'" + ) from None + if rest_repos and self.repo not in rest_repos: + raise Unauthorized( + f"Unauthorized GitHub repository '{self.org}/{self.repo}'" + ) - Tracks user's permission for particular organizations/repositories. + def __enter__(self) -> "CallContext": + self._session = self._exit_stack.enter_context(requests.Session()) + self._session.headers.update(self._api_headers) + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> Any: + self._session = None + self._exit_stack.close() + + def _extract_auth(self, request: flask.Request) -> tuple[str | None, str]: + if request.authorization is None: + raise Unauthorized("Authorization required") + + user = request.authorization.get("username") + token = request.authorization.password or request.authorization.token + if token is None: + _logger.warning( + f"Request to {self.org}/{self.repo} has no auth token" + ) + raise Unauthorized("Authorization token required") + return user, token + + def api_get(self, uri: str) -> dict[str, Any]: + if self._session is None: + raise RuntimeError( + "CallContext is a context manager maintaining a requests " + "session. Call api_get() only within its entered context." + ) + response = self._session.get( + f"{self._api_url}{uri}", + headers=self._api_headers, + timeout=self.cfg.api_timeout, + ) + response.raise_for_status() + return cast(dict[str, Any], response.json()) + + def api_get_paginated( + self, uri: str, *, per_page: int = 30, list_name: str | None = None + ) -> Generator[dict[str, Any], None, None]: + if self._session is None: + raise RuntimeError( + "CallContext is a context manager maintaining a requests " + "session. Call api_get_paginated() only within its entered " + "context." + ) + + per_page = min(max(per_page, 1), 100) + list_name = list_name or uri.rsplit("/", 1)[-1] + next_page = 1 + while next_page > 0: + response = self._session.get( + f"{self._api_url}{uri}", + params={"per_page": per_page, "page": next_page}, + headers=self._api_headers, + timeout=self.cfg.api_timeout, + ) + response.raise_for_status() + response_json: dict[str, Any] = response.json() + + yield from (item for item in response_json.get(list_name, [])) + + # check the 'link' header for the 'next' page URL + if next_url := response.links.get("next", {}).get("url"): + # extract the page number from the URL that looks like + # https://api.github.com/some/collections?page=4 + # urlparse(next_url).query returns "page=4" + # parse_qs() parses that into {'page': ['4']} + # when 'page' is missing, we supply a fake ['0'] to stop + next_page = int( + parse_qs(urlparse(next_url).query).get("page", ["0"])[0] + ) + else: + next_page = 0 + + @property + def org_repo(self) -> str: + return f"{self.org}/{self.repo}" + + +class GithubIdentity(Identity, abc.ABC): + """GitHub identity belonging to an authentication token. + + Tracks identity's permission for particular organizations/repositories. """ def __init__( self, - core_identity: _CoreGithubIdentity, - token_data: Mapping[str, Any], + id_: str | None, + name: str | None = None, + email: str | None = None, + *, cc: CacheConfig, ) -> None: - super().__init__( - token_data.get("name"), core_identity.id, token_data.get("email") - ) - self.core_identity = core_identity + super().__init__(name, id_, email) # Expiring cache of authorized repos with different TTL for each # permission type. It's assumed that anyone granted the WRITE @@ -278,27 +449,89 @@ def __init__( # or have no permissions whatsoever. Caching the latter has the # complementing effect of keeping unauthorized entities from hammering # the GitHub API. - def expiration(_key: Any, value: set[Permission], now: float) -> float: - ttl = ( - cc.auth_write_ttl - if Permission.WRITE in value - else cc.auth_other_ttl - ) - return now + ttl + def _perm_ttl(perms: set[Permission]) -> float: + if Permission.WRITE in perms: + return cc.auth_write_ttl + else: + return cc.auth_other_ttl + + # expiration factory providing a 'ttu' function for 'TLRUCache' + # respecting specified least_ttl + def expiration( + least_ttl: float | None = None, + ) -> Callable[[Any, set[Permission], float], float]: + if least_ttl is None or least_ttl <= 0.0: + + def _e(_key: Any, value: set[Permission], now: float) -> float: + return now + _perm_ttl(value) + else: + + def _e(_key: Any, value: set[Permission], now: float) -> float: + return now + max(_perm_ttl(value), least_ttl) + + return _e # size-unlimited proxy cache to ensure at least one successful hit + # by is_authorized self._auth_cache_read_proxy: MutableMapping[ Any, set[Permission] - ] = cachetools.TTLCache(math.inf, 60.0) - self._auth_cache = cachetools.TLRUCache(cc.auth_max_size, expiration) + ] = cachetools.TLRUCache(math.inf, expiration(60.0)) + self._auth_cache = cachetools.TLRUCache(cc.auth_max_size, expiration()) self._auth_cache_lock = Lock() - def __getattr__(self, attr: str) -> Any: - # proxy to the core_identity for its attributes - return getattr(self.core_identity, attr) + def __eq__(self, other: object) -> bool: + field_get = attrgetter("id", "name", "email") + return isinstance(other, type(self)) and field_get(self) == field_get( + other + ) + + @classmethod + def authenticate(cls, ctx: CallContext) -> "GithubIdentity": + """Create a GitHub identity from the input data, run basic checks.""" + raise NotImplementedError + + def _authorize(self, ctx: CallContext) -> None: + """Resolve and set access permissions for the particular identity.""" + raise NotImplementedError - def permissions( - self, org: str, repo: str, *, authoritative: bool = False + @single_call_method( + key=lambda self, ctx: cachetools.keys.hashkey( + ctx.org, ctx.repo, id(self) + ) + ) + def authorize(self, ctx: CallContext) -> None: + if (permissions := self._permissions(ctx.org, ctx.repo)) is not None: + perm_list = self._perm_list(permissions) + _logger.debug( + f"{self.id} is already temporarily authorized for " + f"{ctx.org_repo}: {perm_list}" + ) + else: + self._authorize(ctx) + + def _set_permissions( + self, + org: str, + repo: str | None, + permissions: set[Permission] | None, + casual: bool = False, + ) -> None: + """Save user's permission set for an org/repo.""" + key = cachetools.keys.hashkey(org, repo) + perm_set = permissions if permissions is not None else set() + with self._auth_cache_lock: + if casual: + # put the discovered permissions right into the main cache + # without any guarantees it will be retrieved later + with suppress(ValueError): + self._auth_cache[key] = perm_set + else: + # put the discovered permissions into the proxy cache + # to ensure at least one successful 'authoritative' read + self._auth_cache_read_proxy[key] = perm_set + + def _permissions( + self, org: str, repo: str | None, *, authoritative: bool = False ) -> set[Permission] | None: """Return user's permission set for an org/repo.""" key = cachetools.keys.hashkey(org, repo) @@ -319,17 +552,9 @@ def permissions( self._auth_cache[key] = permission return permission - def authorize( - self, org: str, repo: str, permissions: set[Permission] | None - ) -> None: - """Save user's permission set for an org/repo.""" - key = cachetools.keys.hashkey(org, repo) - # put the discovered permissions into the proxy cache - # to ensure at least one successful 'authoritative' read - with self._auth_cache_lock: - self._auth_cache_read_proxy[key] = ( - permissions if permissions is not None else set() - ) + @staticmethod + def _perm_list(permissions: set[Permission]) -> str: + return f"[{', '.join(sorted(p.value for p in permissions))}]" def is_authorized( self, @@ -338,7 +563,7 @@ def is_authorized( permission: Permission, oid: str | None = None, ) -> bool: - permissions = self.permissions(organization, repo, authoritative=True) + permissions = self._permissions(organization, repo, authoritative=True) return permission in permissions if permissions else False def cache_ttl(self, permissions: set[Permission]) -> float: @@ -346,162 +571,360 @@ def cache_ttl(self, permissions: set[Permission]) -> float: return self._auth_cache.ttu(None, permissions, 0.0) -class GithubAuthenticator: - """Main class performing GitHub "proxy" authentication/authorization.""" +class GithubUserIdentity(GithubIdentity): + """User identity belonging to an authentication token. - @dataclasses.dataclass - class CallContext: - """Helper class to pass common auth request variables around.""" - - # original flask request to be authenticated - request: dataclasses.InitVar[flask.Request] - # requests session to reuse a connection to GitHub - session: requests.Session - # fields inferred from request - org: str = dataclasses.field(init=False) - repo: str = dataclasses.field(init=False) - token: str = dataclasses.field(init=False) - - def _extract_token(self, request: flask.Request) -> str: - if request.authorization is None: - raise Unauthorized("Authorization required") - - token = ( - request.authorization.password or request.authorization.token - ) - if token is None: - _logger.warning( - f"Request to {self.org}/{self.repo} has no auth token" - ) - raise Unauthorized("Authorization token required") - return token + Tracks user's permission for particular organizations/repositories. + """ - def __post_init__(self, request: flask.Request) -> None: - org_repo_getter = itemgetter("organization", "repo") - self.org, self.repo = org_repo_getter(request.view_args or {}) - self.token = self._extract_token(request) + @dataclasses.dataclass(frozen=True, slots=True) + class CoreIdentity: + """Entries uniquely identifying a GitHub user (from a token). - def __init__(self, cfg: Config) -> None: - self._api_url = cfg.api_url - self._api_headers = {"Accept": "application/vnd.github+json"} - if cfg.api_version: - self._api_headers["X-GitHub-Api-Version"] = cfg.api_version - # user identities per token - self._token_cache: MutableMapping[ - Any, GithubIdentity - ] = cachetools.LRUCache(maxsize=cfg.cache.token_max_size) - # unique user identities, to get the same identity that's - # potentially already cached for a different token (same user) - # If all the token entries for one user get evicted from the - # token cache, the user entry here automatically ceases to exist too. - self._cached_users: MutableMapping[ - Any, GithubIdentity - ] = weakref.WeakValueDictionary() - self._cache_lock = RLock() - self._cache_config = cfg.cache + This serves as a key to mappings/caches of unique users. + """ - def _api_get(self, uri: str, ctx: CallContext) -> Mapping[str, Any]: - response = ctx.session.get( - f"{self._api_url}{uri}", - headers={"Authorization": f"Bearer {ctx.token}"}, + id: str + github_id: str + + @classmethod + def from_user_data( + cls, user_data: Mapping[str, Any] + ) -> "GithubUserIdentity.CoreIdentity": + return cls(*itemgetter("login", "id")(user_data)) + + # unique user identities, to get the same identity that's + # potentially already cached for a different token (same user) + # If all the token entries for one user get evicted from the + # token cache, the user entry here automatically ceases to exist too. + _cached_users: ClassVar[ + MutableMapping["GithubUserIdentity.CoreIdentity", "GithubUserIdentity"] + ] = weakref.WeakValueDictionary() + _cache_lock: ClassVar[_LockType] = RLock() + + def __init__( + self, + core_identity: CoreIdentity, + user_data: Mapping[str, Any], + cc: CacheConfig, + ) -> None: + super().__init__( + core_identity.id, + user_data.get("name"), + user_data.get("email"), + cc=cc, ) - response.raise_for_status() - return cast(Mapping[str, Any], response.json()) + self.core_identity = core_identity - @cachedmethod_threadsafe( - attrgetter("_token_cache"), - lambda self, ctx: cachetools.keys.hashkey(ctx.token), - attrgetter("_cache_lock"), - ) - def _authenticate(self, ctx: CallContext) -> GithubIdentity: + def __getattr__(self, attr: str) -> Any: + # proxy to the core_identity for its attributes + return getattr(self.core_identity, attr) + + @classmethod + def authenticate(cls, ctx: CallContext) -> "GithubIdentity": """Return internal GitHub user identity for a GitHub token in ctx.""" _logger.debug("Authenticating user") try: - token_data = self._api_get("/user", ctx) + user_data = ctx.api_get("/user") except requests.exceptions.RequestException as e: _logger.warning(msg := f"Couldn't authenticate the user: {e}") raise Unauthorized(msg) from None - core_identity = _CoreGithubIdentity.from_token(token_data) + core_identity = cls.CoreIdentity.from_user_data(user_data) # check if we haven't seen this identity before - # guard the code with the same lock as the _token_cache - with self._cache_lock: + with cls._cache_lock: try: - user = self._cached_users[core_identity] + user = cls._cached_users[core_identity] except KeyError: - user = GithubIdentity( - core_identity, token_data, self._cache_config + user = GithubUserIdentity( + core_identity, user_data, ctx.cfg.cache ) - self._cached_users[core_identity] = user + cls._cached_users[core_identity] = user + _logger.info(f"Authenticated the user as {user}") return user + def _authorize(self, ctx: CallContext) -> None: + org_repo = ctx.org_repo + _logger.debug(f"Checking {self.id}'s permissions for {org_repo}") + try: + repo_data = ctx.api_get( + f"/repos/{org_repo}/collaborators/{self.id}/permission", + ) + except requests.exceptions.RequestException as e: + msg = ( + f"Failed to find {self.id}'s permissions for " + f"{org_repo}: {e}" + ) + _logger.warning(msg) + raise Unauthorized(msg) from None + + gh_permission = repo_data.get("permission") + _logger.debug( + f"User {self.id} has '{gh_permission}' GitHub permission " + f"for {org_repo}" + ) + permissions = set() + if gh_permission in ("admin", "write"): + permissions = Permission.all() + elif gh_permission == "read": + permissions = {Permission.READ, Permission.READ_META} + perm_list = self._perm_list(permissions) + ttl = self.cache_ttl(permissions) + _logger.debug( + f"Authorizing {self.id} (for {ttl}s) for " + f"{org_repo}: {perm_list}" + ) + self._set_permissions(ctx.org, ctx.repo, permissions) + + +class GithubAppIdentity(GithubIdentity): + """App Installation identity belonging to an authentication token. + + Tracks app's permission for particular organization/repositories. + GitHub App installation gets a particular set of permissions per one + user/org and a potential list of repositories that this app is allowed + to act upon. + """ + + def __init__( + self, org: str, installation_data: dict[str, Any], *, cc: CacheConfig + ) -> None: + super().__init__( + str(installation_data["id"]), installation_data["app_slug"], cc=cc + ) + self.client_id: str = installation_data["client_id"] + self.app_id = str(installation_data["app_id"]) + self._orig_org = org + self._orig_installation_data: dict[str, Any] | None = installation_data + + def __eq__(self, other: object) -> bool: + field_get = attrgetter("client_id", "app_id") + return ( + isinstance(other, type(self)) + and super().__eq__(other) + and field_get(self) == field_get(other) + ) + @staticmethod - def _perm_list(permissions: set[Permission]) -> str: - return f"[{', '.join(sorted(p.value for p in permissions))}]" + def _get_installation( + ctx: CallContext, id_: str | None = None + ) -> dict[str, Any]: + """Get the GitHub App installation per its id or the user + from Basic auth. + + This is a GitHub App acting on its own behalf. + Its id must come as the username in the Basic auth; + unlike a user, an app "installation" can't be identified from a token. + """ + some_id = id_ or ctx.user + _logger.debug("Authenticating GitHub App") + if not some_id: + msg = ( + "Couldn't authenticate the GitHub App. Its Installation ID" + ", App ID or Client ID must be sent as the username within" + " the Authorization header's Basic auth payload." + ) + _logger.warning(msg) + raise Unauthorized(msg) - @single_call_method( - key=lambda self, ctx, user: cachetools.keys.hashkey( - ctx.org, ctx.repo, user.core_identity + # get the list of org's GitHub App installations + org = ctx.org + _logger.debug( + f"Checking Github App installation {some_id} permissions for {org}" ) - ) - def _authorize(self, ctx: CallContext, user: GithubIdentity) -> None: - org, repo = ctx.org, ctx.repo - org_repo = f"{org}/{repo}" - if (permissions := user.permissions(org, repo)) is not None: - perm_list = self._perm_list(permissions) - _logger.debug( - f"{user.id} is already temporarily authorized for " - f"{org_repo}: {perm_list}" + try: + org_installations = ctx.api_get(f"/orgs/{org}/installations") + except requests.exceptions.RequestException as e: + msg = ( + f"Failed to get a list of Github App installations for " + f"{org}: {e}. Make sure the app has the 'Administration' " + f"organization (read) permission." ) - else: - _logger.debug(f"Checking {user.id}'s permissions for {org_repo}") - try: - repo_data = self._api_get( - f"/repos/{org_repo}/collaborators/{user.id}/permission", - ctx, + _logger.warning(msg) + raise Unauthorized(msg) from None + + # find the particular GitHub App id in the installations + # if the id_ is missing, search among all possible ids + if id_ is None: + + def pick_ids(inst: dict[str, Any]) -> Iterable[str]: + return ( + str(inst.get("id")), + cast(str, inst.get("client_id")), + str(inst.get("app_id")), + cast(str, inst.get("app_slug")), ) - except requests.exceptions.RequestException as e: - msg = ( - f"Failed to find {user.id}'s permissions for " - f"{org_repo}: {e}" + # otherwise just aim for the installation id + else: + + def pick_ids(inst: dict[str, Any]) -> Iterable[str]: + return (str(inst.get("id")),) + + _logger.debug( + f"Looking for Github App installation {some_id} details." + ) + try: + installation: dict[str, Any] = next( + inst + for inst in org_installations["installations"] + if some_id in pick_ids(inst) + ) + except StopIteration: + msg = ( + f"Failed to find id {some_id} in the list of Github App " + f"installations for {org}." + ) + _logger.warning(msg) + raise Unauthorized(msg) from None + return installation + + @classmethod + def authenticate(cls, ctx: CallContext) -> "GithubIdentity": + gh_installation = cls._get_installation(ctx) + identity = cls(ctx.org, gh_installation, cc=ctx.cfg.cache) + _logger.info( + f"Authenticated the GitHub App '{identity.name}' installation " + f"{identity.id}." + ) + return identity + + def _set_permissions_for_repositories( + self, ctx: CallContext, permissions: set[Permission] + ) -> None: + _logger.debug( + f"Getting Github App {self.name} installation {self.id} " + f"repositories." + ) + org, repo = ctx.org, ctx.repo + # one (final result) less than the auth cache free space + to_cache_casually = max( + 0.0, self._auth_cache.maxsize - self._auth_cache.currsize - 1 + ) + gh_repos = ctx.api_get_paginated("/installation/repositories") + try: + for i, r in enumerate(gh_repos): + r_org = r["owner"]["login"] + r_repo = r["name"] + # is it the repo we're looking for? + if r_org == org and r_repo == repo: + self._set_permissions(org, repo, permissions) + # we found it, stop casual caching + break + if i < to_cache_casually: + # we're not looking for this repo, but + # while we're here, we might as well cache it + self._set_permissions( + r_org, r_repo, permissions, casual=True + ) + + except requests.exceptions.RequestException as e: + msg = ( + f"Failed to get Github App {self.name} installation {self.id} " + f"repositories: {e}" + ) + _logger.warning(msg) + raise Unauthorized(msg) from None + + def _authorize(self, ctx: CallContext) -> None: + org = ctx.org + # reuse eventual GitHub App installation data from the authentication + if self._orig_installation_data: + if self._orig_org != org: + raise RuntimeError( + f"Initial authorization org mismatch: " + f"{org} != {self._orig_org}" ) - _logger.warning(msg) - raise Unauthorized(msg) from None + gh_installation = self._orig_installation_data + self._orig_installation_data = None + # or get new in case the authorization expired + else: + gh_installation = self._get_installation(ctx, self.id) - gh_permission = repo_data.get("permission") - _logger.debug( - f"User {user.id} has '{gh_permission}' GitHub permission " - f"for {org_repo}" + if not (gh_permissions := gh_installation.get("permissions")): + msg = ( + f"GitHub App {self.name} installation {self.id} " + f"has no permissions in {org}." ) - permissions = set() - if gh_permission in ("admin", "write"): - permissions = Permission.all() - elif gh_permission == "read": - permissions = {Permission.READ, Permission.READ_META} - perm_list = self._perm_list(permissions) - ttl = user.cache_ttl(permissions) - _logger.debug( - f"Authorizing {user.id} (for {ttl}s) for " - f"{org_repo}: {perm_list}" + _logger.warning(msg) + raise Unauthorized(msg) + + if not (contents_permission := gh_permissions.get("contents")): + msg = ( + f"GitHub App {self.name} installation {self.id} " + f"has no 'contents' permissions in {org}." + ) + _logger.warning(msg) + raise Unauthorized(msg) + + if contents_permission == "write": + permissions = Permission.all() + elif contents_permission == "read": + permissions = {Permission.READ_META, Permission.READ} + else: + msg = ( + f"GitHub App {self.name} installation {self.id} has no useful " + f"'contents' permissions in {org} ({contents_permission})." ) - user.authorize(org, repo, permissions) + _logger.warning(msg) + raise Unauthorized(msg) + + if gh_installation["repository_selection"] == "all": + # this app controls all repositories in the org + # no need to check particular repos, set a generic org permission + self._set_permissions(org, None, permissions) + else: + # there are selected repositories, we must process them + self._set_permissions_for_repositories(ctx, permissions) + + def _permissions( + self, org: str, repo: str | None, *, authoritative: bool = False + ) -> set[Permission] | None: + # when the app can access all org repos, don't check the per-repo cache + org_permissions = super()._permissions( + org, None, authoritative=authoritative + ) + return org_permissions or super()._permissions( + org, repo, authoritative=authoritative + ) + + +class GithubAuthenticator: + """Main class performing GitHub "proxy" authentication/authorization.""" + + def __init__(self, cfg: Config) -> None: + self._cfg = cfg + # github identities per token + self._token_cache: MutableMapping[ + Any, GithubIdentity + ] = cachetools.LRUCache(maxsize=cfg.cache.token_max_size) + self._cache_lock = RLock() + self._cache_config = cfg.cache + + @cachedmethod_threadsafe( + attrgetter("_token_cache"), + lambda self, ctx: cachetools.keys.hashkey(ctx.token), + attrgetter("_cache_lock"), + ) + def _authenticate(self, ctx: CallContext) -> GithubIdentity: + if ctx.token.startswith("ghs_"): + identity = GithubAppIdentity.authenticate(ctx) + else: + identity = GithubUserIdentity.authenticate(ctx) + return identity def __call__(self, request: flask.Request) -> Identity | None: _logger.debug( f"Handling auth request from pid: {os.getpid()}. " f"tid: {threading.get_native_id()}" ) - with requests.Session() as session: - session.headers.update(self._api_headers) - ctx = self.CallContext(request, session) - user: GithubIdentity = self._authenticate(ctx) - _logger.info(f"Authenticated the user as {user}") - self._authorize(ctx, user) - return user + with CallContext(self._cfg, request) as ctx: + identity: GithubIdentity = self._authenticate(ctx) + identity.authorize(ctx) + return identity @property def api_url(self) -> str: - return self._api_url + return self._cfg.api_url def factory(**options: Any) -> GithubAuthenticator: diff --git a/pyproject.toml b/pyproject.toml index 6532e7b..1854a63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ keywords = [ # https://pypi.org/classifiers/ classifiers = [ "Development Status :: 5 - Production/Stable", - "License :: OSI Approved:: MIT License", + "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", @@ -38,6 +38,9 @@ email="hello@rufuspollock.com" name="Adam Thornton" email="athornton@lsst.org" +[[project.authors]] +name="Vit Zikmund" + [project.urls] Homepage = "https://giftless.datopian.com" Source = "https://github.com/datopian/giftless" diff --git a/requirements/dev.in b/requirements/dev.in index 99cea93..f8f9315 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -1,5 +1,7 @@ -c main.txt +uv + pip-tools tox flake8 @@ -28,4 +30,6 @@ furo sphinx sphinx-autodoc-typehints +# Internal tooling +scriv diff --git a/requirements/dev.txt b/requirements/dev.txt index 2dd422f..7fb7f99 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,13 +1,11 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --no-emit-index-url --output-file=requirements/dev.txt requirements/dev.in -# +# This file was autogenerated by uv via the following command: +# uv pip compile --no-emit-index-url -o requirements/dev.txt requirements/dev.in alabaster==0.7.16 # via sphinx attrs==23.2.0 - # via pytest-mypy + # via + # pytest-mypy + # scriv babel==2.14.0 # via sphinx beautifulsoup4==4.12.3 @@ -38,15 +36,17 @@ charset-normalizer==3.3.2 click==8.1.7 # via # -c requirements/main.txt + # click-log # pip-tools + # scriv +click-log==0.4.0 + # via scriv colorama==0.4.6 # via tox commonmark==0.9.1 # via recommonmark -coverage[toml]==7.4.3 - # via - # coverage - # pytest-cov +coverage==7.4.3 + # via pytest-cov distlib==0.3.8 # via virtualenv docutils==0.20.1 @@ -86,13 +86,18 @@ isort==5.13.2 jinja2==3.1.3 # via # -c requirements/main.txt + # scriv # sphinx +markdown-it-py==3.0.0 + # via scriv markupsafe==2.1.5 # via # -c requirements/main.txt # jinja2 mccabe==0.7.0 # via flake8 +mdurl==0.1.2 + # via markdown-it-py multidict==6.0.5 # via yarl mypy==1.9.0 @@ -107,6 +112,8 @@ packaging==24.0 # pytest # sphinx # tox +pip==24.3.1 + # via pip-tools pip-tools==7.4.1 # via -r requirements/dev.in platformdirs==4.2.0 @@ -171,6 +178,7 @@ requests==2.31.0 # via # -c requirements/main.txt # responses + # scriv # sphinx responses==0.25.0 # via -r requirements/dev.in @@ -178,6 +186,10 @@ rsa==4.9 # via # -c requirements/main.txt # google-auth +scriv==1.5.1 + # via -r requirements/dev.in +setuptools==75.6.0 + # via pip-tools snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 @@ -228,7 +240,6 @@ types-s3transfer==0.10.0 typing-extensions==4.10.0 # via # -c requirements/main.txt - # boto3-stubs # mypy urllib3==2.0.7 # via @@ -236,6 +247,8 @@ urllib3==2.0.7 # requests # responses # types-requests +uv==0.5.8 + # via -r requirements/dev.in vcrpy==6.0.1 # via pytest-vcr virtualenv==20.25.1 @@ -246,7 +259,3 @@ wrapt==1.16.0 # via vcrpy yarl==1.9.4 # via vcrpy - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/requirements/main.txt b/requirements/main.txt index 9afde47..490e194 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1,24 +1,20 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --no-emit-index-url --output-file=requirements/main.txt requirements/main.in -# +# This file was autogenerated by uv via the following command: +# uv pip compile --no-emit-index-url -o requirements/main.txt requirements/main.in azure-core==1.30.1 # via azure-storage-blob azure-storage-blob==12.19.1 - # via -r main.in + # via -r requirements/main.in blinker==1.7.0 # via flask boto3==1.34.59 - # via -r main.in + # via -r requirements/main.in botocore==1.34.59 # via # boto3 # s3transfer cachetools==5.3.3 # via - # -r main.in + # -r requirements/main.in # google-auth certifi==2024.2.2 # via requests @@ -30,19 +26,19 @@ click==8.1.7 # via flask cryptography==42.0.5 # via - # -r main.in + # -r requirements/main.in # azure-storage-blob figcan==0.0.4 - # via -r main.in + # via -r requirements/main.in flask==2.3.3 # via - # -r main.in + # -r requirements/main.in # flask-classful # flask-marshmallow flask-classful==0.16.0 - # via -r main.in + # via -r requirements/main.in flask-marshmallow==0.15.0 - # via -r main.in + # via -r requirements/main.in google-api-core==2.17.1 # via # google-cloud-core @@ -55,7 +51,7 @@ google-auth==2.28.2 google-cloud-core==2.4.1 # via google-cloud-storage google-cloud-storage==2.15.0 - # via -r main.in + # via -r requirements/main.in google-crc32c==1.5.0 # via # google-cloud-storage @@ -66,8 +62,8 @@ googleapis-common-protos==1.63.0 # via google-api-core idna==3.6 # via requests -importlib-metadata==7.0.2 ; python_version < "3.13" - # via -r main.in +importlib-metadata==7.0.2 + # via -r requirements/main.in isodate==0.6.1 # via azure-storage-blob itsdangerous==2.1.2 @@ -104,15 +100,15 @@ pyasn1-modules==0.3.0 pycparser==2.21 # via cffi pyjwt==2.8.0 - # via -r main.in + # via -r requirements/main.in python-dateutil==2.9.0.post0 # via - # -r main.in + # -r requirements/main.in # botocore python-dotenv==1.0.1 - # via -r main.in + # via -r requirements/main.in pyyaml==6.0.1 - # via -r main.in + # via -r requirements/main.in requests==2.31.0 # via # azure-core @@ -129,7 +125,7 @@ six==1.16.0 # python-dateutil typing-extensions==4.10.0 # via - # -r main.in + # -r requirements/main.in # azure-core # azure-storage-blob urllib3==2.0.7 @@ -137,10 +133,10 @@ urllib3==2.0.7 # botocore # requests webargs==8.4.0 - # via -r main.in + # via -r requirements/main.in werkzeug==3.0.3 # via - # -r main.in + # -r requirements/main.in # flask zipp==3.17.0 # via importlib-metadata diff --git a/tests/auth/test_github.py b/tests/auth/test_github.py index 1409284..7d4c443 100644 --- a/tests/auth/test_github.py +++ b/tests/auth/test_github.py @@ -2,6 +2,7 @@ import base64 from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed +from copy import deepcopy from random import shuffle from time import sleep from typing import Any, cast @@ -9,11 +10,13 @@ import cachetools.keys import flask import pytest +import requests import responses from marshmallow.exceptions import ValidationError import giftless.auth.github as gh from giftless.auth import Unauthorized +from giftless.auth.github import GithubAppIdentity from giftless.auth.identity import Identity, Permission @@ -173,14 +176,25 @@ def test_config_schema_empty_cache() -> None: _config = gh.Config.from_dict(options) +def test_config_schema_api_timeout() -> None: + with pytest.raises(ValidationError): + _config = gh.Config.from_dict({"api_timeout": "invalid"}) + cfg = gh.Config.from_dict({"api_timeout": 1}) + assert cfg.api_timeout == 1.0 + cfg = gh.Config.from_dict({"api_timeout": [1, 2]}) + assert cfg.api_timeout == (1.0, 2.0) + with pytest.raises(ValidationError): + _config = gh.Config.from_dict({"api_timeout": [1, "invalid"]}) + + DEFAULT_CONFIG = gh.Config.from_dict({}) -DEFAULT_TOKEN_DICT = { +DEFAULT_USER_DICT = { "login": "kingofthebritons", - "id": "12345678", + "id": "125678", "name": "arthur", "email": "arthur@camelot.gov.uk", } -DEFAULT_USER_ARGS = tuple(DEFAULT_TOKEN_DICT.values()) +DEFAULT_USER_ARGS = tuple(DEFAULT_USER_DICT.values()) ZERO_CACHE_CONFIG = gh.CacheConfig( token_max_size=0, auth_max_size=0, @@ -191,15 +205,69 @@ def test_config_schema_empty_cache() -> None: ORG = "my-org" REPO = "my-repo" +DEFAULT_ORG_ACCOUNT = { + "login": ORG, + "id": 12345678, +} +DEFAULT_SEL_ID = 123 +DEFAULT_SEL_CLIENT_ID = "Iv1.4f5cb2a91609a823" +DEFAULT_SEL_APP_ID = 123456 +DEFAULT_SEL_APP_SLUG = "app-with-selected-repos" +DEFAULT_ALL_ID = 456 +DEFAULT_ALL_CLIENT_ID = "Iv23liEtURKGAMtEbGUy" +DEFAULT_ALL_APP_ID = 456123 +DEFAULT_ALL_APP_SLUG = "app-with-all-repos" +DEFAULT_APP_TOKEN = "ghs_tCnvkxzE2v7DgEE45fCGnMMbFLNO8T19EVAH" +DEFAULT_ORG_INSTALLATIONS = { + "total_count": 2, + "installations": [ + { + "id": DEFAULT_SEL_ID, + "client_id": DEFAULT_SEL_CLIENT_ID, + "account": DEFAULT_ORG_ACCOUNT, + "repository_selection": "selected", + "app_id": DEFAULT_SEL_APP_ID, + "app_slug": DEFAULT_SEL_APP_SLUG, + "target_id": DEFAULT_ORG_ACCOUNT["id"], + "target_type": "Organization", + "permissions": {"contents": "read", "metadata": "read"}, + }, + { + "id": DEFAULT_ALL_ID, + "client_id": DEFAULT_ALL_CLIENT_ID, + "account": DEFAULT_ORG_ACCOUNT, + "repository_selection": "all", + "app_id": DEFAULT_ALL_APP_ID, + "app_slug": DEFAULT_ALL_APP_SLUG, + "target_id": DEFAULT_ORG_ACCOUNT["id"], + "target_type": "Organization", + "permissions": { + "organization_administration": "read", + "contents": "read", + "metadata": "read", + }, + }, + ], +} -def test_github_identity_core() -> None: +DEFAULT_INSTALLATION_REPO = { + "id": 123456789, + "name": REPO, + "full_name": f"{ORG}/{REPO}", + "owner": DEFAULT_ORG_ACCOUNT, +} + + +def test_github_user_identity_core() -> None: # use some value to get filtered out - token_dict = DEFAULT_TOKEN_DICT | {"other_field": "other_value"} + user_data = DEFAULT_USER_DICT | {"other_field": "other_value"} cache_cfg = DEFAULT_CONFIG.cache - core_identity = gh._CoreGithubIdentity.from_token(token_dict) - user = gh.GithubIdentity(core_identity, token_dict, cache_cfg) + core_identity = gh.GithubUserIdentity.CoreIdentity.from_user_data( + user_data + ) + user = gh.GithubUserIdentity(core_identity, user_data, cache_cfg) assert (user.id, user.github_id, user.name, user.email) == tuple( - DEFAULT_TOKEN_DICT.values() + DEFAULT_USER_DICT.values() ) assert user.cache_ttl({Permission.WRITE}) == cache_cfg.auth_write_ttl @@ -210,13 +278,15 @@ def test_github_identity_core() -> None: def test_github_identity_authorization_cache() -> None: - core_identity = gh._CoreGithubIdentity.from_token(DEFAULT_TOKEN_DICT) - user = gh.GithubIdentity( - core_identity, DEFAULT_TOKEN_DICT, DEFAULT_CONFIG.cache + core_identity = gh.GithubUserIdentity.CoreIdentity.from_user_data( + DEFAULT_USER_DICT + ) + user = gh.GithubUserIdentity( + core_identity, DEFAULT_USER_DICT, DEFAULT_CONFIG.cache ) assert not user.is_authorized(ORG, REPO, Permission.READ_META) - user.authorize(ORG, REPO, {Permission.READ_META, Permission.READ}) - assert user.permissions(ORG, REPO) == { + user._set_permissions(ORG, REPO, {Permission.READ_META, Permission.READ}) + assert user._permissions(ORG, REPO) == { Permission.READ_META, Permission.READ, } @@ -226,13 +296,15 @@ def test_github_identity_authorization_cache() -> None: def test_github_identity_authorization_proxy_cache_only() -> None: - core_identity = gh._CoreGithubIdentity.from_token(DEFAULT_TOKEN_DICT) - user = gh.GithubIdentity( - core_identity, DEFAULT_TOKEN_DICT, ZERO_CACHE_CONFIG + core_identity = gh.GithubUserIdentity.CoreIdentity.from_user_data( + DEFAULT_USER_DICT + ) + user = gh.GithubUserIdentity( + core_identity, DEFAULT_USER_DICT, ZERO_CACHE_CONFIG ) org, repo, repo2 = ORG, REPO, "repo2" - user.authorize(org, repo, Permission.all()) - user.authorize(org, repo2, Permission.all()) + user._set_permissions(org, repo, Permission.all()) + user._set_permissions(org, repo2, Permission.all()) assert user.is_authorized(org, repo, Permission.READ_META) # without cache, the authorization expires after 1st is_authorized assert not user.is_authorized(org, repo, Permission.READ_META) @@ -240,30 +312,39 @@ def test_github_identity_authorization_proxy_cache_only() -> None: assert not user.is_authorized(org, repo2, Permission.READ_META) -def auth_request( +def auth_request_context( app: flask.Flask, - auth: gh.GithubAuthenticator, org: str = ORG, repo: str = REPO, req_auth_header: str | None = "", + user: str = "token", token: str = "dummy-github-token", -) -> Identity | None: +) -> flask.ctx.RequestContext: if req_auth_header is None: headers = None elif req_auth_header == "": # default - token basic_auth = base64.b64encode( - b":".join([b"token", token.encode()]) + b":".join([user.encode(), token.encode()]) ).decode() headers = {"Authorization": f"Basic {basic_auth}"} else: headers = {"Authorization": req_auth_header} - with app.test_request_context( + return app.test_request_context( f"/{org}/{repo}.git/info/lfs/objects/batch", method="POST", headers=headers, - ): + ) + + +def auth_request( + app: flask.Flask, + auth: gh.GithubAuthenticator, + *args: Any, + **kwargs: Any, +) -> Identity | None: + with auth_request_context(app, *args, **kwargs): return auth(flask.request) @@ -278,7 +359,7 @@ def mock_perm( auth: gh.GithubAuthenticator, org: str = ORG, repo: str = REPO, - login: str = DEFAULT_TOKEN_DICT["login"], + login: str = DEFAULT_USER_DICT["login"], *args: Any, **kwargs: Any, ) -> responses.BaseResponse: @@ -290,6 +371,205 @@ def mock_perm( return cast(responses.BaseResponse, ret) +def mock_org_installations( + api_url: str, + org: str = ORG, + *args: Any, + **kwargs: Any, +) -> responses.BaseResponse: + ret = responses.get( + f"{api_url}/orgs/{org}/installations", + *args, + **kwargs, + ) + return cast(responses.BaseResponse, ret) + + +def installation_repo_data( + repos: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + if repos is None: + repos = [DEFAULT_INSTALLATION_REPO] + return { + "total_count": len(repos), + "repository_selection": "selected", + "repositories": repos, + } + + +def mock_installation_repos( + api_url: str, + *args: Any, + **kwargs: Any, +) -> responses.BaseResponse: + ret = responses.get( + f"{api_url}/installation/repositories", + *args, + **kwargs, + ) + return cast(responses.BaseResponse, ret) + + +def test_call_context_restrict_to_org_only(app: flask.Flask) -> None: + cfg = gh.Config.from_dict({"restrict_to": {ORG: None}}) + with auth_request_context(app): + ctx = gh.CallContext(cfg, flask.request) + assert ctx is not None + with auth_request_context(app, org="bogus"): + with pytest.raises(Unauthorized): + gh.CallContext(cfg, flask.request) + + +def test_call_context_restrict_to_org_and_repo(app: flask.Flask) -> None: + cfg = gh.Config.from_dict({"restrict_to": {ORG: [REPO]}}) + with auth_request_context(app): + ctx = gh.CallContext(cfg, flask.request) + assert ctx is not None + with auth_request_context(app, repo="bogus"): + with pytest.raises(Unauthorized): + gh.CallContext(cfg, flask.request) + + +def test_call_context_api_get_no_session(app: flask.Flask) -> None: + with auth_request_context(app): + ctx = gh.CallContext(DEFAULT_CONFIG, flask.request) + with pytest.raises(RuntimeError): + ctx.api_get("/dummy") + + +def test_call_context_api_get_paginated_no_session(app: flask.Flask) -> None: + with auth_request_context(app): + ctx = gh.CallContext(DEFAULT_CONFIG, flask.request) + with pytest.raises(RuntimeError): + next(ctx.api_get_paginated("/dummy")) + + +@responses.activate +def test_call_context_api_get_paginated_per_page_min_max( + app: flask.Flask, +) -> None: + uri = "/items" + response_url = f"{DEFAULT_CONFIG.api_url}{uri}" + response_data = {"items": [{"item": 1}]} + resp_min = responses.get( + response_url, + match=[ + responses.matchers.query_param_matcher( + # desired params in the request + {"per_page": 1}, + strict_match=False, + ) + ], + json=response_data, + ) + resp_max = responses.get( + response_url, + match=[ + responses.matchers.query_param_matcher( + {"per_page": 100}, strict_match=False + ) + ], + json=response_data, + ) + with auth_request_context(app): + with gh.CallContext(DEFAULT_CONFIG, flask.request) as ctx: + next(ctx.api_get_paginated(uri, per_page=0)) + next(ctx.api_get_paginated(uri, per_page=101)) + assert resp_min.call_count == 1 + assert resp_max.call_count == 1 + + +@responses.activate +def test_call_context_api_get_paginated_list_name(app: flask.Flask) -> None: + one_item = {"item": 1} + items = {"items": [one_item]} + uri_matching = "/items" + url_matching = f"{DEFAULT_CONFIG.api_url}{uri_matching}" + responses.get(url_matching, json=items) + uri_not_matching = "/nomatch" + url_not_matching = f"{DEFAULT_CONFIG.api_url}{uri_not_matching}" + responses.get(url_not_matching, json=items) + uri_explicit_matching = "/explicit-match" + url_explicit_matching = f"{DEFAULT_CONFIG.api_url}{uri_explicit_matching}" + responses.get(url_explicit_matching, json=items) + + with auth_request_context(app): + with gh.CallContext(DEFAULT_CONFIG, flask.request) as ctx: + # verify getting one item works + paginated_gen = ctx.api_get_paginated(uri_matching, per_page=1) + out_item = next(paginated_gen) + assert out_item == one_item + # verify the iteration ends properly + with pytest.raises(StopIteration): + next(paginated_gen) + # verify the iteration without match ends immediately + with pytest.raises(StopIteration): + next(ctx.api_get_paginated(uri_not_matching, per_page=1)) + # verify the explicitly matching entry works again + out_item = next( + ctx.api_get_paginated( + uri_explicit_matching, per_page=1, list_name="items" + ) + ) + assert out_item == one_item + + +@responses.activate +def test_call_context_api_get_paginated_link(app: flask.Flask) -> None: + one_item = {"item": 1} + other_item = {"item": 2} + uri = "/items" + url = f"{DEFAULT_CONFIG.api_url}{uri}" + # return first page with a link to the second + resp_1 = responses.get( + url, + match=[ + responses.matchers.query_param_matcher( + {"page": 1}, strict_match=False + ) + ], + json={"items": [one_item]}, + headers={"link": f'<{url}?page=2>; rel="next"'}, + ) + # return second page with a link to the third (which is bad) + resp_2 = responses.get( + url, + match=[ + responses.matchers.query_param_matcher( + {"page": 2}, strict_match=False + ) + ], + json={"items": [other_item]}, + headers={"link": f'<{url}?page=3>; rel="next"'}, + ) + resp_3 = responses.get( + url, + match=[ + responses.matchers.query_param_matcher( + {"page": 3}, strict_match=False + ) + ], + json={"error": "not found"}, + status=404, + ) + + with auth_request_context(app): + with gh.CallContext(DEFAULT_CONFIG, flask.request) as ctx: + # verify reading the first page works + paginated_gen = ctx.api_get_paginated(uri, per_page=1) + out_item = next(paginated_gen) + assert out_item == one_item + # verify reading the second page works + out_item = next(paginated_gen) + assert out_item == other_item + # verify the bad iteration + with pytest.raises(requests.exceptions.RequestException): + next(paginated_gen) + assert resp_1.call_count == 1 + assert resp_2.call_count == 1 + assert resp_3.call_count == 1 + + def test_github_auth_request_missing_auth(app: flask.Flask) -> None: auth = gh.factory() with pytest.raises(Unauthorized): @@ -313,7 +593,7 @@ def test_github_auth_request_bad_user(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_bad_perm(app: flask.Flask) -> None: auth = gh.factory(api_version=None) - mock_user(auth, json=DEFAULT_TOKEN_DICT) + mock_user(auth, json=DEFAULT_USER_DICT) mock_perm(auth, json={"error": "Forbidden"}, status=403) with pytest.raises(Unauthorized): @@ -323,7 +603,7 @@ def test_github_auth_request_bad_perm(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_admin(app: flask.Flask) -> None: auth = gh.factory() - mock_user(auth, json=DEFAULT_TOKEN_DICT) + mock_user(auth, json=DEFAULT_USER_DICT) mock_perm(auth, json={"permission": "admin"}) identity = auth_request(app, auth) @@ -334,7 +614,7 @@ def test_github_auth_request_admin(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_read(app: flask.Flask) -> None: auth = gh.factory() - mock_user(auth, json=DEFAULT_TOKEN_DICT) + mock_user(auth, json=DEFAULT_USER_DICT) mock_perm(auth, json={"permission": "read"}) identity = auth_request(app, auth) @@ -346,7 +626,7 @@ def test_github_auth_request_read(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_none(app: flask.Flask) -> None: auth = gh.factory() - mock_user(auth, json=DEFAULT_TOKEN_DICT) + mock_user(auth, json=DEFAULT_USER_DICT) mock_perm(auth, json={"permission": "none"}) identity = auth_request(app, auth) @@ -358,7 +638,7 @@ def test_github_auth_request_none(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_cached(app: flask.Flask) -> None: auth = gh.factory() - user_resp = mock_user(auth, json=DEFAULT_TOKEN_DICT) + user_resp = mock_user(auth, json=DEFAULT_USER_DICT) perm_resp = mock_perm(auth, json={"permission": "admin"}) auth_request(app, auth) @@ -373,17 +653,20 @@ def test_github_auth_request_cached(app: flask.Flask) -> None: @responses.activate def test_github_auth_request_cache_no_leak(app: flask.Flask) -> None: auth = gh.factory(cache={"token_max_size": 2}) - user_resp = mock_user(auth, json=DEFAULT_TOKEN_DICT) + user_resp = mock_user(auth, json=DEFAULT_USER_DICT) perm_resp = mock_perm(auth, json={"permission": "admin"}) # authenticate 1st token, check it got cached properly token1 = "token-1" token1_cache_key = cachetools.keys.hashkey(token1) identity1 = auth_request(app, auth, token=token1) + assert isinstance(identity1, gh.GithubUserIdentity) assert len(auth._token_cache) == 1 assert token1_cache_key in auth._token_cache - assert len(auth._cached_users) == 1 - assert any(i is identity1 for i in auth._cached_users.values()) + assert len(gh.GithubUserIdentity._cached_users) == 1 + assert any( + i is identity1 for i in gh.GithubUserIdentity._cached_users.values() + ) # see both the authentication and authorization requests took place assert user_resp.call_count == 1 assert perm_resp.call_count == 1 @@ -396,8 +679,10 @@ def test_github_auth_request_cache_no_leak(app: flask.Flask) -> None: identity2 = auth_request(app, auth, token=token2) assert len(auth._token_cache) == 2 assert token2_cache_key in auth._token_cache - assert len(auth._cached_users) == 1 - assert any(i is identity2 for i in auth._cached_users.values()) + assert len(gh.GithubUserIdentity._cached_users) == 1 + assert any( + i is identity2 for i in gh.GithubUserIdentity._cached_users.values() + ) # see only the authentication request took place assert user_resp.call_count == 2 assert perm_resp.call_count == 1 @@ -410,8 +695,10 @@ def test_github_auth_request_cache_no_leak(app: flask.Flask) -> None: assert len(auth._token_cache) == 2 assert token3_cache_key in auth._token_cache assert token1_cache_key not in auth._token_cache - assert len(auth._cached_users) == 1 - assert any(i is identity3 for i in auth._cached_users.values()) + assert len(gh.GithubUserIdentity._cached_users) == 1 + assert any( + i is identity3 for i in gh.GithubUserIdentity._cached_users.values() + ) # see only the authentication request took place assert user_resp.call_count == 3 assert perm_resp.call_count == 1 @@ -420,16 +707,251 @@ def test_github_auth_request_cache_no_leak(app: flask.Flask) -> None: # evict 2nd cached token del auth._token_cache[token2_cache_key] assert len(auth._token_cache) == 1 - assert len(auth._cached_users) == 1 + assert len(gh.GithubUserIdentity._cached_users) == 1 # evict 3rd del auth._token_cache[token3_cache_key] assert len(auth._token_cache) == 0 - assert len(auth._cached_users) == 0 + assert len(gh.GithubUserIdentity._cached_users) == 0 # try once more with 1st token auth_request(app, auth, token=token1) assert len(auth._token_cache) == 1 - assert len(auth._cached_users) == 1 + assert len(gh.GithubUserIdentity._cached_users) == 1 # see both the authentication and authorization requests took place assert user_resp.call_count == 4 assert perm_resp.call_count == 2 + + +@responses.activate +def test_github_auth_request_app_no_user(app: flask.Flask) -> None: + auth = gh.factory() + mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + + with pytest.raises(Unauthorized): + auth_request(app, auth, user="", token=DEFAULT_APP_TOKEN) + + +@responses.activate +def test_github_auth_request_app_bad_user(app: flask.Flask) -> None: + auth = gh.factory() + mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + + with pytest.raises(Unauthorized): + auth_request(app, auth, token=DEFAULT_APP_TOKEN) + + +@responses.activate +def test_github_auth_request_app_all_repos(app: flask.Flask) -> None: + auth = gh.factory(cache={"token_max_size": 0}) + resp = mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + + # match for installation id + identity_0 = auth_request( + app, auth, user=str(DEFAULT_ALL_ID), token=DEFAULT_APP_TOKEN + ) + assert identity_0 is not None + assert identity_0.is_authorized(ORG, REPO, Permission.READ) + assert not identity_0.is_authorized(ORG, REPO, Permission.WRITE) + # match for app_id + identity = auth_request( + app, auth, user=str(DEFAULT_ALL_APP_ID), token=DEFAULT_APP_TOKEN + ) + assert identity == identity_0 + # match for client_id + identity = auth_request( + app, auth, user=DEFAULT_ALL_CLIENT_ID, token=DEFAULT_APP_TOKEN + ) + assert identity == identity_0 + # match for client_id + identity = auth_request( + app, auth, user=DEFAULT_ALL_APP_SLUG, token=DEFAULT_APP_TOKEN + ) + assert identity == identity_0 + + assert resp.call_count == 4 + + +@responses.activate +def test_github_auth_request_app_no_org_access(app: flask.Flask) -> None: + auth = gh.factory(cache={"token_max_size": 0}) + resp = mock_org_installations( + auth.api_url, json={"error": "Insufficient access rights."}, status=403 + ) + with pytest.raises(Unauthorized): + auth_request(app, auth, token=DEFAULT_APP_TOKEN) + assert resp.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_reauth(app: flask.Flask) -> None: + auth = gh.factory(cache={"auth_max_size": 0}) + resp = mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + identity = auth_request( + app, auth, user=str(DEFAULT_ALL_ID), token=DEFAULT_APP_TOKEN + ) + assert identity is not None + assert identity.is_authorized(ORG, REPO, Permission.READ) + # the authorization shouldn't be cached + identity = auth_request(app, auth, token=DEFAULT_APP_TOKEN) + assert identity is not None + assert identity.is_authorized(ORG, REPO, Permission.READ) + + assert resp.call_count == 2 + + +@responses.activate +def test_github_auth_request_app_selected_repos(app: flask.Flask) -> None: + auth = gh.factory(cache={"token_max_size": 0}) + resp_i = mock_org_installations( + auth.api_url, json=DEFAULT_ORG_INSTALLATIONS + ) + resp_r = mock_installation_repos( + auth.api_url, json=installation_repo_data() + ) + + identity = auth_request( + app, auth, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ) + assert identity is not None + assert identity.is_authorized(ORG, REPO, Permission.READ) + assert resp_i.call_count == 1 + assert resp_r.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_selected_repos_no_match( + app: flask.Flask, +) -> None: + auth = gh.factory(cache={"auth_max_size": 2}) # one gets casually cached + no_match_repo_1 = DEFAULT_INSTALLATION_REPO.copy() + no_match_repo_1_name = "no-match-1" + no_match_repo_1["name"] = no_match_repo_1_name + no_match_repo_2 = DEFAULT_INSTALLATION_REPO.copy() + no_match_repo_2["name"] = "no-match-2" + no_match_repos = [no_match_repo_1, no_match_repo_2] + mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + resp_r = mock_installation_repos( + auth.api_url, json=installation_repo_data(no_match_repos) + ) + + identity = auth_request( + app, auth, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ) + assert identity is not None + assert resp_r.call_count == 1 + assert not identity.is_authorized(ORG, REPO, Permission.READ) + assert identity.is_authorized(ORG, no_match_repo_1_name, Permission.READ) + + +@responses.activate +def test_github_auth_request_app_selected_repos_no_access( + app: flask.Flask, +) -> None: + auth = gh.factory() + mock_org_installations(auth.api_url, json=DEFAULT_ORG_INSTALLATIONS) + resp_r = mock_installation_repos( + auth.api_url, json={"error": "Insufficient access rights."}, status=403 + ) + + with pytest.raises(Unauthorized): + auth_request( + app, auth, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ) + + assert resp_r.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_selected_repos_bad_authorize( + app: flask.Flask, +) -> None: + mock_org_installations( + DEFAULT_CONFIG.api_url, json=DEFAULT_ORG_INSTALLATIONS + ) + with auth_request_context( + app, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ): + with gh.CallContext(DEFAULT_CONFIG, flask.request) as ctx: + identity = GithubAppIdentity.authenticate(ctx) + ctx.org = "whoops" + with pytest.raises(RuntimeError): + identity.authorize(ctx) + + +@responses.activate +def test_github_auth_request_app_missing_permissions(app: flask.Flask) -> None: + auth = gh.factory() + no_perm = deepcopy(DEFAULT_ORG_INSTALLATIONS) + inst = next( + _i + for _i in cast(list, no_perm["installations"]) + if _i["id"] == DEFAULT_SEL_ID + ) + del inst["permissions"] + resp_i = mock_org_installations(auth.api_url, json=no_perm) + + with pytest.raises(Unauthorized): + auth_request( + app, auth, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ) + assert resp_i.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_missing_permissions_contents( + app: flask.Flask, +) -> None: + auth = gh.factory() + no_perm = deepcopy(DEFAULT_ORG_INSTALLATIONS) + inst = next( + _i + for _i in cast(list, no_perm["installations"]) + if _i["id"] == DEFAULT_SEL_ID + ) + del inst["permissions"]["contents"] + resp_i = mock_org_installations(auth.api_url, json=no_perm) + + with pytest.raises(Unauthorized): + auth_request( + app, auth, user=str(DEFAULT_SEL_ID), token=DEFAULT_APP_TOKEN + ) + assert resp_i.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_write_permissions(app: flask.Flask) -> None: + auth = gh.factory() + no_perm = deepcopy(DEFAULT_ORG_INSTALLATIONS) + inst = next( + _i + for _i in cast(list, no_perm["installations"]) + if _i["id"] == DEFAULT_ALL_ID + ) + inst["permissions"]["contents"] = "write" + resp_i = mock_org_installations(auth.api_url, json=no_perm) + + identity = auth_request( + app, auth, user=str(DEFAULT_ALL_ID), token=DEFAULT_APP_TOKEN + ) + assert identity is not None + assert identity.is_authorized(ORG, REPO, Permission.WRITE) + assert resp_i.call_count == 1 + + +@responses.activate +def test_github_auth_request_app_unknown_permissions(app: flask.Flask) -> None: + auth = gh.factory() + no_perm = deepcopy(DEFAULT_ORG_INSTALLATIONS) + inst = next( + _i + for _i in cast(list, no_perm["installations"]) + if _i["id"] == DEFAULT_ALL_ID + ) + inst["permissions"]["contents"] = "twist" + resp_i = mock_org_installations(auth.api_url, json=no_perm) + + with pytest.raises(Unauthorized): + auth_request( + app, auth, user=str(DEFAULT_ALL_ID), token=DEFAULT_APP_TOKEN + ) + assert resp_i.call_count == 1