diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c86fc78 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [0.15] - 2025-12-31 + +### Added +- Vault authentication improvements with host-restricted token exchange +- Comprehensive tests for Vault authentication behavior +- Enhanced docstrings across all modules for better documentation coverage +- Support for download redirect handling + +### Fixed +- Vault token exchange now restricted to known hosts for improved security +- Clearer authentication error messages +- README instructions now consistent with PyPI release + +### Changed +- Updated CLI usage documentation to reflect current command structure +- Improved error handling in download operations + +### Notes +- Version 0.15 skips 0.13 and 0.14 as requested in issue #35 +- This release updates the PyPI package to align with current repository features diff --git a/README.md b/README.md index e329578..171590c 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,12 @@ Before using the client, install it via pip: python3 -m pip install databusclient ``` +Note: the PyPI release was updated and this repository prepares version `0.15`. If you previously installed `databusclient` via `pip` and observe different CLI behavior, upgrade to the latest release: + +```bash +python3 -m pip install --upgrade databusclient==0.15 +``` + You can then use the client in the command line: ```bash diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md new file mode 100644 index 0000000..b776ab2 --- /dev/null +++ b/RELEASE_NOTES.md @@ -0,0 +1,97 @@ +# Release Notes for databusclient 0.15 + +## Overview +This release addresses issue #35 by providing a new PyPI package (version 0.15) to ensure `pip install databusclient` provides the latest CLI features and bug fixes. 
+ +## Version +**0.15** (skipping 0.13 and 0.14 as requested) + +## What's New + +### Features & Improvements +- **Vault Authentication Enhancement**: Host-restricted token exchange for improved security +- **Better Error Messages**: Clearer authentication error messages for easier debugging +- **Download Redirect Handling**: Improved handling of redirects during file downloads +- **Comprehensive Documentation**: Enhanced docstrings across all modules + +### Bug Fixes +- Fixed Vault token exchange to only work with known hosts +- Improved error handling in download operations +- Aligned README with current CLI behavior + +### Testing +- Added comprehensive tests for Vault authentication +- Improved test coverage overall + +## Installation + +After this release is published to PyPI, users can install or upgrade with: + +```bash +pip install databusclient==0.15 +# or to upgrade +pip install --upgrade databusclient +``` + +## Build Artifacts + +The following distribution files have been created and validated: +- `databusclient-0.15-py3-none-any.whl` (wheel format) +- `databusclient-0.15.tar.gz` (source distribution) + +Both files have passed `twine check` validation. + +## Publishing Instructions + +### Prerequisites +1. PyPI account with maintainer access to the `databusclient` package +2. PyPI API token configured + +### Steps to Publish + +1. **Verify the build artifacts** (already done): + ```bash + poetry build + twine check dist/* + ``` + +2. **Upload to TestPyPI** (recommended first): + ```bash + twine upload --repository testpypi dist/* + ``` + Then test installation: + ```bash + pip install --index-url https://test.pypi.org/simple/ databusclient==0.15 + ``` + +3. **Upload to PyPI**: + ```bash + twine upload dist/* + ``` + +4. **Create a Git tag**: + ```bash + git tag -a v0.15 -m "Release version 0.15" + git push origin v0.15 + ``` + +5. 
**Create a GitHub Release**: + - Go to GitHub repository → Releases → Draft a new release + - Choose tag `v0.15` + - Release title: `databusclient 0.15` + - Copy content from CHANGELOG.md + - Attach the dist files as release assets + +## Verification + +After publishing, verify the release: +```bash +pip install --upgrade databusclient==0.15 +databusclient --version +databusclient --help +``` + +## Notes +- This release resolves issue #35 +- The PyPI package will now be consistent with the repository's CLI documentation +- Version numbers 0.13 and 0.14 were intentionally skipped as requested diff --git a/databusclient/__init__.py b/databusclient/__init__.py index d15edb6..7b2c625 100644 --- a/databusclient/__init__.py +++ b/databusclient/__init__.py @@ -1,8 +1,22 @@ +"""Top-level package for the databus Python client. + +This module exposes a small set of convenience functions and the CLI +entrypoint so the package can be used as a library or via +``python -m databusclient``. +""" + from databusclient import cli from databusclient.api.deploy import create_dataset, create_distribution, deploy +__version__ = "0.15" __all__ = ["create_dataset", "deploy", "create_distribution"] def run(): + """Start the Click CLI application. + + This function is used by the ``__main__`` module and the package + entrypoint to invoke the command line interface. + """ + cli.app() diff --git a/databusclient/__main__.py b/databusclient/__main__.py index 8fe6fda..3a50f9a 100644 --- a/databusclient/__main__.py +++ b/databusclient/__main__.py @@ -1,3 +1,19 @@ +"""Module used for ``python -m databusclient`` execution. + +Runs the package's CLI application. +""" + from databusclient import cli -cli.app() + +def main(): + """Invoke the CLI application. + + Kept as a named function for easier testing and clarity. 
+ """ + + cli.app() + + +if __name__ == "__main__": + main() diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py index 7107983..edfb95c 100644 --- a/databusclient/api/delete.py +++ b/databusclient/api/delete.py @@ -1,3 +1,10 @@ +"""Helpers for deleting Databus resources via the Databus HTTP API. + +This module provides utilities to delete groups, artifacts and versions on a +Databus instance using authenticated HTTP requests. The class `DeleteQueue` +also allows batching of deletions. +""" + import json from typing import List @@ -16,23 +23,43 @@ class DeleteQueue: """ def __init__(self, databus_key: str): + """Create a DeleteQueue bound to a given Databus API key. + + Args: + databus_key: API key used to authenticate deletion requests. + """ self.databus_key = databus_key self.queue: set[str] = set() def add_uri(self, databusURI: str): + """Add a single Databus URI to the deletion queue. + + The URI will be deleted when `execute()` is called. + """ self.queue.add(databusURI) def add_uris(self, databusURIs: List[str]): + """Add multiple Databus URIs to the deletion queue. + + Args: + databusURIs: Iterable of full Databus URIs. + """ for uri in databusURIs: self.queue.add(uri) def is_empty(self) -> bool: + """Return True if the queue is empty.""" return len(self.queue) == 0 def is_not_empty(self) -> bool: + """Return True if the queue contains any URIs.""" return len(self.queue) > 0 def execute(self): + """Execute all queued deletions. + + Each queued URI will be deleted using `_delete_resource`. + """ _delete_list( list(self.queue), self.databus_key, @@ -41,16 +68,15 @@ def execute(self): def _confirm_delete(databusURI: str) -> str: - """ - Confirm deletion of a Databus resource with the user. + """Confirm deletion of a Databus resource with the user. - Parameters: - - databusURI: The full databus URI of the resource to delete + Args: + databusURI: The full databus URI of the resource to delete. 
Returns: - - "confirm" if the user confirms deletion - - "skip" if the user chooses to skip deletion - - "cancel" if the user chooses to cancel the entire deletion process + "confirm" if the user confirms deletion. + "skip" if the user chooses to skip deletion. + "cancel" if the user chooses to cancel the entire deletion process. """ print(f"Are you sure you want to delete: {databusURI}?") print( @@ -81,18 +107,17 @@ def _delete_resource( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete a single Databus resource (version, artifact, group). + """Delete a single Databus resource (version, artifact, group). Equivalent to: curl -X DELETE "<databusURI>" -H "accept: */*" -H "X-API-KEY: <databus_key>" - Parameters: - - databusURI: The full databus URI of the resource to delete - - databus_key: Databus API key to authenticate the deletion request - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URI to the queue instead of deleting immediately + Args: + databusURI: The full databus URI of the resource to delete. + databus_key: Databus API key to authenticate the deletion request. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URI to the queue instead of deleting immediately. """ # Confirm the deletion request, skip the request or cancel deletion process @@ -134,15 +159,14 @@ def _delete_list( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete a list of Databus resources.
- - Parameters: - - databusURIs: List of full databus URIs of the resources to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URIs to the queue instead of deleting immediately + """Delete a list of Databus resources. + + Args: + databusURIs: List of full databus URIs of the resources to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URIs to the queue instead of deleting immediately. """ for databusURI in databusURIs: _delete_resource( @@ -157,18 +181,17 @@ def _delete_artifact( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete an artifact and all its versions. + """Delete an artifact and all its versions. This function first retrieves all versions of the artifact and then deletes them one by one. Finally, it deletes the artifact itself. - Parameters: - - databusURI: The full databus URI of the artifact to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URI to the queue instead of deleting immediately + Args: + databusURI: The full databus URI of the artifact to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URI to the queue instead of deleting immediately. 
""" artifact_body = fetch_databus_jsonld(databusURI, databus_key) @@ -204,18 +227,17 @@ def _delete_group( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete a group and all its artifacts and versions. + """Delete a group and all its artifacts and versions. This function first retrieves all artifacts of the group, then deletes each artifact (which in turn deletes its versions). Finally, it deletes the group itself. - Parameters: - - databusURI: The full databus URI of the group to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URI to the queue instead of deleting immediately + Args: + databusURI: The full databus URI of the group to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URI to the queue instead of deleting immediately. """ group_body = fetch_databus_jsonld(databusURI, databus_key) @@ -242,17 +264,16 @@ def _delete_group( def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool): - """ - Delete a dataset from the databus. + """Delete a dataset from the databus. Delete a group, artifact, or version identified by the given databus URI. Will recursively delete all data associated with the dataset. 
- Parameters: - - databusURIs: List of full databus URIs of the resources to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, will only print what would be deleted without performing actual deletions - - force: If True, skip confirmation prompt and proceed with deletion + Args: + databusURIs: List of full databus URIs of the resources to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, will only print what would be deleted without performing actual deletions. + force: If True, skip confirmation prompt and proceed with deletion. """ queue = DeleteQueue(databus_key) diff --git a/databusclient/api/deploy.py b/databusclient/api/deploy.py index ef8ebf5..23c77ea 100644 --- a/databusclient/api/deploy.py +++ b/databusclient/api/deploy.py @@ -1,3 +1,10 @@ +"""Build and publish Databus datasets (JSON-LD) from provided metadata. + +This module exposes helpers to create distribution strings, compute file +information (sha256 and size), construct dataset JSON-LD payloads and +publish them to a Databus instance using the Databus publish API. +""" + import hashlib import json from enum import Enum @@ -25,6 +32,13 @@ class DeployLogLevel(Enum): def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: + """Parse content-variant key/value pairs from a distribution string. + + The CLI supports passing a distribution as ``url|lang=en_type=parsed|...``. + This helper extracts the ``lang``/``type`` style key/value pairs as a + dictionary. + """ + args = distribution_str.split("|") # cv string is ALWAYS at position 1 after the URL @@ -50,6 +64,12 @@ def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: def _get_filetype_definition( distribution_str: str, ) -> Tuple[Optional[str], Optional[str]]: + """Extract an explicit file format and compression from a distribution string. 
+ + Returns (file_extension, compression) where each may be ``None`` if the + format should be inferred from the URL path. + """ + file_ext = None compression = None @@ -87,6 +107,12 @@ def _get_filetype_definition( def _get_extensions(distribution_str: str) -> Tuple[str, str, str]: + """Return tuple `(extension_part, format_extension, compression)`. + + ``extension_part`` is the textual extension appended to generated + filenames (e.g. ".ttl.gz"). + """ + extension_part = "" format_extension, compression = _get_filetype_definition(distribution_str) @@ -126,6 +152,11 @@ def _get_extensions(distribution_str: str) -> Tuple[str, str, str]: def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int]]: + """Parse an optional ``sha256sum:length`` tuple from a distribution string. + + Returns (sha256sum, content_length) or (None, None) when not provided. + """ + metadata_list = distribution_str.split("|")[1:] # check whether there is the shasum:length tuple separated by : if len(metadata_list) == 0 or ":" not in metadata_list[-1]: @@ -146,6 +177,12 @@ def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int] def _load_file_stats(url: str) -> Tuple[str, int]: + """Download the file at ``url`` and compute its SHA-256 and length. + + This is used as a fallback when the caller did not supply checksum/size + information in the CLI or metadata file. + """ + resp = requests.get(url, timeout=30) if resp.status_code >= 400: raise requests.exceptions.RequestException(response=resp) @@ -156,6 +193,11 @@ def _load_file_stats(url: str) -> Tuple[str, int]: def get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str, int]: + """Return parsed file information for a distribution string. + + Returns a tuple `(cvs, format_extension, compression, sha256sum, size)`. 
+ """ + cvs = _get_content_variants(distribution_str) extension_part, format_extension, compression = _get_extensions(distribution_str) diff --git a/databusclient/api/download.py b/databusclient/api/download.py index ac55faa..f045ce2 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -33,16 +33,15 @@ def _download_file( auth_url=None, client_id=None, ) -> None: - """ - Download a file from the internet with a progress bar using tqdm. - - Parameters: - - url: the URL of the file to download - - localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download a file from the internet with a progress bar using tqdm. + + Args: + url: The URL of the file to download. + localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ if localDir is None: _host, account, group, artifact, version, file = ( @@ -192,16 +191,15 @@ def _download_files( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download multiple files from the databus. - - Parameters: - - urls: List of file download URLs - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download multiple files from the databus. 
+ + Args: + urls: List of file download URLs. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ for url in urls: _download_file( @@ -215,15 +213,14 @@ def _download_files( def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str: - """ - Get SPARQL query of collection members from databus collection URI. + """Get SPARQL query of collection members from databus collection URI. - Parameters: - - uri: The full databus collection URI - - databus_key: Optional Databus API key for authentication on protected resources + Args: + uri: The full databus collection URI. + databus_key: Optional Databus API key for authentication on protected resources. Returns: - SPARQL query string to get download URLs of all files in the collection. + SPARQL query string to get download URLs of all files in the collection. """ headers = {"Accept": "text/sparql"} if databus_key is not None: @@ -235,16 +232,15 @@ def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: - """ - Query a SPARQL endpoint and return results in JSON format. + """Query a SPARQL endpoint and return results in JSON format. - Parameters: - - endpoint_url: the URL of the SPARQL endpoint - - query: the SPARQL query string - - databus_key: Optional API key for authentication + Args: + endpoint_url: The URL of the SPARQL endpoint. + query: The SPARQL query string. + databus_key: Optional API key for authentication. Returns: - - Dictionary containing the query results + Dictionary containing the query results. 
""" sparql = SPARQLWrapper(endpoint_url) sparql.method = "POST" @@ -259,16 +255,15 @@ def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: def _get_file_download_urls_from_sparql_query( endpoint_url, query, databus_key=None ) -> List[str]: - """ - Execute a SPARQL query to get databus file download URLs. + """Execute a SPARQL query to get databus file download URLs. - Parameters: - - endpoint_url: the URL of the SPARQL endpoint - - query: the SPARQL query string - - databus_key: Optional API key for authentication + Args: + endpoint_url: The URL of the SPARQL endpoint. + query: The SPARQL query string. + databus_key: Optional API key for authentication. Returns: - - List of file download URLs + List of file download URLs. """ result_dict = _query_sparql_endpoint(endpoint_url, query, databus_key=databus_key) @@ -359,17 +354,16 @@ def _download_collection( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download all files in a databus collection. - - Parameters: - - uri: The full databus collection URI - - endpoint: the databus SPARQL endpoint URL - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download all files in a databus collection. + + Args: + uri: The full databus collection URI. + endpoint: The databus SPARQL endpoint URL. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. 
""" query = _get_sparql_query_of_collection(uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_sparql_query( @@ -393,16 +387,15 @@ def _download_version( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download all files in a databus artifact version. - - Parameters: - - uri: The full databus artifact version URI - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download all files in a databus artifact version. + + Args: + uri: The full databus artifact version URI. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) @@ -425,17 +418,16 @@ def _download_artifact( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download files in a databus artifact. - - Parameters: - - uri: The full databus artifact URI - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. 
- - all_versions: If True, download all versions of the artifact; otherwise, only download the latest version - - vault_token_file: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download files in a databus artifact. + + Args: + uri: The full databus artifact URI. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + all_versions: If True, download all versions of the artifact; otherwise, only download the latest version. + vault_token_file: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) versions = _get_databus_versions_of_artifact(json_str, all_versions=all_versions) @@ -458,16 +450,15 @@ def _download_artifact( def _get_databus_versions_of_artifact( json_str: str, all_versions: bool ) -> str | List[str]: - """ - Parse the JSON-LD of a databus artifact to extract URLs of its versions. + """Parse the JSON-LD of a databus artifact to extract URLs of its versions. - Parameters: - - json_str: JSON-LD string of the databus artifact - - all_versions: If True, return all version URLs; otherwise, return only the latest version URL + Args: + json_str: JSON-LD string of the databus artifact. + all_versions: If True, return all version URLs; otherwise, return only the latest version URL. Returns: - - If all_versions is True: List of all version URLs - - If all_versions is False: URL of the latest version + If all_versions is True: List of all version URLs. + If all_versions is False: URL of the latest version. 
""" json_dict = json.loads(json_str) versions = json_dict.get("databus:hasVersion") @@ -495,15 +486,15 @@ def _get_databus_versions_of_artifact( def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: - """ - Parse the JSON-LD of a databus artifact version to extract download URLs. + """Parse the JSON-LD of a databus artifact version to extract download URLs. + Don't get downloadURLs directly from the JSON-LD, but follow the "file" links to count access to databus accurately. - Parameters: - - json_str: JSON-LD string of the databus artifact version + Args: + json_str: JSON-LD string of the databus artifact version. Returns: - List of all file download URLs in the artifact version. + List of all file download URLs in the artifact version. """ databusIdUrl: List[str] = [] @@ -528,17 +519,16 @@ def _download_group( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download files in a databus group. - - Parameters: - - uri: The full databus group URI - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - all_versions: If True, download all versions of each artifact in the group; otherwise, only download the latest version - - vault_token_file: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download files in a databus group. + + Args: + uri: The full databus group URI. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + all_versions: If True, download all versions of each artifact in the group; otherwise, only download the latest version. + vault_token_file: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. 
+ client_id: Client ID for token exchange. """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) artifacts = _get_databus_artifacts_of_group(json_str) @@ -599,19 +589,18 @@ def download( auth_url="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", client_id="vault-token-exchange", ) -> None: - """ - Download datasets from databus. + """Download datasets from databus. Download of files, versions, artifacts, groups or databus collections via their databus URIs or user-defined SPARQL queries that return file download URLs. - Parameters: - - localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory. - - endpoint: the databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries. - - databusURIs: databus identifiers to specify datasets to download. - - token: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token". - - client_id: Client ID for token exchange. Default is "vault-token-exchange". + Args: + localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory. + endpoint: The databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries. + databusURIs: Databus identifiers to specify datasets to download. + token: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token". + client_id: Client ID for token exchange. Default is "vault-token-exchange". 
""" for databusURI in databusURIs: host, account, group, artifact, version, file = ( diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py index 7e27ff3..948268c 100644 --- a/databusclient/api/utils.py +++ b/databusclient/api/utils.py @@ -1,3 +1,9 @@ +"""Utility helpers used by the API submodules. + +Contains small parsing helpers and HTTP helpers that are shared by +`download`, `deploy` and `delete` modules. +""" + from typing import Optional, Tuple import requests @@ -13,17 +19,21 @@ def get_databus_id_parts_from_file_url( Optional[str], Optional[str], ]: - """ - Extract databus ID parts from a given databus URI. + """Extract databus ID parts from a given databus URI. - Parameters: - - uri: The full databus URI of the form - "http(s)://host/accountId/groupId/artifactId/versionId/fileId" + Args: + uri: The full databus URI of the form "http(s)://host/accountId/groupId/artifactId/versionId/fileId". Returns: - A tuple containing (host, accountId, groupId, artifactId, versionId, fileId). - Each element is a string or None if not present. + A tuple containing (host, accountId, groupId, artifactId, versionId, fileId). + Each element is a string or None if not present. + + Note: + The URI is split into its six parts after the scheme prefix is + removed. Parts missing from a shorter URI are returned as + ``None``. """ + uri = uri.removeprefix("https://").removeprefix("http://") parts = uri.strip("/").split("/") parts += [None] * (6 - len(parts)) # pad with None if less than 6 parts @@ -31,16 +41,16 @@ def get_databus_id_parts_from_file_url( def fetch_databus_jsonld(uri: str, databus_key: str | None = None) -> str: - """ - Retrieve JSON-LD representation of a databus resource. + """Fetch the JSON-LD representation of a Databus resource. - Parameters: - - uri: The full databus URI - - databus_key: Optional Databus API key for authentication on protected resources + Args: + uri: Full Databus resource URI.
+ databus_key: Optional API key for protected resources. Returns: - JSON-LD string representation of the databus resource. + The response body as a string containing JSON-LD. """ + headers = {"Accept": "application/ld+json"} if databus_key is not None: headers["X-API-KEY"] = databus_key diff --git a/databusclient/cli.py b/databusclient/cli.py index 069408e..1a345f3 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -13,7 +13,11 @@ @click.group() def app(): - """Databus Client CLI""" + """Databus Client CLI. + + Provides `deploy`, `download`, and `delete` commands for interacting + with the DBpedia Databus. + """ pass diff --git a/databusclient/extensions/webdav.py b/databusclient/extensions/webdav.py index c0747f6..7981a49 100644 --- a/databusclient/extensions/webdav.py +++ b/databusclient/extensions/webdav.py @@ -1,3 +1,11 @@ +"""WebDAV/Nextcloud upload helper used by the deploy CLI. + +This module computes SHA-256 checksums and sizes for local files and uses +``rclone`` to copy files to a remote WebDAV/Nextcloud instance. The +`upload_to_webdav` function returns a list of metadata dictionaries suitable +for passing to ``deploy_from_metadata``. +""" + import hashlib import os import posixpath @@ -6,6 +14,14 @@ def compute_sha256_and_length(filepath): + """Compute the SHA-256 hex digest and total byte length of a file. + + Args: + filepath: Path to the file to hash. + + Returns: + Tuple of (sha256_hex, size_in_bytes). + """ sha256 = hashlib.sha256() total_length = 0 with open(filepath, "rb") as f: @@ -19,6 +35,11 @@ def compute_sha256_and_length(filepath): def get_all_files(path): + """Return a list of all files for a path. + + If `path` is a file, returns a single-element list. If it is a directory, + walks the directory recursively and returns absolute file paths. 
+ """ if os.path.isfile(path): return [path] files = [] @@ -31,6 +52,17 @@ def get_all_files(path): def upload_to_webdav( source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str ): + """Upload local files or folders to a configured rclone remote. + + Args: + source_paths: List of files or directories to upload. + remote_name: Name of the rclone remote (e.g., "nextcloud"). + remote_path: Destination path on the remote. + webdav_url: Public WebDAV URL used to construct download URLs. + + Returns: + A list of dicts with keys: ``filename``, ``checksum``, ``size``, ``url``. + """ result = [] for path in source_paths: if not os.path.exists(path): diff --git a/pyproject.toml b/pyproject.toml index 5593c74..92f479b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databusclient" -version = "0.14" +version = "0.15" description = "A simple client for submitting, downloading, and deleting data on the DBpedia Databus" authors = ["DBpedia Association"] license = "Apache-2.0 License"