Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/python_inspector/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ def resolve_dependencies(
async def gather_pypi_data():
async def get_pypi_data(package):
data = await get_pypi_data_from_purl(
package, repos=repos, environment=environment, prefer_source=prefer_source
package,
repos=repos,
environment=environment,
prefer_source=prefer_source,
index_urls=list(repos_by_url.keys()),
)

if verbose:
Expand Down
90 changes: 83 additions & 7 deletions src/python_inspector/package_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
from urllib.parse import urlparse, urlunparse

from typing import Dict
from typing import List
from typing import Optional
Expand All @@ -27,7 +30,11 @@


async def get_pypi_data_from_purl(
purl: str, environment: Environment, repos: List[PypiSimpleRepository], prefer_source: bool
purl: str,
environment: Environment,
repos: List[PypiSimpleRepository],
prefer_source: bool,
index_urls: List[str],
) -> Optional[PackageData]:
"""
Generate `Package` object from the `purl` string of pypi type
Expand All @@ -43,7 +50,22 @@ async def get_pypi_data_from_purl(
version = parsed_purl.version
if not version:
raise Exception("Version is not specified in the purl")
base_path = "https://pypi.org/pypi"

# Todo: address the case where several index URLs are passed
if index_urls:
# Backward compatibility: If pypi.org is passed as index url, always resolve against it.
# When multiple index URLs are supported and the todo above is fixed, then this hack can be removed.
if "https://pypi.org/simple" in index_urls:
index_url = None
else:
index_url = index_urls[0]
else:
index_url = None

base_path = (
index_url.removesuffix("/simple") + "/pypi" if index_url else "https://pypi.org/pypi"
)

api_url = f"{base_path}/{name}/{version}/json"

from python_inspector.utils import get_response_async
Expand All @@ -62,10 +84,32 @@ async def get_pypi_data_from_purl(
sdist_url = await get_sdist_download_url(
purl=parsed_purl, repos=repos, python_version=python_version
)

def canonicalize_url(url: str) -> str:
    """
    Return `url` with the "." and ".." segments of its path resolved.

    Only the path component is rewritten; scheme, netloc, params, query and
    fragment are passed through `urlparse`/`urlunparse` unchanged. As with
    `normpath`, redundant separators are collapsed and a trailing slash is
    dropped from the path.
    """
    # URL paths always use forward slashes, so normalize with posixpath
    # rather than the platform-dependent os.path (which needs a backslash
    # fix-up on Windows).
    import posixpath

    parsed = urlparse(url)

    # normpath("") returns ".", which would turn "https://host" into
    # "https://host/."; leave an empty path untouched.
    if not parsed.path:
        return urlunparse(parsed)

    canonical_path = posixpath.normpath(parsed.path)

    # Rebuild the URL with the canonicalized path in place of the original.
    return urlunparse(parsed._replace(path=canonical_path))

if sdist_url:
valid_distribution_urls.append(sdist_url)

valid_distribution_urls = [url for url in valid_distribution_urls if url]
valid_distribution_urls = list(map(canonicalize_url, valid_distribution_urls))

# if prefer_source is True then only source distribution is used
# in case of no source distribution available then wheel is used
Expand All @@ -81,28 +125,60 @@ async def get_pypi_data_from_purl(
]
wheel_url = choose_single_wheel(wheel_urls)
if wheel_url:
valid_distribution_urls.insert(0, wheel_url)
valid_distribution_urls.insert(0, canonicalize_url(wheel_url))

urls = {url.get("url"): url for url in response.get("urls") or []}

# Sanitize all URLs that are relative and canonicalize them
urls_sanitized = {}
for url in urls:
value = urls.get(url)

# remove the URL anchor fragment
url_parsed = urlparse(url)
url = urlunparse(url_parsed._replace(fragment=""))

if url.startswith("https"):
url_sanitized = canonicalize_url(url)
else:
url_sanitized = canonicalize_url(base_path + url)

urls_sanitized[url_sanitized] = value

def remove_credentials_from_url(url: str) -> str:
    """
    Return `url` with any "user:password@" userinfo removed from its netloc.

    The netloc is rebuilt from the parsed hostname and port only; every
    other URL component is kept as parsed. URLs without a netloc (for
    example relative paths) are returned without a host part.
    """
    parsed = urlparse(url)

    # hostname is None when the URL has no host; use "" so that the string
    # concatenation below cannot fail.
    host = parsed.hostname or ""

    # urlparse strips the brackets from IPv6 literals; they must be restored
    # or the host's colons become indistinguishable from the port separator.
    if ":" in host:
        host = f"[{host}]"

    port = parsed.port
    new_netloc = host if port is None else f"{host}:{port}"

    # Replace the old netloc with the one that carries no credentials.
    return urlunparse(parsed._replace(netloc=new_netloc))

# iterate over the valid distribution urls and return the first
# one that is matching.
for dist_url in valid_distribution_urls:
if dist_url not in urls:
if dist_url not in urls_sanitized:
continue

url_data = urls.get(dist_url)
url_data = urls_sanitized.get(dist_url)
digests = url_data.get("digests") or {}

return PackageData(
primary_language="Python",
description=get_description(info),
homepage_url=homepage_url,
api_data_url=api_url,
api_data_url=remove_credentials_from_url(api_url),
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
license_expression=info.get("license_expression"),
declared_license=get_declared_license(info),
download_url=dist_url,
download_url=remove_credentials_from_url(dist_url),
size=url_data.get("size"),
md5=digests.get("md5") or url_data.get("md5_digest"),
sha256=digests.get("sha256"),
Expand Down
145 changes: 67 additions & 78 deletions tests/data/azure-devops.req-310-expected.json

Large diffs are not rendered by default.

145 changes: 67 additions & 78 deletions tests/data/azure-devops.req-312-expected.json

Large diffs are not rendered by default.

145 changes: 67 additions & 78 deletions tests/data/azure-devops.req-313-expected.json

Large diffs are not rendered by default.

145 changes: 67 additions & 78 deletions tests/data/azure-devops.req-314-expected.json

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions tests/data/azure-devops.req-38-expected.json

Large diffs are not rendered by default.

32 changes: 14 additions & 18 deletions tests/data/example-requirements-ignore-errors-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,12 @@
"type": "pypi",
"namespace": null,
"name": "packaging",
"version": "25.0",
"version": "26.0",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Core utilities for Python packages\npackaging\n=========\n\n.. start-intro\n\nReusable core utilities for various Python Packaging\n`interoperability specifications <https://packaging.python.org/specifications/>`_.\n\nThis library provides utilities that implement the interoperability\nspecifications which have clearly one correct behaviour (eg: :pep:`440`)\nor benefit greatly from having a single shared implementation (eg: :pep:`425`).\n\n.. end-intro\n\nThe ``packaging`` project includes the following: version handling, specifiers,\nmarkers, requirements, tags, utilities.\n\nDocumentation\n-------------\n\nThe `documentation`_ provides information and the API for the following:\n\n- Version Handling\n- Specifiers\n- Markers\n- Requirements\n- Tags\n- Utilities\n\nInstallation\n------------\n\nUse ``pip`` to install these utilities::\n\n pip install packaging\n\nThe ``packaging`` library uses calendar-based versioning (``YY.N``).\n\nDiscussion\n----------\n\nIf you run into bugs, you can file them in our `issue tracker`_.\n\nYou can also join ``#pypa`` on Freenode to ask questions or get involved.\n\n\n.. _`documentation`: https://packaging.pypa.io/\n.. _`issue tracker`: https://github.com/pypa/packaging/issues\n\n\nCode of Conduct\n---------------\n\nEveryone interacting in the packaging project's codebases, issue trackers, chat\nrooms, and mailing lists is expected to follow the `PSF Code of Conduct`_.\n\n.. _PSF Code of Conduct: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md\n\nContributing\n------------\n\nThe ``CONTRIBUTING.rst`` file outlines how to contribute to this project as\nwell as how to report a potential security issue. The documentation for this\nproject also covers information about `project development`_ and `security`_.\n\n.. _`project development`: https://packaging.pypa.io/en/latest/development/\n.. 
_`security`: https://packaging.pypa.io/en/latest/security/\n\nProject History\n---------------\n\nPlease review the ``CHANGELOG.rst`` file or the `Changelog documentation`_ for\nrecent changes and project history.\n\n.. _`Changelog documentation`: https://packaging.pypa.io/en/latest/changelog/",
"release_date": "2025-04-19T11:48:57",
"description": "Core utilities for Python packages\npackaging\n=========\n\n.. start-intro\n\nReusable core utilities for various Python Packaging\n`interoperability specifications <https://packaging.python.org/specifications/>`_.\n\nThis library provides utilities that implement the interoperability\nspecifications which have clearly one correct behaviour (eg: :pep:`440`)\nor benefit greatly from having a single shared implementation (eg: :pep:`425`).\n\n.. end-intro\n\nThe ``packaging`` project includes the following: version handling, specifiers,\nmarkers, requirements, tags, metadata, lockfiles, utilities.\n\nDocumentation\n-------------\n\nThe `documentation`_ provides information and the API for the following:\n\n- Version Handling\n- Specifiers\n- Markers\n- Requirements\n- Tags\n- Metadata\n- Lockfiles\n- Utilities\n\nInstallation\n------------\n\nUse ``pip`` to install these utilities::\n\n pip install packaging\n\nThe ``packaging`` library uses calendar-based versioning (``YY.N``).\n\nDiscussion\n----------\n\nIf you run into bugs, you can file them in our `issue tracker`_.\n\nYou can also join ``#pypa`` on Freenode to ask questions or get involved.\n\n\n.. _`documentation`: https://packaging.pypa.io/\n.. _`issue tracker`: https://github.com/pypa/packaging/issues\n\n\nCode of Conduct\n---------------\n\nEveryone interacting in the packaging project's codebases, issue trackers, chat\nrooms, and mailing lists is expected to follow the `PSF Code of Conduct`_.\n\n.. _PSF Code of Conduct: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md\n\nContributing\n------------\n\nThe ``CONTRIBUTING.rst`` file outlines how to contribute to this project as\nwell as how to report a potential security issue. The documentation for this\nproject also covers information about `project development`_ and `security`_.\n\n.. _`project development`: https://packaging.pypa.io/en/latest/development/\n.. 
_`security`: https://packaging.pypa.io/en/latest/security/\n\nProject History\n---------------\n\nPlease review the ``CHANGELOG.rst`` file or the `Changelog documentation`_ for\nrecent changes and project history.\n\n.. _`Changelog documentation`: https://packaging.pypa.io/en/latest/changelog/",
"release_date": "2026-01-21T20:50:37",
"parties": [
{
"type": "person",
Expand All @@ -243,40 +243,36 @@
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Typing :: Typed"
],
"homepage_url": null,
"download_url": "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl",
"size": 66469,
"download_url": "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl",
"size": 74366,
"sha1": null,
"md5": "5fa4842e2eb0d7883b4b0e7c42d6229e",
"sha256": "29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484",
"md5": "6612a2b4cddb48af24b6de0de620e353",
"sha256": "b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/pypa/packaging",
"vcs_url": null,
"copyright": null,
"license_expression": null,
"declared_license": {
"classifiers": [
"License :: OSI Approved :: Apache Software License",
"License :: OSI Approved :: BSD License"
]
},
"license_expression": "Apache-2.0 OR BSD-2-Clause",
"declared_license": {},
"notice_text": null,
"source_packages": [],
"file_references": [],
"extra_data": {},
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/packaging/25.0/json",
"api_data_url": "https://pypi.org/pypi/packaging/26.0/json",
"datasource_id": null,
"purl": "pkg:pypi/packaging@25.0"
"purl": "pkg:pypi/packaging@26.0"
},
{
"type": "pypi",
Expand Down Expand Up @@ -623,7 +619,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/packaging@25.0",
"package": "pkg:pypi/packaging@26.0",
"dependencies": []
},
{
Expand All @@ -639,7 +635,7 @@
"dependencies": [
"pkg:pypi/exceptiongroup@1.3.1",
"pkg:pypi/iniconfig@2.3.0",
"pkg:pypi/packaging@25.0",
"pkg:pypi/packaging@26.0",
"pkg:pypi/pluggy@1.6.0",
"pkg:pypi/pygments@2.19.2",
"pkg:pypi/tomli@2.4.0"
Expand Down
20 changes: 10 additions & 10 deletions tests/data/hash-requirements.txt-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@
"type": "pypi",
"namespace": null,
"name": "certifi",
"version": "2026.1.4",
"version": "2026.2.25",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Python package for providing Mozilla's CA Bundle.\nCertifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.",
"release_date": "2026-01-04T02:42:40",
"release_date": "2026-02-25T02:54:15",
"parties": [
{
"type": "person",
Expand All @@ -201,11 +201,11 @@
"Programming Language :: Python :: 3.9"
],
"homepage_url": "https://github.com/certifi/python-certifi",
"download_url": "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl",
"size": 152900,
"download_url": "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl",
"size": 153684,
"sha1": null,
"md5": "1dab98768140ad2d8dbc9be8f14a2af9",
"sha256": "9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c",
"md5": "1719fecdcfb531a622c0ee93e6bf4ba1",
"sha256": "027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/certifi/python-certifi",
Expand All @@ -225,9 +225,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/certifi/2026.1.4/json",
"api_data_url": "https://pypi.org/pypi/certifi/2026.2.25/json",
"datasource_id": null,
"purl": "pkg:pypi/certifi@2026.1.4"
"purl": "pkg:pypi/certifi@2026.2.25"
},
{
"type": "pypi",
Expand Down Expand Up @@ -514,7 +514,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/certifi@2026.1.4",
"package": "pkg:pypi/certifi@2026.2.25",
"dependencies": []
},
{
Expand All @@ -528,7 +528,7 @@
{
"package": "pkg:pypi/requests@2.25.1",
"dependencies": [
"pkg:pypi/certifi@2026.1.4",
"pkg:pypi/certifi@2026.2.25",
"pkg:pypi/chardet@4.0.0",
"pkg:pypi/idna@2.10",
"pkg:pypi/urllib3@1.26.20"
Expand Down
6 changes: 3 additions & 3 deletions tests/data/resolved_deps/autobahn-310-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"dependencies": [
"pkg:pypi/cryptography@43.0.3",
"pkg:pypi/hyperlink@21.0.0",
"pkg:pypi/setuptools@80.9.0",
"pkg:pypi/setuptools@82.0.1",
"pkg:pypi/txaio@23.6.1"
]
},
Expand Down Expand Up @@ -36,7 +36,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/setuptools@80.9.0",
"package": "pkg:pypi/setuptools@82.0.1",
"dependencies": []
},
{
Expand All @@ -51,7 +51,7 @@
"pkg:pypi/hyperlink@21.0.0",
"pkg:pypi/idna@3.11",
"pkg:pypi/pycparser@2.23",
"pkg:pypi/setuptools@80.9.0",
"pkg:pypi/setuptools@82.0.1",
"pkg:pypi/txaio@23.6.1"
]
]
6 changes: 3 additions & 3 deletions tests/data/resolved_deps/flask-310-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"pkg:pypi/click@8.3.1",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.6"
]
},
{
Expand All @@ -28,7 +28,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/werkzeug@3.1.5",
"package": "pkg:pypi/werkzeug@3.1.6",
"dependencies": [
"pkg:pypi/markupsafe@3.0.3"
]
Expand All @@ -40,6 +40,6 @@
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/markupsafe@3.0.3",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.6"
]
]
6 changes: 3 additions & 3 deletions tests/data/resolved_deps/flask-310-win-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"pkg:pypi/click@8.3.1",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.6"
]
},
{
Expand All @@ -34,7 +34,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/werkzeug@3.1.5",
"package": "pkg:pypi/werkzeug@3.1.6",
"dependencies": [
"pkg:pypi/markupsafe@3.0.3"
]
Expand All @@ -47,6 +47,6 @@
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/markupsafe@3.0.3",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.6"
]
]
Loading
Loading