Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Changelog
Unreleased
----------

* **Bugfix** [#120] Prefix section ``id`` and ``refid`` values with docname in merged
``simplepdf`` HTML so sidebar TOC page numbers resolve uniquely (aligned with
`sphinx-doc/sphinx#13739 <https://github.com/sphinx-doc/sphinx/pull/13739>`_).
* **Bugfix** [#134] Improve support for external theme packages by using a ``get_scss_sources_path()`` convention.

- If needed, theme warnings can be suppressed via ``suppress_warnings = ["simplepdf.theme"]``.
Expand Down
71 changes: 71 additions & 0 deletions sphinx_simplepdf/builders/simplepdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any

from bs4 import BeautifulSoup
from docutils import nodes
import sass
from sphinx import __version__
from sphinx.application import Sphinx
Expand Down Expand Up @@ -153,6 +154,76 @@ def bundled_scss_folder():
)
return scss_folder

@staticmethod
def _qualified_anchor_id(docname: str, node_id: str) -> str:
"""Build a docname-qualified section anchor (sphinx-doc/sphinx#13739)."""
return f"/{docname}/#{node_id}"

def fix_refuris(self, tree: nodes.Node) -> None:
    """Collapse double-hash ``refuri`` values while leaving qualified anchors alone.

    For every ``reference`` node in *tree* that carries a ``refuri``:

    * a URI that already starts with ``#/<docname>/#`` is kept unchanged;
    * otherwise, if the URI contains a second ``#``, everything before that
      second ``#`` is dropped;
    * URIs with fewer than two ``#`` characters are left untouched.
    """
    for ref in tree.findall(nodes.reference):
        if "refuri" not in ref:
            continue
        uri = ref["refuri"]
        # Already in docname-qualified form: trimming would destroy the prefix.
        if re.match(r"^#/[^#]+/#", uri):
            continue
        first_hash = uri.find("#")
        # Only look for a second '#' when a first one exists.
        second_hash = uri.find("#", first_hash + 1) if first_hash >= 0 else -1
        if second_hash >= 0:
            ref["refuri"] = uri[second_hash:]

def ensure_fully_qualified_refids(self, tree: nodes.document) -> None:
    """Rewrite ``refid`` and ``ids`` attributes in *tree* to docname-qualified form.

    Every element that has a ``refid`` or ``ids`` attribute is looked up via
    its owning document's ``source`` path; elements whose document, source or
    docname cannot be determined are skipped. A ``refuri`` that starts with
    ``#`` on an element with a ``refid`` is rewritten to point at the new,
    qualified ``refid``.
    """
    qualify = self._qualified_anchor_id
    for element in tree.findall(nodes.Element):
        has_refid = "refid" in element
        has_ids = "ids" in element
        if not (has_refid or has_ids):
            continue

        owner = element.document
        if owner is None or "source" not in owner:
            continue
        docname = self.env.path2doc(owner["source"])
        if docname is None:
            continue

        if has_refid:
            qualified_refid = qualify(docname, element["refid"])
            element["refid"] = qualified_refid
            # Keep a same-page refuri in sync with the rewritten refid.
            if "refuri" in element and element["refuri"].startswith("#"):
                element["refuri"] = "#" + qualified_refid
        if has_ids:
            element["ids"] = [qualify(docname, ident) for ident in element["ids"]]

def assemble_doctree(self) -> nodes.document:
    """Return the parent builder's assembled doctree with docname-qualified ids."""
    doctree = super().assemble_doctree()
    # Qualification happens in place on the freshly assembled tree.
    self.ensure_fully_qualified_refids(doctree)
    return doctree

def assemble_toc_secnumbers(self) -> dict[str, dict[str, tuple[int, ...]]]:
    """Merge all per-document section numbers into one mapping under the root doc.

    Each entry of ``self.env.toc_secnumbers`` is re-keyed as
    ``/<docname>/<id>``, where ``<id>`` is the key exactly as stored in the
    environment (presumably ``""`` or ``#anchor`` -- verify against Sphinx).
    The merged mapping is returned under ``self.config.root_doc``.
    """
    merged: dict[str, tuple[int, ...]] = {
        f"/{docname}/{anchor}": number
        for docname, per_doc in self.env.toc_secnumbers.items()
        for anchor, number in per_doc.items()
    }
    return {self.config.root_doc: merged}

def assemble_toc_fignumbers(
    self,
) -> dict[str, dict[str, dict[str, tuple[int, ...]]]]:
    """Merge all per-document figure numbers into one mapping under the root doc.

    The outer key is ``/<docname>/#<figtype>``; the inner keys are the
    figure ids run through ``_qualified_anchor_id``. A figure type with no
    numbered figures still gets an (empty) entry.
    """
    merged: dict[str, dict[str, tuple[int, ...]]] = {}
    for docname, by_type in self.env.toc_fignumbers.items():
        for figtype, numbers in by_type.items():
            bucket = merged.setdefault(f"/{docname}/#{figtype}", {})
            bucket.update(
                (self._qualified_anchor_id(docname, fig_id), number)
                for fig_id, number in numbers.items()
            )
    return {self.config.root_doc: merged}

def get_target_uri(self, docname: str, typ: str | None = None) -> str:
if docname in self.env.all_docs:
return f"#/{docname}/"
return docname + self.out_suffix

def finish(self) -> None:
super().finish()

Expand Down
8 changes: 4 additions & 4 deletions sphinx_simplepdf/writers/simplepdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ def get_secnumber(self, node: Element) -> tuple[int, ...] | None:
return node["secnumber"]

if isinstance(node.parent, nodes.section):
docname = self.docnames[-1]
anchorname = "{}/#{}".format(docname, node.parent["ids"][0])
parent_ids = node.parent.get("ids")
anchorname = parent_ids[0] if parent_ids else None
if anchorname not in self.builder.secnumbers:
anchorname = f"{docname}/" # try first heading which has no anchor
anchorname = f"/{self.docnames[-1]}/"

if self.builder.secnumbers.get(anchorname):
return self.builder.secnumbers[anchorname]
Expand All @@ -40,7 +40,7 @@ def get_fignumber(self, node: Element) -> str | None:
logger.warning(msg, location=node)
return None

key = f"{self.docnames[-1]}/{figtype}"
key = f"/{self.docnames[-1]}/#{figtype}"
figure_id = node["ids"][0]
if figure_id not in self.builder.fignumbers.get(key, {}):
return None
Expand Down
6 changes: 6 additions & 0 deletions tests/_weasyprint_mock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Install a WeasyPrint stub when native libraries are unavailable (test-only)."""

import sys
from unittest.mock import MagicMock

# ``setdefault`` only registers the ``MagicMock`` stand-in when ``sys.modules``
# has no "weasyprint" entry yet; an already-imported module is left untouched.
# NOTE(review): nothing here checks whether the real package is importable, so
# the stub is also installed when WeasyPrint is available but not yet imported
# -- confirm that is intended.
sys.modules.setdefault("weasyprint", MagicMock())
8 changes: 8 additions & 0 deletions tests/doc_test/with_duplicate_sections/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Configuration for duplicate section title tests."""

# Project name for this test fixture.
project = "DuplicateSections"
# Load the extension under test.
extensions = ["sphinx_simplepdf"]
# Root document: index.rst, whose toctree lists page1 and page2.
master_doc = "index"
exclude_patterns = ["_build"]

# Theme name passed to the simplepdf builder.
simplepdf_theme = "simplepdf_theme"
8 changes: 8 additions & 0 deletions tests/doc_test/with_duplicate_sections/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Duplicate Section Titles
========================

.. toctree::
:maxdepth: 2

page1
page2
12 changes: 12 additions & 0 deletions tests/doc_test/with_duplicate_sections/page1.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Page 1
======

Introduction
------------

Content on page one.

Details
-------

More content on page one.
12 changes: 12 additions & 0 deletions tests/doc_test/with_duplicate_sections/page2.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Page 2
======

Introduction
------------

Content on page two.

Details
-------

More content on page two.
82 changes: 82 additions & 0 deletions tests/test_unique_toc_anchors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Tests for unique section anchors in merged simplepdf HTML."""

from __future__ import annotations

from collections import Counter
import re
from unittest.mock import MagicMock

from bs4 import BeautifulSoup
from bs4.element import Tag
import pytest

import tests._weasyprint_mock # noqa: F401 # must load before sphinx_simplepdf

from .utils import build_and_capture_stdout


def _collect_html_ids(html: str) -> list[str]:
    """Return the value of every double-quoted ``id`` attribute in *html*.

    Values are returned in document order, duplicates included, so callers
    can count repeated ids. Only attributes literally named ``id`` are
    matched; names that merely end in ``id`` (``refid``, ``data-id``) are
    ignored.
    """
    # A plain word boundary treats the hyphen in ``data-id`` as a boundary and
    # would report that attribute as an element id; the lookbehind rejects any
    # preceding word character or hyphen instead.
    return re.findall(r'(?<![\w-])id="([^"]+)"', html)


def _sidebar_toc_hrefs(html: str) -> list[str]:
    """Return the non-empty ``href`` values of internal links in the sidebar.

    The sidebar is the first ``div`` with class ``sphinxsidebarwrapper``; an
    empty list is returned when no such element is found.
    """
    wrapper = BeautifulSoup(html, "html.parser").find("div", class_="sphinxsidebarwrapper")
    if not isinstance(wrapper, Tag):
        return []

    hrefs = []
    for anchor in wrapper.find_all("a", class_="reference internal"):
        target = anchor.get("href", "")
        if target:
            hrefs.append(target)
    return hrefs


def _normalize_toc_href(href: str, root_doc: str = "index") -> str:
    """Return *href* with every occurrence of ``<root_doc>.html`` removed."""
    root_page = f"{root_doc}.html"
    return href.replace(root_page, "")


@pytest.fixture(autouse=True)
def _mock_weasyprint(monkeypatch):
    """Stub out PDF generation so simplepdf builds run without native WeasyPrint.

    Both entry points referenced from the builder module are replaced for the
    duration of each test: ``subprocess.check_output`` returns an empty string
    and ``weasyprint.HTML`` becomes a ``MagicMock``.
    """

    def _empty_output(*args, **kwargs):
        return ""

    monkeypatch.setattr(
        "sphinx_simplepdf.builders.simplepdf.subprocess.check_output",
        _empty_output,
    )
    monkeypatch.setattr(
        "sphinx_simplepdf.builders.simplepdf.weasyprint.HTML",
        MagicMock(),
    )


def test_merged_html_has_no_duplicate_ids(sphinx_build, capsys):
    """The merged ``index`` HTML must not contain the same ``id`` value twice."""
    build = build_and_capture_stdout(
        sphinx_build,
        capsys,
        srcdir="with_duplicate_sections",
        confoverrides={"simplepdf_use_weasyprint_api": True},
    )

    occurrences = Counter(_collect_html_ids(build.html_content("index")))
    duplicates = [anchor for anchor, seen in occurrences.items() if seen > 1]

    assert duplicates == [], f"duplicate id attributes found: {duplicates}"


def test_sidebar_toc_hrefs_resolve_to_unique_anchors(sphinx_build, capsys):
    """Every docname-qualified sidebar TOC link must hit exactly one element id."""
    build = build_and_capture_stdout(
        sphinx_build,
        capsys,
        srcdir="with_duplicate_sections",
        confoverrides={"simplepdf_use_weasyprint_api": True},
    )

    html = build.html_content("index")
    known_ids = set(_collect_html_ids(html))

    # Keep only links in the qualified ``#/<docname>/#<id>`` form.
    section_hrefs = []
    for raw_href in _sidebar_toc_hrefs(html):
        normalized = _normalize_toc_href(raw_href)
        if re.match(r"^#/[^#]+/#", normalized):
            section_hrefs.append(normalized)

    assert section_hrefs, "expected sidebar TOC section links"

    for href in section_hrefs:
        anchor = href[1:]  # strip leading '#'
        assert anchor in known_ids, f"TOC href {href!r} does not match any element id"
        assert html.count(f'id="{anchor}"') == 1, f"anchor {anchor!r} is not unique in HTML"