6 changes: 5 additions & 1 deletion AGENTS.md
@@ -126,9 +126,13 @@ Example format:

### Imports

**For standard library modules:**
- Use namespace imports: `import enum` instead of `from enum import Enum`
- For typing, use `import typing as t` and access via namespace: `t.NamedTuple`, etc.
- Use `from __future__ import annotations` at the top of all Python files

**For third-party packages:** Use idiomatic import styles for each library (e.g., `from pygments.token import Token` is fine).

**Always:** Use `from __future__ import annotations` at the top of all Python files.
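
As a quick illustration, a module following these conventions might start like this (hypothetical names, not part of this diff):

```python
# Hypothetical module illustrating the import conventions above.
from __future__ import annotations

import enum          # namespace import instead of ``from enum import Enum``
import typing as t   # typing accessed via the ``t`` namespace

from pygments.token import Token  # third-party: idiomatic import style is fine


class Color(enum.Enum):
    RED = "red"


class Point(t.NamedTuple):
    x: int
    y: int


USAGE_HEADING = Token.Generic.Heading
```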

### Docstrings

3 changes: 3 additions & 0 deletions docs/_ext/__init__.py
@@ -0,0 +1,3 @@
"""Sphinx extensions for vcspull documentation."""

from __future__ import annotations
115 changes: 115 additions & 0 deletions docs/_ext/cli_usage_lexer.py
@@ -0,0 +1,115 @@
"""Pygments lexer for CLI usage/help output.

This module provides a custom Pygments lexer for highlighting command-line
usage text typically generated by argparse, getopt, or similar libraries.
"""

from __future__ import annotations

from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Generic, Name, Operator, Punctuation, Text, Whitespace


class CLIUsageLexer(RegexLexer):
"""Lexer for CLI usage/help text (argparse, etc.).

Highlights usage patterns including options, arguments, and meta-variables.

Examples
--------
>>> from pygments.token import Token
>>> lexer = CLIUsageLexer()
>>> tokens = list(lexer.get_tokens("usage: cmd [-h]"))
>>> tokens[0]
(Token.Generic.Heading, 'usage:')
>>> tokens[2]
(Token.Name.Label, 'cmd')
"""

name = "CLI Usage"
aliases = ["cli-usage", "usage"] # noqa: RUF012
filenames: list[str] = [] # noqa: RUF012
mimetypes = ["text/x-cli-usage"] # noqa: RUF012

tokens = { # noqa: RUF012
"root": [
# "usage:" at start of line
(r"^(usage:)(\s+)", bygroups(Generic.Heading, Whitespace)), # type: ignore[no-untyped-call]
# Continuation lines (leading whitespace for wrapped usage)
(r"^(\s+)(?=\S)", Whitespace),
include("inline"),
],
"inline": [
# Whitespace
(r"\s+", Whitespace),
# Long options with = value (e.g., --log-level=VALUE)
(
r"(--[a-zA-Z0-9][-a-zA-Z0-9]*)(=)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
bygroups(Name.Tag, Operator, Name.Variable), # type: ignore[no-untyped-call]
),
# Long options standalone
(r"--[a-zA-Z0-9][-a-zA-Z0-9]*", Name.Tag),
# Short options with space-separated value (e.g., -S socket-path)
(
r"(-[a-zA-Z0-9])(\s+)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
bygroups(Name.Attribute, Whitespace, Name.Variable), # type: ignore[no-untyped-call]
),
# Short options standalone
(r"-[a-zA-Z0-9]", Name.Attribute),
# UPPERCASE meta-variables (COMMAND, FILE, PATH)
(r"\b[A-Z][A-Z0-9_]+\b", Name.Constant),
# Opening bracket - enter optional state
(r"\[", Punctuation, "optional"),
# Closing bracket (fallback for unmatched)
(r"\]", Punctuation),
# Choice separator (pipe)
(r"\|", Operator),
# Parentheses for grouping
(r"[()]", Punctuation),
# Positional/command names (lowercase with dashes)
(r"\b[a-z][-a-z0-9]*\b", Name.Label),
# Catch-all for any other text
(r"[^\s\[\]|()]+", Text),
],
"optional": [
# Nested optional bracket
(r"\[", Punctuation, "#push"),
# End optional
(r"\]", Punctuation, "#pop"),
# Contents use inline rules
include("inline"),
],
}


def tokenize_usage(text: str) -> list[tuple[str, str]]:
"""Tokenize usage text and return list of (token_type, value) tuples.

Parameters
----------
text : str
CLI usage text to tokenize.

Returns
-------
list[tuple[str, str]]
List of (token_type_name, text_value) tuples.

Examples
--------
>>> result = tokenize_usage("usage: cmd [-h]")
>>> result[0]
('Token.Generic.Heading', 'usage:')
>>> result[2]
('Token.Name.Label', 'cmd')
>>> result[4]
('Token.Punctuation', '[')
>>> result[5]
('Token.Name.Attribute', '-h')
>>> result[6]
('Token.Punctuation', ']')
"""
lexer = CLIUsageLexer()
return [
(str(tok_type), tok_value) for tok_type, tok_value in lexer.get_tokens(text)
]
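
The diff does not show how the lexer is wired into the Sphinx build. A minimal registration hook, assuming the extension directory is on the docs path and listed in `conf.py`'s `extensions`, might look like the sketch below (the `setup()` wiring and return metadata are illustrative, not part of this PR):

```python
from __future__ import annotations

import typing as t

from cli_usage_lexer import CLIUsageLexer

if t.TYPE_CHECKING:
    from sphinx.application import Sphinx


def setup(app: Sphinx) -> dict[str, t.Any]:
    """Register the CLI usage lexer (assumed wiring, not shown in this PR)."""
    # Sphinx accepts a Lexer subclass here; afterwards ``cli-usage`` works as a
    # language name in ``code-block`` directives.
    app.add_lexer("cli-usage", CLIUsageLexer)
    return {"parallel_read_safe": True, "parallel_write_safe": True}
```

With that in place, argparse help output could be highlighted in the docs via `.. code-block:: cli-usage` blocks.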