# Merge remote-tracking branch 'origin/main' (#126)
# NOTE: this file was recovered from a GitHub blob-view copy. If reviewing for
# hidden or bidirectional Unicode characters, open it in an editor that reveals them.
name: Normalize GitBook assets for Git it Write

on:
  push:
    branches: [ main ]
  workflow_dispatch:

permissions:
  contents: write

# Only one normalization run per ref; newer pushes cancel an in-flight run.
concurrency:
  group: normalize-assets-${{ github.ref }}
  cancel-in-progress: true

jobs:
  normalize:
    # Skip runs triggered by this workflow's own bot commits.
    if: ${{ github.actor != 'github-actions[bot]' }}
    runs-on: ubuntu-latest
    steps:
      # Full history so rebase/push below can work.
      - uses: actions/checkout@v4
        with: { fetch-depth: 0 }
      - name: Configure author
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
      - name: Update to latest main (rebase)
        run: |
          git fetch origin main
          git checkout main
          git rebase origin/main
      - name: Ensure _images exists (flat)
        run: mkdir -p _images
| # 1) Copy images from .gitbook/assets → _images (flat) | |
| - name: Copy from .gitbook/assets → _images (flat) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| if [ -d ".gitbook/assets" ]; then | |
| while IFS= read -r -d '' src; do | |
| base="$(basename "$src")" | |
| cp -f "$src" "_images/$base" | |
| done < <(find .gitbook/assets -type f -print0) | |
| fi | |
| # 2) Rename files in _images to remove spaces (spaces -> '-') | |
| - name: Slugify filenames in _images (remove spaces) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| shopt -s nullglob | |
| for f in _images/*; do | |
| b="$(basename "$f")" | |
| nb="$(printf '%s' "$b" | sed -E 's/%20/ /g; s/[[:space:]]+/-/g; s/-+/-/g')" | |
| if [ "$b" != "$nb" ]; then | |
| tgt="_images/$nb" | |
| if [ -e "$tgt" ]; then | |
| # If target exists and content is IDENTICAL → drop the duplicate | |
| if cmp -s "$f" "$tgt"; then | |
| git rm -f "$f" 2>/dev/null || rm -f "$f" | |
| echo "Removed duplicate identical file: $b (kept $(basename "$tgt"))" | |
| continue | |
| fi | |
| # Else different content → create a unique name (rare) | |
| base="${nb%.*}"; ext="${nb##*.}"; i=1 | |
| while [ -e "_images/${base}-${i}.${ext}" ]; do i=$((i+1)); done | |
| tgt="_images/${base}-${i}.${ext}" | |
| fi | |
| git mv -f "$f" "$tgt" 2>/dev/null || mv -f "$f" "$tgt" | |
| echo "Renamed: $b -> $(basename "$tgt")" | |
| fi | |
| done | |
| # 3) Convert <figure><img ...><figcaption>...</figcaption></figure> →  | |
| - name: Convert <figure><img> blocks to Markdown images (/_images, no spaces) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| python3 - <<'PY' | |
import re, glob, html, os
from urllib.parse import unquote

# <figure> wrapper, its <img>, and an optional <figcaption>.
FIG = re.compile(r'<figure\b[^>]*>(.*?)</figure>', re.I | re.S)
IMG = re.compile(r'<img\b[^>]*>', re.I | re.S)
CAP = re.compile(r'<figcaption\b[^>]*>(.*?)</figcaption>', re.I | re.S)

def attr(n, s):
    """Return the value of attribute *n* inside tag text *s*, or '' if absent."""
    m = re.search(rf'\b{n}\s*=\s*["\']([^"\']*)["\']', s, re.I | re.S)
    return m.group(1).strip() if m else ''

def clean_basename(src: str) -> str:
    """
    Take src like '../.gitbook/assets/image (766).png'
    -> 'image-(766).png'
    """
    # take last path segment, URL-decode
    bn = unquote(os.path.basename(src or '').strip())
    if not bn:
        return 'unknown.png'
    # split name + ext
    if '.' in bn:
        base, ext = bn.rsplit('.', 1)
        ext = '.' + ext.lower()
    else:
        base, ext = bn, ''
    # replace %20/spaces with dash; collapse dashes
    base = base.replace('%20', ' ')
    base = re.sub(r'\s+', '-', base)
    base = re.sub(r'-{2,}', '-', base).strip('-')
    # if base ends with (...) containing a stray quoted fragment, drop it but keep ()
    # (defensive — shouldn't be present in raw figure src, but safe)
    base = re.sub(r'\(\s*([^()"]*?)\s+"(?:[^"\\]|\\.)*"\s*\)$', r'(\1)', base)
    # allow only letters, digits, -, _, ()
    base = re.sub(r'[^A-Za-z0-9_\-()]+', '-', base)
    base = re.sub(r'-{2,}', '-', base).strip('-')
    return base + ext

def convert_figure(block: str) -> str:
    """Turn one <figure> block into a strict Markdown image pointing at /_images/."""
    mimg = IMG.search(block)
    if not mimg:
        return block
    img = mimg.group(0)
    raw_src = attr('src', img)
    alt = attr('alt', img) or ''
    # title: figcaption (preferred) else alt
    mcap = CAP.search(block)
    cap_text = html.unescape(re.sub(r'<[^>]+>', '', mcap.group(1))).strip() if mcap else ''
    title = cap_text or alt
    fn = clean_basename(raw_src)
    url = f"/_images/{fn}"
    # escape markdown specials safely
    alt_md = alt.replace(']', r'\]')
    title_md = title.replace('"', r'\"')
    # FIX: the Markdown image literals were lost in a rendered copy of this file;
    # restore them. If no title at all, omit the "..." part.
    return f'![{alt_md}]({url} "{title_md}")' if title_md else f'![{alt_md}]({url})'

files = [p for g in ["**/*.md", "**/*.MD", "**/*.mdx", "**/*.MDX", "**/*.markdown", "**/*.MARKDOWN"]
         for p in glob.glob(g, recursive=True)]
any_changed = False
for path in files:
    s = open(path, encoding="utf-8").read()
    n = FIG.sub(lambda m: convert_figure(m.group(0)), s)
    if n != s:
        open(path, "w", encoding="utf-8").write(n)
        print(f"Converted figures in: {path}")
        any_changed = True
if not any_changed:
    print("No <figure> blocks to convert.")
| PY | |
| # 4) Normalize any remaining image links to /_images/<no-spaces>, remove prefixes/subfolders, convert bare <img> too | |
| - name: Normalize ALL image links to  exactly + enhance blockquotes | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| python3 - <<'PY' | |
import re, glob, os
from urllib.parse import unquote
from html import unescape

# -------- front matter helpers --------
def split_front_matter(text: str):
    """Split a leading YAML front-matter block off; returns (front_matter, body)."""
    if text.startswith('\ufeff'):
        text = text.lstrip('\ufeff')
    if text.startswith('\n---'):
        text = text[1:]
    m = re.match(r'^(---\s*\n.*?\n---\s*\n)', text, flags=re.S)
    if m:
        fm = m.group(1)
        body = text[m.end():]
        return fm, body
    return "", text

def join_front_matter(fm: str, body: str) -> str:
    return (fm or "") + body

IMG_EXT = r"(?:png|jpe?g|gif|webp|svg|gifv)"

def clean_filename_from_url(url: str) -> str:
    """Reduce any image URL to a slugged basename: spaces -> '-', safe chars only."""
    if not url: return "unknown.png"
    url = url.strip().rstrip('>')
    url = re.sub(r'\s*""', '', url)
    # last path-ish segment that ends in a known image extension
    m = re.findall(rf'([^/?#]+?\.{IMG_EXT})', url, flags=re.I)
    bn = m[-1] if m else os.path.basename(url)
    bn = unquote(bn)
    if "." in bn: base, ext = bn.rsplit(".", 1)
    else: base, ext = bn, ""
    # drop a quoted title fragment that leaked inside trailing parentheses
    base = re.sub(r'\(\s*([^()"]*?)\s+"(?:[^"\\]|\\.)*"\s*\)$', r'(\1)', base)
    pm = re.search(r'\(([^()]*)\)$', base)
    if pm:
        inner = re.sub(r'\s+', ' ', pm.group(1)).strip()
        base = base[:pm.start()] + f'({inner})'
    base = re.sub(r'\s+"(?:[^"\\]|\\.)*"\s*', '', base)
    base = re.sub(r'["“”].*$', '', base)
    base = re.sub(r"\s+", "-", base)
    base = re.sub(r"[^A-Za-z0-9_\-()]", "-", base)
    base = re.sub(r"-{2,}", "-", base).strip("-")
    return f"{base}.{ext.lower()}" if ext else base

def to_root_image_url(url: str) -> str:
    return "/_images/" + clean_filename_from_url(url)

def norm_text(s: str) -> str:
    """HTML-unescape and normalize curly quotes to ASCII."""
    t = unescape(s or "")
    t = t.replace("“", '"').replace("”", '"').replace("’", "'")
    return t

def build_md(alt: str, url: str) -> str:
    """Build a strict Markdown image: ![alt](/_images/slug "title")."""
    alt = norm_text(alt or "")
    alt_md = alt.replace("]", r"\]")
    # FIX: the Markdown image literals were lost in a rendered copy; restored here.
    if alt.strip():
        title = alt.replace('"', r'\"')
        return f'![{alt_md}]({to_root_image_url(url)} "{title}")'
    else:
        return f'![{alt_md}]({to_root_image_url(url)})'

md_files = [p for g in ("**/*.md", "**/*.MD", "**/*.mdx", "**/*.MDX", "**/*.markdown", "**/*.MARKDOWN")
            for p in glob.glob(g, recursive=True)]

# ---------- GitBook hint blocks -> Gutenberg blockquote (unchanged) ----------
HINT_BLOCK = re.compile(r'{%\s*hint\b[^%]*%}(.*?){%\s*endhint\s*%}', re.I | re.S)
MD_LINK = re.compile(r'\[([^\]]+)\]\((https?://[^)]+)\)')
BARE_URL = re.compile(r'^(https?://\S+)$')

def convert_hint_blocks(s: str) -> str:
    """Replace {% hint %}...{% endhint %} with a wp-block-quote blockquote."""
    def _repl(m):
        inner = m.group(1).strip()
        paras = []
        for line in inner.splitlines():
            line = line.strip()
            if not line: continue
            line = MD_LINK.sub(r'<a href="\2">\1</a>', line)
            bu = BARE_URL.match(line)
            if bu:
                u = bu.group(1)
                line = f'<a href="{u}">{u}</a>'
            paras.append(f"<p>{line}</p>")
        content = "\n".join(paras) if paras else "<p></p>"
        return f'<blockquote class="wp-block-quote">\n{content}\n</blockquote>'
    return HINT_BLOCK.sub(_repl, s)

# ---------- enhance existing blockquotes: marker word -> class (marker <p> is kept) ----------
MARKERS = {
    'note': 'is-note',
    'tip': 'is-tip',
    'important': 'is-important',
    'warning': 'is-warning',
    'caution': 'is-caution',
    'info': 'is-info',
}
# Match blockquote with class containing wp-block-quote; capture classes and inner HTML
BQ = re.compile(
    r'<blockquote\s+class="([^"]*\bwp-block-quote\b[^"]*)">\s*(.*?)\s*</blockquote>',
    re.I | re.S
)
# First paragraph inside the blockquote (no removal, just read it)
FIRST_P = re.compile(r'^\s*<p>(.*?)</p>', re.I | re.S)

def _strip_md_bold(s: str) -> str:
    # remove **…** or __…__ only at edges
    s = re.sub(r'^\s*(\*\*|__)\s*', '', s)
    s = re.sub(r'\s*(\*\*|__)\s*$', '', s)
    return s.strip()

def _detect_marker(text_html: str) -> str:
    # Get plain-ish text from <p>…</p>
    t = re.sub(r'<[^>]+>', '', text_html)
    t = _strip_md_bold(t)
    t = re.sub(r':\s*$', '', t).strip().lower()
    return t

def enhance_blockquotes_keep_marker(doc: str) -> str:
    """Add a marker modifier class (is-note, is-tip, ...) when the first <p> is a marker word."""
    def _repl(m):
        classes = m.group(1)
        inner = m.group(2)
        fm = FIRST_P.search(inner)
        if not fm:
            return m.group(0)
        key = _detect_marker(fm.group(1))
        mod = MARKERS.get(key)
        if not mod:
            return m.group(0)
        # add class if missing
        if mod not in classes.split():
            classes = classes + " " + mod
        return f'<blockquote class="{classes}">\n{inner}\n</blockquote>'
    return BQ.sub(_repl, doc)

# ---------- image normalizations ----------
html_img = re.compile(
    r'<img\b[^>]*\bsrc=["\']([^"\']+)["\'][^>]*?(?:\balt=["\']([^"\']*)["\'])?[^>]*>',
    re.I
)
md_img_inline = re.compile(
    r'!\[([^\]]*)\]'
    r'\('
    r'\s*<?'
    # FIX: backreference must close the optional quote (group 2), not re-match
    # the alt text (group 1) — with \1 any image with a non-empty alt never matched.
    r'("?)([^)\r\n]+?)\2'
    r'>?'
    r'(?:\s+"[^"]*")?'
    r'\s*\)',
    re.I
)
md_img_any = re.compile(
    r'!\[([^\]]*)\]'
    r'\('
    r'\s*<?'
    r'([^)\r\n]+?)'
    r'>?'
    r'(?:\s+"[^"]*")?'
    r'\s*\)',
    re.I
)
md_img_ref_use = re.compile(r'!\[([^\]]*)\]\s*\[([^\]]+)\]', re.I)
ref_def_loose = re.compile(
    r'(\[([^\]]+)\]\s*:\s*)' r'<?("?)([^>\r\n]+?)\3>?' r'(\s+"[^"]*")?\s*$',
    re.I
)
# GitBook inline MD images (hoisted out of the per-file loop)
gitbook_inline = re.compile(
    r'!\[([^\]]*)\]\('
    r'\s*<?'
    r'([^)\r\n]*?\.gitbook/assets[^)\r\n]+?)'
    r'>?'
    r'(?:\s+"[^"]*")?'
    r'\s*\)', re.I)

any_changed = False
for path in md_files:
    with open(path, encoding="utf-8") as f:
        raw = f.read()
    fm, s = split_front_matter(raw)
    o = s
    # 1) HTML <img> -> strict MD (alt is group 2, src group 1)
    s = html_img.sub(lambda m: build_md(m.group(2) or "", m.group(1)), s)
    # 1.5) GitBook inline MD images -> strict MD to /_images
    s = gitbook_inline.sub(lambda m: build_md(m.group(1), m.group(2)), s)
    # 2) Inline MD (general) -> strict MD (title = alt)
    s = md_img_inline.sub(lambda m: build_md(m.group(1), m.group(3)), s)
    # 2.5) GitBook hint blocks -> Gutenberg blockquote
    s = convert_hint_blocks(s)
    # 2.6) Sanitize ANY Markdown image URL (truncate after extension, drop titles)
    def _sanitize_md_img(m):
        alt = m.group(1)
        inner = (m.group(2) or "").strip()
        if not inner: return m.group(0)
        extm = re.search(rf'\.{IMG_EXT}\b', inner, re.I)
        if extm: inner = inner[:extm.end()]
        inner = re.sub(r'\s+"(?:[^"\\]|\\.)*"\s*', '', inner)
        return build_md(alt, inner)
    s = md_img_any.sub(_sanitize_md_img, s)
    # 3) Gather reference defs
    defs = {}
    for ln in s.splitlines():
        m = ref_def_loose.search(ln)
        if m:
            defs[m.group(2).strip()] = m.group(4).strip()
    # 4) Reference usages -> strict MD
    def repl_ref_use(m):
        alt, rid = m.group(1), m.group(2).strip()
        url = defs.get(rid)
        return build_md(alt, url) if url else m.group(0)
    s = md_img_ref_use.sub(repl_ref_use, s)
    # 5) Reference defs to /_images/<slug>
    def rewrite_def_line(ln: str) -> str:
        m = ref_def_loose.search(ln)
        if not m: return ln
        head, url, title = m.group(1), m.group(4).strip(), m.group(5) or ""
        return f"{head}{to_root_image_url(url)}{title}"
    # FIX: preserve the trailing newline so unchanged files are not rewritten
    trail = "\n" if s.endswith("\n") else ""
    s = "\n".join(rewrite_def_line(ln) for ln in s.splitlines()) + trail
    # 6) Fix bare .gitbook/assets occurrences outside code fences
    out_lines, fenced = [], False
    fence_re = re.compile(r'^\s*```')
    asset_re = re.compile(r'(\.gitbook/assets/[^"\'<>]+)', re.I)
    for ln in s.splitlines():
        if fence_re.match(ln):
            fenced = not fenced
            out_lines.append(ln); continue
        if not fenced and '.gitbook/assets/' in ln:
            ln = asset_re.sub(lambda m: to_root_image_url(m.group(1)), ln)
        out_lines.append(ln)
    trail = "\n" if s.endswith("\n") else ""
    s = "\n".join(out_lines) + trail
    # 7) Remove lingering ../ or ./ before _images
    s = re.sub(r'\]\(\s*<?(?:\.\./|\.?/)+_images/', '](/_images/', s)
    # 8) Enhance blockquotes (add marker classes; keep the marker <p>)
    s = enhance_blockquotes_keep_marker(s)
    if s != o:
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(join_front_matter(fm, s))
        print(f"Fixed: {path}")
        any_changed = True
if not any_changed:
    print("No image links or blockquotes needed normalization.")
| PY | |
| # FINAL PATCH: strip quoted fragments inside filename parentheses before .ext | |
| - name: Final fix — collapse ("…") inside filenames | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| python3 - <<'PY' | |
import re, glob  # FIX: removed unused 'io' import

exts = r'(?:png|jpe?g|gif|webp|svg|gifv)'
# Example it fixes:
#   /_images/image-(766 "Clicking the question mark and selecting ...").png
#   -> /_images/image-(766).png
pat = re.compile(
    rf'(/_images/[^)\s]*\([^()"\\]*?)\s+"(?:[^"\\]|\\.)*"\s*(\)\.{exts})(?=[\s")])',
    re.I
)
files = [p for g in ("**/*.md", "**/*.MD", "**/*.mdx", "**/*.MDX", "**/*.markdown", "**/*.MARKDOWN")
         for p in glob.glob(g, recursive=True)]
changed = False
for p in files:
    with open(p, 'r', encoding='utf-8') as f:
        s = f.read()
    n = pat.sub(r'\1\2', s)
    if n != s:
        with open(p, 'w', encoding='utf-8') as f:
            f.write(n)
        print(f"Collapsed quoted fragment inside filename: {p}")
        changed = True
if not changed:
    print("No filenames with quoted fragments needed collapsing.")
| PY | |
| # 5) Validate: all image links are /_images/<no-spaces> and no subfolders | |
| - name: Validate image links style (/_images/*, no spaces) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| python3 - <<'PY' | |
import re, glob, sys

exts = r"(png|jpe?g|gif|webp|svg|gifv)"
is_img = re.compile(rf"\.({exts})(?:$|\?)", re.I)
md_img_inline = re.compile(
    r'!\[([^\]]*)\]\('
    r'\s*<?'
    # FIX: backreference the optional-quote group (\2), not the alt text (\1);
    # with \1 the pattern never matched images that have a non-empty alt.
    r'("?)([^)\r\n]+?)\2'
    r'>?'
    r'(?:\s+"[^"]*")?'
    r'\s*\)',
    re.I
)
html_src = re.compile(r'\bsrc=["\']([^"\']+)["\']', re.I)
ref_def_loose = re.compile(
    r'(\[([^\]]+)\]\s*:\s*)' r'<?("?)([^>\r\n]+?)\3>?' r'(\s+"[^"]*")?\s*$',
    re.I
)
fence_re = re.compile(r'^\s*```')

def bad(u: str) -> bool:
    """True if *u* is a local image link that violates the /_images/<file> style."""
    u = u.strip()
    if u.startswith("http://") or u.startswith("https://"):
        return False          # external URLs are ignored
    if not is_img.search(u):
        return False          # not an image link at all
    if not u.startswith("/_images/"):
        return True
    rest = u[len("/_images/"):]
    if "/" in rest:
        return True           # no subfolders allowed
    if " " in u:
        return True           # no spaces allowed
    return False

bads = []
files = [p for g in ("**/*.md", "**/*.MD", "**/*.mdx", "**/*.MDX", "**/*.markdown", "**/*.MARKDOWN")
         for p in glob.glob(g, recursive=True)]
for p in files:
    with open(p, encoding="utf-8") as f:
        lines = f.read().splitlines()
    # single pass per file (previously two separate fence-tracking scans)
    fenced = False
    for ln in lines:
        if fence_re.match(ln):
            fenced = not fenced
            continue
        if fenced:
            continue
        for m in md_img_inline.finditer(ln):
            # FIX: the URL is group 3 (group 2 is the optional quote character)
            u = m.group(3).strip()
            if bad(u): bads.append((p, u))
        for u in html_src.findall(ln):
            if bad(u): bads.append((p, u))
        m = ref_def_loose.search(ln)
        if m:
            u = m.group(4).strip()
            if bad(u): bads.append((p, u))
if bads:
    print("Non-compliant image links:")
    for pth, u in bads:
        print(f"- {pth}: {u}")
    sys.exit(1)
print("All local image links are /_images/<file> with no spaces and no subfolders. External URLs are ignored.")
| PY | |
| - name: Commit changes (if any) | |
| run: | | |
| if [ -n "$(git status --porcelain)" ]; then | |
| git add -A | |
| git commit -m "Normalize: convert figures and links to , no spaces [ci skip]" | |
| else | |
| echo "No changes to commit." | |
| fi | |
| - name: Rebase on latest and push (retry with --force-with-lease) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| git fetch origin main --prune | |
| if [ "$(git rev-list --count HEAD ^origin/main)" -eq 0 ]; then | |
| echo "No new commits to push." | |
| exit 0 | |
| fi | |
| for a in 1 2 3; do | |
| echo "Push attempt $a..." | |
| if git push --force-with-lease; then exit 0; fi | |
| git rebase origin/main | |
| sleep 2 | |
| done | |
| echo "Giving up after 3 attempts."; exit 1 |