Normalize GitBook assets for Git it Write

Merge remote-tracking branch 'origin/main' #126

Workflow file for this run

.github/workflows/normalize-images.yml at 0c37c17

	name: Normalize GitBook assets for Git it Write

	on:
	push:
	branches: [ main ]
	workflow_dispatch:

	permissions:
	contents: write

	concurrency:
	group: normalize-assets-${{ github.ref }}
	cancel-in-progress: true

	jobs:
	normalize:
	if: ${{ github.actor != 'github-actions[bot]' }}
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v4
	with: { fetch-depth: 0 }

	- name: Configure author
	run: \|
	git config user.name "github-actions[bot]"
	git config user.email "github-actions[bot]@users.noreply.github.com"

	- name: Update to latest main (rebase)
	run: \|
	git fetch origin main
	git checkout main
	git rebase origin/main

	- name: Ensure _images exists (flat)
	run: mkdir -p _images

	# 1) Copy images from .gitbook/assets → _images (flat)
	- name: Copy from .gitbook/assets → _images (flat)
	shell: bash
	run: \|
	set -euo pipefail
	if [ -d ".gitbook/assets" ]; then
	while IFS= read -r -d '' src; do
	base="$(basename "$src")"
	cp -f "$src" "_images/$base"
	done < <(find .gitbook/assets -type f -print0)
	fi

	# 2) Rename files in _images to remove spaces (spaces -> '-')
	- name: Slugify filenames in _images (remove spaces)
	shell: bash
	run: \|
	set -euo pipefail
	shopt -s nullglob
	for f in _images/*; do
	b="$(basename "$f")"
	nb="$(printf '%s' "$b" \| sed -E 's/%20/ /g; s/[[:space:]]+/-/g; s/-+/-/g')"
	if [ "$b" != "$nb" ]; then
	tgt="_images/$nb"
	if [ -e "$tgt" ]; then
	# If target exists and content is IDENTICAL → drop the duplicate
	if cmp -s "$f" "$tgt"; then
	git rm -f "$f" 2>/dev/null \|\| rm -f "$f"
	echo "Removed duplicate identical file: $b (kept $(basename "$tgt"))"
	continue
	fi
	# Else different content → create a unique name (rare)
	base="${nb%.}"; ext="${nb##.}"; i=1
	while [ -e "_images/${base}-${i}.${ext}" ]; do i=$((i+1)); done
	tgt="_images/${base}-${i}.${ext}"
	fi
	git mv -f "$f" "$tgt" 2>/dev/null \|\| mv -f "$f" "$tgt"
	echo "Renamed: $b -> $(basename "$tgt")"
	fi
	done

	# 3) Convert <figure><img ...><figcaption>...</figcaption></figure> → ![alt](/_images/file "title")
	- name: Convert <figure><img> blocks to Markdown images (/_images, no spaces)
	shell: bash
	run: \|
	set -euo pipefail
	python3 - <<'PY'
	import re, glob, html, os
	from urllib.parse import unquote

	FIG = re.compile(r'<figure\b[^>]>(.?)</figure>', re.I\|re.S)
	IMG = re.compile(r'<img\b[^>]*>', re.I\|re.S)
	CAP = re.compile(r'<figcaption\b[^>]>(.?)</figcaption>', re.I\|re.S)

	def attr(n,s):
	m = re.search(rf'\b{n}\s=\s["\']([^"\']*)["\']', s, re.I\|re.S)
	return m.group(1).strip() if m else ''

	def clean_basename(src:str)->str:
	"""
	Take src like '../.gitbook/assets/image (766).png'
	→ 'image-(766).png'
	"""
	# take last path segment, URL-decode
	bn = unquote(os.path.basename(src or '').strip())
	if not bn:
	return 'unknown.png'

	# split name + ext
	if '.' in bn:
	base, ext = bn.rsplit('.', 1)
	ext = '.' + ext.lower()
	else:
	base, ext = bn, ''

	# replace %20/spaces with dash; collapse dashes
	base = base.replace('%20', ' ')
	base = re.sub(r'\s+', '-', base)
	base = re.sub(r'-{2,}', '-', base).strip('-')

	# if base ends with (...) and contains a stray quoted fragment inside, drop it but keep ()
	# (defensive — shouldn’t be present in raw figure src, but safe)
	base = re.sub(r'$\s([^()"]?)\s+"(?:[^"\\]\|\\.)"\s$$', r'(\1)', base)

	# allow only letters, digits, -, _, ()
	base = re.sub(r'[^A-Za-z0-9_\-()]+', '-', base)
	base = re.sub(r'-{2,}', '-', base).strip('-')

	return base + ext

	def convert_figure(block:str)->str:
	mimg = IMG.search(block)
	if not mimg:
	return block

	img = mimg.group(0)
	raw_src = attr('src', img)
	alt = attr('alt', img) or ''

	# title: figcaption (preferred) else alt
	mcap = CAP.search(block)
	cap_text = html.unescape(re.sub(r'<[^>]+>', '', mcap.group(1))).strip() if mcap else ''
	title = cap_text or alt

	fn = clean_basename(raw_src)
	url = f"/_images/{fn}"

	# escape markdown specials safely
	alt_md = alt.replace(']', r'\]')
	title_md = title.replace('"', r'\"')

	# if no title at all, omit the "..."
	return f'![{alt_md}]({url} "{title_md}")' if title_md else f'![{alt_md}]({url})'

	files = [p for g in ["*/.md","*/.MD","*/.mdx","*/.MDX","*/.markdown","*/.MARKDOWN"]
	for p in glob.glob(g, recursive=True)]

	any_changed = False
	for path in files:
	s = open(path, encoding="utf-8").read()
	n = FIG.sub(lambda m: convert_figure(m.group(0)), s)
	if n != s:
	open(path, "w", encoding="utf-8").write(n)
	print(f"Converted figures in: {path}")
	any_changed = True

	if not any_changed:
	print("No <figure> blocks to convert.")
	PY


	# 4) Normalize any remaining image links to /_images/<no-spaces>, remove prefixes/subfolders, convert bare <img> too
	- name: Normalize ALL image links to ![alt](/_images/<slug> "alt") exactly + enhance blockquotes
	shell: bash
	run: \|
	set -euo pipefail
	python3 - <<'PY'
	import re, glob, os
	from urllib.parse import unquote
	from html import unescape

	# -------- front matter helpers --------
	def split_front_matter(text: str):
	if text.startswith('\ufeff'):
	text = text.lstrip('\ufeff')
	if text.startswith('\n---'):
	text = text[1:]
	m = re.match(r'^(---\s\n.?\n---\s*\n)', text, flags=re.S)
	if m:
	fm = m.group(1)
	body = text[m.end():]
	return fm, body
	return "", text

	def join_front_matter(fm: str, body: str) -> str:
	return (fm or "") + body

	IMG_EXT = r"(?:png\|jpe?g\|gif\|webp\|svg\|gifv)"

	def clean_filename_from_url(url: str) -> str:
	if not url: return "unknown.png"
	url = url.strip().rstrip('>')
	url = re.sub(r'\s*""', '', url)
	m = re.findall(rf'([^/?#]+?\.{IMG_EXT})', url, flags=re.I)
	bn = m[-1] if m else os.path.basename(url)
	bn = unquote(bn)
	if "." in bn: base, ext = bn.rsplit(".", 1)
	else: base, ext = bn, ""
	base = re.sub(r'$\s([^()"]?)\s+"(?:[^"\\]\|\\.)"\s$$', r'(\1)', base)
	pm = re.search(r'$([^()]*)$$', base)
	if pm:
	inner = re.sub(r'\s+', ' ', pm.group(1)).strip()
	base = base[:pm.start()] + f'({inner})'
	base = re.sub(r'\s+"(?:[^"\\]\|\\.)"\s', '', base)
	base = re.sub(r'["“”].*$', '', base)
	base = re.sub(r"\s+", "-", base)
	base = re.sub(r"[^A-Za-z0-9_\-()]", "-", base)
	base = re.sub(r"-{2,}", "-", base).strip("-")
	return f"{base}.{ext.lower()}" if ext else base

	def to_root_image_url(url: str) -> str:
	return "/_images/" + clean_filename_from_url(url)

	def norm_text(s: str) -> str:
	t = unescape(s or "")
	t = t.replace("“", '"').replace("”", '"').replace("’", "'")
	return t

	def build_md(alt: str, url: str) -> str:
	alt = norm_text(alt or "")
	alt_md = alt.replace("]", r"\]")
	if alt.strip():
	title = alt.replace('"', r'\"')
	return f'![{alt_md}]({to_root_image_url(url)} "{title}")'
	else:
	return f'![{alt_md}]({to_root_image_url(url)})'

	md_files = [p for g in ("*/.md","*/.MD","*/.mdx","*/.MDX","*/.markdown","*/.MARKDOWN")
	for p in glob.glob(g, recursive=True)]

	# ---------- GitBook hint blocks -> Gutenberg blockquote (unchanged) ----------
	HINT_BLOCK = re.compile(r'{%\shint\b[^%]%}(.?){%\sendhint\s*%}', re.I \| re.S)
	MD_LINK = re.compile(r'\[([^\]]+)\]$(https?://[^)]+)$')
	BARE_URL = re.compile(r'^(https?://\S+)$')

	def convert_hint_blocks(s: str) -> str:
	def _repl(m):
	inner = m.group(1).strip()
	paras = []
	for line in inner.splitlines():
	line = line.strip()
	if not line: continue
	line = MD_LINK.sub(r'<a href="\2">\1</a>', line)
	bu = BARE_URL.match(line)
	if bu:
	u = bu.group(1)
	line = f'<a href="{u}">{u}</a>'
	paras.append(f"<p>{line}</p>")
	content = "\n".join(paras) if paras else "<p></p>"
	return f'<blockquote class="wp-block-quote">\n{content}\n</blockquote>'
	return HINT_BLOCK.sub(_repl, s)

	# ---------- enhance existing blockquotes with marker -> class (keep marker <p>) ----------
	import re

	MARKERS = {
	'note': 'is-note',
	'tip': 'is-tip',
	'important': 'is-important',
	'warning': 'is-warning',
	'caution': 'is-caution',
	'info': 'is-info',
	}

	# Match blockquote with class containing wp-block-quote; capture classes and inner HTML
	BQ = re.compile(
	r'<blockquote\s+class="([^"]\bwp-block-quote\b[^"])">\s(.?)\s*</blockquote>',
	re.I \| re.S
	)

	# First paragraph inside the blockquote (no removal, just read it)
	FIRST_P = re.compile(r'^\s<p>(.?)</p>', re.I \| re.S)

	def _strip_md_bold(s: str) -> str:
	# remove … or __…__ only at edges
	s = re.sub(r'^\s(\\\|__)\s', '', s)
	s = re.sub(r'\s(\\\|__)\s$', '', s)
	return s.strip()

	def _detect_marker(text_html: str) -> str:
	# Get plain-ish text from <p>…</p>
	t = re.sub(r'<[^>]+>', '', text_html)
	t = _strip_md_bold(t)
	t = re.sub(r':\s*$', '', t).strip().lower()
	return t

	def enhance_blockquotes_keep_marker(html: str) -> str:
	def _repl(m):
	classes = m.group(1)
	inner = m.group(2)

	fm = FIRST_P.search(inner)
	if not fm:
	return m.group(0)

	first_p_html = fm.group(1)
	key = _detect_marker(first_p_html)
	mod = MARKERS.get(key)
	if not mod:
	return m.group(0)

	# add class if missing
	class_list = classes.split()
	if mod not in class_list:
	classes = classes + " " + mod

	return f'<blockquote class="{classes}">\n{inner}\n</blockquote>'

	return BQ.sub(_repl, html)


	# ---------- image normalizations (your existing logic) ----------
	html_img = re.compile(
	r'<img\b[^>]\bsrc=["\']([^"\']+)["\'][^>]?(?:\balt=["\']([^"\'])["\'])?[^>]>',
	re.I
	)
	md_img_inline = re.compile(
	r'!\[([^\]]*)\]'
	r'\('
	r'\s*<?'
	r'("?)([^)\r\n]+?)\1'
	r'>?'
	r'(?:\s+"[^"]*")?'
	r'\s*\)',
	re.I
	)
	md_img_any = re.compile(
	r'!\[([^\]]*)\]'
	r'\('
	r'\s*<?'
	r'([^)\r\n]+?)'
	r'>?'
	r'(?:\s+"[^"]*")?'
	r'\s*\)',
	re.I
	)
	md_img_ref_use = re.compile(r'!\[([^\]])\]\s\[([^\]]+)\]', re.I)
	ref_def_loose = re.compile(
	r'(\[([^\]]+)\]\s:\s)' r'<?("?)([^>\r\n]+?)\3>?' r'(\s+"[^"]")?\s$',
	re.I
	)

	any_changed = False
	for path in md_files:
	with open(path, encoding="utf-8") as f:
	raw = f.read()
	fm, s = split_front_matter(raw)
	o = s

	# 1) HTML <img> → strict MD
	s = html_img.sub(lambda m: build_md(m.group(2) or "", m.group(1)), s)
	# 1.5) GitBook inline MD images → strict MD to /_images
	s = re.compile(
	r'!\[([^\]]*)\]\('
	r'\s*<?'
	r'([^)\r\n]*?\.gitbook/assets[^)\r\n]+?)'
	r'>?'
	r'(?:\s+"[^"]*")?'
	r'\s*\)', re.I).sub(lambda m: build_md(m.group(1), m.group(2)), s)
	# 2) Inline MD (general) → strict MD (title = alt)
	s = md_img_inline.sub(lambda m: build_md(m.group(1), m.group(3)), s)
	# 2.5) GitBook hint blocks → Gutenberg blockquote
	s = convert_hint_blocks(s)
	# 2.6) Sanitize ANY Markdown image URL
	def _sanitize_md_img(m):
	alt = m.group(1)
	inner = (m.group(2) or "").strip()
	if not inner: return m.group(0)
	extm = re.search(rf'\.{IMG_EXT}\b', inner, re.I)
	if extm: inner = inner[:extm.end()]
	inner = re.sub(r'\s+"(?:[^"\\]\|\\.)"\s', '', inner)
	return build_md(alt, inner)
	s = md_img_any.sub(_sanitize_md_img, s)

	# 3) Gather reference defs
	defs = {}
	lines = s.splitlines()
	for ln in lines:
	m = ref_def_loose.search(ln)
	if m:
	rid = m.group(2).strip()
	url = m.group(4).strip()
	defs[rid] = url

	# 4) Reference usages → strict MD
	def repl_ref_use(m):
	alt, rid = m.group(1), m.group(2).strip()
	url = defs.get(rid)
	return build_md(alt, url) if url else m.group(0)
	s = md_img_ref_use.sub(repl_ref_use, s)

	# 5) Reference defs to /_images/<slug>
	def rewrite_def_line(ln: str) -> str:
	m = ref_def_loose.search(ln)
	if not m: return ln
	head, url, title = m.group(1), m.group(4).strip(), m.group(5) or ""
	new_url = to_root_image_url(url)
	return f"{head}{new_url}{title}"
	s = "\n".join(rewrite_def_line(ln) for ln in s.splitlines())

	# 6) Fix bare .gitbook/assets occurrences outside code fences
	out_lines, fenced = [], False
	fence_re = re.compile(r'^\s*```')
	asset_re = re.compile(r'(\.gitbook/assets/[^"\'<>]+)', re.I)
	for ln in s.splitlines():
	if fence_re.match(ln):
	fenced = not fenced
	out_lines.append(ln); continue
	if not fenced and '.gitbook/assets/' in ln:
	ln = asset_re.sub(lambda m: to_root_image_url(m.group(1)), ln)
	out_lines.append(ln)
	s = "\n".join(out_lines)

	# 7) Remove lingering ../ or ./ before _images
	s = re.sub(r'\]\(\s*<?(?:\.\./\|\.?/)+_images/', '](/_images/', s)

	# 8) NEW: enhance blockquotes (add classes + remove marker <p>)
	s = enhance_blockquotes_keep_marker(s)

	if s != o:
	with open(path, "w", encoding="utf-8", newline="") as f:
	f.write(join_front_matter(fm, s))
	print(f"Fixed: {path}")
	any_changed = True

	if not any_changed:
	print("No image links or blockquotes needed normalization.")
	PY




	# FINAL PATCH: strip quoted fragments inside filename parentheses before .ext
	- name: Final fix — collapse ("…") inside filenames
	shell: bash
	run: \|
	set -euo pipefail
	python3 - <<'PY'
	import re, glob, io

	exts = r'(?:png\|jpe?g\|gif\|webp\|svg\|gifv)'

	# Example it fixes:
	# /_images/image-(766 "Clicking the question mark and selecting \"Contact Support\"").png
	# -> /_images/image-(766).png
	pat = re.compile(
	rf'(/_images/[^)\s]$[^()"\\]?)\s+"(?:[^"\\]\|\\.)"\s($\.{exts})(?=[\s")])',
	re.I
	)

	files = [p for g in ("*/.md","*/.MD","*/.mdx","*/.MDX","*/.markdown","*/.MARKDOWN")
	for p in glob.glob(g, recursive=True)]

	changed = False
	for p in files:
	with open(p, 'r', encoding='utf-8') as f:
	s = f.read()
	n = pat.sub(r'\1\2', s)
	if n != s:
	with open(p, 'w', encoding='utf-8') as f:
	f.write(n)
	print(f"Collapsed quoted fragment inside filename: {p}")
	changed = True

	if not changed:
	print("No filenames with quoted fragments needed collapsing.")
	PY



	# 5) Validate: all image links are /_images/<no-spaces> and no subfolders
	- name: Validate image links style (/_images/*, no spaces)
	shell: bash
	run: \|
	set -euo pipefail
	python3 - <<'PY'
	import re, glob, sys

	exts = r"(png\|jpe?g\|gif\|webp\|svg\|gifv)"
	is_img = re.compile(rf"\.({exts})(?:$\|\?)", re.I)

	md_img_inline = re.compile(
	r'!\[([^\]]*)\]\('
	r'\s*<?'
	r'("?)([^)\r\n]+?)\1'
	r'>?'
	r'(?:\s+"[^"]*")?'
	r'\s*\)',
	re.I
	)
	html_src = re.compile(r'\bsrc=["\']([^"\']+)["\']', re.I)
	ref_def_loose = re.compile(
	r'(\[([^\]]+)\]\s:\s)' r'<?("?)([^>\r\n]+?)\3>?' r'(\s+"[^"]")?\s$',
	re.I
	)
	fence_re = re.compile(r'^\s*```')

	def bad(u: str) -> bool:
	u = u.strip()
	if u.startswith("http://") or u.startswith("https://"):
	return False
	if not is_img.search(u):
	return False
	if not u.startswith("/_images/"):
	return True
	rest = u[len("/_images/"):]
	if "/" in rest:
	return True
	if " " in u:
	return True
	return False

	bads = []
	files = [p for g in ("*/.md","*/.MD","*/.mdx","*/.MDX","*/.markdown","*/.MARKDOWN")
	for p in glob.glob(g, recursive=True)]

	for p in files:
	with open(p, encoding="utf-8") as f:
	lines = f.read().splitlines()

	fenced = False
	for ln in lines:
	if fence_re.match(ln):
	fenced = not fenced
	continue
	if fenced:
	continue
	for m in md_img_inline.finditer(ln):
	u = m.group(2).strip()
	if bad(u): bads.append((p, u))
	for u in html_src.findall(ln):
	if bad(u): bads.append((p, u))

	fenced = False
	for ln in lines:
	if fence_re.match(ln):
	fenced = not fenced
	continue
	if fenced:
	continue
	m = ref_def_loose.search(ln)
	if m:
	u = m.group(4).strip()
	if bad(u): bads.append((p, u))

	if bads:
	print("Non-compliant image links:")
	for pth, u in bads:
	print(f"- {pth}: {u}")
	sys.exit(1)

	print("All local image links are /_images/<file> with no spaces and no subfolders. External URLs are ignored.")
	PY



	- name: Commit changes (if any)
	run: \|
	if [ -n "$(git status --porcelain)" ]; then
	git add -A
	git commit -m "Normalize: convert figures and links to ![alt](/_images/file \"alt\"), no spaces [ci skip]"
	else
	echo "No changes to commit."
	fi

	- name: Rebase on latest and push (retry with --force-with-lease)
	shell: bash
	run: \|
	set -euo pipefail
	git fetch origin main --prune
	if [ "$(git rev-list --count HEAD ^origin/main)" -eq 0 ]; then
	echo "No new commits to push."
	exit 0
	fi
	for a in 1 2 3; do
	echo "Push attempt $a..."
	if git push --force-with-lease; then exit 0; fi
	git rebase origin/main
	sleep 2
	done
	echo "Giving up after 3 attempts."; exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merge remote-tracking branch 'origin/main' #126

Workflow file

Merge remote-tracking branch 'origin/main' #126

Uh oh!

Workflow file for this run