);
diff --git a/src/components/seo/LearnSchema.astro b/src/components/seo/LearnSchema.astro
index 14588fc..0f5d61a 100644
--- a/src/components/seo/LearnSchema.astro
+++ b/src/components/seo/LearnSchema.astro
@@ -1,8 +1,15 @@
---
/**
- * LearnSchema - JSON-LD structured data for learn pages
- * Improves SEO and enables rich results in search engines
+ * LearnSchema - JSON-LD structured data for learn pages.
+ * Emits a combined LearningResource+Article block and a BreadcrumbList.
+ *
+ * Changes from previous version:
+ * - author changed from Organization to Person (E-E-A-T fix, P2-GEO-02)
+ * - @id and url added to the article schema (fixes F-09)
+ * - citation array emitted when references are provided
+ * - articleAuthor prop allows per-article author override
*/
+import type { ReferenceEntry } from '../../content/config';
interface PretestQuestion {
questionText: string;
@@ -24,6 +31,10 @@ interface Props {
pretestQuestionCount?: number;
/** Pretest question data for schema serialisation. */
pretestQuestions?: PretestQuestion[];
+ /** Per-article author override from YAML; defaults to Fabian Moor. */
+ articleAuthor?: string;
+ /** Ordered references to emit as citation[] on the LearningResource. */
+ references?: ReferenceEntry[];
}
const {
@@ -37,10 +48,23 @@ const {
dateModified,
pretestQuestionCount = 0,
pretestQuestions = [],
+ articleAuthor,
+ references = [],
} = Astro.props;
const url = `https://gemmology.dev/learn/${slug}`;
+// Canonical author entity — referenced by @id so it is declared once on /about/.
+// The canonical @id, alternateName and url describe the site maintainer only.
+// When a per-article override names somebody else, a plain Person node is
+// emitted instead, so that consumers resolving nodes by @id do not merge the
+// other author into the maintainer's entity. A blank override falls back to
+// the default name rather than producing an empty "name".
+const DEFAULT_AUTHOR_NAME = 'Fabian Moor';
+const AUTHOR_NAME = articleAuthor?.trim() || DEFAULT_AUTHOR_NAME;
+const authorNode = AUTHOR_NAME === DEFAULT_AUTHOR_NAME
+  ? {
+      "@type": "Person",
+      "@id": "https://gemmology.dev/about#author",
+      "name": AUTHOR_NAME,
+      "alternateName": "Bissbert",
+      "url": "https://gemmology.dev/about/",
+      "knowsAbout": ["Gemmology", "Mineralogy", "Gemstone identification"],
+    }
+  : { "@type": "Person", "name": AUTHOR_NAME };
+
// Helper to format category names
function formatName(name: string): string {
return name
@@ -49,6 +73,53 @@ function formatName(name: string): string {
.join(' ');
}
+/**
+ * Map a ReferenceEntry to a Schema.org node for the article's `citation` array.
+ *
+ * - `journal`  → ScholarlyArticle, with the journal as an `isPartOf` Periodical.
+ * - `book`     → Book, adding `isbn` (hyphens removed), `publisher` and
+ *                `bookEdition` when the entry provides them.
+ * - `standard`, `web` and anything else → WebPage.
+ *
+ * On non-web entries a DOI takes precedence over `url`: it is emitted both as
+ * a PropertyValue identifier and as a https://doi.org/ link.
+ *
+ * `datePublished` is only emitted when the entry has a year. Schema.org
+ * expects a date value there, so a placeholder such as "n.d." is left out
+ * rather than serialised.
+ */
+function refToSchemaOrg(ref: ReferenceEntry): Record<string, unknown> {
+  const authorNodes = ('authors' in ref && ref.authors)
+    ? ref.authors.map((a) => ({
+        "@type": "Person",
+        "name": a.given ? `${a.given} ${a.family}` : a.family,
+      }))
+    : [];
+
+  const doiProps = ref.kind !== 'web' && 'doi' in ref && ref.doi
+    ? {
+        "identifier": {
+          "@type": "PropertyValue",
+          "propertyID": "doi",
+          "value": ref.doi,
+        },
+        "url": `https://doi.org/${ref.doi}`,
+      }
+    : ('url' in ref && ref.url ? { "url": ref.url } : {});
+
+  // Properties shared by every kind; spread last so they are always present.
+  const base: Record<string, unknown> = {
+    "name": ref.title,
+    ...(ref.year != null ? { "datePublished": String(ref.year) } : {}),
+    ...(authorNodes.length > 0 ? { "author": authorNodes } : {}),
+    ...doiProps,
+  };
+
+  switch (ref.kind) {
+    case 'journal':
+      return {
+        "@type": "ScholarlyArticle",
+        "isPartOf": { "@type": "Periodical", "name": ref.journal },
+        ...base,
+      };
+    case 'book': {
+      const isbn = ref.isbn ? { "isbn": ref.isbn.replace(/-/g, '') } : {};
+      const pub = ref.publisher ? { "publisher": { "@type": "Organization", "name": ref.publisher } } : {};
+      const ed = ref.edition ? { "bookEdition": String(ref.edition) } : {};
+      return { "@type": "Book", ...isbn, ...pub, ...ed, ...base };
+    }
+    case 'standard':
+    case 'web':
+    default:
+      return { "@type": "WebPage", ...base };
+  }
+}
+
// Build hasPart Quiz node when pretest questions are available
const quizNode = pretestQuestionCount > 0
? {
@@ -65,20 +136,24 @@ const quizNode = pretestQuestionCount > 0
}
: null;
-// Use combined LearningResource + Article so Google can pick either rich-result type
+// Citation array — only emitted when references are provided (never an empty array).
+const citationLD = references.length > 0
+ ? references.map(refToSchemaOrg)
+ : null;
+
+// Use combined LearningResource + Article so Google can pick either rich-result type.
+// @id and url added here (fixes schema.md flag F-09).
const articleSchema = {
"@context": "https://schema.org",
"@type": ["LearningResource", "Article"],
+ "@id": url,
+ "url": url,
"headline": title,
"name": title,
"description": description,
"inLanguage": "en",
"isAccessibleForFree": true,
- "author": {
- "@type": "Organization",
- "name": "gemmology.dev",
- "url": "https://gemmology.dev"
- },
+ "author": authorNode,
"publisher": {
"@type": "Organization",
"name": "gemmology.dev",
@@ -102,17 +177,16 @@ const articleSchema = {
...(datePublished ? { "datePublished": datePublished } : {}),
...(dateModified ? { "dateModified": dateModified } : {}),
...(quizNode ? { "hasPart": quizNode } : {}),
+ ...(citationLD ? { "citation": citationLD } : {}),
};
// Build BreadcrumbList — no fragment URLs (Google rejects them).
-// Chain: Home → Learn → [subcategory overview if present →] Article
const breadcrumbItems = [
{ "@type": "ListItem", "position": 1, "name": "Home", "item": "https://gemmology.dev" },
{ "@type": "ListItem", "position": 2, "name": "Learn", "item": "https://gemmology.dev/learn" },
];
if (subcategory) {
- // Position 3: article page (subcategory is visible-only; no clean URL to use here)
breadcrumbItems.push({
"@type": "ListItem",
"position": 3,
diff --git a/src/components/seo/StructuredData.astro b/src/components/seo/StructuredData.astro
index 52089d8..9924351 100644
--- a/src/components/seo/StructuredData.astro
+++ b/src/components/seo/StructuredData.astro
@@ -11,6 +11,7 @@ interface Crumb {
interface PersonData {
name: string;
+ alternateName?: string;
jobTitle?: string;
credentials?: string[];
sameAs?: string[];
@@ -64,8 +65,11 @@ const personSchema = person
? {
"@context": "https://schema.org",
"@type": "Person",
+ "@id": "https://gemmology.dev/about#author",
name: person.name,
+ ...(person.alternateName ? { alternateName: person.alternateName } : {}),
...(person.jobTitle ? { jobTitle: person.jobTitle } : {}),
+ url: "https://gemmology.dev/about/",
knowsAbout: [
"Gemmology",
"Mineralogy",
diff --git a/src/content/config.ts b/src/content/config.ts
index c27d272..3bda08e 100644
--- a/src/content/config.ts
+++ b/src/content/config.ts
@@ -1,12 +1,85 @@
import { defineCollection, z } from 'astro:content';
+// ----------------------------------------------------------------------
+// Citation / reference schema (used by learnCollection)
+// ----------------------------------------------------------------------
+
+// Stable slug used to cite a reference: lowercase letters, digits and hyphens,
+// starting with a letter or digit. Every reference kind shares this rule.
+const referenceIdSchema = z.string().regex(/^[a-z0-9][a-z0-9-]*$/);
+
+// One author of a reference; only the family name is mandatory.
+const referenceAuthorSchema = z.object({
+  family: z.string(),
+  given: z.string().optional(),
+});
+
+// Author list as used by every reference kind.
+const referenceAuthorsSchema = z.array(referenceAuthorSchema);
+
+// Book: authors, title and year are required.
+const bookReferenceSchema = z.object({
+  id: referenceIdSchema,
+  kind: z.literal('book'),
+  authors: referenceAuthorsSchema,
+  title: z.string(),
+  year: z.number().int(),
+  publisher: z.string().optional(),
+  edition: z.union([z.string(), z.number()]).optional(),
+  isbn: z.string().optional(),
+  doi: z.string().optional(),
+  url: z.string().url().optional(),
+  pages: z.string().optional(),
+});
+
+// Journal article: additionally requires the journal name.
+const journalReferenceSchema = z.object({
+  id: referenceIdSchema,
+  kind: z.literal('journal'),
+  authors: referenceAuthorsSchema,
+  title: z.string(),
+  journal: z.string(),
+  year: z.number().int(),
+  volume: z.number().int().optional(),
+  issue: z.number().int().optional(),
+  pages: z.string().optional(),
+  doi: z.string().optional(),
+  url: z.string().url().optional(),
+});
+
+// Web source: the url is required; authors and year are optional.
+const webReferenceSchema = z.object({
+  id: referenceIdSchema,
+  kind: z.literal('web'),
+  authors: referenceAuthorsSchema.optional(),
+  title: z.string(),
+  publisher: z.string().optional(),
+  url: z.string().url(),
+  accessed: z.string().optional(),
+  year: z.number().int().optional(),
+});
+
+// Standard: title and year are required; the issuing body may be given as
+// authors, organization or publisher.
+const standardReferenceSchema = z.object({
+  id: referenceIdSchema,
+  kind: z.literal('standard'),
+  authors: referenceAuthorsSchema.optional(),
+  organization: z.string().optional(),
+  title: z.string(),
+  year: z.number().int(),
+  identifier: z.string().optional(),
+  url: z.string().url().optional(),
+  publisher: z.string().optional(),
+});
+
+// Discriminated union on `kind` — preferred over flat union for type narrowing.
+export const referenceSchema = z.discriminatedUnion('kind', [
+ bookReferenceSchema,
+ journalReferenceSchema,
+ webReferenceSchema,
+ standardReferenceSchema,
+]);
+
+// Static type of one validated reference, derived from the schema so the two cannot drift.
+export type ReferenceEntry = z.infer<typeof referenceSchema>;
+
+// ----------------------------------------------------------------------
// Schema for items within sections (property cards, definition lists)
+// ----------------------------------------------------------------------
const itemSchema = z.object({
name: z.string(),
value: z.string().optional(),
description: z.string().optional(),
examples: z.array(z.string()).optional(),
icon: z.string().optional(),
+ citations: z.array(z.string()).optional(),
});
// Schema for table data
@@ -66,6 +139,7 @@ const sectionSchema = z.object({
crystal: crystalSchema.optional(),
image: imageSchema.optional(),
subsections: z.array(subsectionSchema).optional(),
+ citations: z.array(z.string()).optional(),
});
// ----------------------------------------------------------------------
@@ -177,6 +251,7 @@ const learnCollection = defineCollection({
reviewedBy: z.string().optional(),
reviewedAt: z.string().optional(),
publishedAt: z.string().optional(),
+ references: z.array(referenceSchema).optional(),
sections: z.array(sectionSchema),
}),
});
diff --git a/src/lib/citations/index.ts b/src/lib/citations/index.ts
new file mode 100644
index 0000000..9bfd1b5
--- /dev/null
+++ b/src/lib/citations/index.ts
@@ -0,0 +1,161 @@
+/**
+ * Citation index builder for /learn/ pages.
+ *
+ * Scans every prose string in a learn article's sections for {cite:id} markers
+ * and section/item-level citations arrays. Assigns sequential citation numbers
+ * in first-appearance order across the entire page (not per-section).
+ *
+ * The index is built once in [...slug].astro frontmatter and passed as a prop
+ * into the renderer chain so all components share the same numbering.
+ */
+
+import type { ReferenceEntry } from '../../content/config';
+
+export type { ReferenceEntry };
+
+export interface CitationRef {
+ /** The stable YAML-defined id slug (e.g. "read-3rd"). */
+ id: string;
+ /** Order of first appearance across the page, 1-based. */
+ n: number;
+ /** The full reference record from the YAML references array. */
+ ref: ReferenceEntry;
+}
+
+/** Citation entries keyed by reference id; insertion order is first-appearance order. */
+export type CitationIndex = Map<string, CitationRef>;
+
+/** Regex that matches {cite:some-id} markers in prose strings. */
+const CITE_PATTERN = /\{cite:([a-z0-9][a-z0-9-]*)\}/g;
+
+interface SectionLike {
+ content?: string;
+ callout?: { text?: string };
+ citations?: string[];
+ items?: Array<{
+ description?: string;
+ citations?: string[];
+ }>;
+ table?: {
+ rows?: string[][];
+ };
+ subsections?: Array<{
+ content?: string;
+ items?: Array<{
+ description?: string;
+ citations?: string[];
+ }>;
+ table?: {
+ rows?: string[][];
+ };
+ }>;
+}
+
+/**
+ * Build the page-wide CitationIndex for one learn article.
+ *
+ * Sections are walked in document order and every cited reference is numbered
+ * by first appearance (1-based). Within a section the scan order is: content,
+ * callout text, the section-level `citations` array, items (each item's
+ * `citations` before its description), table cells, and finally each
+ * subsection (content, items, table cells).
+ *
+ * Ids that are not declared in `references` are ignored so that content which
+ * is still being edited does not break the build; the renderer drops such
+ * markers as well.
+ *
+ * Call sites wrap this in try/catch so that a malformed YAML references
+ * block cannot 404 a page.
+ */
+export function buildCitationIndex(
+  sections: SectionLike[],
+  references: ReferenceEntry[] = [],
+): CitationIndex {
+  const declared = new Map(references.map((entry) => [entry.id, entry]));
+  const index: CitationIndex = new Map();
+
+  // Number `id` unless it already has a number or is not declared.
+  const register = (id: string): void => {
+    if (index.has(id)) return;
+    const ref = declared.get(id);
+    if (!ref) return; // dangling id — silently skip
+    // Entries are only ever added, so size + 1 is the next sequential number.
+    index.set(id, { id, n: index.size + 1, ref });
+  };
+
+  // Register every {cite:id} marker found in one prose string.
+  const scanText = (text: string): void => {
+    for (const found of text.matchAll(CITE_PATTERN)) {
+      register(found[1]);
+    }
+  };
+
+  // Items contribute their explicit citations first, then inline markers.
+  const scanItems = (
+    items: Array<{ description?: string; citations?: string[] }> | undefined,
+  ): void => {
+    for (const item of items ?? []) {
+      for (const id of item.citations ?? []) register(id);
+      scanText(item.description ?? '');
+    }
+  };
+
+  // Table cells are scanned row by row, left to right.
+  const scanRows = (rows: string[][] | undefined): void => {
+    for (const row of rows ?? []) {
+      for (const cell of row) {
+        for (const found of cell.matchAll(CITE_PATTERN)) {
+          register(found[1]);
+        }
+      }
+    }
+  };
+
+  for (const section of sections) {
+    scanText(section.content ?? '');
+    scanText(section.callout?.text ?? '');
+    for (const id of section.citations ?? []) register(id);
+    scanItems(section.items);
+    scanRows(section.table?.rows);
+
+    // Subsections (one level deep — the schema does not nest deeper)
+    for (const sub of section.subsections ?? []) {
+      scanText(sub.content ?? '');
+      scanItems(sub.items);
+      scanRows(sub.table?.rows);
+    }
+  }
+
+  return index;
+}
+
+/**
+ * Produce the short "Author, Year" text used for aria-label attributes.
+ *
+ * The author part is the first author's family name; when the entry has no
+ * authors, the first 30 characters of the title are used instead. The year
+ * part is the entry's year, or "n.d." when it has none.
+ */
+export function formatCiteLabel(ref: ReferenceEntry): string {
+  const leadAuthor = 'authors' in ref ? ref.authors?.[0] : undefined;
+  const who = leadAuthor?.family ?? ref.title.slice(0, 30);
+  const when = ref.year ?? 'n.d.';
+  return `${who}, ${when}`;
+}
+
+/**
+ * Replace {cite:id} markers in an already-rendered HTML string with
+ * accessible citation links that point to the reference list.
+ *
+ * Must run AFTER marked.parse() so Markdown does not escape the output,
+ * and BEFORE the HTML is passed to set:html in the template.
+ *
+ * Markers whose id is not in `index` are removed from the output. The visible
+ * link text is the citation number in square brackets.
+ *
+ * NOTE(review): `label` comes from reference metadata (author family name or
+ * a title excerpt). If it is interpolated into an HTML attribute it must be
+ * attribute-escaped first — confirm.
+ */
+export function resolveCiteMarkers(html: string, index: CitationIndex): string {
+ // The callback's first argument is the whole marker; only the captured id is needed.
+ return html.replace(CITE_PATTERN, (_, id: string) => {
+ const entry = index.get(id);
+ if (!entry) return ''; // dangling ref — render nothing
+ const label = formatCiteLabel(entry.ref);
+ return (
+ `` +
+ `[${entry.n}]` +
+ ``
+ );
+ });
+}
diff --git a/src/pages/about.astro b/src/pages/about/index.astro
similarity index 72%
rename from src/pages/about.astro
rename to src/pages/about/index.astro
index 3ec2052..6610b65 100644
--- a/src/pages/about.astro
+++ b/src/pages/about/index.astro
@@ -1,17 +1,18 @@
---
-import BaseLayout from '../layouts/BaseLayout.astro';
-import Container from '../components/ui-astro/Container.astro';
-import { Card } from '../components/ui/Card';
-import { Link } from '../components/ui/Link';
-import { getSiteStats } from '../lib/stats';
-import StructuredData from '../components/seo/StructuredData.astro';
+import BaseLayout from '../../layouts/BaseLayout.astro';
+import Container from '../../components/ui-astro/Container.astro';
+import { Card } from '../../components/ui/Card';
+import { Link } from '../../components/ui/Link';
+import { getSiteStats } from '../../lib/stats';
+import StructuredData from '../../components/seo/StructuredData.astro';
const { familyCount, expressionCount } = await getSiteStats();
-const AUTHOR_NAME = "Bissbert";
-const AUTHOR_TITLE = "Maintainer & Editor";
+const AUTHOR_NAME = "Fabian Moor";
+const AUTHOR_ALTERNATE = "Bissbert";
+const AUTHOR_TITLE = "Gemmologist & developer";
const AUTHOR_CREDENTIALS: string[] = [];
-const AUTHOR_SAMEAS = ["https://github.com/Bissbert"];
+const AUTHOR_SAMEAS = ["https://github.com/Bissbert"];
---
About gemmology.dev
gemmology.dev is an open, FGA-aligned reference and study toolkit for
- coloured gemstones. Everything here – the {familyCount}-mineral family
+ coloured gemstones. Everything here – the {familyCount}-mineral family
reference, {expressionCount} crystal expressions, the Crystal
- Description Language (CDL), the calculators, and the quiz – is
+ Description Language (CDL), the calculators, and the quiz – is
free and open-source.
Maintainer
- {AUTHOR_NAME}, {AUTHOR_TITLE}.
+ {AUTHOR_NAME} ({AUTHOR_ALTERNATE}),
+ {AUTHOR_TITLE}.
Contributions, corrections, and source citations from the gemmological
- community are welcomed via GitHub.
-
-
-
Who maintains this
-
- gemmology.dev is maintained by the
- gemmology-dev open-source
- project. Contributions, corrections, and source citations from the
- gemmological community are welcomed via GitHub issues and pull requests.
+ community are welcomed via GitHub issues.
Mindat.org for locality and chemistry cross-checks
-
Schumann, Walter.Gemstones of the World
-
Read, Peter.Gemmology
-
+
Sources
+
+ All factual claims in the learn articles are sourced from peer-reviewed
+ gemmological literature, FGA curriculum materials, and vetted online
+ references. The complete bibliography is available at{' '}
+ Sources & bibliography.
+
Spotted incorrect data, a missing reference, or a broken figure?
- Open an issue at
+ Open an issue at{' '}
github.com/gemmology-dev/gemmology.dev/issues.
Corrections are usually shipped within a week.
Code is MIT-licensed. Educational content (the learn articles and
- curriculum data in
+ curriculum data in{' '}
gemmology-knowledge)
is published under CC BY-SA 4.0.
diff --git a/src/pages/learn/[...slug].astro b/src/pages/learn/[...slug].astro
index 2107389..3f06c00 100644
--- a/src/pages/learn/[...slug].astro
+++ b/src/pages/learn/[...slug].astro
@@ -8,8 +8,10 @@ import { Breadcrumb, PageNav } from '../../components/docs';
import { SectionRenderer } from '../../components/learn';
import { Container, Card, Badge } from '../../components/ui-astro';
import LearnSchema from '../../components/seo/LearnSchema.astro';
+import References from '../../components/learn/References.astro';
import { LearnQuizWidget } from '../../components/quiz/study/LearnQuizWidget';
import type { WidgetQuestion } from '../../components/quiz/study/LearnQuizWidget';
+import { buildCitationIndex } from '../../lib/citations/index';
export async function getStaticPaths() {
const entries = await getCollection('learn');
@@ -173,6 +175,21 @@ const pretestQuestions: WidgetQuestion[] = articleQuestions
unvetted: q.data.unvetted ?? false,
};
});
+
+// Build citation index — best-effort: a failure here must not take the page
+// down, so the page falls back to an empty map (no citations rendered). The
+// cause is logged so it shows up in the build output instead of vanishing.
+let citationIndex = new Map();
+try {
+  citationIndex = buildCitationIndex(data.sections, data.references ?? []);
+} catch (err) {
+  console.warn('[learn] citation index could not be built; rendering without citations:', err);
+}
+const hasCitations = citationIndex.size > 0;
+
+// Collect ordered references for JSON-LD emission.
+const orderedRefs = hasCitations
+  ? [...citationIndex.values()].sort((a, b) => a.n - b.n).map((e) => e.ref)
+  : [];
---
@@ -188,6 +205,8 @@ const pretestQuestions: WidgetQuestion[] = articleQuestions
dateModified={dateModified}
pretestQuestionCount={pretestQuestions.length}
pretestQuestions={pretestQuestions}
+ articleAuthor={data.author}
+ references={orderedRefs}
/>
@@ -200,7 +219,7 @@ const pretestQuestions: WidgetQuestion[] = articleQuestions
From b49c117fae8c05e38b39f4b188b0bff3b7a7e014 Mon Sep 17 00:00:00 2001
From: Bissbert <43237892+Bissbert@users.noreply.github.com>
Date: Tue, 12 May 2026 10:25:55 +0700
Subject: [PATCH 2/2] feat(learn): add /about/sources/ bibliography page and
validate-citations CI
- Extract formatAuthors/doiLink/isbnLink/urlLink/formatReference into
src/lib/citations/format.ts; update References.astro to import from there.
- Add src/pages/about/sources/index.astro: aggregate bibliography grouped by
kind (Journal articles / Books / Web sources / Standards), sorted by author
family name then year, with per-entry cited-in links; placeholder shown when
no citations are declared yet.
- Add scripts/validate-citations.mjs: validates {cite:id} markers, citations
arrays, unique ids, valid kinds and years >= 1800; dangling markers exit 1,
unused references warn only.
- Add validate:citations script to package.json.
- Add .github/workflows/ci.yml: lint/typecheck/build pipeline with sync step
before validate:citations, matching deploy.yml style.
---
.github/workflows/ci.yml | 42 +++++
package.json | 3 +-
scripts/validate-citations.mjs | 249 ++++++++++++++++++++++++++
src/components/learn/References.astro | 77 +-------
src/lib/citations/format.ts | 118 ++++++++++++
src/pages/about/sources/index.astro | 176 ++++++++++++++++++
6 files changed, 588 insertions(+), 77 deletions(-)
create mode 100644 .github/workflows/ci.yml
create mode 100644 scripts/validate-citations.mjs
create mode 100644 src/lib/citations/format.ts
create mode 100644 src/pages/about/sources/index.astro
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..fd96e18
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,42 @@
+name: CI
+
+on:
+ push:
+ branches: [main, 'feature/**', 'feat/**', 'fix/**', 'refactor/**', 'chore/**']
+ pull_request:
+ branches: [main]
+
+jobs:
+ lint-and-check:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+ cache: 'npm'
+
+ # npm ci installs exactly what package-lock.json pins and fails on drift,
+ # which keeps CI reproducible and matches the lockfile-keyed npm cache above.
+ - name: Install dependencies
+ run: npm ci
+
+ - name: Copy database files
+ run: npm run copy-db
+
+ - name: Sync learn content
+ run: npm run sync
+
+ - name: Validate citations
+ run: npm run validate:citations
+
+ - name: Validate question bank
+ if: ${{ hashFiles('src/content/questions/**/*.yaml') != '' }}
+ run: npm run validate:questions
+
+ - name: Type check
+ run: npx astro check
+
+ - name: Build
+ run: npm run build
diff --git a/package.json b/package.json
index 99c9480..762fd80 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,8 @@
"question:new": "node scripts/new-question.mjs",
"validate:questions": "node scripts/validate-questions.mjs",
"questions:coverage": "node scripts/questions-coverage.mjs",
- "schema:questions": "node scripts/generate-question-schema.mjs"
+ "schema:questions": "node scripts/generate-question-schema.mjs",
+ "validate:citations": "node scripts/validate-citations.mjs"
},
"dependencies": {
"@astrojs/node": "^9.5.2",
diff --git a/scripts/validate-citations.mjs b/scripts/validate-citations.mjs
new file mode 100644
index 0000000..c0cfa32
--- /dev/null
+++ b/scripts/validate-citations.mjs
@@ -0,0 +1,249 @@
+#!/usr/bin/env node
+/**
+ * validate-citations.mjs
+ *
+ * Validates citation markers and reference declarations across every YAML file
+ * under src/content/learn/. Runs against the synced content tree, so execute
+ * AFTER `npm run sync`.
+ *
+ * Checks performed:
+ * - All reference ids are unique within a file.
+ * - Each reference has a valid `kind` and a year >= 1800.
+ * - Every {cite:id} marker in prose (content, description, callout.text,
+ * table cells) refers to a declared reference id.
+ * - Every id in a section/item `citations` array refers to a declared id.
+ * - Every declared reference is cited at least once (warning only).
+ *
+ * Exit codes:
+ * 0 — no ERRORs (warnings are printed but do not fail the run).
+ * 1 — one or more ERRORs found.
+ *
+ * Output format:
+ * ERROR: :
+ * WARN: :
+ */
+
+import { readdirSync, readFileSync, statSync } from 'node:fs';
+import { resolve, relative, extname, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+import yaml from 'js-yaml';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const ROOT = resolve(__dirname, '..');
+const LEARN_DIR = resolve(ROOT, 'src', 'content', 'learn');
+
+const VALID_KINDS = new Set(['book', 'journal', 'web', 'standard']);
+const CITE_PATTERN = /\{cite:([a-z0-9][a-z0-9-]*)\}/g;
+
+// ── File collection ───────────────────────────────────────────────────────────
+
+/**
+ * Recursively gather the absolute paths of all `.yaml` files below `dir`.
+ * A directory that cannot be listed (for example because it does not exist)
+ * contributes no files.
+ */
+function collectYamlFiles(dir) {
+  let names;
+  try {
+    names = readdirSync(dir);
+  } catch {
+    return [];
+  }
+
+  const found = [];
+  for (const name of names) {
+    const fullPath = resolve(dir, name);
+    const info = statSync(fullPath);
+    if (info.isDirectory()) {
+      found.push(...collectYamlFiles(fullPath));
+      continue;
+    }
+    if (info.isFile() && extname(name) === '.yaml') {
+      found.push(fullPath);
+    }
+  }
+  return found;
+}
+
+// ── Line-number helper ────────────────────────────────────────────────────────
+
+/**
+ * Locate `needle` in `src` and report the 1-based line on which its first
+ * occurrence starts. Yields 0 when `needle` does not occur, which callers
+ * treat as "location unknown".
+ */
+function lineOf(src, needle) {
+  const offset = src.indexOf(needle);
+  if (offset < 0) return 0;
+
+  // The line number is one more than the count of newlines before the match.
+  let line = 1;
+  for (let at = src.indexOf('\n'); at !== -1 && at < offset; at = src.indexOf('\n', at + 1)) {
+    line += 1;
+  }
+  return line;
+}
+
+// ── Prose string extraction ───────────────────────────────────────────────────
+
+/**
+ * Collect every prose string of a learn article in which {cite:id} markers
+ * may appear: section content, callout text, item descriptions and table
+ * cells, followed by the same fields (minus callout) of each subsection.
+ *
+ * Only non-empty strings are returned. YAML happily parses table cells such
+ * as `42` or `true` into non-string scalars; those cannot contain a marker
+ * and would make the caller's `.matchAll()` throw, so they are left out.
+ */
+function collectProseStrings(data) {
+  const strings = [];
+
+  // Keep a value only when it is a non-empty string.
+  const keep = (value) => {
+    if (typeof value === 'string' && value !== '') strings.push(value);
+  };
+
+  // Item descriptions and table cells are laid out identically in sections
+  // and subsections.
+  const walkItemsAndTable = (node) => {
+    for (const item of node.items ?? []) {
+      keep(item?.description);
+    }
+    for (const row of node.table?.rows ?? []) {
+      if (!Array.isArray(row)) continue;
+      for (const cell of row) keep(cell);
+    }
+  };
+
+  const sections = Array.isArray(data?.sections) ? data.sections : [];
+  for (const section of sections) {
+    if (section === null || typeof section !== 'object') continue;
+    keep(section.content);
+    keep(section.callout?.text);
+    walkItemsAndTable(section);
+    for (const sub of section.subsections ?? []) {
+      if (sub === null || typeof sub !== 'object') continue;
+      keep(sub.content);
+      walkItemsAndTable(sub);
+    }
+  }
+  return strings;
+}
+
+/**
+ * Gather, in document order, every id listed in a `citations` array: the
+ * section's own array first, then those of its items, then those of the
+ * items inside each subsection.
+ */
+function collectCitationArrayIds(data) {
+  // Ids declared on the items of one section or subsection.
+  const idsOfItems = (items) =>
+    [...(items ?? [])].flatMap((item) => [...(item.citations ?? [])]);
+
+  return [...(data.sections ?? [])].flatMap((section) => [
+    ...(section.citations ?? []),
+    ...idsOfItems(section.items),
+    ...[...(section.subsections ?? [])].flatMap((sub) => idsOfItems(sub.items)),
+  ]);
+}
+
+// ── Main ──────────────────────────────────────────────────────────────────────
+
+const files = collectYamlFiles(LEARN_DIR);
+
+// Nothing to validate: succeed, but point at the most likely cause.
+if (files.length === 0) {
+  console.log('No learn YAML files found — run `npm run sync` first.');
+  process.exit(0);
+}
+
+let errorCount = 0;
+let warnCount = 0;
+
+/**
+ * Print one finding as "<LEVEL>: <path>[:<line>] <message>" and count it.
+ * The path is shown relative to the repository root; the line suffix is left
+ * out when `line` is 0 (location unknown). Any level other than 'ERROR' is
+ * counted as a warning.
+ */
+function report(level, filePath, line, message) {
+  const shownPath = relative(ROOT, filePath);
+  const where = line > 0 ? `${shownPath}:${line}` : shownPath;
+  console.log(`${level}: ${where} ${message}`);
+
+  if (level === 'ERROR') {
+    errorCount += 1;
+    return;
+  }
+  warnCount += 1;
+}
+
+// Validate each learn file on its own: reference ids are scoped per file.
+for (const filePath of files) {
+  let src;
+  let data;
+
+  try {
+    src = readFileSync(filePath, 'utf8');
+    data = yaml.load(src);
+  } catch (err) {
+    report('ERROR', filePath, 0, `YAML parse error: ${err.message}`);
+    continue;
+  }
+
+  // An empty file (yaml.load yields undefined) or a scalar document has
+  // neither sections nor references, so there is nothing to validate.
+  if (data === null || typeof data !== 'object') {
+    continue;
+  }
+
+  // A missing `references` block means "no ids declared", not "skip the
+  // file": any marker in such a file is dangling and must still be reported.
+  const references = data.references ?? [];
+  if (!Array.isArray(references)) {
+    report('ERROR', filePath, lineOf(src, 'references:'), '`references` must be a list of reference entries');
+    continue;
+  }
+
+  // ── 1. Validate reference declarations ─────────────────────────────────────
+
+  const declaredIds = new Map(); // id → line number in source
+
+  for (const ref of references) {
+    if (ref === null || typeof ref !== 'object') {
+      report('ERROR', filePath, lineOf(src, 'references:'), 'reference entry is not a mapping');
+      continue;
+    }
+
+    // Unique id check
+    if (declaredIds.has(ref.id)) {
+      const line = lineOf(src, ref.id);
+      report('ERROR', filePath, line, `duplicate reference id "${ref.id}"`);
+    } else {
+      declaredIds.set(ref.id, lineOf(src, ref.id));
+    }
+
+    // Valid kind check
+    if (!VALID_KINDS.has(ref.kind)) {
+      const line = lineOf(src, `kind: ${ref.kind}`);
+      report(
+        'ERROR',
+        filePath,
+        line,
+        `reference "${ref.id}" has unknown kind "${ref.kind}" (expected: book, journal, web, standard)`,
+      );
+    }
+
+    // Year range check
+    if (ref.year != null && (ref.year < 1800 || !Number.isInteger(ref.year))) {
+      const line = lineOf(src, `year: ${ref.year}`);
+      report(
+        'ERROR',
+        filePath,
+        line,
+        `reference "${ref.id}" has invalid year ${ref.year} (must be integer >= 1800)`,
+      );
+    }
+  }
+
+  // ── 2. Collect cited ids ────────────────────────────────────────────────────
+
+  const citedIds = new Set();
+
+  // Inline {cite:id} markers in prose strings
+  for (const text of collectProseStrings(data)) {
+    // YAML scalars such as numeric table cells are not strings and cannot
+    // carry a marker; calling matchAll on them would throw.
+    if (typeof text !== 'string') continue;
+    for (const match of text.matchAll(CITE_PATTERN)) {
+      const id = match[1];
+      citedIds.add(id);
+      if (!declaredIds.has(id)) {
+        const line = lineOf(src, match[0]);
+        report('ERROR', filePath, line, `dangling citation marker {cite:${id}} — id not declared in references`);
+      }
+    }
+  }
+
+  // Explicit citations arrays
+  for (const id of collectCitationArrayIds(data)) {
+    citedIds.add(id);
+    if (!declaredIds.has(id)) {
+      const line = lineOf(src, id);
+      report('ERROR', filePath, line, `citations array references undeclared id "${id}"`);
+    }
+  }
+
+  // ── 3. Unused reference warnings ───────────────────────────────────────────
+
+  for (const [id, line] of declaredIds.entries()) {
+    if (!citedIds.has(id)) {
+      report('WARN', filePath, line, `reference "${id}" is declared but never cited`);
+    }
+  }
+}
+
+// ── Summary ───────────────────────────────────────────────────────────────────
+
+// A clean run says nothing about whether any citations exist, so the success
+// line only states that every file passed.
+if (errorCount === 0 && warnCount === 0) {
+  console.log(`validate-citations: all ${files.length} learn files passed.`);
+} else {
+  console.log('');
+  console.log(`validate-citations: ${errorCount} error(s), ${warnCount} warning(s) across ${files.length} files.`);
+}
+
+// Warnings alone never fail the run; any error does.
+process.exit(errorCount > 0 ? 1 : 0);
diff --git a/src/components/learn/References.astro b/src/components/learn/References.astro
index ef9f5f5..b0c9191 100644
--- a/src/components/learn/References.astro
+++ b/src/components/learn/References.astro
@@ -13,7 +13,7 @@
* - wraps source titles only — never the [n] marker.
*/
import type { CitationIndex } from '../../lib/citations/index';
-import type { ReferenceEntry } from '../../content/config';
+import { formatReference } from '../../lib/citations/format';
interface Props {
citationIndex: CitationIndex;
@@ -21,81 +21,6 @@ interface Props {
const { citationIndex } = Astro.props;
const citations = [...citationIndex.values()].sort((a, b) => a.n - b.n);
-
-// Format author list as "Family, G.; Family2, G2." — single authors use family only.
-function formatAuthors(authors: Array<{ family: string; given?: string }> | undefined): string {
- if (!authors || authors.length === 0) return '';
- return authors
- .map((a) => (a.given ? `${a.family}, ${a.given[0]}.` : a.family))
- .join('; ');
-}
-
-// Build a DOI anchor string (already HTML-safe; DOIs contain no HTML special chars).
-function doiLink(doi: string): string {
- return (
- `DOI: ${doi}`
- );
-}
-
-// Build an ISBN WorldCat anchor.
-function isbnLink(isbn: string): string {
- const bare = isbn.replace(/-/g, '');
- return (
- `ISBN: ${isbn}`
- );
-}
-
-// Build a plain URL anchor.
-function urlLink(url: string, label?: string): string {
- const text = label ?? url;
- return `${text}`;
-}
-
-// Format a full reference entry as an HTML string.
-function formatReference(ref: ReferenceEntry): string {
- switch (ref.kind) {
- case 'book': {
- const authors = formatAuthors(ref.authors);
- const edition = ref.edition ? ` (${ref.edition} ed.)` : '';
- const pub = ref.publisher ? ` ${ref.publisher}.` : '';
- const isbn = ref.isbn ? ` ${isbnLink(ref.isbn)}.` : '';
- const doi = ref.doi ? ` ${doiLink(ref.doi)}.` : '';
- return `${authors} (${ref.year}). ${ref.title}${edition}.${pub}${isbn}${doi}`;
- }
- case 'journal': {
- const authors = formatAuthors(ref.authors);
- const vol = ref.volume != null ? `, ${ref.volume}` : '';
- const iss = ref.issue != null ? `(${ref.issue})` : '';
- const pages = ref.pages ? `, ${ref.pages}` : '';
- const doi = ref.doi ? `. ${doiLink(ref.doi)}` : '';
- const url = !ref.doi && ref.url ? `. ${urlLink(ref.url)}` : '';
- return (
- `${authors} (${ref.year}). ${ref.title}. ` +
- `${ref.journal}${vol}${iss}${pages}${doi}${url}.`
- );
- }
- case 'web': {
- const authors = formatAuthors(ref.authors ?? []);
- const year = ref.year ? `(${ref.year})` : '(n.d.)';
- const pub = ref.publisher ? ` ${ref.publisher}.` : '';
- const accessed = ref.accessed ? ` Retrieved ${ref.accessed}, from` : '';
- const link = ref.url ? ` ${urlLink(ref.url)}` : '';
- return `${authors} ${year}. ${ref.title}.${pub}${accessed}${link}`;
- }
- case 'standard': {
- const org =
- ref.organization ??
- formatAuthors(ref.authors ?? []) ??
- (ref.publisher ?? '');
- const pub = ref.publisher && ref.publisher !== org ? ` ${ref.publisher}.` : '';
- const id = ref.identifier ? ` ${ref.identifier}.` : '';
- const url = ref.url ? ` ${urlLink(ref.url)}.` : '';
- return `${org} (${ref.year}). ${ref.title}.${id}${pub}${url}`;
- }
- }
-}
---
{citations.length > 0 && (
diff --git a/src/lib/citations/format.ts b/src/lib/citations/format.ts
new file mode 100644
index 0000000..e433f43
--- /dev/null
+++ b/src/lib/citations/format.ts
@@ -0,0 +1,118 @@
+/**
+ * Shared citation-formatting utilities used by:
+ * - src/components/learn/References.astro (per-article reference list)
+ * - src/pages/about/sources/index.astro (aggregate bibliography page)
+ *
+ * All functions return plain strings; the anchor tags use HTML attribute
+ * syntax that is safe to pass to set:html in Astro templates.
+ */
+
+import type { ReferenceEntry } from '../../content/config';
+
+export type { ReferenceEntry };
+
+// ── Author formatting ────────────────────────────────────────────────────────
+
+/** Render authors as "Family, G.; Family2, G2." (each given name is cut to
+ *  its first character); yields '' when the list is missing or empty.
+ */
+export function formatAuthors(
+  authors: Array<{ family: string; given?: string }> | undefined,
+): string {
+  const rendered: string[] = [];
+  for (const { family, given } of authors ?? []) {
+    rendered.push(given ? `${family}, ${given.charAt(0)}.` : family);
+  }
+  return rendered.join('; ');
+}
+
+// ── Link helpers ─────────────────────────────────────────────────────────────
+
+/**
+ * Build a DOI anchor that resolves via doi.org. DOIs may contain < > & " (old
+ * SICI-style DOIs do), so the href is percent-encoded and the label escaped.
+ */
+export function doiLink(doi: string): string {
+  const esc = (s: string) => s.replace(/[&<>"]/g, (c) => `&#${c.charCodeAt(0)};`);
+  const href = `https://doi.org/${encodeURIComponent(doi).replace(/%2F/g, '/')}`;
+  return `<a href="${href}">DOI: ${esc(doi)}</a>`;
+}
+
+/** Build an ISBN anchor to WorldCat; hyphens are dropped for the href only. */
+export function isbnLink(isbn: string): string {
+  const esc = (s: string) => s.replace(/[&<>"]/g, (c) => `&#${c.charCodeAt(0)};`);
+  const bare = isbn.replace(/-/g, '');
+  const href = `https://www.worldcat.org/isbn/${encodeURIComponent(bare)}`;
+  return `<a href="${href}">ISBN: ${esc(isbn)}</a>`;
+}
+
+/** Build an anchor for `url`; `label` (default: the URL itself) is the link text. */
+export function urlLink(url: string, label?: string): string {
+  const esc = (s: string) => s.replace(/[&<>"]/g, (c) => `&#${c.charCodeAt(0)};`);
+  return `<a href="${esc(url)}">${esc(label ?? url)}</a>`;
+}
+
+// ── Full reference formatter ─────────────────────────────────────────────────
+
+/**
+ * Format a reference (book, journal, web or standard) as an HTML string for set:html.
+ */
+export function formatReference(ref: ReferenceEntry): string {
+  switch (ref.kind) {
+    case 'book': {
+      const authors = formatAuthors(ref.authors);
+      const edition = ref.edition ? ` (${ref.edition} ed.)` : '';
+      const pub = ref.publisher ? ` ${ref.publisher}.` : '';
+      const isbn = ref.isbn ? ` ${isbnLink(ref.isbn)}.` : '';
+      const doi = ref.doi ? ` ${doiLink(ref.doi)}.` : '';
+      return `${authors} (${ref.year}). ${ref.title}${edition}.${pub}${isbn}${doi}`;
+    }
+    case 'journal': {
+      const authors = formatAuthors(ref.authors);
+      const vol = ref.volume != null ? `, ${ref.volume}` : '';
+      const iss = ref.issue != null ? `(${ref.issue})` : '';
+      const pages = ref.pages ? `, ${ref.pages}` : '';
+      const doi = ref.doi ? `. ${doiLink(ref.doi)}` : '';
+      const url = !ref.doi && ref.url ? `. ${urlLink(ref.url)}` : ''; // URL only when there is no DOI
+      return `${authors} (${ref.year}). ${ref.title}. ${ref.journal}${vol}${iss}${pages}${doi}${url}.`;
+    }
+    case 'web': {
+      const authors = formatAuthors(ref.authors ?? []);
+      const year = ref.year ? `(${ref.year})` : '(n.d.)';
+      const pub = ref.publisher ? ` ${ref.publisher}.` : '';
+      const accessed = ref.accessed ? ` Retrieved ${ref.accessed}, from` : '';
+      const link = ref.url ? ` ${urlLink(ref.url)}` : '';
+      const byline = authors ? `${authors} ${year}` : year; // no leading space when author-less
+      return `${byline}. ${ref.title}.${pub}${accessed}${link}`;
+    }
+    case 'standard': {
+      // formatAuthors() returns '' (never null/undefined) for an empty list, so
+      // the chain must use || rather than ?? for the publisher fallback to be reachable.
+      const org =
+        ref.organization || formatAuthors(ref.authors ?? []) || (ref.publisher ?? '');
+      const pub = ref.publisher && ref.publisher !== org ? ` ${ref.publisher}.` : '';
+      const id = ref.identifier ? ` ${ref.identifier}.` : '';
+      const url = ref.url ? ` ${urlLink(ref.url)}.` : '';
+      return `${org} (${ref.year}). ${ref.title}.${id}${pub}${url}`;
+    }
+  }
+}
+
+// ── Sort helpers ─────────────────────────────────────────────────────────────
+
+/**
+ * Primary sort key, lower-cased: the first author's family name, else the
+ * organization, else the title. Empty strings count as missing, hence `||`.
+ */
+export function sortKeyAuthor(ref: ReferenceEntry): string {
+  const family = 'authors' in ref ? ref.authors?.[0]?.family : undefined;
+  const org = 'organization' in ref ? ref.organization : undefined;
+  return (family || org || ref.title).toLowerCase();
+}
+
+/** Secondary sort key: the reference's year, with a missing year sorting as 0. */
+export function sortKeyYear(ref: ReferenceEntry): number {
+  return ref.year == null ? 0 : ref.year;
+}
diff --git a/src/pages/about/sources/index.astro b/src/pages/about/sources/index.astro
new file mode 100644
index 0000000..8b4ad0c
--- /dev/null
+++ b/src/pages/about/sources/index.astro
@@ -0,0 +1,176 @@
+---
+/**
+ * /about/sources/ — aggregate bibliography page.
+ *
+ * Collects every reference from every learn article, deduplicates by id,
+ * groups by kind (Journal articles / Books / Web sources / Standards), and
+ * sorts within each group by author family name then year.
+ *
+ * Each entry shows which learn slug(s) cite it as a small list of links.
+ */
+import { getCollection } from 'astro:content';
+import BaseLayout from '../../../layouts/BaseLayout.astro';
+import Container from '../../../components/ui-astro/Container.astro';
+import StructuredData from '../../../components/seo/StructuredData.astro';
+import {
+ formatReference,
+ sortKeyAuthor,
+ sortKeyYear,
+} from '../../../lib/citations/format';
+import type { ReferenceEntry } from '../../../lib/citations/format';
+
+// ── Collect and deduplicate references ───────────────────────────────────────
+
+const learnEntries = await getCollection('learn');
+
+// Reference id → the reference plus the ids of the learn entries citing it.
+// A Map keeps first-seen insertion order; an entry that lists the same
+// reference twice is recorded only once.
+const refMap = new Map<string, { ref: ReferenceEntry; slugs: string[] }>();
+
+for (const entry of learnEntries) {
+  const refs: ReferenceEntry[] = entry.data.references ?? [];
+  for (const ref of refs) {
+    const existing = refMap.get(ref.id);
+    if (!existing) {
+      refMap.set(ref.id, { ref, slugs: [entry.id] });
+    } else if (!existing.slugs.includes(entry.id)) {
+      existing.slugs.push(entry.id);
+    }
+  }
+}
+
+// ── Group by kind ─────────────────────────────────────────────────────────────
+
+type KindLabel = 'Journal articles' | 'Books' | 'Web sources' | 'Standards';
+// Order in which the (initially empty) groups are seeded into `grouped`.
+const KIND_ORDER: KindLabel[] = ['Journal articles', 'Books', 'Web sources', 'Standards'];
+// Reference `kind` discriminant → heading label of the group it belongs to.
+const KIND_MAP: Record<ReferenceEntry['kind'], KindLabel> = {
+  journal: 'Journal articles',
+  book: 'Books',
+  web: 'Web sources',
+  standard: 'Standards',
+};
+
+const grouped = new Map<KindLabel, Array<{ ref: ReferenceEntry; slugs: string[] }>>(
+  KIND_ORDER.map((k) => [k, []]),
+);
+
+for (const item of refMap.values()) {
+  const label = KIND_MAP[item.ref.kind];
+  grouped.get(label)!.push(item);
+}
+
+// Within each group, order by author sort key; identical keys fall back to year.
+for (const bucket of grouped.values()) {
+  bucket.sort((lhs, rhs) => {
+    const lhsKey = sortKeyAuthor(lhs.ref);
+    const rhsKey = sortKeyAuthor(rhs.ref);
+    if (lhsKey === rhsKey) return sortKeyYear(lhs.ref) - sortKeyYear(rhs.ref);
+    return lhsKey.localeCompare(rhsKey);
+  });
+}
+
+const totalRefs = refMap.size;
+
+// Learn entry id → human-readable article title, for link labels.
+const slugTitles = new Map<string, string>(learnEntries.map((e) => [e.id, e.data.title as string]));
+---
+
+
+
+
+
+
+
+
+
Sources & bibliography
+
+
+ This page lists every peer-reviewed article, book, standard, and vetted
+ online reference cited across the learn section of
+ gemmology.dev. All factual claims in those articles -- numerical data,
+ optical constants, crystal properties, and identification criteria -- are
+ traceable to one or more entries below. DOIs link to the authoritative
+ record via doi.org;
+ ISBNs link to the WorldCat catalogue. Each entry shows which learn
+ articles cite it so you can trace a claim back to its source.
+
+
+ {totalRefs === 0 ? (
+
+ Sources will appear here as /learn/ articles are annotated with citations.
+