Skip to content

Commit 321c195

Browse files
committed
feat(test): receipt-driven normalise filter replaces hardcoded rules
The normalise filter now reads the companion *.smir.receipts.json (emitted by the spy serializer) and applies three generic passes: interned_keys are deleted, interned_newtypes are zeroed, and interned_positions are zeroed. No more per-field jq rules for the body tree. All three receipt categories are pre-seeded with known interned paths, and the spy adds more dynamically as it discovers them. The seeding turns out to be critical: upstream's serialize_index_impl! macro provides a custom Serialize impl for interned types (Ty, Span, DefId, AllocId, etc.) that serializes them as bare integers via Serialize::serialize(&n, serializer), completely erasing the type name. The spy never sees serialize_newtype_struct or serialize_tuple_struct for these types; it just sees a u64. So the spy's dynamic discovery is blind to the most important interned types, and the seeded values carry all the weight. The spy still adds value for discovering interned paths in types that DO preserve their names through serde (e.g. GenericArgs, ClosureDef), but the baseline coverage comes from the seed lists.
1 parent a8c31ac commit 321c195

4 files changed

Lines changed: 138 additions & 41 deletions

File tree

Makefile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ GOLDEN_DIR := $(shell \
6969
integration-test: TESTS ?= $(shell find $(TESTDIR) -type f -name "*.rs")
7070
integration-test: SMIR ?= cargo run -- "-Zno-codegen"
7171
# override this to tweak how expectations are formatted
72-
integration-test: NORMALIZE ?= jq -S -e -f $(TESTDIR)/../normalise-filter.jq
72+
integration-test: FILTER ?= $(TESTDIR)/../normalise-filter.jq
7373
# override this to re-make golden files
7474
integration-test: DIFF ?= | diff -
7575
## Run integration tests against expected outputs
@@ -79,14 +79,15 @@ integration-test:
7979
report() { echo "$$1: $$2"; errors="$$errors\n$$1: $$2"; }; \
8080
for rust in $(TESTS); do \
8181
target=$${rust%.rs}.smir.json; \
82+
receipts=$${target%.json}.receipts.json; \
8283
name=$$(basename $${rust%.rs}); \
8384
dir=$$(dirname $${rust}); \
8485
expected="$(GOLDEN_DIR)/$${name}.smir.json.expected"; \
8586
echo "$$rust"; \
8687
$(SMIR) --out-dir $${dir} $${rust} || report "$$rust" "Conversion failed"; \
8788
[ -f $${target} ] \
88-
&& $(NORMALIZE) $${target} $(DIFF) $${expected} \
89-
&& rm $${target} \
89+
&& jq -S -e --slurpfile receipts $${receipts} -f $(FILTER) $${target} $(DIFF) $${expected} \
90+
&& rm -f $${target} $${receipts} \
9091
|| report "$$rust" "Unexpected json output"; \
9192
done; \
9293
[ -z "$$errors" ] || (echo "===============\nFAILING TESTS:$$errors"; exit 1)

src/printer/receipts.rs

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,35 @@ const INTERNED_TYPES: &[&str] = &[
3636
"VariantIdx",
3737
];
3838

39+
// Interned key names that always need normalizing, regardless of whether
40+
// the spy encounters them in a particular program's output. These fields
41+
// carry interned indices used as cross-reference keys by downstream tools
42+
// (joining AggregateKind::Adt in MIR bodies with type metadata entries,
43+
// etc.), so they can't be dropped from the output itself; they're only
44+
// stripped for golden-file comparison.
45+
//
46+
// The spy may miss them because (a) the program under test doesn't
47+
// exercise the code path that produces them, or (b) the DefId is wrapped
48+
// in a domain-specific newtype (TraitDef, StaticDef, ClosureDef) that's
49+
// transparent in JSON but opaque to the spy's immediate-parent check.
50+
const SEEDED_INTERNED_KEYS: &[&str] = &[
51+
"def_id", "adt_def", "ty", "span", "id", "alloc_id",
52+
];
53+
54+
// Interned newtype wrappers that always need normalizing. On newer
55+
// nightlies the serialize_index_impl! macro serializes interned types
56+
// as bare integers, so the spy can't discover these dynamically.
57+
const SEEDED_INTERNED_NEWTYPES: &[&str] = &["Type", "Array"];
58+
59+
// Interned positions that always need normalizing: (variant name, indices within that variant's tuple payload that the normalise filter zeroes).
60+
const SEEDED_INTERNED_POSITIONS: &[(&str, &[usize])] = &[
61+
("Field", &[1]),
62+
("Cast", &[2]),
63+
("Closure", &[0]),
64+
("VTable", &[0]),
65+
("Adt", &[0]),
66+
];
67+
3968
// ── Receipt output ──────────────────────────────────────────────────────────
4069

4170
/// Schema-level receipt of where interned indices appear in the JSON output.
@@ -84,12 +113,19 @@ struct Tracker {
84113

85114
impl Tracker {
86115
fn new() -> Self {
116+
let mut positions = BTreeMap::new();
117+
for (name, idxs) in SEEDED_INTERNED_POSITIONS {
118+
positions
119+
.entry(name.to_string())
120+
.or_insert_with(BTreeSet::new)
121+
.extend(idxs.iter().copied());
122+
}
87123
Self {
88124
context: vec![ParentKind::Root],
89125
interned_names: INTERNED_TYPES.iter().copied().collect(),
90-
keys: BTreeSet::new(),
91-
newtypes: BTreeSet::new(),
92-
positions: BTreeMap::new(),
126+
keys: SEEDED_INTERNED_KEYS.iter().map(|s| s.to_string()).collect(),
127+
newtypes: SEEDED_INTERNED_NEWTYPES.iter().map(|s| s.to_string()).collect(),
128+
positions,
93129
}
94130
}
95131

@@ -128,11 +164,7 @@ impl Tracker {
128164
Receipts {
129165
interned_keys: self.keys,
130166
interned_newtypes: self.newtypes,
131-
interned_positions: self
132-
.positions
133-
.into_iter()
134-
.map(|(k, v)| (k, v))
135-
.collect(),
167+
interned_positions: self.positions,
136168
}
137169
}
138170
}
@@ -312,6 +344,19 @@ impl ser::Serializer for SpySerializer {
312344
name: &'static str,
313345
_len: usize,
314346
) -> Result<Self::SerializeTupleStruct, Self::Error> {
347+
// On newer nightlies (post-ThreadLocalIndex), interned types like
348+
// Ty(usize, ThreadLocalIndex) serialize as tuple structs instead of
349+
// newtype structs. Detect them the same way we detect newtypes.
350+
{
351+
let tracker = self.tracker.borrow();
352+
if tracker.interned_names.contains(name) {
353+
drop(tracker);
354+
self.tracker.borrow_mut().record_interned();
355+
// Return a compound that ignores its fields; we've already
356+
// recorded the finding and don't need to recurse deeper.
357+
return Ok(SpyCompound::new(self.tracker, "", false));
358+
}
359+
}
315360
Ok(SpyCompound::new(self.tracker, name, false))
316361
}
317362

@@ -608,5 +653,38 @@ mod tests {
608653
"expected Cast[2] in interned_positions, got: {:?}",
609654
receipts.interned_positions
610655
);
656+
657+
// Seeded values should always be present, even if the test type
658+
// doesn't exercise the code paths that produce them.
659+
for key in SEEDED_INTERNED_KEYS {
660+
assert!(
661+
receipts.interned_keys.contains(*key),
662+
"expected seeded key '{}' in interned_keys, got: {:?}",
663+
key,
664+
receipts.interned_keys
665+
);
666+
}
667+
for nt in SEEDED_INTERNED_NEWTYPES {
668+
assert!(
669+
receipts.interned_newtypes.contains(*nt),
670+
"expected seeded newtype '{}' in interned_newtypes, got: {:?}",
671+
nt,
672+
receipts.interned_newtypes
673+
);
674+
}
675+
for (name, idxs) in SEEDED_INTERNED_POSITIONS {
676+
for idx in *idxs {
677+
assert!(
678+
receipts
679+
.interned_positions
680+
.get(*name)
681+
.map_or(false, |s| s.contains(idx)),
682+
"expected seeded position {}[{}] in interned_positions, got: {:?}",
683+
name,
684+
idx,
685+
receipts.interned_positions
686+
);
687+
}
688+
}
611689
}
612690
}

tests/integration/expected/nightly-2026-01-15/static-vtable-nonbuiltin-deref.smir.json.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
{
99
"global_alloc": {
1010
"VTable": [
11-
11,
11+
0,
1212
{
1313
"bound_vars": [],
1414
"value": {
Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,30 @@
1+
# normalise-filter.jq
2+
#
3+
# Normalizes *.smir.json output for golden-file comparison. Interned
4+
# indices (Ty, Span, DefId, etc.) are non-deterministic across platforms
5+
# and runs; we strip or zero them so that cross-platform diffs are clean.
6+
#
7+
# This filter reads a companion receipts file ($receipts) that declares
8+
# which JSON paths carry interned indices. See ADR-004 for the design.
9+
#
10+
# The receipts cover the Body tree (where the whack-a-mole problem lives);
11+
# top-level array transformations below are structural and stay explicit.
12+
13+
# ── Hash stripping ──────────────────────────────────────────────────────
14+
#
115
# Strip platform-specific crate hashes from mangled symbol names.
216
# Legacy mangling (_ZN...17h<16 hex>E): the hash is always the trailing 17 chars (16 hex digits + the closing "E").
317
# v0 mangling (_R...): crate disambiguators appear as C<base62>_ throughout the symbol;
418
# replace each with "C_" to normalize across platforms.
519
def strip_hashes:
620
if startswith("_R") then gsub("C[a-zA-Z0-9]+_(?=[0-9])"; "C_")
721
else .[:-17] end;
22+
23+
# ── Top-level structural transforms ─────────────────────────────────────
24+
#
25+
# These operate on the SmirJson arrays directly and are unrelated to the
26+
# walk-based interned-index normalization that the receipts drive.
27+
828
.functions = ( [ .functions[] | if .[1].NormalSym then .[1].NormalSym = (.[1].NormalSym | strip_hashes) else . end ] )
929
| .items = ( [ .items[] | if .symbol_name then .symbol_name = (.symbol_name | strip_hashes) else . end ] )
1030
# delete unstable alloc, function, and type IDs
@@ -14,30 +34,12 @@ def strip_hashes:
1434
# remove "Never" type (entries tagged "VoidType")
1535
| .types = ( [ .types[] ] | map(select(.[0] != "VoidType")) )
1636
|
17-
# Apply the normalisation filter
37+
38+
# ── Restructure and sort ────────────────────────────────────────────────
39+
1840
{ allocs: ( .allocs | sort ),
1941
functions: (.functions | sort ),
20-
items: (.items | map(walk(
21-
if type == "object" then del(.ty)
22-
# Several array-encoded constructs embed interned Ty or DefId
23-
# indices as bare integers at known positions. Normalize each:
24-
# Field[1] - Ty index in field projections
25-
# Cast[2] - Ty index (target type of a cast)
26-
# Closure[0] - DefId index
27-
# VTable[0] - Ty index
28-
# Adt[0] - AdtDef index
29-
| if .Field then .Field[1] = 0 else . end
30-
| if .Cast then .Cast[2] = 0 else . end
31-
| if .Closure then .Closure[0] = 0 else . end
32-
| if .VTable then .VTable[0] = 0 else . end
33-
| if .Adt then .Adt[0] = 0 else . end
34-
# {"Type": <int>} wrappers carry interned Ty indices (e.g. inside
35-
# Closure aggregate kinds). Normalize to 0.
36-
| if .Type and (.Type | type) == "number" then .Type = 0 else . end
37-
# {"Array": <int>} is a bare Ty index; {"Array": {count,stride}}
38-
# is a layout descriptor. Only normalize the integer form.
39-
| if .Array and (.Array | type) == "number" then .Array = 0 else . end
40-
else . end)) | sort_by(.symbol_name + "|" + (.mono_item_kind.MonoItemFn.name // "")) ),
42+
items: (.items | sort_by(.symbol_name + "|" + (.mono_item_kind.MonoItemFn.name // "")) ),
4143
types: ( [
4244
# sort by constructors and remove unstable IDs within each
4345
( .types | map(select(.[0].PrimitiveType)) | sort ),
@@ -55,10 +57,26 @@ def strip_hashes:
5557
( .types | map(select(.[0].FunType) | sort) )
5658
] | flatten(1) )
5759
}
58-
# Strip interned index fields globally: def_id (AdtDef indices), id
59-
# (MonoItemFn and const_ interned IDs), and span (interned Span indices).
60-
# These are consistent within a single rustc invocation but not stable
61-
# across runs or platforms; the same non-determinism that affects alloc_id,
62-
# Ty indices, and adt_def (see lines 5-6, 18-21 above). We only strip
63-
# them here for golden-file comparison.
64-
| walk(if type == "object" then del(.def_id) | del(.id) | del(.span) else . end)
60+
61+
# ── Receipt-driven interned-index normalization ─────────────────────────
62+
#
63+
# Instead of hardcoding per-field rules, we read the receipts and apply
64+
# three generic passes:
65+
#
66+
# 1. interned_keys: delete object fields whose values are interned
67+
# 2. interned_newtypes: zero enum-variant wrappers around bare integers
68+
# 3. interned_positions: zero known tuple positions carrying interned indices
69+
70+
| walk(if type == "object" then
71+
# 1. Strip interned key fields (e.g. "span", "ty", "def_id", "id")
72+
reduce ($receipts[0].interned_keys[]) as $k (.; del(.[$k]))
73+
# 2. Zero interned newtype wrappers (e.g. {"Type": 42} → {"Type": 0})
74+
| reduce ($receipts[0].interned_newtypes[]) as $n (.;
75+
if .[$n] and (.[$n] | type) == "number" then .[$n] = 0 else . end)
76+
# 3. Zero interned positions in tuple variants (e.g. Cast[2], Field[1])
77+
| reduce ($receipts[0].interned_positions | to_entries[]) as $e (.;
78+
if .[$e.key] and (.[$e.key] | type) == "array" then
79+
reduce ($e.value[]) as $p (.;
80+
if .[$e.key][$p] then .[$e.key][$p] = 0 else . end)
81+
else . end)
82+
else . end)

0 commit comments

Comments
 (0)