diff --git a/docs/concepts/models.md b/docs/concepts/models.md index 544088bf..26ccaed4 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -22,8 +22,8 @@ export NVIDIA_API_KEY="your-nvidia-api-key" | Alias | Model | Used by | |-------|-------|---------| | `gliner-pii-detector` | [`nvidia/gliner-pii`](https://build.nvidia.com/nvidia/gliner-pii) | Entity detection (NER) | -| `gpt-oss-120b` | [`openai/gpt-oss-120b`](https://build.nvidia.com/openai/gpt-oss-120b) | Detection validation & augmentation, replacement, rewriting | -| `nemotron-30b-thinking` | [`nvidia/nemotron-3-nano-30b-a3b`](https://build.nvidia.com/nvidia/nemotron-3-nano-30b-a3b) | Latent detection, evaluation, final judge | +| `gpt-oss-120b` | [`openai/gpt-oss-120b`](https://build.nvidia.com/openai/gpt-oss-120b) | Detection validation & augmentation, replacement, replace evaluation, rewriting | +| `nemotron-30b-thinking` | [`nvidia/nemotron-3-nano-30b-a3b`](https://build.nvidia.com/nvidia/nemotron-3-nano-30b-a3b) | Latent detection, rewrite evaluation, final judge | Each pipeline stage has a **role** mapped to one of these aliases. See the full role list in the default configs: [`detection.yaml`](https://github.com/NVIDIA-NeMo/Anonymizer/blob/main/src/anonymizer/config/default_model_configs/detection.yaml), [`replace.yaml`](https://github.com/NVIDIA-NeMo/Anonymizer/blob/main/src/anonymizer/config/default_model_configs/replace.yaml), [`rewrite.yaml`](https://github.com/NVIDIA-NeMo/Anonymizer/blob/main/src/anonymizer/config/default_model_configs/rewrite.yaml). 
diff --git a/docs/notebook_source/01_your_first_anonymization.py b/docs/notebook_source/01_your_first_anonymization.py index dcad3f43..ee34ee7a 100644 --- a/docs/notebook_source/01_your_first_anonymization.py +++ b/docs/notebook_source/01_your_first_anonymization.py @@ -115,6 +115,17 @@ # %% result.dataframe.head() +# %% [markdown] +# ## 📊 (Optional) Evaluate replacement quality +# +# - `evaluate()` is a separate, opt-in step that scores the output with LLM-as-judge metrics. +# - For Substitute, all four metrics run: **Detection Validity**, **Type Fidelity**, **Relational Consistency**, **Attribute Fidelity**. +# - Skip it for routine runs; call it when you want LLM-side confidence on the output. Costs LLM calls per record, so try it on `preview` first. + +# %% +evaluated = anonymizer.evaluate(preview) +evaluated.display_record(0) + # %% [markdown] # ## ⏭️ Next steps # diff --git a/docs/notebook_source/02_inspecting_detected_entities.py b/docs/notebook_source/02_inspecting_detected_entities.py index eb351f2d..c3dc4d90 100644 --- a/docs/notebook_source/02_inspecting_detected_entities.py +++ b/docs/notebook_source/02_inspecting_detected_entities.py @@ -188,6 +188,16 @@ else: print("No failed records.") +# %% [markdown] +# ## 📊 (Optional) Score the detections with an LLM judge +# +# - `evaluate()` is a separate, opt-in step that runs LLM-as-judge metrics on the output. +# - This notebook uses Annotate, so only **Detection Validity** runs — it flags entities the detector got wrong (false positives, mislabels, boundary errors). Substitute would also enable Type Fidelity, Relational Consistency, and Attribute Fidelity. 
+ +# %% +evaluated = anonymizer.evaluate(result) +evaluated.display_record(0) + # %% [markdown] # ## ⏭️ Next steps # diff --git a/docs/notebook_source/03_choosing_a_replacement_strategy.py b/docs/notebook_source/03_choosing_a_replacement_strategy.py index 782f4789..aef23a4f 100644 --- a/docs/notebook_source/03_choosing_a_replacement_strategy.py +++ b/docs/notebook_source/03_choosing_a_replacement_strategy.py @@ -218,6 +218,19 @@ hash_custom_preview.display_record(0) +# %% [markdown] +# ## 📊 (Optional) Evaluate each strategy +# +# - `evaluate()` is a separate, opt-in step that scores the output with LLM-as-judge metrics. Which metrics fire depends on the strategy: +# - **Substitute** → 4 metrics (Detection Validity + Type Fidelity + Relational Consistency + Attribute Fidelity). +# - **Redact / Annotate / Hash** → Detection Validity only (no replacement map to score type/relational/attribute against). +# - The cell below runs it on the Substitute preview to surface all four; the same call works on `redact_preview`, `annotate_preview`, or `hash_preview`. 
+ +# %% +substitute_evaluated = anonymizer.evaluate(substitute_preview) +substitute_evaluated.display_record(0) + + # %% [markdown] # ## ⏭️ Next steps # diff --git a/docs/notebooks/01_your_first_anonymization.ipynb b/docs/notebooks/01_your_first_anonymization.ipynb index c339bf71..8c8004ca 100644 --- a/docs/notebooks/01_your_first_anonymization.ipynb +++ b/docs/notebooks/01_your_first_anonymization.ipynb @@ -36,12 +36,12 @@ "id": "8650bc79", "metadata": {}, "source": [ - "# 🕵️ Your First Anonymization\n", + "# \ud83d\udd75\ufe0f Your First Anonymization\n", "\n", "Detect sensitive entities and replace them with LLM-generated substitutes --\n", "the simplest end-to-end example of Anonymizer.\n", "\n", - "#### 📚 What you'll learn\n", + "#### \ud83d\udcda What you'll learn\n", "\n", "- Load a CSV dataset and configure Anonymizer in a few lines\n", "- Preview anonymized results on a small sample before committing to a full run\n", @@ -57,7 +57,7 @@ "id": "b2466e59", "metadata": {}, "source": [ - "## ⚙️ Setup\n", + "## \u2699\ufe0f Setup\n", "\n", "- Check if your `NVIDIA_API_KEY` from [build.nvidia.com](https://build.nvidia.com) is registered for model access.\n", " - The default `build.nvidia.com` (NVIDIA Build) setup is a convenient way to try Anonymizer and iterate on previews. Use of NVIDIA Build is subject to NVIDIA Build's own terms of service and privacy practices, which are separate from and independent of the NeMo Framework library. NVIDIA Build is intended for evaluation and testing purposes only and may not be used in production environments. Do not upload any confidential information or personal data when using NVIDIA Build. 
Your use of NVIDIA Build is logged for security purposes and to improve NVIDIA products and services.\n", @@ -127,28 +127,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] 🔧 Anonymizer initialized with 3 model configs\n" + "[13:13:46] [INFO] \ud83d\udd27 Anonymizer initialized with 3 model configs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] |-- 🔎 detector: gliner-pii-detector\n" + "[13:13:46] [INFO] |-- \ud83d\udd0e detector: gliner-pii-detector\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] |-- ✅ validator: gpt-oss-120b\n" + "[13:13:46] [INFO] |-- \u2705 validator: gpt-oss-120b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] |-- 🧩 augmenter: gpt-oss-120b\n" + "[13:13:46] [INFO] |-- \ud83e\udde9 augmenter: gpt-oss-120b\n" ] } ], @@ -161,7 +161,7 @@ "id": "89072886", "metadata": {}, "source": [ - "## 📦 Load data and configure\n", + "## \ud83d\udce6 Load data and configure\n", "\n", "- `AnonymizerInput` points to your CSV and names the text column. 
`data_summary`\n", " gives the LLM context about the kind of text it will process.\n", @@ -198,7 +198,7 @@ "id": "d7fac15c", "metadata": {}, "source": [ - "## 👁️ Preview\n", + "## \ud83d\udc41\ufe0f Preview\n", "\n", "- `preview()` runs on a small sample so you can iterate quickly.\n", "- Always preview before processing the full dataset -- it's the fastest way\n", @@ -222,14 +222,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:13:46] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:13:46] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:13:46] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -243,7 +243,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:14:17] [INFO] |-- 📋 Detection complete — 80 entities found across 3 records (0 failed) [30.6s]\n" + "[13:14:17] [INFO] |-- \ud83d\udccb Detection complete \u2014 80 entities found across 3 records (0 failed) [30.6s]\n" ] }, { @@ -257,21 +257,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:14:17] [INFO] 🔄 Running Substitute replacement\n" + "[13:14:17] [INFO] \ud83d\udd04 Running Substitute replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:15:14] [INFO] |-- 📋 Replacement complete (0 failed) [57.4s]\n" + "[13:15:14] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [57.4s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:15:14] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:15:14] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 
records processed, 0 total failures\n" ] } ], @@ -284,7 +284,7 @@ "id": "c404d7f6", "metadata": {}, "source": [ - "## 🔍 Inspect\n", + "## \ud83d\udd0d Inspect\n", "\n", "- `display_record()` shows the original text with highlighted entities,\n", " the replacement map, and the anonymized output -- all in one view.\n", @@ -315,15 +315,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
Ethan| first_name Henderson| last_name, a 45| age‑year‑old Vietnamese| race_ethnicity marine biologist| occupation living in Portland| city, Oregon| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Lincoln High| organization_name, he earned his Ph.D.| degree at the University of Oregon| university, where he also completed a research stint in marine ecology| field_of_study. Fluent in Spanish| language, Ethan| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Ethan| first_name Henderson| last_name, a 45| age\u2011year\u2011old Vietnamese| race_ethnicity marine biologist| occupation living in Portland| city, Oregon| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Lincoln High| organization_name, he earned his Ph.D.| degree at the University of Oregon| university, where he also completed a research stint in marine ecology| field_of_study. Fluent in Spanish| language, Ethan| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Ethan| first_name has worked at PetCare Medical Center| company_name and later at the Oregon Animal Wellness Center| organization_name, where he now leads a busy mixed‑practice team. He identifies as a Libertarian| political_view and often volunteers at local shelters, a habit encouraged by his wife, Leah| first_name, and their two teenage children, Sofia| first_name and Noah| first_name. Outside the clinic, Ethan| first_name enjoys hiking the Cascade Range| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Ethan| first_name has worked at PetCare Medical Center| company_name and later at the Oregon Animal Wellness Center| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a Libertarian| political_view and often volunteers at local shelters, a habit encouraged by his wife, Leah| first_name, and their two teenage children, Sofia| first_name and Noah| first_name. Outside the clinic, Ethan| first_name enjoys hiking the Cascade Range| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -369,19 +369,19 @@ "
\n", "
\n", "
Original
\n", - "
Idilio| first_name Bell| last_name is a 37| age‑year‑old astronomer| occupation living in Edison| city, New Jersey| state. Born on November 21, 1988| date_of_birth, he grew up in a bilingual Italian| race_ethnicity household and speaks English| language at home and work. He earned his bachelor’s degree| education_level in physics| field_of_study from the University of New Jersey| state and later completed a PhD in astrophysics| education_level at Princeton| city, where his dissertation focused on exoplanet atmospheres. After graduation he spent three years at NASA| organization_name’s Goddard Space Flight Center| organization_name before joining SpaceX| organization_name’s research division, where he now leads a team analyzing data from the Starlink| organization_name telescope array. Idilio| first_name describes himself as secular| religious_belief and leans progressive| political_view on most political issues, often volunteering for science outreach programs in his community.\n", + "
Idilio| first_name Bell| last_name is a 37| age\u2011year\u2011old astronomer| occupation living in Edison| city, New Jersey| state. Born on November\u202f21,\u202f1988| date_of_birth, he grew up in a bilingual Italian| race_ethnicity household and speaks English| language at home and work. He earned his bachelor\u2019s degree| education_level in physics| field_of_study from the University of New\u202fJersey| state and later completed a PhD in astrophysics| education_level at Princeton| city, where his dissertation focused on exoplanet atmospheres. After graduation he spent three years at NASA| organization_name\u2019s Goddard Space Flight Center| organization_name before joining SpaceX| organization_name\u2019s research division, where he now leads a team analyzing data from the Starlink| organization_name telescope array. Idilio| first_name describes himself as secular| religious_belief and leans progressive| political_view on most political issues, often volunteering for science outreach programs in his community.\n", "\n", - "Outside the lab, Idilio| first_name shares a modest house on West Roberts Drive| street_address with his wife, Maya| first_name, and their two young daughters, Lina| first_name and Zara| first_name. His mother, Elena| first_name, lives nearby and still cooks the family’s favorite pasta on Sundays, while his father, Marco| first_name, retired| employment_status from an engineering firm| company_name in New York| state. Family gatherings are a mix of lively conversation and stargazing sessions on the backyard deck, where Idilio| first_name points out constellations and tells stories of the cosmos that inspire his children’s curiosity.
\n", + "Outside the lab, Idilio| first_name shares a modest house on West Roberts Drive| street_address with his wife, Maya| first_name, and their two young daughters, Lina| first_name and Zara| first_name. His mother, Elena| first_name, lives nearby and still cooks the family\u2019s favorite pasta on Sundays, while his father, Marco| first_name, retired| employment_status from an engineering firm| company_name in New\u202fYork| state. Family gatherings are a mix of lively conversation and stargazing sessions on the backyard deck, where Idilio| first_name points out constellations and tells stories of the cosmos that inspire his children\u2019s curiosity.
\n", "
\n", "
\n", "
Replaced
\n", - "
Santiago| first_name Kumar| last_name is a 36| age‑year‑old geophysicist| occupation living in Austin| city, Texas| state. Born on July 5, 1989| date_of_birth, he grew up in a bilingual Greek| race_ethnicity household and speaks Spanish| language at home and work. He earned his associate’s degree| education_level in chemistry| field_of_study from the University of Oregon| state and later completed a master’s degree in planetary geology| education_level at Portland| city, where his dissertation focused on exoplanet atmospheres. After graduation he spent three years at European Space Agency| organization_name’s National Renewable Energy Laboratory| organization_name before joining Blue Origin| organization_name’s research division, where he now leads a team analyzing data from the OneWeb| organization_name telescope array. Santiago| first_name describes himself as agnostic| religious_belief and leans centrist| political_view on most political issues, often volunteering for science outreach programs in his community.\n", + "
Santiago| first_name Kumar| last_name is a 36| age\u2011year\u2011old geophysicist| occupation living in Austin| city, Texas| state. Born on July 5, 1989| date_of_birth, he grew up in a bilingual Greek| race_ethnicity household and speaks Spanish| language at home and work. He earned his associate\u2019s degree| education_level in chemistry| field_of_study from the University of Oregon| state and later completed a master\u2019s degree in planetary geology| education_level at Portland| city, where his dissertation focused on exoplanet atmospheres. After graduation he spent three years at European Space Agency| organization_name\u2019s National Renewable Energy Laboratory| organization_name before joining Blue Origin| organization_name\u2019s research division, where he now leads a team analyzing data from the OneWeb| organization_name telescope array. Santiago| first_name describes himself as agnostic| religious_belief and leans centrist| political_view on most political issues, often volunteering for science outreach programs in his community.\n", "\n", - "Outside the lab, Santiago| first_name shares a modest house on North Willow Lane| street_address with his wife, Priya| first_name, and their two young daughters, Aisha| first_name and Nadia| first_name. His mother, Sofia| first_name, lives nearby and still cooks the family’s favorite pasta on Sundays, while his father, Diego| first_name, part-time| employment_status from an architectural studio| company_name in Florida| state. Family gatherings are a mix of lively conversation and stargazing sessions on the backyard deck, where Santiago| first_name points out constellations and tells stories of the cosmos that inspire his children’s curiosity.
\n", + "Outside the lab, Santiago| first_name shares a modest house on North Willow Lane| street_address with his wife, Priya| first_name, and their two young daughters, Aisha| first_name and Nadia| first_name. His mother, Sofia| first_name, lives nearby and still cooks the family\u2019s favorite pasta on Sundays, while his father, Diego| first_name, part-time| employment_status from an architectural studio| company_name in Florida| state. Family gatherings are a mix of lively conversation and stargazing sessions on the backyard deck, where Santiago| first_name points out constellations and tells stories of the cosmos that inspire his children\u2019s curiosity.
\n", "
\n", "
\n", "
Replacement Map
\n", - "
OriginalLabelReplacement
37age36
Belllast_nameKumar
EdisoncityAustin
Elenafirst_nameSofia
EnglishlanguageSpanish
Goddard Space Flight Centerorganization_nameNational Renewable Energy Laboratory
Idiliofirst_nameSantiago
Italianrace_ethnicityGreek
Linafirst_nameAisha
Marcofirst_nameDiego
Mayafirst_namePriya
NASAorganization_nameEuropean Space Agency
New JerseystateTexas
New JerseystateOregon
New YorkstateFlorida
November 21, 1988date_of_birthJuly 5, 1989
PhD in astrophysicseducation_levelmaster’s degree in planetary geology
PrincetoncityPortland
SpaceXorganization_nameBlue Origin
Starlinkorganization_nameOneWeb
West Roberts Drivestreet_addressNorth Willow Lane
Zarafirst_nameNadia
astronomeroccupationgeophysicist
bachelor’s degreeeducation_levelassociate’s degree
engineering firmcompany_namearchitectural studio
in physicsfield_of_studyin chemistry
progressivepolitical_viewcentrist
retiredemployment_statuspart-time
secularreligious_beliefagnostic
\n", + "
OriginalLabelReplacement
37age36
Belllast_nameKumar
EdisoncityAustin
Elenafirst_nameSofia
EnglishlanguageSpanish
Goddard Space Flight Centerorganization_nameNational Renewable Energy Laboratory
Idiliofirst_nameSantiago
Italianrace_ethnicityGreek
Linafirst_nameAisha
Marcofirst_nameDiego
Mayafirst_namePriya
NASAorganization_nameEuropean Space Agency
New JerseystateTexas
New\u202fJerseystateOregon
New\u202fYorkstateFlorida
November\u202f21,\u202f1988date_of_birthJuly 5, 1989
PhD in astrophysicseducation_levelmaster\u2019s degree in planetary geology
PrincetoncityPortland
SpaceXorganization_nameBlue Origin
Starlinkorganization_nameOneWeb
West Roberts Drivestreet_addressNorth Willow Lane
Zarafirst_nameNadia
astronomeroccupationgeophysicist
bachelor\u2019s degreeeducation_levelassociate\u2019s degree
engineering firmcompany_namearchitectural studio
in physicsfield_of_studyin chemistry
progressivepolitical_viewcentrist
retiredemployment_statuspart-time
secularreligious_beliefagnostic
\n", "
\n", "
\n", "
\n", @@ -442,24 +442,24 @@ " \n", " \n", " 0\n", - " Bobby Watford, a 40‑year‑old Mexican veterinar...\n", + " Bobby Watford, a 40\u2011year\u2011old Mexican veterinar...\n", " <first_name>Bobby</first_name> <last_name>Watf...\n", " {'entities': [{'end_position': 5, 'id': 'first...\n", - " Ethan Henderson, a 45‑year‑old Vietnamese mari...\n", + " Ethan Henderson, a 45\u2011year\u2011old Vietnamese mari...\n", " \n", " \n", " 1\n", - " Idilio Bell is a 37‑year‑old astronomer living...\n", + " Idilio Bell is a 37\u2011year\u2011old astronomer living...\n", " <first_name>Idilio</first_name> <last_name>Bel...\n", " {'entities': [{'end_position': 6, 'id': 'first...\n", - " Santiago Kumar is a 36‑year‑old geophysicist l...\n", + " Santiago Kumar is a 36\u2011year\u2011old geophysicist l...\n", " \n", " \n", " 2\n", - " Jodi Allison, 36, lives at 204 Bluegrass in Cl...\n", + " Jodi Allison,\u202f36, lives at 204\u202fBluegrass in Cl...\n", " <first_name>Jodi</first_name> <last_name>Allis...\n", " {'entities': [{'end_position': 4, 'id': 'first...\n", - " Sofia Keller, 42, lives at 587 Maple in Macon,...\n", + " Sofia Keller,\u202f42, lives at 587\u202fMaple in Macon,...\n", " \n", " \n", "\n", @@ -467,9 +467,9 @@ ], "text/plain": [ " biography \\\n", - "0 Bobby Watford, a 40‑year‑old Mexican veterinar... \n", - "1 Idilio Bell is a 37‑year‑old astronomer living... \n", - "2 Jodi Allison, 36, lives at 204 Bluegrass in Cl... \n", + "0 Bobby Watford, a 40\u2011year\u2011old Mexican veterinar... \n", + "1 Idilio Bell is a 37\u2011year\u2011old astronomer living... \n", + "2 Jodi Allison,\u202f36, lives at 204\u202fBluegrass in Cl... \n", "\n", " biography_with_spans \\\n", "0 Bobby Watf... \n", @@ -482,9 +482,9 @@ "2 {'entities': [{'end_position': 4, 'id': 'first... \n", "\n", " biography_replaced \n", - "0 Ethan Henderson, a 45‑year‑old Vietnamese mari... \n", - "1 Santiago Kumar is a 36‑year‑old geophysicist l... \n", - "2 Sofia Keller, 42, lives at 587 Maple in Macon,... 
" + "0 Ethan Henderson, a 45\u2011year\u2011old Vietnamese mari... \n", + "1 Santiago Kumar is a 36\u2011year\u2011old geophysicist l... \n", + "2 Sofia Keller,\u202f42, lives at 587\u202fMaple in Macon,... " ] }, "execution_count": 9, @@ -501,7 +501,7 @@ "id": "8e4d2b0e", "metadata": {}, "source": [ - "## 🚀 Full run\n", + "## \ud83d\ude80 Full run\n", "\n", "- `run()` processes the entire dataset with the same config you previewed.\n", "- Access the output via `result.dataframe`." @@ -524,14 +524,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:15:14] [INFO] 📂 Loaded 25 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:15:14] [INFO] \ud83d\udcc2 Loaded 25 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:15:14] [INFO] 🔍 Running entity detection on 25 records\n" + "[13:15:14] [INFO] \ud83d\udd0d Running entity detection on 25 records\n" ] }, { @@ -545,7 +545,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:16:05] [INFO] |-- 📋 Detection complete — 648 entities found across 25 records (0 failed) [50.3s]\n" + "[13:16:05] [INFO] |-- \ud83d\udccb Detection complete \u2014 648 entities found across 25 records (0 failed) [50.3s]\n" ] }, { @@ -559,21 +559,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:16:05] [INFO] 🔄 Running Substitute replacement\n" + "[13:16:05] [INFO] \ud83d\udd04 Running Substitute replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:35] [INFO] |-- 📋 Replacement complete (0 failed) [30.5s]\n" + "[13:16:35] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [30.5s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:35] [INFO] 🎉 Pipeline complete — 25 records 
processed, 0 total failures\n" + "[13:16:35] [INFO] \ud83c\udf89 Pipeline complete \u2014 25 records processed, 0 total failures\n" ] }, { @@ -632,38 +632,38 @@ " \n", " \n", " 0\n", - " Bobby Watford, a 40‑year‑old Mexican veterinar...\n", + " Bobby Watford, a 40\u2011year\u2011old Mexican veterinar...\n", " <first_name>Bobby</first_name> <last_name>Watf...\n", " {'entities': array([{'end_position': 5, 'id': ...\n", - " Ethan Hernandez, a 52‑year‑old Filipino zoolog...\n", + " Ethan Hernandez, a 52\u2011year\u2011old Filipino zoolog...\n", " \n", " \n", " 1\n", - " Idilio Bell is a 37‑year‑old astronomer living...\n", + " Idilio Bell is a 37\u2011year\u2011old astronomer living...\n", " <first_name>Idilio</first_name> <last_name>Bel...\n", " {'entities': array([{'end_position': 6, 'id': ...\n", - " Rafael Khan is a 42‑year‑old planetary geologi...\n", + " Rafael Khan is a 42\u2011year\u2011old planetary geologi...\n", " \n", " \n", " 2\n", - " Jodi Allison, 36, lives at 204 Bluegrass in Cl...\n", + " Jodi Allison,\u202f36, lives at 204\u202fBluegrass in Cl...\n", " <first_name>Jodi</first_name> <last_name>Allis...\n", " {'entities': array([{'end_position': 4, 'id': ...\n", - " Leah Harper, 42, lives at 204 Willow in Eugene...\n", + " Leah Harper,\u202f42, lives at 204 Willow in Eugene...\n", " \n", " \n", " 3\n", - " James Mills is a 69‑year‑old paramedic who liv...\n", + " James Mills is a 69\u2011year\u2011old paramedic who liv...\n", " <first_name>James</first_name> <last_name>Mill...\n", " {'entities': array([{'end_position': 5, 'id': ...\n", - " Ethan Harper is a 71‑year‑old firefighter who ...\n", + " Ethan Harper is a 71\u2011year\u2011old firefighter who ...\n", " \n", " \n", " 4\n", - " Nancy Burton is a 21‑year‑old cashier who live...\n", + " Nancy Burton is a 21\u2011year\u2011old cashier who live...\n", " <first_name>Nancy</first_name> <last_name>Burt...\n", " {'entities': array([{'end_position': 5, 'id': ...\n", - " Leah Hawkins is a 27‑year‑old stock 
clerk who ...\n", + " Leah Hawkins is a 27\u2011year\u2011old stock clerk who ...\n", " \n", " \n", "\n", @@ -671,11 +671,11 @@ ], "text/plain": [ " biography \\\n", - "0 Bobby Watford, a 40‑year‑old Mexican veterinar... \n", - "1 Idilio Bell is a 37‑year‑old astronomer living... \n", - "2 Jodi Allison, 36, lives at 204 Bluegrass in Cl... \n", - "3 James Mills is a 69‑year‑old paramedic who liv... \n", - "4 Nancy Burton is a 21‑year‑old cashier who live... \n", + "0 Bobby Watford, a 40\u2011year\u2011old Mexican veterinar... \n", + "1 Idilio Bell is a 37\u2011year\u2011old astronomer living... \n", + "2 Jodi Allison,\u202f36, lives at 204\u202fBluegrass in Cl... \n", + "3 James Mills is a 69\u2011year\u2011old paramedic who liv... \n", + "4 Nancy Burton is a 21\u2011year\u2011old cashier who live... \n", "\n", " biography_with_spans \\\n", "0 Bobby Watf... \n", @@ -692,11 +692,11 @@ "4 {'entities': array([{'end_position': 5, 'id': ... \n", "\n", " biography_replaced \n", - "0 Ethan Hernandez, a 52‑year‑old Filipino zoolog... \n", - "1 Rafael Khan is a 42‑year‑old planetary geologi... \n", - "2 Leah Harper, 42, lives at 204 Willow in Eugene... \n", - "3 Ethan Harper is a 71‑year‑old firefighter who ... \n", - "4 Leah Hawkins is a 27‑year‑old stock clerk who ... " + "0 Ethan Hernandez, a 52\u2011year\u2011old Filipino zoolog... \n", + "1 Rafael Khan is a 42\u2011year\u2011old planetary geologi... \n", + "2 Leah Harper,\u202f42, lives at 204 Willow in Eugene... \n", + "3 Ethan Harper is a 71\u2011year\u2011old firefighter who ... \n", + "4 Leah Hawkins is a 27\u2011year\u2011old stock clerk who ... 
" ] }, "execution_count": 11, @@ -708,18 +708,41 @@ "result.dataframe.head()" ] }, + { + "cell_type": "markdown", + "id": "563fb815", + "metadata": {}, + "source": [ + "## \ud83d\udcca (Optional) Evaluate replacement quality\n", + "\n", + "- `evaluate()` is a separate, opt-in step that scores the output with LLM-as-judge metrics.\n", + "- For Substitute, all four metrics run: **Detection Validity**, **Type Fidelity**, **Relational Consistency**, **Attribute Fidelity**.\n", + "- Skip it for routine runs; call it when you want LLM-side confidence on the output. Costs LLM calls per record, so try it on `preview` first." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df6a871c", + "metadata": {}, + "outputs": [], + "source": [ + "evaluated = anonymizer.evaluate(preview)\n", + "evaluated.display_record(0)" + ] + }, { "cell_type": "markdown", "id": "7699994a", "metadata": {}, "source": [ - "## ⏭️ Next steps\n", + "## \u23ed\ufe0f Next steps\n", "\n", - "- **[🔍 Inspecting Detected Entities](../02_inspecting_detected_entities/)** --\n", + "- **[\ud83d\udd0d Inspecting Detected Entities](../02_inspecting_detected_entities/)** --\n", " dig into what the detection pipeline found and debug quality.\n", - "- **[🎯 Choosing a Replacement Strategy](../03_choosing_a_replacement_strategy/)** --\n", + "- **[\ud83c\udfaf Choosing a Replacement Strategy](../03_choosing_a_replacement_strategy/)** --\n", " compare Redact, Annotate, Hash, and Substitute side-by-side.\n", - "- **[✏️ Rewriting Biographies](../04_rewriting_biographies/)** --\n", + "- **[\u270f\ufe0f Rewriting Biographies](../04_rewriting_biographies/)** --\n", " generate privacy-safe paraphrases instead of token-level replacements." 
] } diff --git a/docs/notebooks/02_inspecting_detected_entities.ipynb b/docs/notebooks/02_inspecting_detected_entities.ipynb index b4134e4f..139a5674 100644 --- a/docs/notebooks/02_inspecting_detected_entities.ipynb +++ b/docs/notebooks/02_inspecting_detected_entities.ipynb @@ -36,7 +36,7 @@ "id": "8084293f", "metadata": {}, "source": [ - "# 🕵️ Inspecting Detected Entities\n", + "# \ud83d\udd75\ufe0f Inspecting Detected Entities\n", "\n", "Dig into the entity detection pipeline output -- what was detected,\n", "what the LLM validator kept or dropped, and where entities appear in the text.\n", @@ -47,7 +47,7 @@ "We use **Annotate** mode because it preserves the original text while tagging each entity\n", "with its label, making it ideal for reviewing detection quality.\n", "\n", - "#### 📚 What you'll learn\n", + "#### \ud83d\udcda What you'll learn\n", "\n", "- Run the detection pipeline and inspect its output using Annotate mode\n", "- View tagged text with entities marked inline\n", @@ -63,7 +63,7 @@ "id": "88f2cc08", "metadata": {}, "source": [ - "## ⚙️ Setup\n", + "## \u2699\ufe0f Setup\n", "\n", "- Check if your `NVIDIA_API_KEY` from [build.nvidia.com](https://build.nvidia.com) is registered for model access.\n", " - The default `build.nvidia.com` (NVIDIA Build) setup is a convenient way to try Anonymizer and iterate on previews. Use of NVIDIA Build is subject to NVIDIA Build's own terms of service and privacy practices, which are separate from and independent of the NeMo Framework library. NVIDIA Build is intended for evaluation and testing purposes only and may not be used in production environments. Do not upload any confidential information or personal data when using NVIDIA Build. 
Your use of NVIDIA Build is logged for security purposes and to improve NVIDIA products and services.\n", @@ -136,28 +136,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] 🔧 Anonymizer initialized with 3 model configs\n" + "[13:16:40] [INFO] \ud83d\udd27 Anonymizer initialized with 3 model configs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] |-- 🔎 detector: gliner-pii-detector\n" + "[13:16:40] [INFO] |-- \ud83d\udd0e detector: gliner-pii-detector\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] |-- ✅ validator: gpt-oss-120b\n" + "[13:16:40] [INFO] |-- \u2705 validator: gpt-oss-120b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] |-- 🧩 augmenter: gpt-oss-120b\n" + "[13:16:40] [INFO] |-- \ud83e\udde9 augmenter: gpt-oss-120b\n" ] } ], @@ -170,7 +170,7 @@ "id": "be3b0054", "metadata": {}, "source": [ - "## 👁️ Preview\n", + "## \ud83d\udc41\ufe0f Preview\n", "\n", "- Detection runs as part of any strategy. 
`Annotate` keeps original text visible\n", " alongside entity labels -- ideal for debugging.\n", @@ -194,14 +194,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:16:40] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:16:40] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:16:40] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -215,7 +215,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:12] [INFO] |-- 📋 Detection complete — 77 entities found across 3 records (0 failed) [31.9s]\n" + "[13:17:12] [INFO] |-- \ud83d\udccb Detection complete \u2014 77 entities found across 3 records (0 failed) [31.9s]\n" ] }, { @@ -229,21 +229,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:12] [INFO] 🔄 Running Annotate replacement\n" + "[13:17:12] [INFO] \ud83d\udd04 Running Annotate replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:12] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:17:12] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:12] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:17:12] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] } ], @@ -268,7 +268,7 @@ "id": "eea0a736", "metadata": {}, "source": [ - "## 🔍 Inspect\n", + "## \ud83d\udd0d Inspect\n", "\n", "- `display_record()` renders an interactive view with entity highlights." ] @@ -297,15 +297,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
<Bobby, first_name>| first_name <Watford, last_name>| last_name, a <40, age>| age‑year‑old <Mexican, race_ethnicity>| race_ethnicity <veterinarian, occupation>| occupation living in <Denver, city>| city, <Colorado, state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High, organization_name>| organization_name, he earned his <DVM, degree>| degree at the <University of Colorado Boulder, university>| university, where he also completed a research stint in wildlife health. Fluent in <English, language>| language, <Bobby, first_name>| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
<Bobby, first_name>| first_name <Watford, last_name>| last_name, a <40, age>| age\u2011year\u2011old <Mexican, race_ethnicity>| race_ethnicity <veterinarian, occupation>| occupation living in <Denver, city>| city, <Colorado, state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High, organization_name>| organization_name, he earned his <DVM, degree>| degree at the <University of Colorado Boulder, university>| university, where he also completed a research stint in wildlife health. Fluent in <English, language>| language, <Bobby, first_name>| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, <Bobby, first_name>| first_name has worked at <VCA Animal Hospital, company_name>| company_name and later at the <Colorado Veterinary Clinic, organization_name>| organization_name, where he now leads a busy mixed‑practice team. He identifies as a <Christian Democrat, political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya, first_name>| first_name, and their two teenage children, <Aria, first_name>| first_name and <Leo, first_name>| first_name. Outside the clinic, <Bobby, first_name>| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, <Bobby, first_name>| first_name has worked at <VCA Animal Hospital, company_name>| company_name and later at the <Colorado Veterinary Clinic, organization_name>| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a <Christian Democrat, political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya, first_name>| first_name, and their two teenage children, <Aria, first_name>| first_name and <Leo, first_name>| first_name. Outside the clinic, <Bobby, first_name>| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -332,7 +332,7 @@ "id": "54073eb9", "metadata": {}, "source": [ - "## 📋 Columns\n", + "## \ud83d\udccb Columns\n", "\n", "- `trace_dataframe` contains all internal columns from the pipeline\n", " (detection, validation, replacement, etc.)." @@ -371,7 +371,7 @@ "id": "62e4dc1f", "metadata": {}, "source": [ - "## 🎯 Detected entities\n", + "## \ud83c\udfaf Detected entities\n", "\n", "- Final entity list after validation. Each entity has `value`, `label`,\n", " positions, `score`, and `source` (detector / augmenter / name_split / propagation)." @@ -437,7 +437,7 @@ "id": "c230e940", "metadata": {}, "source": [ - "## 🏷️ Labels\n", + "## \ud83c\udff7\ufe0f Labels\n", "\n", "- Entity label distribution across all records -- which types are most common." ] @@ -497,7 +497,7 @@ "id": "11a3ee18", "metadata": {}, "source": [ - "## 📡 Sources\n", + "## \ud83d\udce1 Sources\n", "\n", "- Where each entity came from in the pipeline:\n", " - `detector` -- GLiNER NER\n", @@ -545,7 +545,7 @@ "id": "2c9f621a", "metadata": {}, "source": [ - "## 📊 By value\n", + "## \ud83d\udcca By value\n", "\n", "- Entities grouped by unique value -- this is what drives consistent replacement\n", " downstream (same name always maps to the same substitute)." @@ -605,7 +605,7 @@ "id": "aa5a42b8", "metadata": {}, "source": [ - "## ❌ Failures\n", + "## \u274c Failures\n", "\n", "- Records dropped during detection (LLM timeout, parse error, etc.).\n", "- Check this to understand data loss in your pipeline." @@ -640,18 +640,40 @@ " print(\"No failed records.\")" ] }, + { + "cell_type": "markdown", + "id": "919f489e", + "metadata": {}, + "source": [ + "## \ud83d\udcca (Optional) Score the detections with an LLM judge\n", + "\n", + "- `evaluate()` is a separate, opt-in step that runs LLM-as-judge metrics on the output.\n", + "- This notebook uses Annotate, so only **Detection Validity** runs \u2014 it flags entities the detector got wrong (false positives, mislabels, boundary errors). 
Substitute would also enable Type Fidelity, Relational Consistency, and Attribute Fidelity." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90382b68", + "metadata": {}, + "outputs": [], + "source": [ + "evaluated = anonymizer.evaluate(result)\n", + "evaluated.display_record(0)" + ] + }, { "cell_type": "markdown", "id": "6018bb1d", "metadata": {}, "source": [ - "## ⏭️ Next steps\n", + "## \u23ed\ufe0f Next steps\n", "\n", - "- **[🕵️ Your First Anonymization](../01_your_first_anonymization/)** --\n", + "- **[\ud83d\udd75\ufe0f Your First Anonymization](../01_your_first_anonymization/)** --\n", " the simplest end-to-end replace workflow if you haven't run it yet.\n", - "- **[🎯 Choosing a Replacement Strategy](../03_choosing_a_replacement_strategy/)** --\n", + "- **[\ud83c\udfaf Choosing a Replacement Strategy](../03_choosing_a_replacement_strategy/)** --\n", " compare Redact, Annotate, Hash, and Substitute side-by-side.\n", - "- **[✏️ Rewriting Biographies](../04_rewriting_biographies/)** --\n", + "- **[\u270f\ufe0f Rewriting Biographies](../04_rewriting_biographies/)** --\n", " generate privacy-safe paraphrases instead of token-level replacements." 
] } diff --git a/docs/notebooks/03_choosing_a_replacement_strategy.ipynb b/docs/notebooks/03_choosing_a_replacement_strategy.ipynb index d9d611cb..89df5dc7 100644 --- a/docs/notebooks/03_choosing_a_replacement_strategy.ipynb +++ b/docs/notebooks/03_choosing_a_replacement_strategy.ipynb @@ -36,7 +36,7 @@ "id": "dd098e7b", "metadata": {}, "source": [ - "# 🕵️ Choosing a Replacement Strategy\n", + "# \ud83d\udd75\ufe0f Choosing a Replacement Strategy\n", "\n", "Four [replace mode](../../concepts/replace/) strategies compared side-by-side on the same data.\n", "\n", @@ -46,7 +46,7 @@ "| **Redact** | Label-based markers (`[REDACTED_FIRST_NAME]`) |\n", "| **Annotate** | Tags entities but keeps original text |\n", "| **Hash** | Deterministic hash digest |\n", - "#### 📚 What you'll learn\n", + "#### \ud83d\udcda What you'll learn\n", "\n", "- Compare **Redact**, **Annotate**, **Hash**, and **Substitute** on the same input\n", "- Customize output formats with `format_template`\n", @@ -61,7 +61,7 @@ "id": "bf4e4388", "metadata": {}, "source": [ - "## ⚙️ Setup\n", + "## \u2699\ufe0f Setup\n", "\n", "- Check if your `NVIDIA_API_KEY` from [build.nvidia.com](https://build.nvidia.com) is registered for model access.\n", " - The default `build.nvidia.com` (NVIDIA Build) setup is a convenient way to try Anonymizer and iterate on previews. Use of NVIDIA Build is subject to NVIDIA Build's own terms of service and privacy practices, which are separate from and independent of the NeMo Framework library. NVIDIA Build is intended for evaluation and testing purposes only and may not be used in production environments. Do not upload any confidential information or personal data when using NVIDIA Build. 
Your use of NVIDIA Build is logged for security purposes and to improve NVIDIA products and services.\n", @@ -141,28 +141,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] 🔧 Anonymizer initialized with 3 model configs\n" + "[13:17:15] [INFO] \ud83d\udd27 Anonymizer initialized with 3 model configs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] |-- 🔎 detector: gliner-pii-detector\n" + "[13:17:15] [INFO] |-- \ud83d\udd0e detector: gliner-pii-detector\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] |-- ✅ validator: gpt-oss-120b\n" + "[13:17:15] [INFO] |-- \u2705 validator: gpt-oss-120b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] |-- 🧩 augmenter: gpt-oss-120b\n" + "[13:17:15] [INFO] |-- \ud83e\udde9 augmenter: gpt-oss-120b\n" ] } ], @@ -175,7 +175,7 @@ "id": "d58e76d4", "metadata": {}, "source": [ - "## 📦 Input data\n", + "## \ud83d\udce6 Input data\n", "\n", "- We use the same biographies dataset throughout so each strategy is compared\n", " on identical input." 
@@ -207,7 +207,7 @@ "id": "2fc9be79", "metadata": {}, "source": [ - "## 🔄 Substitute\n", + "## \ud83d\udd04 Substitute\n", "\n", "- Uses an LLM to generate contextually appropriate synthetic replacements.\n", " - The LLM considers the full document context matching names with emails, cities to states, etc.\n", @@ -231,14 +231,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:17:15] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:15] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:17:15] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -252,7 +252,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:36] [INFO] |-- 📋 Detection complete — 76 entities found across 3 records (0 failed) [20.6s]\n" + "[13:17:36] [INFO] |-- \ud83d\udccb Detection complete \u2014 76 entities found across 3 records (0 failed) [20.6s]\n" ] }, { @@ -266,21 +266,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:36] [INFO] 🔄 Running Substitute replacement\n" + "[13:17:36] [INFO] \ud83d\udd04 Running Substitute replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:50] [INFO] |-- 📋 Replacement complete (0 failed) [14.6s]\n" + "[13:17:50] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [14.6s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:50] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:17:50] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] } ], @@ -318,15 
+318,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
Ethan| first_name Hernandez| last_name, a 52| age‑year‑old Filipino| race_ethnicity zoologist| occupation living in Portland| city, Oregon| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Lincoln High| organization_name, he earned his Master of Science| degree at the University of Washington| university, where he also completed a research stint in conservation genetics| field_of_study. Fluent in Spanish| language, Ethan| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Ethan| first_name Hernandez| last_name, a 52| age\u2011year\u2011old Filipino| race_ethnicity zoologist| occupation living in Portland| city, Oregon| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Lincoln High| organization_name, he earned his Master of Science| degree at the University of Washington| university, where he also completed a research stint in conservation genetics| field_of_study. Fluent in Spanish| language, Ethan| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Ethan| first_name has worked at PetCare Veterinary Center| company_name and later at the Cascade Animal Hospital| company_name, where he now leads a busy mixed‑practice team. He identifies as a Libertarian| political_view and often volunteers at local shelters, a habit encouraged by his wife, Nina| first_name, and their two teenage children, Sofia and Mateo| first_name. Outside the clinic, Ethan| first_name enjoys hiking the Sierra Nevada| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Ethan| first_name has worked at PetCare Veterinary Center| company_name and later at the Cascade Animal Hospital| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Libertarian| political_view and often volunteers at local shelters, a habit encouraged by his wife, Nina| first_name, and their two teenage children, Sofia and Mateo| first_name. Outside the clinic, Ethan| first_name enjoys hiking the Sierra Nevada| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -377,14 +377,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:17:51] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:17:51] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:17:51] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:17:51] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -398,7 +398,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:18] [INFO] |-- 📋 Detection complete — 78 entities found across 3 records (0 failed) [27.1s]\n" + "[13:18:18] [INFO] |-- \ud83d\udccb Detection complete \u2014 78 entities found across 3 records (0 failed) [27.1s]\n" ] }, { @@ -412,21 +412,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:18] [INFO] 🔄 Running Substitute replacement\n" + "[13:18:18] [INFO] \ud83d\udd04 Running Substitute replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:28] [INFO] |-- 📋 Replacement complete (0 failed) [10.2s]\n" + "[13:18:28] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [10.2s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:28] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:18:28] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -440,15 +440,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
Takumi| first_name Tanaka| last_name, a 45| age‑year‑old Japanese| race_ethnicity marine biologist| occupation living in Sapporo| city, Hokkaido| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Tokyo Metropolitan Hibiya High School| organization_name, he earned his Ph.D. in Marine Biology| degree at the University of Tokyo| university, where he also completed a research stint in marine ecology| field_of_study. Fluent in Japanese| language, Takumi| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Takumi| first_name Tanaka| last_name, a 45| age\u2011year\u2011old Japanese| race_ethnicity marine biologist| occupation living in Sapporo| city, Hokkaido| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Tokyo Metropolitan Hibiya High School| organization_name, he earned his Ph.D. in Marine Biology| degree at the University of Tokyo| university, where he also completed a research stint in marine ecology| field_of_study. Fluent in Japanese| language, Takumi| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Takumi| first_name has worked at Sakura Animal Clinic| company_name and later at the Nihon Veterinary Center| company_name, where he now leads a busy mixed‑practice team. He identifies as a Liberal Democratic Party| political_view and often volunteers at local shelters, a habit encouraged by his wife, Haruka| first_name, and their two teenage children, Sora and Ren| first_name. Outside the clinic, Takumi| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Takumi| first_name has worked at Sakura Animal Clinic| company_name and later at the Nihon Veterinary Center| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Liberal Democratic Party| political_view and often volunteers at local shelters, a habit encouraged by his wife, Haruka| first_name, and their two teenage children, Sora and Ren| first_name. Outside the clinic, Takumi| first_name enjoys hiking the Rockies with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -483,7 +483,7 @@ "id": "ccbcd178", "metadata": {}, "source": [ - "## 🚫 Redact\n", + "## \ud83d\udeab Redact\n", "\n", "- Replaces each entity with a label-based marker. Default: `[REDACTED_FIRST_NAME]`.\n", "- Customize with `Redact(format_template=...)`." @@ -506,14 +506,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:28] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:18:28] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:28] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:18:28] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -527,7 +527,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] |-- 📋 Detection complete — 75 entities found across 3 records (0 failed) [25.6s]\n" + "[13:18:54] [INFO] |-- \ud83d\udccb Detection complete \u2014 75 entities found across 3 records (0 failed) [25.6s]\n" ] }, { @@ -541,21 +541,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] 🔄 Running Redact replacement\n" + "[13:18:54] [INFO] \ud83d\udd04 Running Redact replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:18:54] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:18:54] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -569,15 +569,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| education_level at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| education_level at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
[REDACTED_FIRST_NAME]| first_name [REDACTED_LAST_NAME]| last_name, a [REDACTED_AGE]| age‑year‑old [REDACTED_RACE_ETHNICITY]| race_ethnicity [REDACTED_OCCUPATION]| occupation living in [REDACTED_CITY]| city, [REDACTED_STATE]| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from [REDACTED_ORGANIZATION_NAME]| organization_name, he earned his [REDACTED_EDUCATION_LEVEL]| education_level at the [REDACTED_UNIVERSITY]| university, where he also completed a research stint in wildlife health. Fluent in [REDACTED_LANGUAGE]| language, [REDACTED_FIRST_NAME]| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
[REDACTED_FIRST_NAME]| first_name [REDACTED_LAST_NAME]| last_name, a [REDACTED_AGE]| age\u2011year\u2011old [REDACTED_RACE_ETHNICITY]| race_ethnicity [REDACTED_OCCUPATION]| occupation living in [REDACTED_CITY]| city, [REDACTED_STATE]| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from [REDACTED_ORGANIZATION_NAME]| organization_name, he earned his [REDACTED_EDUCATION_LEVEL]| education_level at the [REDACTED_UNIVERSITY]| university, where he also completed a research stint in wildlife health. Fluent in [REDACTED_LANGUAGE]| language, [REDACTED_FIRST_NAME]| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, [REDACTED_FIRST_NAME]| first_name has worked at [REDACTED_COMPANY_NAME]| company_name and later at the [REDACTED_ORGANIZATION_NAME]| organization_name, where he now leads a busy mixed‑practice team. He identifies as a [REDACTED_POLITICAL_VIEW]| political_view and often volunteers at local shelters, a habit encouraged by his wife, [REDACTED_FIRST_NAME]| first_name, and their two teenage children, [REDACTED_FIRST_NAME]| first_name. Outside the clinic, [REDACTED_FIRST_NAME]| first_name enjoys hiking the [REDACTED_PLACE_NAME]| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, [REDACTED_FIRST_NAME]| first_name has worked at [REDACTED_COMPANY_NAME]| company_name and later at the [REDACTED_ORGANIZATION_NAME]| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a [REDACTED_POLITICAL_VIEW]| political_view and often volunteers at local shelters, a habit encouraged by his wife, [REDACTED_FIRST_NAME]| first_name, and their two teenage children, [REDACTED_FIRST_NAME]| first_name. Outside the clinic, [REDACTED_FIRST_NAME]| first_name enjoys hiking the [REDACTED_PLACE_NAME]| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -634,14 +634,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:18:54] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:18:54] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:18:54] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -655,7 +655,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] |-- 📋 Detection complete — 75 entities found across 3 records (0 failed) [26.5s]\n" + "[13:19:21] [INFO] |-- \ud83d\udccb Detection complete \u2014 75 entities found across 3 records (0 failed) [26.5s]\n" ] }, { @@ -669,21 +669,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] 🔄 Running Redact replacement\n" + "[13:19:21] [INFO] \ud83d\udd04 Running Redact replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:19:21] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:19:21] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -697,15 +697,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| organization_name, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| organization_name, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
***| first_name ***| last_name, a ***| age‑year‑old ***| race_ethnicity ***| occupation living in ***| city, ***| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from ***| organization_name, he earned his ***| degree at the ***| organization_name, where he also completed a research stint in ***| field_of_study. Fluent in ***| language, ***| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
***| first_name ***| last_name, a ***| age\u2011year\u2011old ***| race_ethnicity ***| occupation living in ***| city, ***| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from ***| organization_name, he earned his ***| degree at the ***| organization_name, where he also completed a research stint in ***| field_of_study. Fluent in ***| language, ***| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, ***| first_name has worked at ***| company_name and later at the ***| company_name, where he now leads a busy mixed‑practice team. He identifies as a ***| political_view and often volunteers at local shelters, a habit encouraged by his wife, ***| first_name, and their two teenage children, ***| first_name. Outside the clinic, ***| first_name enjoys hiking the ***| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, ***| first_name has worked at ***| company_name and later at the ***| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a ***| political_view and often volunteers at local shelters, a habit encouraged by his wife, ***| first_name, and their two teenage children, ***| first_name. Outside the clinic, ***| first_name enjoys hiking the ***| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -740,7 +740,7 @@ "id": "70a454bf", "metadata": {}, "source": [ - "## 🏷️ Annotate\n", + "## \ud83c\udff7\ufe0f Annotate\n", "\n", "- Tags each entity with its label but keeps the original text visible.\n", " Default: ``.\n", @@ -765,14 +765,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:19:21] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:21] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:19:21] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -786,7 +786,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] |-- 📋 Detection complete — 77 entities found across 3 records (0 failed) [27.8s]\n" + "[13:19:49] [INFO] |-- \ud83d\udccb Detection complete \u2014 77 entities found across 3 records (0 failed) [27.8s]\n" ] }, { @@ -800,21 +800,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] 🔄 Running Annotate replacement\n" + "[13:19:49] [INFO] \ud83d\udd04 Running Annotate replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:19:49] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:19:49] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -828,15 +828,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater| organization_name.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater| organization_name.
\n", "
\n", "
\n", "
Replaced
\n", - "
<Bobby, first_name>| first_name <Watford, last_name>| last_name, a <40, age>| age‑year‑old <Mexican, race_ethnicity>| race_ethnicity <veterinarian, occupation>| occupation living in <Denver, city>| city, <Colorado, state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High, organization_name>| organization_name, he earned his <DVM, degree>| degree at the <University of Colorado Boulder, university>| university, where he also completed a research stint in <wildlife health, field_of_study>| field_of_study. Fluent in <English, language>| language, <Bobby, first_name>| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
<Bobby, first_name>| first_name <Watford, last_name>| last_name, a <40, age>| age\u2011year\u2011old <Mexican, race_ethnicity>| race_ethnicity <veterinarian, occupation>| occupation living in <Denver, city>| city, <Colorado, state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High, organization_name>| organization_name, he earned his <DVM, degree>| degree at the <University of Colorado Boulder, university>| university, where he also completed a research stint in <wildlife health, field_of_study>| field_of_study. Fluent in <English, language>| language, <Bobby, first_name>| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, <Bobby, first_name>| first_name has worked at <VCA Animal Hospital, company_name>| company_name and later at the <Colorado Veterinary Clinic, company_name>| company_name, where he now leads a busy mixed‑practice team. He identifies as a <Christian Democrat, political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya, first_name>| first_name, and their two teenage children, <Aria and Leo, first_name>| first_name. Outside the clinic, <Bobby, first_name>| first_name enjoys hiking the <Rockies, place_name>| place_name with his family and mentoring veterinary students from his <alma mater, organization_name>| organization_name.
\n", + "Since finishing his training, <Bobby, first_name>| first_name has worked at <VCA Animal Hospital, company_name>| company_name and later at the <Colorado Veterinary Clinic, company_name>| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a <Christian Democrat, political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya, first_name>| first_name, and their two teenage children, <Aria and Leo, first_name>| first_name. Outside the clinic, <Bobby, first_name>| first_name enjoys hiking the <Rockies, place_name>| place_name with his family and mentoring veterinary students from his <alma mater, organization_name>| organization_name.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -893,14 +893,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:19:49] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:19:49] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:19:49] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -914,7 +914,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:20:16] [INFO] |-- 📋 Detection complete — 77 entities found across 3 records (0 failed) [26.8s]\n" + "[13:20:16] [INFO] |-- \ud83d\udccb Detection complete \u2014 77 entities found across 3 records (0 failed) [26.8s]\n" ] }, { @@ -928,21 +928,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:20:16] [INFO] 🔄 Running Annotate replacement\n" + "[13:20:16] [INFO] \ud83d\udd04 Running Annotate replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:20:16] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:20:16] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:20:16] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:20:16] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -956,15 +956,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
<Bobby-|-first_name>| first_name <Watford-|-last_name>| last_name, a <40-|-age>| age‑year‑old <Mexican-|-race_ethnicity>| race_ethnicity <veterinarian-|-occupation>| occupation living in <Denver-|-city>| city, <Colorado-|-state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High-|-organization_name>| organization_name, he earned his <DVM-|-degree>| degree at the <University of Colorado Boulder-|-university>| university, where he also completed a research stint in wildlife health. Fluent in <English-|-language>| language, <Bobby-|-first_name>| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
<Bobby-|-first_name>| first_name <Watford-|-last_name>| last_name, a <40-|-age>| age\u2011year\u2011old <Mexican-|-race_ethnicity>| race_ethnicity <veterinarian-|-occupation>| occupation living in <Denver-|-city>| city, <Colorado-|-state>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <Jefferson High-|-organization_name>| organization_name, he earned his <DVM-|-degree>| degree at the <University of Colorado Boulder-|-university>| university, where he also completed a research stint in wildlife health. Fluent in <English-|-language>| language, <Bobby-|-first_name>| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, <Bobby-|-first_name>| first_name has worked at <VCA Animal Hospital-|-company_name>| company_name and later at the <Colorado Veterinary Clinic-|-organization_name>| organization_name, where he now leads a busy mixed‑practice team. He identifies as a <Christian Democrat-|-political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya-|-first_name>| first_name, and their two teenage children, <Aria and Leo-|-first_name>| first_name. Outside the clinic, <Bobby-|-first_name>| first_name enjoys hiking the <Rockies-|-place_name>| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, <Bobby-|-first_name>| first_name has worked at <VCA Animal Hospital-|-company_name>| company_name and later at the <Colorado Veterinary Clinic-|-organization_name>| organization_name, where he now leads a busy mixed\u2011practice team. He identifies as a <Christian Democrat-|-political_view>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <Maya-|-first_name>| first_name, and their two teenage children, <Aria and Leo-|-first_name>| first_name. Outside the clinic, <Bobby-|-first_name>| first_name enjoys hiking the <Rockies-|-place_name>| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -997,7 +997,7 @@ "id": "c3cb3610", "metadata": {}, "source": [ - "## #️⃣ Hash\n", + "## #\ufe0f\u20e3 Hash\n", "\n", "- Deterministic -- same input always produces the same hash.\n", "- Customize with `format_template` (must include `{digest}`),\n", @@ -1021,14 +1021,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:20:31] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:20:31] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:20:31] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:20:31] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -1042,7 +1042,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:21:42] [INFO] |-- 📋 Detection complete — 78 entities found across 3 records (0 failed) [71.4s]\n" + "[13:21:42] [INFO] |-- \ud83d\udccb Detection complete \u2014 78 entities found across 3 records (0 failed) [71.4s]\n" ] }, { @@ -1056,21 +1056,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:21:42] [INFO] 🔄 Running Hash replacement\n" + "[13:21:42] [INFO] \ud83d\udd04 Running Hash replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:21:42] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:21:42] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:21:42] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:21:42] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -1084,15 +1084,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| school_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| school_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| clinic_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| clinic_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria| first_name and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
<HASH_FIRST_NAME_4a70dab2cb4d>| first_name <HASH_LAST_NAME_e2efa8a62600>| last_name, a <HASH_AGE_d59eced1ded0>| age‑year‑old <HASH_RACE_ETHNICITY_d108dfd1df5c>| race_ethnicity <HASH_OCCUPATION_52a469e4d8e9>| occupation living in <HASH_CITY_fcdeb8c07d4a>| city, <HASH_STATE_4ae62bf4e804>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <HASH_SCHOOL_NAME_39dde416149c>| school_name, he earned his <HASH_DEGREE_d44ae5e206d1>| degree at the <HASH_UNIVERSITY_bca201129c41>| university, where he also completed a research stint in wildlife health. Fluent in <HASH_LANGUAGE_ba118bf7fc9c>| language, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
<HASH_FIRST_NAME_4a70dab2cb4d>| first_name <HASH_LAST_NAME_e2efa8a62600>| last_name, a <HASH_AGE_d59eced1ded0>| age\u2011year\u2011old <HASH_RACE_ETHNICITY_d108dfd1df5c>| race_ethnicity <HASH_OCCUPATION_52a469e4d8e9>| occupation living in <HASH_CITY_fcdeb8c07d4a>| city, <HASH_STATE_4ae62bf4e804>| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from <HASH_SCHOOL_NAME_39dde416149c>| school_name, he earned his <HASH_DEGREE_d44ae5e206d1>| degree at the <HASH_UNIVERSITY_bca201129c41>| university, where he also completed a research stint in wildlife health. Fluent in <HASH_LANGUAGE_ba118bf7fc9c>| language, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name has worked at <HASH_COMPANY_NAME_56e3eb3da5fa>| company_name and later at the <HASH_CLINIC_NAME_b45afd893ae9>| clinic_name, where he now leads a busy mixed‑practice team. He identifies as a <HASH_POLITICAL_VIEW_1eba4d0314c9>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <HASH_FIRST_NAME_031e45c699d1>| first_name, and their two teenage children, <HASH_FIRST_NAME_736001faca59>| first_name and <HASH_FIRST_NAME_5bc426e8d81e>| first_name. Outside the clinic, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name enjoys hiking the <HASH_PLACE_NAME_d706f1c04961>| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name has worked at <HASH_COMPANY_NAME_56e3eb3da5fa>| company_name and later at the <HASH_CLINIC_NAME_b45afd893ae9>| clinic_name, where he now leads a busy mixed\u2011practice team. He identifies as a <HASH_POLITICAL_VIEW_1eba4d0314c9>| political_view and often volunteers at local shelters, a habit encouraged by his wife, <HASH_FIRST_NAME_031e45c699d1>| first_name, and their two teenage children, <HASH_FIRST_NAME_736001faca59>| first_name and <HASH_FIRST_NAME_5bc426e8d81e>| first_name. Outside the clinic, <HASH_FIRST_NAME_4a70dab2cb4d>| first_name enjoys hiking the <HASH_PLACE_NAME_d706f1c04961>| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -1150,14 +1150,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:21:43] [INFO] 👀 Preview mode: 📂 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" + "[13:21:43] [INFO] \ud83d\udc40 Preview mode: \ud83d\udcc2 Loaded 3 records from https://raw.githubusercontent.com/NVIDIA-NeMo/Anonymizer/refs/heads/main/docs/data/NVIDIA_synthetic_biographies.csv (column: 'biography')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:21:43] [INFO] 🔍 Running entity detection on 3 records\n" + "[13:21:43] [INFO] \ud83d\udd0d Running entity detection on 3 records\n" ] }, { @@ -1171,7 +1171,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:22:18] [INFO] |-- 📋 Detection complete — 76 entities found across 3 records (0 failed) [34.9s]\n" + "[13:22:18] [INFO] |-- \ud83d\udccb Detection complete \u2014 76 entities found across 3 records (0 failed) [34.9s]\n" ] }, { @@ -1185,21 +1185,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "[13:22:18] [INFO] 🔄 Running Hash replacement\n" + "[13:22:18] [INFO] \ud83d\udd04 Running Hash replacement\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:22:18] [INFO] |-- 📋 Replacement complete (0 failed) [0.0s]\n" + "[13:22:18] [INFO] |-- \ud83d\udccb Replacement complete (0 failed) [0.0s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[13:22:18] [INFO] 🎉 Pipeline complete — 3 records processed, 0 total failures\n" + "[13:22:18] [INFO] \ud83c\udf89 Pipeline complete \u2014 3 records processed, 0 total failures\n" ] }, { @@ -1213,15 +1213,15 @@ "
\n", "
\n", "
Original
\n", - "
Bobby| first_name Watford| last_name, a 40| age‑year‑old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
Bobby| first_name Watford| last_name, a 40| age\u2011year\u2011old Mexican| race_ethnicity veterinarian| occupation living in Denver| city, Colorado| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from Jefferson High| organization_name, he earned his DVM| degree at the University of Colorado Boulder| university, where he also completed a research stint in wildlife health| field_of_study. Fluent in English| language, Bobby| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed‑practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, Bobby| first_name has worked at VCA Animal Hospital| company_name and later at the Colorado Veterinary Clinic| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a Christian Democrat| political_view and often volunteers at local shelters, a habit encouraged by his wife, Maya| first_name, and their two teenage children, Aria and Leo| first_name. Outside the clinic, Bobby| first_name enjoys hiking the Rockies| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replaced
\n", - "
[657b3da9]| first_name [6e424e2c]| last_name, a [d645920e]| age‑year‑old [a0e769d8]| race_ethnicity [84c99b4a]| occupation living in [67100af8]| city, [15e49475]| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from [27c56955]| organization_name, he earned his [47211f54]| degree at the [e2b97348]| university, where he also completed a research stint in [7b2947bb]| field_of_study. Fluent in [78463a38]| language, [657b3da9]| first_name has always described his upbringing as a blend of small‑town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", + "
[657b3da9]| first_name [6e424e2c]| last_name, a [d645920e]| age\u2011year\u2011old [a0e769d8]| race_ethnicity [84c99b4a]| occupation living in [67100af8]| city, [15e49475]| state, grew up on the outskirts of the city and developed a love for animals early on. After graduating from [27c56955]| organization_name, he earned his [47211f54]| degree at the [e2b97348]| university, where he also completed a research stint in [7b2947bb]| field_of_study. Fluent in [78463a38]| language, [657b3da9]| first_name has always described his upbringing as a blend of small\u2011town curiosity and the vibrant culture of his community, values that continue to shape his compassionate approach to animal care.\n", "\n", - "Since finishing his training, [657b3da9]| first_name has worked at [3541ebe8]| company_name and later at the [cd3abcd1]| company_name, where he now leads a busy mixed‑practice team. He identifies as a [408d2599]| political_view and often volunteers at local shelters, a habit encouraged by his wife, [719fe280]| first_name, and their two teenage children, [0efaeae5]| first_name. Outside the clinic, [657b3da9]| first_name enjoys hiking the [661f0bd9]| place_name with his family and mentoring veterinary students from his alma mater.
\n", + "Since finishing his training, [657b3da9]| first_name has worked at [3541ebe8]| company_name and later at the [cd3abcd1]| company_name, where he now leads a busy mixed\u2011practice team. He identifies as a [408d2599]| political_view and often volunteers at local shelters, a habit encouraged by his wife, [719fe280]| first_name, and their two teenage children, [0efaeae5]| first_name. Outside the clinic, [657b3da9]| first_name enjoys hiking the [661f0bd9]| place_name with his family and mentoring veterinary students from his alma mater.
\n", "
\n", "
\n", "
Replacement Map
\n", @@ -1249,18 +1249,42 @@ "hash_custom_preview.display_record(0)" ] }, + { + "cell_type": "markdown", + "id": "deab7d5d", + "metadata": {}, + "source": [ + "## \ud83d\udcca (Optional) Evaluate each strategy\n", + "\n", + "- `evaluate()` is a separate, opt-in step that scores the output with LLM-as-judge metrics. Which metrics fire depends on the strategy:\n", + " - **Substitute** \u2192 4 metrics (Detection Validity + Type Fidelity + Relational Consistency + Attribute Fidelity).\n", + " - **Redact / Annotate / Hash** \u2192 Detection Validity only (no replacement map to score type/relational/attribute against).\n", + "- Below shows it on the Substitute preview to surface all four; the same call works on `redact_preview`, `annotate_preview`, or `hash_preview`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a21beab", + "metadata": {}, + "outputs": [], + "source": [ + "substitute_evaluated = anonymizer.evaluate(substitute_preview)\n", + "substitute_evaluated.display_record(0)" + ] + }, { "cell_type": "markdown", "id": "eeb153ba", "metadata": {}, "source": [ - "## ⏭️ Next steps\n", + "## \u23ed\ufe0f Next steps\n", "\n", - "- **[🕵️ Inspecting Detected Entities](../02_inspecting_detected_entities/)** --\n", + "- **[\ud83d\udd75\ufe0f Inspecting Detected Entities](../02_inspecting_detected_entities/)** --\n", " dig into what the detection pipeline found and debug quality.\n", - "- **[✏️ Rewriting Biographies](../04_rewriting_biographies/)** --\n", + "- **[\u270f\ufe0f Rewriting Biographies](../04_rewriting_biographies/)** --\n", " generate privacy-safe paraphrases instead of token-level replacements.\n", - "- **[⚖️ Rewriting Legal Documents](../05_rewriting_legal_documents/)** --\n", + "- **[\u2696\ufe0f Rewriting Legal Documents](../05_rewriting_legal_documents/)** --\n", " rewrite legal text with domain-specific privacy goals." 
] } diff --git a/src/anonymizer/__init__.py b/src/anonymizer/__init__.py index 0c812934..55ab01cd 100644 --- a/src/anonymizer/__init__.py +++ b/src/anonymizer/__init__.py @@ -9,7 +9,14 @@ from data_designer.config.models import ModelProvider as ModelProvider -from anonymizer.config.anonymizer_config import AnonymizerConfig, AnonymizerInput, Detect, Rewrite, RiskTolerance +from anonymizer.config.anonymizer_config import ( + AnonymizerConfig, + AnonymizerInput, + Detect, + EvaluateConfig, + Rewrite, + RiskTolerance, +) from anonymizer.config.replace_strategies import Annotate, Hash, Redact, Substitute from anonymizer.config.rewrite import PrivacyGoal from anonymizer.engine.constants import DEFAULT_ENTITY_LABELS as _DEFAULT_ENTITY_LABELS @@ -35,6 +42,7 @@ "Annotate", "DEFAULT_ENTITY_LABELS", "Detect", + "EvaluateConfig", "Hash", "InvalidConfigError", "InvalidInputError", diff --git a/src/anonymizer/config/anonymizer_config.py b/src/anonymizer/config/anonymizer_config.py index 1f4b669e..6852bdbc 100644 --- a/src/anonymizer/config/anonymizer_config.py +++ b/src/anonymizer/config/anonymizer_config.py @@ -197,3 +197,20 @@ def validate_exactly_one_mode(self) -> AnonymizerConfig: " Use replace=Redact() for entity replacement, or rewrite=Rewrite() for LLM rewriting." ) return self + + +class EvaluateConfig(BaseModel): + """Optional knobs for :meth:`Anonymizer.evaluate`. + + Reserved for genuinely evaluation-specific configuration — metric selection, + per-judge model/prompt overrides, scoring thresholds, etc. The anonymization + mode is **not** here: it travels on the ``AnonymizerResult`` / + ``PreviewResult`` produced by ``run()`` / ``preview()`` and is read directly + by ``evaluate()``, so users don't restate it and can't mis-state it. + + Today this is an empty placeholder; fields will be added as evaluation + knobs are introduced. + """ + + # Intentionally empty for now. New fields land here as evaluation + # configurability is introduced. 
diff --git a/src/anonymizer/config/default_model_configs/evaluate.yaml b/src/anonymizer/config/default_model_configs/evaluate.yaml new file mode 100644 index 00000000..b0e97302 --- /dev/null +++ b/src/anonymizer/config/default_model_configs/evaluate.yaml @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Model aliases for the LLM-as-judge evaluation step (Anonymizer.evaluate). +# These roles are NOT needed at anonymization time — preview() / run() do not +# consume them. They are only resolved when the user opts into evaluation. + +selected_models: + detection_validity_judge: gpt-oss-120b + replace_type_fidelity_judge: gpt-oss-120b + replace_relational_consistency_judge: gpt-oss-120b + replace_attribute_fidelity_judge: gpt-oss-120b diff --git a/src/anonymizer/config/models.py b/src/anonymizer/config/models.py index 59792196..1cd03e13 100644 --- a/src/anonymizer/config/models.py +++ b/src/anonymizer/config/models.py @@ -93,9 +93,25 @@ class RewriteModelSelection(BaseModel): judge: str +class EvaluateModelSelection(BaseModel): + """Model aliases for the LLM-as-judge evaluation step. + + These roles are only consumed by :meth:`Anonymizer.evaluate` — they are not + needed at anonymization time. Keeping them in their own section lets + ``preview()`` / ``run()`` validate only the roles that produce anonymized + output, while ``evaluate(...)`` validates the roles that score it. 
+ """ + + detection_validity_judge: str + replace_type_fidelity_judge: str + replace_relational_consistency_judge: str + replace_attribute_fidelity_judge: str + + class ModelSelection(BaseModel): """Model alias selections for all pipelines, loaded from YAML defaults via ``load_default_model_selection()``.""" detection: DetectionModelSelection replace: ReplaceModelSelection rewrite: RewriteModelSelection + evaluate: EvaluateModelSelection diff --git a/src/anonymizer/engine/constants.py b/src/anonymizer/engine/constants.py index 1150682c..fdfecadf 100644 --- a/src/anonymizer/engine/constants.py +++ b/src/anonymizer/engine/constants.py @@ -59,6 +59,28 @@ # Final output COL_FINAL_ENTITIES = "final_entities" +# Replace evaluation: detection-validity judge +COL_DETECTION_JUDGE = "_detection_judge" # raw judge output, internal +COL_DETECTION_VALID = "detection_valid" # user-facing bool (None if judge unavailable) +COL_DETECTION_INVALID_ENTITIES = "detection_invalid_entities" # user-facing list of {value, label, reasoning} + +# Replace evaluation: type-fidelity judge (Substitute only) +COL_TYPE_FIDELITY_JUDGE = "_type_fidelity_judge" # raw judge output, internal +COL_TYPE_FIDELITY_VALID = "type_fidelity_valid" # user-facing bool (None if judge unavailable) +COL_TYPE_FIDELITY_INVALID_REPLACEMENTS = ( + "type_fidelity_invalid_replacements" # list of {original, label, synthetic, reasoning} +) + +# Replace evaluation: relational-consistency judge (Substitute only) +COL_RELATIONAL_CONSISTENCY_JUDGE = "_relational_consistency_judge" # raw judge output (kept for display denominator) +COL_RELATIONAL_CONSISTENCY_VALID = "relational_consistency_valid" # user-facing bool (None if judge unavailable) +COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS = "relational_consistency_invalid_relations" # list of failing relations + +# Replace evaluation: attribute-fidelity judge (Substitute only) +COL_ATTRIBUTE_FIDELITY_JUDGE = "_attribute_fidelity_judge" # raw judge output (kept for display 
denominator) +COL_ATTRIBUTE_FIDELITY_VALID = "attribute_fidelity_valid" # user-facing bool (None if judge unavailable) +COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES = "attribute_fidelity_invalid_entities" # list of failing per-entity checks + # --------------------------------------------------------------------------- # Rewrite pipeline # --------------------------------------------------------------------------- diff --git a/src/anonymizer/engine/evaluation/__init__.py b/src/anonymizer/engine/evaluation/__init__.py new file mode 100644 index 00000000..1a8431c3 --- /dev/null +++ b/src/anonymizer/engine/evaluation/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/anonymizer/engine/evaluation/detection_judge.py b/src/anonymizer/engine/evaluation/detection_judge.py new file mode 100644 index 00000000..5d0349fe --- /dev/null +++ b/src/anonymizer/engine/evaluation/detection_judge.py @@ -0,0 +1,341 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig +from data_designer.config.models import ModelConfig +from pydantic import BaseModel, Field + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_DETECTION_INVALID_ENTITIES, + COL_DETECTION_JUDGE, + COL_DETECTION_VALID, + COL_ENTITIES_BY_VALUE, + COL_TEXT, + ENTITY_LABEL_EXAMPLES, + _jinja, +) +from anonymizer.engine.ndd.adapter import FailedRecord, NddAdapter +from anonymizer.engine.ndd.model_loader import resolve_model_alias +from anonymizer.engine.prompt_utils import substitute_placeholders +from anonymizer.engine.row_partitioning import merge_and_reorder, split_rows +from anonymizer.engine.schemas import EntitiesByValueSchema + +logger = logging.getLogger("anonymizer.evaluation.detection_judge") + +_ENTITIES_FOR_JUDGE_COL = "_entities_for_detection_judge" +_ENTITY_EXAMPLES_FOR_JUDGE_COL = "_entity_examples_for_detection_judge" + + +# --------------------------------------------------------------------------- +# Output schema +# --------------------------------------------------------------------------- + + +class InvalidDetectedEntity(BaseModel): + value: str = Field(description="Original detected span verbatim.") + label: str = Field(description="The label the detector assigned to this span.") + reasoning: str = Field( + description="One short sentence explaining why this (value, label) is not a valid detection." + ) + + +class DetectionJudgmentSchema(BaseModel): + all_valid: bool = Field( + description="True only if every detected entity is a correct (value, label) detection in context." + ) + invalid_entities: list[InvalidDetectedEntity] = Field( + default_factory=list, + description="Every detected entity that is not a valid detection. 
Empty when all_valid is True.", + ) + + +# --------------------------------------------------------------------------- +# Result +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class DetectionJudgeResult: + dataframe: pd.DataFrame + failed_records: list[FailedRecord] + + +# --------------------------------------------------------------------------- +# Prompt +# --------------------------------------------------------------------------- + + +def _judge_prompt() -> str: + prompt = """You are an expert judge evaluating the accuracy of automated PII / sensitive-entity detection. + + +<> + + + +{%- for entity in <> %} +- value="{{ entity.value }}" | label={{ entity.label }} +{%- endfor %} + + + +{{ <> }} + + + +For each detected entity above, decide whether the (value, label) pair is a correct PII / sensitive-entity \ +detection given the original text. + +Return structured JSON: +- Set `all_valid` to true ONLY if every detected entity is a correct detection. +- Otherwise set `all_valid` to false and list every incorrect detection in `invalid_entities`, with a short \ +`reasoning` per entry. + + + +A detection is INVALID if any of the following hold: +- false_positive: the span is not actually identifying or sensitive in this context (e.g. a common word, \ +generic phrase, or boilerplate flagged as PII). +- wrong_label: the span IS sensitive, but the chosen label does not fit. Treat labels as \ +BUCKETS, not precise taxonomy nodes. A label is NOT wrong when the chosen label is a SIBLING \ +within the same broad domain — a more specific, more general, or peer member of the same \ +parent concept (e.g. higher-education institutions, organizational entities, geographic \ +places, communication identifiers). Only flag `wrong_label` when the chosen label sits in a \ +clearly DIFFERENT domain (e.g. a company name labeled `first_name`; an email labeled `url`; \ +a job title labeled `degree`). 
+- not_in_text: the literal `value` does not appear in the original text. +- wrong_boundary: the span is a clear partial or over-extended capture of the real entity. \ +Flag this ONLY when the span itself is broken — i.e. it omits part of the actual value, or \ +it absorbs surrounding tokens (titles, prepositions, conjunctions, function words) that are \ +not part of the value. \ +Treat the span as CORRECT when it captures the bare value of the entity, even if that value \ +appears inside a longer descriptive phrase or compound expression. Surrounding descriptive \ +words in natural prose are NOT part of the entity, and trimming them is the right behavior, \ +not a boundary error. Apply the "form-field" test: if you were filling out a structured form \ +for this entity type, the bare value would be the answer. +- contextual_mismatch: in this context the span refers to something other than the labeled entity type \ +(e.g. "Apple" used as the fruit and labeled `company_name`; "May" used as a verb/month and labeled `first_name`). + + + +A detection is VALID when ALL of the following hold: +- The `value` appears in the original text. +- The chosen label is a reasonable fit; if multiple labels could plausibly apply, the chosen one is \ +acceptable. +- The span is a complete and reasonable boundary for the entity in context. +- Removing or replacing this span meaningfully contributes to anonymizing the record. + + + +- Use `reference_label_examples` as a guide for what each label is supposed to capture; do not invent labels \ +that are not in that mapping. +- Be charitable when multiple labels could plausibly apply: if the chosen label is reasonable, mark valid. +- Be strict about clear false positives, mislabels, and obvious boundary errors. +- `reasoning` MUST be one short sentence per invalid entity, naming the failure mode. +- If `detected_entities` is empty, return `all_valid: true` and an empty `invalid_entities` list. 
+- Do NOT include entities you consider valid in `invalid_entities`. +- Do NOT introduce entities that were not in the detected list. + + + +Return ONLY the JSON object that matches the required schema. Do NOT wrap your output in \ +``` or ```json markdown fences. Do NOT include any commentary, reasoning, preamble, or text \ +outside the JSON object. Your entire response must be a single valid JSON object. + +""" + return substitute_placeholders( + prompt, + { + "<>": _jinja(COL_TEXT), + "<>": _ENTITIES_FOR_JUDGE_COL, + "<>": _ENTITY_EXAMPLES_FOR_JUDGE_COL, + }, + ) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +_EXAMPLE_LOOKUP: dict[str, str] = { + label: f"(e.g. {', '.join(examples)})" for label, examples in ENTITY_LABEL_EXAMPLES.items() +} + + +def _entities_for_judge(parsed: EntitiesByValueSchema) -> list[dict[str, str]]: + """Flatten EntitiesByValueSchema into one (value, label) row per pair. + + The judge schema and the display denominator both work at (value, label) + granularity, so the prompt input mirrors that shape instead of grouping + labels under a single value. + """ + return [{"value": e.value, "label": label} for e in parsed.entities_by_value for label in e.labels] + + +def _label_examples_for_judge(parsed: EntitiesByValueSchema) -> str: + """Build a JSON map of {label: example_hint} for labels present in this row.""" + labels: set[str] = set() + for entity in parsed.entities_by_value: + labels.update(label for label in entity.labels if label) + if not labels: + return "{}" + examples = {label: _EXAMPLE_LOOKUP.get(label, "(no canonical example available)") for label in sorted(labels)} + return json.dumps(examples, ensure_ascii=True) + + +def _flatten_judgment(raw: object) -> tuple[bool | None, list[dict[str, str]]]: + """Normalize an LLM judge output into (all_valid, invalid_entities). 
+ + Returns ``(None, [])`` for any malformed or missing payload so downstream + display can render "judge unavailable" rather than fabricate a verdict. + """ + if raw is None: + return None, [] + if hasattr(raw, "model_dump"): + raw = raw.model_dump(mode="python") + if isinstance(raw, str): + try: + raw = json.loads(raw) + except (json.JSONDecodeError, ValueError): + return None, [] + if not isinstance(raw, dict): + return None, [] + try: + parsed = DetectionJudgmentSchema.model_validate(raw) + except Exception: + return None, [] + return parsed.all_valid, [entry.model_dump() for entry in parsed.invalid_entities] + + +# --------------------------------------------------------------------------- +# Workflow +# --------------------------------------------------------------------------- + + +class DetectionJudgeWorkflow: + """LLM-as-judge evaluator that flags invalid PII detections per record. + + Runs after replacement and validates the detection step that fed the + replacement. Output columns: ``COL_DETECTION_VALID`` (bool|None) and + ``COL_DETECTION_INVALID_ENTITIES`` (list of {value, label, reasoning}). + """ + + def __init__(self, adapter: NddAdapter) -> None: + self._adapter = adapter + + # ------------------------------------------------------------------------ + # Decomposed pieces — the orchestrator in ReplacementWorkflow uses these + # to merge all 4 judges into a single adapter.run_workflow() call. + # ------------------------------------------------------------------------ + + def prepare( + self, + dataframe: pd.DataFrame, + *, + entities_column: str = COL_ENTITIES_BY_VALUE, + ) -> pd.DataFrame: + """Add the intermediate columns this judge's prompt template references. + + Returns a copy of ``dataframe`` with ``_entities_for_detection_judge`` and + ``_entity_examples_for_detection_judge`` populated. 
+ """ + working_df = dataframe.copy() + parsed = working_df[entities_column].apply(EntitiesByValueSchema.from_raw) + working_df[_ENTITIES_FOR_JUDGE_COL] = parsed.apply(_entities_for_judge) + working_df[_ENTITY_EXAMPLES_FOR_JUDGE_COL] = parsed.apply(_label_examples_for_judge) + return working_df + + def column_config(self, selected_models: EvaluateModelSelection) -> LLMStructuredColumnConfig: + """The DD column config — name, prompt, model alias, structured-output schema.""" + return LLMStructuredColumnConfig( + name=COL_DETECTION_JUDGE, + prompt=_judge_prompt(), + model_alias=resolve_model_alias("detection_validity_judge", selected_models), + output_format=DetectionJudgmentSchema, + ) + + def postprocess(self, dataframe: pd.DataFrame) -> pd.DataFrame: + """Flatten the raw judge output into VALID / INVALID columns and apply + the passthrough default (rows with no detected entities trivially pass). + """ + out = dataframe.copy() + flattened = out[COL_DETECTION_JUDGE].apply(_flatten_judgment) if COL_DETECTION_JUDGE in out.columns else None + # `items` may be a numpy array after a parquet round-trip via DD, so use + # `len()` rather than `bool()` (which is ambiguous on multi-element arrays). + passthrough_mask = out[_ENTITIES_FOR_JUDGE_COL].apply(lambda items: items is None or len(items) == 0) + + valid: list[bool | None] = [] + invalid: list[list[dict[str, str]]] = [] + for idx in out.index: + if passthrough_mask.loc[idx]: + valid.append(True) + invalid.append([]) + elif flattened is not None: + v, inv = flattened.loc[idx] + valid.append(v) + invalid.append(inv) + else: + valid.append(None) + invalid.append([]) + out[COL_DETECTION_VALID] = valid + out[COL_DETECTION_INVALID_ENTITIES] = invalid + # Stamp passthrough rows with the default raw judge payload so display logic stays consistent. 
+ if COL_DETECTION_JUDGE in out.columns: + out.loc[passthrough_mask, COL_DETECTION_JUDGE] = [{"all_valid": True, "invalid_entities": []}] * int( + passthrough_mask.sum() + ) + return out + + # ------------------------------------------------------------------------ + # Legacy single-judge entry point. Kept so existing callers/tests still work. + # ------------------------------------------------------------------------ + + def evaluate( + self, + dataframe: pd.DataFrame, + *, + model_configs: list[ModelConfig], + selected_models: EvaluateModelSelection, + entities_column: str = COL_ENTITIES_BY_VALUE, + preview_num_records: int | None = None, + ) -> DetectionJudgeResult: + working_df = self.prepare(dataframe, entities_column=entities_column) + + entity_rows, passthrough_rows = split_rows(working_df, column=_ENTITIES_FOR_JUDGE_COL, predicate=bool) + passthrough_rows[COL_DETECTION_JUDGE] = [ + {"all_valid": True, "invalid_entities": []} for _ in range(len(passthrough_rows)) + ] + passthrough_rows[COL_DETECTION_VALID] = True + passthrough_rows[COL_DETECTION_INVALID_ENTITIES] = [[] for _ in range(len(passthrough_rows))] + + if entity_rows.empty: + combined = merge_and_reorder(passthrough_rows) + return DetectionJudgeResult(dataframe=combined, failed_records=[]) + + effective_preview_num_records = ( + min(preview_num_records, len(entity_rows)) if preview_num_records is not None else None + ) + run_result = self._adapter.run_workflow( + entity_rows, + model_configs=model_configs, + columns=[self.column_config(selected_models)], + workflow_name="replace-detection-judge", + preview_num_records=effective_preview_num_records, + ) + + judged_df = run_result.dataframe.copy() + flattened = judged_df[COL_DETECTION_JUDGE].apply(_flatten_judgment) + judged_df[COL_DETECTION_VALID] = flattened.apply(lambda pair: pair[0]) + judged_df[COL_DETECTION_INVALID_ENTITIES] = flattened.apply(lambda pair: pair[1]) + + combined = merge_and_reorder(judged_df, passthrough_rows) + return 
DetectionJudgeResult(dataframe=combined, failed_records=run_result.failed_records) diff --git a/src/anonymizer/engine/evaluation/replace/__init__.py b/src/anonymizer/engine/evaluation/replace/__init__.py new file mode 100644 index 00000000..1a8431c3 --- /dev/null +++ b/src/anonymizer/engine/evaluation/replace/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/anonymizer/engine/evaluation/replace/attribute_fidelity_judge.py b/src/anonymizer/engine/evaluation/replace/attribute_fidelity_judge.py new file mode 100644 index 00000000..b9c72079 --- /dev/null +++ b/src/anonymizer/engine/evaluation/replace/attribute_fidelity_judge.py @@ -0,0 +1,343 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from datetime import datetime + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig +from data_designer.config.models import ModelConfig +from pydantic import BaseModel, Field + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES, + COL_ATTRIBUTE_FIDELITY_JUDGE, + COL_ATTRIBUTE_FIDELITY_VALID, + COL_REPLACEMENT_MAP, +) +from anonymizer.engine.ndd.adapter import FailedRecord, NddAdapter +from anonymizer.engine.ndd.model_loader import resolve_model_alias +from anonymizer.engine.prompt_utils import substitute_placeholders +from anonymizer.engine.row_partitioning import merge_and_reorder, split_rows +from anonymizer.engine.schemas import EntityReplacementMapSchema + +logger = logging.getLogger("anonymizer.evaluation.replace.attribute_fidelity_judge") + +_REPLACEMENTS_FOR_JUDGE_COL = 
"_replacements_for_attribute_fidelity_judge"
# ^ value of `_REPLACEMENTS_FOR_JUDGE_COL`; the assignment starts on the preceding line.


# ---------------------------------------------------------------------------
# Output schema
# ---------------------------------------------------------------------------
# NOTE: the schema classes are documented with `#` comments rather than
# docstrings on purpose: a pydantic model docstring is emitted as the JSON
# schema "description", which would change the structured-output schema.


# One judged (original, label, synthetic) triple and the verdict for it.
class AttributeCheck(BaseModel):
    original: str = Field(description="Original value taken verbatim from the replacement map.")
    label: str = Field(description="Entity label assigned to the original value.")
    synthetic: str = Field(description="Synthetic value that replaced the original.")
    attributes_checked: list[str] = Field(
        default_factory=list,
        description=("Salient within-entity attributes inspected for this triple (e.g. ['gender', 'age_bucket'])."),
    )
    passes: bool = Field(
        description=(
            "True when EVERY attribute in `attributes_checked` is preserved by the synthetic; "
            "false when any clearly changes."
        )
    )
    reasoning: str = Field(
        description=("One short sentence naming the attribute(s) and whether they were preserved or changed.")
    )


# Record-level judge output: an overall flag plus the per-triple checks.
class AttributeFidelityJudgmentSchema(BaseModel):
    all_valid: bool = Field(
        description=("True only if every entry in `entities` has passes=true. False if even one fails.")
    )
    entities: list[AttributeCheck] = Field(
        default_factory=list,
        description=(
            "One entry per replacement triple that has AT LEAST ONE salient attribute. "
            "Triples with no salient attributes (opaque identifiers) are omitted."
        ),
    )


# ---------------------------------------------------------------------------
# Result
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class AttributeFidelityJudgeResult:
    """Outcome of ``AttributeFidelityJudgeWorkflow.evaluate``.

    ``dataframe`` holds the judged and passthrough rows merged back together;
    ``failed_records`` is whatever the adapter reported for the judge run
    (empty when the judge was never invoked).
    """

    dataframe: pd.DataFrame
    failed_records: list[FailedRecord]


# ---------------------------------------------------------------------------
# Prompt
# ---------------------------------------------------------------------------


def _judge_prompt() -> str:
    """Build the attribute-fidelity judge prompt (a Jinja template string).

    Two placeholders are substituted before the template is handed to the
    column config:

    * ``<<REPLACEMENTS_COL>>`` -> the name of the helper column that holds the
      flattened replacement triples, so the Jinja ``for`` loop iterates it.
    * ``<<CURRENT_YEAR>>`` -> the current calendar year, used by the judge to
      turn a ``date_of_birth`` into an age bucket.

    The placeholders MUST be distinct: with identical keys the mapping keeps
    only the last entry and the Jinja loop would iterate over the year.

    NOTE(review): the closing output-format instructions are left untagged;
    confirm whether they are meant to live in their own XML section.
    """
    prompt = """You are an expert judge evaluating ATTRIBUTE FIDELITY of synthetic PII replacements.

<scope>
ATTRIBUTE FIDELITY answers ONE simple question per (original, label, synthetic) triple:
  "Is the synthetic CLOSE ENOUGH to the original on its salient within-entity attributes?"

A GOOD replacement keeps the obvious semantic properties of the original — the things a \
reader would intuit just from looking at the entity. A BAD replacement clearly flips one of \
those properties.

CANONICAL EXAMPLES (these set the bar — calibrate against these):
  - first_name "Valentina" -> "Natalia" -> PASS (gender preserved: both feminine).
  - first_name "Valentina" -> "Mike" -> FAIL (gender clearly flipped).
  - age "40" -> "42" -> PASS (same adult bucket).
  - age "40" -> "12" -> FAIL (adult -> child).

You are NOT judging:
  - whether the synthetic is the right TYPE / format / class (that is a DIFFERENT metric).
  - whether the entities AGREE with each other across the record, e.g. city <-> state, \
    DOB <-> age, name <-> email (DIFFERENT metric).
  - whether the original detection was correct (assume it was).
  - whether the replacement is "diverse" or "different enough" from the original.

This metric is NORMATIVE: pass = a good attribute-preserving replacement; fail = a clearly \
attribute-violating replacement. Be charitable on borderline cases — only flag CLEAR \
attribute flips.
</scope>

<replacements>
{%- for entry in <<REPLACEMENTS_COL>> %}
- original="{{ entry.original }}" | label={{ entry.label }} | synthetic="{{ entry.synthetic }}"
{%- endfor %}
</replacements>

<task>
For each (original, label, synthetic) triple:
1. Decide which salient attributes apply (see <salient_attributes_by_label>).
2. For each applicable attribute the ORIGINAL clearly carries, decide whether the synthetic \
preserves it.
3. Emit one entry in `entities` per triple where AT LEAST ONE salient attribute applies AND \
the original clearly carries it. The entry must include the verbatim entity, the actually-\
inspected `attributes_checked`, a `passes` boolean, and a one-sentence `reasoning`.
4. SKIP triples that have no salient attributes (opaque identifiers, hashes, codes). DO NOT \
emit them.
5. SKIP triples where the original is too ambiguous to anchor any attribute (e.g. gender-\
neutral name with no other signal). DO NOT emit them.
6. Set `all_valid=true` ONLY if every emitted entry has `passes=true`. Otherwise set false. \
If no triple yields a checkable attribute, return `all_valid: true` and `entities: []`.
</task>

<salient_attributes_by_label>
This metric checks ONLY TWO attributes. Do not check anything else.

1. GENDER OF NAME — applies to labels: first_name, last_name, user_name.
   - Check only when the original name CLEARLY implies a gender (e.g. "Valentina", \
     "Michael"). If the original is gender-neutral or ambiguous (e.g. "Alex", "Taylor", \
     "J.", a surname-only token whose gender you can't reliably tell) -> SKIP this triple.
   - The check: does the synthetic name carry the SAME implied gender as the original?

2. AGE BUCKET — applies to labels: age, date_of_birth.
   - Buckets: child (0-12), teen (13-19), young adult (20-29), adult (30-44), \
     middle-aged (45-64), senior (65+). It's fine if the years are +/- 1 year of the original.
   - For `age`, the bucket comes from the numeric value directly. For `date_of_birth`, \
     compute age = <<CURRENT_YEAR>> - dob_year, then map to a bucket.
   - The check: does the synthetic land in the SAME bucket as the original (or an \
     ADJACENT bucket — adjacent counts as preserved; only flag clear bucket flips like \
     adult -> child)?

ALL OTHER LABELS — SKIP. Do not emit entries for any label not listed above. This includes \
cities, countries, occupations, education, organizations, phone numbers, dates that are not \
date_of_birth, protected categorical entities, and every opaque identifier. Their attributes \
are either checked by other metrics or are too unreliable to judge here.
</salient_attributes_by_label>

<rules>
  - Use the LITERAL `label` field to decide which attributes apply. Do not infer attributes \
    from the value's surface form alone.
  - Only check an attribute when the ORIGINAL clearly carries it. If the original is \
    ambiguous on an attribute (gender-neutral name, undated entity, generic city), do NOT \
    check that attribute.
  - For each entity you emit, list every checked attribute in `attributes_checked`. If \
    nothing was checked, DO NOT emit the entry.
  - `passes` = true when EVERY checked attribute is preserved. `passes` = false when any \
    checked attribute clearly changes. Be CHARITABLE on borderline cases — prefer passes=true.
  - `reasoning` must be ONE short sentence per entry, naming the attribute(s) and stating \
    preserved-or-changed.
  - DO NOT introduce entities that were not in the replacement list.
  - `entities` may be empty when the record has no checkable attributes; that is a valid \
    `all_valid=true` outcome.
</rules>

Return ONLY the JSON object that matches the required schema. Do NOT wrap your output in \
``` or ```json markdown fences. Do NOT include any commentary, reasoning, preamble, or text \
outside the JSON object. Your entire response must be a single valid JSON object.
"""
    return substitute_placeholders(
        prompt,
        {
            "<<REPLACEMENTS_COL>>": _REPLACEMENTS_FOR_JUDGE_COL,
            "<<CURRENT_YEAR>>": str(datetime.now().year),
        },
    )


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _passthrough_judgment() -> dict[str, object]:
    """Return a FRESH "nothing to judge" payload.

    A new dict (and a new inner list) is built on every call so rows never
    share one mutable object.
    """
    return {"all_valid": True, "entities": []}


def _replacements_for_judge(raw_map: object) -> list[dict[str, str]]:
    """Flatten COL_REPLACEMENT_MAP into Jinja-friendly dicts.

    Accepts a pydantic model (anything exposing ``model_dump``), a JSON string,
    or a plain dict. Any other input, or input that fails validation against
    ``EntityReplacementMapSchema``, yields ``[]``, which downstream code treats
    as "no replacements" (passthrough).
    """
    if raw_map is None:
        return []
    if hasattr(raw_map, "model_dump"):
        raw_map = raw_map.model_dump(mode="python")
    if isinstance(raw_map, str):
        try:
            raw_map = json.loads(raw_map)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
    if not isinstance(raw_map, dict):
        return []
    try:
        parsed = EntityReplacementMapSchema.model_validate(raw_map)
    except Exception:
        # Deliberately broad: a malformed map must degrade to passthrough, not crash the run.
        return []
    return [{"original": r.original, "label": r.label, "synthetic": r.synthetic} for r in parsed.replacements]


def _flatten_judgment(raw: object) -> tuple[bool | None, list[dict[str, object]]]:
    """Normalize an LLM judge output into (all_valid, invalid_entities).

    Returns ``(None, [])`` for any malformed or missing payload so downstream
    display renders "judge unavailable" rather than fabricating a verdict.

    The returned flag is reconciled with the per-entity checks: the schema
    defines ``all_valid`` as "every entry passes", so a payload that claims
    ``all_valid=True`` while listing a failing entry is reported as ``False``.
    A payload that claims ``False`` with no failing entries is left as-is.
    """
    if raw is None:
        return None, []
    if hasattr(raw, "model_dump"):
        raw = raw.model_dump(mode="python")
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None, []
    if not isinstance(raw, dict):
        return None, []
    try:
        parsed = AttributeFidelityJudgmentSchema.model_validate(raw)
    except Exception:
        # Deliberately broad: any validation problem maps to the "unavailable" sentinel.
        return None, []
    invalid = [e.model_dump() for e in parsed.entities if not e.passes]
    return parsed.all_valid and not invalid, invalid


# ---------------------------------------------------------------------------
# Workflow
# ---------------------------------------------------------------------------


class AttributeFidelityJudgeWorkflow:
    """LLM-as-judge evaluator that checks per-entity attribute preservation.

    Runs after Substitute generates the replacement map. Output columns:
    ``COL_ATTRIBUTE_FIDELITY_VALID`` (bool|None),
    ``COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES`` (list of failing per-entity checks),
    and the raw judge output ``COL_ATTRIBUTE_FIDELITY_JUDGE`` (kept so display
    can derive the success-rate denominator from the full entities list).
    """

    def __init__(self, adapter: NddAdapter) -> None:
        self._adapter = adapter

    def prepare(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``dataframe`` with the flattened-replacements helper column added."""
        working_df = dataframe.copy()
        working_df[_REPLACEMENTS_FOR_JUDGE_COL] = working_df[COL_REPLACEMENT_MAP].apply(_replacements_for_judge)
        return working_df

    def column_config(self, selected_models: EvaluateModelSelection) -> LLMStructuredColumnConfig:
        """Build the structured-output column config that runs the judge prompt."""
        return LLMStructuredColumnConfig(
            name=COL_ATTRIBUTE_FIDELITY_JUDGE,
            prompt=_judge_prompt(),
            model_alias=resolve_model_alias("replace_attribute_fidelity_judge", selected_models),
            output_format=AttributeFidelityJudgmentSchema,
        )

    def postprocess(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Derive the flat verdict columns from the raw judge column.

        Expects the helper column written by ``prepare``. Rows with no
        replacements are marked valid without consulting the judge output;
        rows whose judge output is missing or malformed get ``None``.
        The input frame is not modified.
        """
        out = dataframe.copy()
        flattened = (
            out[COL_ATTRIBUTE_FIDELITY_JUDGE].apply(_flatten_judgment)
            if COL_ATTRIBUTE_FIDELITY_JUDGE in out.columns
            else None
        )
        passthrough_mask = out[_REPLACEMENTS_FOR_JUDGE_COL].apply(lambda items: items is None or len(items) == 0)

        valid: list[bool | None] = []
        invalid: list[list[dict[str, object]]] = []
        # Iterate by position, not label: label lookups return a Series (and
        # break the `if`) when the frame's index contains duplicates.
        for position, is_passthrough in enumerate(passthrough_mask.tolist()):
            if is_passthrough:
                valid.append(True)
                invalid.append([])
            elif flattened is not None:
                verdict, failing = flattened.iloc[position]
                valid.append(verdict)
                invalid.append(failing)
            else:
                valid.append(None)
                invalid.append([])
        out[COL_ATTRIBUTE_FIDELITY_VALID] = valid
        out[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES] = invalid
        if COL_ATTRIBUTE_FIDELITY_JUDGE in out.columns and passthrough_mask.any():
            # One distinct payload per row (no shared mutable dict across rows).
            out.loc[passthrough_mask, COL_ATTRIBUTE_FIDELITY_JUDGE] = [
                _passthrough_judgment() for _ in range(int(passthrough_mask.sum()))
            ]
        return out

    def evaluate(
        self,
        dataframe: pd.DataFrame,
        *,
        model_configs: list[ModelConfig],
        selected_models: EvaluateModelSelection,
        preview_num_records: int | None = None,
    ) -> AttributeFidelityJudgeResult:
        """Run the judge over rows that have replacements and merge the results.

        Rows without replacements never reach the adapter: they are marked
        valid directly. When no row has replacements the adapter is not called
        at all and ``failed_records`` is empty.
        """
        working_df = self.prepare(dataframe)

        with_replacements, passthrough_rows = split_rows(working_df, column=_REPLACEMENTS_FOR_JUDGE_COL, predicate=bool)
        passthrough_rows[COL_ATTRIBUTE_FIDELITY_JUDGE] = [_passthrough_judgment() for _ in range(len(passthrough_rows))]
        passthrough_rows[COL_ATTRIBUTE_FIDELITY_VALID] = True
        passthrough_rows[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES] = [[] for _ in range(len(passthrough_rows))]

        if with_replacements.empty:
            combined = merge_and_reorder(passthrough_rows)
            return AttributeFidelityJudgeResult(dataframe=combined, failed_records=[])

        # Never request more preview records than there are rows to judge.
        effective_preview_num_records = (
            min(preview_num_records, len(with_replacements)) if preview_num_records is not None else None
        )
        run_result = self._adapter.run_workflow(
            with_replacements,
            model_configs=model_configs,
            columns=[self.column_config(selected_models)],
            workflow_name="replace-attribute-fidelity-judge",
            preview_num_records=effective_preview_num_records,
        )

        judged_df = run_result.dataframe.copy()
        flattened = judged_df[COL_ATTRIBUTE_FIDELITY_JUDGE].apply(_flatten_judgment)
        judged_df[COL_ATTRIBUTE_FIDELITY_VALID] = flattened.apply(lambda pair: pair[0])
        judged_df[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES] = flattened.apply(lambda pair: pair[1])

        combined = merge_and_reorder(judged_df, passthrough_rows)
        return AttributeFidelityJudgeResult(dataframe=combined, failed_records=run_result.failed_records)
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig +from data_designer.config.models import ModelConfig +from pydantic import BaseModel, Field + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS, + COL_RELATIONAL_CONSISTENCY_JUDGE, + COL_RELATIONAL_CONSISTENCY_VALID, + COL_REPLACED_TEXT, + COL_REPLACEMENT_MAP, + _jinja, +) +from anonymizer.engine.ndd.adapter import FailedRecord, NddAdapter +from anonymizer.engine.ndd.model_loader import resolve_model_alias +from anonymizer.engine.prompt_utils import substitute_placeholders +from anonymizer.engine.row_partitioning import merge_and_reorder, split_rows +from anonymizer.engine.schemas import EntityReplacementMapSchema + +logger = logging.getLogger("anonymizer.evaluation.replace.relational_consistency_judge") + +_REPLACEMENTS_FOR_JUDGE_COL = "_replacements_for_relational_consistency_judge" + + +# --------------------------------------------------------------------------- +# Output schema +# --------------------------------------------------------------------------- + + +class RelationCheck(BaseModel): + description: str = Field( + description=("Short label of the relation being verified, e.g. 'city <-> state' or 'date_of_birth <-> age'.") + ) + entities: list[str] = Field( + default_factory=list, + description=( + "The entities involved in this relation, each rendered as a single string of the " + "form ' (
+ {detection_judge_html} + {type_fidelity_section} + {attribute_fidelity_section} + {relational_consistency_section}
Replacement Map
diff --git a/src/anonymizer/interface/results.py b/src/anonymizer/interface/results.py index 9a0259f4..fa97c983 100644 --- a/src/anonymizer/interface/results.py +++ b/src/anonymizer/interface/results.py @@ -7,6 +7,7 @@ import pandas as pd +from anonymizer.config.replace_strategies import ReplaceMethod from anonymizer.engine.ndd.adapter import FailedRecord from anonymizer.interface.display import render_record_html @@ -54,12 +55,17 @@ class AnonymizerResult(_DisplayMixin): to avoid colliding with an Anonymizer output column, in which case it is the post-rename identifier (e.g. ``"final_entities__input"``). failed_records: Records that failed during pipeline processing. + replace_method: The replace strategy that produced this result. Set by + ``run()`` / ``preview()``; consumed by ``evaluate()`` to dispatch the + right judges. ``None`` on results that were constructed by hand or + loaded from a pre-strategy-tracking format. """ dataframe: pd.DataFrame trace_dataframe: pd.DataFrame resolved_text_column: str failed_records: list[FailedRecord] + replace_method: ReplaceMethod | None = None _display_cycle_index: int = field(default=0, init=False, repr=False) def __repr__(self) -> str: @@ -86,6 +92,10 @@ class PreviewResult(_DisplayMixin): it is the post-rename identifier (e.g. ``"final_entities__input"``). failed_records: Records that failed during pipeline processing. preview_num_records: Number of records requested for the preview. + replace_method: The replace strategy that produced this preview. Set by + ``preview()``; consumed by ``evaluate()`` to dispatch the right + judges. ``None`` on results that were constructed by hand or loaded + from a pre-strategy-tracking format. 
""" dataframe: pd.DataFrame @@ -93,6 +103,7 @@ class PreviewResult(_DisplayMixin): resolved_text_column: str failed_records: list[FailedRecord] preview_num_records: int + replace_method: ReplaceMethod | None = None _display_cycle_index: int = field(default=0, init=False, repr=False) def __repr__(self) -> str: diff --git a/tests/conftest.py b/tests/conftest.py index 879cc98a..8d374c95 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ from anonymizer.config.anonymizer_config import AnonymizerConfig from anonymizer.config.models import ( DetectionModelSelection, + EvaluateModelSelection, ModelSelection, ReplaceModelSelection, RewriteModelSelection, @@ -79,6 +80,11 @@ def stub_rewrite_model_selection() -> RewriteModelSelection: return load_default_model_selection().rewrite +@pytest.fixture +def stub_evaluate_model_selection() -> EvaluateModelSelection: + return load_default_model_selection().evaluate + + @pytest.fixture def stub_slim_model_selection() -> ModelSelection: """Selection model where every role points to the same known alias.""" @@ -100,6 +106,12 @@ def stub_slim_model_selection() -> ModelSelection: repairer="known", judge="known", ), + evaluate=EvaluateModelSelection( + detection_validity_judge="known", + replace_type_fidelity_judge="known", + replace_relational_consistency_judge="known", + replace_attribute_fidelity_judge="known", + ), ) diff --git a/tests/engine/evaluation/__init__.py b/tests/engine/evaluation/__init__.py new file mode 100644 index 00000000..1a8431c3 --- /dev/null +++ b/tests/engine/evaluation/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/engine/evaluation/replace/__init__.py b/tests/engine/evaluation/replace/__init__.py new file mode 100644 index 00000000..1a8431c3 --- /dev/null +++ b/tests/engine/evaluation/replace/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/engine/evaluation/replace/test_attribute_fidelity_judge.py b/tests/engine/evaluation/replace/test_attribute_fidelity_judge.py new file mode 100644 index 00000000..78902f9d --- /dev/null +++ b/tests/engine/evaluation/replace/test_attribute_fidelity_judge.py @@ -0,0 +1,269 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES, + COL_ATTRIBUTE_FIDELITY_JUDGE, + COL_ATTRIBUTE_FIDELITY_VALID, + COL_REPLACEMENT_MAP, +) +from anonymizer.engine.evaluation.replace.attribute_fidelity_judge import ( + AttributeFidelityJudgeWorkflow, + AttributeFidelityJudgmentSchema, + _flatten_judgment, + _judge_prompt, + _replacements_for_judge, +) + +# --------------------------------------------------------------------------- +# Tests: _judge_prompt +# --------------------------------------------------------------------------- + + +def test_judge_prompt_uses_xml_sections() -> None: + prompt = _judge_prompt() + for tag in ("scope", "replacements", "task", "salient_attributes_by_label", "rules"): + assert f"<{tag}>" in prompt + assert f"" in prompt + + +def test_judge_prompt_iterates_replacement_triples() -> None: + prompt = _judge_prompt() + assert "for entry in _replacements_for_attribute_fidelity_judge" 
in prompt + assert "entry.original" in prompt + assert "entry.label" in prompt + assert "entry.synthetic" in prompt + + +def test_judge_prompt_carves_out_neighbouring_metrics() -> None: + """Prompt must explicitly declare type fidelity and cross-entity coherence as out of scope.""" + prompt = _judge_prompt() + assert "DIFFERENT metric" in prompt + assert "city <-> state" in prompt # mentions cross-entity case as out of scope + + +def test_judge_prompt_scopes_to_gender_and_age_bucket_only() -> None: + """Prompt must restrict checks to the two designated attributes and skip everything else.""" + prompt = _judge_prompt() + assert "GENDER OF NAME" in prompt + assert "AGE BUCKET" in prompt + assert "ALL OTHER LABELS — SKIP" in prompt + + +# --------------------------------------------------------------------------- +# Tests: helpers +# --------------------------------------------------------------------------- + + +def test_replacements_for_judge_flattens_dict_form() -> None: + raw = { + "replacements": [ + {"original": "Sarah", "label": "first_name", "synthetic": "Michael"}, + {"original": "Tokyo", "label": "city", "synthetic": "Paris"}, + ] + } + assert _replacements_for_judge(raw) == [ + {"original": "Sarah", "label": "first_name", "synthetic": "Michael"}, + {"original": "Tokyo", "label": "city", "synthetic": "Paris"}, + ] + + +def test_replacements_for_judge_returns_empty_for_malformed() -> None: + assert _replacements_for_judge(None) == [] + assert _replacements_for_judge("not json") == [] + assert _replacements_for_judge(42) == [] + + +# --------------------------------------------------------------------------- +# Tests: _flatten_judgment +# --------------------------------------------------------------------------- + + +def test_flatten_judgment_all_valid_keeps_invalid_empty() -> None: + raw = { + "all_valid": True, + "entities": [ + { + "original": "Sarah", + "label": "first_name", + "synthetic": "Maria", + "attributes_checked": ["gender"], + "passes": True, + 
"reasoning": "Both names imply feminine gender.", + } + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_extracts_failing_entries_only() -> None: + raw = { + "all_valid": False, + "entities": [ + { + "original": "Sarah", + "label": "first_name", + "synthetic": "Maria", + "attributes_checked": ["gender"], + "passes": True, + "reasoning": "Both feminine.", + }, + { + "original": "40", + "label": "age", + "synthetic": "12", + "attributes_checked": ["age_bucket"], + "passes": False, + "reasoning": "Adult bucket changed to child.", + }, + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is False + assert len(invalid) == 1 + assert invalid[0]["original"] == "40" + assert invalid[0]["passes"] is False + + +def test_flatten_judgment_accepts_pydantic_model() -> None: + payload = AttributeFidelityJudgmentSchema(all_valid=True, entities=[]) + valid, invalid = _flatten_judgment(payload) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_none_returns_unavailable_sentinel() -> None: + assert _flatten_judgment(None) == (None, []) + + +def test_flatten_judgment_malformed_returns_unavailable_sentinel() -> None: + assert _flatten_judgment("not json") == (None, []) + assert _flatten_judgment(42) == (None, []) + assert _flatten_judgment({"missing": True}) == (None, []) + + +# --------------------------------------------------------------------------- +# Tests: AttributeFidelityJudgeWorkflow.evaluate +# --------------------------------------------------------------------------- + + +def _map_payload(items: list[dict]) -> dict: + return {"replacements": items} + + +def test_evaluate_short_circuits_when_no_replacements( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame({COL_REPLACEMENT_MAP: [_map_payload([])]}) + + class _UnusedAdapter: + def run_workflow(self, *args, **kwargs): # pragma: no cover - should not be called + raise 
AssertionError("run_workflow should not be called when there are no replacements") + + wf = AttributeFidelityJudgeWorkflow(adapter=_UnusedAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + assert result.failed_records == [] + assert bool(result.dataframe[COL_ATTRIBUTE_FIDELITY_VALID].iloc[0]) is True + assert result.dataframe[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES].iloc[0] == [] + + +def test_evaluate_invokes_adapter_with_correct_alias_and_schema( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACEMENT_MAP: [ + _map_payload( + [ + {"original": "Sarah", "label": "first_name", "synthetic": "Michael"}, + {"original": "40", "label": "age", "synthetic": "12"}, + ] + ) + ] + } + ) + + captured: dict = {} + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + captured["columns"] = columns + captured["workflow_name"] = workflow_name + out = frame.copy() + out[COL_ATTRIBUTE_FIDELITY_JUDGE] = [ + { + "all_valid": False, + "entities": [ + { + "original": "Sarah", + "label": "first_name", + "synthetic": "Michael", + "attributes_checked": ["gender"], + "passes": False, + "reasoning": "Feminine -> masculine.", + }, + { + "original": "40", + "label": "age", + "synthetic": "12", + "attributes_checked": ["age_bucket"], + "passes": False, + "reasoning": "Adult -> child.", + }, + ], + } + ] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = AttributeFidelityJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert captured["workflow_name"] == "replace-attribute-fidelity-judge" + col = captured["columns"][0] + assert isinstance(col, LLMStructuredColumnConfig) + assert col.name == COL_ATTRIBUTE_FIDELITY_JUDGE + assert col.model_alias == 
stub_evaluate_model_selection.replace_attribute_fidelity_judge + assert col.output_format == AttributeFidelityJudgmentSchema.model_json_schema() + + assert bool(result.dataframe[COL_ATTRIBUTE_FIDELITY_VALID].iloc[0]) is False + invalid = result.dataframe[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES].iloc[0] + assert len(invalid) == 2 + assert {item["original"] for item in invalid} == {"Sarah", "40"} + + +def test_evaluate_marks_unavailable_for_malformed_payload( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + {COL_REPLACEMENT_MAP: [_map_payload([{"original": "Sarah", "label": "first_name", "synthetic": "Maria"}])]} + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_ATTRIBUTE_FIDELITY_JUDGE] = ["not json"] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = AttributeFidelityJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert result.dataframe[COL_ATTRIBUTE_FIDELITY_VALID].iloc[0] is None + assert result.dataframe[COL_ATTRIBUTE_FIDELITY_INVALID_ENTITIES].iloc[0] == [] diff --git a/tests/engine/evaluation/replace/test_relational_consistency_judge.py b/tests/engine/evaluation/replace/test_relational_consistency_judge.py new file mode 100644 index 00000000..d4208181 --- /dev/null +++ b/tests/engine/evaluation/replace/test_relational_consistency_judge.py @@ -0,0 +1,348 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS, + COL_RELATIONAL_CONSISTENCY_JUDGE, + COL_RELATIONAL_CONSISTENCY_VALID, + COL_REPLACED_TEXT, + COL_REPLACEMENT_MAP, +) +from anonymizer.engine.evaluation.replace.relational_consistency_judge import ( + RelationalConsistencyJudgeWorkflow, + RelationalConsistencyJudgmentSchema, + _flatten_judgment, + _judge_prompt, + _replacements_for_judge, +) + +# --------------------------------------------------------------------------- +# Tests: _judge_prompt +# --------------------------------------------------------------------------- + + +def test_judge_prompt_uses_xml_sections() -> None: + prompt = _judge_prompt() + for tag in ("scope", "replaced_text", "replacements", "task", "relations_to_inspect", "rules", "edge_cases"): + assert f"<{tag}>" in prompt + assert f"" in prompt + + +def test_judge_prompt_references_replaced_text_column() -> None: + prompt = _judge_prompt() + assert COL_REPLACED_TEXT in prompt + + +def test_judge_prompt_iterates_replacement_triples() -> None: + prompt = _judge_prompt() + assert "for entry in _replacements_for_relational_consistency_judge" in prompt + assert "entry.original" in prompt + assert "entry.label" in prompt + assert "entry.synthetic" in prompt + + +def test_judge_prompt_disambiguates_from_neighbouring_metrics() -> None: + """Prompt must call out that type/format and semantic-attribute checks are out of scope.""" + prompt = _judge_prompt() + assert "DIFFERENT metric" in prompt + + +def test_judge_prompt_requires_passing_relations_in_output() -> None: + """Denominator depends on the judge listing passes AND fails.""" + prompt = _judge_prompt() + assert "denominator" in prompt.lower() + + +def 
test_judge_prompt_blocks_generic_date_as_date_of_birth() -> None: + """The judge must not treat a generic `date` (career year, etc.) as a `date_of_birth`. + + Regression guard for a real failure observed on the biographies dataset, where a + sentence like "returning home in 2012" caused the judge to pair the `date` entity + with `age` and compute `current_year - 2012 != age`, producing a false negative. + """ + prompt = _judge_prompt() + assert "literally `date_of_birth`" in prompt + assert "generic `date`" in prompt + assert "SKIP the temporal relation" in prompt + + +def test_judge_prompt_requires_literal_label_matching() -> None: + """Relations are matched by the literal label field, not by inferring from the value's surface form.""" + prompt = _judge_prompt() + assert "LITERAL `label` field" in prompt + assert "Do NOT infer a label from" in prompt + + +# --------------------------------------------------------------------------- +# Tests: helpers +# --------------------------------------------------------------------------- + + +def test_replacements_for_judge_flattens_dict_form() -> None: + raw = { + "replacements": [ + {"original": "Austin", "label": "city", "synthetic": "Portland"}, + {"original": "TX", "label": "state", "synthetic": "OR"}, + ] + } + assert _replacements_for_judge(raw) == [ + {"original": "Austin", "label": "city", "synthetic": "Portland"}, + {"original": "TX", "label": "state", "synthetic": "OR"}, + ] + + +def test_replacements_for_judge_returns_empty_for_malformed() -> None: + assert _replacements_for_judge(None) == [] + assert _replacements_for_judge("not json") == [] + assert _replacements_for_judge(42) == [] + + +# --------------------------------------------------------------------------- +# Tests: _flatten_judgment +# --------------------------------------------------------------------------- + + +def test_flatten_judgment_all_consistent_keeps_invalid_empty() -> None: + raw = { + "all_consistent": True, + "relations": [ + { + 
"description": "city <-> state", + "entities": [ + "Austin (city) -> Portland", + "TX (state) -> OR", + ], + "passes": True, + "reasoning": "Portland is in Oregon.", + } + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_extracts_failing_relations_only() -> None: + raw = { + "all_consistent": False, + "relations": [ + { + "description": "city <-> state", + "entities": [], + "passes": True, + "reasoning": "ok", + }, + { + "description": "date_of_birth <-> age", + "entities": [], + "passes": False, + "reasoning": "DOB 1990 vs age 12 is impossible.", + }, + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is False + assert len(invalid) == 1 + assert invalid[0]["description"] == "date_of_birth <-> age" + assert invalid[0]["passes"] is False + + +def test_flatten_judgment_accepts_pydantic_model() -> None: + payload = RelationalConsistencyJudgmentSchema(all_consistent=True, relations=[]) + valid, invalid = _flatten_judgment(payload) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_none_returns_unavailable_sentinel() -> None: + assert _flatten_judgment(None) == (None, []) + + +def test_flatten_judgment_malformed_returns_unavailable_sentinel() -> None: + assert _flatten_judgment("not json") == (None, []) + assert _flatten_judgment(42) == (None, []) + assert _flatten_judgment({"missing_top_level": True}) == (None, []) + + +# --------------------------------------------------------------------------- +# Tests: RelationalConsistencyJudgeWorkflow.evaluate +# --------------------------------------------------------------------------- + + +def _map_payload(items: list[dict]) -> dict: + return {"replacements": items} + + +def test_evaluate_short_circuits_when_fewer_than_two_replacements( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACED_TEXT: ["Alice"], + COL_REPLACEMENT_MAP: [_map_payload([{"original": 
"Alice", "label": "first_name", "synthetic": "Maya"}])], + } + ) + + class _UnusedAdapter: + def run_workflow(self, *args, **kwargs): # pragma: no cover - should not be called + raise AssertionError("run_workflow should not be called when there are <2 replacements") + + wf = RelationalConsistencyJudgeWorkflow(adapter=_UnusedAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + assert result.failed_records == [] + assert bool(result.dataframe[COL_RELATIONAL_CONSISTENCY_VALID].iloc[0]) is True + assert result.dataframe[COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS].iloc[0] == [] + + +def test_evaluate_invokes_adapter_with_correct_alias_and_schema( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACED_TEXT: ["Maya works in Portland, OR"], + COL_REPLACEMENT_MAP: [ + _map_payload( + [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Austin", "label": "city", "synthetic": "Portland"}, + {"original": "TX", "label": "state", "synthetic": "OR"}, + ] + ) + ], + } + ) + + captured: dict = {} + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + captured["columns"] = columns + captured["workflow_name"] = workflow_name + out = frame.copy() + out[COL_RELATIONAL_CONSISTENCY_JUDGE] = [ + { + "all_consistent": True, + "relations": [ + { + "description": "city <-> state", + "entities": [ + "Austin (city) -> Portland", + "TX (state) -> OR", + ], + "passes": True, + "reasoning": "Portland is in Oregon.", + } + ], + } + ] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = RelationalConsistencyJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert captured["workflow_name"] == "replace-relational-consistency-judge" + col = captured["columns"][0] + 
assert isinstance(col, LLMStructuredColumnConfig) + assert col.name == COL_RELATIONAL_CONSISTENCY_JUDGE + assert col.model_alias == stub_evaluate_model_selection.replace_relational_consistency_judge + assert col.output_format == RelationalConsistencyJudgmentSchema.model_json_schema() + + assert bool(result.dataframe[COL_RELATIONAL_CONSISTENCY_VALID].iloc[0]) is True + assert result.dataframe[COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS].iloc[0] == [] + + +def test_evaluate_marks_unavailable_for_malformed_payload( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACED_TEXT: ["Maya works in Portland, OR"], + COL_REPLACEMENT_MAP: [ + _map_payload( + [ + {"original": "Austin", "label": "city", "synthetic": "Portland"}, + {"original": "TX", "label": "state", "synthetic": "OR"}, + ] + ) + ], + } + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_RELATIONAL_CONSISTENCY_JUDGE] = ["not json"] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = RelationalConsistencyJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert result.dataframe[COL_RELATIONAL_CONSISTENCY_VALID].iloc[0] is None + assert result.dataframe[COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS].iloc[0] == [] + + +def test_evaluate_propagates_failing_relations( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACED_TEXT: ["..."], + COL_REPLACEMENT_MAP: [ + _map_payload( + [ + {"original": "1990", "label": "date_of_birth", "synthetic": "2015"}, + {"original": "35", "label": "age", "synthetic": "35"}, + ] + ) + ], + } + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + 
out[COL_RELATIONAL_CONSISTENCY_JUDGE] = [ + { + "all_consistent": False, + "relations": [ + { + "description": "date_of_birth <-> age", + "entities": [ + "1990 (date_of_birth) -> 2015", + "35 (age) -> 35", + ], + "passes": False, + "reasoning": "A 2015 birthdate does not yield age 35.", + } + ], + } + ] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = RelationalConsistencyJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + assert bool(result.dataframe[COL_RELATIONAL_CONSISTENCY_VALID].iloc[0]) is False + invalid = result.dataframe[COL_RELATIONAL_CONSISTENCY_INVALID_RELATIONS].iloc[0] + assert len(invalid) == 1 + assert invalid[0]["description"] == "date_of_birth <-> age" diff --git a/tests/engine/evaluation/replace/test_type_fidelity_judge.py b/tests/engine/evaluation/replace/test_type_fidelity_judge.py new file mode 100644 index 00000000..45523ae0 --- /dev/null +++ b/tests/engine/evaluation/replace/test_type_fidelity_judge.py @@ -0,0 +1,296 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_REPLACEMENT_MAP, + COL_TYPE_FIDELITY_INVALID_REPLACEMENTS, + COL_TYPE_FIDELITY_JUDGE, + COL_TYPE_FIDELITY_VALID, +) +from anonymizer.engine.evaluation.replace.type_fidelity_judge import ( + TypeFidelityJudgeWorkflow, + TypeFidelityJudgmentSchema, + _flatten_judgment, + _judge_prompt, + _label_examples_for_judge, + _replacements_for_judge, +) + +# --------------------------------------------------------------------------- +# Tests: _judge_prompt +# --------------------------------------------------------------------------- + + +def test_judge_prompt_uses_xml_sections() -> None: + prompt = _judge_prompt() + for tag in ( + "scope", + "replacements", + "reference_label_examples", + "task", + "class_membership_rules", + "format_type_rules", + "edge_cases", + "output_rules", + ): + assert f"<{tag}>" in prompt + assert f"</{tag}>" in prompt + + +def test_judge_prompt_iterates_replacement_triples() -> None: + prompt = _judge_prompt() + assert "for entry in _replacements_for_type_fidelity_judge" in prompt + assert "entry.original" in prompt + assert "entry.label" in prompt + assert "entry.synthetic" in prompt + + +def test_judge_prompt_disambiguates_from_neighbouring_metrics() -> None: + """Prompt must call out that semantic attributes and cross-entity consistency are + OUT of scope, otherwise the judge will silently penalize valid replacements.""" + prompt = _judge_prompt() + assert "DIFFERENT metric" in prompt + assert "gender of a name" in prompt + assert "city/state" in prompt + + +# --------------------------------------------------------------------------- +# Tests: helpers +# --------------------------------------------------------------------------- + + +def 
test_replacements_for_judge_flattens_dict_form() -> None: + raw = { + "replacements": [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Acme", "label": "company_name", "synthetic": "NovaCorp"}, + ] + } + assert _replacements_for_judge(raw) == [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Acme", "label": "company_name", "synthetic": "NovaCorp"}, + ] + + +def test_replacements_for_judge_accepts_json_string() -> None: + payload = '{"replacements":[{"original":"Alice","label":"first_name","synthetic":"Maya"}]}' + assert _replacements_for_judge(payload) == [{"original": "Alice", "label": "first_name", "synthetic": "Maya"}] + + +def test_replacements_for_judge_returns_empty_for_malformed() -> None: + assert _replacements_for_judge(None) == [] + assert _replacements_for_judge("not json") == [] + assert _replacements_for_judge(42) == [] + assert _replacements_for_judge({"replacements": "oops"}) == [] + + +def test_label_examples_for_judge_only_includes_labels_in_replacements() -> None: + examples_json = _label_examples_for_judge([{"original": "Alice", "label": "first_name", "synthetic": "Maya"}]) + assert "first_name" in examples_json + assert "ssn" not in examples_json + + +def test_label_examples_for_judge_empty_when_no_replacements() -> None: + assert _label_examples_for_judge([]) == "{}" + + +# --------------------------------------------------------------------------- +# Tests: _flatten_judgment +# --------------------------------------------------------------------------- + + +def test_flatten_judgment_all_valid_path() -> None: + valid, invalid = _flatten_judgment({"all_valid": True, "invalid_replacements": []}) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_returns_invalid_entries() -> None: + raw = { + "all_valid": False, + "invalid_replacements": [ + { + "original": "Alice", + "label": "first_name", + "synthetic": "[REDACTED]", + "reasoning": "class 
membership: placeholder, not a person name", + }, + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is False + assert invalid == [ + { + "original": "Alice", + "label": "first_name", + "synthetic": "[REDACTED]", + "reasoning": "class membership: placeholder, not a person name", + } + ] + + +def test_flatten_judgment_accepts_pydantic_model() -> None: + payload = TypeFidelityJudgmentSchema(all_valid=True, invalid_replacements=[]) + valid, invalid = _flatten_judgment(payload) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_none_returns_unavailable_sentinel() -> None: + assert _flatten_judgment(None) == (None, []) + + +def test_flatten_judgment_malformed_returns_unavailable_sentinel() -> None: + assert _flatten_judgment("not json") == (None, []) + assert _flatten_judgment(42) == (None, []) + assert _flatten_judgment({"missing": "all_valid"}) == (None, []) + + +# --------------------------------------------------------------------------- +# Tests: TypeFidelityJudgeWorkflow.evaluate +# --------------------------------------------------------------------------- + + +def _map_payload(items: list[dict]) -> dict: + return {"replacements": items} + + +def test_evaluate_short_circuits_when_no_replacements( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame({COL_REPLACEMENT_MAP: [_map_payload([])]}) + + class _UnusedAdapter: + def run_workflow(self, *args, **kwargs): # pragma: no cover - should not be called + raise AssertionError("run_workflow should not be called when there are no replacements") + + wf = TypeFidelityJudgeWorkflow(adapter=_UnusedAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + assert result.failed_records == [] + assert bool(result.dataframe[COL_TYPE_FIDELITY_VALID].iloc[0]) is True + assert result.dataframe[COL_TYPE_FIDELITY_INVALID_REPLACEMENTS].iloc[0] == [] + + +def 
test_evaluate_invokes_adapter_with_correct_alias_and_schema( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACEMENT_MAP: [ + _map_payload( + [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "alice@x.com", "label": "email", "synthetic": "not-an-email"}, + ] + ) + ] + } + ) + + captured: dict = {} + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + captured["columns"] = columns + captured["workflow_name"] = workflow_name + out = frame.copy() + out[COL_TYPE_FIDELITY_JUDGE] = [ + { + "all_valid": False, + "invalid_replacements": [ + { + "original": "alice@x.com", + "label": "email", + "synthetic": "not-an-email", + "reasoning": "format: missing '@' and domain", + } + ], + } + ] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = TypeFidelityJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert captured["workflow_name"] == "replace-type-fidelity-judge" + col = captured["columns"][0] + assert isinstance(col, LLMStructuredColumnConfig) + assert col.name == COL_TYPE_FIDELITY_JUDGE + assert col.model_alias == stub_evaluate_model_selection.replace_type_fidelity_judge + assert col.output_format == TypeFidelityJudgmentSchema.model_json_schema() + + assert bool(result.dataframe[COL_TYPE_FIDELITY_VALID].iloc[0]) is False + invalid = result.dataframe[COL_TYPE_FIDELITY_INVALID_REPLACEMENTS].iloc[0] + assert invalid == [ + { + "original": "alice@x.com", + "label": "email", + "synthetic": "not-an-email", + "reasoning": "format: missing '@' and domain", + } + ] + + +def test_evaluate_preserves_row_order_when_mixing_empty_and_populated_maps( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + { + COL_REPLACEMENT_MAP: [ + _map_payload([]), + 
_map_payload([{"original": "Alice", "label": "first_name", "synthetic": "Maya"}]), + ] + } + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_TYPE_FIDELITY_JUDGE] = [{"all_valid": True, "invalid_replacements": []}] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = TypeFidelityJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert [bool(v) for v in result.dataframe[COL_TYPE_FIDELITY_VALID]] == [True, True] + + +def test_evaluate_marks_unavailable_for_malformed_payload( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + df = pd.DataFrame( + {COL_REPLACEMENT_MAP: [_map_payload([{"original": "Alice", "label": "first_name", "synthetic": "Maya"}])]} + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_TYPE_FIDELITY_JUDGE] = ["not json"] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = TypeFidelityJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert result.dataframe[COL_TYPE_FIDELITY_VALID].iloc[0] is None + assert result.dataframe[COL_TYPE_FIDELITY_INVALID_REPLACEMENTS].iloc[0] == [] diff --git a/tests/engine/evaluation/test_detection_judge.py b/tests/engine/evaluation/test_detection_judge.py new file mode 100644 index 00000000..1d92eca2 --- /dev/null +++ b/tests/engine/evaluation/test_detection_judge.py @@ -0,0 +1,274 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pandas as pd +from data_designer.config.column_configs import LLMStructuredColumnConfig + +from anonymizer.config.models import EvaluateModelSelection +from anonymizer.engine.constants import ( + COL_DETECTION_INVALID_ENTITIES, + COL_DETECTION_JUDGE, + COL_DETECTION_VALID, + COL_ENTITIES_BY_VALUE, + COL_TEXT, +) +from anonymizer.engine.evaluation.detection_judge import ( + DetectionJudgeWorkflow, + DetectionJudgmentSchema, + _entities_for_judge, + _flatten_judgment, + _judge_prompt, + _label_examples_for_judge, +) +from anonymizer.engine.schemas import EntitiesByValueSchema + +# --------------------------------------------------------------------------- +# Tests: _judge_prompt +# --------------------------------------------------------------------------- + + +def test_judge_prompt_uses_xml_sections() -> None: + prompt = _judge_prompt() + for tag in ("original_text", "detected_entities", "task", "invalid_criteria", "valid_criteria"): + assert f"<{tag}>" in prompt + assert f"</{tag}>" in prompt + + +def test_judge_prompt_references_original_text_column() -> None: + prompt = _judge_prompt() + assert COL_TEXT in prompt + + +def test_judge_prompt_iterates_detected_entities() -> None: + prompt = _judge_prompt() + assert "for entity in _entities_for_detection_judge" in prompt + assert "entity.value" in prompt + assert "entity.label" in prompt + + +# --------------------------------------------------------------------------- +# Tests: helpers +# --------------------------------------------------------------------------- + + +def test_entities_for_judge_flattens_labels() -> None: + parsed = EntitiesByValueSchema.from_raw( + { + "entities_by_value": [ + {"value": "Alice", "labels": ["first_name"]}, + {"value": "Acme", "labels": ["company_name", "organization_name"]}, + ] + } + ) + rows = _entities_for_judge(parsed) + assert rows == [ + {"value": "Alice", "label": "first_name"}, + {"value": "Acme", 
"label": "company_name"}, + {"value": "Acme", "label": "organization_name"}, + ] + + +def test_label_examples_for_judge_returns_json_keyed_by_label() -> None: + parsed = EntitiesByValueSchema.from_raw({"entities_by_value": [{"value": "Alice", "labels": ["first_name"]}]}) + examples_json = _label_examples_for_judge(parsed) + assert "first_name" in examples_json + assert examples_json.startswith("{") + + +def test_label_examples_for_judge_empty_when_no_entities() -> None: + parsed = EntitiesByValueSchema() + assert _label_examples_for_judge(parsed) == "{}" + + +# --------------------------------------------------------------------------- +# Tests: _flatten_judgment +# --------------------------------------------------------------------------- + + +def test_flatten_judgment_all_valid_path() -> None: + valid, invalid = _flatten_judgment({"all_valid": True, "invalid_entities": []}) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_returns_invalid_entries() -> None: + raw = { + "all_valid": False, + "invalid_entities": [ + {"value": "morning", "label": "date_time", "reasoning": "common word"}, + ], + } + valid, invalid = _flatten_judgment(raw) + assert valid is False + assert invalid == [{"value": "morning", "label": "date_time", "reasoning": "common word"}] + + +def test_flatten_judgment_accepts_pydantic_model() -> None: + payload = DetectionJudgmentSchema(all_valid=True, invalid_entities=[]) + valid, invalid = _flatten_judgment(payload) + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_accepts_json_string() -> None: + valid, invalid = _flatten_judgment('{"all_valid": true, "invalid_entities": []}') + assert valid is True + assert invalid == [] + + +def test_flatten_judgment_none_returns_unavailable_sentinel() -> None: + assert _flatten_judgment(None) == (None, []) + + +def test_flatten_judgment_malformed_returns_unavailable_sentinel() -> None: + assert _flatten_judgment("not json") == (None, []) + assert 
_flatten_judgment(42) == (None, []) + assert _flatten_judgment({"missing": "all_valid"}) == (None, []) + + +# --------------------------------------------------------------------------- +# Tests: DetectionJudgeWorkflow.evaluate +# --------------------------------------------------------------------------- + + +def _entities_payload(entities: list[dict]) -> dict: + return {"entities_by_value": entities} + + +def test_evaluate_short_circuits_when_no_entities( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """Rows with no detected entities skip the LLM call and pass trivially.""" + df = pd.DataFrame( + { + COL_TEXT: ["plain text"], + COL_ENTITIES_BY_VALUE: [_entities_payload([])], + } + ) + + class _UnusedAdapter: + def run_workflow(self, *args, **kwargs): # pragma: no cover - should not be called + raise AssertionError("run_workflow should not be called for empty-entity rows") + + wf = DetectionJudgeWorkflow(adapter=_UnusedAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + assert result.failed_records == [] + assert bool(result.dataframe[COL_DETECTION_VALID].iloc[0]) is True + assert result.dataframe[COL_DETECTION_INVALID_ENTITIES].iloc[0] == [] + + +def test_evaluate_invokes_adapter_for_rows_with_entities( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """Rows with entities get a structured-column workflow keyed on detection_judge.""" + df = pd.DataFrame( + { + COL_TEXT: ["Alice works at Acme"], + COL_ENTITIES_BY_VALUE: [ + _entities_payload( + [ + {"value": "Alice", "labels": ["first_name"]}, + {"value": "Acme", "labels": ["company_name"]}, + ] + ) + ], + } + ) + + captured: dict = {} + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + captured["columns"] = columns + captured["workflow_name"] = workflow_name + out = frame.copy() + out[COL_DETECTION_JUDGE] = [ + { + "all_valid": False, + 
"invalid_entities": [{"value": "Acme", "label": "company_name", "reasoning": "spurious"}], + } + ] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = DetectionJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert captured["workflow_name"] == "replace-detection-judge" + assert len(captured["columns"]) == 1 + col = captured["columns"][0] + assert isinstance(col, LLMStructuredColumnConfig) + assert col.name == COL_DETECTION_JUDGE + assert col.model_alias == stub_evaluate_model_selection.detection_validity_judge + assert col.output_format == DetectionJudgmentSchema.model_json_schema() + + assert bool(result.dataframe[COL_DETECTION_VALID].iloc[0]) is False + invalid = result.dataframe[COL_DETECTION_INVALID_ENTITIES].iloc[0] + assert invalid == [{"value": "Acme", "label": "company_name", "reasoning": "spurious"}] + + +def test_evaluate_merges_entity_and_empty_rows_in_order( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """Rows are returned in their original order, even when one bypasses the LLM.""" + df = pd.DataFrame( + { + COL_TEXT: ["no entities here", "Alice was here"], + COL_ENTITIES_BY_VALUE: [ + _entities_payload([]), + _entities_payload([{"value": "Alice", "labels": ["first_name"]}]), + ], + } + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_DETECTION_JUDGE] = [{"all_valid": True, "invalid_entities": []}] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = DetectionJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert list(result.dataframe[COL_TEXT]) == ["no entities here", "Alice was here"] + assert [bool(v) for v in result.dataframe[COL_DETECTION_VALID]] == [True, True] + 
+ +def test_evaluate_marks_judge_unavailable_for_malformed_payload( + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """Malformed judge output leaves detection_valid=None rather than fabricating a verdict.""" + df = pd.DataFrame( + { + COL_TEXT: ["Alice"], + COL_ENTITIES_BY_VALUE: [_entities_payload([{"value": "Alice", "labels": ["first_name"]}])], + } + ) + + class _StubAdapter: + def run_workflow(self, frame, *, model_configs, columns, workflow_name, preview_num_records=None): + out = frame.copy() + out[COL_DETECTION_JUDGE] = ["not json"] + + class _Result: + dataframe = out + failed_records: list = [] + + return _Result() + + wf = DetectionJudgeWorkflow(adapter=_StubAdapter()) + result = wf.evaluate(df, model_configs=[], selected_models=stub_evaluate_model_selection) + + assert result.dataframe[COL_DETECTION_VALID].iloc[0] is None + assert result.dataframe[COL_DETECTION_INVALID_ENTITIES].iloc[0] == [] diff --git a/tests/engine/test_model_loader.py b/tests/engine/test_model_loader.py index 14e94710..3b754dd4 100644 --- a/tests/engine/test_model_loader.py +++ b/tests/engine/test_model_loader.py @@ -312,7 +312,15 @@ def test_validate_model_alias_references_raises_on_unknown_replace_alias_when_en stub_slim_model_selection: ModelSelection, ) -> None: selected_models = stub_slim_model_selection.model_copy( - update={"replace": ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) with pytest.raises(ValueError, match="bad-replace-alias"): @@ -328,7 +336,15 @@ def test_validate_model_alias_references_skips_replace_alias_when_not_enabled( stub_slim_model_selection: ModelSelection, ) -> None: selected_models = stub_slim_model_selection.model_copy( - update={"replace": 
ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) validate_model_alias_references( diff --git a/tests/engine/test_replace_runner.py b/tests/engine/test_replace_runner.py index 17339068..adc4d970 100644 --- a/tests/engine/test_replace_runner.py +++ b/tests/engine/test_replace_runner.py @@ -10,10 +10,28 @@ import pytest from data_designer.config.models import ModelConfig -from anonymizer.config.models import ReplaceModelSelection +from anonymizer.config.models import EvaluateModelSelection, ReplaceModelSelection from anonymizer.config.replace_strategies import Hash, Redact, Substitute -from anonymizer.engine.constants import COL_FINAL_ENTITIES, COL_REPLACED_TEXT, COL_REPLACEMENT_MAP, COL_TEXT -from anonymizer.engine.ndd.adapter import FailedRecord +from anonymizer.engine.constants import ( + COL_ATTRIBUTE_FIDELITY_JUDGE, + COL_ATTRIBUTE_FIDELITY_VALID, + COL_DETECTION_JUDGE, + COL_DETECTION_VALID, + COL_ENTITIES_BY_VALUE, + COL_FINAL_ENTITIES, + COL_RELATIONAL_CONSISTENCY_JUDGE, + COL_RELATIONAL_CONSISTENCY_VALID, + COL_REPLACED_TEXT, + COL_REPLACEMENT_MAP, + COL_TEXT, + COL_TYPE_FIDELITY_JUDGE, + COL_TYPE_FIDELITY_VALID, +) +from anonymizer.engine.evaluation.detection_judge import DetectionJudgeWorkflow +from anonymizer.engine.evaluation.replace.attribute_fidelity_judge import AttributeFidelityJudgeWorkflow +from anonymizer.engine.evaluation.replace.relational_consistency_judge import RelationalConsistencyJudgeWorkflow +from anonymizer.engine.evaluation.replace.type_fidelity_judge import TypeFidelityJudgeWorkflow +from anonymizer.engine.ndd.adapter import RECORD_ID_COLUMN, FailedRecord, WorkflowRunResult from anonymizer.engine.replace.llm_replace_workflow import LlmReplaceResult from 
anonymizer.engine.replace.replace_runner import ReplacementWorkflow from anonymizer.engine.replace.strategies import apply_replacement_map @@ -116,6 +134,268 @@ def test_substitute_without_workflow_raises( ) +def test_evaluate_uses_merged_dd_workflow_for_judges( + stub_model_configs: list[ModelConfig], + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """``evaluate()`` runs all 4 judges as columns of a SINGLE DD workflow call + (DataDesigner parallelizes the columns internally — no Python threads).""" + + # Trace-shaped input: simulates a dataframe returned by a prior ``run()``. + saved_trace = pd.DataFrame( + { + COL_TEXT: ["Alice works at Acme"], + COL_FINAL_ENTITIES: [{"entities": []}], + COL_REPLACED_TEXT: ["Maya works at NovaCorp"], + COL_REPLACEMENT_MAP: [ + { + "replacements": [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Acme", "label": "organization", "synthetic": "NovaCorp"}, + ] + } + ], + COL_ENTITIES_BY_VALUE: [ + { + "entities_by_value": [ + {"value": "Alice", "labels": ["first_name"]}, + {"value": "Acme", "labels": ["organization"]}, + ] + } + ], + } + ) + + judge_defaults = { + COL_DETECTION_JUDGE: {"all_valid": True, "invalid_entities": []}, + COL_TYPE_FIDELITY_JUDGE: {"all_valid": True, "invalid_replacements": []}, + COL_RELATIONAL_CONSISTENCY_JUDGE: {"all_consistent": True, "relations": []}, + COL_ATTRIBUTE_FIDELITY_JUDGE: {"all_valid": True, "entities": []}, + } + + def fake_run_workflow(df: pd.DataFrame, *, columns, **_: object) -> WorkflowRunResult: + out = df.copy() + for column in columns: + out[column.name] = [judge_defaults[column.name]] * len(out) + return WorkflowRunResult(dataframe=out, failed_records=[]) + + def fake_attach_ids(df: pd.DataFrame) -> pd.DataFrame: + if RECORD_ID_COLUMN in df.columns: + return df.copy() + out = df.copy() + out[RECORD_ID_COLUMN] = [f"id-{i}" for i in range(len(out))] + return out + + adapter = Mock() + adapter.run_workflow.side_effect = 
fake_run_workflow + adapter._attach_record_ids.side_effect = fake_attach_ids + + runner = ReplacementWorkflow( + detection_judge=DetectionJudgeWorkflow(adapter=adapter), + type_fidelity_judge=TypeFidelityJudgeWorkflow(adapter=adapter), + relational_consistency_judge=RelationalConsistencyJudgeWorkflow(adapter=adapter), + attribute_fidelity_judge=AttributeFidelityJudgeWorkflow(adapter=adapter), + adapter=adapter, + ) + + result = runner.evaluate( + saved_trace, + replace_method=Substitute(), + model_configs=stub_model_configs, + selected_models=stub_evaluate_model_selection, + ) + + # Exactly ONE adapter call for the judges step (proves merge, not 4 separate workflows). + assert adapter.run_workflow.call_count == 1 + call_columns = adapter.run_workflow.call_args.kwargs["columns"] + assert {c.name for c in call_columns} == set(judge_defaults) + + # And each judge's VALID column ended up on the result, with True (default payload above). + for col in ( + COL_DETECTION_VALID, + COL_TYPE_FIDELITY_VALID, + COL_RELATIONAL_CONSISTENCY_VALID, + COL_ATTRIBUTE_FIDELITY_VALID, + ): + assert col in result.dataframe.columns, f"missing column: {col}" + assert bool(result.dataframe[col].iloc[0]) is True + + +def test_evaluate_preserves_all_rows_when_llm_drops_some( + stub_model_configs: list[ModelConfig], + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """Evaluation is non-critical: rows the LLM drops (parse error, timeout, + etc.) must still appear in the result with *_valid=None ("Unavailable"), + not vanish from a previously successful preview/run. 
+ """ + saved_trace = pd.DataFrame( + { + COL_TEXT: ["Alice works at Acme", "Bob works at Globex"], + COL_FINAL_ENTITIES: [{"entities": []}, {"entities": []}], + COL_REPLACED_TEXT: ["Maya works at NovaCorp", "Carl works at Initech"], + COL_REPLACEMENT_MAP: [ + { + "replacements": [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Acme", "label": "organization", "synthetic": "NovaCorp"}, + ] + }, + { + "replacements": [ + {"original": "Bob", "label": "first_name", "synthetic": "Carl"}, + {"original": "Globex", "label": "organization", "synthetic": "Initech"}, + ] + }, + ], + COL_ENTITIES_BY_VALUE: [ + {"entities_by_value": [{"value": "Alice", "labels": ["first_name"]}]}, + {"entities_by_value": [{"value": "Bob", "labels": ["first_name"]}]}, + ], + } + ) + + judge_payload = { + COL_DETECTION_JUDGE: {"all_valid": True, "invalid_entities": []}, + COL_TYPE_FIDELITY_JUDGE: {"all_valid": True, "invalid_replacements": []}, + COL_RELATIONAL_CONSISTENCY_JUDGE: {"all_consistent": True, "relations": []}, + COL_ATTRIBUTE_FIDELITY_JUDGE: {"all_valid": True, "entities": []}, + } + + def fake_attach_ids(df: pd.DataFrame) -> pd.DataFrame: + if RECORD_ID_COLUMN in df.columns: + return df.copy() + out = df.copy() + out[RECORD_ID_COLUMN] = [f"id-{i}" for i in range(len(out))] + return out + + def fake_run_workflow(df: pd.DataFrame, *, columns, **_: object) -> WorkflowRunResult: + # Simulate the LLM successfully judging only the first row; + # the second row got dropped during the workflow. 
+ kept = df.iloc[:1].copy() + for column in columns: + kept[column.name] = [judge_payload[column.name]] * len(kept) + dropped = FailedRecord(record_id="id-1", step="replace-judges", reason="parse error") + return WorkflowRunResult(dataframe=kept, failed_records=[dropped]) + + adapter = Mock() + adapter._attach_record_ids.side_effect = fake_attach_ids + adapter.run_workflow.side_effect = fake_run_workflow + + runner = ReplacementWorkflow( + detection_judge=DetectionJudgeWorkflow(adapter=adapter), + type_fidelity_judge=TypeFidelityJudgeWorkflow(adapter=adapter), + relational_consistency_judge=RelationalConsistencyJudgeWorkflow(adapter=adapter), + attribute_fidelity_judge=AttributeFidelityJudgeWorkflow(adapter=adapter), + adapter=adapter, + ) + result = runner.evaluate( + saved_trace, + replace_method=Substitute(), + model_configs=stub_model_configs, + selected_models=stub_evaluate_model_selection, + ) + + # Row count is preserved end-to-end. + assert len(result.dataframe) == 2 + # First row got a real verdict. + assert bool(result.dataframe[COL_DETECTION_VALID].iloc[0]) is True + # Second row (LLM-dropped) is surfaced as Unavailable, not dropped. + assert result.dataframe[COL_DETECTION_VALID].iloc[1] is None + assert result.dataframe[COL_TYPE_FIDELITY_VALID].iloc[1] is None + assert result.dataframe[COL_RELATIONAL_CONSISTENCY_VALID].iloc[1] is None + assert result.dataframe[COL_ATTRIBUTE_FIDELITY_VALID].iloc[1] is None + # The drop is still visible via failed_records for downstream observability. + assert len(result.failed_records) == 1 + assert result.failed_records[0].record_id == "id-1" + + +def test_runner_does_not_invoke_judges( + stub_model_configs: list[ModelConfig], + stub_replace_model_selection: ReplaceModelSelection, + stub_entities: list[dict], +) -> None: + """``ReplacementWorkflow.run()`` only does the replace step — never the judges. + + The judges live behind a separate ``evaluate()`` call. 
+ """ + llm_workflow = Mock() + llm_workflow.generate_map_only.return_value = LlmReplaceResult( + dataframe=pd.DataFrame( + { + COL_TEXT: ["Alice works at Acme"], + COL_FINAL_ENTITIES: [{"entities": stub_entities}], + COL_REPLACEMENT_MAP: [ + { + "replacements": [ + {"original": "Alice", "label": "first_name", "synthetic": "Maya"}, + {"original": "Acme", "label": "organization", "synthetic": "NovaCorp"}, + ] + } + ], + } + ), + failed_records=[], + ) + detection_judge = Mock() + type_fidelity_judge = Mock() + relational_judge = Mock() + attribute_judge = Mock() + adapter = Mock() + runner = ReplacementWorkflow( + llm_workflow=llm_workflow, + detection_judge=detection_judge, + type_fidelity_judge=type_fidelity_judge, + relational_consistency_judge=relational_judge, + attribute_fidelity_judge=attribute_judge, + adapter=adapter, + ) + + result = runner.run( + pd.DataFrame({COL_TEXT: ["Alice works at Acme"], COL_FINAL_ENTITIES: [{"entities": []}]}), + replace_method=Substitute(), + model_configs=stub_model_configs, + selected_models=stub_replace_model_selection, + ) + + detection_judge.evaluate.assert_not_called() + type_fidelity_judge.evaluate.assert_not_called() + relational_judge.evaluate.assert_not_called() + attribute_judge.evaluate.assert_not_called() + adapter.run_workflow.assert_not_called() + for col in ( + COL_DETECTION_VALID, + COL_TYPE_FIDELITY_VALID, + COL_ATTRIBUTE_FIDELITY_VALID, + COL_RELATIONAL_CONSISTENCY_VALID, + ): + assert col not in result.dataframe.columns + assert result.dataframe[COL_REPLACED_TEXT].iloc[0] == "Maya works at NovaCorp" + + +def test_evaluate_raises_on_missing_required_columns( + stub_model_configs: list[ModelConfig], + stub_evaluate_model_selection: EvaluateModelSelection, +) -> None: + """``evaluate()`` rejects dataframes lacking the columns the judges need, + with a message that hints at the trace_dataframe workflow.""" + runner = ReplacementWorkflow( + detection_judge=DetectionJudgeWorkflow(adapter=Mock()), + 
type_fidelity_judge=TypeFidelityJudgeWorkflow(adapter=Mock()), + relational_consistency_judge=RelationalConsistencyJudgeWorkflow(adapter=Mock()), + attribute_fidelity_judge=AttributeFidelityJudgeWorkflow(adapter=Mock()), + adapter=Mock(), + ) + bare_df = pd.DataFrame({COL_TEXT: ["Alice"]}) # missing _entities_by_value and _replacement_map + with pytest.raises(ValueError, match="trace_dataframe"): + runner.evaluate( + bare_df, + replace_method=Substitute(), + model_configs=stub_model_configs, + selected_models=stub_evaluate_model_selection, + ) + + def test_apply_replacement_map_handles_string_map() -> None: dataframe = pd.DataFrame( { diff --git a/tests/interface/test_anonymizer_interface.py b/tests/interface/test_anonymizer_interface.py index 3428b667..f892285c 100644 --- a/tests/interface/test_anonymizer_interface.py +++ b/tests/interface/test_anonymizer_interface.py @@ -4,6 +4,7 @@ from __future__ import annotations from pathlib import Path +from types import SimpleNamespace from unittest.mock import Mock import pandas as pd @@ -428,7 +429,15 @@ def test_validate_config_raises_on_unknown_replace_alias_for_substitute( anonymizer._model_configs = stub_known_model_configs anonymizer._selected_models = stub_slim_model_selection anonymizer._selected_models = anonymizer._selected_models.model_copy( - update={"replace": ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) with pytest.raises(InvalidConfigError, match="bad-replace-alias"): @@ -444,7 +453,15 @@ def test_validate_config_skips_replace_alias_for_non_substitute( anonymizer._model_configs = stub_known_model_configs anonymizer._selected_models = stub_slim_model_selection anonymizer._selected_models = anonymizer._selected_models.model_copy( - update={"replace": 
ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) anonymizer.validate_config(stub_anonymizer_config) @@ -519,7 +536,15 @@ def test_run_raises_invalid_config_before_workflows( anonymizer._model_configs = stub_known_model_configs anonymizer._selected_models = stub_slim_model_selection anonymizer._selected_models = anonymizer._selected_models.model_copy( - update={"replace": ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) with pytest.raises(InvalidConfigError, match="bad-replace-alias"): @@ -653,8 +678,30 @@ def test_validate_config_raises_on_unknown_replace_alias_in_rewrite_mode( anonymizer._model_configs = stub_known_model_configs anonymizer._selected_models = stub_slim_model_selection anonymizer._selected_models = anonymizer._selected_models.model_copy( - update={"replace": ReplaceModelSelection(replacement_generator="bad-replace-alias")} + update={ + "replace": ReplaceModelSelection( + replacement_generator="bad-replace-alias", + detection_judge="known", + type_fidelity_judge="known", + relational_consistency_judge="known", + attribute_fidelity_judge="known", + ) + } ) with pytest.raises(InvalidConfigError, match="bad-replace-alias"): anonymizer.validate_config(AnonymizerConfig(rewrite=Rewrite())) + + +def test_evaluate_raises_value_error_on_legacy_result_without_replace_method() -> None: + """A pickled result from before `replace_method` existed should surface the + actionable ValueError, not an AttributeError from the missing attribute.""" + anonymizer, _, _, _ = 
_make_anonymizer() + legacy_result = SimpleNamespace( + dataframe=pd.DataFrame(), + trace_dataframe=pd.DataFrame(), + resolved_text_column="text", + ) + + with pytest.raises(ValueError, match="replace_method"): + anonymizer.evaluate(legacy_result) # type: ignore[arg-type] diff --git a/tests/interface/test_display.py b/tests/interface/test_display.py index 8578763c..fddc6028 100644 --- a/tests/interface/test_display.py +++ b/tests/interface/test_display.py @@ -23,6 +23,7 @@ _build_replaced_entities, _normalize_replacement_map, _render_highlighted_text, + _verdict_badge, render_record_html, ) from anonymizer.interface.results import PreviewResult @@ -148,6 +149,33 @@ def test_normalize_replacement_map_non_dict_returns_empty() -> None: assert _normalize_replacement_map([1, 2, 3]) == [] +def test_verdict_badge_satisfied_when_all_correct_and_valid_true() -> None: + badge, rate = _verdict_badge(valid=True, correct=10, total=10) + assert "Satisfied" in badge and "Not" not in badge + assert "10/10" in rate + + +def test_verdict_badge_partial_for_mixed_count() -> None: + badge, _ = _verdict_badge(valid=False, correct=8, total=10) + assert "Partially Satisfied" in badge + + +def test_verdict_badge_unavailable_when_valid_none() -> None: + badge, rate = _verdict_badge(valid=None, correct=0, total=0) + assert "Unavailable" in badge + assert rate == "" + + +def test_verdict_badge_not_satisfied_when_valid_false_without_enumerated_failures() -> None: + """``valid is False`` with ``correct == total`` is an inconsistent LLM response + (the judge said it's invalid but didn't list specifics). 
The explicit boolean + must override the count so we don't render a misleading green badge.""" + badge, rate = _verdict_badge(valid=False, correct=10, total=10) + assert "Not Satisfied" in badge + assert "Satisfied" not in badge.replace("Not Satisfied", "") + assert "10/10" in rate + + @pytest.mark.parametrize( "payload_kind", ["dict_wrapper", "numpy_wrapped_dict_wrapper", "entities_schema"], @@ -227,6 +255,23 @@ def test_render_record_html_without_replacement_map() -> None: assert "No replacement map available" in result +def test_render_record_html_omits_detection_judge_section_when_judge_did_not_run() -> None: + """A preview/run without evaluation must not render an empty 'Detection Judge' + heading. The wrapper lives inside ``_render_detection_judge_section`` so the + whole block is omitted when ``COL_DETECTION_VALID`` is absent.""" + row = pd.Series( + { + "text": "Alice works here", + "text_replaced": "Bob works here", + COL_DETECTED_ENTITIES: {"entities": []}, + COL_REPLACEMENT_MAP: {}, + } + ) + result = render_record_html(row) + assert "Detection Judge" not in result + assert "Detection Validity" not in result + + def _make_preview(rows: int = 2) -> PreviewResult: df = pd.DataFrame( {