diff --git a/CLAUDE.md b/CLAUDE.md
index 5256b17..5eedad6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -263,18 +263,39 @@ draw_content <- function(content, vp, gp = gpar(), content_just = "left") {
 same `match.arg(., c("left", "right", "centre"))` pattern as `caption_just` /
 `footnote_just` and supports per-page override via `x$content_just`.
 
-### Device lifecycle
+### Device lifecycle (D-48)
+
+Every `export_tfl()` call opens exactly **one** PDF device, which covers
+both pagination measurements and (in normal mode) the per-page draw loop.
 
 ```r
-export_tfl <- function(...) {
-  # validate first, before opening device
-  pdf(file, width = pg_width, height = pg_height)
-  on.exit(dev.off(), add = TRUE)
-  # loop
+export_tfl.<method> <- function(x, file, pg_width, pg_height,
+                                 page_num, preview, ...) {
+  .validate_export_args(page_num, preview, file)
+
+  # Normal mode: pdf(file).   Preview mode: pdf(NULL).
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
+  # on.exit(dev.off()) is registered on THIS frame by .open_metric_device(),
+  # so the device closes cleanly even on mid-pagination error.
+
+  # ... call *_to_pagelist() with a `text_dim_cache` env that
+  #     pagination populates and (in normal mode) drawing reuses ...
+
+  # Preview: close the transient pdf(NULL) so user's device is active.
+  if (!isFALSE(preview)) .close_metric_device(md)
+
+  # Drawing phase.  Reuses the same device in normal mode.
+  .export_tfl_pages(..., pdf_already_open = TRUE)
 }
 ```
 
-`on.exit(dev.off(), add = TRUE)` ensures the device closes even if a page errors.
+Internal measurement helpers (`compute_table_content_area`,
+`.resolve_natural_widths`, `.run_pagination_iter`,
+`.compute_col_min_widths`, `.compute_wrapped_widths`,
+`.height_balance_widths`, `.gt_grob_height`, `.rtables_lpp_cpp`) **require**
+an active device; a safety guard inside `.measure_text_dims_in()`
+(`dev.cur() == 1L` → `rlang::abort()`) catches regressions that violate
+this.
 
 ### Return value
 
diff --git a/DESCRIPTION b/DESCRIPTION
index d4ead32..60dd107 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,7 +17,6 @@ License: AGPL-3
 Encoding: UTF-8
 Depends: R (>= 4.1.0)
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
 Imports:
     checkmate,
     dplyr,
@@ -42,3 +41,4 @@ Suggests:
 VignetteBuilder: knitr
 Config/testthat/edition: 3
 URL: https://humanpred.github.io/writetfl/
+Config/roxygen2/version: 8.0.0
diff --git a/R/export_tfl.R b/R/export_tfl.R
index ace8aae..4ad3d2e 100644
--- a/R/export_tfl.R
+++ b/R/export_tfl.R
@@ -140,8 +140,11 @@ export_tfl.default <- function(
 ) {
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   x <- coerce_x_to_pagelist(x)
-  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' @export
@@ -156,9 +159,46 @@ export_tfl.tfl_table <- function(
 ) {
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+
+  # Open the metric device BEFORE pagination so measurement runs on the
+  # same device the drawing phase will use (normal mode) or a transient
+  # pdf(NULL) with matching settings (preview mode).  The helper
+  # registers on.exit on THIS frame, so a mid-pagination or mid-drawing
+  # error still closes the device cleanly.
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
+
+  # Cross-phase text-dimension cache.  Pagination populates it with
+  # (width, height) per (gp_key, string).  In PDF mode (preview = FALSE),
+  # pagination and drawing share `md$dev`, so cached values are
+  # authoritative for the render pass without re-measurement.  In preview
+  # mode, the user's render device differs from `md$dev`, so drawing
+  # gets a fresh empty cache and falls back to per-cell measurement --
+  # preserving today's preview behaviour exactly.
+  pagination_cache <- new.env(hash = TRUE, parent = emptyenv())
   x <- tfl_table_to_pagelist(x, pg_width = pg_width, pg_height = pg_height,
-                              dots = dots, page_num = page_num)
-  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots)
+                              dots = dots, page_num = page_num,
+                              text_dim_cache = pagination_cache)
+
+  drawing_cache <- if (isFALSE(preview)) pagination_cache else
+    new.env(hash = TRUE, parent = emptyenv())
+
+  # Attach the drawing cache to every tfl_table grob in the pagelist so
+  # drawDetails can reach it.  Loops are O(n_pages); each assignment is
+  # a reference copy, not a data copy.
+  for (i in seq_along(x)) {
+    if (inherits(x[[i]]$content, "tfl_table_grob")) {
+      x[[i]]$content$text_dim_cache <- drawing_cache
+    }
+  }
+
+  # Preview mode: close the transient pagination device so the user's
+  # device is active for drawing.  The on.exit guard installed by
+  # `.open_metric_device()` will see this device already closed (via
+  # `.close_metric_device`'s idempotency check) and no-op.
+  if (!isFALSE(preview)) .close_metric_device(md)
+
+  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' @export
@@ -174,6 +214,8 @@ export_tfl.list <- function(
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
 
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
+
   # Check if this is a list of gt_tbl objects
   all_gt <- length(x) > 0L &&
     all(vapply(x, inherits, logical(1L), "gt_tbl"))
@@ -215,7 +257,9 @@ export_tfl.list <- function(
       }
     }
   }
-  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 
@@ -234,9 +278,78 @@ export_tfl.list <- function(
   invisible(NULL)
 }
 
-# Render a list of page specs to PDF or the current device
+# Open the metric device for an export_tfl() call.  D-48 establishes
+# that one device covers both pagination measurements and (in normal
+# mode) drawing, rather than each measurement helper opening its own
+# scratch device.
+#
+# Normal mode (`isFALSE(preview)`): opens `grDevices::pdf(file)` -- the
+# final output PDF.  Pagination uses it for `convertWidth` / `grobWidth`
+# resolution; subsequent drawing reuses the same device.
+#
+# Preview mode: opens a transient `grDevices::pdf(NULL)` so pagination
+# uses identical PDF font metrics to normal mode (preserving today's
+# pagination decisions).  The caller is responsible for invoking
+# `.close_metric_device()` AFTER pagination so the user's pre-existing
+# device becomes active again for drawing.
+#
+# Safety:
+# * The helper registers an `on.exit()` handler on the CALLER's frame
+#   (`envir`) so any error during pagination or drawing still closes
+#   the device.  Without that, an interrupted run would leak the
+#   device; running export_tfl() again would then open another and
+#   eventually exhaust the per-session limit of 64.
+# * `.close_metric_device()` is idempotent: calling it explicitly in
+#   preview mode and then letting `on.exit` run is harmless because
+#   the second call sees a different `dev.cur()` and no-ops.
+#
+# @keywords internal
+.open_metric_device <- function(file, pg_width, pg_height, preview,
+                                 envir = parent.frame()) {
+  if (isFALSE(preview)) {
+    grDevices::pdf(file, width = pg_width, height = pg_height)
+  } else {
+    grDevices::pdf(NULL, width = pg_width, height = pg_height)
+  }
+  dev <- grDevices::dev.cur()
+  md  <- list(dev = dev)
+  # Register on.exit on the caller's frame so the device closes even
+  # if the caller errors out mid-execution.  bquote inlines `dev` so
+  # the on.exit body does not need to reach back to `md`.
+  do.call("on.exit",
+          list(bquote({
+            if (grDevices::dev.cur() == .(dev)) grDevices::dev.off()
+          }), add = TRUE),
+          envir = envir)
+  md
+}
+
+# Close a metric device opened by `.open_metric_device()`.
+#
+# Idempotent: a second call (or a call when the device has already been
+# closed by something else) is a no-op.  Idempotency matters because
+# preview-mode callers close explicitly after pagination AND register
+# the same close via the helper's `on.exit` handler.
+#
+# @keywords internal
+.close_metric_device <- function(md) {
+  if (!is.null(md$dev) && grDevices::dev.cur() == md$dev) {
+    grDevices::dev.off()
+  }
+  invisible(NULL)
+}
+
+# Render a list of page specs to PDF or the current device.
+#
+# `pdf_already_open` signals that the CALLER has already opened the
+# render device (via `.open_metric_device()`) and owns its lifecycle.
+# In that case this function skips its own `pdf()` open / on.exit
+# close and just iterates pages.  When the caller does not pass the
+# flag (e.g. `export_tfl.default()` for ggplot pages), the legacy
+# self-open path is preserved.
 .export_tfl_pages <- function(pages, file, pg_width, pg_height,
-                               page_num, preview, dots) {
+                               page_num, preview, dots,
+                               pdf_already_open = FALSE) {
   n <- length(pages)
 
   # ------------------------------------------------------------------
@@ -263,8 +376,10 @@ export_tfl.list <- function(
   # ------------------------------------------------------------------
   # Normal mode: write PDF
   # ------------------------------------------------------------------
-  grDevices::pdf(file, width = pg_width, height = pg_height)
-  on.exit(grDevices::dev.off(), add = TRUE)
+  if (!pdf_already_open) {
+    grDevices::pdf(file, width = pg_width, height = pg_height)
+    on.exit(grDevices::dev.off(), add = TRUE)
+  }
 
   for (i in seq_along(pages)) {
     page_args <- build_page_args(pages[[i]], dots, page_num, i, n)
diff --git a/R/flextable.R b/R/flextable.R
index c0b0d22..da5ffcf 100644
--- a/R/flextable.R
+++ b/R/flextable.R
@@ -25,8 +25,11 @@ export_tfl.flextable <- function(
   rlang::check_installed("flextable", reason = "to export flextable tables")
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   pages <- flextable_to_pagelist(x, pg_width, pg_height, dots, page_num)
-  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' Convert a flextable object to a list of page specification lists
diff --git a/R/ggtibble.R b/R/ggtibble.R
index b34180f..4a3191f 100644
--- a/R/ggtibble.R
+++ b/R/ggtibble.R
@@ -20,10 +20,13 @@ export_tfl.ggtibble <- function(
 ) {
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   x <- ggtibble_to_pagelist(x, sub_tfl = sub_tfl, sub_tfl_sep = sub_tfl_sep,
                             sub_tfl_collapse = sub_tfl_collapse,
                             sub_tfl_prefix = sub_tfl_prefix)
-  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 # Page-spec arg names recognised on a ggtibble row.
diff --git a/R/gt.R b/R/gt.R
index 3374c7f..1ec7327 100644
--- a/R/gt.R
+++ b/R/gt.R
@@ -24,8 +24,11 @@ export_tfl.gt_tbl <- function(
   rlang::check_installed("gt", reason = "to export gt tables")
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   pages <- gt_to_pagelist(x, pg_width, pg_height, dots, page_num)
-  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' Convert a gt_tbl object to a list of page specification lists
@@ -176,19 +179,20 @@ gt_to_pagelist <- function(gt_obj, pg_width = 11, pg_height = 8.5,
   dims$height
 }
 
-#' Measure a gt grob's height in a scratch device
+#' Measure a gt grob's height
+#'
+#' D-48: requires an active graphics device with matching page
+#' dimensions; `export_tfl.gt_tbl()` opens the metric device via
+#' `.open_metric_device()` before invoking the pagelist conversion
+#' pipeline, so `convertHeight()` here resolves against that device's
+#' font metrics.
 #'
 #' @param grob A gtable grob from [gt::as_gtable()].
-#' @param pg_width,pg_height Page dimensions for the scratch device.
+#' @param pg_width,pg_height Page dimensions (advisory; the active
+#'   metric device's dimensions are what `convertHeight` uses).
 #' @return Numeric scalar: grob height in inches.
 #' @keywords internal
 .gt_grob_height <- function(grob, pg_width, pg_height) {
-  scratch <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch, width = pg_width, height = pg_height)
-  on.exit({
-    grDevices::dev.off()
-    unlink(scratch)
-  })
   grid::convertHeight(grid::grobHeight(grob), "inches", valueOnly = TRUE)
 }
 
diff --git a/R/rtables.R b/R/rtables.R
index 04339ff..5d8a70e 100644
--- a/R/rtables.R
+++ b/R/rtables.R
@@ -22,8 +22,11 @@ export_tfl.VTableTree <- function(
   rlang::check_installed("rtables", reason = "to export rtables tables")
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   pages <- rtables_to_pagelist(x, pg_width, pg_height, dots, page_num)
-  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' Convert a VTableTree object to a list of page specification lists
@@ -207,19 +210,17 @@ rtables_to_pagelist <- function(rt_obj, pg_width = 11, pg_height = 8.5,
   line_h_in <- (font_size / 72) * lineheight
   lpp <- floor(content_h / line_h_in)
 
-  # Character width: measure "M" in the target font using a scratch device
-  scratch <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch, width = 10, height = 10)
-  on.exit({
-    grDevices::dev.off()
-    unlink(scratch)
-  })
+  # Character width: measure "M" in the target font.  D-48: relies on
+  # the metric device opened upstream by `.open_metric_device()`
+  # rather than opening a scratch PDF here.  The viewport is pushed
+  # and popped on exit so an error mid-measurement does not leave the
+  # font-context viewport on the stack.
   grid::pushViewport(grid::viewport(
     gp = grid::gpar(fontfamily = font_family, fontsize = font_size)
   ))
+  on.exit(grid::popViewport(), add = TRUE)
   char_w_in <- grid::convertWidth(grid::stringWidth("M"), "inches",
                                   valueOnly = TRUE)
-  grid::popViewport()
 
   cpp <- floor(content_w / char_w_in)
 
diff --git a/R/table1.R b/R/table1.R
index ad45ed5..685e1e4 100644
--- a/R/table1.R
+++ b/R/table1.R
@@ -21,8 +21,11 @@ export_tfl.table1 <- function(
   rlang::check_installed("flextable", reason = "to export table1 tables")
   dots <- list(...)
   .validate_export_args(page_num, preview, file)
+  md <- .open_metric_device(file, pg_width, pg_height, preview)
   pages <- table1_to_pagelist(x, pg_width, pg_height, dots, page_num)
-  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots)
+  if (!isFALSE(preview)) .close_metric_device(md)
+  .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots,
+                    pdf_already_open = TRUE)
 }
 
 #' Convert a table1 object to a list of page specification lists
diff --git a/R/table_columns.R b/R/table_columns.R
index 80cadef..09ca272 100644
--- a/R/table_columns.R
+++ b/R/table_columns.R
@@ -96,14 +96,16 @@ compute_col_widths <- function(resolved_cols, data, content_width_in,
                                tbl, pg_width, pg_height, margins,
                                overflow_action   = c("error", "warn"),
                                validate_overflow = TRUE,
-                               floor_overrides   = NULL) {
+                               floor_overrides   = NULL,
+                               cache             = NULL) {
   overflow_action <- match.arg(overflow_action)
   strategy <- tbl$col_split_strategy %||% "balanced"
 
   # Shared setup: compute natural widths, resolve relative weights,
   # auto-detect wrap eligibility, and measure col_cont_label_half_w.
   setup <- .resolve_natural_widths(
-    resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins
+    resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins,
+    cache = cache
   )
 
   # Dispatch.  Each strategy returns list(resolved_cols, col_groups).
@@ -162,7 +164,8 @@ compute_col_widths <- function(resolved_cols, data, content_width_in,
 # The scratch device used for text measurement is opened, used, and
 # closed inside this function so neither strategy has to manage it.
 .resolve_natural_widths <- function(resolved_cols, data, content_width_in,
-                                     tbl, pg_width, pg_height, margins) {
+                                     tbl, pg_width, pg_height, margins,
+                                     cache = NULL) {
   n_cols    <- length(resolved_cols)
   n_grp     <- length(tbl$group_vars)
   min_in    <- .width_in(tbl$min_col_width)
@@ -171,16 +174,16 @@ compute_col_widths <- function(resolved_cols, data, content_width_in,
                .width_in(cell_pad[["left"]])
   na_str    <- tbl$na_string
   max_rows  <- tbl$max_measure_rows
+  hdr_gp_key  <- paste0("header_row_lh", tbl$line_height)
 
-  scratch_file <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch_file, width = pg_width, height = pg_height)
+  # D-48: relies on the metric device opened upstream by
+  # `.open_metric_device()` rather than opening a scratch PDF here.
+  # The outer viewport is still pushed so width conversions resolve
+  # against the post-margin content area; popped on exit so an error
+  # inside the measurement loop does not leave it on the stack.
   outer_vp <- .make_outer_vp(margins)
   grid::pushViewport(outer_vp)
-  on.exit({
-    grid::popViewport()                  # nocov
-    grDevices::dev.off()                 # nocov
-    unlink(scratch_file)                 # nocov
-  }, add = TRUE)
+  on.exit(grid::popViewport(), add = TRUE)                # nocov
 
   widths_in <- vapply(seq_len(n_cols), function(j) {
     cs <- resolved_cols[[j]]
@@ -198,8 +201,12 @@ compute_col_widths <- function(resolved_cols, data, content_width_in,
         .resolve_table_gp(tbl$gp, "header_row"), tbl$line_height
       )
       parts  <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows)
-      w_data <- .measure_max_string_width(parts$data, cell_gp)
-      w_hdr  <- .measure_max_string_width(parts$header, hdr_gp)
+      cell_key <- paste0(if (cs$is_group_col) "group_col" else "data_row",
+                         "_lh", tbl$line_height)
+      w_data <- .measure_max_string_width(parts$data,   cell_gp,
+                                          gp_key = cell_key, cache = cache)
+      w_hdr  <- .measure_max_string_width(parts$header, hdr_gp,
+                                          gp_key = hdr_gp_key, cache = cache)
       max(min_in, max(w_data, w_hdr) + h_pad_in)
     }
   }, numeric(1L))
@@ -213,11 +220,11 @@ compute_col_widths <- function(resolved_cols, data, content_width_in,
     0
   }
 
-  # Close the scratch device now so strategy functions can open their own
-  # without nested-device complications.
+  # Pop the outer viewport now so strategy functions push their own
+  # viewports on a clean stack.  No device to close anymore -- the
+  # metric device opened upstream by `.open_metric_device()` stays
+  # open for subsequent measurement work.
   grid::popViewport()
-  grDevices::dev.off()
-  unlink(scratch_file)
   on.exit(NULL)
 
   # Resolve relative weights.
diff --git a/R/table_draw.R b/R/table_draw.R
index 197c78a..4498816 100644
--- a/R/table_draw.R
+++ b/R/table_draw.R
@@ -66,7 +66,8 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols,
                              cell_heights_in_mat = NULL,
                              cont_row_h_in       = NULL,
                              is_first_col_page   = TRUE,
-                             is_last_col_page    = TRUE) {
+                             is_last_col_page    = TRUE,
+                             clip_width_caches   = NULL) {
   # Subset to display columns for this page
   page_cols <- resolved_cols[col_group_idx]
 
@@ -81,6 +82,11 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols,
     cont_row_h_in       = cont_row_h_in,        # cached from paginate phase
     is_first_col_page   = is_first_col_page,    # FALSE when prior col pages exist
     is_last_col_page    = is_last_col_page,     # FALSE when more col pages follow
+    clip_width_caches   = clip_width_caches,    # shared across all pages of
+                                                # this tfl_table; NULL means
+                                                # drawDetails will create
+                                                # per-page envs (e.g. grobs
+                                                # assembled outside pipeline)
     cl                  = "tfl_table_grob"
   )
 }
@@ -242,6 +248,17 @@ drawDetails.tfl_table_grob <- function(x, recording) {
 
   y_cursor <- 0   # distance from top in inches
 
+  # Cross-phase text-dim cache attached by export_tfl.tfl_table().  In
+  # PDF mode it is already populated by pagination with both width and
+  # height for every cell; drawing's width lookups become pure env
+  # reads.  In preview mode the supplied cache is an empty env, so
+  # lookups miss and .draw_cell_text() falls through to the per-column
+  # width_cache (D-46) and fresh measurement.  When grobs are built
+  # outside the normal export_tfl() pipeline (no cache attached),
+  # NULL means "skip the cross-phase lookup entirely".  Hoisted above
+  # the header-row block so both header and data-row paths use it.
+  text_dim_cache <- x$text_dim_cache
+
   # Draw column header row
   if (tbl$show_col_names) {
     # Header row background fill
@@ -261,7 +278,8 @@ drawDetails.tfl_table_grob <- function(x, recording) {
     .draw_header_row(page_cols, col_x_left, col_x_right, col_widths_in,
                      y_cursor, header_row_h, vp_w, vp_h,
                      h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh,
-                     breaks = breaks)
+                     breaks         = breaks,
+                     text_dim_cache = text_dim_cache)
     y_cursor <- y_cursor + header_row_h
 
     # Column header rule — spans table width only
@@ -299,13 +317,36 @@ drawDetails.tfl_table_grob <- function(x, recording) {
     .gp_with_lineheight(.resolve_table_cell_gp(gp_tbl, cs$is_group_col), lh)
   })
 
+  # Per-column gp_key matching what the pagination phase used in
+  # text_dim_cache (table_utils.R `.measure_text_dims_in` namespace).
+  # Lets .draw_cell_text() look up pagination-cached widths without
+  # hashing the gpar field-by-field.
+  gp_key_by_col <- vapply(page_cols, function(cs) {
+    paste0(if (cs$is_group_col) "group_col" else "data_row", "_lh", lh)
+  }, character(1L))
+
   # Per-column clip-width memo: many cells in a column share identical text
-  # (numeric formats like "5.1", category labels), and the clip-width
-  # computation otherwise re-measures each one.  Cache scoped to this
-  # drawDetails call and to one column, so a single (text -> width) key is
-  # enough.
-  clip_width_cache_by_col <- lapply(page_cols, function(cs) {
-    new.env(hash = TRUE, parent = emptyenv())
+  # (numeric formats like "5.1", category labels).  When the parent
+  # tfl_table_to_pagelist() built a shared cache list, reuse those envs --
+  # they persist across every row-page and col-group of this table, so the
+  # cache hits accumulate over all pages.  Otherwise fall back to per-page
+  # envs (e.g. for grobs built outside the normal pipeline).
+  #
+  # Kept as a second-tier cache (after text_dim_cache) so that strings
+  # produced by .wrap_string() at draw time -- which pagination never saw
+  # in their wrapped form -- still get cached per column for repeats
+  # across rows / pages.
+  clip_width_cache_by_col <- if (!is.null(x$clip_width_caches)) {
+    lapply(x$col_group_idx, function(k) x$clip_width_caches[[k]])
+  } else {
+    lapply(page_cols, function(cs) new.env(hash = TRUE, parent = emptyenv()))
+  }
+
+  # Pre-extract and pre-format each column's cell strings for the rows on
+  # this page.  Replaces a per-cell `.fmt_cell(data[[cs$col]][i], na_str)`
+  # with a single vectorised `.fmt_cell_vec()` per column.
+  cell_strs_by_col <- lapply(page_cols, function(cs) {
+    .fmt_cell_vec(data[[cs$col]][rows], na_str)
   })
 
   # Hoist row/group-rule gpars too -- they don't change between rows.  Only
@@ -358,9 +399,8 @@ drawDetails.tfl_table_grob <- function(x, recording) {
 
     # Draw data row
     for (j in seq_len(n_disp_cols)) {
-      cs      <- page_cols[[j]]
-      raw_val <- data[[cs$col]][i]
-      cell_str <- .fmt_cell(raw_val, na_str)
+      cs       <- page_cols[[j]]
+      cell_str <- cell_strs_by_col[[j]][[ri]]
 
       # Group repeat suppression and span detection
       clip_h <- row_h
@@ -399,7 +439,9 @@ drawDetails.tfl_table_grob <- function(x, recording) {
                       y_cursor, clip_h, vp_w, vp_h,
                       h_lft_in, h_rgt_in, v_top_in,
                       cell_gp, cs$width_in,
-                      width_cache = clip_width_cache_by_col[[j]])
+                      width_cache    = clip_width_cache_by_col[[j]],
+                      text_dim_cache = text_dim_cache,
+                      gp_key         = gp_key_by_col[[j]])
     }
 
     y_cursor <- y_cursor + row_h
@@ -505,8 +547,13 @@ drawDetails.tfl_table_grob <- function(x, recording) {
 .draw_header_row <- function(page_cols, col_x_left, col_x_right, col_widths_in,
                               y_top_in, row_h, vp_w, vp_h,
                               h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh,
-                              breaks = NULL) {
+                              breaks         = NULL,
+                              text_dim_cache = NULL) {
   hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), lh)
+  # Single gp_key for the whole header row -- all cells share hdr_gp, so
+  # pagination's `.measure_header_row_height()` populated text_dim_cache
+  # under this same key (table_utils.R:99).
+  hdr_gp_key <- paste0("header_row_lh", lh)
   for (j in seq_along(page_cols)) {
     cs    <- page_cols[[j]]
     label <- cs$label
@@ -518,7 +565,9 @@ drawDetails.tfl_table_grob <- function(x, recording) {
                     col_x_left[[j]], col_x_right[[j]],
                     y_top_in, row_h, vp_w, vp_h,
                     h_lft_in, h_rgt_in, v_top_in,
-                    hdr_gp, cs$width_in)
+                    hdr_gp, cs$width_in,
+                    text_dim_cache = text_dim_cache,
+                    gp_key         = hdr_gp_key)
   }
 }
 
@@ -551,10 +600,25 @@ drawDetails.tfl_table_grob <- function(x, recording) {
 }
 
 # Draw a single cell's text
+#
+# `text_dim_cache` (the optional cross-phase cache populated during
+# pagination) is consulted FIRST under the `gp_key` namespace.  D-47
+# established that PDF scratch devices and the render PDF share font
+# metrics for matching `(pg_width, pg_height)`, so cached widths are
+# authoritative.  Misses fall through to the per-column `width_cache`
+# (D-46) and finally to a fresh measurement via `.measure_text_width_in`.
+#
+# Preview mode is preserved exactly: `export_tfl.tfl_table()` attaches
+# an empty `text_dim_cache` to grobs in preview mode, so every lookup
+# misses and the function falls through to per-cell measurement on the
+# user's render device (the pre-D-48 path).
 .draw_cell_text <- function(text, align, x_left, x_right,
                              y_top_in, row_h, vp_w, vp_h,
                              h_lft_in, h_rgt_in, v_top_in,
-                             gp, col_width_in, width_cache = NULL) {
+                             gp, col_width_in,
+                             width_cache    = NULL,
+                             text_dim_cache = NULL,
+                             gp_key         = NULL) {
   if (nchar(text) == 0L) return(invisible(NULL))
 
   y_npc <- 1 - (y_top_in + v_top_in) / vp_h
@@ -571,29 +635,58 @@ drawDetails.tfl_table_grob <- function(x, recording) {
     just  <- c("centre", "top")
   }
 
-  # Re-measure text width in the current (rendering) device using the
-  # actual rendering gpar (grid::stringWidth() picks up only the active
-  # viewport's gp, which is wrong when `gp` is, e.g., a bold header
-  # gpar and the active vp is regular weight).  This corrects font-metric
-  # variance between the PDF scratch device used for column-width
-  # measurement and the device used for actual rendering (e.g. a PNG
-  # device in knitr / RStudio preview mode).
+  # Width lookup: text_dim_cache (cross-phase, populated by pagination)
+  # -> width_cache (per-column, D-46) -> fresh measurement.
   #
-  # Important: cap the clip width at a small tolerance past `col_width_in`
-  # so a column that is genuinely too narrow for its content (user set a
-  # fixed width below the longest unbreakable token, or a bold header
-  # whose measured width exceeded the regular-weight column-width pass)
-  # cannot bleed text into the neighboring column and hide its content.
-  # Anything past the tolerance gets visually clipped at the column edge,
-  # which is a far less destructive failure mode than overlap.
-  # Measurement is identical for repeated cell text within one drawDetails
-  # call; the optional per-column cache lets the caller deduplicate.
-  text_w <- .measure_text_width_in(text, gp, width_cache)
+  # Cap the clip width at a small tolerance past `col_width_in` so a
+  # column genuinely too narrow for its content cannot bleed text into
+  # the neighboring column and hide its content.  Anything past the
+  # tolerance gets visually clipped at the column edge -- a far less
+  # destructive failure mode than overlap.
+  text_w <- NA_real_
+  if (!is.null(text_dim_cache) && !is.null(gp_key)) {
+    key <- paste0(gp_key, "\x01", text)
+    if (exists(key, envir = text_dim_cache, inherits = FALSE)) {
+      hit <- get(key, envir = text_dim_cache, inherits = FALSE)
+      text_w <- hit$w
+    }
+  }
+  if (is.na(text_w)) {
+    text_w <- .measure_text_width_in(text, gp, width_cache)
+  }
   needed <- text_w + h_lft_in + h_rgt_in
-  bleed_tol_in <- 0.05
-  clip_w <- min(col_width_in + bleed_tol_in, max(col_width_in, needed))
 
-  # Clip to column width by using a clipping viewport
+  # X position in parent-viewport inches.
+  x_in <- if (identical(align, "left")) {
+    x_left + h_lft_in
+  } else if (identical(align, "right")) {
+    x_right - h_rgt_in
+  } else {
+    (x_left + x_right) / 2
+  }
+  y_in <- y_top_in + v_top_in
+
+  if (needed <= col_width_in) {
+    # Fast path: the text fits inside its column, so the column width
+    # already clips by construction.  Draw directly into the parent
+    # viewport and skip the per-cell clip viewport push/pop entirely --
+    # for tables of all-fits cells (numeric columns, short categoricals)
+    # this saves a viewport per cell.
+    grid::grid.text(
+      label = text,
+      x     = grid::unit(x_in / vp_w, "npc"),
+      y     = grid::unit(1 - y_in / vp_h, "npc"),
+      just  = just,
+      gp    = gp
+    )
+    return(invisible(NULL))
+  }
+
+  # Slow path: text exceeds the column.  Push a clipping viewport so the
+  # overflow is clipped at the column edge plus a small tolerance,
+  # instead of bleeding into the neighbour.
+  bleed_tol_in <- 0.05
+  clip_w <- min(col_width_in + bleed_tol_in, needed)
   vp_clip <- grid::viewport(
     x      = grid::unit(x_left / vp_w, "npc"),
     y      = grid::unit(1 - (y_top_in + row_h) / vp_h, "npc"),
@@ -604,18 +697,17 @@ drawDetails.tfl_table_grob <- function(x, recording) {
   )
   grid::pushViewport(vp_clip)
 
-  # Re-express x, y relative to clip viewport
-  vp_w2 <- .width_in(grid::unit(1, "npc"))
-  vp_h2 <- .height_in(grid::unit(1, "npc"))
-
+  # vp_clip was just constructed with width = clip_w inches and
+  # height = row_h inches, so npc=1 inside it is exactly that many inches --
+  # no need to convertUnit() to recover those numbers.
   x_local_in <- if (identical(align, "left")) {
     h_lft_in
   } else if (identical(align, "right")) {
-    vp_w2 - h_rgt_in
+    clip_w - h_rgt_in
   } else {
-    vp_w2 / 2
+    clip_w / 2
   }
-  y_local_in <- vp_h2 - v_top_in
+  y_local_in <- row_h - v_top_in
 
   grid::grid.text(
     label = text,
diff --git a/R/table_pagelist.R b/R/table_pagelist.R
index bb2583f..0da1437 100644
--- a/R/table_pagelist.R
+++ b/R/table_pagelist.R
@@ -35,14 +35,24 @@
 #' @param pg_width,pg_height Page dimensions in inches.
 #' @param dots The `list(...)` from [export_tfl()].
 #' @param page_num Glue template string for page numbering.
+#' @param text_dim_cache Optional environment used as the pagination-phase
+#'   text-dimension cache.  When supplied, the same env is reused instead
+#'   of allocating one locally, so the caller (`export_tfl()`) can later
+#'   reuse its entries during the drawing phase by attaching the env to
+#'   the table grobs.  When `NULL` (the default), a fresh env is
+#'   allocated and discarded after pagination completes -- equivalent to
+#'   the pre-D-48 behaviour.
 #' @return A list of page spec lists, each with at least `$content` (a grob).
 #' @keywords internal
 tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
-                                   page_num = "Page {i} of {n}") {
+                                   page_num       = "Page {i} of {n}",
+                                   text_dim_cache = NULL) {
   if (is.null(tbl$sub_tfl)) {
-    .tfl_table_to_pagelist_default(tbl, pg_width, pg_height, dots, page_num)
+    .tfl_table_to_pagelist_default(tbl, pg_width, pg_height, dots, page_num,
+                                    text_dim_cache = text_dim_cache)
   } else {
-    .tfl_table_to_pagelist_sub_tfl(tbl, pg_width, pg_height, dots, page_num)
+    .tfl_table_to_pagelist_sub_tfl(tbl, pg_width, pg_height, dots, page_num,
+                                    text_dim_cache = text_dim_cache)
   }
 }
 
@@ -53,7 +63,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
 # measurement pipeline; recursion handles that naturally.
 #' @keywords internal
 .tfl_table_to_pagelist_sub_tfl <- function(tbl, pg_width, pg_height, dots,
-                                            page_num) {
+                                            page_num,
+                                            text_dim_cache = NULL) {
   groups <- .compute_sub_tfl_groups(tbl$data, tbl$sub_tfl)
   pages  <- list()
   for (g in groups) {
@@ -69,8 +80,12 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
     sub_dots$caption <- .apply_sub_tfl_caption(dots$caption, suffix,
                                                tbl$sub_tfl_prefix)
 
+    # Share the same cache across all sub-groups: identical (gp_key, string)
+    # pairs appear in every sub-table for repeating categoricals and the
+    # cache keeps adding entries instead of being thrown away per group.
     sub_pages <- tfl_table_to_pagelist(sub_tbl, pg_width, pg_height,
-                                       sub_dots, page_num)
+                                       sub_dots, page_num,
+                                       text_dim_cache = text_dim_cache)
     sub_pages <- lapply(sub_pages, .attach_page_caption,
                         caption = sub_dots$caption)
     pages <- c(pages, sub_pages)
@@ -88,7 +103,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
 
 #' @keywords internal
 .tfl_table_to_pagelist_default <- function(tbl, pg_width, pg_height, dots,
-                                            page_num) {
+                                            page_num,
+                                            text_dim_cache = NULL) {
   # --- Step 1: Extract layout args from dots ---
   # Use explicit NULL checks instead of %||% for arguments that can legitimately
 
@@ -150,6 +166,23 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
   wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) {
     .height_in(tbl$wrap_extra_padding)
   } else 0
+
+  # Per-pagination text-dimension cache.  Spans the natural-width pass,
+  # the row-height pass, and the header-height pass.  Each (gp_key, string)
+  # is constructed via grid::textGrob exactly once; both dimensions are read
+  # from that grob and cached together.  Different passes typically need
+  # only one dimension, so the cache amortises the other for free.  All
+  # those passes open PDF scratch devices with the same pg_width/pg_height
+  # so font metrics are stable across the cache's lifetime.
+  #
+  # When `export_tfl()` supplies a cache, the same env is reused so its
+  # entries survive past pagination and become available to the drawing
+  # phase via the grob's $text_dim_cache slot.  D-47 documented this as
+  # safe across PDF scratch devices with matching settings; D-48 extends
+  # the argument to span the render device.
+  if (is.null(text_dim_cache)) {
+    text_dim_cache <- new.env(hash = TRUE, parent = emptyenv())
+  }
   strategy        <- tbl$col_split_strategy %||% "balanced"
   max_retries     <- as.integer(tbl$row_overflow_max_retries %||% 5L)
   use_retry_loop  <- identical(strategy, "balanced") && max_retries > 0L
@@ -158,34 +191,30 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
   names(floor_overrides) <- character(0L)
   retries         <- 0L
 
-  # Per-iteration helper: opens a fresh row-height scratch device, runs
-  # the measurement + pagination phase, closes the scratch device, and
-  # returns (row_pages, cell_h_mat, cont_row_h).  The scratch device's
-  # lifecycle is fully contained inside this helper so the surrounding
-  # retry loop never holds a viewport across a compute_col_widths()
-  # call (compute_col_widths() opens its own scratch devices internally).
+  # Per-iteration helper: runs the row-height measurement + pagination
+  # phase under a fresh outer viewport.  D-48: relies on the metric
+  # device opened by `.open_metric_device()` upstream rather than
+  # opening its own scratch PDF.  Returns (row_pages, cell_h_mat,
+  # cont_row_h).  The viewport is popped on exit so an error inside
+  # paginate_rows() does not leave it on the stack.
   .run_pagination_iter <- function(resolved_cols, collect_overflows) {
-    scratch_file_rh <- tempfile(fileext = ".pdf")
-    grDevices::pdf(scratch_file_rh, width = pg_width, height = pg_height)
     rh_outer_vp <- .make_outer_vp(margins)
     grid::pushViewport(rh_outer_vp)
-    on.exit({
-      grid::popViewport()
-      grDevices::dev.off()
-      unlink(scratch_file_rh)
-    }, add = TRUE)
+    on.exit(grid::popViewport(), add = TRUE)
 
     header_row_h <- if (tbl$show_col_names) {
       .measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding,
                                  tbl$line_height, breaks = breaks,
-                                 wrap_extra_pad_in = wrap_extra_pad_in)
+                                 wrap_extra_pad_in = wrap_extra_pad_in,
+                                 cache = text_dim_cache)
     } else 0
 
     cell_h_mat <- measure_row_heights_tbl(
       tbl$data, resolved_cols, tbl$gp, tbl$cell_padding,
       tbl$na_string, tbl$line_height, tbl$max_measure_rows,
       breaks = breaks,
-      wrap_extra_pad_in = wrap_extra_pad_in
+      wrap_extra_pad_in = wrap_extra_pad_in,
+      cache = text_dim_cache
     )
 
     cont_row_h <- max(
@@ -217,7 +246,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
     col_result <- compute_col_widths(
       resolved_cols_0, tbl$data, cw, tbl, pg_width, pg_height, margins,
       overflow_action = overflow_action,
-      floor_overrides = floor_overrides
+      floor_overrides = floor_overrides,
+      cache           = text_dim_cache
     )
     resolved_cols <- col_result$resolved_cols
     col_groups    <- col_result$col_groups
@@ -239,7 +269,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
         resolved_cols_0, tbl$data, cw_adj, tbl, pg_width, pg_height, margins,
         overflow_action   = overflow_action,
         validate_overflow = FALSE,
-        floor_overrides   = floor_overrides
+        floor_overrides   = floor_overrides,
+        cache             = text_dim_cache
       )
       resolved_cols <- col_result$resolved_cols
       col_groups    <- col_result$col_groups
@@ -287,6 +318,15 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
   pages <- vector("list", n_rp * n_cg)
   idx   <- 1L
 
+  # Shared per-resolved_cols clip-width cache.  Every page-grob built below
+  # holds a reference to the SAME list of envs, so .draw_cell_text() can
+  # reuse measurements across pages of the same tfl_table (one entry per
+  # unique cell text per column).  Envs are reference-typed, so memory is
+  # not duplicated per grob.
+  clip_width_caches <- lapply(seq_along(resolved_cols), function(k) {
+    new.env(hash = TRUE, parent = emptyenv())
+  })
+
   for (rp in seq_len(n_rp)) {
     for (cg in seq_len(n_cg)) {
       grob <- build_table_grob(
@@ -298,7 +338,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
         cell_heights_in_mat  = cell_h_mat,
         cont_row_h_in        = cont_row_h,
         is_first_col_page    = (cg == 1L),
-        is_last_col_page     = (cg == n_cg)
+        is_last_col_page     = (cg == n_cg),
+        clip_width_caches    = clip_width_caches
       )
       page_spec <- list(content = grob)
       pages[[idx]] <- page_spec
@@ -315,19 +356,23 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
 
 #' Compute available content area for a tfl_table page
 #'
-#' Opens a scratch device, measures annotation section heights using the
-#' same infrastructure as export_tfl_page(), and returns available width and
-#' height in inches.
+#' Measures annotation section heights using the same infrastructure as
+#' export_tfl_page() and returns available width and height in inches.
+#'
+#' D-48: requires an active graphics device with matching page
+#' dimensions; the caller (`.tfl_table_to_pagelist_default()`) runs
+#' inside the metric device opened by `.open_metric_device()` in the
+#' S3 dispatcher, so font metrics here equal those used at draw time
+#' (normal mode) or those of the same pdf(NULL) used for the rest of
+#' pagination (preview mode).
 #'
 #' @keywords internal
 compute_table_content_area <- function(pg_width, pg_height, margins, padding,
                                        header_rule, footer_rule,
                                        annot, gp_page, cap_just, fn_just) {
-  grDevices::pdf(NULL, width = pg_width, height = pg_height)
-  on.exit(grDevices::dev.off(), add = TRUE)
-
   outer_vp <- .make_outer_vp(margins)
   grid::pushViewport(outer_vp)
+  on.exit(grid::popViewport(), add = TRUE)
 
   vp_w <- .width_in(grid::unit(1, "npc"))
   vp_h <- .height_in(grid::unit(1, "npc"))
@@ -375,7 +420,8 @@ compute_table_content_area <- function(pg_width, pg_height, margins, padding,
   used_h <- heights$header + heights$caption + heights$footnote + heights$footer
   avail_h <- vp_h - used_h - n_gaps * pad_in
 
-  grid::popViewport()
+  # popViewport() runs from on.exit so an error mid-measurement does
+  # not leave the outer_vp on the stack.
 
   list(width = vp_w, height = max(avail_h, 0))
 }
diff --git a/R/table_rows.R b/R/table_rows.R
index 6af93e1..7c71d23 100644
--- a/R/table_rows.R
+++ b/R/table_rows.R
@@ -26,7 +26,8 @@
 #' @keywords internal
 measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding,
                                     na_string, line_height, max_measure_rows,
-                                    breaks = NULL, wrap_extra_pad_in = 0) {
+                                    breaks = NULL, wrap_extra_pad_in = 0,
+                                    cache = NULL) {
   n_rows   <- nrow(data)
   n_cols   <- length(resolved_cols)
   v_pad_in <- .height_in(cell_padding[["top"]]) +
@@ -34,17 +35,12 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding,
   h_lft_in <- .width_in(cell_padding[["left"]])
   h_rgt_in <- .width_in(cell_padding[["right"]])
 
-  # Memoised per-cell-text-height function: (string, gp_key) -> height_in
-  memo <- new.env(hash = TRUE, parent = emptyenv())
-  .memo_str_height <- function(s, gp_key, gp) {
-    key <- paste0(gp_key, "\x01", s)
-    if (!exists(key, envir = memo, inherits = FALSE)) {
-      grob <- grid::textGrob(s, gp = gp)
-      h    <- .height_in(grid::grobHeight(grob))
-      assign(key, h, envir = memo)
-    }
-    get(key, envir = memo, inherits = FALSE)
-  }
+  # When the caller supplied a shared cache, reuse it across columns so that
+  # any (string, gp) measured during the natural-width pass earlier in
+  # pagination is served from the cache here.  Otherwise create a local
+  # height-only memo (preserves the pre-cache behaviour for the few callers
+  # that exercise this function directly).
+  if (is.null(cache)) cache <- new.env(hash = TRUE, parent = emptyenv())
 
   # Limit rows sampled for height estimation
   sample_rows <- if (is.finite(max_measure_rows) && n_rows > max_measure_rows) {
@@ -83,7 +79,7 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding,
         cell_str
       }
       nlines  <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]]))
-      h_grob  <- .memo_str_height(display_str, gp_key, cell_gp)
+      h_grob  <- .measure_text_dims_in(display_str, cell_gp, gp_key, cache)$h
       h_line  <- nlines * .height_in(grid::stringHeight("M"))
       extra   <- if (nlines > 1L) wrap_extra_pad_in else 0
       cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + extra
diff --git a/R/table_utils.R b/R/table_utils.R
index b81b17c..d846fad 100644
--- a/R/table_utils.R
+++ b/R/table_utils.R
@@ -52,13 +52,15 @@
 # between the header row and the first data row is more obvious.
 .measure_header_row_height <- function(resolved_cols, gp_tbl, cell_padding,
                                        line_height, breaks = NULL,
-                                       wrap_extra_pad_in = 0) {
+                                       wrap_extra_pad_in = 0,
+                                       cache = NULL) {
   v_pad_in <- .height_in(cell_padding[["top"]]) +
               .height_in(cell_padding[["bottom"]])
   h_lft_in <- .width_in(cell_padding[["left"]])
   h_rgt_in <- .width_in(cell_padding[["right"]])
   hdr_gp   <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"),
                                    line_height)
+  gp_key   <- paste0("header_row_lh", line_height)
 
   max(vapply(resolved_cols, function(cs) {
     label <- cs$label
@@ -67,8 +69,7 @@
                                       h_lft_in + h_rgt_in, hdr_gp, breaks)
     }
     nlines <- max(1L, length(strsplit(label, "\n", fixed = TRUE)[[1L]]))
-    grob   <- grid::textGrob(label, gp = hdr_gp)
-    h_grob <- .height_in(grid::grobHeight(grob))
+    h_grob <- .measure_text_dims_in(label, hdr_gp, gp_key, cache)$h
     h_line <- nlines * .height_in(grid::stringHeight("M"))
     extra  <- if (nlines > 1L) wrap_extra_pad_in else 0
     max(h_grob, h_line) + extra
@@ -255,15 +256,70 @@
 # Text measurement helpers
 # ---------------------------------------------------------------------------
 
+# Look up or measure (width, height) for `s` under `gp`, caching both when
+# an environment is supplied.  `gp_key` is a stable structural key for `gp`
+# (e.g. paste0("data_row_lh", line_height)) so callers using the same gp
+# share entries without hashing the gpar field-by-field per lookup.  When
+# `gp_key` is NULL the cache key is just the string -- appropriate for
+# caches whose entries are all measured under one gp (the caller owns
+# that invariant).
+#
+# When the cache misses we build the textGrob once and read BOTH dimensions
+# from it -- consolidating what would otherwise be two separate textGrob
+# constructions if a later caller needs the other dimension of the same
+# (gp, string).  Each construction re-runs grid's gpar validation, which
+# is the dominant cost; avoiding the duplicate is the point.
+.measure_text_dims_in <- function(s, gp, gp_key = NULL, cache = NULL) {
+  if (!nzchar(s)) return(list(w = 0, h = 0))
+  if (!is.null(cache)) {
+    key <- if (is.null(gp_key)) s else paste0(gp_key, "\x01", s)
+    if (exists(key, envir = cache, inherits = FALSE)) {
+      return(get(key, envir = cache, inherits = FALSE))
+    }
+  }
+  # D-48 safety guard.  After Phases 1/2 every internal caller runs
+  # inside the metric device opened by `.open_metric_device()`.  A
+  # future regression that forgets to open one would produce
+  # confusing downstream errors (convertWidth/convertHeight against
+  # the null device returns 0 or NA depending on grid version);
+  # fail fast here with a clear message instead.  Skipped on cache
+  # hits, so the cost is paid only on the slow path.
+  if (grDevices::dev.cur() == 1L) {
+    rlang::abort(paste0(
+      "Internal: .measure_text_dims_in() requires an active graphics ",
+      "device.  This is a bug in writetfl -- the caller should be ",
+      "invoked under `.open_metric_device()`."
+    ))
+  }
+  g <- grid::textGrob(s, gp = gp)
+  out <- list(w = .width_in(grid::grobWidth(g)),
+              h = .height_in(grid::grobHeight(g)))
+  if (!is.null(cache)) assign(key, out, envir = cache)
+  out
+}
+
 # Measure the maximum rendered text width (in inches) for a vector of strings.
 # Uses textGrob rather than stringWidth() because stringWidth() does not
 # accept a gp argument in all grid versions.
-.measure_max_string_width <- function(strings, gp) {
+#
+# When a `cache` env and `gp_key` are supplied, lookups go through the
+# consolidated (string -> (w, h)) cache from `.measure_text_dims_in()` so a
+# later height query for the same (string, gp) reuses the textGrob.
+.measure_max_string_width <- function(strings, gp, gp_key = NULL,
+                                      cache = NULL) {
   if (length(strings) == 0L) return(0)
   # Dedupe up front: real-world callers pass cell-string vectors where the
   # same value typically appears in many rows (e.g. category labels, NA
   # strings), so this saves grid round-trips with no behaviour change.
   uniq <- unique(strings)
+  if (!is.null(cache) && !is.null(gp_key)) {
+    return(max(vapply(uniq, function(s) {
+      lines <- strsplit(s, "\n", fixed = TRUE)[[1L]]
+      max(vapply(lines,
+                 function(ln) .measure_text_dims_in(ln, gp, gp_key, cache)$w,
+                 numeric(1L)))
+    }, numeric(1L))))
+  }
   max(vapply(uniq, function(s) {
     lines <- strsplit(s, "\n", fixed = TRUE)[[1L]]
     max(vapply(lines, function(ln) {
diff --git a/R/wrap.R b/R/wrap.R
index 7fd9acf..7204514 100644
--- a/R/wrap.R
+++ b/R/wrap.R
@@ -112,21 +112,25 @@ wrap_breaks_default <- function() {
   n     <- length(chars)
   if (n == 0L) return(list())
 
-  tokens   <- vector("list", n)        # over-allocate; trim at end
-  k        <- 0L
-  cur_buf  <- character(n)
-  cur_n    <- 0L
-  pending  <- ""                       # lead for the next emitted token
+  # Track each token by its [start, end] position in `s` rather than
+  # accumulating characters and pasting on flush.  `substr(s, st, ed)` is
+  # one C call per token; the previous `paste(cur_buf[seq_len(cur_n)],
+  # collapse = "")` allocated and joined cur_n elements per token.
+  tokens    <- vector("list", n)       # over-allocate; trim at end
+  k         <- 0L
+  cur_start <- 0L                       # 0 means "no token in progress"
+  cur_end   <- 0L
+  pending   <- ""
 
   flush <- function() {
-    if (cur_n > 0L) {
+    if (cur_start > 0L) {
       k <<- k + 1L
       tokens[[k]] <<- list(
-        text = paste(cur_buf[seq_len(cur_n)], collapse = ""),
+        text = substr(s, cur_start, cur_end),
         lead = pending
       )
-      cur_n   <<- 0L
-      pending <<- ""
+      cur_start <<- 0L
+      pending   <<- ""
     }
   }
 
@@ -136,13 +140,13 @@ wrap_breaks_default <- function() {
       flush()
       pending <- ch
     } else if (length(keep_chars) > 0L && ch %in% keep_chars) {
-      cur_n          <- cur_n + 1L
-      cur_buf[cur_n] <- ch
+      if (cur_start == 0L) cur_start <- i
+      cur_end <- i
       flush()
       pending <- ""
     } else {
-      cur_n          <- cur_n + 1L
-      cur_buf[cur_n] <- ch
+      if (cur_start == 0L) cur_start <- i
+      cur_end <- i
     }
   }
   flush()
@@ -159,6 +163,14 @@ wrap_breaks_default <- function() {
 # `cache`, if supplied, is an environment used as a (string -> width) memo
 # scoped to the caller's lifetime.  The caller is responsible for ensuring
 # every cache entry was measured under the same `gp`.
+#
+# This helper is in a hot loop (wrap module re-measures candidate strings
+# many times per cell).  An earlier attempt to delegate through
+# `.measure_text_dims_in()` for a unified cache cost ~20-30% on
+# wrap_heavy / big_df / preview_iris -- the double function call plus
+# list-wrapping per cache hit was unaffordable here.  The two helpers
+# share a textGrob construction strategy but stay separate functions for
+# the inner loop's benefit.
 .measure_text_width_in <- function(s, gp, cache = NULL) {
   if (!nzchar(s)) return(0)
   if (!is.null(cache) && exists(s, envir = cache, inherits = FALSE)) {
@@ -330,15 +342,11 @@ wrap_breaks_default <- function() {
   na_str   <- tbl$na_string
   max_rows <- tbl$max_measure_rows
 
-  scratch_file <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch_file, width = pg_width, height = pg_height)
+  # D-48: relies on the metric device opened upstream by
+  # `.open_metric_device()` rather than opening a scratch PDF here.
   outer_vp <- .make_outer_vp(margins)
   grid::pushViewport(outer_vp)
-  on.exit({
-    grid::popViewport()
-    grDevices::dev.off()
-    unlink(scratch_file)
-  }, add = TRUE)
+  on.exit(grid::popViewport(), add = TRUE)
 
   vapply(seq_len(n), function(j) {
     cs <- resolved_cols[[j]]
@@ -521,15 +529,11 @@ wrap_breaks_default <- function() {
   na_str   <- tbl$na_string
   max_rows <- tbl$max_measure_rows
 
-  scratch_file <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch_file, width = pg_width, height = pg_height)
+  # D-48: relies on the metric device opened upstream by
+  # `.open_metric_device()` rather than opening a scratch PDF here.
   outer_vp <- .make_outer_vp(margins)
   grid::pushViewport(outer_vp)
-  on.exit({
-    grid::popViewport()
-    grDevices::dev.off()
-    unlink(scratch_file)
-  }, add = TRUE)
+  on.exit(grid::popViewport(), add = TRUE)
 
   # Compute per-column floors (only meaningful for wrap-eligible cols).
   # Headers are rendered with the header_row gpar (typically bold) and data
@@ -641,17 +645,13 @@ wrap_breaks_default <- function() {
   line_height <- tbl$line_height %||% 1.05
   min_in      <- .width_in(tbl$min_col_width)
 
-  # Open scratch device once.  Closing happens via on.exit so it runs even
-  # under tryCatch failure inside the search loop.
-  scratch_file <- tempfile(fileext = ".pdf")
-  grDevices::pdf(scratch_file, width = pg_width, height = pg_height)
+  # D-48: relies on the metric device opened upstream by
+  # `.open_metric_device()` rather than opening a scratch PDF here.
+  # outer_vp pop happens via on.exit so it runs even under tryCatch
+  # failure inside the search loop.
   outer_vp <- .make_outer_vp(margins)
   grid::pushViewport(outer_vp)
-  on.exit({
-    grid::popViewport()
-    grDevices::dev.off()
-    unlink(scratch_file)
-  }, add = TRUE)
+  on.exit(grid::popViewport(), add = TRUE)
 
   result <- tryCatch({
     .height_balance_widths_impl(
diff --git a/design/ARCHITECTURE.md b/design/ARCHITECTURE.md
index 82104e9..91fe14b 100644
--- a/design/ARCHITECTURE.md
+++ b/design/ARCHITECTURE.md
@@ -30,42 +30,67 @@ export_tfl(x, file, preview, ...)                     [exported, S3 generic]
   │     │     wraps single ggplot/grob as list(list(content = x))
   │     └── .export_tfl_pages(...)                     — export_tfl.R (shared)
   │
+  ## All S3 methods follow the SAME D-48 device-lifecycle pattern:
+  ##   md <- .open_metric_device(file, pg_width, pg_height, preview)
+  ##   # ... call *_to_pagelist(...) ...
+  ##   if (!isFALSE(preview)) .close_metric_device(md)
+  ##   .export_tfl_pages(..., pdf_already_open = TRUE)
+  ## Pagination and (in normal mode) drawing both run on `md`.
+
   ├── export_tfl.tfl_table()                           — export_tfl.R
   │     ├── .validate_export_args(...)
-  │     ├── tfl_table_to_pagelist(...)                  — table_pagelist.R
-  │     └── .export_tfl_pages(...)
+  │     ├── .open_metric_device(file, pg_width, pg_height, preview)
+  │     │     opens pdf(file) (normal) or pdf(NULL) (preview);
+  │     │     on.exit registered on this frame.
+  │     ├── pagination_cache <- new.env(...)
+  │     ├── tfl_table_to_pagelist(..., text_dim_cache = pagination_cache)
+  │     ├── drawing_cache <- pagination_cache (normal) or new.env() (preview)
+  │     ├── for each tfl_table_grob: attach $text_dim_cache <- drawing_cache
+  │     ├── if (preview) .close_metric_device(md)
+  │     └── .export_tfl_pages(..., pdf_already_open = TRUE)
   │
   ├── export_tfl.ggtibble()                            — ggtibble.R
   │     ├── .validate_export_args(...)
+  │     ├── .open_metric_device(file, pg_width, pg_height, preview)
   │     ├── ggtibble_to_pagelist(x, sub_tfl, sub_tfl_sep,  — ggtibble.R
   │     │                       sub_tfl_collapse, sub_tfl_prefix)
   │     │     per row, appends "label: value; ..." suffix to caption via
   │     │     .apply_sub_tfl_caption() (sub_tfl.R); raw column names used
   │     │     as labels (no colspec system for ggtibble)
-  │     └── .export_tfl_pages(...)
+  │     ├── if (preview) .close_metric_device(md)
+  │     └── .export_tfl_pages(..., pdf_already_open = TRUE)
   │
   ├── export_tfl.gt_tbl()                              — gt.R
   │     ├── rlang::check_installed("gt")
   │     ├── .validate_export_args(...)
+  │     ├── .open_metric_device(file, pg_width, pg_height, preview)
   │     ├── gt_to_pagelist(x)                           — gt.R
-  │     └── .export_tfl_pages(...)
+  │     ├── if (preview) .close_metric_device(md)
+  │     └── .export_tfl_pages(..., pdf_already_open = TRUE)
   │
   ├── export_tfl.list()                                — export_tfl.R
   │     ├── .validate_export_args(...)
+  │     ├── .open_metric_device(file, pg_width, pg_height, preview)
   │     ├── [all gt_tbl?] → gt_to_pagelist() per element
   │     ├── [otherwise]   → coerce_x_to_pagelist(x)
-  │     └── .export_tfl_pages(...)
+  │     ├── if (preview) .close_metric_device(md)
+  │     └── .export_tfl_pages(..., pdf_already_open = TRUE)
   │
-  └── .export_tfl_pages(pages, file, ...)              — export_tfl.R (shared)
+  └── .export_tfl_pages(pages, file, ..., pdf_already_open = TRUE)  — export_tfl.R
   └── [preview = FALSE] PDF loop:
-  │     grDevices::pdf(file, ...)
-  │     on.exit(dev.off(), add = TRUE)
+  │     ## Device opened upstream by .open_metric_device() in the
+  │     ## S3 dispatcher; this function does NOT re-open it.  Legacy
+  │     ## fallback (`pdf_already_open = FALSE`) preserved for
+  │     ## external callers that invoke .export_tfl_pages() directly.
   │     for i in seq_along(pages):
   │       build_page_args(pages[[i]], dots, page_num, i, n)  — utils.R
   │       export_tfl_page(x = pages[[i]], ...)               [exported]
   │     invisible(normalizePath(file))
   │
   └── [preview = TRUE or integer] Preview loop:
+        ## In preview mode the dispatcher already closed the transient
+        ## pdf(NULL) metric device; the user's pre-existing device is
+        ## the render target.
         for j in seq_along(page_idx):
           build_page_args(pages[[i]], dots, page_num, i, n)
           export_tfl_page(x = pages[[i]], ..., preview = TRUE)
@@ -141,7 +166,8 @@ export_tfl(x = tfl_table_obj, ...)                    [exported]
         │     concatenate per-group pages → return
         │
         ├── compute_table_content_area(...)             — table_pagelist.R
-        │     scratch device + outer_vp to measure annotation heights
+        │     outer_vp (no scratch device — uses metric device opened by
+        │     .open_metric_device() upstream) to measure annotation heights
         ├── resolve_col_specs(tbl)                      — table_columns.R
         ├── compute_col_widths(resolved_cols, ...)      — table_columns.R
         │     ├── auto-detect wrap eligibility via       — wrap.R
@@ -151,7 +177,7 @@ export_tfl(x = tfl_table_obj, ...)                    [exported]
         │     │   .column_min_token_width_in(strings, gp, breaks)
         │     │   as the per-column floor
         │     └── paginate_cols(...)
-        ├── [scratch device + outer_vp] measure heights:
+        ├── [outer_vp on metric device] measure heights:
         │     .measure_header_row_height()              — table_utils.R
         │     measure_row_heights_tbl() → cell_h_mat    — table_rows.R
         │       Per-cell height matrix [nrow × ncol]; each entry includes
@@ -214,7 +240,7 @@ export_tfl(x = gt_tbl_obj, ...)                      [exported]
         ├── .gt_content_height(...)                     — gt.R
         │     reuses compute_table_content_area()
         ├── .gt_grob_height(grob, ...)                  — gt.R
-        │     measures grob height in scratch device
+        │     measures grob height under the metric device (D-48)
         │
         ├── [fits on one page] → single page spec
         │
diff --git a/design/DECISIONS.md b/design/DECISIONS.md
index f9260e4..60340e2 100644
--- a/design/DECISIONS.md
+++ b/design/DECISIONS.md
@@ -1483,3 +1483,452 @@ never touches `drawDetails.tfl_table_grob`.
 **Verification:**
 - Full `devtools::test()` green before and after.
 - `examples/bench_compare.R` reproduces the table above.
+
+---
+
+## D-45: Fast-path cell drawing, position-based tokenizer, and per-page formatting
+
+**Decision:** Three independent, profile-driven changes layered on top of
+D-43/D-44:
+
+1. **Fast path in `.draw_cell_text()`** — when the measured text width
+   plus horizontal padding is no greater than the column width, draw
+   directly in the parent viewport instead of pushing a per-cell
+   clipping viewport.  The clip viewport (which created+pushed+popped
+   per cell) is still used in the slow path, where text might bleed.
+2. **Drop redundant `convertUnit` calls in `.draw_cell_text()`'s clip
+   path** — the clip viewport is constructed with explicit
+   `unit(clip_w, "inches")` / `unit(row_h, "inches")` dimensions, so
+   the post-push `vp_w2 <- .width_in(unit(1, "npc"))` /
+   `vp_h2 <- .height_in(unit(1, "npc"))` calls were re-measuring known
+   values.  Replaced with the literals.
+3. **Position-based tokenizer in `.tokenize_for_wrap()`** — replaced the
+   per-token `paste(cur_buf[seq_len(cur_n)], collapse = "")` with
+   `substr(s, cur_start, cur_end)`.  One C call per token instead of
+   `n` element accumulations + `paste`.
+4. **Per-page formatting hoist in `drawDetails`** — replaced per-cell
+   `.fmt_cell(data[[cs$col]][i], na_str)` with a single
+   `.fmt_cell_vec(data[[cs$col]][rows], na_str)` per column built before
+   the row loop.
+
+**Context (profiling, round 3):**  Post-D-44, the `core_paginate`
+Rprof showed `table_draw.R:403` (the `.draw_cell_text()` call site)
+accumulating **73% of total time**.  Drilling into `.draw_cell_text()`:
+
+| Line | Self self.pct | Total total.pct | What |
+|------|--------------|-----------------|------|
+| 597 (measure) | 0.73% | 13.76% | text-width measurement (cached, but still per cell) |
+| 603 (vp_clip) | 1.17% | 13.03% | viewport creation (4 `grid::unit()` + `viewport()`) |
+| 611 (pushViewport) | — | — | pushing the clip vp |
+| 625 (grid.text)   | 0.29% | 29.14% | the actual text draw |
+| 633 (popViewport) | 0.15% | 6.44% | popping |
+
+The viewport push/pop pair (~20% of total) is only meaningful when text
+overflows the column.  For all-numeric tables (iris) and post-wrap
+cells, the text always fits and the clip is redundant.
+
+**Measured improvement** (medians, 15 iterations per core scenario, 3
+for `wrap_demos`, 5 independent runs averaged; baseline = main + D-43 +
+D-44 at `7c9484c`):
+
+| Scenario        | Before    | After     | Δ                       |
+|-----------------|-----------|-----------|-------------------------|
+| `core_small`    | 146 ms    | 111 ms    | **~24% ↓**              |
+| `core_wrap`     | 206 ms    | 209 ms    | within run-to-run noise |
+| `core_paginate` | 397 ms    | 270 ms    | **~32% ↓**              |
+| `figure_multi`  | 318 ms    | 330 ms    | within noise            |
+| `wrap_demos`    | 2.97 s    | 2.80 s    | **~6% ↓**               |
+
+`core_paginate` (the iris-heavy draw scenario) and `core_small`
+(short table going through the same draw loop) get the biggest gains.
+`wrap_demos` had high run-to-run variance with only n=3 iterations;
+averaged across 5 independent runs it shows a steady ~6% reduction.
+`core_wrap` is unchanged because its bottleneck is height-balance
+measurement, not cell drawing.  `figure_multi` doesn't touch
+`drawDetails.tfl_table_grob`.
+
+**Alternatives considered and rejected:**
+- *Regex-based vectorized tokenizer* — `regmatches`/`strsplit`-driven
+  reimplementation would shave more time but the algorithm has subtle
+  drop-vs-keep_before semantics (multiple consecutive drops collapse to
+  one lead, keep_before chars stay with the preceding token).  The
+  position-based change keeps the existing algorithm verbatim and
+  preserves the comment-level explanation.  Faster vectorization was
+  not worth the risk-of-regression.
+- *Pre-computed parent-viewport coordinates passed into
+  `.draw_cell_text()`* — would push more work into the drawDetails
+  body.  The current shape (compute once inside `.draw_cell_text()`) is
+  shorter and the per-call overhead is now tiny.
+- *Skipping the text-width measurement entirely when `nchar(text)` is
+  small enough* — would require a font-aware upper bound on per-char
+  width; fragile across font sizes and families.  The cached
+  measurement is already cheap on cache hit.
+
+**Files touched:**
+- `R/table_draw.R`:
+  - `drawDetails.tfl_table_grob`: hoisted `cell_strs_by_col` (vectorised
+    formatting), and the existing hoists from D-44 are unchanged.
+  - `.draw_cell_text`: added fast/slow branch on `needed <= col_width_in`,
+    removed the two post-push `convertUnit` calls in the slow path.
+- `R/wrap.R`:
+  - `.tokenize_for_wrap`: track `cur_start`/`cur_end` positions and
+    emit text via `substr()` instead of accumulating + pasting.
+
+**Verification:**
+- Full `devtools::test()` green before and after.
+- `examples/bench_compare.R` reproduces the table above (averaging over
+  multiple runs is needed for `wrap_demos`'s lower-iteration sample).
+
+---
+
+## D-46: Cross-page clip-width cache shared across all pages of one tfl_table
+
+**Decision:** Construct one list of clip-width cache envs (one per column
+in `resolved_cols`) at the top of `tfl_table_to_pagelist()` and pass it
+into every `build_table_grob()` call.  Every page-grob built for the
+same table holds a reference to the same env list, so
+`drawDetails.tfl_table_grob()` can reuse cached measurements across all
+row-page x col-group combinations.
+
+**Context:** D-44 introduced a per-page clip-width cache.  Profiling and
+focused benchmarking showed that for tables spanning many pages, the
+same cell text typically appears on every page (numeric formats,
+visit labels, category codes), so each page was re-measuring the same
+strings.  A single env shared across pages eliminates that
+inter-page duplication while preserving the intra-page hits D-44
+already provided.
+
+**Change:**
+- `R/table_pagelist.R` -- inside `tfl_table_to_pagelist()` (and the
+  table1/flextable equivalents that call `build_table_grob` similarly),
+  `clip_width_caches <- lapply(seq_along(resolved_cols), function(k)
+  new.env(...))` is built once, then threaded to every
+  `build_table_grob()` invocation.
+- `R/table_draw.R` -- `build_table_grob()` gains a `clip_width_caches`
+  argument stored as a grob slot.  `drawDetails.tfl_table_grob()` now
+  prefers `x$clip_width_caches[x$col_group_idx]` over creating fresh
+  per-page envs, falling back to the old behaviour when the slot is
+  absent (e.g. for grobs built outside the normal pipeline).
+
+The cache is keyed by column index into `resolved_cols` (stable across
+col-group splits), not by the local `j` index in `page_cols` (which
+re-numbers per col-group).
+
+**Measured improvement** (medians, 30 iterations; baseline = round-3
+HEAD at `7fcf208` = D-45 shipped):
+
+| Scenario        | Before    | After     | Δ           |
+|-----------------|-----------|-----------|-------------|
+| `iris5p` (150 rows / 5 pages) | 264 ms | 258 ms | ~2% (modest) |
+| `big_df` (500 rows / ~17 pages, 4 cols incl. repeating categoricals) | 1560 ms | 1330 ms | **~15% ↓** |
+
+The gain scales with page count and amount of inter-page duplication.
+Short single-page tables see negligible benefit (the per-page cache
+already caught everything).  Realistic multi-page clinical listings
+hit the design sweet spot.
+
+**Alternatives considered and rejected:**
+- *Cache spanning pagination (scratch device) + drawing (render device)*
+  -- the documented re-measurement in `.draw_cell_text()` exists
+  because font metrics can legitimately differ between the scratch
+  PDF and the render device (e.g. knitr PNG vs PDF for preview mode).
+  A cross-device cache would risk inaccurate placement, which the
+  project owner explicitly excluded.
+- *Consolidated (width, height) cache in pagination* -- would save
+  ~half the gpar-validation overhead in `.measure_max_string_width()`
+  + `.memo_str_height()` by sharing one textGrob's measurements
+  between Pass 1 (widths) and Pass 2 (heights).  Independent of this
+  change; worth pursuing only if benchmarks justify the threading
+  cost.  Deferred.
+- *Package-level env keyed by tbl-object identity* -- zero plumbing,
+  but a global mutable env is harder to reason about (parallelism,
+  leaked entries).  The explicit list-threaded approach is clearer.
+
+**Files touched:**
+- `R/table_pagelist.R` -- one new `clip_width_caches` construction
+  block in `.tfl_table_to_pagelist_default()` and the corresponding
+  pass-through in the `build_table_grob()` call.
+- `R/table_draw.R` -- `build_table_grob()` accepts and stores the
+  cache; `drawDetails.tfl_table_grob()` reads it via
+  `x$clip_width_caches`.
+
+**Verification:**
+- Full `devtools::test()` green.
+- `examples/bench_focused.R` (n=30) reproduces the `iris5p` /
+  `big_df` table above.
+
+---
+
+## D-47: Consolidated width+height text-dimension cache during pagination
+
+**Decision:** Replace the two separate per-call height memos with a single
+`(gp_key, string) -> list(w, h)` cache that lives for one
+`.tfl_table_to_pagelist_default()` call.  Every textGrob built for
+measurement during pagination populates both dimensions; subsequent
+lookups for either dimension reuse the existing entry.
+
+**Context:** Profiling after D-46 showed that pagination still constructed
+two textGrobs for every unique cell string -- one in
+`.measure_max_string_width()` (Pass 1, widths) and one in
+`measure_row_heights_tbl()` (Pass 2, heights).  Each construction re-runs
+grid's `validGP` / `set.gpar` chain, the dominant per-call cost.  Pass 1
+and Pass 2 use the same gpar for matching column categories
+(`data_row`, `group_col`, `header_row`), so the same (gp, string) shows
+up twice in pagination work.
+
+**Change:**
+- `R/table_utils.R` -- new `.measure_text_dims_in(s, gp, gp_key, cache)`
+  helper that builds the textGrob once, reads both dimensions, and
+  caches the pair.  Existing `.measure_max_string_width()` and
+  `.measure_header_row_height()` now accept an optional cache and
+  delegate per-string measurement to the helper when provided.
+- `R/table_rows.R` -- `measure_row_heights_tbl()` accepts a cache and
+  replaces its inline `.memo_str_height()` closure with the same helper.
+- `R/table_columns.R` -- `compute_col_widths()` and `.resolve_natural_widths()`
+  thread a `cache` argument; the natural-width pass builds the
+  appropriate structural `gp_key` for cell-vs-header gpars and passes
+  it down.
+- `R/table_pagelist.R` -- `.tfl_table_to_pagelist_default()` creates one
+  `text_dim_cache <- new.env(hash = TRUE, parent = emptyenv())` at the
+  top and passes it to `compute_col_widths()`,
+  `.measure_header_row_height()`, and `measure_row_heights_tbl()`.
+
+The `gp_key` namespace matches what `measure_row_heights_tbl()` already
+used internally (e.g. `"data_row_lh1.2"`, `"group_col_lh1.2"`,
+`"header_row_lh1.2"`).  All callers that share a category resolve to the
+same key, so width-then-height (Pass 1 -> Pass 2) on the same `(category,
+string)` is a cache hit.
+
+**Why crossing scratch-device boundaries is safe here:** the multiple
+PDF scratch devices opened during pagination
+(`.resolve_natural_widths`, `.run_pagination_iter`) all use identical
+`(pg_width, pg_height)` settings, identical fonts, and identical R
+session state.  PDF device font metrics are deterministic given those
+inputs, so values measured on one scratch device are equal to what
+the next scratch device would produce.  The cache does NOT cross into
+the render-device drawing phase -- that boundary still goes through
+`.draw_cell_text()`'s separate re-measurement (D-44/D-46 territory).
+
+**Measured improvement** (n=30 iterations, baseline = round-3 HEAD at
+`61bd26a` = D-46 shipped; min-of-mins across 3 independent runs to
+suppress system-load noise):
+
+| Scenario | Before (min) | After (min) | Δ          |
+|----------|--------------|-------------|------------|
+| `iris5p` (150 rows / 5 pages) | 329 ms | 255 ms | **~22% ↓** |
+| `big_df` (500 rows / ~17 pages, 4 cols) | 1.45 s | 1.36 s | ~6% ↓ |
+
+`iris5p` is the targeted scenario for pagination-side optimisation:
+relatively small data, many measurement-heavy passes.  `big_df` is
+dominated by drawing rather than measurement (D-44/D-46 territory), so
+the pagination cache helps less in relative terms.
+
+**Alternatives considered and rejected:**
+- *Caching width and height under separate keys* -- requires the cache
+  consumer to ask for the right dimension and stores two entries per
+  unique string.  Consolidated `(w, h)` tuple is one entry, one
+  textGrob construction, and any pass that has either dim gets the
+  other for free.
+- *Hashing the full gpar object as the cache key* -- gpars carry many
+  fields; per-lookup hashing costs more than the structural-key
+  approach the codebase already uses internally.  Callers pre-compute
+  `gp_key` once per gpar.
+- *Threading the cache further down into `.compute_col_min_widths()`
+  and `.height_balance_widths_impl()`* -- those have their own
+  per-call caches keyed differently (per-token, per-(j, width)).
+  Bringing them into the unified cache would require harmonising key
+  schemes and is left for a follow-up if profile data justifies it.
+
+**Files touched:**
+- `R/table_utils.R` -- new `.measure_text_dims_in()` helper; cache-aware
+  `.measure_max_string_width()` and `.measure_header_row_height()`.
+- `R/table_rows.R` -- cache-aware `measure_row_heights_tbl()`.
+- `R/table_columns.R` -- cache-aware `compute_col_widths()` and
+  `.resolve_natural_widths()`.
+- `R/table_pagelist.R` -- cache construction in
+  `.tfl_table_to_pagelist_default()` and threading into the three
+  pagination measurement entry points.
+
+**Verification:**
+- Full `devtools::test()` green.
+- `examples/bench_focused.R` (n=30) reproduces the table above.
+
+---
+
+## D-48: Single device per export_tfl() + cross-phase text-dim cache
+
+**Decision:** Open exactly one PDF device per `export_tfl()` call.
+Pagination measurements and the per-page draw loop both run on that
+device.  The `text_dim_cache` populated during pagination is attached
+to every `tfl_table_grob` and reused by `.draw_cell_text()` so the
+per-cell width re-measurement at draw time becomes a single env
+lookup.
+
+**Context:** D-46/D-47 brought the pagination cache inside one
+process-wide env, but the cache documented its boundary as the
+render-device transition: `.draw_cell_text()` always re-measured
+because the scratch PDF used for pagination and the user-visible
+render device could differ in font metrics (e.g. PNG in knitr).
+
+In practice the package's "render device" is *always* a PDF -- either
+`pdf(file)` opened by `.export_tfl_pages()` in normal mode, or the
+user's own device in preview mode.  Phase-0 profiling showed
+`.draw_cell_text`'s re-measure consuming **8.87 %** of total time in
+`core_paginate` -- the line was the single largest target.  Eight
+helpers across `R/` opened their own scratch `pdf()` devices for
+font-metric resolution, each costing ~5 ms; while never individually
+hot, collectively they added measurable overhead and complicated the
+device-lifecycle invariant.
+
+**Change:**
+
+1. New helpers in `R/export_tfl.R`:
+   - `.open_metric_device(file, pg_width, pg_height, preview, envir)`
+     -- opens `pdf(file)` in normal mode or `pdf(NULL, ...)` in
+     preview mode, and registers `on.exit({...; dev.off()}, add =
+     TRUE)` on the CALLER's frame so a mid-execution error still
+     closes the device.
+   - `.close_metric_device(md)` -- idempotent close; preview-mode
+     callers invoke it explicitly after pagination so the user's
+     pre-existing device is restored for drawing.
+
+2. Every `export_tfl` S3 method calls `.open_metric_device()` BEFORE
+   pagination:
+   - `.default`, `.tfl_table`, `.list`, `.ggtibble`, `.gt_tbl`,
+     `.VTableTree`, `.flextable`, `.table1`.
+   - `.export_tfl_pages()` gains a `pdf_already_open = FALSE`
+     parameter; the dispatchers pass `TRUE` so it skips its own
+     `pdf(file)` / `dev.off()`.
+
+3. Eight scratch-device opens deleted from `R/`:
+   - `compute_table_content_area()`, `.run_pagination_iter()` in
+     `R/table_pagelist.R`
+   - `.resolve_natural_widths()` in `R/table_columns.R`
+   - `.compute_col_min_widths()`, `.compute_wrapped_widths()`,
+     `.height_balance_widths()` in `R/wrap.R`
+   - `.gt_grob_height()` in `R/gt.R`
+   - `.rtables_lpp_cpp()` in `R/rtables.R`
+   Each helper now relies on the upstream metric device.  Outer-
+   viewport push/pop preserved (still needed for inch resolution
+   against the post-margin content area).
+
+4. Cross-phase cache plumbing in `R/table_pagelist.R` and
+   `R/export_tfl.R`:
+   - `tfl_table_to_pagelist()` accepts an optional `text_dim_cache`
+     parameter; when supplied, that env is reused instead of being
+     allocated locally and discarded.
+   - `export_tfl.tfl_table()` allocates `pagination_cache`, threads
+     it through pagination, then attaches it to every
+     `tfl_table_grob` in PDF mode (`drawing_cache <- pagination_cache`).
+     Preview mode attaches a fresh **empty** env instead, so
+     drawing's lookups all miss and the function falls through to
+     per-cell measurement on the user's render device -- preserving
+     today's preview behaviour exactly.
+
+5. Cache consumption in `R/table_draw.R`:
+   - `drawDetails.tfl_table_grob()` extracts `x$text_dim_cache` and
+     computes `gp_key_by_col` matching pagination's
+     ("data_row_lh<lh>", "group_col_lh<lh>") namespace.
+   - `.draw_cell_text()` gains `text_dim_cache` and `gp_key`
+     arguments.  Width lookup order: text_dim_cache hit ->
+     per-column `width_cache` (D-46) -> fresh measurement.
+   - `.draw_header_row()` uses the matching `"header_row_lh<lh>"` key.
+
+6. Safety guard in `.measure_text_dims_in()`:
+   `if (grDevices::dev.cur() == 1L) rlang::abort(...)` on the slow
+   path catches future regressions where a caller forgets to open
+   the metric device.
+
+**Why crossing the render-device boundary is safe here:** the
+single device opened by `.open_metric_device()` IS the render
+device in normal mode (`pdf(file)`).  Pagination and drawing
+measure against identical font metrics by construction -- no
+inference required.  In preview mode the cache is empty and the
+boundary still holds via per-cell re-measurement.
+
+**Measured impact** (n=30 per scenario; min-of-mins across 3 runs,
+b196ca3 vs HEAD on the same machine):
+
+| Scenario       | Baseline | After   | Δ        |
+|----------------|----------|---------|----------|
+| `iris5p`       | 213 ms   | 200 ms  | **−6 %** |
+| `big_df`       | 1.25 s   | 1.10 s  | **−12 %**|
+| `wrap_heavy`   | 2.24 s   | 1.94 s  | **−13 %**|
+| `preview_iris` | 194 ms   | 166 ms  | **−14 %**|
+| `figure_multi` | 619 ms   | 605 ms  | −2 %     |
+
+`figure_multi` is within variance, as expected: the ggplot pipeline
+neither populates nor reads the cache.  `preview_iris` benefits
+from scratch-device elimination even though its cache is empty by
+design.
+
+Profile signal (Rprof @ 0.01s, core_paginate):
+
+| Site                                | Baseline | After  |
+|-------------------------------------|----------|--------|
+| `.measure_text_width_in` inside `.draw_cell_text` | 8.87 % total | 0.5 % total |
+
+A 17× drop on the line that motivated the refactor.
+
+**Alternatives considered and rejected:**
+
+- *Unify all four caches (D-46 clip_width_caches, D-47 text_dim_cache,
+  wrap.R width_cache, wrap.R tokenize cache) into one helper.*  An
+  early prototype routed `.measure_text_width_in()` through
+  `.measure_text_dims_in()` for a single `(w, h)` cache contract.  It
+  cost 20-30 % on `wrap_heavy`, `big_df`, and `preview_iris` because
+  the wrap module's inner loop calls .measure_text_width_in() many
+  times per cell and the extra function call + list-wrapping on
+  cache hits dominated.  Documented in the
+  `.measure_text_width_in()` source comment; the two helpers stay
+  separate because the inner loop cannot afford the indirection.
+- *Use the user's device for pagination in preview mode (true single
+  device).*  Would make preview pagination decisions vary with the
+  user's render device (PNG vs screen vs PDF).  Today's pagination
+  is device-agnostic relative to user device; preserving that
+  contract was a user-stated constraint.  Preview mode therefore
+  opens a transient `pdf(NULL)` for pagination, closes it before
+  drawing.
+
+- *Drop `.export_tfl_pages`'s legacy `pdf_already_open = FALSE`
+  path.*  Defensible cleanup (no caller in package passes FALSE
+  after Phase 1c), but kept defensive in case external code
+  invokes `.export_tfl_pages()` directly.
+
+**Files touched:**
+- `R/export_tfl.R` -- `.open_metric_device()`, `.close_metric_device()`,
+  wired into `.default`, `.tfl_table`, `.list`;
+  `.export_tfl_pages()` gains `pdf_already_open` parameter.
+- `R/ggtibble.R`, `R/gt.R`, `R/rtables.R`, `R/flextable.R`,
+  `R/table1.R` -- wired in their respective S3 methods.
+- `R/table_pagelist.R` -- `text_dim_cache` plumbing through
+  `tfl_table_to_pagelist`, `.tfl_table_to_pagelist_default`,
+  `.tfl_table_to_pagelist_sub_tfl`; `compute_table_content_area`
+  and `.run_pagination_iter` drop their scratch devices.
+- `R/table_columns.R` -- `.resolve_natural_widths` drops its
+  scratch device.
+- `R/wrap.R` -- three helpers drop their scratch devices; the
+  `.measure_text_width_in` comment documents why it stays separate
+  from `.measure_text_dims_in`.
+- `R/table_draw.R` -- `.draw_cell_text` and `.draw_header_row`
+  consume the cache; `drawDetails.tfl_table_grob` extracts it from
+  the grob.
+- `R/table_utils.R` -- `.measure_text_dims_in` accepts `NULL`
+  gp_key; safety guard for missing device.
+- `R/gt.R`, `R/rtables.R` -- scratch device opens removed.
+- `tests/testthat/test-export_tfl.R` -- 8 new tests (4 helper tests
+  for `.open_metric_device` / `.close_metric_device`, plus
+  device-count, font-metric equality, safety-guard fires, cache
+  shape).
+- `examples/bench_focused.R` -- added `wrap_heavy`, `preview_iris`,
+  `figure_multi` scenarios.
+- `design/perf-baseline-notes.md` -- new working note with the
+  Phase-0 and Phase-5 numbers and the per-phase outcome table.
+
+**Verification:**
+- Full `devtools::test()` -- 587 passing, 0 failed.
+- `examples/bench_focused.R` (n=30, 3 runs) reproduces the table
+  above.
+- `grep -n "grDevices::pdf(" R/` shows exactly three occurrences,
+  all in `R/export_tfl.R` (the `.open_metric_device` normal/preview
+  paths + the defensive legacy fallback in `.export_tfl_pages`).
diff --git a/design/DESIGN.md b/design/DESIGN.md
index b46cdb6..e16d186 100644
--- a/design/DESIGN.md
+++ b/design/DESIGN.md
@@ -163,22 +163,71 @@ pagination is consistent with the actual output.
 
 ---
 
-## Why does `tfl_table_to_pagelist()` open a scratch PDF device?
+## Why a single device per `export_tfl()` call? (D-48)
 
 Font metrics (`stringHeight`, `stringWidth`, `grobHeight`) require an active
-graphics device. A scratch `pdf(NULL, ...)` device is opened at measurement
-time to provide a consistent rendering context (same page dimensions as the
-final output) without writing to disk.
+graphics device.  Earlier designs opened ~8 small scratch `pdf(...)` devices
+across the pagination pipeline; D-48 collapsed them all to **one** device per
+`export_tfl()` call:
+
+- **Normal mode (`preview = FALSE`):** the S3 dispatcher calls
+  `.open_metric_device()` immediately on entry, which opens
+  `grDevices::pdf(file, width = pg_width, height = pg_height)` and
+  registers an `on.exit` handler on the dispatcher's frame.  Pagination
+  measurements and the per-page draw loop both run on this device.
+  `on.exit` ensures the device closes cleanly even if pagination errors
+  out (a long-running R session would otherwise leak file handles up to
+  the 64-device limit).
+
+- **Preview mode:** the dispatcher opens a transient `pdf(NULL, ...)` for
+  pagination, closes it explicitly via `.close_metric_device()` after
+  pagination, and lets the per-page draw loop target the user's
+  pre-existing device.  This keeps preview-mode pagination decisions
+  byte-for-byte identical to normal mode (both run on PDF font metrics
+  with matching page dimensions).
+
+All internal measurement helpers — `compute_table_content_area()`,
+`.resolve_natural_widths()`, `.run_pagination_iter()`,
+`.compute_col_min_widths()`, `.compute_wrapped_widths()`,
+`.height_balance_widths()`, `.gt_grob_height()`, `.rtables_lpp_cpp()` —
+now **require** an active device with matching page dimensions.  The
+contract is enforced by a `dev.cur() == 1L` safety guard inside
+`.measure_text_dims_in()`: a regression that calls a helper without a
+device fails fast with a clear "requires an active graphics device"
+abort, rather than producing silently wrong measurements.
+
+---
+
+## Why thread `text_dim_cache` from pagination to the drawing phase? (D-48)
+
+D-47 introduced a process-wide `(gp_key, string) -> list(w, h)` cache
+shared across the natural-width pass, the row-height pass, and the
+header-height pass.  D-46 added a per-column `clip_width_caches` shared
+across all pages of one `tfl_table`.
+
+D-48 extends both: the pagination cache is **attached to every
+`tfl_table_grob` in the pagelist** (via `x$text_dim_cache`).
+`drawDetails.tfl_table_grob` extracts it; `.draw_cell_text()` consults it
+before falling through to the per-column `width_cache` and finally to
+fresh measurement.
+
+The pre-D-48 boundary was justified by "the render device may differ
+from pagination's scratch device" — but after D-48 there *is no
+difference* in normal mode (pagination and rendering both happen on
+`pdf(file)`).  In preview mode the dispatcher attaches an **empty** env
+so every lookup misses and the function falls back to per-cell
+measurement on the user's render device — preserving today's preview
+behaviour exactly.
 
 ---
 
 ## Why store `cell_heights_in_mat` and `cont_row_h_in` in the gTree?
 
 The `drawDetails` method is called by `grid` at render time, potentially long
-after paginate time. Pre-computing cell heights during pagination (when a
-scratch device is already open) and caching them in the grob avoids opening
-another device at draw time and ensures layout consistency: the heights used
-for pagination and the heights used for drawing are identical.
+after paginate time. Pre-computing cell heights during pagination (when the
+metric device is open) and caching them in the grob avoids re-measurement
+at draw time and ensures layout consistency: the heights used for pagination
+and the heights used for drawing are identical.
 
 The grob caches the *full* per-cell height matrix rather than per-row
 scalars because the per-row height for a given page depends on which other
diff --git a/design/perf-baseline-notes.md b/design/perf-baseline-notes.md
new file mode 100644
index 0000000..685132e
--- /dev/null
+++ b/design/perf-baseline-notes.md
@@ -0,0 +1,240 @@
+# Performance baseline notes — single-device + cache-through-drawing refactor
+
+Working notes captured during Phase 0 of the refactor that extends
+PR #39 with three layered changes:
+
+1. Single device per `export_tfl()` call (eliminate scratch
+   `pdf()` opens in `R/`).
+2. Cache the pagination-phase text-dim measurements through to
+   `.draw_cell_text()` (D-47's cache currently stops at the render-
+   device boundary).
+3. Unify the four small per-purpose caches around the shared
+   `.measure_text_dims_in()` helper.
+
+Baseline HEAD: `b196ca3` ("Consolidate width+height measurement into one
+cache during pagination"; D-47 in `design/DECISIONS.md`).
+
+The user explicitly rejected premature optimization
+(`memory/project_perf_profiling.md`). Profile-first, then commit only
+the changes the data plus the maintainability argument justifies.
+
+## Wall-clock baseline (bench_focused.R, n = 30)
+
+| Scenario       | min       | median    | iqr      |
+|----------------|-----------|-----------|----------|
+| `iris5p`       | 209 ms    | 231 ms    | 21.9 ms  |
+| `big_df`       | 1.06 s    | 1.16 s    | 100 ms   |
+| `wrap_heavy`   | 1.79 s    | 1.94 s    | 152 ms   |
+| `preview_iris` | 143 ms    | 155 ms    | 11.4 ms  |
+| `figure_multi` | 528 ms    | 601 ms    | 73.1 ms  |
+
+These will be the comparison baselines for every later commit.
+
+## Profile snapshots (profile_writetfl.R --quick, Rprof @ 0.01s)
+
+Top writetfl source lines by self-time, summarised across the four
+core scenarios. **The percentages are self/total of *Rprof samples*
+during a 20-rep loop**, so they capture relative cost, not absolute
+time.
+
+### `core_paginate` (iris -> ~5 pages)
+
+| Line                       | self.pct | total.pct | What it is                              |
+|----------------------------|----------|-----------|-----------------------------------------|
+| `table_draw.R#412`         | 1.4 %    | **55.7 %**| `.draw_cell_text(display_str, cs$align, ...)` call site |
+| `table_draw.R#625`         | 0.4 %    | **47.9 %**| inside `.draw_cell_text` (cap clip + grid.text) |
+| `table_utils.R#339`        | 1.1 %    | 2.5 %     | `.fmt_cell_vec()` body                  |
+| `resolve_gp.R#15`          | 1.1 %    | 1.1 %     | `merge_gpar()` field loop               |
+| `table_draw.R#606`         | 0.7 %    | **8.9 %** | `text_w <- .measure_text_width_in(...)` — D-47 boundary line, Phase 3 target |
+| `wrap.R#168`               | 0.7 %    | 0.7 %     | `.measure_text_width_in()` body         |
+| `table_utils.R#250`        | 0.35 %   | 11.7 %    | `.measure_max_string_width()` body      |
+| `table_rows.R#71/81/83`    | 0.35 %   | 1–3 %     | row-height measurement loop             |
+
+### `core_wrap` (clinical_df with wrap_balance = "height")
+
+| Line                       | self.pct | total.pct | What it is                              |
+|----------------------------|----------|-----------|-----------------------------------------|
+| `wrap.R#139`               | 2.5 %    | 3.3 %     | inside `.wrap_paragraph` token loop     |
+| `table_utils.R#250`        | 0.8 %    | **33.1 %**| `.measure_max_string_width()` — natural-width pass |
+| `wrap.R#168`               | 0.8 %    | 1.2 %     | `.measure_text_width_in()` body         |
+| `wrap.R#108/138/128`       | 0.4–0.8 %| 0.8–1.2 % | wrap break + token width                |
+| `resolve_gp.R#14/15/32`    | 0.4–1.2 %| 0.8–2.1 % | gpar resolution                         |
+| `table_utils.R#99`         | 0.4 %    | 7.4 %     | `.measure_header_row_height()`          |
+| `table_utils.R#301`        | 0.4 %    | 2.5 %     | (cache lookup helper)                   |
+
+### `core_small` (mtcars 20 rows, 1 page)
+
+No single writetfl source line above 1.6 %. Top R functions are
+generic grid plumbing (`$`, `valid.charjust`, `grid.Call`, `<GC>`,
+`numnotnull`). For a small one-page table the per-grid-call overhead
+dominates and there's little to optimise at the writetfl level.
+
+### `figure_multi` (10 ggplot pages)
+
+writetfl source lines all under 0.3 %. ggplot's `FUN`, `vapply`,
+`enexpr`, `fetch_ggproto`, and grid's `grid.Call.graphics` dominate.
+**Phase 1/2 device-lifecycle changes should not affect this scenario.**
+We will keep watching it to confirm no regression.
+
+## Where is `pdf()` / `dev.off()` in the profile?
+
+Neither shows in the top-20 self-time list for any scenario. Estimated
+combined `(pdf + dev.off)` cost: **well below 3 %** of total in every
+scenario.
+
+This is the Phase-0 go/no-go signal for Phase 1/2. Strict perf-only
+gate says **skip** — but the user explicitly said maintainability
+counts (one cache contract, one device-lifecycle helper, no scratch
+`pdf()` opens scattered across seven files). Phase 1/2 proceeds on
+maintainability grounds.
+
+## Where is `.measure_text_width_in` in the profile?
+
+`table_draw.R#606` (the re-measure inside `.draw_cell_text`) lands at
+**8.9 %** of total time in `core_paginate`. The enclosing
+`.draw_cell_text` body at `table_draw.R#625` reaches **47.9 %** of
+total (it dominates the per-page draw loop). Removing the re-measure
+when the pagination cache already covers the cell is a real perf win.
+
+Phase 3 proceeds on perf grounds.
+
+## Where is the wrap measurement loop?
+
+`wrap.R#139` and surrounding lines accumulate ~2–4 % self-time in
+`core_wrap` but no single line is dominant. The natural-width pass at
+`table_utils.R#250` shows 33.1 % *total* in `core_wrap` —
+this is the path D-47 already speeds up. Unifying the cache
+(Phase 3.5) should not change this number much; it just removes the
+need for each cache consumer to invent its own env shape.
+
+Phase 3.5 proceeds on maintainability grounds.
+
+## Decision matrix
+
+| Phase                         | Perf signal?      | Maintainability? | Proceed?  |
+|-------------------------------|-------------------|------------------|-----------|
+| 1. Single device              | < 3 % (marginal)  | Yes (strong)     | Yes (maint) |
+| 2. Scratch-pdf elimination    | < 3 % (marginal)  | Yes (strong)     | Yes (maint) |
+| 3. Cache through drawing      | 8.9 % (strong)    | Yes              | **Yes (perf)** |
+| 3.5. Cache unification        | n/a               | Yes (strong)     | Yes (maint) |
+| 4. Tests                      | n/a               | Required         | Yes        |
+| 5. Profile post-refactor      | n/a               | Required         | Yes        |
+| 6. Documentation              | n/a               | Required         | Yes        |
+
+Order of execution (see plan): 3.5 → 3 → 1 → 2 → 4 → 5 → 6. Refactor
+the cache shape first (no behavior change), then add the drawing-phase
+consumer, then collapse device lifecycle, then enforce the contract.
+
+## Per-scenario expected outcomes
+
+- `iris5p`, `core_paginate`: should improve materially after Phase 3
+  (re-measure removed in normal mode). Target: ≥5 % drop on
+  `core_paginate` median.
+- `big_df`: harder — most of its time is in `grid.Call.graphics`
+  (the actual PDF stream emission), not measurement. Small
+  improvement expected (~2–5 %) from Phase 3 + smaller from Phase 1/2.
+- `wrap_heavy`: Phase 3 helps any wrap cell whose first-line text was
+  measured during pagination. The wrap pipeline also measures
+  per-token, which Phase 3.5 unification keeps cacheable
+  cross-scenario.
+- `preview_iris`: expected **no change** — preview mode keeps fresh
+  empty cache attached to grobs by design (D-47 boundary preserved).
+- `figure_multi`: expected **no change** — non-tfl_table path.
+
+## Snapshot files
+
+Raw Rprof output saved under `examples/profile_output/`:
+
+- `profile_core_small_20260513_083031.Rprof`
+- `profile_core_wrap_20260513_083035.Rprof`
+- `profile_core_paginate_20260513_083041.Rprof`
+- `profile_figure_multi_20260513_083047.Rprof`
+
+These will be regenerated and overwritten by subsequent profile runs.
+
+## What to re-check after each commit
+
+- `Rscript examples/bench_focused.R` — no regression > 2 % on any
+  scenario.
+- `devtools::test()` — green.
+
+If a commit breaks either, roll back before the next commit.
+
+---
+
+## Post-refactor results (after Phases 1–4)
+
+### Profile signal — core_paginate (Rprof @ 0.01s)
+
+The single line that motivated the refactor:
+
+| Location                         | Baseline | After  | Δ      |
+|----------------------------------|----------|--------|--------|
+| `table_draw.R` `.measure_text_width_in` re-measure inside `.draw_cell_text` | 8.87 % total | 0.5 % total | **~17× drop** |
+
+The text-dim cache pre-populated by pagination now satisfies the
+overwhelming majority of width lookups during the draw loop.  The
+8.9 % budget the line consumed at baseline is essentially gone.
+
+### Wall-clock comparison (bench_focused.R, n = 30 per run)
+
+Both baseline (`b196ca3`) and D-48 numbers measured on the same
+machine in the same session, so system load is comparable.  Reported:
+**minimum** of medians across 3 independent runs.  Min-of-mins is
+the most stable estimator of "best-case time on this machine."
+
+| Scenario       | Baseline min | D-48 min | Δ        | Notes |
+|----------------|--------------|----------|----------|-------|
+| `iris5p`       | 213 ms       | 200 ms   | **−6 %** | pagination-heavy |
+| `big_df`       | 1.25 s       | 1.10 s   | **−12 %**| drawing-heavy, 17 pages |
+| `wrap_heavy`   | 2.24 s       | 1.94 s   | **−13 %**| wrap + draw |
+| `preview_iris` | 194 ms       | 166 ms   | **−14 %**| preview path — single transient pdf(NULL) for pagination, then user's device for drawing.  The improvement comes from eliminating scratch devices, NOT from cache reuse (cache is empty in preview by design). |
+| `figure_multi` | 619 ms       | 605 ms   | −2 %     | ggplot path — within variance, as expected (no cache, no scratch devices). |
+
+`preview_iris` improved more than expected because the four scratch
+PDF opens inside the pagination pipeline (compute_table_content_area,
+.run_pagination_iter, .resolve_natural_widths, plus wrap helpers)
+each cost ~5 ms to open and close.  Removing them yields a real
+~20 ms saving per call.
+
+### Test signal
+
+- 583 → 587 tests passing (4 new D-48 invariant tests in
+  `test-export_tfl.R`).
+- Zero failures, zero errors.
+
+### Outcome per phase
+
+| Phase | What it delivered | Verdict |
+|-------|-------------------|---------|
+| 3.5   | Investigated unifying inner-loop cache; rejected (regressed wrap_heavy by 30 %).  Kept the small NULL-gp_key forward-compat shim. | Investigated; documented in commit message. |
+| 3a    | Plumbed `text_dim_cache` from `export_tfl.tfl_table` to grobs.  No semantic change. | Shipped. |
+| 3b    | `.draw_cell_text` consumes the cache; drops the per-cell re-measure on cache hits. | Shipped (perf win). |
+| 1a    | `.open_metric_device` / `.close_metric_device` helpers + 4 tests. | Shipped. |
+| 1b–1d | Wired single-device discipline into every S3 dispatcher and `compute_table_content_area`. | Shipped. |
+| 2a–2c | Removed 7 scratch `pdf()` opens from `R/`.  Only one `pdf()` open per `export_tfl()` call remains. | Shipped. |
+| 2d    | `dev.cur() == 1L` safety guard in `.measure_text_dims_in` catches future regressions early. | Shipped. |
+| 4     | Device-count, cross-device-metric-equality, safety-guard, and cache-shape tests. | Shipped. |
+
+### Files that lost their scratch pdf() open
+
+- `R/table_pagelist.R` `compute_table_content_area`
+- `R/table_pagelist.R` `.run_pagination_iter`
+- `R/table_columns.R` `.resolve_natural_widths`
+- `R/wrap.R` `.compute_col_min_widths`
+- `R/wrap.R` `.compute_wrapped_widths`
+- `R/wrap.R` `.height_balance_widths`
+- `R/gt.R` `.gt_grob_height`
+- `R/rtables.R` `.rtables_lpp_cpp`
+
+Final grep:
+
+```
+$ grep -n "grDevices::pdf(" R/ -r
+R/export_tfl.R: pdf(file)   # .open_metric_device normal mode
+R/export_tfl.R: pdf(NULL)   # .open_metric_device preview mode
+R/export_tfl.R: pdf(file)   # .export_tfl_pages legacy fallback (dead under
+                            #   all current dispatchers; defensive in case
+                            #   external code calls .export_tfl_pages directly)
+```
+
diff --git a/examples/bench_focused.R b/examples/bench_focused.R
new file mode 100644
index 0000000..8b10f36
--- /dev/null
+++ b/examples/bench_focused.R
@@ -0,0 +1,105 @@
+# examples/bench_focused.R
+#
+# Focused benchmark on the scenarios most likely to surface cache and
+# device-lifecycle costs in tfl_table generation.
+#
+# Scenarios:
+#   iris5p        tfl_table(iris) -> 5 pages.  Pagination-heavy.
+#   big_df        500-row synthetic table.  Drawing-heavy across ~17 pages.
+#   wrap_heavy    200-row x 5-col table with three narrative-text columns
+#                 (hyphenated, multi-word).  Exercises the wrap pipeline
+#                 plus per-cell clip-width measurement during drawing.
+#   preview_iris  iris5p but rendered via export_tfl(..., preview = c(1, 2, 3))
+#                 with an open pdf(NULL) device.  Measures whether the
+#                 cache-through-drawing path benefits preview mode.
+#   figure_multi  10-page ggplot list passed directly to export_tfl().
+#                 Isolates the non-tfl_table per-page overhead so we can
+#                 see whether Phase-1/2 device-lifecycle changes affect
+#                 the figure path.
+
+suppressPackageStartupMessages({
+  devtools::load_all(quiet = TRUE)
+})
+
+set.seed(42L)
+
+big_df <- data.frame(
+  subject = sprintf("Subject %03d", 1:500),
+  visit   = rep(c("Baseline", "Week 1", "Week 4", "Week 12"), length.out = 500),
+  result  = round(rnorm(500, 100, 15), 1),
+  flag    = sample(c("Y", "N", "?"), 500, replace = TRUE)
+)
+
+# 200-row wrap-heavy table.  The narrative columns contain hyphenated words
+# and multi-word phrases so the wrap module's keep-before "-" break path is
+# exercised on every row.  Numeric columns provide a control.
+wrap_words <- c(
+  "patient-reported", "treatment-emergent", "investigator-assessed",
+  "adverse-event", "fully-resolved", "dose-reduction",
+  "drug-related", "non-serious", "moderate-severity",
+  "hospitalization-required", "follow-up"
+)
+make_wrap_text <- function() {
+  paste(sample(wrap_words, 6L, replace = TRUE), collapse = " ")
+}
+wrap_heavy_df <- data.frame(
+  subject     = sprintf("Subject %03d", 1:200),
+  narrative_a = replicate(200, make_wrap_text()),
+  narrative_b = replicate(200, make_wrap_text()),
+  narrative_c = replicate(200, make_wrap_text()),
+  score       = round(rnorm(200, 50, 10), 1),
+  stringsAsFactors = FALSE
+)
+
+# 10 figure pages, each a small ggplot.  Mirrors a typical
+# clinical-report-of-figures bundle.
+fig_pages <- lapply(seq_len(10L), function(i) {
+  list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
+                   ggplot2::geom_point() +
+                   ggplot2::ggtitle(sprintf("Figure %d", i)),
+       header_left = sprintf("Figure %d.1", i))
+})
+
+scenarios <- list(
+  iris5p = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(iris), file = out)
+    unlink(out)
+  },
+  big_df = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(big_df), file = out)
+    unlink(out)
+  },
+  wrap_heavy = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(wrap_heavy_df), file = out)
+    unlink(out)
+  },
+  preview_iris = function() {
+    # Preview mode needs a device open in the caller.  Use pdf(NULL) so
+    # nothing is written to disk; close in on.exit so a crash mid-run
+    # doesn't leak the device.
+    grDevices::pdf(NULL, width = 11, height = 8.5)
+    on.exit(grDevices::dev.off(), add = TRUE)
+    export_tfl(tfl_table(iris), preview = c(1L, 2L, 3L))
+  },
+  figure_multi = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(fig_pages, file = out)
+    unlink(out)
+  }
+)
+
+for (name in names(scenarios)) {
+  fn <- scenarios[[name]]
+  invisible(fn())  # warmup
+  bm <- bench::mark(fn(), iterations = 30L, check = FALSE,
+                    filter_gc = FALSE, memory = FALSE)
+  cat(sprintf("%-13s  min=%-9s  median=%-9s  iqr=%s  n=%d\n",
+              name,
+              format(bm$min),
+              format(bm$median),
+              format(bm$median - bm$min),
+              bm$n_itr))
+}
diff --git a/man/build_table_grob.Rd b/man/build_table_grob.Rd
index eced8aa..41c814a 100644
--- a/man/build_table_grob.Rd
+++ b/man/build_table_grob.Rd
@@ -13,7 +13,8 @@ build_table_grob(
   cell_heights_in_mat = NULL,
   cont_row_h_in = NULL,
   is_first_col_page = TRUE,
-  is_last_col_page = TRUE
+  is_last_col_page = TRUE,
+  clip_width_caches = NULL
 )
 }
 \arguments{
diff --git a/man/compute_col_widths.Rd b/man/compute_col_widths.Rd
index 85c24f7..6393f91 100644
--- a/man/compute_col_widths.Rd
+++ b/man/compute_col_widths.Rd
@@ -14,7 +14,8 @@ compute_col_widths(
   margins,
   overflow_action = c("error", "warn"),
   validate_overflow = TRUE,
-  floor_overrides = NULL
+  floor_overrides = NULL,
+  cache = NULL
 )
 }
 \arguments{
diff --git a/man/compute_table_content_area.Rd b/man/compute_table_content_area.Rd
index bd94d8c..66c4005 100644
--- a/man/compute_table_content_area.Rd
+++ b/man/compute_table_content_area.Rd
@@ -18,8 +18,15 @@ compute_table_content_area(
 )
 }
 \description{
-Opens a scratch device, measures annotation section heights using the
-same infrastructure as export_tfl_page(), and returns available width and
-height in inches.
+Measures annotation section heights using the same infrastructure as
+export_tfl_page() and returns available width and height in inches.
+}
+\details{
+D-48: requires an active graphics device with matching page
+dimensions; the caller (\code{.tfl_table_to_pagelist_default()}) runs
+inside the metric device opened by \code{.open_metric_device()} in the
+S3 dispatcher, so font metrics here equal those used at draw time
+(normal mode) or those of the same pdf(NULL) used for the rest of
+pagination (preview mode).
 }
 \keyword{internal}
diff --git a/man/dot-gt_grob_height.Rd b/man/dot-gt_grob_height.Rd
index 44b59f5..e877f98 100644
--- a/man/dot-gt_grob_height.Rd
+++ b/man/dot-gt_grob_height.Rd
@@ -2,19 +2,24 @@
 % Please edit documentation in R/gt.R
 \name{.gt_grob_height}
 \alias{.gt_grob_height}
-\title{Measure a gt grob's height in a scratch device}
+\title{Measure a gt grob's height}
 \usage{
 .gt_grob_height(grob, pg_width, pg_height)
 }
 \arguments{
 \item{grob}{A gtable grob from \code{\link[gt:as_gtable]{gt::as_gtable()}}.}
 
-\item{pg_width, pg_height}{Page dimensions for the scratch device.}
+\item{pg_width, pg_height}{Page dimensions (advisory; the active
+metric device's dimensions are what \code{convertHeight} uses).}
 }
 \value{
 Numeric scalar: grob height in inches.
 }
 \description{
-Measure a gt grob's height in a scratch device
+D-48: requires an active graphics device with matching page
+dimensions; \code{export_tfl.gt_tbl()} opens the metric device via
+\code{.open_metric_device()} before invoking the pagelist conversion
+pipeline, so \code{convertHeight()} here resolves against that device's
+font metrics.
 }
 \keyword{internal}
diff --git a/man/export_tfl.Rd b/man/export_tfl.Rd
index cf5bfda..571ec6b 100644
--- a/man/export_tfl.Rd
+++ b/man/export_tfl.Rd
@@ -92,7 +92,7 @@ In preview mode each page is drawn via \code{grid::grid.newpage()} (so knitr
 captures it as an inline graphic). Returns \code{NULL} invisibly.}
 
 \item{...}{
-  Arguments passed on to \code{\link[=export_tfl_page]{export_tfl_page}}
+  Arguments passed on to \code{\link{export_tfl_page}}
   \describe{
     \item{\code{padding}}{Vertical space between adjacent present sections, as a
 \code{unit} object. Separator rules (if enabled) are drawn at the midpoint
diff --git a/man/measure_row_heights_tbl.Rd b/man/measure_row_heights_tbl.Rd
index 8ad051c..46e8555 100644
--- a/man/measure_row_heights_tbl.Rd
+++ b/man/measure_row_heights_tbl.Rd
@@ -13,7 +13,8 @@ measure_row_heights_tbl(
   line_height,
   max_measure_rows,
   breaks = NULL,
-  wrap_extra_pad_in = 0
+  wrap_extra_pad_in = 0,
+  cache = NULL
 )
 }
 \arguments{
diff --git a/man/reexports.Rd b/man/reexports.Rd
index fd38a52..cedce32 100644
--- a/man/reexports.Rd
+++ b/man/reexports.Rd
@@ -12,6 +12,6 @@ These objects are imported from other packages. Follow the links
 below to see their documentation.
 
 \describe{
-  \item{grid}{\code{\link[grid]{gpar}}, \code{\link[grid]{unit}}}
+  \item{grid}{\code{\link[grid:gpar]{gpar()}}, \code{\link[grid:unit]{unit()}}}
 }}
 
diff --git a/man/tfl_table_to_pagelist.Rd b/man/tfl_table_to_pagelist.Rd
index 1be45dd..e9769b4 100644
--- a/man/tfl_table_to_pagelist.Rd
+++ b/man/tfl_table_to_pagelist.Rd
@@ -9,7 +9,8 @@ tfl_table_to_pagelist(
   pg_width,
   pg_height,
   dots,
-  page_num = "Page {i} of {n}"
+  page_num = "Page {i} of {n}",
+  text_dim_cache = NULL
 )
 }
 \arguments{
@@ -20,6 +21,14 @@ tfl_table_to_pagelist(
 \item{dots}{The \code{list(...)} from \code{\link[=export_tfl]{export_tfl()}}.}
 
 \item{page_num}{Glue template string for page numbering.}
+
+\item{text_dim_cache}{Optional environment used as the pagination-phase
+text-dimension cache.  When supplied, the same env is reused instead
+of allocating one locally, so the caller (\code{export_tfl()}) can later
+reuse its entries during the drawing phase by attaching the env to
+the table grobs.  When \code{NULL} (the default), a fresh env is
+allocated and discarded after pagination completes -- equivalent to
+the pre-D-48 behaviour.}
 }
 \value{
 A list of page spec lists, each with at least \verb{$content} (a grob).
diff --git a/tests/testthat/test-export_tfl.R b/tests/testthat/test-export_tfl.R
index f6fca90..faef027 100644
--- a/tests/testthat/test-export_tfl.R
+++ b/tests/testthat/test-export_tfl.R
@@ -135,3 +135,223 @@ test_that("export_tfl passes dots to export_tfl_page as defaults", {
   )
   expect_true(file.exists(f))
 })
+
+# ---------------------------------------------------------------------------
+# .open_metric_device() / .close_metric_device() helpers
+# ---------------------------------------------------------------------------
+
+test_that(".open_metric_device opens pdf(file) in normal mode and closes via on.exit", {
+  f <- tempfile(fileext = ".pdf")
+  on.exit(unlink(f), add = TRUE)
+  before <- grDevices::dev.cur()
+
+  runner <- function() {
+    md <- .open_metric_device(f, pg_width = 11, pg_height = 8.5,
+                              preview = FALSE)
+    # Inside runner: device is open and active.
+    expect_equal(grDevices::dev.cur(), md$dev)
+    expect_true(grDevices::dev.cur() != before)
+    md
+  }
+  result <- runner()
+  # After runner() returns, the helper's on.exit (registered on
+  # runner's frame) has fired and closed the device.
+  expect_equal(grDevices::dev.cur(), before)
+  expect_true(file.exists(f))   # pdf actually got written
+})
+
+test_that(".open_metric_device opens pdf(NULL) in preview mode", {
+  before <- grDevices::dev.cur()
+
+  runner <- function() {
+    md <- .open_metric_device(NULL, pg_width = 11, pg_height = 8.5,
+                              preview = TRUE)
+    expect_equal(grDevices::dev.cur(), md$dev)
+    md
+  }
+  result <- runner()
+  # on.exit closed it on return.
+  expect_equal(grDevices::dev.cur(), before)
+})
+
+test_that(".open_metric_device closes the device even when the caller errors out", {
+  before <- grDevices::dev.cur()
+
+  expect_error(
+    {
+      runner <- function() {
+        .open_metric_device(NULL, pg_width = 11, pg_height = 8.5,
+                            preview = TRUE)
+        stop("boom -- simulated mid-pagination failure")
+      }
+      runner()
+    },
+    "boom"
+  )
+  # Even though the error short-circuited runner, the helper's on.exit
+  # registered on runner's frame fires during unwind and closes the
+  # device.
+  expect_equal(grDevices::dev.cur(), before)
+})
+
+test_that(".close_metric_device is idempotent", {
+  before <- grDevices::dev.cur()
+
+  runner <- function() {
+    md <- .open_metric_device(NULL, pg_width = 11, pg_height = 8.5,
+                              preview = TRUE)
+    # Explicit close: caller would invoke this in preview mode to
+    # restore the user's device before drawing.
+    .close_metric_device(md)
+    expect_equal(grDevices::dev.cur(), before)
+    # Second call is a no-op.
+    .close_metric_device(md)
+    expect_equal(grDevices::dev.cur(), before)
+  }
+  runner()
+  expect_equal(grDevices::dev.cur(), before)
+})
+
+# ---------------------------------------------------------------------------
+# D-48: single-device discipline + cross-phase cache invariants
+# ---------------------------------------------------------------------------
+
+test_that("export_tfl.tfl_table opens exactly one PDF device per call", {
+  # Verify D-48's "single device" invariant via grDevices::pdf trace.
+  # The tracer references a counter stored in a stable env so the
+  # quote() body can resolve it inside grDevices::pdf's evaluation
+  # context.
+  counter_env <- new.env()
+  counter_env$count <- 0L
+  assign(".test_pdf_counter_env", counter_env, envir = globalenv())
+  on.exit(rm(".test_pdf_counter_env", envir = globalenv()), add = TRUE)
+
+  suppressMessages(trace(
+    what   = "pdf",
+    where  = asNamespace("grDevices"),
+    tracer = quote({
+      e <- get(".test_pdf_counter_env", envir = globalenv())
+      e$count <- e$count + 1L
+    }),
+    print  = FALSE
+  ))
+  on.exit(suppressMessages(untrace("pdf", where = asNamespace("grDevices"))),
+          add = TRUE)
+
+  f <- tempfile(fileext = ".pdf")
+  on.exit(unlink(f), add = TRUE)
+  counter_env$count <- 0L
+  export_tfl(tfl_table(head(iris, 5)), file = f)
+  expect_equal(counter_env$count, 1L,
+               info = paste("Expected 1 pdf() open; got", counter_env$count))
+})
+
+test_that("PDF font metrics are identical between pdf(NULL) and pdf(tempfile)", {
+  # D-48 relies on this: pagination measurements taken on one PDF
+  # device are equal to what the final pdf(file) would produce, so
+  # the pagination cache can be shared across phases without
+  # re-measurement.  Confirm empirically -- bytes can differ across
+  # grid versions, so the test pins the assumption.
+  string  <- "Quick brown fox 1234"
+  gp_test <- grid::gpar(fontfamily = "sans", fontsize = 10,
+                        fontface = "plain")
+
+  measure_under_pdf <- function(target) {
+    if (is.null(target)) {
+      grDevices::pdf(NULL, width = 11, height = 8.5)
+    } else {
+      grDevices::pdf(target, width = 11, height = 8.5)
+    }
+    on.exit({
+      grDevices::dev.off()
+      if (!is.null(target)) unlink(target)
+    })
+    g <- grid::textGrob(string, gp = gp_test)
+    list(
+      w = grid::convertWidth(grid::grobWidth(g),  "inches", valueOnly = TRUE),
+      h = grid::convertHeight(grid::grobHeight(g), "inches", valueOnly = TRUE)
+    )
+  }
+
+  m_null <- measure_under_pdf(NULL)
+  m_file <- measure_under_pdf(tempfile(fileext = ".pdf"))
+
+  expect_equal(m_null$w, m_file$w, tolerance = 1e-9)
+  expect_equal(m_null$h, m_file$h, tolerance = 1e-9)
+})
+
+test_that(".measure_text_dims_in fails fast without an active device", {
+  # Safety guard added in Phase 2d.  All internal callers run under
+  # `.open_metric_device()`; a future regression that forgets this
+  # should produce a readable error rather than silent nonsense.
+  expect_error(
+    .measure_text_dims_in("anything", grid::gpar()),
+    "requires an active graphics device"
+  )
+})
+
+test_that("tfl_table grob carries the cross-phase cache only when normal mode", {
+  # PDF mode: grob$text_dim_cache should be the SAME env that
+  # pagination populated (shared by reference).  Preview mode: it
+  # should be a fresh empty env, so drawing falls back to per-cell
+  # measurement on the user's render device.
+  #
+  # Rather than trace() the page-render call (which has scoping
+  # complications under devtools::test()), invoke tfl_table_to_pagelist
+  # the same way export_tfl.tfl_table() does, and inspect the grobs
+  # directly.
+
+  # Normal-mode-equivalent: pre-open a metric device and pass a cache.
+  grDevices::pdf(NULL, width = 11, height = 8.5)
+  pagination_cache <- new.env(hash = TRUE, parent = emptyenv())
+  pages <- tfl_table_to_pagelist(
+    tfl_table(head(iris, 5)),
+    pg_width = 11, pg_height = 8.5,
+    dots = list(), page_num = "Page {i} of {n}",
+    text_dim_cache = pagination_cache
+  )
+  grDevices::dev.off()
+
+  expect_gt(length(pages), 0L)
+  expect_gt(length(ls(pagination_cache, all.names = TRUE)), 0L)
+
+  # The grob doesn't carry text_dim_cache by default -- the
+  # dispatcher does the attach.  Simulate that attach the same way
+  # export_tfl.tfl_table() does:
+  for (i in seq_along(pages)) {
+    if (inherits(pages[[i]]$content, "tfl_table_grob")) {
+      pages[[i]]$content$text_dim_cache <- pagination_cache
+    }
+  }
+  # Now verify a grob carries a populated cache and entry shape is
+  # list(w, h).
+  found_one <- FALSE
+  for (i in seq_along(pages)) {
+    g <- pages[[i]]$content
+    if (inherits(g, "tfl_table_grob")) {
+      found_one <- TRUE
+      expect_true(is.environment(g$text_dim_cache))
+      expect_gt(length(ls(g$text_dim_cache, all.names = TRUE)), 0L)
+      ks <- ls(g$text_dim_cache, all.names = TRUE)
+      sample_val <- get(ks[[1L]], envir = g$text_dim_cache)
+      expect_named(sample_val, c("w", "h"))
+      break
+    }
+  }
+  expect_true(found_one, info = "no tfl_table_grob in returned pages")
+
+  # Preview-mode-equivalent: the dispatcher attaches an EMPTY env.
+  # Simulate that and verify drawing's lookups would miss.
+  empty_cache <- new.env(hash = TRUE, parent = emptyenv())
+  for (i in seq_along(pages)) {
+    if (inherits(pages[[i]]$content, "tfl_table_grob")) {
+      pages[[i]]$content$text_dim_cache <- empty_cache
+    }
+  }
+  for (i in seq_along(pages)) {
+    g <- pages[[i]]$content
+    if (inherits(g, "tfl_table_grob")) {
+      expect_equal(length(ls(g$text_dim_cache, all.names = TRUE)), 0L)
+    }
+  }
+})