diff --git a/CLAUDE.md b/CLAUDE.md index 5256b17..5eedad6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -263,18 +263,39 @@ draw_content <- function(content, vp, gp = gpar(), content_just = "left") { same `match.arg(., c("left", "right", "centre"))` pattern as `caption_just` / `footnote_just` and supports per-page override via `x$content_just`. -### Device lifecycle +### Device lifecycle (D-48) + +Every `export_tfl()` call opens exactly **one** PDF device, which covers +both pagination measurements and (in normal mode) the per-page draw loop. ```r -export_tfl <- function(...) { - # validate first, before opening device - pdf(file, width = pg_width, height = pg_height) - on.exit(dev.off(), add = TRUE) - # loop +export_tfl. <- function(x, file, pg_width, pg_height, + page_num, preview, ...) { + .validate_export_args(page_num, preview, file) + + # Normal mode: pdf(file). Preview mode: pdf(NULL). + md <- .open_metric_device(file, pg_width, pg_height, preview) + # on.exit(dev.off()) is registered on THIS frame by .open_metric_device(), + # so the device closes cleanly even on mid-pagination error. + + # ... call *_to_pagelist() with a `text_dim_cache` env that + # pagination populates and (in normal mode) drawing reuses ... + + # Preview: close the transient pdf(NULL) so user's device is active. + if (!isFALSE(preview)) .close_metric_device(md) + + # Drawing phase. Reuses the same device in normal mode. + .export_tfl_pages(..., pdf_already_open = TRUE) } ``` -`on.exit(dev.off(), add = TRUE)` ensures the device closes even if a page errors. +Internal measurement helpers (`compute_table_content_area`, +`.resolve_natural_widths`, `.run_pagination_iter`, +`.compute_col_min_widths`, `.compute_wrapped_widths`, +`.height_balance_widths`, `.gt_grob_height`, `.rtables_lpp_cpp`) **require** +an active device; a safety guard inside `.measure_text_dims_in()` +(`dev.cur() == 1L` → `rlang::abort()`) catches regressions that violate +this. ### Return value diff --git a/DESCRIPTION b/DESCRIPTION index d4ead32..60dd107 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,6 @@ License: AGPL-3 Encoding: UTF-8 Depends: R (>= 4.1.0) Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 Imports: checkmate, dplyr, @@ -42,3 +41,4 @@ Suggests: VignetteBuilder: knitr Config/testthat/edition: 3 URL: https://humanpred.github.io/writetfl/ +Config/roxygen2/version: 8.0.0 diff --git a/R/export_tfl.R b/R/export_tfl.R index ace8aae..4ad3d2e 100644 --- a/R/export_tfl.R +++ b/R/export_tfl.R @@ -140,8 +140,11 @@ export_tfl.default <- function( ) { dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) x <- coerce_x_to_pagelist(x) - .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' @export @@ -156,9 +159,46 @@ export_tfl.tfl_table <- function( ) { dots <- list(...) .validate_export_args(page_num, preview, file) + + # Open the metric device BEFORE pagination so measurement runs on the + # same device the drawing phase will use (normal mode) or a transient + # pdf(NULL) with matching settings (preview mode). The helper + # registers on.exit on THIS frame, so a mid-pagination or mid-drawing + # error still closes the device cleanly. + md <- .open_metric_device(file, pg_width, pg_height, preview) + + # Cross-phase text-dimension cache. Pagination populates it with + # (width, height) per (gp_key, string). In PDF mode (preview = FALSE), + # pagination and drawing share `md$dev`, so cached values are + # authoritative for the render pass without re-measurement. In preview + # mode, the user's render device differs from `md$dev`, so drawing + # gets a fresh empty cache and falls back to per-cell measurement -- + # preserving today's preview behaviour exactly. + pagination_cache <- new.env(hash = TRUE, parent = emptyenv()) x <- tfl_table_to_pagelist(x, pg_width = pg_width, pg_height = pg_height, - dots = dots, page_num = page_num) - .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots) + dots = dots, page_num = page_num, + text_dim_cache = pagination_cache) + + drawing_cache <- if (isFALSE(preview)) pagination_cache else + new.env(hash = TRUE, parent = emptyenv()) + + # Attach the drawing cache to every tfl_table grob in the pagelist so + # drawDetails can reach it. Loops are O(n_pages); each assignment is + # a reference copy, not a data copy. + for (i in seq_along(x)) { + if (inherits(x[[i]]$content, "tfl_table_grob")) { + x[[i]]$content$text_dim_cache <- drawing_cache + } + } + + # Preview mode: close the transient pagination device so the user's + # device is active for drawing. The on.exit guard installed by + # `.open_metric_device()` will see this device already closed (via + # `.close_metric_device`'s idempotency check) and no-op. + if (!isFALSE(preview)) .close_metric_device(md) + + .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' @export @@ -174,6 +214,8 @@ export_tfl.list <- function( dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) + # Check if this is a list of gt_tbl objects all_gt <- length(x) > 0L && all(vapply(x, inherits, logical(1L), "gt_tbl")) @@ -215,7 +257,9 @@ export_tfl.list <- function( } } } - .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } @@ -234,9 +278,78 @@ export_tfl.list <- function( invisible(NULL) } -# Render a list of page specs to PDF or the current device +# Open the metric device for an export_tfl() call. D-48 establishes +# that one device covers both pagination measurements and (in normal +# mode) drawing, rather than each measurement helper opening its own +# scratch device. +# +# Normal mode (`isFALSE(preview)`): opens `grDevices::pdf(file)` -- the +# final output PDF. Pagination uses it for `convertWidth` / `grobWidth` +# resolution; subsequent drawing reuses the same device. +# +# Preview mode: opens a transient `grDevices::pdf(NULL)` so pagination +# uses identical PDF font metrics to normal mode (preserving today's +# pagination decisions). The caller is responsible for invoking +# `.close_metric_device()` AFTER pagination so the user's pre-existing +# device becomes active again for drawing. +# +# Safety: +# * The helper registers an `on.exit()` handler on the CALLER's frame +# (`envir`) so any error during pagination or drawing still closes +# the device. Without that, an interrupted run would leak the +# device; running export_tfl() again would then open another and +# eventually exhaust the per-session limit of 64. +# * `.close_metric_device()` is idempotent: calling it explicitly in +# preview mode and then letting `on.exit` run is harmless because +# the second call sees a different `dev.cur()` and no-ops. +# +# @keywords internal +.open_metric_device <- function(file, pg_width, pg_height, preview, + envir = parent.frame()) { + if (isFALSE(preview)) { + grDevices::pdf(file, width = pg_width, height = pg_height) + } else { + grDevices::pdf(NULL, width = pg_width, height = pg_height) + } + dev <- grDevices::dev.cur() + md <- list(dev = dev) + # Register on.exit on the caller's frame so the device closes even + # if the caller errors out mid-execution. bquote inlines `dev` so + # the on.exit body does not need to reach back to `md`. + do.call("on.exit", + list(bquote({ + if (grDevices::dev.cur() == .(dev)) grDevices::dev.off() + }), add = TRUE), + envir = envir) + md +} + +# Close a metric device opened by `.open_metric_device()`. +# +# Idempotent: a second call (or a call when the device has already been +# closed by something else) is a no-op. Idempotency matters because +# preview-mode callers close explicitly after pagination AND register +# the same close via the helper's `on.exit` handler. +# +# @keywords internal +.close_metric_device <- function(md) { + if (!is.null(md$dev) && grDevices::dev.cur() == md$dev) { + grDevices::dev.off() + } + invisible(NULL) +} + +# Render a list of page specs to PDF or the current device. +# +# `pdf_already_open` signals that the CALLER has already opened the +# render device (via `.open_metric_device()`) and owns its lifecycle. +# In that case this function skips its own `pdf()` open / on.exit +# close and just iterates pages. When the caller does not pass the +# flag (e.g. `export_tfl.default()` for ggplot pages), the legacy +# self-open path is preserved. .export_tfl_pages <- function(pages, file, pg_width, pg_height, - page_num, preview, dots) { + page_num, preview, dots, + pdf_already_open = FALSE) { n <- length(pages) # ------------------------------------------------------------------ @@ -263,8 +376,10 @@ export_tfl.list <- function( # ------------------------------------------------------------------ # Normal mode: write PDF # ------------------------------------------------------------------ - grDevices::pdf(file, width = pg_width, height = pg_height) - on.exit(grDevices::dev.off(), add = TRUE) + if (!pdf_already_open) { + grDevices::pdf(file, width = pg_width, height = pg_height) + on.exit(grDevices::dev.off(), add = TRUE) + } for (i in seq_along(pages)) { page_args <- build_page_args(pages[[i]], dots, page_num, i, n) diff --git a/R/flextable.R b/R/flextable.R index c0b0d22..da5ffcf 100644 --- a/R/flextable.R +++ b/R/flextable.R @@ -25,8 +25,11 @@ export_tfl.flextable <- function( rlang::check_installed("flextable", reason = "to export flextable tables") dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) pages <- flextable_to_pagelist(x, pg_width, pg_height, dots, page_num) - .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' Convert a flextable object to a list of page specification lists diff --git a/R/ggtibble.R b/R/ggtibble.R index b34180f..4a3191f 100644 --- a/R/ggtibble.R +++ b/R/ggtibble.R @@ -20,10 +20,13 @@ export_tfl.ggtibble <- function( ) { dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) x <- ggtibble_to_pagelist(x, sub_tfl = sub_tfl, sub_tfl_sep = sub_tfl_sep, sub_tfl_collapse = sub_tfl_collapse, sub_tfl_prefix = sub_tfl_prefix) - .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(x, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } # Page-spec arg names recognised on a ggtibble row. diff --git a/R/gt.R b/R/gt.R index 3374c7f..1ec7327 100644 --- a/R/gt.R +++ b/R/gt.R @@ -24,8 +24,11 @@ export_tfl.gt_tbl <- function( rlang::check_installed("gt", reason = "to export gt tables") dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) pages <- gt_to_pagelist(x, pg_width, pg_height, dots, page_num) - .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' Convert a gt_tbl object to a list of page specification lists @@ -176,19 +179,20 @@ gt_to_pagelist <- function(gt_obj, pg_width = 11, pg_height = 8.5, dims$height } -#' Measure a gt grob's height in a scratch device +#' Measure a gt grob's height +#' +#' D-48: requires an active graphics device with matching page +#' dimensions; `export_tfl.gt_tbl()` opens the metric device via +#' `.open_metric_device()` before invoking the pagelist conversion +#' pipeline, so `convertHeight()` here resolves against that device's +#' font metrics. #' #' @param grob A gtable grob from [gt::as_gtable()]. -#' @param pg_width,pg_height Page dimensions for the scratch device. +#' @param pg_width,pg_height Page dimensions (advisory; the active +#' metric device's dimensions are what `convertHeight` uses). #' @return Numeric scalar: grob height in inches. #' @keywords internal .gt_grob_height <- function(grob, pg_width, pg_height) { - scratch <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch, width = pg_width, height = pg_height) - on.exit({ - grDevices::dev.off() - unlink(scratch) - }) grid::convertHeight(grid::grobHeight(grob), "inches", valueOnly = TRUE) } diff --git a/R/rtables.R b/R/rtables.R index 04339ff..5d8a70e 100644 --- a/R/rtables.R +++ b/R/rtables.R @@ -22,8 +22,11 @@ export_tfl.VTableTree <- function( rlang::check_installed("rtables", reason = "to export rtables tables") dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) pages <- rtables_to_pagelist(x, pg_width, pg_height, dots, page_num) - .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' Convert a VTableTree object to a list of page specification lists @@ -207,19 +210,17 @@ rtables_to_pagelist <- function(rt_obj, pg_width = 11, pg_height = 8.5, line_h_in <- (font_size / 72) * lineheight lpp <- floor(content_h / line_h_in) - # Character width: measure "M" in the target font using a scratch device - scratch <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch, width = 10, height = 10) - on.exit({ - grDevices::dev.off() - unlink(scratch) - }) + # Character width: measure "M" in the target font. D-48: relies on + # the metric device opened upstream by `.open_metric_device()` + # rather than opening a scratch PDF here. The viewport is pushed + # and popped on exit so an error mid-measurement does not leave the + # font-context viewport on the stack. grid::pushViewport(grid::viewport( gp = grid::gpar(fontfamily = font_family, fontsize = font_size) )) + on.exit(grid::popViewport(), add = TRUE) char_w_in <- grid::convertWidth(grid::stringWidth("M"), "inches", valueOnly = TRUE) - grid::popViewport() cpp <- floor(content_w / char_w_in) diff --git a/R/table1.R b/R/table1.R index ad45ed5..685e1e4 100644 --- a/R/table1.R +++ b/R/table1.R @@ -21,8 +21,11 @@ export_tfl.table1 <- function( rlang::check_installed("flextable", reason = "to export table1 tables") dots <- list(...) .validate_export_args(page_num, preview, file) + md <- .open_metric_device(file, pg_width, pg_height, preview) pages <- table1_to_pagelist(x, pg_width, pg_height, dots, page_num) - .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots) + if (!isFALSE(preview)) .close_metric_device(md) + .export_tfl_pages(pages, file, pg_width, pg_height, page_num, preview, dots, + pdf_already_open = TRUE) } #' Convert a table1 object to a list of page specification lists diff --git a/R/table_columns.R b/R/table_columns.R index 80cadef..09ca272 100644 --- a/R/table_columns.R +++ b/R/table_columns.R @@ -96,14 +96,16 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins, overflow_action = c("error", "warn"), validate_overflow = TRUE, - floor_overrides = NULL) { + floor_overrides = NULL, + cache = NULL) { overflow_action <- match.arg(overflow_action) strategy <- tbl$col_split_strategy %||% "balanced" # Shared setup: compute natural widths, resolve relative weights, # auto-detect wrap eligibility, and measure col_cont_label_half_w. setup <- .resolve_natural_widths( - resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins + resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins, + cache = cache ) # Dispatch. Each strategy returns list(resolved_cols, col_groups). @@ -162,7 +164,8 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, # The scratch device used for text measurement is opened, used, and # closed inside this function so neither strategy has to manage it. .resolve_natural_widths <- function(resolved_cols, data, content_width_in, - tbl, pg_width, pg_height, margins) { + tbl, pg_width, pg_height, margins, + cache = NULL) { n_cols <- length(resolved_cols) n_grp <- length(tbl$group_vars) min_in <- .width_in(tbl$min_col_width) @@ -171,16 +174,16 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, .width_in(cell_pad[["left"]]) na_str <- tbl$na_string max_rows <- tbl$max_measure_rows + hdr_gp_key <- paste0("header_row_lh", tbl$line_height) - scratch_file <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + # D-48: relies on the metric device opened upstream by + # `.open_metric_device()` rather than opening a scratch PDF here. + # The outer viewport is still pushed so width conversions resolve + # against the post-margin content area; popped on exit so an error + # inside the measurement loop does not leave it on the stack. outer_vp <- .make_outer_vp(margins) grid::pushViewport(outer_vp) - on.exit({ - grid::popViewport() # nocov - grDevices::dev.off() # nocov - unlink(scratch_file) # nocov - }, add = TRUE) + on.exit(grid::popViewport(), add = TRUE) # nocov widths_in <- vapply(seq_len(n_cols), function(j) { cs <- resolved_cols[[j]] @@ -198,8 +201,12 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, .resolve_table_gp(tbl$gp, "header_row"), tbl$line_height ) parts <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows) - w_data <- .measure_max_string_width(parts$data, cell_gp) - w_hdr <- .measure_max_string_width(parts$header, hdr_gp) + cell_key <- paste0(if (cs$is_group_col) "group_col" else "data_row", + "_lh", tbl$line_height) + w_data <- .measure_max_string_width(parts$data, cell_gp, + gp_key = cell_key, cache = cache) + w_hdr <- .measure_max_string_width(parts$header, hdr_gp, + gp_key = hdr_gp_key, cache = cache) max(min_in, max(w_data, w_hdr) + h_pad_in) } }, numeric(1L)) @@ -213,11 +220,11 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, 0 } - # Close the scratch device now so strategy functions can open their own - # without nested-device complications. + # Pop the outer viewport now so strategy functions push their own + # viewports on a clean stack. No device to close anymore -- the + # metric device opened upstream by `.open_metric_device()` stays + # open for subsequent measurement work. grid::popViewport() - grDevices::dev.off() - unlink(scratch_file) on.exit(NULL) # Resolve relative weights. diff --git a/R/table_draw.R b/R/table_draw.R index 197c78a..4498816 100644 --- a/R/table_draw.R +++ b/R/table_draw.R @@ -66,7 +66,8 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols, cell_heights_in_mat = NULL, cont_row_h_in = NULL, is_first_col_page = TRUE, - is_last_col_page = TRUE) { + is_last_col_page = TRUE, + clip_width_caches = NULL) { # Subset to display columns for this page page_cols <- resolved_cols[col_group_idx] @@ -81,6 +82,11 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols, cont_row_h_in = cont_row_h_in, # cached from paginate phase is_first_col_page = is_first_col_page, # FALSE when prior col pages exist is_last_col_page = is_last_col_page, # FALSE when more col pages follow + clip_width_caches = clip_width_caches, # shared across all pages of + # this tfl_table; NULL means + # drawDetails will create + # per-page envs (e.g. grobs + # assembled outside pipeline) cl = "tfl_table_grob" ) } @@ -242,6 +248,17 @@ drawDetails.tfl_table_grob <- function(x, recording) { y_cursor <- 0 # distance from top in inches + # Cross-phase text-dim cache attached by export_tfl.tfl_table(). In + # PDF mode it is already populated by pagination with both width and + # height for every cell; drawing's width lookups become pure env + # reads. In preview mode the supplied cache is an empty env, so + # lookups miss and .draw_cell_text() falls through to the per-column + # width_cache (D-46) and fresh measurement. When grobs are built + # outside the normal export_tfl() pipeline (no cache attached), + # NULL means "skip the cross-phase lookup entirely". Hoisted above + # the header-row block so both header and data-row paths use it. + text_dim_cache <- x$text_dim_cache + # Draw column header row if (tbl$show_col_names) { # Header row background fill @@ -261,7 +278,8 @@ drawDetails.tfl_table_grob <- function(x, recording) { .draw_header_row(page_cols, col_x_left, col_x_right, col_widths_in, y_cursor, header_row_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh, - breaks = breaks) + breaks = breaks, + text_dim_cache = text_dim_cache) y_cursor <- y_cursor + header_row_h # Column header rule — spans table width only @@ -299,13 +317,36 @@ drawDetails.tfl_table_grob <- function(x, recording) { .gp_with_lineheight(.resolve_table_cell_gp(gp_tbl, cs$is_group_col), lh) }) + # Per-column gp_key matching what the pagination phase used in + # text_dim_cache (table_utils.R `.measure_text_dims_in` namespace). + # Lets .draw_cell_text() look up pagination-cached widths without + # hashing the gpar field-by-field. + gp_key_by_col <- vapply(page_cols, function(cs) { + paste0(if (cs$is_group_col) "group_col" else "data_row", "_lh", lh) + }, character(1L)) + # Per-column clip-width memo: many cells in a column share identical text - # (numeric formats like "5.1", category labels), and the clip-width - # computation otherwise re-measures each one. Cache scoped to this - # drawDetails call and to one column, so a single (text -> width) key is - # enough. - clip_width_cache_by_col <- lapply(page_cols, function(cs) { - new.env(hash = TRUE, parent = emptyenv()) + # (numeric formats like "5.1", category labels). When the parent + # tfl_table_to_pagelist() built a shared cache list, reuse those envs -- + # they persist across every row-page and col-group of this table, so the + # cache hits accumulate over all pages. Otherwise fall back to per-page + # envs (e.g. for grobs built outside the normal pipeline). + # + # Kept as a second-tier cache (after text_dim_cache) so that strings + # produced by .wrap_string() at draw time -- which pagination never saw + # in their wrapped form -- still get cached per column for repeats + # across rows / pages. + clip_width_cache_by_col <- if (!is.null(x$clip_width_caches)) { + lapply(x$col_group_idx, function(k) x$clip_width_caches[[k]]) + } else { + lapply(page_cols, function(cs) new.env(hash = TRUE, parent = emptyenv())) + } + + # Pre-extract and pre-format each column's cell strings for the rows on + # this page. Replaces a per-cell `.fmt_cell(data[[cs$col]][i], na_str)` + # with a single vectorised `.fmt_cell_vec()` per column. + cell_strs_by_col <- lapply(page_cols, function(cs) { + .fmt_cell_vec(data[[cs$col]][rows], na_str) }) # Hoist row/group-rule gpars too -- they don't change between rows. Only @@ -358,9 +399,8 @@ drawDetails.tfl_table_grob <- function(x, recording) { # Draw data row for (j in seq_len(n_disp_cols)) { - cs <- page_cols[[j]] - raw_val <- data[[cs$col]][i] - cell_str <- .fmt_cell(raw_val, na_str) + cs <- page_cols[[j]] + cell_str <- cell_strs_by_col[[j]][[ri]] # Group repeat suppression and span detection clip_h <- row_h @@ -399,7 +439,9 @@ drawDetails.tfl_table_grob <- function(x, recording) { y_cursor, clip_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, cell_gp, cs$width_in, - width_cache = clip_width_cache_by_col[[j]]) + width_cache = clip_width_cache_by_col[[j]], + text_dim_cache = text_dim_cache, + gp_key = gp_key_by_col[[j]]) } y_cursor <- y_cursor + row_h @@ -505,8 +547,13 @@ drawDetails.tfl_table_grob <- function(x, recording) { .draw_header_row <- function(page_cols, col_x_left, col_x_right, col_widths_in, y_top_in, row_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh, - breaks = NULL) { + breaks = NULL, + text_dim_cache = NULL) { hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), lh) + # Single gp_key for the whole header row -- all cells share hdr_gp, so + # pagination's `.measure_header_row_height()` populated text_dim_cache + # under this same key (table_utils.R:99). + hdr_gp_key <- paste0("header_row_lh", lh) for (j in seq_along(page_cols)) { cs <- page_cols[[j]] label <- cs$label @@ -518,7 +565,9 @@ drawDetails.tfl_table_grob <- function(x, recording) { col_x_left[[j]], col_x_right[[j]], y_top_in, row_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, - hdr_gp, cs$width_in) + hdr_gp, cs$width_in, + text_dim_cache = text_dim_cache, + gp_key = hdr_gp_key) } } @@ -551,10 +600,25 @@ drawDetails.tfl_table_grob <- function(x, recording) { } # Draw a single cell's text +# +# `text_dim_cache` (the optional cross-phase cache populated during +# pagination) is consulted FIRST under the `gp_key` namespace. D-47 +# established that PDF scratch devices and the render PDF share font +# metrics for matching `(pg_width, pg_height)`, so cached widths are +# authoritative. Misses fall through to the per-column `width_cache` +# (D-46) and finally to a fresh measurement via `.measure_text_width_in`. +# +# Preview mode is preserved exactly: `export_tfl.tfl_table()` attaches +# an empty `text_dim_cache` to grobs in preview mode, so every lookup +# misses and the function falls through to per-cell measurement on the +# user's render device (the pre-D-48 path). .draw_cell_text <- function(text, align, x_left, x_right, y_top_in, row_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, - gp, col_width_in, width_cache = NULL) { + gp, col_width_in, + width_cache = NULL, + text_dim_cache = NULL, + gp_key = NULL) { if (nchar(text) == 0L) return(invisible(NULL)) y_npc <- 1 - (y_top_in + v_top_in) / vp_h @@ -571,29 +635,58 @@ drawDetails.tfl_table_grob <- function(x, recording) { just <- c("centre", "top") } - # Re-measure text width in the current (rendering) device using the - # actual rendering gpar (grid::stringWidth() picks up only the active - # viewport's gp, which is wrong when `gp` is, e.g., a bold header - # gpar and the active vp is regular weight). This corrects font-metric - # variance between the PDF scratch device used for column-width - # measurement and the device used for actual rendering (e.g. a PNG - # device in knitr / RStudio preview mode). + # Width lookup: text_dim_cache (cross-phase, populated by pagination) + # -> width_cache (per-column, D-46) -> fresh measurement. # - # Important: cap the clip width at a small tolerance past `col_width_in` - # so a column that is genuinely too narrow for its content (user set a - # fixed width below the longest unbreakable token, or a bold header - # whose measured width exceeded the regular-weight column-width pass) - # cannot bleed text into the neighboring column and hide its content. - # Anything past the tolerance gets visually clipped at the column edge, - # which is a far less destructive failure mode than overlap. - # Measurement is identical for repeated cell text within one drawDetails - # call; the optional per-column cache lets the caller deduplicate. - text_w <- .measure_text_width_in(text, gp, width_cache) + # Cap the clip width at a small tolerance past `col_width_in` so a + # column genuinely too narrow for its content cannot bleed text into + # the neighboring column and hide its content. Anything past the + # tolerance gets visually clipped at the column edge -- a far less + # destructive failure mode than overlap. + text_w <- NA_real_ + if (!is.null(text_dim_cache) && !is.null(gp_key)) { + key <- paste0(gp_key, "\x01", text) + if (exists(key, envir = text_dim_cache, inherits = FALSE)) { + hit <- get(key, envir = text_dim_cache, inherits = FALSE) + text_w <- hit$w + } + } + if (is.na(text_w)) { + text_w <- .measure_text_width_in(text, gp, width_cache) + } needed <- text_w + h_lft_in + h_rgt_in - bleed_tol_in <- 0.05 - clip_w <- min(col_width_in + bleed_tol_in, max(col_width_in, needed)) - # Clip to column width by using a clipping viewport + # X position in parent-viewport inches. + x_in <- if (identical(align, "left")) { + x_left + h_lft_in + } else if (identical(align, "right")) { + x_right - h_rgt_in + } else { + (x_left + x_right) / 2 + } + y_in <- y_top_in + v_top_in + + if (needed <= col_width_in) { + # Fast path: the text fits inside its column, so the column width + # already clips by construction. Draw directly into the parent + # viewport and skip the per-cell clip viewport push/pop entirely -- + # for tables of all-fits cells (numeric columns, short categoricals) + # this saves a viewport per cell. + grid::grid.text( + label = text, + x = grid::unit(x_in / vp_w, "npc"), + y = grid::unit(1 - y_in / vp_h, "npc"), + just = just, + gp = gp + ) + return(invisible(NULL)) + } + + # Slow path: text exceeds the column. Push a clipping viewport so the + # overflow is clipped at the column edge plus a small tolerance, + # instead of bleeding into the neighbour. + bleed_tol_in <- 0.05 + clip_w <- min(col_width_in + bleed_tol_in, needed) vp_clip <- grid::viewport( x = grid::unit(x_left / vp_w, "npc"), y = grid::unit(1 - (y_top_in + row_h) / vp_h, "npc"), @@ -604,18 +697,17 @@ drawDetails.tfl_table_grob <- function(x, recording) { ) grid::pushViewport(vp_clip) - # Re-express x, y relative to clip viewport - vp_w2 <- .width_in(grid::unit(1, "npc")) - vp_h2 <- .height_in(grid::unit(1, "npc")) - + # vp_clip was just constructed with width = clip_w inches and + # height = row_h inches, so npc=1 inside it is exactly that many inches -- + # no need to convertUnit() to recover those numbers. x_local_in <- if (identical(align, "left")) { h_lft_in } else if (identical(align, "right")) { - vp_w2 - h_rgt_in + clip_w - h_rgt_in } else { - vp_w2 / 2 + clip_w / 2 } - y_local_in <- vp_h2 - v_top_in + y_local_in <- row_h - v_top_in grid::grid.text( label = text, diff --git a/R/table_pagelist.R b/R/table_pagelist.R index bb2583f..0da1437 100644 --- a/R/table_pagelist.R +++ b/R/table_pagelist.R @@ -35,14 +35,24 @@ #' @param pg_width,pg_height Page dimensions in inches. #' @param dots The `list(...)` from [export_tfl()]. #' @param page_num Glue template string for page numbering. +#' @param text_dim_cache Optional environment used as the pagination-phase +#' text-dimension cache. When supplied, the same env is reused instead +#' of allocating one locally, so the caller (`export_tfl()`) can later +#' reuse its entries during the drawing phase by attaching the env to +#' the table grobs. When `NULL` (the default), a fresh env is +#' allocated and discarded after pagination completes -- equivalent to +#' the pre-D-48 behaviour. #' @return A list of page spec lists, each with at least `$content` (a grob). #' @keywords internal tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, - page_num = "Page {i} of {n}") { + page_num = "Page {i} of {n}", + text_dim_cache = NULL) { if (is.null(tbl$sub_tfl)) { - .tfl_table_to_pagelist_default(tbl, pg_width, pg_height, dots, page_num) + .tfl_table_to_pagelist_default(tbl, pg_width, pg_height, dots, page_num, + text_dim_cache = text_dim_cache) } else { - .tfl_table_to_pagelist_sub_tfl(tbl, pg_width, pg_height, dots, page_num) + .tfl_table_to_pagelist_sub_tfl(tbl, pg_width, pg_height, dots, page_num, + text_dim_cache = text_dim_cache) } } @@ -53,7 +63,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, # measurement pipeline; recursion handles that naturally. #' @keywords internal .tfl_table_to_pagelist_sub_tfl <- function(tbl, pg_width, pg_height, dots, - page_num) { + page_num, + text_dim_cache = NULL) { groups <- .compute_sub_tfl_groups(tbl$data, tbl$sub_tfl) pages <- list() for (g in groups) { @@ -69,8 +80,12 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, sub_dots$caption <- .apply_sub_tfl_caption(dots$caption, suffix, tbl$sub_tfl_prefix) + # Share the same cache across all sub-groups: identical (gp_key, string) + # pairs appear in every sub-table for repeating categoricals and the + # cache keeps adding entries instead of being thrown away per group. sub_pages <- tfl_table_to_pagelist(sub_tbl, pg_width, pg_height, - sub_dots, page_num) + sub_dots, page_num, + text_dim_cache = text_dim_cache) sub_pages <- lapply(sub_pages, .attach_page_caption, caption = sub_dots$caption) pages <- c(pages, sub_pages) @@ -88,7 +103,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, #' @keywords internal .tfl_table_to_pagelist_default <- function(tbl, pg_width, pg_height, dots, - page_num) { + page_num, + text_dim_cache = NULL) { # --- Step 1: Extract layout args from dots --- # Use explicit NULL checks instead of %||% for arguments that can legitimately @@ -150,6 +166,23 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) { .height_in(tbl$wrap_extra_padding) } else 0 + + # Per-pagination text-dimension cache. Spans the natural-width pass, + # the row-height pass, and the header-height pass. Each (gp_key, string) + # is constructed via grid::textGrob exactly once; both dimensions are read + # from that grob and cached together. Different passes typically need + # only one dimension, so the cache amortises the other for free. All + # those passes open PDF scratch devices with the same pg_width/pg_height + # so font metrics are stable across the cache's lifetime. + # + # When `export_tfl()` supplies a cache, the same env is reused so its + # entries survive past pagination and become available to the drawing + # phase via the grob's $text_dim_cache slot. D-47 documented this as + # safe across PDF scratch devices with matching settings; D-48 extends + # the argument to span the render device. + if (is.null(text_dim_cache)) { + text_dim_cache <- new.env(hash = TRUE, parent = emptyenv()) + } strategy <- tbl$col_split_strategy %||% "balanced" max_retries <- as.integer(tbl$row_overflow_max_retries %||% 5L) use_retry_loop <- identical(strategy, "balanced") && max_retries > 0L @@ -158,34 +191,30 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, names(floor_overrides) <- character(0L) retries <- 0L - # Per-iteration helper: opens a fresh row-height scratch device, runs - # the measurement + pagination phase, closes the scratch device, and - # returns (row_pages, cell_h_mat, cont_row_h). The scratch device's - # lifecycle is fully contained inside this helper so the surrounding - # retry loop never holds a viewport across a compute_col_widths() - # call (compute_col_widths() opens its own scratch devices internally). + # Per-iteration helper: runs the row-height measurement + pagination + # phase under a fresh outer viewport. D-48: relies on the metric + # device opened by `.open_metric_device()` upstream rather than + # opening its own scratch PDF. Returns (row_pages, cell_h_mat, + # cont_row_h). The viewport is popped on exit so an error inside + # paginate_rows() does not leave it on the stack. .run_pagination_iter <- function(resolved_cols, collect_overflows) { - scratch_file_rh <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file_rh, width = pg_width, height = pg_height) rh_outer_vp <- .make_outer_vp(margins) grid::pushViewport(rh_outer_vp) - on.exit({ - grid::popViewport() - grDevices::dev.off() - unlink(scratch_file_rh) - }, add = TRUE) + on.exit(grid::popViewport(), add = TRUE) header_row_h <- if (tbl$show_col_names) { .measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding, tbl$line_height, breaks = breaks, - wrap_extra_pad_in = wrap_extra_pad_in) + wrap_extra_pad_in = wrap_extra_pad_in, + cache = text_dim_cache) } else 0 cell_h_mat <- measure_row_heights_tbl( tbl$data, resolved_cols, tbl$gp, tbl$cell_padding, tbl$na_string, tbl$line_height, tbl$max_measure_rows, breaks = breaks, - wrap_extra_pad_in = wrap_extra_pad_in + wrap_extra_pad_in = wrap_extra_pad_in, + cache = text_dim_cache ) cont_row_h <- max( @@ -217,7 +246,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, col_result <- compute_col_widths( resolved_cols_0, tbl$data, cw, tbl, pg_width, pg_height, margins, overflow_action = overflow_action, - floor_overrides = floor_overrides + floor_overrides = floor_overrides, + cache = text_dim_cache ) resolved_cols <- col_result$resolved_cols col_groups <- col_result$col_groups @@ -239,7 +269,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, resolved_cols_0, tbl$data, cw_adj, tbl, pg_width, pg_height, margins, overflow_action = overflow_action, validate_overflow = FALSE, - floor_overrides = floor_overrides + floor_overrides = floor_overrides, + cache = text_dim_cache ) resolved_cols <- col_result$resolved_cols col_groups <- col_result$col_groups @@ -287,6 +318,15 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, pages <- vector("list", n_rp * n_cg) idx <- 1L + # Shared per-resolved_cols clip-width cache. Every page-grob built below + # holds a reference to the SAME list of envs, so .draw_cell_text() can + # reuse measurements across pages of the same tfl_table (one entry per + # unique cell text per column). Envs are reference-typed, so memory is + # not duplicated per grob. + clip_width_caches <- lapply(seq_along(resolved_cols), function(k) { + new.env(hash = TRUE, parent = emptyenv()) + }) + for (rp in seq_len(n_rp)) { for (cg in seq_len(n_cg)) { grob <- build_table_grob( @@ -298,7 +338,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, cell_heights_in_mat = cell_h_mat, cont_row_h_in = cont_row_h, is_first_col_page = (cg == 1L), - is_last_col_page = (cg == n_cg) + is_last_col_page = (cg == n_cg), + clip_width_caches = clip_width_caches ) page_spec <- list(content = grob) pages[[idx]] <- page_spec @@ -315,19 +356,23 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, #' Compute available content area for a tfl_table page #' -#' Opens a scratch device, measures annotation section heights using the -#' same infrastructure as export_tfl_page(), and returns available width and -#' height in inches. +#' Measures annotation section heights using the same infrastructure as +#' export_tfl_page() and returns available width and height in inches. +#' +#' D-48: requires an active graphics device with matching page +#' dimensions; the caller (`.tfl_table_to_pagelist_default()`) runs +#' inside the metric device opened by `.open_metric_device()` in the +#' S3 dispatcher, so font metrics here equal those used at draw time +#' (normal mode) or those of the same pdf(NULL) used for the rest of +#' pagination (preview mode). #' #' @keywords internal compute_table_content_area <- function(pg_width, pg_height, margins, padding, header_rule, footer_rule, annot, gp_page, cap_just, fn_just) { - grDevices::pdf(NULL, width = pg_width, height = pg_height) - on.exit(grDevices::dev.off(), add = TRUE) - outer_vp <- .make_outer_vp(margins) grid::pushViewport(outer_vp) + on.exit(grid::popViewport(), add = TRUE) vp_w <- .width_in(grid::unit(1, "npc")) vp_h <- .height_in(grid::unit(1, "npc")) @@ -375,7 +420,8 @@ compute_table_content_area <- function(pg_width, pg_height, margins, padding, used_h <- heights$header + heights$caption + heights$footnote + heights$footer avail_h <- vp_h - used_h - n_gaps * pad_in - grid::popViewport() + # popViewport() runs from on.exit so an error mid-measurement does + # not leave the outer_vp on the stack. list(width = vp_w, height = max(avail_h, 0)) } diff --git a/R/table_rows.R b/R/table_rows.R index 6af93e1..7c71d23 100644 --- a/R/table_rows.R +++ b/R/table_rows.R @@ -26,7 +26,8 @@ #' @keywords internal measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, na_string, line_height, max_measure_rows, - breaks = NULL, wrap_extra_pad_in = 0) { + breaks = NULL, wrap_extra_pad_in = 0, + cache = NULL) { n_rows <- nrow(data) n_cols <- length(resolved_cols) v_pad_in <- .height_in(cell_padding[["top"]]) + @@ -34,17 +35,12 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, h_lft_in <- .width_in(cell_padding[["left"]]) h_rgt_in <- .width_in(cell_padding[["right"]]) - # Memoised per-cell-text-height function: (string, gp_key) -> height_in - memo <- new.env(hash = TRUE, parent = emptyenv()) - .memo_str_height <- function(s, gp_key, gp) { - key <- paste0(gp_key, "\x01", s) - if (!exists(key, envir = memo, inherits = FALSE)) { - grob <- grid::textGrob(s, gp = gp) - h <- .height_in(grid::grobHeight(grob)) - assign(key, h, envir = memo) - } - get(key, envir = memo, inherits = FALSE) - } + # When the caller supplied a shared cache, reuse it across columns so that + # any (string, gp) measured during the natural-width pass earlier in + # pagination is served from the cache here. Otherwise create a local + # height-only memo (preserves the pre-cache behaviour for the few callers + # that exercise this function directly). + if (is.null(cache)) cache <- new.env(hash = TRUE, parent = emptyenv()) # Limit rows sampled for height estimation sample_rows <- if (is.finite(max_measure_rows) && n_rows > max_measure_rows) { @@ -83,7 +79,7 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, cell_str } nlines <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]])) - h_grob <- .memo_str_height(display_str, gp_key, cell_gp) + h_grob <- .measure_text_dims_in(display_str, cell_gp, gp_key, cache)$h h_line <- nlines * .height_in(grid::stringHeight("M")) extra <- if (nlines > 1L) wrap_extra_pad_in else 0 cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + extra diff --git a/R/table_utils.R b/R/table_utils.R index b81b17c..d846fad 100644 --- a/R/table_utils.R +++ b/R/table_utils.R @@ -52,13 +52,15 @@ # between the header row and the first data row is more obvious. .measure_header_row_height <- function(resolved_cols, gp_tbl, cell_padding, line_height, breaks = NULL, - wrap_extra_pad_in = 0) { + wrap_extra_pad_in = 0, + cache = NULL) { v_pad_in <- .height_in(cell_padding[["top"]]) + .height_in(cell_padding[["bottom"]]) h_lft_in <- .width_in(cell_padding[["left"]]) h_rgt_in <- .width_in(cell_padding[["right"]]) hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), line_height) + gp_key <- paste0("header_row_lh", line_height) max(vapply(resolved_cols, function(cs) { label <- cs$label @@ -67,8 +69,7 @@ h_lft_in + h_rgt_in, hdr_gp, breaks) } nlines <- max(1L, length(strsplit(label, "\n", fixed = TRUE)[[1L]])) - grob <- grid::textGrob(label, gp = hdr_gp) - h_grob <- .height_in(grid::grobHeight(grob)) + h_grob <- .measure_text_dims_in(label, hdr_gp, gp_key, cache)$h h_line <- nlines * .height_in(grid::stringHeight("M")) extra <- if (nlines > 1L) wrap_extra_pad_in else 0 max(h_grob, h_line) + extra @@ -255,15 +256,70 @@ # Text measurement helpers # --------------------------------------------------------------------------- +# Look up or measure (width, height) for `s` under `gp`, caching both when +# an environment is supplied. `gp_key` is a stable structural key for `gp` +# (e.g. paste0("data_row_lh", line_height)) so callers using the same gp +# share entries without hashing the gpar field-by-field per lookup. When +# `gp_key` is NULL the cache key is just the string -- appropriate for +# caches whose entries are all measured under one gp (the caller owns +# that invariant). +# +# When the cache misses we build the textGrob once and read BOTH dimensions +# from it -- consolidating what would otherwise be two separate textGrob +# constructions if a later caller needs the other dimension of the same +# (gp, string). Each construction re-runs grid's gpar validation, which +# is the dominant cost; avoiding the duplicate is the point. +.measure_text_dims_in <- function(s, gp, gp_key = NULL, cache = NULL) { + if (!nzchar(s)) return(list(w = 0, h = 0)) + if (!is.null(cache)) { + key <- if (is.null(gp_key)) s else paste0(gp_key, "\x01", s) + if (exists(key, envir = cache, inherits = FALSE)) { + return(get(key, envir = cache, inherits = FALSE)) + } + } + # D-48 safety guard. After Phases 1/2 every internal caller runs + # inside the metric device opened by `.open_metric_device()`. A + # future regression that forgets to open one would produce + # confusing downstream errors (convertWidth/convertHeight against + # the null device returns 0 or NA depending on grid version); + # fail fast here with a clear message instead. Skipped on cache + # hits, so the cost is paid only on the slow path. + if (grDevices::dev.cur() == 1L) { + rlang::abort(paste0( + "Internal: .measure_text_dims_in() requires an active graphics ", + "device. This is a bug in writetfl -- the caller should be ", + "invoked under `.open_metric_device()`." + )) + } + g <- grid::textGrob(s, gp = gp) + out <- list(w = .width_in(grid::grobWidth(g)), + h = .height_in(grid::grobHeight(g))) + if (!is.null(cache)) assign(key, out, envir = cache) + out +} + # Measure the maximum rendered text width (in inches) for a vector of strings. # Uses textGrob rather than stringWidth() because stringWidth() does not # accept a gp argument in all grid versions. -.measure_max_string_width <- function(strings, gp) { +# +# When a `cache` env and `gp_key` are supplied, lookups go through the +# consolidated (string -> (w, h)) cache from `.measure_text_dims_in()` so a +# later height query for the same (string, gp) reuses the textGrob. +.measure_max_string_width <- function(strings, gp, gp_key = NULL, + cache = NULL) { if (length(strings) == 0L) return(0) # Dedupe up front: real-world callers pass cell-string vectors where the # same value typically appears in many rows (e.g. category labels, NA # strings), so this saves grid round-trips with no behaviour change. uniq <- unique(strings) + if (!is.null(cache) && !is.null(gp_key)) { + return(max(vapply(uniq, function(s) { + lines <- strsplit(s, "\n", fixed = TRUE)[[1L]] + max(vapply(lines, + function(ln) .measure_text_dims_in(ln, gp, gp_key, cache)$w, + numeric(1L))) + }, numeric(1L)))) + } max(vapply(uniq, function(s) { lines <- strsplit(s, "\n", fixed = TRUE)[[1L]] max(vapply(lines, function(ln) { diff --git a/R/wrap.R b/R/wrap.R index 7fd9acf..7204514 100644 --- a/R/wrap.R +++ b/R/wrap.R @@ -112,21 +112,25 @@ wrap_breaks_default <- function() { n <- length(chars) if (n == 0L) return(list()) - tokens <- vector("list", n) # over-allocate; trim at end - k <- 0L - cur_buf <- character(n) - cur_n <- 0L - pending <- "" # lead for the next emitted token + # Track each token by its [start, end] position in `s` rather than + # accumulating characters and pasting on flush. `substr(s, st, ed)` is + # one C call per token; the previous `paste(cur_buf[seq_len(cur_n)], + # collapse = "")` allocated and joined cur_n elements per token. + tokens <- vector("list", n) # over-allocate; trim at end + k <- 0L + cur_start <- 0L # 0 means "no token in progress" + cur_end <- 0L + pending <- "" flush <- function() { - if (cur_n > 0L) { + if (cur_start > 0L) { k <<- k + 1L tokens[[k]] <<- list( - text = paste(cur_buf[seq_len(cur_n)], collapse = ""), + text = substr(s, cur_start, cur_end), lead = pending ) - cur_n <<- 0L - pending <<- "" + cur_start <<- 0L + pending <<- "" } } @@ -136,13 +140,13 @@ wrap_breaks_default <- function() { flush() pending <- ch } else if (length(keep_chars) > 0L && ch %in% keep_chars) { - cur_n <- cur_n + 1L - cur_buf[cur_n] <- ch + if (cur_start == 0L) cur_start <- i + cur_end <- i flush() pending <- "" } else { - cur_n <- cur_n + 1L - cur_buf[cur_n] <- ch + if (cur_start == 0L) cur_start <- i + cur_end <- i } } flush() @@ -159,6 +163,14 @@ wrap_breaks_default <- function() { # `cache`, if supplied, is an environment used as a (string -> width) memo # scoped to the caller's lifetime. The caller is responsible for ensuring # every cache entry was measured under the same `gp`. +# +# This helper is in a hot loop (wrap module re-measures candidate strings +# many times per cell). An earlier attempt to delegate through +# `.measure_text_dims_in()` for a unified cache cost ~20-30% on +# wrap_heavy / big_df / preview_iris -- the double function call plus +# list-wrapping per cache hit was unaffordable here. The two helpers +# share a textGrob construction strategy but stay separate functions for +# the inner loop's benefit. .measure_text_width_in <- function(s, gp, cache = NULL) { if (!nzchar(s)) return(0) if (!is.null(cache) && exists(s, envir = cache, inherits = FALSE)) { @@ -330,15 +342,11 @@ wrap_breaks_default <- function() { na_str <- tbl$na_string max_rows <- tbl$max_measure_rows - scratch_file <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + # D-48: relies on the metric device opened upstream by + # `.open_metric_device()` rather than opening a scratch PDF here. outer_vp <- .make_outer_vp(margins) grid::pushViewport(outer_vp) - on.exit({ - grid::popViewport() - grDevices::dev.off() - unlink(scratch_file) - }, add = TRUE) + on.exit(grid::popViewport(), add = TRUE) vapply(seq_len(n), function(j) { cs <- resolved_cols[[j]] @@ -521,15 +529,11 @@ wrap_breaks_default <- function() { na_str <- tbl$na_string max_rows <- tbl$max_measure_rows - scratch_file <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + # D-48: relies on the metric device opened upstream by + # `.open_metric_device()` rather than opening a scratch PDF here. outer_vp <- .make_outer_vp(margins) grid::pushViewport(outer_vp) - on.exit({ - grid::popViewport() - grDevices::dev.off() - unlink(scratch_file) - }, add = TRUE) + on.exit(grid::popViewport(), add = TRUE) # Compute per-column floors (only meaningful for wrap-eligible cols). # Headers are rendered with the header_row gpar (typically bold) and data @@ -641,17 +645,13 @@ wrap_breaks_default <- function() { line_height <- tbl$line_height %||% 1.05 min_in <- .width_in(tbl$min_col_width) - # Open scratch device once. Closing happens via on.exit so it runs even - # under tryCatch failure inside the search loop. - scratch_file <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + # D-48: relies on the metric device opened upstream by + # `.open_metric_device()` rather than opening a scratch PDF here. + # outer_vp pop happens via on.exit so it runs even under tryCatch + # failure inside the search loop. outer_vp <- .make_outer_vp(margins) grid::pushViewport(outer_vp) - on.exit({ - grid::popViewport() - grDevices::dev.off() - unlink(scratch_file) - }, add = TRUE) + on.exit(grid::popViewport(), add = TRUE) result <- tryCatch({ .height_balance_widths_impl( diff --git a/design/ARCHITECTURE.md b/design/ARCHITECTURE.md index 82104e9..91fe14b 100644 --- a/design/ARCHITECTURE.md +++ b/design/ARCHITECTURE.md @@ -30,42 +30,67 @@ export_tfl(x, file, preview, ...) [exported, S3 generic] │ │ wraps single ggplot/grob as list(list(content = x)) │ └── .export_tfl_pages(...) — export_tfl.R (shared) │ + ## All S3 methods follow the SAME D-48 device-lifecycle pattern: + ## md <- .open_metric_device(file, pg_width, pg_height, preview) + ## # ... call *_to_pagelist(...) ... + ## if (!isFALSE(preview)) .close_metric_device(md) + ## .export_tfl_pages(..., pdf_already_open = TRUE) + ## Pagination and (in normal mode) drawing both run on `md`. + ├── export_tfl.tfl_table() — export_tfl.R │ ├── .validate_export_args(...) - │ ├── tfl_table_to_pagelist(...) — table_pagelist.R - │ └── .export_tfl_pages(...) + │ ├── .open_metric_device(file, pg_width, pg_height, preview) + │ │ opens pdf(file) (normal) or pdf(NULL) (preview); + │ │ on.exit registered on this frame. + │ ├── pagination_cache <- new.env(...) + │ ├── tfl_table_to_pagelist(..., text_dim_cache = pagination_cache) + │ ├── drawing_cache <- pagination_cache (normal) or new.env() (preview) + │ ├── for each tfl_table_grob: attach $text_dim_cache <- drawing_cache + │ ├── if (preview) .close_metric_device(md) + │ └── .export_tfl_pages(..., pdf_already_open = TRUE) │ ├── export_tfl.ggtibble() — ggtibble.R │ ├── .validate_export_args(...) + │ ├── .open_metric_device(file, pg_width, pg_height, preview) │ ├── ggtibble_to_pagelist(x, sub_tfl, sub_tfl_sep, — ggtibble.R │ │ sub_tfl_collapse, sub_tfl_prefix) │ │ per row, appends "label: value; ..." suffix to caption via │ │ .apply_sub_tfl_caption() (sub_tfl.R); raw column names used │ │ as labels (no colspec system for ggtibble) - │ └── .export_tfl_pages(...) + │ ├── if (preview) .close_metric_device(md) + │ └── .export_tfl_pages(..., pdf_already_open = TRUE) │ ├── export_tfl.gt_tbl() — gt.R │ ├── rlang::check_installed("gt") │ ├── .validate_export_args(...) + │ ├── .open_metric_device(file, pg_width, pg_height, preview) │ ├── gt_to_pagelist(x) — gt.R - │ └── .export_tfl_pages(...) + │ ├── if (preview) .close_metric_device(md) + │ └── .export_tfl_pages(..., pdf_already_open = TRUE) │ ├── export_tfl.list() — export_tfl.R │ ├── .validate_export_args(...) + │ ├── .open_metric_device(file, pg_width, pg_height, preview) │ ├── [all gt_tbl?] → gt_to_pagelist() per element │ ├── [otherwise] → coerce_x_to_pagelist(x) - │ └── .export_tfl_pages(...) + │ ├── if (preview) .close_metric_device(md) + │ └── .export_tfl_pages(..., pdf_already_open = TRUE) │ - └── .export_tfl_pages(pages, file, ...) — export_tfl.R (shared) + └── .export_tfl_pages(pages, file, ..., pdf_already_open = TRUE) — export_tfl.R └── [preview = FALSE] PDF loop: - │ grDevices::pdf(file, ...) - │ on.exit(dev.off(), add = TRUE) + │ ## Device opened upstream by .open_metric_device() in the + │ ## S3 dispatcher; this function does NOT re-open it. Legacy + │ ## fallback (`pdf_already_open = FALSE`) preserved for + │ ## external callers that invoke .export_tfl_pages() directly. │ for i in seq_along(pages): │ build_page_args(pages[[i]], dots, page_num, i, n) — utils.R │ export_tfl_page(x = pages[[i]], ...) [exported] │ invisible(normalizePath(file)) │ └── [preview = TRUE or integer] Preview loop: + ## In preview mode the dispatcher already closed the transient + ## pdf(NULL) metric device; the user's pre-existing device is + ## the render target. for j in seq_along(page_idx): build_page_args(pages[[i]], dots, page_num, i, n) export_tfl_page(x = pages[[i]], ..., preview = TRUE) @@ -141,7 +166,8 @@ export_tfl(x = tfl_table_obj, ...) [exported] │ concatenate per-group pages → return │ ├── compute_table_content_area(...) — table_pagelist.R - │ scratch device + outer_vp to measure annotation heights + │ outer_vp (no scratch device — uses metric device opened by + │ .open_metric_device() upstream) to measure annotation heights ├── resolve_col_specs(tbl) — table_columns.R ├── compute_col_widths(resolved_cols, ...) — table_columns.R │ ├── auto-detect wrap eligibility via — wrap.R @@ -151,7 +177,7 @@ export_tfl(x = tfl_table_obj, ...) [exported] │ │ .column_min_token_width_in(strings, gp, breaks) │ │ as the per-column floor │ └── paginate_cols(...) - ├── [scratch device + outer_vp] measure heights: + ├── [outer_vp on metric device] measure heights: │ .measure_header_row_height() — table_utils.R │ measure_row_heights_tbl() → cell_h_mat — table_rows.R │ Per-cell height matrix [nrow × ncol]; each entry includes @@ -214,7 +240,7 @@ export_tfl(x = gt_tbl_obj, ...) [exported] ├── .gt_content_height(...) — gt.R │ reuses compute_table_content_area() ├── .gt_grob_height(grob, ...) — gt.R - │ measures grob height in scratch device + │ measures grob height under the metric device (D-48) │ ├── [fits on one page] → single page spec │ diff --git a/design/DECISIONS.md b/design/DECISIONS.md index f9260e4..60340e2 100644 --- a/design/DECISIONS.md +++ b/design/DECISIONS.md @@ -1483,3 +1483,452 @@ never touches `drawDetails.tfl_table_grob`. **Verification:** - Full `devtools::test()` green before and after. - `examples/bench_compare.R` reproduces the table above. + +--- + +## D-45: Fast-path cell drawing, position-based tokenizer, and per-page formatting + +**Decision:** Three independent, profile-driven changes layered on top of +D-43/D-44: + +1. **Fast path in `.draw_cell_text()`** — when the measured text width + plus horizontal padding is no greater than the column width, draw + directly in the parent viewport instead of pushing a per-cell + clipping viewport. The clip viewport (which created+pushed+popped + per cell) is still used in the slow path, where text might bleed. +2. **Drop redundant `convertUnit` calls in `.draw_cell_text()`'s clip + path** — the clip viewport is constructed with explicit + `unit(clip_w, "inches")` / `unit(row_h, "inches")` dimensions, so + the post-push `vp_w2 <- .width_in(unit(1, "npc"))` / + `vp_h2 <- .height_in(unit(1, "npc"))` calls were re-measuring known + values. Replaced with the literals. +3. **Position-based tokenizer in `.tokenize_for_wrap()`** — replaced the + per-token `paste(cur_buf[seq_len(cur_n)], collapse = "")` with + `substr(s, cur_start, cur_end)`. One C call per token instead of + `n` element accumulations + `paste`. +4. **Per-page formatting hoist in `drawDetails`** — replaced per-cell + `.fmt_cell(data[[cs$col]][i], na_str)` with a single + `.fmt_cell_vec(data[[cs$col]][rows], na_str)` per column built before + the row loop. + +**Context (profiling, round 3):** Post-D-44, the `core_paginate` +Rprof showed `table_draw.R:403` (the `.draw_cell_text()` call site) +accumulating **73% of total time**. Drilling into `.draw_cell_text()`: + +| Line | Self self.pct | Total total.pct | What | +|------|--------------|-----------------|------| +| 597 (measure) | 0.73% | 13.76% | text-width measurement (cached, but still per cell) | +| 603 (vp_clip) | 1.17% | 13.03% | viewport creation (4 `grid::unit()` + `viewport()`) | +| 611 (pushViewport) | — | — | pushing the clip vp | +| 625 (grid.text) | 0.29% | 29.14% | the actual text draw | +| 633 (popViewport) | 0.15% | 6.44% | popping | + +The viewport push/pop pair (~20% of total) is only meaningful when text +overflows the column. For all-numeric tables (iris) and post-wrap +cells, the text always fits and the clip is redundant. + +**Measured improvement** (medians, 15 iterations per core scenario, 3 +for `wrap_demos`, 5 independent runs averaged; baseline = main + D-43 + +D-44 at `7c9484c`): + +| Scenario | Before | After | Δ | +|-----------------|-----------|-----------|-------------------------| +| `core_small` | 146 ms | 111 ms | **~24% ↓** | +| `core_wrap` | 206 ms | 209 ms | within run-to-run noise | +| `core_paginate` | 397 ms | 270 ms | **~32% ↓** | +| `figure_multi` | 318 ms | 330 ms | within noise | +| `wrap_demos` | 2.97 s | 2.80 s | **~6% ↓** | + +`core_paginate` (the iris-heavy draw scenario) and `core_small` +(short table going through the same draw loop) get the biggest gains. +`wrap_demos` had high run-to-run variance with only n=3 iterations; +averaged across 5 independent runs it shows a steady ~6% reduction. +`core_wrap` is unchanged because its bottleneck is height-balance +measurement, not cell drawing. `figure_multi` doesn't touch +`drawDetails.tfl_table_grob`. + +**Alternatives considered and rejected:** +- *Regex-based vectorized tokenizer* — `regmatches`/`strsplit`-driven + reimplementation would shave more time but the algorithm has subtle + drop-vs-keep_before semantics (multiple consecutive drops collapse to + one lead, keep_before chars stay with the preceding token). The + position-based change keeps the existing algorithm verbatim and + preserves the comment-level explanation. Faster vectorization was + not worth the risk-of-regression. +- *Pre-computed parent-viewport coordinates passed into + `.draw_cell_text()`* — would push more work into the drawDetails + body. The current shape (compute once inside `.draw_cell_text()`) is + shorter and the per-call overhead is now tiny. +- *Skipping the text-width measurement entirely when `nchar(text)` is + small enough* — would require a font-aware upper bound on per-char + width; fragile across font sizes and families. The cached + measurement is already cheap on cache hit. + +**Files touched:** +- `R/table_draw.R`: + - `drawDetails.tfl_table_grob`: hoisted `cell_strs_by_col` (vectorised + formatting), and the existing hoists from D-44 are unchanged. + - `.draw_cell_text`: added fast/slow branch on `needed <= col_width_in`, + removed the two post-push `convertUnit` calls in the slow path. +- `R/wrap.R`: + - `.tokenize_for_wrap`: track `cur_start`/`cur_end` positions and + emit text via `substr()` instead of accumulating + pasting. + +**Verification:** +- Full `devtools::test()` green before and after. +- `examples/bench_compare.R` reproduces the table above (averaging over + multiple runs is needed for `wrap_demos`'s lower-iteration sample). + +--- + +## D-46: Cross-page clip-width cache shared across all pages of one tfl_table + +**Decision:** Construct one list of clip-width cache envs (one per column +in `resolved_cols`) at the top of `tfl_table_to_pagelist()` and pass it +into every `build_table_grob()` call. Every page-grob built for the +same table holds a reference to the same env list, so +`drawDetails.tfl_table_grob()` can reuse cached measurements across all +row-page x col-group combinations. + +**Context:** D-44 introduced a per-page clip-width cache. Profiling and +focused benchmarking showed that for tables spanning many pages, the +same cell text typically appears on every page (numeric formats, +visit labels, category codes), so each page was re-measuring the same +strings. A single env shared across pages eliminates that +inter-page duplication while preserving the intra-page hits D-44 +already provided. + +**Change:** +- `R/table_pagelist.R` -- inside `tfl_table_to_pagelist()` (and the + table1/flextable equivalents that call `build_table_grob` similarly), + `clip_width_caches <- lapply(seq_along(resolved_cols), function(k) + new.env(...))` is built once, then threaded to every + `build_table_grob()` invocation. +- `R/table_draw.R` -- `build_table_grob()` gains a `clip_width_caches` + argument stored as a grob slot. `drawDetails.tfl_table_grob()` now + prefers `x$clip_width_caches[x$col_group_idx]` over creating fresh + per-page envs, falling back to the old behaviour when the slot is + absent (e.g. for grobs built outside the normal pipeline). + +The cache is keyed by column index into `resolved_cols` (stable across +col-group splits), not by the local `j` index in `page_cols` (which +re-numbers per col-group). + +**Measured improvement** (medians, 30 iterations; baseline = round-3 +HEAD at `7fcf208` = D-45 shipped): + +| Scenario | Before | After | Δ | +|-----------------|-----------|-----------|-------------| +| `iris5p` (150 rows / 5 pages) | 264 ms | 258 ms | ~2% (modest) | +| `big_df` (500 rows / ~17 pages, 4 cols incl. repeating categoricals) | 1560 ms | 1330 ms | **~15% ↓** | + +The gain scales with page count and amount of inter-page duplication. +Short single-page tables see negligible benefit (the per-page cache +already caught everything). Realistic multi-page clinical listings +hit the design sweet spot. + +**Alternatives considered and rejected:** +- *Cache spanning pagination (scratch device) + drawing (render device)* + -- the documented re-measurement in `.draw_cell_text()` exists + because font metrics can legitimately differ between the scratch + PDF and the render device (e.g. knitr PNG vs PDF for preview mode). + A cross-device cache would risk inaccurate placement, which the + project owner explicitly excluded. +- *Consolidated (width, height) cache in pagination* -- would save + ~half the gpar-validation overhead in `.measure_max_string_width()` + + `.memo_str_height()` by sharing one textGrob's measurements + between Pass 1 (widths) and Pass 2 (heights). Independent of this + change; worth pursuing only if benchmarks justify the threading + cost. Deferred. +- *Package-level env keyed by tbl-object identity* -- zero plumbing, + but a global mutable env is harder to reason about (parallelism, + leaked entries). The explicit list-threaded approach is clearer. + +**Files touched:** +- `R/table_pagelist.R` -- one new `clip_width_caches` construction + block in `.tfl_table_to_pagelist_default()` and the corresponding + pass-through in the `build_table_grob()` call. +- `R/table_draw.R` -- `build_table_grob()` accepts and stores the + cache; `drawDetails.tfl_table_grob()` reads it via + `x$clip_width_caches`. + +**Verification:** +- Full `devtools::test()` green. +- `examples/bench_focused.R` (n=30) reproduces the `iris5p` / + `big_df` table above. + +--- + +## D-47: Consolidated width+height text-dimension cache during pagination + +**Decision:** Replace the two separate per-call height memos with a single +`(gp_key, string) -> list(w, h)` cache that lives for one +`.tfl_table_to_pagelist_default()` call. Every textGrob built for +measurement during pagination populates both dimensions; subsequent +lookups for either dimension reuse the existing entry. + +**Context:** Profiling after D-46 showed that pagination still constructed +two textGrobs for every unique cell string -- one in +`.measure_max_string_width()` (Pass 1, widths) and one in +`measure_row_heights_tbl()` (Pass 2, heights). Each construction re-runs +grid's `validGP` / `set.gpar` chain, the dominant per-call cost. Pass 1 +and Pass 2 use the same gpar for matching column categories +(`data_row`, `group_col`, `header_row`), so the same (gp, string) shows +up twice in pagination work. + +**Change:** +- `R/table_utils.R` -- new `.measure_text_dims_in(s, gp, gp_key, cache)` + helper that builds the textGrob once, reads both dimensions, and + caches the pair. Existing `.measure_max_string_width()` and + `.measure_header_row_height()` now accept an optional cache and + delegate per-string measurement to the helper when provided. +- `R/table_rows.R` -- `measure_row_heights_tbl()` accepts a cache and + replaces its inline `.memo_str_height()` closure with the same helper. +- `R/table_columns.R` -- `compute_col_widths()` and `.resolve_natural_widths()` + thread a `cache` argument; the natural-width pass builds the + appropriate structural `gp_key` for cell-vs-header gpars and passes + it down. +- `R/table_pagelist.R` -- `.tfl_table_to_pagelist_default()` creates one + `text_dim_cache <- new.env(hash = TRUE, parent = emptyenv())` at the + top and passes it to `compute_col_widths()`, + `.measure_header_row_height()`, and `measure_row_heights_tbl()`. + +The `gp_key` namespace matches what `measure_row_heights_tbl()` already +used internally (e.g. `"data_row_lh1.2"`, `"group_col_lh1.2"`, +`"header_row_lh1.2"`). All callers that share a category resolve to the +same key, so width-then-height (Pass 1 -> Pass 2) on the same `(category, +string)` is a cache hit. + +**Why crossing scratch-device boundaries is safe here:** the multiple +PDF scratch devices opened during pagination +(`.resolve_natural_widths`, `.run_pagination_iter`) all use identical +`(pg_width, pg_height)` settings, identical fonts, and identical R +session state. PDF device font metrics are deterministic given those +inputs, so values measured on one scratch device are equal to what +the next scratch device would produce. The cache does NOT cross into +the render-device drawing phase -- that boundary still goes through +`.draw_cell_text()`'s separate re-measurement (D-44/D-46 territory). + +**Measured improvement** (n=30 iterations, baseline = round-3 HEAD at +`61bd26a` = D-46 shipped; min-of-mins across 3 independent runs to +suppress system-load noise): + +| Scenario | Before (min) | After (min) | Δ | +|----------|--------------|-------------|------------| +| `iris5p` (150 rows / 5 pages) | 329 ms | 255 ms | **~22% ↓** | +| `big_df` (500 rows / ~17 pages, 4 cols) | 1.45 s | 1.36 s | ~6% ↓ | + +`iris5p` is the targeted scenario for pagination-side optimisation: +relatively small data, many measurement-heavy passes. `big_df` is +dominated by drawing rather than measurement (D-44/D-46 territory), so +the pagination cache helps less in relative terms. + +**Alternatives considered and rejected:** +- *Caching width and height under separate keys* -- requires the cache + consumer to ask for the right dimension and stores two entries per + unique string. Consolidated `(w, h)` tuple is one entry, one + textGrob construction, and any pass that has either dim gets the + other for free. +- *Hashing the full gpar object as the cache key* -- gpars carry many + fields; per-lookup hashing costs more than the structural-key + approach the codebase already uses internally. Callers pre-compute + `gp_key` once per gpar. +- *Threading the cache further down into `.compute_col_min_widths()` + and `.height_balance_widths_impl()`* -- those have their own + per-call caches keyed differently (per-token, per-(j, width)). + Bringing them into the unified cache would require harmonising key + schemes and is left for a follow-up if profile data justifies it. + +**Files touched:** +- `R/table_utils.R` -- new `.measure_text_dims_in()` helper; cache-aware + `.measure_max_string_width()` and `.measure_header_row_height()`. +- `R/table_rows.R` -- cache-aware `measure_row_heights_tbl()`. +- `R/table_columns.R` -- cache-aware `compute_col_widths()` and + `.resolve_natural_widths()`. +- `R/table_pagelist.R` -- cache construction in + `.tfl_table_to_pagelist_default()` and threading into the three + pagination measurement entry points. + +**Verification:** +- Full `devtools::test()` green. +- `examples/bench_focused.R` (n=30) reproduces the table above. + +--- + +## D-48: Single device per export_tfl() + cross-phase text-dim cache + +**Decision:** Open exactly one PDF device per `export_tfl()` call. +Pagination measurements and the per-page draw loop both run on that +device. The `text_dim_cache` populated during pagination is attached +to every `tfl_table_grob` and reused by `.draw_cell_text()` so the +per-cell width re-measurement at draw time becomes a single env +lookup. + +**Context:** D-46/D-47 brought the pagination cache inside one +process-wide env, but the cache documented its boundary as the +render-device transition: `.draw_cell_text()` always re-measured +because the scratch PDF used for pagination and the user-visible +render device could differ in font metrics (e.g. PNG in knitr). + +In practice the package's "render device" is *always* a PDF -- either +`pdf(file)` opened by `.export_tfl_pages()` in normal mode, or the +user's own device in preview mode. Phase-0 profiling showed +`.draw_cell_text`'s re-measure consuming **8.87 %** of total time in +`core_paginate` -- the line was the single largest target. Eight +helpers across `R/` opened their own scratch `pdf()` devices for +font-metric resolution, each costing ~5 ms; while never individually +hot, collectively they added measurable overhead and complicated the +device-lifecycle invariant. + +**Change:** + +1. New helpers in `R/export_tfl.R`: + - `.open_metric_device(file, pg_width, pg_height, preview, envir)` + -- opens `pdf(file)` in normal mode or `pdf(NULL, ...)` in + preview mode, and registers `on.exit({...; dev.off()}, add = + TRUE)` on the CALLER's frame so a mid-execution error still + closes the device. + - `.close_metric_device(md)` -- idempotent close; preview-mode + callers invoke it explicitly after pagination so the user's + pre-existing device is restored for drawing. + +2. Every `export_tfl` S3 method calls `.open_metric_device()` BEFORE + pagination: + - `.default`, `.tfl_table`, `.list`, `.ggtibble`, `.gt_tbl`, + `.VTableTree`, `.flextable`, `.table1`. + - `.export_tfl_pages()` gains a `pdf_already_open = FALSE` + parameter; the dispatchers pass `TRUE` so it skips its own + `pdf(file)` / `dev.off()`. + +3. Eight scratch-device opens deleted from `R/`: + - `compute_table_content_area()`, `.run_pagination_iter()` in + `R/table_pagelist.R` + - `.resolve_natural_widths()` in `R/table_columns.R` + - `.compute_col_min_widths()`, `.compute_wrapped_widths()`, + `.height_balance_widths()` in `R/wrap.R` + - `.gt_grob_height()` in `R/gt.R` + - `.rtables_lpp_cpp()` in `R/rtables.R` + Each helper now relies on the upstream metric device. Outer- + viewport push/pop preserved (still needed for inch resolution + against the post-margin content area). + +4. Cross-phase cache plumbing in `R/table_pagelist.R` and + `R/export_tfl.R`: + - `tfl_table_to_pagelist()` accepts an optional `text_dim_cache` + parameter; when supplied, that env is reused instead of being + allocated locally and discarded. + - `export_tfl.tfl_table()` allocates `pagination_cache`, threads + it through pagination, then attaches it to every + `tfl_table_grob` in PDF mode (`drawing_cache <- pagination_cache`). + Preview mode attaches a fresh **empty** env instead, so + drawing's lookups all miss and the function falls through to + per-cell measurement on the user's render device -- preserving + today's preview behaviour exactly. + +5. Cache consumption in `R/table_draw.R`: + - `drawDetails.tfl_table_grob()` extracts `x$text_dim_cache` and + computes `gp_key_by_col` matching pagination's + ("data_row_lh", "group_col_lh") namespace. + - `.draw_cell_text()` gains `text_dim_cache` and `gp_key` + arguments. Width lookup order: text_dim_cache hit -> + per-column `width_cache` (D-46) -> fresh measurement. + - `.draw_header_row()` uses the matching `"header_row_lh"` key. + +6. Safety guard in `.measure_text_dims_in()`: + `if (grDevices::dev.cur() == 1L) rlang::abort(...)` on the slow + path catches future regressions where a caller forgets to open + the metric device. + +**Why crossing the render-device boundary is safe here:** the +single device opened by `.open_metric_device()` IS the render +device in normal mode (`pdf(file)`). Pagination and drawing +measure against identical font metrics by construction -- no +inference required. In preview mode the cache is empty and the +boundary still holds via per-cell re-measurement. + +**Measured impact** (n=30 per scenario; min-of-mins across 3 runs, +b196ca3 vs HEAD on the same machine): + +| Scenario | Baseline | After | Δ | +|----------------|----------|---------|----------| +| `iris5p` | 213 ms | 200 ms | **−6 %** | +| `big_df` | 1.25 s | 1.10 s | **−12 %**| +| `wrap_heavy` | 2.24 s | 1.94 s | **−13 %**| +| `preview_iris` | 194 ms | 166 ms | **−14 %**| +| `figure_multi` | 619 ms | 605 ms | −2 % | + +`figure_multi` is within variance, as expected: the ggplot pipeline +neither populates nor reads the cache. `preview_iris` benefits +from scratch-device elimination even though its cache is empty by +design. + +Profile signal (Rprof @ 0.01s, core_paginate): + +| Site | Baseline | After | +|-------------------------------------|----------|--------| +| `.measure_text_width_in` inside `.draw_cell_text` | 8.87 % total | 0.5 % total | + +A 17× drop on the line that motivated the refactor. + +**Alternatives considered and rejected:** + +- *Unify all four caches (D-46 clip_width_caches, D-47 text_dim_cache, + wrap.R width_cache, wrap.R tokenize cache) into one helper.* An + early prototype routed `.measure_text_width_in()` through + `.measure_text_dims_in()` for a single `(w, h)` cache contract. It + cost 20-30 % on `wrap_heavy`, `big_df`, and `preview_iris` because + the wrap module's inner loop calls .measure_text_width_in() many + times per cell and the extra function call + list-wrapping on + cache hits dominated. Documented in the + `.measure_text_width_in()` source comment; the two helpers stay + separate because the inner loop cannot afford the indirection. +- *Use the user's device for pagination in preview mode (true single + device).* Would make preview pagination decisions vary with the + user's render device (PNG vs screen vs PDF). Today's pagination + is device-agnostic relative to user device; preserving that + contract was a user-stated constraint. Preview mode therefore + opens a transient `pdf(NULL)` for pagination, closes it before + drawing. + +- *Drop `.export_tfl_pages`'s legacy `pdf_already_open = FALSE` + path.* Defensible cleanup (no caller in package passes FALSE + after Phase 1c), but kept defensive in case external code + invokes `.export_tfl_pages()` directly. + +**Files touched:** +- `R/export_tfl.R` -- `.open_metric_device()`, `.close_metric_device()`, + wired into `.default`, `.tfl_table`, `.list`; + `.export_tfl_pages()` gains `pdf_already_open` parameter. +- `R/ggtibble.R`, `R/gt.R`, `R/rtables.R`, `R/flextable.R`, + `R/table1.R` -- wired in their respective S3 methods. +- `R/table_pagelist.R` -- `text_dim_cache` plumbing through + `tfl_table_to_pagelist`, `.tfl_table_to_pagelist_default`, + `.tfl_table_to_pagelist_sub_tfl`; `compute_table_content_area` + and `.run_pagination_iter` drop their scratch devices. +- `R/table_columns.R` -- `.resolve_natural_widths` drops its + scratch device. +- `R/wrap.R` -- three helpers drop their scratch devices; the + `.measure_text_width_in` comment documents why it stays separate + from `.measure_text_dims_in`. +- `R/table_draw.R` -- `.draw_cell_text` and `.draw_header_row` + consume the cache; `drawDetails.tfl_table_grob` extracts it from + the grob. +- `R/table_utils.R` -- `.measure_text_dims_in` accepts `NULL` + gp_key; safety guard for missing device. +- `R/gt.R`, `R/rtables.R` -- scratch device opens removed. +- `tests/testthat/test-export_tfl.R` -- 8 new tests (4 helper tests + for `.open_metric_device` / `.close_metric_device`, plus + device-count, font-metric equality, safety-guard fires, cache + shape). +- `examples/bench_focused.R` -- added `wrap_heavy`, `preview_iris`, + `figure_multi` scenarios. +- `design/perf-baseline-notes.md` -- new working note with the + Phase-0 and Phase-5 numbers and the per-phase outcome table. + +**Verification:** +- Full `devtools::test()` -- 587 passing, 0 failed. +- `examples/bench_focused.R` (n=30, 3 runs) reproduces the table + above. +- `grep -n "grDevices::pdf(" R/` shows exactly three occurrences, + all in `R/export_tfl.R` (the `.open_metric_device` normal/preview + paths + the defensive legacy fallback in `.export_tfl_pages`). diff --git a/design/DESIGN.md b/design/DESIGN.md index b46cdb6..e16d186 100644 --- a/design/DESIGN.md +++ b/design/DESIGN.md @@ -163,22 +163,71 @@ pagination is consistent with the actual output. --- -## Why does `tfl_table_to_pagelist()` open a scratch PDF device? +## Why a single device per `export_tfl()` call? (D-48) Font metrics (`stringHeight`, `stringWidth`, `grobHeight`) require an active -graphics device. A scratch `pdf(NULL, ...)` device is opened at measurement -time to provide a consistent rendering context (same page dimensions as the -final output) without writing to disk. +graphics device. Earlier designs opened ~8 small scratch `pdf(...)` devices +across the pagination pipeline; D-48 collapsed them all to **one** device per +`export_tfl()` call: + +- **Normal mode (`preview = FALSE`):** the S3 dispatcher calls + `.open_metric_device()` immediately on entry, which opens + `grDevices::pdf(file, width = pg_width, height = pg_height)` and + registers an `on.exit` handler on the dispatcher's frame. Pagination + measurements and the per-page draw loop both run on this device. + `on.exit` ensures the device closes cleanly even if pagination errors + out (a long-running R session would otherwise leak file handles up to + the 64-device limit). + +- **Preview mode:** the dispatcher opens a transient `pdf(NULL, ...)` for + pagination, closes it explicitly via `.close_metric_device()` after + pagination, and lets the per-page draw loop target the user's + pre-existing device. This keeps preview-mode pagination decisions + byte-for-byte identical to normal mode (both run on PDF font metrics + with matching page dimensions). + +All internal measurement helpers — `compute_table_content_area()`, +`.resolve_natural_widths()`, `.run_pagination_iter()`, +`.compute_col_min_widths()`, `.compute_wrapped_widths()`, +`.height_balance_widths()`, `.gt_grob_height()`, `.rtables_lpp_cpp()` — +now **require** an active device with matching page dimensions. The +contract is enforced by a `dev.cur() == 1L` safety guard inside +`.measure_text_dims_in()`: a regression that calls a helper without a +device fails fast with a clear "requires an active graphics device" +abort, rather than producing silently wrong measurements. + +--- + +## Why thread `text_dim_cache` from pagination to the drawing phase? (D-48) + +D-47 introduced a process-wide `(gp_key, string) -> list(w, h)` cache +shared across the natural-width pass, the row-height pass, and the +header-height pass. D-46 added a per-column `clip_width_caches` shared +across all pages of one `tfl_table`. + +D-48 extends both: the pagination cache is **attached to every +`tfl_table_grob` in the pagelist** (via `x$text_dim_cache`). +`drawDetails.tfl_table_grob` extracts it; `.draw_cell_text()` consults it +before falling through to the per-column `width_cache` and finally to +fresh measurement. + +The pre-D-48 boundary was justified by "the render device may differ +from pagination's scratch device" — but after D-48 there *is no +difference* in normal mode (pagination and rendering both happen on +`pdf(file)`). In preview mode the dispatcher attaches an **empty** env +so every lookup misses and the function falls back to per-cell +measurement on the user's render device — preserving today's preview +behaviour exactly. --- ## Why store `cell_heights_in_mat` and `cont_row_h_in` in the gTree? The `drawDetails` method is called by `grid` at render time, potentially long -after paginate time. Pre-computing cell heights during pagination (when a -scratch device is already open) and caching them in the grob avoids opening -another device at draw time and ensures layout consistency: the heights used -for pagination and the heights used for drawing are identical. +after paginate time. Pre-computing cell heights during pagination (when the +metric device is open) and caching them in the grob avoids re-measurement +at draw time and ensures layout consistency: the heights used for pagination +and the heights used for drawing are identical. The grob caches the *full* per-cell height matrix rather than per-row scalars because the per-row height for a given page depends on which other diff --git a/design/perf-baseline-notes.md b/design/perf-baseline-notes.md new file mode 100644 index 0000000..685132e --- /dev/null +++ b/design/perf-baseline-notes.md @@ -0,0 +1,240 @@ +# Performance baseline notes — single-device + cache-through-drawing refactor + +Working notes captured during Phase 0 of the refactor that extends +PR #39 with three layered changes: + +1. Single device per `export_tfl()` call (eliminate scratch + `pdf()` opens in `R/`). +2. Cache the pagination-phase text-dim measurements through to + `.draw_cell_text()` (D-47's cache currently stops at the render- + device boundary). +3. Unify the four small per-purpose caches around the shared + `.measure_text_dims_in()` helper. + +Baseline HEAD: `b196ca3` ("Consolidate width+height measurement into one +cache during pagination"; D-47 in `design/DECISIONS.md`). + +The user explicitly rejected premature optimization +(`memory/project_perf_profiling.md`). Profile-first, then commit only +the changes the data plus the maintainability argument justifies. + +## Wall-clock baseline (bench_focused.R, n = 30) + +| Scenario | min | median | iqr | +|----------------|-----------|-----------|----------| +| `iris5p` | 209 ms | 231 ms | 21.9 ms | +| `big_df` | 1.06 s | 1.16 s | 100 ms | +| `wrap_heavy` | 1.79 s | 1.94 s | 152 ms | +| `preview_iris` | 143 ms | 155 ms | 11.4 ms | +| `figure_multi` | 528 ms | 601 ms | 73.1 ms | + +These will be the comparison baselines for every later commit. + +## Profile snapshots (profile_writetfl.R --quick, Rprof @ 0.01s) + +Top writetfl source lines by self-time, summarised across the four +core scenarios. **The percentages are self/total of *Rprof samples* +during a 20-rep loop**, so they capture relative cost, not absolute +time. + +### `core_paginate` (iris -> ~5 pages) + +| Line | self.pct | total.pct | What it is | +|----------------------------|----------|-----------|-----------------------------------------| +| `table_draw.R#412` | 1.4 % | **55.7 %**| `.draw_cell_text(display_str, cs$align, ...)` call site | +| `table_draw.R#625` | 0.4 % | **47.9 %**| inside `.draw_cell_text` (cap clip + grid.text) | +| `table_utils.R#339` | 1.1 % | 2.5 % | `.fmt_cell_vec()` body | +| `resolve_gp.R#15` | 1.1 % | 1.1 % | `merge_gpar()` field loop | +| `table_draw.R#606` | 0.7 % | **8.9 %** | `text_w <- .measure_text_width_in(...)` — D-47 boundary line, Phase 3 target | +| `wrap.R#168` | 0.7 % | 0.7 % | `.measure_text_width_in()` body | +| `table_utils.R#250` | 0.35 % | 11.7 % | `.measure_max_string_width()` body | +| `table_rows.R#71/81/83` | 0.35 % | 1–3 % | row-height measurement loop | + +### `core_wrap` (clinical_df with wrap_balance = "height") + +| Line | self.pct | total.pct | What it is | +|----------------------------|----------|-----------|-----------------------------------------| +| `wrap.R#139` | 2.5 % | 3.3 % | inside `.wrap_paragraph` token loop | +| `table_utils.R#250` | 0.8 % | **33.1 %**| `.measure_max_string_width()` — natural-width pass | +| `wrap.R#168` | 0.8 % | 1.2 % | `.measure_text_width_in()` body | +| `wrap.R#108/138/128` | 0.4–0.8 %| 0.8–1.2 % | wrap break + token width | +| `resolve_gp.R#14/15/32` | 0.4–1.2 %| 0.8–2.1 % | gpar resolution | +| `table_utils.R#99` | 0.4 % | 7.4 % | `.measure_header_row_height()` | +| `table_utils.R#301` | 0.4 % | 2.5 % | (cache lookup helper) | + +### `core_small` (mtcars 20 rows, 1 page) + +No single writetfl source line above 1.6 %. Top R functions are +generic grid plumbing (`$`, `valid.charjust`, `grid.Call`, ``, +`numnotnull`). For a small one-page table the per-grid-call overhead +dominates and there's little to optimise at the writetfl level. + +### `figure_multi` (10 ggplot pages) + +writetfl source lines all under 0.3 %. ggplot's `FUN`, `vapply`, +`enexpr`, `fetch_ggproto`, and grid's `grid.Call.graphics` dominate. +**Phase 1/2 device-lifecycle changes should not affect this scenario.** +We will keep watching it to confirm no regression. + +## Where is `pdf()` / `dev.off()` in the profile? + +Neither shows in the top-20 self-time list for any scenario. Estimated +combined `(pdf + dev.off)` cost: **well below 3 %** of total in every +scenario. + +This is the Phase-0 go/no-go signal for Phase 1/2. Strict perf-only +gate says **skip** — but the user explicitly said maintainability +counts (one cache contract, one device-lifecycle helper, no scratch +`pdf()` opens scattered across seven files). Phase 1/2 proceeds on +maintainability grounds. + +## Where is `.measure_text_width_in` in the profile? + +`table_draw.R#606` (the re-measure inside `.draw_cell_text`) lands at +**8.9 %** of total time in `core_paginate`. The enclosing +`.draw_cell_text` body at `table_draw.R#625` reaches **47.9 %** of +total (it dominates the per-page draw loop). Removing the re-measure +when the pagination cache already covers the cell is a real perf win. + +Phase 3 proceeds on perf grounds. + +## Where is the wrap measurement loop? + +`wrap.R#139` and surrounding lines accumulate ~2–4 % self-time in +`core_wrap` but no single line is dominant. The natural-width pass at +`table_utils.R#250` shows 33.1 % *total* in `core_wrap` — +this is the path D-47 already speeds up. Unifying the cache +(Phase 3.5) should not change this number much; it just removes the +need for each cache consumer to invent its own env shape. + +Phase 3.5 proceeds on maintainability grounds. + +## Decision matrix + +| Phase | Perf signal? | Maintainability? | Proceed? | +|-------------------------------|-------------------|------------------|-----------| +| 1. Single device | < 3 % (marginal) | Yes (strong) | Yes (maint) | +| 2. Scratch-pdf elimination | < 3 % (marginal) | Yes (strong) | Yes (maint) | +| 3. Cache through drawing | 8.9 % (strong) | Yes | **Yes (perf)** | +| 3.5. Cache unification | n/a | Yes (strong) | Yes (maint) | +| 4. Tests | n/a | Required | Yes | +| 5. Profile post-refactor | n/a | Required | Yes | +| 6. Documentation | n/a | Required | Yes | + +Order of execution (see plan): 3.5 → 3 → 1 → 2 → 4 → 5 → 6. Refactor +the cache shape first (no behavior change), then add the drawing-phase +consumer, then collapse device lifecycle, then enforce the contract. + +## Per-scenario expected outcomes + +- `iris5p`, `core_paginate`: should improve materially after Phase 3 + (re-measure removed in normal mode). Target: ≥5 % drop on + `core_paginate` median. +- `big_df`: harder — most of its time is in `grid.Call.graphics` + (the actual PDF stream emission), not measurement. Small + improvement expected (~2–5 %) from Phase 3 + smaller from Phase 1/2. +- `wrap_heavy`: Phase 3 helps any wrap cell whose first-line text was + measured during pagination. The wrap pipeline also measures + per-token, which Phase 3.5 unification keeps cacheable + cross-scenario. +- `preview_iris`: expected **no change** — preview mode keeps fresh + empty cache attached to grobs by design (D-47 boundary preserved). +- `figure_multi`: expected **no change** — non-tfl_table path. + +## Snapshot files + +Raw Rprof output saved under `examples/profile_output/`: + +- `profile_core_small_20260513_083031.Rprof` +- `profile_core_wrap_20260513_083035.Rprof` +- `profile_core_paginate_20260513_083041.Rprof` +- `profile_figure_multi_20260513_083047.Rprof` + +These will be regenerated and overwritten by subsequent profile runs. + +## What to re-check after each commit + +- `Rscript examples/bench_focused.R` — no regression > 2 % on any + scenario. +- `devtools::test()` — green. + +If a commit breaks either, roll back before the next commit. + +--- + +## Post-refactor results (after Phases 1–4) + +### Profile signal — core_paginate (Rprof @ 0.01s) + +The single line that motivated the refactor: + +| Location | Baseline | After | Δ | +|----------------------------------|----------|--------|--------| +| `table_draw.R` `.measure_text_width_in` re-measure inside `.draw_cell_text` | 8.87 % total | 0.5 % total | **~17× drop** | + +The text-dim cache pre-populated by pagination now satisfies the +overwhelming majority of width lookups during the draw loop. The +8.9 % budget the line consumed at baseline is essentially gone. + +### Wall-clock comparison (bench_focused.R, n = 30 per run) + +Both baseline (`b196ca3`) and D-48 numbers measured on the same +machine in the same session, so system load is comparable. Reported: +**minimum** of medians across 3 independent runs. Min-of-mins is +the most stable estimator of "best-case time on this machine." + +| Scenario | Baseline min | D-48 min | Δ | Notes | +|----------------|--------------|----------|----------|-------| +| `iris5p` | 213 ms | 200 ms | **−6 %** | pagination-heavy | +| `big_df` | 1.25 s | 1.10 s | **−12 %**| drawing-heavy, 17 pages | +| `wrap_heavy` | 2.24 s | 1.94 s | **−13 %**| wrap + draw | +| `preview_iris` | 194 ms | 166 ms | **−14 %**| preview path — single transient pdf(NULL) for pagination, then user's device for drawing. The improvement comes from eliminating scratch devices, NOT from cache reuse (cache is empty in preview by design). | +| `figure_multi` | 619 ms | 605 ms | −2 % | ggplot path — within variance, as expected (no cache, no scratch devices). | + +`preview_iris` improved more than expected because the four scratch +PDF opens inside the pagination pipeline (compute_table_content_area, +.run_pagination_iter, .resolve_natural_widths, plus wrap helpers) +each cost ~5 ms to open and close. Removing them yields a real +~20 ms saving per call. + +### Test signal + +- 583 → 587 tests passing (4 new D-48 invariant tests in + `test-export_tfl.R`). +- Zero failures, zero errors. + +### Outcome per phase + +| Phase | What it delivered | Verdict | +|-------|-------------------|---------| +| 3.5 | Investigated unifying inner-loop cache; rejected (regressed wrap_heavy by 30 %). Kept the small NULL-gp_key forward-compat shim. | Investigated; documented in commit message. | +| 3a | Plumbed `text_dim_cache` from `export_tfl.tfl_table` to grobs. No semantic change. | Shipped. | +| 3b | `.draw_cell_text` consumes the cache; drops the per-cell re-measure on cache hits. | Shipped (perf win). | +| 1a | `.open_metric_device` / `.close_metric_device` helpers + 4 tests. | Shipped. | +| 1b–1d | Wired single-device discipline into every S3 dispatcher and `compute_table_content_area`. | Shipped. | +| 2a–2c | Removed 7 scratch `pdf()` opens from `R/`. Only one `pdf()` open per `export_tfl()` call remains. | Shipped. | +| 2d | `dev.cur() == 1L` safety guard in `.measure_text_dims_in` catches future regressions early. | Shipped. | +| 4 | Device-count, cross-device-metric-equality, safety-guard, and cache-shape tests. | Shipped. | + +### Files that lost their scratch pdf() open + +- `R/table_pagelist.R` `compute_table_content_area` +- `R/table_pagelist.R` `.run_pagination_iter` +- `R/table_columns.R` `.resolve_natural_widths` +- `R/wrap.R` `.compute_col_min_widths` +- `R/wrap.R` `.compute_wrapped_widths` +- `R/wrap.R` `.height_balance_widths` +- `R/gt.R` `.gt_grob_height` +- `R/rtables.R` `.rtables_lpp_cpp` + +Final grep: + +``` +$ grep -n "grDevices::pdf(" R/ -r +R/export_tfl.R: pdf(file) # .open_metric_device normal mode +R/export_tfl.R: pdf(NULL) # .open_metric_device preview mode +R/export_tfl.R: pdf(file) # .export_tfl_pages legacy fallback (dead under + # all current dispatchers; defensive in case + # external code calls .export_tfl_pages directly) +``` + diff --git a/examples/bench_focused.R b/examples/bench_focused.R new file mode 100644 index 0000000..8b10f36 --- /dev/null +++ b/examples/bench_focused.R @@ -0,0 +1,105 @@ +# examples/bench_focused.R +# +# Focused benchmark on the scenarios most likely to surface cache and +# device-lifecycle costs in tfl_table generation. +# +# Scenarios: +# iris5p tfl_table(iris) -> 5 pages. Pagination-heavy. +# big_df 500-row synthetic table. Drawing-heavy across ~17 pages. +# wrap_heavy 200-row x 5-col table with three narrative-text columns +# (hyphenated, multi-word). Exercises the wrap pipeline +# plus per-cell clip-width measurement during drawing. +# preview_iris iris5p but rendered via export_tfl(..., preview = c(1, 2, 3)) +# with an open pdf(NULL) device. Measures whether the +# cache-through-drawing path benefits preview mode. +# figure_multi 10-page ggplot list passed directly to export_tfl(). +# Isolates the non-tfl_table per-page overhead so we can +# see whether Phase-1/2 device-lifecycle changes affect +# the figure path. + +suppressPackageStartupMessages({ + devtools::load_all(quiet = TRUE) +}) + +set.seed(42L) + +big_df <- data.frame( + subject = sprintf("Subject %03d", 1:500), + visit = rep(c("Baseline", "Week 1", "Week 4", "Week 12"), length.out = 500), + result = round(rnorm(500, 100, 15), 1), + flag = sample(c("Y", "N", "?"), 500, replace = TRUE) +) + +# 200-row wrap-heavy table. The narrative columns contain hyphenated words +# and multi-word phrases so the wrap module's keep-before "-" break path is +# exercised on every row. Numeric columns provide a control. +wrap_words <- c( + "patient-reported", "treatment-emergent", "investigator-assessed", + "adverse-event", "fully-resolved", "dose-reduction", + "drug-related", "non-serious", "moderate-severity", + "hospitalization-required", "follow-up" +) +make_wrap_text <- function() { + paste(sample(wrap_words, 6L, replace = TRUE), collapse = " ") +} +wrap_heavy_df <- data.frame( + subject = sprintf("Subject %03d", 1:200), + narrative_a = replicate(200, make_wrap_text()), + narrative_b = replicate(200, make_wrap_text()), + narrative_c = replicate(200, make_wrap_text()), + score = round(rnorm(200, 50, 10), 1), + stringsAsFactors = FALSE +) + +# 10 figure pages, each a small ggplot. Mirrors a typical +# clinical-report-of-figures bundle. +fig_pages <- lapply(seq_len(10L), function(i) { + list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) + + ggplot2::geom_point() + + ggplot2::ggtitle(sprintf("Figure %d", i)), + header_left = sprintf("Figure %d.1", i)) +}) + +scenarios <- list( + iris5p = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(tfl_table(iris), file = out) + unlink(out) + }, + big_df = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(tfl_table(big_df), file = out) + unlink(out) + }, + wrap_heavy = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(tfl_table(wrap_heavy_df), file = out) + unlink(out) + }, + preview_iris = function() { + # Preview mode needs a device open in the caller. Use pdf(NULL) so + # nothing is written to disk; close in on.exit so a crash mid-run + # doesn't leak the device. + grDevices::pdf(NULL, width = 11, height = 8.5) + on.exit(grDevices::dev.off(), add = TRUE) + export_tfl(tfl_table(iris), preview = c(1L, 2L, 3L)) + }, + figure_multi = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(fig_pages, file = out) + unlink(out) + } +) + +for (name in names(scenarios)) { + fn <- scenarios[[name]] + invisible(fn()) # warmup + bm <- bench::mark(fn(), iterations = 30L, check = FALSE, + filter_gc = FALSE, memory = FALSE) + cat(sprintf("%-13s min=%-9s median=%-9s iqr=%s n=%d\n", + name, + format(bm$min), + format(bm$median), + format(bm$median - bm$min), + bm$n_itr)) +} diff --git a/man/build_table_grob.Rd b/man/build_table_grob.Rd index eced8aa..41c814a 100644 --- a/man/build_table_grob.Rd +++ b/man/build_table_grob.Rd @@ -13,7 +13,8 @@ build_table_grob( cell_heights_in_mat = NULL, cont_row_h_in = NULL, is_first_col_page = TRUE, - is_last_col_page = TRUE + is_last_col_page = TRUE, + clip_width_caches = NULL ) } \arguments{ diff --git a/man/compute_col_widths.Rd b/man/compute_col_widths.Rd index 85c24f7..6393f91 100644 --- a/man/compute_col_widths.Rd +++ b/man/compute_col_widths.Rd @@ -14,7 +14,8 @@ compute_col_widths( margins, overflow_action = c("error", "warn"), validate_overflow = TRUE, - floor_overrides = NULL + floor_overrides = NULL, + cache = NULL ) } \arguments{ diff --git a/man/compute_table_content_area.Rd b/man/compute_table_content_area.Rd index bd94d8c..66c4005 100644 --- a/man/compute_table_content_area.Rd +++ b/man/compute_table_content_area.Rd @@ -18,8 +18,15 @@ compute_table_content_area( ) } \description{ -Opens a scratch device, measures annotation section heights using the -same infrastructure as export_tfl_page(), and returns available width and -height in inches. +Measures annotation section heights using the same infrastructure as +export_tfl_page() and returns available width and height in inches. +} +\details{ +D-48: requires an active graphics device with matching page +dimensions; the caller (\code{.tfl_table_to_pagelist_default()}) runs +inside the metric device opened by \code{.open_metric_device()} in the +S3 dispatcher, so font metrics here equal those used at draw time +(normal mode) or those of the same pdf(NULL) used for the rest of +pagination (preview mode). } \keyword{internal} diff --git a/man/dot-gt_grob_height.Rd b/man/dot-gt_grob_height.Rd index 44b59f5..e877f98 100644 --- a/man/dot-gt_grob_height.Rd +++ b/man/dot-gt_grob_height.Rd @@ -2,19 +2,24 @@ % Please edit documentation in R/gt.R \name{.gt_grob_height} \alias{.gt_grob_height} -\title{Measure a gt grob's height in a scratch device} +\title{Measure a gt grob's height} \usage{ .gt_grob_height(grob, pg_width, pg_height) } \arguments{ \item{grob}{A gtable grob from \code{\link[gt:as_gtable]{gt::as_gtable()}}.} -\item{pg_width, pg_height}{Page dimensions for the scratch device.} +\item{pg_width, pg_height}{Page dimensions (advisory; the active +metric device's dimensions are what \code{convertHeight} uses).} } \value{ Numeric scalar: grob height in inches. } \description{ -Measure a gt grob's height in a scratch device +D-48: requires an active graphics device with matching page +dimensions; \code{export_tfl.gt_tbl()} opens the metric device via +\code{.open_metric_device()} before invoking the pagelist conversion +pipeline, so \code{convertHeight()} here resolves against that device's +font metrics. } \keyword{internal} diff --git a/man/export_tfl.Rd b/man/export_tfl.Rd index cf5bfda..571ec6b 100644 --- a/man/export_tfl.Rd +++ b/man/export_tfl.Rd @@ -92,7 +92,7 @@ In preview mode each page is drawn via \code{grid::grid.newpage()} (so knitr captures it as an inline graphic). Returns \code{NULL} invisibly.} \item{...}{ - Arguments passed on to \code{\link[=export_tfl_page]{export_tfl_page}} + Arguments passed on to \code{\link{export_tfl_page}} \describe{ \item{\code{padding}}{Vertical space between adjacent present sections, as a \code{unit} object. Separator rules (if enabled) are drawn at the midpoint diff --git a/man/measure_row_heights_tbl.Rd b/man/measure_row_heights_tbl.Rd index 8ad051c..46e8555 100644 --- a/man/measure_row_heights_tbl.Rd +++ b/man/measure_row_heights_tbl.Rd @@ -13,7 +13,8 @@ measure_row_heights_tbl( line_height, max_measure_rows, breaks = NULL, - wrap_extra_pad_in = 0 + wrap_extra_pad_in = 0, + cache = NULL ) } \arguments{ diff --git a/man/reexports.Rd b/man/reexports.Rd index fd38a52..cedce32 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -12,6 +12,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{grid}{\code{\link[grid]{gpar}}, \code{\link[grid]{unit}}} + \item{grid}{\code{\link[grid:gpar]{gpar()}}, \code{\link[grid:unit]{unit()}}} }} diff --git a/man/tfl_table_to_pagelist.Rd b/man/tfl_table_to_pagelist.Rd index 1be45dd..e9769b4 100644 --- a/man/tfl_table_to_pagelist.Rd +++ b/man/tfl_table_to_pagelist.Rd @@ -9,7 +9,8 @@ tfl_table_to_pagelist( pg_width, pg_height, dots, - page_num = "Page {i} of {n}" + page_num = "Page {i} of {n}", + text_dim_cache = NULL ) } \arguments{ @@ -20,6 +21,14 @@ tfl_table_to_pagelist( \item{dots}{The \code{list(...)} from \code{\link[=export_tfl]{export_tfl()}}.} \item{page_num}{Glue template string for page numbering.} + +\item{text_dim_cache}{Optional environment used as the pagination-phase +text-dimension cache. When supplied, the same env is reused instead +of allocating one locally, so the caller (\code{export_tfl()}) can later +reuse its entries during the drawing phase by attaching the env to +the table grobs. When \code{NULL} (the default), a fresh env is +allocated and discarded after pagination completes -- equivalent to +the pre-D-48 behaviour.} } \value{ A list of page spec lists, each with at least \verb{$content} (a grob). diff --git a/tests/testthat/test-export_tfl.R b/tests/testthat/test-export_tfl.R index f6fca90..faef027 100644 --- a/tests/testthat/test-export_tfl.R +++ b/tests/testthat/test-export_tfl.R @@ -135,3 +135,223 @@ test_that("export_tfl passes dots to export_tfl_page as defaults", { ) expect_true(file.exists(f)) }) + +# --------------------------------------------------------------------------- +# .open_metric_device() / .close_metric_device() helpers +# --------------------------------------------------------------------------- + +test_that(".open_metric_device opens pdf(file) in normal mode and closes via on.exit", { + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f), add = TRUE) + before <- grDevices::dev.cur() + + runner <- function() { + md <- .open_metric_device(f, pg_width = 11, pg_height = 8.5, + preview = FALSE) + # Inside runner: device is open and active. + expect_equal(grDevices::dev.cur(), md$dev) + expect_true(grDevices::dev.cur() != before) + md + } + result <- runner() + # After runner() returns, the helper's on.exit (registered on + # runner's frame) has fired and closed the device. + expect_equal(grDevices::dev.cur(), before) + expect_true(file.exists(f)) # pdf actually got written +}) + +test_that(".open_metric_device opens pdf(NULL) in preview mode", { + before <- grDevices::dev.cur() + + runner <- function() { + md <- .open_metric_device(NULL, pg_width = 11, pg_height = 8.5, + preview = TRUE) + expect_equal(grDevices::dev.cur(), md$dev) + md + } + result <- runner() + # on.exit closed it on return. + expect_equal(grDevices::dev.cur(), before) +}) + +test_that(".open_metric_device closes the device even when the caller errors out", { + before <- grDevices::dev.cur() + + expect_error( + { + runner <- function() { + .open_metric_device(NULL, pg_width = 11, pg_height = 8.5, + preview = TRUE) + stop("boom -- simulated mid-pagination failure") + } + runner() + }, + "boom" + ) + # Even though the error short-circuited runner, the helper's on.exit + # registered on runner's frame fires during unwind and closes the + # device. + expect_equal(grDevices::dev.cur(), before) +}) + +test_that(".close_metric_device is idempotent", { + before <- grDevices::dev.cur() + + runner <- function() { + md <- .open_metric_device(NULL, pg_width = 11, pg_height = 8.5, + preview = TRUE) + # Explicit close: caller would invoke this in preview mode to + # restore the user's device before drawing. + .close_metric_device(md) + expect_equal(grDevices::dev.cur(), before) + # Second call is a no-op. + .close_metric_device(md) + expect_equal(grDevices::dev.cur(), before) + } + runner() + expect_equal(grDevices::dev.cur(), before) +}) + +# --------------------------------------------------------------------------- +# D-48: single-device discipline + cross-phase cache invariants +# --------------------------------------------------------------------------- + +test_that("export_tfl.tfl_table opens exactly one PDF device per call", { + # Verify D-48's "single device" invariant via grDevices::pdf trace. + # The tracer references a counter stored in a stable env so the + # quote() body can resolve it inside grDevices::pdf's evaluation + # context. + counter_env <- new.env() + counter_env$count <- 0L + assign(".test_pdf_counter_env", counter_env, envir = globalenv()) + on.exit(rm(".test_pdf_counter_env", envir = globalenv()), add = TRUE) + + suppressMessages(trace( + what = "pdf", + where = asNamespace("grDevices"), + tracer = quote({ + e <- get(".test_pdf_counter_env", envir = globalenv()) + e$count <- e$count + 1L + }), + print = FALSE + )) + on.exit(suppressMessages(untrace("pdf", where = asNamespace("grDevices"))), + add = TRUE) + + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f), add = TRUE) + counter_env$count <- 0L + export_tfl(tfl_table(head(iris, 5)), file = f) + expect_equal(counter_env$count, 1L, + info = paste("Expected 1 pdf() open; got", counter_env$count)) +}) + +test_that("PDF font metrics are identical between pdf(NULL) and pdf(tempfile)", { + # D-48 relies on this: pagination measurements taken on one PDF + # device are equal to what the final pdf(file) would produce, so + # the pagination cache can be shared across phases without + # re-measurement. Confirm empirically -- bytes can differ across + # grid versions, so the test pins the assumption. + string <- "Quick brown fox 1234" + gp_test <- grid::gpar(fontfamily = "sans", fontsize = 10, + fontface = "plain") + + measure_under_pdf <- function(target) { + if (is.null(target)) { + grDevices::pdf(NULL, width = 11, height = 8.5) + } else { + grDevices::pdf(target, width = 11, height = 8.5) + } + on.exit({ + grDevices::dev.off() + if (!is.null(target)) unlink(target) + }) + g <- grid::textGrob(string, gp = gp_test) + list( + w = grid::convertWidth(grid::grobWidth(g), "inches", valueOnly = TRUE), + h = grid::convertHeight(grid::grobHeight(g), "inches", valueOnly = TRUE) + ) + } + + m_null <- measure_under_pdf(NULL) + m_file <- measure_under_pdf(tempfile(fileext = ".pdf")) + + expect_equal(m_null$w, m_file$w, tolerance = 1e-9) + expect_equal(m_null$h, m_file$h, tolerance = 1e-9) +}) + +test_that(".measure_text_dims_in fails fast without an active device", { + # Safety guard added in Phase 2d. All internal callers run under + # `.open_metric_device()`; a future regression that forgets this + # should produce a readable error rather than silent nonsense. + expect_error( + .measure_text_dims_in("anything", grid::gpar()), + "requires an active graphics device" + ) +}) + +test_that("tfl_table grob carries the cross-phase cache only when normal mode", { + # PDF mode: grob$text_dim_cache should be the SAME env that + # pagination populated (shared by reference). Preview mode: it + # should be a fresh empty env, so drawing falls back to per-cell + # measurement on the user's render device. + # + # Rather than trace() the page-render call (which has scoping + # complications under devtools::test()), invoke tfl_table_to_pagelist + # the same way export_tfl.tfl_table() does, and inspect the grobs + # directly. + + # Normal-mode-equivalent: pre-open a metric device and pass a cache. + grDevices::pdf(NULL, width = 11, height = 8.5) + pagination_cache <- new.env(hash = TRUE, parent = emptyenv()) + pages <- tfl_table_to_pagelist( + tfl_table(head(iris, 5)), + pg_width = 11, pg_height = 8.5, + dots = list(), page_num = "Page {i} of {n}", + text_dim_cache = pagination_cache + ) + grDevices::dev.off() + + expect_gt(length(pages), 0L) + expect_gt(length(ls(pagination_cache, all.names = TRUE)), 0L) + + # The grob doesn't carry text_dim_cache by default -- the + # dispatcher does the attach. Simulate that attach the same way + # export_tfl.tfl_table() does: + for (i in seq_along(pages)) { + if (inherits(pages[[i]]$content, "tfl_table_grob")) { + pages[[i]]$content$text_dim_cache <- pagination_cache + } + } + # Now verify a grob carries a populated cache and entry shape is + # list(w, h). + found_one <- FALSE + for (i in seq_along(pages)) { + g <- pages[[i]]$content + if (inherits(g, "tfl_table_grob")) { + found_one <- TRUE + expect_true(is.environment(g$text_dim_cache)) + expect_gt(length(ls(g$text_dim_cache, all.names = TRUE)), 0L) + ks <- ls(g$text_dim_cache, all.names = TRUE) + sample_val <- get(ks[[1L]], envir = g$text_dim_cache) + expect_named(sample_val, c("w", "h")) + break + } + } + expect_true(found_one, info = "no tfl_table_grob in returned pages") + + # Preview-mode-equivalent: the dispatcher attaches an EMPTY env. + # Simulate that and verify drawing's lookups would miss. + empty_cache <- new.env(hash = TRUE, parent = emptyenv()) + for (i in seq_along(pages)) { + if (inherits(pages[[i]]$content, "tfl_table_grob")) { + pages[[i]]$content$text_dim_cache <- empty_cache + } + } + for (i in seq_along(pages)) { + g <- pages[[i]]$content + if (inherits(g, "tfl_table_grob")) { + expect_equal(length(ls(g$text_dim_cache, all.names = TRUE)), 0L) + } + } +})