diff --git a/.gitignore b/.gitignore index 4760921..826287e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .Rhistory .RData .Ruserdata +Rplots.pdf +tests/testthat/Rplots.pdf .Rproj.user/ *.Rproj *.Rcheck/ diff --git a/DESCRIPTION b/DESCRIPTION index 174177d..ba00c22 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -65,9 +65,11 @@ Collate: 'utils.R' 'enum_codes.R' 'extract.R' + 'fonts.R' 'form_field_setters.R' 'forms.R' 'glyph_paths.R' + 'image_authoring.R' 'images.R' 'mutation.R' 'name_lookups.R' diff --git a/NAMESPACE b/NAMESPACE index 118daaf..c42d748 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ S3method(format,pdfium_bookmark) S3method(format,pdfium_bookmark_list) S3method(format,pdfium_clip_path) S3method(format,pdfium_doc) +S3method(format,pdfium_font) S3method(format,pdfium_form_field) S3method(format,pdfium_form_field_list) S3method(format,pdfium_obj) @@ -35,6 +36,7 @@ S3method(print,pdfium_bookmark) S3method(print,pdfium_bookmark_list) S3method(print,pdfium_clip_path) S3method(print,pdfium_doc) +S3method(print,pdfium_font) S3method(print,pdfium_form_field) S3method(print,pdfium_form_field_list) S3method(print,pdfium_obj) @@ -138,6 +140,9 @@ export(pdf_doc_viewer_preferences) export(pdf_doc_xref_valid) export(pdf_docs_merge) export(pdf_extract_paths) +export(pdf_font_close) +export(pdf_font_load) +export(pdf_font_load_standard) export(pdf_form_field_additional_actions_js) export(pdf_form_field_alternate_name) export(pdf_form_field_at_point) @@ -166,6 +171,7 @@ export(pdf_image_data) export(pdf_image_filters) export(pdf_image_icc_profile) export(pdf_image_info) +export(pdf_image_new) export(pdf_image_rendered) export(pdf_image_size) export(pdf_link_annot_at_point) diff --git a/NEWS.md b/NEWS.md index 454926b..3f6da8b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -162,8 +162,19 @@ release on scope grounds (see `CLAUDE.md` §"Scope"): `pdf_path_close()`, `pdf_path_append()` — append path geometry to an existing path object. 
* `pdf_path_new()`, `pdf_rect_new()`, `pdf_text_new()`, - `pdf_obj_delete()` — create fresh paths, rectangles, and text - objects, or remove an existing one. + `pdf_image_new()`, `pdf_obj_delete()` — create fresh paths, + rectangles, text objects, or JPEG images, or remove an existing + object. `pdf_text_new()` accepts either a standard-font name or + a `pdfium_font` handle from `pdf_font_load_standard()` / + `pdf_font_load()`; `pdf_image_new()` embeds JPEG bytes inline + via `FPDFImageObj_LoadJpegFileInline` and lets you place the + image into an explicit `bounds = c(left, bottom, right, top)` + rectangle. +* `pdf_font_load_standard()`, `pdf_font_load()`, `pdf_font_close()` + — load one of the 14 PDF standard fonts (no embedding) or + embed an arbitrary TrueType / Type1 font's bytes into the + document. Returned `pdfium_font` handles plug straight into + `pdf_text_new()` for custom-typeface text. ## Annotation authoring diff --git a/R/RcppExports.R b/R/RcppExports.R index 3ee8ada..e914ffe 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -277,6 +277,22 @@ cpp_doc_file_version <- function(doc_ptr) { .Call(`_pdfium_cpp_doc_file_version`, doc_ptr) } +cpp_font_load_standard <- function(doc_ptr, font_name) { + .Call(`_pdfium_cpp_font_load_standard`, doc_ptr, font_name) +} + +cpp_font_load_truetype <- function(doc_ptr, font_data, font_type, cid) { + .Call(`_pdfium_cpp_font_load_truetype`, doc_ptr, font_data, font_type, cid) +} + +cpp_font_close <- function(font_ptr) { + invisible(.Call(`_pdfium_cpp_font_close`, font_ptr)) +} + +cpp_text_new_with_font <- function(doc_ptr, page_ptr, font_ptr, font_size, text_utf8, x, y) { + .Call(`_pdfium_cpp_text_new_with_font`, doc_ptr, page_ptr, font_ptr, font_size, text_utf8, x, y) +} + cpp_form_field_handles <- function(doc_ptr) { .Call(`_pdfium_cpp_form_field_handles`, doc_ptr) } @@ -361,6 +377,14 @@ cpp_text_char_font_info <- function(page_ptr) { .Call(`_pdfium_cpp_text_char_font_info`, page_ptr) } +cpp_image_new_from_jpeg <- 
function(doc_ptr, page_ptr, jpeg_bytes) { + .Call(`_pdfium_cpp_image_new_from_jpeg`, doc_ptr, page_ptr, jpeg_bytes) +} + +cpp_image_set_matrix <- function(image_ptr, a, b, c, d, e, f) { + .Call(`_pdfium_cpp_image_set_matrix`, image_ptr, a, b, c, d, e, f) +} + cpp_image_metadata <- function(obj_ptr, page_ptr) { .Call(`_pdfium_cpp_image_metadata`, obj_ptr, page_ptr) } diff --git a/R/classes.R b/R/classes.R index 7fdc270..3bb56c5 100644 --- a/R/classes.R +++ b/R/classes.R @@ -19,6 +19,14 @@ new_pdfium_doc <- function(ptr, path, readwrite = FALSE) { state <- new.env(parent = emptyenv()) state$dirty_pages <- integer(0L) state$ffl_env <- NULL + # `open_pages` maps "page index" → externalptr of an open + # pdfium_page handle. Updated by new_pdfium_page (when a page is + # loaded or newly created) and by pdf_page_close (when one is + # released). flush_dirty_pages uses the registered externalptr + # so FPDFPage_GenerateContent runs on the handle that actually + # accumulated the edits — a fresh FPDF_LoadPage returns a + # different page handle that hasn't seen the user's inserts. + state$open_pages <- list() structure( list(ptr = ptr, path = path, readwrite = readwrite, state = state), @@ -48,11 +56,13 @@ is_open <- function(x) { if (inherits(x, c("pdfium_obj", "pdfium_annot"))) { return(cpp_handle_is_valid(x$ptr) && is_open(x$page)) } - # Doc-children: attachment / signature / bookmark all carry no - # finalizer and live as long as the parent doc lives. The doc's - # is_open() is the authoritative liveness signal. + # Doc-children: attachment / signature / bookmark carry no + # finalizer and live as long as the parent doc lives. Font has + # its own finalizer (FPDFFont_Close), but its lifetime is still + # doc-bound; the own-ptr check is necessary alongside the doc + # check (same shape as the annot case above). 
doc_owned_classes <- c("pdfium_attachment", "pdfium_signature", - "pdfium_bookmark") + "pdfium_bookmark", "pdfium_font") if (inherits(x, doc_owned_classes)) { return(cpp_handle_is_valid(x$ptr) && is_open(x$doc)) } @@ -90,8 +100,19 @@ new_pdfium_page <- function(ptr, doc, index) { checkmate::assert_class(ptr, "externalptr") checkmate::assert_class(doc, "pdfium_doc") checkmate::assert_number(index) + index <- as.integer(index) + # Register the externalptr in the doc's open-pages registry so + # pdf_save's flush_dirty_pages can call FPDFPage_GenerateContent + # on the actual user-modified handle. FPDF_LoadPage returns a + # different page handle for the same index than FPDFPage_New + # does — generating content on a fresh load wipes out the + # original handle's pending edits. + state <- doc$state + if (!is.null(state)) { + state$open_pages[[as.character(index)]] <- ptr + } structure( - list(ptr = ptr, doc = doc, index = as.integer(index)), + list(ptr = ptr, doc = doc, index = index), class = c("pdfium_page", "pdfium_handle") ) } @@ -217,6 +238,45 @@ print.pdfium_obj_list <- function(x, ...) { invisible(x) } +#' Construct a `pdfium_font` from an external pointer +#' +#' Internal helper. The `FPDF_FONT` handle has its own lifetime — +#' the externalptr carries a C finalizer that calls `FPDFFont_Close`, +#' registered C-side in `cpp_font_load_*`. The parent doc is pinned +#' in the externalptr's `prot` slot so R's GC cannot reclaim the doc +#' while any font handle is reachable. `name` is informational — +#' for standard fonts it's the spec name; for loaded TTF/Type1 fonts +#' it's the file's basename when the font was loaded from a path, +#' or `""` for in-memory loads. +#' +#' @param ptr An `externalptr` to an `FPDF_FONT`. +#' @param doc Parent `pdfium_doc`. +#' @param name Character — display label. +#' @return An object of class `c("pdfium_font", "pdfium_handle")`. 
+#' @keywords internal +#' @noRd +new_pdfium_font <- function(ptr, doc, name) { + checkmate::assert_class(ptr, "externalptr") + checkmate::assert_class(doc, "pdfium_doc") + checkmate::assert_string(name) + structure( + list(ptr = ptr, doc = doc, name = name), + class = c("pdfium_font", "pdfium_handle") + ) +} + +#' @export +format.pdfium_font <- function(x, ...) { + state <- if (is_open(x)) "open" else "closed" + sprintf("<pdfium_font [%s] %s>", state, x$name) +} + +#' @export +print.pdfium_font <- function(x, ...) { + cat(format(x, ...), "\n", sep = "") + invisible(x) +} + #' Construct a `pdfium_annot` from an external pointer #' #' Internal helper. The `FPDF_ANNOTATION` handle has its own diff --git a/R/fonts.R b/R/fonts.R new file mode 100644 index 0000000..154d6de --- /dev/null +++ b/R/fonts.R @@ -0,0 +1,168 @@ +# Font loading + handle class for the page-authoring API. +# +# Three constructors: +# * pdf_font_load_standard(doc, name) — one of the 14 PDF standard +# Type 1 fonts. No bytes needed. +# * pdf_font_load(doc, font_data, type) — TTF / Type1 bytes, embedded +# into the PDF. +# * (internal) new_pdfium_font(ptr, doc, name) wraps an externalptr. +# +# Lifetime: the C-side externalptr carries a finalizer that calls +# FPDFFont_Close. The `prot` slot pins the parent doc so the doc can't +# be GC'd before the font. Calling pdf_font_close() is idempotent. +# +# To draw with a custom font, pass the `pdfium_font` handle as the +# `font` argument of pdf_text_new(); R-side dispatch picks the +# custom-font code path automatically. + +# The 14 standard PDF Type 1 fonts that every PDF reader is required +# to support. PDFium accepts any of these names without needing font +# bytes to be embedded. 
+.pdfium_standard_fonts <- c( + "Helvetica", + "Helvetica-Bold", + "Helvetica-Oblique", + "Helvetica-BoldOblique", + "Times-Roman", + "Times-Bold", + "Times-Italic", + "Times-BoldItalic", + "Courier", + "Courier-Bold", + "Courier-Oblique", + "Courier-BoldOblique", + "Symbol", + "ZapfDingbats" +) + +#' Load one of the 14 PDF standard fonts +#' +#' Wraps `FPDFText_LoadStandardFont`. The 14 standard fonts +#' (Helvetica / Times-Roman / Courier in their four weight+style +#' variants, plus Symbol and ZapfDingbats) are required by the PDF +#' spec to be supported by every reader, so no font bytes need to +#' be embedded. +#' +#' For arbitrary TrueType / Type1 fonts, use [pdf_font_load()]. +#' +#' @param doc A `pdfium_doc` opened with `readwrite = TRUE`. +#' @param name Character scalar — one of the 14 standard font names +#' listed in the **Standard fonts** section below. +#' @return A `pdfium_font` handle. Pass it as the `font` argument +#' of [pdf_text_new()]. +#' +#' @section Standard fonts: +#' `"Helvetica"`, `"Helvetica-Bold"`, `"Helvetica-Oblique"`, +#' `"Helvetica-BoldOblique"`, `"Times-Roman"`, `"Times-Bold"`, +#' `"Times-Italic"`, `"Times-BoldItalic"`, `"Courier"`, +#' `"Courier-Bold"`, `"Courier-Oblique"`, `"Courier-BoldOblique"`, +#' `"Symbol"`, `"ZapfDingbats"`. +#' +#' @seealso [pdf_font_load()], [pdf_text_new()], [pdf_font_close()]. +#' @examples +#' \dontrun{ +#' doc <- pdf_doc_new() +#' page <- pdf_page_new(doc, width = 612, height = 792) +#' font <- pdf_font_load_standard(doc, "Times-Italic") +#' pdf_text_new(page, "hello", font = font, font_size = 24, +#' x = 72, y = 720) +#' pdf_save(doc, tempfile(fileext = ".pdf")) +#' } +#' @export +pdf_font_load_standard <- function(doc, name) { + assert_readwrite(doc) + checkmate::assert_choice(name, .pdfium_standard_fonts) + ptr <- cpp_font_load_standard(doc$ptr, name) + new_pdfium_font(ptr, doc, name) +} + +#' Load a TrueType or Type1 font from bytes +#' +#' Wraps `FPDFText_LoadFont`. 
The font bytes are embedded into the +#' PDF up front (PDFium copies them; the input is free to be +#' garbage-collected after the call returns). Use [pdf_font_load_standard()] +#' for the 14 PDF standard fonts where no embedding is needed. +#' +#' @param doc A `pdfium_doc` opened with `readwrite = TRUE`. +#' @param font_data Either a raw vector containing the font bytes +#' or a character path to a font file on disk. +#' @param type Character — `"truetype"` (default) for TrueType / OTF +#' fonts, or `"type1"` for Type1 fonts. +#' @param cid Logical. If `TRUE` (the default), the font is loaded as +#' a CID (composite) font. CID encoding is required for fonts with +#' more than 255 glyphs — i.e. anything that needs to render +#' non-Latin-1 text. The non-CID path (`cid = FALSE`) is smaller on +#' disk but limited to the standard PDF encodings. When in doubt, +#' leave this as `TRUE`. +#' @return A `pdfium_font` handle. Pass it as the `font` argument +#' of [pdf_text_new()]. +#' +#' @seealso [pdf_font_load_standard()], [pdf_text_new()], +#' [pdf_font_close()]. +#' @examples +#' \dontrun{ +#' doc <- pdf_doc_new() +#' page <- pdf_page_new(doc, width = 612, height = 792) +#' ttf <- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" +#' if (file.exists(ttf)) { +#' font <- pdf_font_load(doc, ttf) +#' pdf_text_new(page, "hello", font = font, font_size = 24, +#' x = 72, y = 720) +#' } +#' pdf_save(doc, tempfile(fileext = ".pdf")) +#' } +#' @export +pdf_font_load <- function(doc, font_data, + type = c("truetype", "type1"), + cid = TRUE) { + assert_readwrite(doc) + type <- match.arg(type) + checkmate::assert_flag(cid) + bytes <- coerce_font_bytes(font_data) + type_code <- if (identical(type, "type1")) 1L else 2L + ptr <- cpp_font_load_truetype(doc$ptr, bytes, type_code, cid) + display <- if (is.character(font_data)) basename(font_data) else "" + new_pdfium_font(ptr, doc, display) +} + +#' Close a font handle +#' +#' Releases the underlying PDFium font. 
Idempotent — a second call is +#' a no-op. The finalizer attached to the externalptr also runs this +#' when R garbage-collects the `pdfium_font`, but explicit close is +#' useful when many large fonts have been loaded and you want +#' deterministic release. +#' +#' Closing a font does **not** invalidate text objects that already +#' used it — PDFium keeps an internal reference. Only the embedder's +#' R-side handle is released. +#' +#' @param font A `pdfium_font` from [pdf_font_load_standard()] or +#' [pdf_font_load()]. +#' @return Invisibly returns `font`. +#' @export +pdf_font_close <- function(font) { + checkmate::assert_class(font, "pdfium_font") + cpp_font_close(font$ptr) + invisible(font) +} + +# Internal: coerce the `font_data` argument (raw vector or path) into +# a raw vector ready to hand to the C++ shim. Mirrors +# coerce_jpeg_bytes() in R/image_authoring.R. +coerce_font_bytes <- function(font_data) { + if (is.raw(font_data)) { + checkmate::assert_raw(font_data, min.len = 1L) + return(font_data) + } + if (is.character(font_data)) { + checkmate::assert_string(font_data, min.chars = 1L) + if (!file.exists(font_data)) { + stop("Font file not found: ", font_data, call. = FALSE) + } + n <- file.info(font_data)$size + return(readBin(font_data, what = "raw", n = n)) + } + stop("`font_data` must be a raw vector of font bytes or a path to ", + "a font file. Got: ", class(font_data)[[1L]], call. = FALSE) +} diff --git a/R/image_authoring.R b/R/image_authoring.R new file mode 100644 index 0000000..571d74c --- /dev/null +++ b/R/image_authoring.R @@ -0,0 +1,98 @@ +# Image-object creation for the page-authoring API. +# +# Wraps PDFium's `FPDFImageObj_LoadJpegFileInline` path so users can +# add JPEG content to programmatic PDFs. Sibling of pdf_path_new(), +# pdf_rect_new(), pdf_text_new() in R/obj_creators.R. +# +# JPEG-only by design for v0.1.0. 
Other formats (PNG, TIFF, raw +# bitmaps) need an FPDF_BITMAP wrapper class which is a separate +# design exercise — see dev/v0.2.0-plan.md. + +#' Create a new image page-object from JPEG bytes +#' +#' Wraps `FPDFPageObj_NewImageObj` + `FPDFImageObj_LoadJpegFileInline` +#' to embed a JPEG into a page. The JPEG bytes are copied into the +#' PDF at the moment of creation (the "Inline" variant of PDFium's +#' loader), so the input is free to be garbage-collected immediately +#' after the call returns. +#' +#' The new image is placed at the origin (0, 0) at its natural +#' pixel size in PDF user-space points (one unit per pixel). For a +#' specific position and size, pass `bounds = c(left, bottom, +#' right, top)`; the wrapper computes the transformation matrix +#' that scales + translates the image into that rectangle. +#' +#' @param page A `pdfium_page` from [pdf_page_load()] (or a +#' `pdfium_doc` with `page_num`). Parent doc must be readwrite. +#' @param jpeg Either a raw vector containing JPEG-encoded bytes or +#' a character path to a JPEG file on disk. PNG / TIFF / other +#' formats are not supported in v0.1.0; convert to JPEG with an +#' external tool (`magick::image_write(..., format = "jpeg")` is +#' the easy path) if needed. +#' @param bounds Optional length-4 numeric `c(left, bottom, right, +#' top)` in PDF user-space points. When `NULL` (default), the +#' image is placed at the origin at its natural pixel size in +#' points (rarely what you want — pass an explicit `bounds`). +#' @return A `pdfium_obj` handle of `type = "image"`. +#' @seealso [pdf_path_new()], [pdf_rect_new()], [pdf_text_new()] +#' for sibling creators; [pdf_image_info()], +#' [pdf_image_bitmap()] for the read side. 
+#' @examples +#' \dontrun{ +#' doc <- pdf_doc_new() +#' page <- pdf_page_new(doc, width = 612, height = 792) +#' jpeg_path <- system.file("img", "Rlogo.jpg", package = "jpeg") +#' if (nzchar(jpeg_path)) { +#' pdf_image_new(page, jpeg_path, +#' bounds = c(72, 600, 272, 700)) +#' } +#' pdf_save(doc, tempfile(fileext = ".pdf")) +#' } +#' @export +pdf_image_new <- function(page, jpeg, bounds = NULL) { + ph <- as_page_and_doc(page) + assert_readwrite(ph$doc) + bytes <- coerce_jpeg_bytes(jpeg) + if (!is.null(bounds)) { + checkmate::assert_numeric( + bounds, len = 4L, any.missing = FALSE, finite = TRUE + ) + } + ptr <- cpp_image_new_from_jpeg(ph$doc$ptr, ph$page$ptr, bytes) + if (!is.null(bounds)) { + width <- bounds[[3L]] - bounds[[1L]] + height <- bounds[[4L]] - bounds[[2L]] + # PDFium's image unit-square gets mapped by this matrix: scale to + # (width, height) and translate by (left, bottom). The PDF spec + # convention is that an image object's natural coordinate space + # is [0, 1] × [0, 1] before its CTM applies. + ok <- cpp_image_set_matrix( + ptr, width, 0, 0, height, bounds[[1L]], bounds[[2L]] + ) + if (!isTRUE(ok)) { + stop("FPDFImageObj_SetMatrix failed.", call. = FALSE) # nocov + } + } + idx <- cpp_page_object_count(ph$page$ptr) + mark_page_dirty(ph$doc, ph$page$index) + new_pdfium_obj(ptr, ph$page, idx, "image") +} + +# Internal: coerce the `jpeg` argument (raw vector or path) into +# a raw vector of JPEG bytes ready to hand to the C++ shim. +coerce_jpeg_bytes <- function(jpeg) { + if (is.raw(jpeg)) { + checkmate::assert_raw(jpeg, min.len = 1L) + return(jpeg) + } + if (is.character(jpeg)) { + checkmate::assert_string(jpeg, min.chars = 1L) + if (!file.exists(jpeg)) { + stop("JPEG file not found: ", jpeg, call. = FALSE) + } + n <- file.info(jpeg)$size + return(readBin(jpeg, what = "raw", n = n)) + } + stop("`jpeg` must be a raw vector of JPEG bytes or a path to a ", + "JPEG file. Got: ", class(jpeg)[[1L]], call. 
= FALSE) +} diff --git a/R/obj_creators.R b/R/obj_creators.R index 5fd5934..7e76e55 100644 --- a/R/obj_creators.R +++ b/R/obj_creators.R @@ -73,40 +73,26 @@ pdf_rect_new <- function(page, x, y, width, height) { new_pdfium_obj(ptr, ph$page, idx, "path") } -# The 14 standard PDF Type 1 fonts that every PDF reader is -# required to support. PDFium accepts any of these names without -# needing font data to be embedded. Custom fonts require loading -# via FPDFText_LoadFont (not yet exposed in this package). -.pdfium_standard_fonts <- c( - "Helvetica", - "Helvetica-Bold", - "Helvetica-Oblique", - "Helvetica-BoldOblique", - "Times-Roman", - "Times-Bold", - "Times-Italic", - "Times-BoldItalic", - "Courier", - "Courier-Bold", - "Courier-Oblique", - "Courier-BoldOblique", - "Symbol", - "ZapfDingbats" -) - #' Create a new text page-object on a page #' -#' Wraps `FPDFPageObj_NewTextObj` + (optionally) `FPDFText_SetText` -#' + `FPDFPageObj_Transform` + `FPDFPage_InsertObject`. The text -#' object uses one of the 14 PDF standard fonts (no font embedding -#' needed); custom fonts are deferred to a later release. +#' Wraps `FPDFPageObj_NewTextObj` (when `font` is a standard-font +#' name) or `FPDFPageObj_CreateTextObj` (when `font` is a custom +#' `pdfium_font` handle from [pdf_font_load_standard()] / +#' [pdf_font_load()]). Either path is followed by an optional +#' `FPDFText_SetText`, `FPDFPageObj_Transform`, and +#' `FPDFPage_InsertObject`. #' #' @inheritParams pdf_path_new #' @param text Character scalar — the text content. Pass `""` to #' create an empty text object you'll populate later via #' [pdf_text_set_content()]. -#' @param font Character scalar — one of the 14 PDF standard font -#' names. Default `"Helvetica"`. +#' @param font Either a character scalar — one of the 14 PDF +#' standard font names (see [pdf_font_load_standard()] for the +#' list) — or a `pdfium_font` handle from +#' [pdf_font_load_standard()] or [pdf_font_load()]. Default +#' `"Helvetica"`. 
Pass a `pdfium_font` handle when you need a +#' custom TrueType / Type1 font; the standard-font shortcut is +#' purely for convenience. #' @param font_size Numeric scalar — font size in points. Default #' `12`. #' @param x,y Numeric scalars — baseline position in PDF user-space @@ -114,25 +100,38 @@ pdf_rect_new <- function(page, x, y, width, height) { #' @return The new `pdfium_obj` (type `"text"`), inserted on the #' page. #' @seealso [pdf_text_set_content()], [pdf_text_set_render_mode()], -#' [pdf_obj_set_matrix()]. +#' [pdf_obj_set_matrix()], [pdf_font_load_standard()], +#' [pdf_font_load()]. #' @export pdf_text_new <- function(page, text, font = "Helvetica", font_size = 12, x = 0, y = 0) { checkmate::assert_string(text, na.ok = FALSE) - checkmate::assert_choice(font, .pdfium_standard_fonts) checkmate::assert_number(font_size, lower = 0, finite = TRUE) checkmate::assert_number(x, finite = TRUE) checkmate::assert_number(y, finite = TRUE) ph <- as_page_and_doc(page) assert_readwrite(ph$doc) - ptr <- cpp_text_new( - ph$doc$ptr, ph$page$ptr, - font, as.numeric(font_size), - enc2utf8(text), - as.numeric(x), as.numeric(y) - ) + if (inherits(font, "pdfium_font")) { + if (!is_open(font)) { + stop("Font handle has been closed.", call. 
= FALSE) + } + ptr <- cpp_text_new_with_font( + ph$doc$ptr, ph$page$ptr, font$ptr, + as.numeric(font_size), + enc2utf8(text), + as.numeric(x), as.numeric(y) + ) + } else { + checkmate::assert_choice(font, .pdfium_standard_fonts) + ptr <- cpp_text_new( + ph$doc$ptr, ph$page$ptr, + font, as.numeric(font_size), + enc2utf8(text), + as.numeric(x), as.numeric(y) + ) + } idx <- cpp_page_object_count(ph$page$ptr) mark_page_dirty(ph$doc, ph$page$index) new_pdfium_obj(ptr, ph$page, idx, "text") diff --git a/R/page.R b/R/page.R index e51678e..110a074 100644 --- a/R/page.R +++ b/R/page.R @@ -51,6 +51,19 @@ pdf_page_load <- function(doc, page_num = 1L) { #' @export pdf_page_close <- function(page) { checkmate::assert_class(page, "pdfium_page") + # Deregister from the doc's open-pages map BEFORE closing so a + # racing pdf_save() doesn't pick up a stale externalptr. Only + # remove if the entry actually points at this page's externalptr + # — another open handle for the same page index may have + # registered itself more recently. + state <- page$doc$state + if (!is.null(state)) { + key <- as.character(page$index) + registered <- state$open_pages[[key]] + if (!is.null(registered) && identical(registered, page$ptr)) { + state$open_pages[[key]] <- NULL + } + } cpp_close_page(page$ptr) invisible(page) } diff --git a/R/save.R b/R/save.R index be2e6cb..3cfb345 100644 --- a/R/save.R +++ b/R/save.R @@ -223,6 +223,17 @@ pdf_doc_new <- function() { # each. Pages mark themselves dirty (via mark_page_dirty(doc, n) # from mutator wrappers); pdf_save() invokes this just before # serialising. +# +# Generates content on the user's actual page handle when one is +# registered in `doc$state$open_pages` — a fresh FPDF_LoadPage +# returns a different FPDF_PAGE that hasn't seen the user's +# inserts, and calling GenerateContent on that would erase the +# user's in-memory page-object array (and thus their unsaved +# edits). When no handle is registered (e.g. 
a setter that +# transiently loaded + modified + closed before save), fall back +# to a fresh load: the modification has already been persisted to +# the page-dict (rotation, etc.) and re-generating empty content +# is a no-op. flush_dirty_pages <- function(doc) { state <- doc$state # nocov start — every `pdfium_doc` built via `new_pdfium_doc()` @@ -233,9 +244,14 @@ flush_dirty_pages <- function(doc) { dirty <- state$dirty_pages if (length(dirty) == 0L) return(invisible(NULL)) for (i in dirty) { - page <- pdf_page_load(doc, i) - cpp_page_generate_content(page$ptr) - pdf_page_close(page) + page_ptr <- state$open_pages[[as.character(i)]] + if (!is.null(page_ptr) && cpp_handle_is_valid(page_ptr)) { + cpp_page_generate_content(page_ptr) + } else { + page <- pdf_page_load(doc, i) + cpp_page_generate_content(page$ptr) + pdf_page_close(page) + } } # Reset the dirty set so a re-save (incremental or no-op) doesn't # double-flush. The state environment IS reference-semantics so diff --git a/README.Rmd b/README.Rmd index 954bd19..a49b6bb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -46,14 +46,13 @@ same library that powers Chrome's PDF viewer. It has two halves: just text. * **Filling** AcroForm fields programmatically and flattening the result for downstream tooling. -* **Authoring** programmatic PDFs from vector graphics, text, and - annotations (think: figure callouts, table reports, annotated - source documents). v0.1.0 ships paths / text in the 14 standard - PDF fonts / annotations. Image embedding and custom-font loading - are both wrapping gaps (PDFium already exposes the symbols; we - just haven't wrapped them yet) and come in a later release; - `/Info`-dict writes and on-save encryption need upstream PDFium - changes that we've proposed but Google hasn't shipped yet. 
+* **Authoring** programmatic PDFs from vector graphics, JPEG + images, text in the 14 standard fonts or any TrueType / Type1 + typeface, and annotations (think: figure callouts, table + reports, annotated source documents). `/Info`-dict writes and + on-save encryption are the remaining v0.1.0 gaps — both need + upstream PDFium changes that we've proposed but Google hasn't + shipped yet. * Anything you'd otherwise drop into Python with `pypdfium2`. See [`vignette("mutating-pdfs")`](https://humanpred.github.io/rpdfium/articles/mutating-pdfs.html) diff --git a/README.md b/README.md index 0698b76..fd28b09 100644 --- a/README.md +++ b/README.md @@ -39,14 +39,12 @@ powers Chrome’s PDF viewer. It has two halves: text. - **Filling** AcroForm fields programmatically and flattening the result for downstream tooling. -- **Authoring** programmatic PDFs from vector graphics, text, and +- **Authoring** programmatic PDFs from vector graphics, JPEG images, + text in the 14 standard fonts or any TrueType / Type1 typeface, and annotations (think: figure callouts, table reports, annotated source - documents). v0.1.0 ships paths / text in the 14 standard PDF fonts / - annotations. Image embedding and custom-font loading are both wrapping - gaps (PDFium already exposes the symbols; we just haven’t wrapped them - yet) and come in a later release; `/Info`-dict writes and on-save - encryption need upstream PDFium changes that we’ve proposed but Google - hasn’t shipped yet. + documents). `/Info`-dict writes and on-save encryption are the + remaining v0.1.0 gaps — both need upstream PDFium changes that we’ve + proposed but Google hasn’t shipped yet. - Anything you’d otherwise drop into Python with `pypdfium2`. 
See diff --git a/_pkgdown.yml b/_pkgdown.yml index b798857..395fcdd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -280,16 +280,29 @@ reference: - pdf_path_append - title: Page-object creation desc: > - Create fresh page-objects (paths, rectangles, text) on a page - that's been opened with `readwrite = TRUE` or built via - `pdf_doc_new()`. Image creation is deferred to a later release - pending FPDF_BITMAP plumbing. Use `pdf_obj_delete()` for the - inverse — remove + destroy a page-object. + Create fresh page-objects (paths, rectangles, text, JPEG + images) on a page that's been opened with `readwrite = TRUE` + or built via `pdf_doc_new()`. Use `pdf_obj_delete()` for the + inverse — remove + destroy a page-object. PNG / TIFF / raw- + bitmap embedding stays deferred to a later release pending + `FPDF_BITMAP` plumbing. contents: - pdf_path_new - pdf_rect_new - pdf_text_new + - pdf_image_new - pdf_obj_delete + - title: Font loading + desc: > + Load a font for use in `pdf_text_new()`. The 14 PDF standard + fonts need no embedding; arbitrary TrueType / Type1 fonts get + their bytes copied into the document via `FPDFText_LoadFont`. + `pdf_font_close()` is idempotent and matches the explicit- + release pattern of the other handle classes. + contents: + - pdf_font_load_standard + - pdf_font_load + - pdf_font_close - title: Annotation authoring desc: > Create / delete annotations and mutate their properties. diff --git a/dev/v0.1.0-api-gap-audit.md b/dev/v0.1.0-api-gap-audit.md index e4b6367..9512437 100644 --- a/dev/v0.1.0-api-gap-audit.md +++ b/dev/v0.1.0-api-gap-audit.md @@ -90,13 +90,25 @@ in v0.2.0: ### Image authoring -`FPDFImageObj_LoadJpegFile`, `FPDFImageObj_SetBitmap`, -`FPDFImageObj_SetMatrix` — Phase 5 deliberately deferred image -creation pending an FPDF_BITMAP plumbing design. 
v0.2.0 should -revisit: an `FPDF_BITMAP` constructor (`FPDFBitmap_Create*`) plus -these three setters would round out the v0.1.0 page-object -creation set (`pdf_path_new`, `pdf_rect_new`, `pdf_text_new` → -`pdf_image_new`). +**Closed for v0.1.0** via `pdf_image_new()` — wraps +`FPDFPageObj_NewImageObj` + `FPDFImageObj_LoadJpegFileInline` + +`FPDFImageObj_SetMatrix`, embedding JPEG bytes inline. `bounds = +c(left, bottom, right, top)` controls placement. + +Still deferred to v0.2.0: PNG / TIFF / raw-bitmap embedding via +`FPDFImageObj_SetBitmap`, which needs an `FPDF_BITMAP` lifecycle +class (`FPDFBitmap_Create*` constructors + finalizer) — a +separate design exercise that pulls in colorspace and decoder +considerations. + +### Custom font authoring + +**Closed for v0.1.0** via `pdf_font_load_standard()` (the 14 +PDF standard fonts) and `pdf_font_load()` (arbitrary TrueType / +Type1 bytes). The returned `pdfium_font` handle plugs into +`pdf_text_new(page, text, font = handle)`; the handle carries an +`FPDFFont_Close` finalizer and the doc is pinned in the +externalptr's `prot` slot. ### Clip path authoring diff --git a/dev/v0.2.0-plan.md b/dev/v0.2.0-plan.md index d4814c3..9471e3f 100644 --- a/dev/v0.2.0-plan.md +++ b/dev/v0.2.0-plan.md @@ -119,11 +119,13 @@ directly to user demand visible in the R ecosystem: Non-goals for 0.2.0: -- Page-object authoring (drawing new paths / text / images via - `FPDFPageObj_CreateNewPath`, `_NewTextObj`, `_NewImageObj`). - This is a PDF *authoring* API; users who want to draw PDFs - programmatically should reach for `grDevices::cairo_pdf` or - `ggplot2` → ggsave. Reasonable 0.3+ feature if there is demand. +- PNG / TIFF / raw-bitmap image embedding. v0.1.0 ships JPEG-only + via `pdf_image_new()` + `FPDFImageObj_LoadJpegFileInline`; other + formats need an `FPDF_BITMAP` lifecycle class + (`FPDFBitmap_Create*` constructors + finalizer + colorspace + plumbing) which is a separate design exercise. 
Workaround + today: convert to JPEG with `magick::image_write(..., format = + "jpeg")` before calling `pdf_image_new()`. - Tagged-PDF authoring. Read in 0.1.0; write is a research-grade problem. - Digital-signature creation (PKCS#7 signing). Better as a diff --git a/man/pdf_font_close.Rd b/man/pdf_font_close.Rd new file mode 100644 index 0000000..1815a2a --- /dev/null +++ b/man/pdf_font_close.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fonts.R +\name{pdf_font_close} +\alias{pdf_font_close} +\title{Close a font handle} +\usage{ +pdf_font_close(font) +} +\arguments{ +\item{font}{A \code{pdfium_font} from \code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}} or +\code{\link[=pdf_font_load]{pdf_font_load()}}.} +} +\value{ +Invisibly returns \code{font}. +} +\description{ +Releases the underlying PDFium font. Idempotent — a second call is +a no-op. The finalizer attached to the externalptr also runs this +when R garbage-collects the \code{pdfium_font}, but explicit close is +useful when many large fonts have been loaded and you want +deterministic release. +} +\details{ +Closing a font does \strong{not} invalidate text objects that already +used it — PDFium keeps an internal reference. Only the embedder's +R-side handle is released. 
+} diff --git a/man/pdf_font_load.Rd b/man/pdf_font_load.Rd new file mode 100644 index 0000000..fdc4d47 --- /dev/null +++ b/man/pdf_font_load.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fonts.R +\name{pdf_font_load} +\alias{pdf_font_load} +\title{Load a TrueType or Type1 font from bytes} +\usage{ +pdf_font_load(doc, font_data, type = c("truetype", "type1"), cid = TRUE) +} +\arguments{ +\item{doc}{A \code{pdfium_doc} opened with \code{readwrite = TRUE}.} + +\item{font_data}{Either a raw vector containing the font bytes +or a character path to a font file on disk.} + +\item{type}{Character — \code{"truetype"} (default) for TrueType / OTF +fonts, or \code{"type1"} for Type1 fonts.} + +\item{cid}{Logical. If \code{TRUE} (the default), the font is loaded as +a CID (composite) font. CID encoding is required for fonts with +more than 255 glyphs — i.e. anything that needs to render +non-Latin-1 text. The non-CID path (\code{cid = FALSE}) is smaller on +disk but limited to the standard PDF encodings. When in doubt, +leave this as \code{TRUE}.} +} +\value{ +A \code{pdfium_font} handle. Pass it as the \code{font} argument +of \code{\link[=pdf_text_new]{pdf_text_new()}}. +} +\description{ +Wraps \code{FPDFText_LoadFont}. The font bytes are embedded into the +PDF up front (PDFium copies them; the input is free to be +garbage-collected after the call returns). Use \code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}} +for the 14 PDF standard fonts where no embedding is needed. 
+} +\examples{ +\dontrun{ +doc <- pdf_doc_new() +page <- pdf_page_new(doc, width = 612, height = 792) +ttf <- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" +if (file.exists(ttf)) { + font <- pdf_font_load(doc, ttf) + pdf_text_new(page, "hello", font = font, font_size = 24, + x = 72, y = 720) +} +pdf_save(doc, tempfile(fileext = ".pdf")) +} +} +\seealso{ +\code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}}, \code{\link[=pdf_text_new]{pdf_text_new()}}, +\code{\link[=pdf_font_close]{pdf_font_close()}}. +} diff --git a/man/pdf_font_load_standard.Rd b/man/pdf_font_load_standard.Rd new file mode 100644 index 0000000..e68cc80 --- /dev/null +++ b/man/pdf_font_load_standard.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fonts.R +\name{pdf_font_load_standard} +\alias{pdf_font_load_standard} +\title{Load one of the 14 PDF standard fonts} +\usage{ +pdf_font_load_standard(doc, name) +} +\arguments{ +\item{doc}{A \code{pdfium_doc} opened with \code{readwrite = TRUE}.} + +\item{name}{Character scalar — one of the 14 standard font names +listed in the \strong{Standard fonts} section below.} +} +\value{ +A \code{pdfium_font} handle. Pass it as the \code{font} argument +of \code{\link[=pdf_text_new]{pdf_text_new()}}. +} +\description{ +Wraps \code{FPDFText_LoadStandardFont}. The 14 standard fonts +(Helvetica / Times-Roman / Courier in their four weight+style +variants, plus Symbol and ZapfDingbats) are required by the PDF +spec to be supported by every reader, so no font bytes need to +be embedded. +} +\details{ +For arbitrary TrueType / Type1 fonts, use \code{\link[=pdf_font_load]{pdf_font_load()}}. 
+} +\section{Standard fonts}{ + +\code{"Helvetica"}, \code{"Helvetica-Bold"}, \code{"Helvetica-Oblique"}, +\code{"Helvetica-BoldOblique"}, \code{"Times-Roman"}, \code{"Times-Bold"}, +\code{"Times-Italic"}, \code{"Times-BoldItalic"}, \code{"Courier"}, +\code{"Courier-Bold"}, \code{"Courier-Oblique"}, \code{"Courier-BoldOblique"}, +\code{"Symbol"}, \code{"ZapfDingbats"}. +} + +\examples{ +\dontrun{ +doc <- pdf_doc_new() +page <- pdf_page_new(doc, width = 612, height = 792) +font <- pdf_font_load_standard(doc, "Times-Italic") +pdf_text_new(page, "hello", font = font, font_size = 24, + x = 72, y = 720) +pdf_save(doc, tempfile(fileext = ".pdf")) +} +} +\seealso{ +\code{\link[=pdf_font_load]{pdf_font_load()}}, \code{\link[=pdf_text_new]{pdf_text_new()}}, \code{\link[=pdf_font_close]{pdf_font_close()}}. +} diff --git a/man/pdf_image_new.Rd b/man/pdf_image_new.Rd new file mode 100644 index 0000000..29cb508 --- /dev/null +++ b/man/pdf_image_new.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_authoring.R +\name{pdf_image_new} +\alias{pdf_image_new} +\title{Create a new image page-object from JPEG bytes} +\usage{ +pdf_image_new(page, jpeg, bounds = NULL) +} +\arguments{ +\item{page}{A \code{pdfium_page} from \code{\link[=pdf_page_load]{pdf_page_load()}} (or a +\code{pdfium_doc} with \code{page_num}). Parent doc must be readwrite.} + +\item{jpeg}{Either a raw vector containing JPEG-encoded bytes or +a character path to a JPEG file on disk. PNG / TIFF / other +formats are not supported in v0.1.0; convert to JPEG with an +external tool (\code{magick::image_write(..., format = "jpeg")} is +the easy path) if needed.} + +\item{bounds}{Optional length-4 numeric \code{c(left, bottom, right, top)} in PDF user-space points. 
When \code{NULL} (default), the +image is placed at the origin at its natural pixel size in +points (rarely what you want — pass an explicit \code{bounds}).} +} +\value{ +A \code{pdfium_obj} handle of \code{type = "image"}. +} +\description{ +Wraps \code{FPDFPageObj_NewImageObj} + \code{FPDFImageObj_LoadJpegFileInline} +to embed a JPEG into a page. The JPEG bytes are copied into the +PDF at the moment of creation (the "Inline" variant of PDFium's +loader), so the input is free to be garbage-collected immediately +after the call returns. +} +\details{ +The new image is placed at the origin (0, 0) at its natural +pixel size in PDF user-space points (one unit per pixel). For a +specific position and size, pass \code{bounds = c(left, bottom, right, top)}; the wrapper computes the transformation matrix +that scales + translates the image into that rectangle. +} +\examples{ +\dontrun{ +doc <- pdf_doc_new() +page <- pdf_page_new(doc, width = 612, height = 792) +jpeg_path <- system.file("img", "Rlogo.jpg", package = "jpeg") +if (nzchar(jpeg_path)) { + pdf_image_new(page, jpeg_path, + bounds = c(72, 600, 272, 700)) +} +pdf_save(doc, tempfile(fileext = ".pdf")) +} +} +\seealso{ +\code{\link[=pdf_path_new]{pdf_path_new()}}, \code{\link[=pdf_rect_new]{pdf_rect_new()}}, \code{\link[=pdf_text_new]{pdf_text_new()}} +for sibling creators; \code{\link[=pdf_image_info]{pdf_image_info()}}, +\code{\link[=pdf_image_bitmap]{pdf_image_bitmap()}} for the read side. +} diff --git a/man/pdf_text_new.Rd b/man/pdf_text_new.Rd index 9dadeca..3c70db3 100644 --- a/man/pdf_text_new.Rd +++ b/man/pdf_text_new.Rd @@ -14,8 +14,13 @@ must be readwrite.} create an empty text object you'll populate later via \code{\link[=pdf_text_set_content]{pdf_text_set_content()}}.} -\item{font}{Character scalar — one of the 14 PDF standard font -names. 
Default \code{"Helvetica"}.} +\item{font}{Either a character scalar — one of the 14 PDF +standard font names (see \code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}} for the +list) — or a \code{pdfium_font} handle from +\code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}} or \code{\link[=pdf_font_load]{pdf_font_load()}}. Default +\code{"Helvetica"}. Pass a \code{pdfium_font} handle when you need a +custom TrueType / Type1 font; the standard-font shortcut is +purely for convenience.} \item{font_size}{Numeric scalar — font size in points. Default \code{12}.} @@ -28,14 +33,15 @@ The new \code{pdfium_obj} (type \code{"text"}), inserted on the page. } \description{ -Wraps \code{FPDFPageObj_NewTextObj} + (optionally) \code{FPDFText_SetText} -\itemize{ -\item \code{FPDFPageObj_Transform} + \code{FPDFPage_InsertObject}. The text -object uses one of the 14 PDF standard fonts (no font embedding -needed); custom fonts are deferred to a later release. -} +Wraps \code{FPDFPageObj_NewTextObj} (when \code{font} is a standard-font +name) or \code{FPDFPageObj_CreateTextObj} (when \code{font} is a custom +\code{pdfium_font} handle from \code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}} / +\code{\link[=pdf_font_load]{pdf_font_load()}}). Either path is followed by an optional +\code{FPDFText_SetText}, \code{FPDFPageObj_Transform}, and +\code{FPDFPage_InsertObject}. } \seealso{ \code{\link[=pdf_text_set_content]{pdf_text_set_content()}}, \code{\link[=pdf_text_set_render_mode]{pdf_text_set_render_mode()}}, -\code{\link[=pdf_obj_set_matrix]{pdf_obj_set_matrix()}}. +\code{\link[=pdf_obj_set_matrix]{pdf_obj_set_matrix()}}, \code{\link[=pdf_font_load_standard]{pdf_font_load_standard()}}, +\code{\link[=pdf_font_load]{pdf_font_load()}}. 
} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 88e50ee..be887a7 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -823,6 +823,59 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// cpp_font_load_standard +SEXP cpp_font_load_standard(SEXP doc_ptr, std::string font_name); +RcppExport SEXP _pdfium_cpp_font_load_standard(SEXP doc_ptrSEXP, SEXP font_nameSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type doc_ptr(doc_ptrSEXP); + Rcpp::traits::input_parameter< std::string >::type font_name(font_nameSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_font_load_standard(doc_ptr, font_name)); + return rcpp_result_gen; +END_RCPP +} +// cpp_font_load_truetype +SEXP cpp_font_load_truetype(SEXP doc_ptr, Rcpp::RawVector font_data, int font_type, bool cid); +RcppExport SEXP _pdfium_cpp_font_load_truetype(SEXP doc_ptrSEXP, SEXP font_dataSEXP, SEXP font_typeSEXP, SEXP cidSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type doc_ptr(doc_ptrSEXP); + Rcpp::traits::input_parameter< Rcpp::RawVector >::type font_data(font_dataSEXP); + Rcpp::traits::input_parameter< int >::type font_type(font_typeSEXP); + Rcpp::traits::input_parameter< bool >::type cid(cidSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_font_load_truetype(doc_ptr, font_data, font_type, cid)); + return rcpp_result_gen; +END_RCPP +} +// cpp_font_close +void cpp_font_close(SEXP font_ptr); +RcppExport SEXP _pdfium_cpp_font_close(SEXP font_ptrSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type font_ptr(font_ptrSEXP); + cpp_font_close(font_ptr); + return R_NilValue; +END_RCPP +} +// cpp_text_new_with_font +SEXP cpp_text_new_with_font(SEXP doc_ptr, SEXP page_ptr, SEXP font_ptr, double font_size, std::string text_utf8, double x, double y); +RcppExport SEXP _pdfium_cpp_text_new_with_font(SEXP doc_ptrSEXP, SEXP 
page_ptrSEXP, SEXP font_ptrSEXP, SEXP font_sizeSEXP, SEXP text_utf8SEXP, SEXP xSEXP, SEXP ySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type doc_ptr(doc_ptrSEXP); + Rcpp::traits::input_parameter< SEXP >::type page_ptr(page_ptrSEXP); + Rcpp::traits::input_parameter< SEXP >::type font_ptr(font_ptrSEXP); + Rcpp::traits::input_parameter< double >::type font_size(font_sizeSEXP); + Rcpp::traits::input_parameter< std::string >::type text_utf8(text_utf8SEXP); + Rcpp::traits::input_parameter< double >::type x(xSEXP); + Rcpp::traits::input_parameter< double >::type y(ySEXP); + rcpp_result_gen = Rcpp::wrap(cpp_text_new_with_font(doc_ptr, page_ptr, font_ptr, font_size, text_utf8, x, y)); + return rcpp_result_gen; +END_RCPP +} // cpp_form_field_handles Rcpp::List cpp_form_field_handles(SEXP doc_ptr); RcppExport SEXP _pdfium_cpp_form_field_handles(SEXP doc_ptrSEXP) { @@ -1073,6 +1126,36 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// cpp_image_new_from_jpeg +SEXP cpp_image_new_from_jpeg(SEXP doc_ptr, SEXP page_ptr, Rcpp::RawVector jpeg_bytes); +RcppExport SEXP _pdfium_cpp_image_new_from_jpeg(SEXP doc_ptrSEXP, SEXP page_ptrSEXP, SEXP jpeg_bytesSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type doc_ptr(doc_ptrSEXP); + Rcpp::traits::input_parameter< SEXP >::type page_ptr(page_ptrSEXP); + Rcpp::traits::input_parameter< Rcpp::RawVector >::type jpeg_bytes(jpeg_bytesSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_image_new_from_jpeg(doc_ptr, page_ptr, jpeg_bytes)); + return rcpp_result_gen; +END_RCPP +} +// cpp_image_set_matrix +bool cpp_image_set_matrix(SEXP image_ptr, double a, double b, double c, double d, double e, double f); +RcppExport SEXP _pdfium_cpp_image_set_matrix(SEXP image_ptrSEXP, SEXP aSEXP, SEXP bSEXP, SEXP cSEXP, SEXP dSEXP, SEXP eSEXP, SEXP fSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + 
Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type image_ptr(image_ptrSEXP); + Rcpp::traits::input_parameter< double >::type a(aSEXP); + Rcpp::traits::input_parameter< double >::type b(bSEXP); + Rcpp::traits::input_parameter< double >::type c(cSEXP); + Rcpp::traits::input_parameter< double >::type d(dSEXP); + Rcpp::traits::input_parameter< double >::type e(eSEXP); + Rcpp::traits::input_parameter< double >::type f(fSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_image_set_matrix(image_ptr, a, b, c, d, e, f)); + return rcpp_result_gen; +END_RCPP +} // cpp_image_metadata Rcpp::List cpp_image_metadata(SEXP obj_ptr, SEXP page_ptr); RcppExport SEXP _pdfium_cpp_image_metadata(SEXP obj_ptrSEXP, SEXP page_ptrSEXP) { @@ -2495,6 +2578,10 @@ static const R_CallMethodDef CallEntries[] = { {"_pdfium_cpp_doc_meta_text", (DL_FUNC) &_pdfium_cpp_doc_meta_text, 2}, {"_pdfium_cpp_doc_info", (DL_FUNC) &_pdfium_cpp_doc_info, 1}, {"_pdfium_cpp_doc_file_version", (DL_FUNC) &_pdfium_cpp_doc_file_version, 1}, + {"_pdfium_cpp_font_load_standard", (DL_FUNC) &_pdfium_cpp_font_load_standard, 2}, + {"_pdfium_cpp_font_load_truetype", (DL_FUNC) &_pdfium_cpp_font_load_truetype, 4}, + {"_pdfium_cpp_font_close", (DL_FUNC) &_pdfium_cpp_font_close, 1}, + {"_pdfium_cpp_text_new_with_font", (DL_FUNC) &_pdfium_cpp_text_new_with_font, 7}, {"_pdfium_cpp_form_field_handles", (DL_FUNC) &_pdfium_cpp_form_field_handles, 1}, {"_pdfium_cpp_form_field_name_handle", (DL_FUNC) &_pdfium_cpp_form_field_name_handle, 2}, {"_pdfium_cpp_form_field_alternate_name_handle", (DL_FUNC) &_pdfium_cpp_form_field_alternate_name_handle, 2}, @@ -2516,6 +2603,8 @@ static const R_CallMethodDef CallEntries[] = { {"_pdfium_cpp_text_obj_glyph_width", (DL_FUNC) &_pdfium_cpp_text_obj_glyph_width, 3}, {"_pdfium_cpp_text_obj_font_metrics", (DL_FUNC) &_pdfium_cpp_text_obj_font_metrics, 2}, {"_pdfium_cpp_text_char_font_info", (DL_FUNC) &_pdfium_cpp_text_char_font_info, 1}, + {"_pdfium_cpp_image_new_from_jpeg", (DL_FUNC) 
&_pdfium_cpp_image_new_from_jpeg, 3}, + {"_pdfium_cpp_image_set_matrix", (DL_FUNC) &_pdfium_cpp_image_set_matrix, 7}, {"_pdfium_cpp_image_metadata", (DL_FUNC) &_pdfium_cpp_image_metadata, 2}, {"_pdfium_cpp_image_pixel_size", (DL_FUNC) &_pdfium_cpp_image_pixel_size, 1}, {"_pdfium_cpp_image_get_bitmap", (DL_FUNC) &_pdfium_cpp_image_get_bitmap, 1}, diff --git a/src/font_authoring.cpp b/src/font_authoring.cpp new file mode 100644 index 0000000..4eab8e2 --- /dev/null +++ b/src/font_authoring.cpp @@ -0,0 +1,139 @@ +// pdfium R package — font loading for the page-authoring API. +// +// Three exports: +// cpp_font_load_standard — FPDFText_LoadStandardFont +// cpp_font_load_truetype — FPDFText_LoadFont (TrueType / Type1) +// cpp_text_new_with_font — FPDFPageObj_CreateTextObj +// +// All three return externalptrs whose `prot` slot pins the parent +// document (so the doc outlives the font / text obj). The font +// externalptr carries a finalizer that calls FPDFFont_Close; +// page objects (the text obj returned by CreateTextObj) inherit +// page lifetime so they have no finalizer, matching the existing +// path/rect creator conventions. + +#include +#include +#include +#include "fpdfview.h" +#include "fpdf_edit.h" +#include "handle_validation.h" +#include "utf16.h" + +namespace { + +inline FPDF_DOCUMENT doc_from_ptr(SEXP doc_ptr) { + return static_cast( + pdfium_r::validate_handle(doc_ptr, "Document", + /*require_prot_alive=*/false)); +} + +inline FPDF_PAGE page_from_ptr(SEXP page_ptr) { + return static_cast( + pdfium_r::validate_handle(page_ptr, "Page", + /*require_prot_alive=*/false)); +} + +inline FPDF_FONT font_from_ptr(SEXP font_ptr) { + return static_cast( + pdfium_r::validate_handle(font_ptr, "Font", + /*require_prot_alive=*/true)); +} + +// Finalizer for pdfium_font externalptr. PDFium's FPDFFont_Close +// releases the font (it stays referenced by any text objects that +// already used it; this just drops the embedder's hold). 
+void font_finalizer(SEXP font_ptr) { + if (TYPEOF(font_ptr) != EXTPTRSXP) return; + FPDF_FONT font = static_cast(R_ExternalPtrAddr(font_ptr)); + if (font == nullptr) return; + FPDFFont_Close(font); + R_ClearExternalPtr(font_ptr); +} + +} // namespace + +// [[Rcpp::export(name = "cpp_font_load_standard")]] +SEXP cpp_font_load_standard(SEXP doc_ptr, std::string font_name) { + FPDF_DOCUMENT doc = doc_from_ptr(doc_ptr); + FPDF_FONT font = FPDFText_LoadStandardFont(doc, font_name.c_str()); + if (font == nullptr) { + Rcpp::stop( + "FPDFText_LoadStandardFont('%s') returned NULL. Valid names " + "are the 14 PDF standard fonts (e.g. 'Helvetica', " + "'Helvetica-Bold', 'Times-Roman', 'Courier').", + font_name.c_str()); + } + SEXP ext = PROTECT(R_MakeExternalPtr(font, R_NilValue, doc_ptr)); + R_RegisterCFinalizerEx(ext, font_finalizer, + static_cast(TRUE)); + UNPROTECT(1); + return ext; +} + +// [[Rcpp::export(name = "cpp_font_load_truetype")]] +SEXP cpp_font_load_truetype(SEXP doc_ptr, Rcpp::RawVector font_data, + int font_type, bool cid) { + FPDF_DOCUMENT doc = doc_from_ptr(doc_ptr); + const std::uint8_t* data = + font_data.size() > 0 + ? reinterpret_cast(&font_data[0]) + : nullptr; + FPDF_FONT font = FPDFText_LoadFont( + doc, data, static_cast(font_data.size()), + font_type, cid ? 1 : 0); + if (font == nullptr) { + Rcpp::stop( + "FPDFText_LoadFont returned NULL. Confirm the bytes are a " + "valid %s font.", + font_type == 1 ? "Type1" : "TrueType"); + } + SEXP ext = PROTECT(R_MakeExternalPtr(font, R_NilValue, doc_ptr)); + R_RegisterCFinalizerEx(ext, font_finalizer, + static_cast(TRUE)); + UNPROTECT(1); + return ext; +} + +// [[Rcpp::export(name = "cpp_font_close")]] +void cpp_font_close(SEXP font_ptr) { + // Idempotent — finalizer-style close. Match pdf_doc_close's + // "second call is a no-op" contract. 
+ if (TYPEOF(font_ptr) != EXTPTRSXP) return; + FPDF_FONT font = static_cast(R_ExternalPtrAddr(font_ptr)); + if (font == nullptr) return; + FPDFFont_Close(font); + R_ClearExternalPtr(font_ptr); +} + +// [[Rcpp::export(name = "cpp_text_new_with_font")]] +SEXP cpp_text_new_with_font(SEXP doc_ptr, SEXP page_ptr, + SEXP font_ptr, double font_size, + std::string text_utf8, + double x, double y) { + FPDF_DOCUMENT doc = doc_from_ptr(doc_ptr); + FPDF_PAGE page = page_from_ptr(page_ptr); + FPDF_FONT font = font_from_ptr(font_ptr); + + FPDF_PAGEOBJECT text_obj = FPDFPageObj_CreateTextObj( + doc, font, static_cast(font_size)); + if (text_obj == nullptr) { + Rcpp::stop("FPDFPageObj_CreateTextObj returned NULL."); + } + if (!text_utf8.empty()) { + std::vector utf16 = + pdfium_r::utf8_to_utf16le_nul(text_utf8); + if (!FPDFText_SetText( + text_obj, + reinterpret_cast(utf16.data()))) { + FPDFPageObj_Destroy(text_obj); + Rcpp::stop("FPDFText_SetText failed on the new text object."); + } + } + // Identity scale + translate (same convention as cpp_text_new). + FPDFPageObj_Transform(text_obj, 1, 0, 0, 1, + static_cast(x), + static_cast(y)); + FPDFPage_InsertObject(page, text_obj); + return R_MakeExternalPtr(text_obj, R_NilValue, page_ptr); +} diff --git a/src/image_authoring.cpp b/src/image_authoring.cpp new file mode 100644 index 0000000..0adf83f --- /dev/null +++ b/src/image_authoring.cpp @@ -0,0 +1,105 @@ +// pdfium R package — image-object creation from JPEG bytes. +// +// Wraps the FPDFImageObj_LoadJpegFileInline path. PDFium's LoadJpeg* +// API takes an FPDF_FILEACCESS callback structure; for our use case +// (load a single in-memory buffer up front, copy it into the PDF), +// the callback is a thin memcpy from a static buffer we own for the +// duration of the call. +// +// LoadJpegFileInline vs LoadJpegFile: the "inline" variant tells +// PDFium to copy the JPEG bytes into the PDF immediately rather +// than holding a reference to the FileAccess for later reads. 
+// That's what we want — the R-side raw vector that backs our +// FileAccess struct is only valid for the lifetime of this call. + +#include +#include +#include +#include "fpdfview.h" +#include "fpdf_edit.h" +#include "handle_validation.h" + +namespace { + +inline FPDF_DOCUMENT doc_from_ptr(SEXP doc_ptr) { + return static_cast( + pdfium_r::validate_handle(doc_ptr, "Document", + /*require_prot_alive=*/false)); +} + +inline FPDF_PAGE page_from_ptr(SEXP page_ptr) { + return static_cast( + pdfium_r::validate_handle(page_ptr, "Page", + /*require_prot_alive=*/false)); +} + +// FPDF_FILEACCESS callback. Reads bytes from the buffer held in +// `param` (which we set to point at a JpegBuf). Returns 1 on +// success (PDFium's docs say "non-zero if successful"). +struct JpegBuf { + const unsigned char* data; + unsigned long len; +}; + +int read_jpeg_block(void* param, unsigned long position, + unsigned char* p_buf, unsigned long size) { + JpegBuf* jb = static_cast(param); + if (position + size > jb->len) { + return 0; // out of range + } + std::memcpy(p_buf, jb->data + position, size); + return 1; +} + +} // namespace + +// [[Rcpp::export(name = "cpp_image_new_from_jpeg")]] +SEXP cpp_image_new_from_jpeg(SEXP doc_ptr, SEXP page_ptr, + Rcpp::RawVector jpeg_bytes) { + FPDF_DOCUMENT doc = doc_from_ptr(doc_ptr); + FPDF_PAGE page = page_from_ptr(page_ptr); + + FPDF_PAGEOBJECT image_obj = FPDFPageObj_NewImageObj(doc); + if (image_obj == nullptr) { + Rcpp::stop("FPDFPageObj_NewImageObj returned NULL."); + } + + JpegBuf jb; + jb.data = reinterpret_cast( + jpeg_bytes.size() > 0 ? &jpeg_bytes[0] : nullptr); + jb.len = static_cast(jpeg_bytes.size()); + + FPDF_FILEACCESS file_access; + file_access.m_FileLen = jb.len; + file_access.m_GetBlock = read_jpeg_block; + file_access.m_Param = &jb; + + // The "inline" variant copies the bytes into the PDF up front, + // so jb only needs to outlive this call. 
(LoadJpegFile, without + // the "Inline" suffix, retains a reference to file_access and + // would dangle on return.) + FPDF_PAGE pages[] = {page}; + if (!FPDFImageObj_LoadJpegFileInline(pages, 1, image_obj, + &file_access)) { + FPDFPageObj_Destroy(image_obj); + Rcpp::stop("FPDFImageObj_LoadJpegFileInline failed; the bytes " + "may not be valid JPEG."); + } + + FPDFPage_InsertObject(page, image_obj); + + // No finalizer: the page owns the object now (PDFium will destroy + // it when the page closes). The externalptr's prot slot pins the + // page so handle_validation can detect a closed parent. + return R_MakeExternalPtr(image_obj, R_NilValue, page_ptr); +} + +// [[Rcpp::export(name = "cpp_image_set_matrix")]] +bool cpp_image_set_matrix(SEXP image_ptr, + double a, double b, double c, double d, + double e, double f) { + FPDF_PAGEOBJECT image_obj = static_cast( + pdfium_r::validate_handle(image_ptr, "Image object", + /*require_prot_alive=*/true)); + return FPDFImageObj_SetMatrix(image_obj, a, b, c, d, e, f) != 0; +} diff --git a/tests/testthat/test-font-authoring.R b/tests/testthat/test-font-authoring.R new file mode 100644 index 0000000..2f18563 --- /dev/null +++ b/tests/testthat/test-font-authoring.R @@ -0,0 +1,210 @@ +# Tests for font loading + custom-font text authoring: +# * pdf_font_load_standard() — wraps FPDFText_LoadStandardFont +# * pdf_font_load() — wraps FPDFText_LoadFont +# * pdf_font_close() — idempotent close +# * pdf_text_new(..., font = handle) — pdfium_font dispatch +# +# Standard-font tests run everywhere. TrueType tests skip when no +# system TTF can be located (Windows/macOS CI runners that don't ship +# DejaVu / Liberation, etc.). + +# Helper: fresh doc + page, scheduled to close in the caller. 
+font_blank_page <- function(envir = parent.frame()) { + doc <- pdf_doc_new() + withr::defer(pdf_doc_close(doc), envir = envir) + page <- pdf_page_new(doc, page_num = 1L, width = 612, height = 792) + withr::defer(pdf_page_close(page), envir = envir, + priority = "first") + list(doc = doc, page = page) +} + +# Helper: locate a system TrueType font path. Returns NULL when no +# candidate exists — tests gate themselves on this. Candidates +# cover Linux distros (Ubuntu / Debian / Alpine), macOS system +# fonts, and the Windows C:/Windows/Fonts directory. +find_system_ttf <- function() { + candidates <- c( + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", + "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", + "/usr/share/fonts/truetype/freefont/FreeSans.ttf", + "/usr/share/fonts/TTF/DejaVuSans.ttf", + "/Library/Fonts/Arial.ttf", + "/System/Library/Fonts/Supplemental/Arial.ttf", + "/System/Library/Fonts/Helvetica.ttc", + "C:/Windows/Fonts/arial.ttf" + ) + for (p in candidates) { + if (file.exists(p)) return(p) + } + NULL +} + +# pdf_font_load_standard -------------------------------------------- + +test_that("pdf_font_load_standard returns a pdfium_font handle", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Helvetica") + expect_s3_class(f, "pdfium_font") + expect_true(pdfium:::is_open(f)) + expect_identical(f$name, "Helvetica") +}) + +test_that("pdf_font_load_standard accepts every standard font name", { + s <- font_blank_page() + for (nm in c("Helvetica-Bold", "Times-Italic", "Courier", + "Symbol", "ZapfDingbats")) { + f <- pdf_font_load_standard(s$doc, nm) + expect_s3_class(f, "pdfium_font") + expect_identical(f$name, nm) + } +}) + +test_that("pdf_font_load_standard rejects non-standard names", { + s <- font_blank_page() + expect_error(pdf_font_load_standard(s$doc, "Comic Sans"), + "Assertion on") +}) + +test_that("pdf_font_load_standard refuses a read-only doc", { + doc <- pdf_doc_open(fixture_path("shapes")) + 
on.exit(pdf_doc_close(doc), add = TRUE) + expect_error(pdf_font_load_standard(doc, "Helvetica"), + "readwrite") +}) + +# pdf_font_close ---------------------------------------------------- + +test_that("pdf_font_close releases the handle and is idempotent", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Helvetica") + expect_true(pdfium:::is_open(f)) + pdf_font_close(f) + expect_false(pdfium:::is_open(f)) + # Second call: no-op. + expect_silent(pdf_font_close(f)) +}) + +# pdf_text_new with a pdfium_font handle ---------------------------- + +test_that("pdf_text_new dispatches on pdfium_font handles", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Times-Bold") + txt <- pdf_text_new(s$page, "Hi", font = f, + font_size = 14, x = 72, y = 720) + expect_identical(txt$type, "text") + expect_identical(pdf_text_content(txt), "Hi") +}) + +test_that("pdf_text_new rejects a closed pdfium_font handle", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Helvetica") + pdf_font_close(f) + expect_error(pdf_text_new(s$page, "x", font = f), + "closed") +}) + +# pdf_font_load (TrueType) ------------------------------------------ + +test_that("pdf_font_load loads a TrueType font from a path", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + s <- font_blank_page() + f <- pdf_font_load(s$doc, ttf) + expect_s3_class(f, "pdfium_font") + expect_true(pdfium:::is_open(f)) +}) + +test_that("pdf_font_load loads a TrueType font from raw bytes", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + s <- font_blank_page() + bytes <- readBin(ttf, what = "raw", n = file.info(ttf)$size) + f <- pdf_font_load(s$doc, bytes) + expect_s3_class(f, "pdfium_font") +}) + +test_that("pdf_font_load + pdf_text_new round-trips through pdf_save", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + s <- font_blank_page() + f <- pdf_font_load(s$doc, 
ttf) + pdf_text_new(s$page, "Hello, world!", + font = f, font_size = 18, x = 72, y = 720) + out <- withr::local_tempfile(fileext = ".pdf") + pdf_save(s$doc, out) + + doc2 <- pdf_doc_open(out) + on.exit(pdf_doc_close(doc2), add = TRUE) + page2 <- pdf_page_load(doc2, 1L) + on.exit(pdf_page_close(page2), add = TRUE, after = FALSE) + runs <- pdf_text_runs(page2) + expect_true(any(grepl("Hello", runs$text, fixed = TRUE))) +}) + +test_that("pdf_font_load rejects an unreadable path", { + s <- font_blank_page() + expect_error(pdf_font_load(s$doc, tempfile(fileext = ".ttf")), + "Font file not found") +}) + +test_that("pdf_font_load rejects unsupported font_data types", { + s <- font_blank_page() + expect_error(pdf_font_load(s$doc, 1L), + "must be a raw vector") +}) + +test_that("pdf_font_load rejects garbage font bytes", { + s <- font_blank_page() + garbage <- as.raw(c(0x00, 0x01, 0x02, 0x03)) + expect_error(pdf_font_load(s$doc, garbage), + "valid") +}) + +test_that("pdf_font_load refuses a read-only doc", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + doc <- pdf_doc_open(fixture_path("shapes")) + on.exit(pdf_doc_close(doc), add = TRUE) + expect_error(pdf_font_load(doc, ttf), + "readwrite") +}) + +test_that("pdf_font_load type argument is matched", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + s <- font_blank_page() + # Default ("truetype") works. + f <- pdf_font_load(s$doc, ttf) + expect_s3_class(f, "pdfium_font") + # Bad type is rejected. 
+ expect_error(pdf_font_load(s$doc, ttf, type = "ttf"), + "'arg' should be one of") +}) + +test_that("pdf_font_load validates cid flag", { + ttf <- find_system_ttf() + skip_if(is.null(ttf), "no system TrueType font available") + s <- font_blank_page() + expect_error(pdf_font_load(s$doc, ttf, cid = NA), + "Assertion on") + expect_error(pdf_font_load(s$doc, ttf, cid = "yes"), + "Assertion on") +}) + +# pdfium_font format/print methods ---------------------------------- + +test_that("format.pdfium_font reflects open/closed state + name", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Helvetica") + expect_match(format(f), "open") + expect_match(format(f), "Helvetica") + pdf_font_close(f) + expect_match(format(f), "closed") +}) + +test_that("print.pdfium_font emits the formatted line", { + s <- font_blank_page() + f <- pdf_font_load_standard(s$doc, "Courier") + expect_output(print(f), "pdfium_font") + expect_output(print(f), "Courier") +}) diff --git a/tests/testthat/test-image-authoring.R b/tests/testthat/test-image-authoring.R new file mode 100644 index 0000000..10f668c --- /dev/null +++ b/tests/testthat/test-image-authoring.R @@ -0,0 +1,127 @@ +# Tests for pdf_image_new (JPEG embedding via +# FPDFImageObj_LoadJpegFileInline). Each test builds a fresh +# in-memory doc + page via pdf_doc_new() / pdf_page_new() and +# generates a JPEG with grDevices::jpeg() so the fixture cost is +# zero on every CI runner. + +# Helper: fresh doc + page, scheduled to close in the caller. +img_blank_page <- function(envir = parent.frame()) { + doc <- pdf_doc_new() + withr::defer(pdf_doc_close(doc), envir = envir) + page <- pdf_page_new(doc, page_num = 1L, width = 612, height = 792) + withr::defer(pdf_page_close(page), envir = envir, + priority = "first") + list(doc = doc, page = page) +} + +# Helper: tiny JPEG file in tempdir + a path that auto-cleans. 
+# Uses a 256×256 canvas with zero margins so the rect fills the +# whole image — smaller dimensions hit "figure margins too large" +# from the default plot.new() margins. par() settings here apply +# only to the JPEG device we open and discard, so no restore is +# needed (restoring after dev.off() would re-open a default +# device and leak a stray Rplots.pdf into the working directory). +make_test_jpeg <- function(envir = parent.frame(), width = 256L, + height = 256L) { + path <- withr::local_tempfile(fileext = ".jpg", .local_envir = envir) + grDevices::jpeg(path, width = width, height = height) + on.exit(grDevices::dev.off(), add = TRUE) + graphics::par(mar = c(0, 0, 0, 0)) + graphics::plot.new() + graphics::rect(0, 0, 1, 1, col = "tomato", border = NA) + path +} + +# pdf_image_new (JPEG path) ----------------------------------------- + +test_that("pdf_image_new inserts an image obj from a JPEG file path", { + s <- img_blank_page() + jp <- make_test_jpeg() + obj <- pdf_image_new(s$page, jp, + bounds = c(72, 600, 272, 700)) + expect_s3_class(obj, "pdfium_obj") + expect_identical(obj$type, "image") + expect_setequal(s$doc$state$dirty_pages, 1L) + # The page now has exactly one object (the image). 
+ expect_equal(length(pdf_page_objects(s$page)), 1L) +}) + +test_that("pdf_image_new inserts an image obj from raw bytes", { + s <- img_blank_page() + jp <- make_test_jpeg() + bytes <- readBin(jp, what = "raw", n = file.info(jp)$size) + obj <- pdf_image_new(s$page, bytes, + bounds = c(72, 600, 272, 700)) + expect_identical(obj$type, "image") +}) + +test_that("pdf_image_new without bounds places at natural size", { + s <- img_blank_page() + jp <- make_test_jpeg(width = 32L, height = 32L) + obj <- pdf_image_new(s$page, jp) + expect_identical(obj$type, "image") +}) + +# Argument validation ----------------------------------------------- + +test_that("pdf_image_new rejects an unreadable path", { + s <- img_blank_page() + expect_error(pdf_image_new(s$page, tempfile(fileext = ".jpg")), + "JPEG file not found") +}) + +test_that("pdf_image_new rejects unsupported `jpeg` types", { + s <- img_blank_page() + expect_error(pdf_image_new(s$page, 1L), + "must be a raw vector") + expect_error(pdf_image_new(s$page, list(raw(1L))), + "must be a raw vector") +}) + +test_that("pdf_image_new validates `bounds` shape", { + s <- img_blank_page() + jp <- make_test_jpeg() + expect_error(pdf_image_new(s$page, jp, bounds = c(1, 2, 3)), + "Assertion on") + expect_error(pdf_image_new(s$page, jp, + bounds = c(NA_real_, 0, 1, 1)), + "Assertion on") +}) + +# Read-only / closed-page rejection --------------------------------- + +test_that("pdf_image_new refuses a read-only doc", { + doc <- pdf_doc_open(fixture_path("shapes")) + on.exit(pdf_doc_close(doc), add = TRUE) + page <- pdf_page_load(doc, 1L) + on.exit(pdf_page_close(page), add = TRUE, after = FALSE) + jp <- make_test_jpeg() + expect_error(pdf_image_new(page, jp), "readwrite") +}) + +test_that("pdf_image_new refuses a closed page", { + doc <- pdf_doc_new() + on.exit(pdf_doc_close(doc), add = TRUE) + page <- pdf_page_new(doc, 1L, 612, 792) + pdf_page_close(page) + jp <- make_test_jpeg() + expect_error(pdf_image_new(page, jp), "Page has been 
closed") +}) + +# Round-trip: open the saved PDF and confirm one image object ------- + +test_that("pdf_image_new round-trips through pdf_save", { + s <- img_blank_page() + jp <- make_test_jpeg(width = 16L, height = 16L) + pdf_image_new(s$page, jp, bounds = c(0, 0, 100, 100)) + out <- withr::local_tempfile(fileext = ".pdf") + pdf_save(s$doc, out) + + doc2 <- pdf_doc_open(out) + on.exit(pdf_doc_close(doc2), add = TRUE) + page2 <- pdf_page_load(doc2, 1L) + on.exit(pdf_page_close(page2), add = TRUE, after = FALSE) + objs <- pdf_page_objects(page2) + types <- vapply(objs, function(o) o$type, character(1L)) + expect_true("image" %in% types) +}) diff --git a/vignettes/comparison.Rmd b/vignettes/comparison.Rmd index c461ca3..87c7ecd 100644 --- a/vignettes/comparison.Rmd +++ b/vignettes/comparison.Rmd @@ -98,8 +98,13 @@ supported subtypes lives in `?pdf_annot_new`. plus the page-object creators ([`pdf_path_new`](https://humanpred.github.io/rpdfium/reference/pdf_path_new.html), [`pdf_rect_new`](https://humanpred.github.io/rpdfium/reference/pdf_rect_new.html), [`pdf_text_new`](https://humanpred.github.io/rpdfium/reference/pdf_text_new.html), +[`pdf_image_new`](https://humanpred.github.io/rpdfium/reference/pdf_image_new.html), +[`pdf_font_load`](https://humanpred.github.io/rpdfium/reference/pdf_font_load.html) +/ [`pdf_font_load_standard`](https://humanpred.github.io/rpdfium/reference/pdf_font_load_standard.html), plus the path-geometry appenders) let you build PDFs from scratch -in R. The mutating-pdfs vignette walks through the workflow. +in R — vector graphics, JPEG images, text in the 14 PDF standard +fonts, and arbitrary TrueType / Type1 typefaces. The mutating-pdfs +vignette walks through the workflow. **What scales fine.** Page count is unlimited (PDFium handles thousand-page docs efficiently); objects-per-page are unlimited; @@ -109,23 +114,12 @@ paths — is exposed; annotations are richly covered. 
The R↔C boundary cost is microseconds per call, so 10⁶ object writes is seconds, not minutes. -**v0.1.0 limits worth knowing about.** Four authoring axes have -real gaps in the current release. They split cleanly into two -groups by what's blocking closure: - -*Blocked on this package's roadmap* (PDFium exposes the symbols; -we just haven't wrapped them yet — closure timing is in our -hands): - -| Gap | Unwrapped PDFium symbols | Workaround today | -|---|---|---| -| **Image embedding** | `FPDFImageObj_LoadJpegFile`, `FPDFImageObj_LoadJpegFileInline`, `FPDFImageObj_SetBitmap` — all in `inst/include/fpdf_edit.h`; deferred from Phase 5 pending an `FPDF_BITMAP` lifecycle design | Use `minipdf` for image-heavy programmatic PDFs, or render images separately and embed via a post-process | -| **Custom fonts** | `FPDFText_LoadFont`, `FPDFText_LoadStandardFont`, `FPDFText_LoadCidType2Font` — all in `inst/include/fpdf_edit.h`; not yet wrapped, needs a `pdfium_font` S3 class with `FPDFFont_Close` finalizer | For non-Latin scripts or branded type, render text as paths via an external tool and embed the resulting vector paths | - -*Blocked on upstream PDFium* (the symbols don't exist yet — we've -proposed them but they need to ship through Google's Gerrit review -cycle, land in a PDFium release, and propagate to a `bblanchon` -binary before we can wrap them): +**v0.1.0 limits worth knowing about.** Two authoring axes have +real gaps in the current release — both blocked on upstream +PDFium (the symbols don't exist yet — we've proposed them but they +need to ship through Google's Gerrit review cycle, land in a +PDFium release, and propagate to a `bblanchon` binary before we +can wrap them): | Gap | Missing PDFium symbol(s) | Workaround today | |---|---|---| diff --git a/vignettes/mutating-pdfs.Rmd b/vignettes/mutating-pdfs.Rmd index 477b30c..638ee75 100644 --- a/vignettes/mutating-pdfs.Rmd +++ b/vignettes/mutating-pdfs.Rmd @@ -77,16 +77,15 @@ PDFs. 
doc <- pdf_doc_new() page <- pdf_page_new(doc, width = 612, height = 792) -# A red filled rectangle. -rect <- pdf_rect_new(page, bounds = c(50, 600, 250, 700)) +# A red filled rectangle (x, y, width, height) in PDF points. +rect <- pdf_rect_new(page, x = 50, y = 600, width = 200, height = 100) pdf_path_set_fill(rect, color = c(220, 50, 47)) -pdf_path_set_draw_mode(rect, draw_mode = "fill") +pdf_path_set_draw_mode(rect, fill_mode = "winding", stroke = FALSE) # A text object on top. -txt <- pdf_text_new(page, font = "Helvetica-Bold", size = 18) -pdf_text_set_content(txt, "Hello pdfium!") -pdf_obj_set_matrix(txt, matrix(c(1, 0, 0, 0, 1, 0, 80, 640, 1), - nrow = 3, byrow = TRUE)) +pdf_text_new(page, "Hello pdfium!", + font = "Helvetica-Bold", font_size = 18, + x = 80, y = 640) pdf_save(doc, "hello.pdf") ``` @@ -98,6 +97,44 @@ data-frame of segments (matching the schema of `pdf_path_segments()`) into a path in one call — useful for echoing geometry from one document into another. +### Embedding a JPEG image + +`pdf_image_new()` wraps `FPDFImageObj_LoadJpegFileInline` — the JPEG +bytes are copied into the PDF up front, so the source raw vector or +file can be discarded as soon as the call returns. Pass `bounds = c(left, +bottom, right, top)` to place the image at a specific rectangle in +PDF user-space points; without `bounds`, the image sits at the +origin at its natural pixel size (rarely what you want). + +```{r, eval = FALSE} +jpeg_path <- system.file("img", "Rlogo.jpg", package = "jpeg") +pdf_image_new(page, jpeg_path, + bounds = c(72, 400, 272, 500)) +``` + +PNG / TIFF / raw-bitmap embedding is deferred to a later release — +convert to JPEG with `magick::image_write(..., format = "jpeg")` if +needed. + +### Custom fonts + +The 14 PDF standard fonts work without embedding — pass the name +straight through (`pdf_text_new(page, "...", font = "Times-Italic", +...)`). 
For arbitrary TrueType or Type1 typefaces, load the font +first via `pdf_font_load()` (or `pdf_font_load_standard()` if you +just want a handle for a named standard font) and pass the +resulting `pdfium_font` handle into `pdf_text_new()`: + +```{r, eval = FALSE} +font <- pdf_font_load(doc, "/path/to/MyTypeface-Regular.ttf") +pdf_text_new(page, "ünïcödé ✓", font = font, font_size = 24, + x = 72, y = 300) +``` + +The font handle's `FPDFFont_Close` finalizer releases it on garbage +collection; until then the externalptr's `prot` slot pins the doc. +`pdf_font_close(font)` is idempotent; call it for deterministic release. + ## Styling an existing object The setters mirror the readers one-for-one: