Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
.Rhistory
.RData
.Ruserdata
Rplots.pdf
tests/testthat/Rplots.pdf
.Rproj.user/
*.Rproj
*.Rcheck/
Expand Down
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@ Collate:
'utils.R'
'enum_codes.R'
'extract.R'
'fonts.R'
'form_field_setters.R'
'forms.R'
'glyph_paths.R'
'image_authoring.R'
'images.R'
'mutation.R'
'name_lookups.R'
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ S3method(format,pdfium_bookmark)
S3method(format,pdfium_bookmark_list)
S3method(format,pdfium_clip_path)
S3method(format,pdfium_doc)
S3method(format,pdfium_font)
S3method(format,pdfium_form_field)
S3method(format,pdfium_form_field_list)
S3method(format,pdfium_obj)
Expand All @@ -35,6 +36,7 @@ S3method(print,pdfium_bookmark)
S3method(print,pdfium_bookmark_list)
S3method(print,pdfium_clip_path)
S3method(print,pdfium_doc)
S3method(print,pdfium_font)
S3method(print,pdfium_form_field)
S3method(print,pdfium_form_field_list)
S3method(print,pdfium_obj)
Expand Down Expand Up @@ -138,6 +140,9 @@ export(pdf_doc_viewer_preferences)
export(pdf_doc_xref_valid)
export(pdf_docs_merge)
export(pdf_extract_paths)
export(pdf_font_close)
export(pdf_font_load)
export(pdf_font_load_standard)
export(pdf_form_field_additional_actions_js)
export(pdf_form_field_alternate_name)
export(pdf_form_field_at_point)
Expand Down Expand Up @@ -166,6 +171,7 @@ export(pdf_image_data)
export(pdf_image_filters)
export(pdf_image_icc_profile)
export(pdf_image_info)
export(pdf_image_new)
export(pdf_image_rendered)
export(pdf_image_size)
export(pdf_link_annot_at_point)
Expand Down
15 changes: 13 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,19 @@ release on scope grounds (see `CLAUDE.md` §"Scope"):
`pdf_path_close()`, `pdf_path_append()` — append path geometry to
an existing path object.
* `pdf_path_new()`, `pdf_rect_new()`, `pdf_text_new()`,
`pdf_obj_delete()` — create fresh paths, rectangles, and text
objects, or remove an existing one.
`pdf_image_new()`, `pdf_obj_delete()` — create fresh paths,
rectangles, text objects, or JPEG images, or remove an existing
object. `pdf_text_new()` accepts either a standard-font name or
a `pdfium_font` handle from `pdf_font_load_standard()` /
`pdf_font_load()`; `pdf_image_new()` embeds JPEG bytes inline
via `FPDFImageObj_LoadJpegFileInline` and lets you place the
image into an explicit `bounds = c(left, bottom, right, top)`
rectangle.
* `pdf_font_load_standard()`, `pdf_font_load()`, `pdf_font_close()`
— load one of the 14 PDF standard fonts (no embedding) or
embed an arbitrary TrueType / Type1 font's bytes into the
document. Returned `pdfium_font` handles plug straight into
`pdf_text_new()` for custom-typeface text.

## Annotation authoring

Expand Down
24 changes: 24 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,22 @@ cpp_doc_file_version <- function(doc_ptr) {
.Call(`_pdfium_cpp_doc_file_version`, doc_ptr)
}

# Forwards `doc_ptr` and `font_name` unchanged to the native symbol
# `_pdfium_cpp_font_load_standard` via .Call() and returns that call's
# result. pdf_font_load_standard() calls this with `doc$ptr` and a font
# name it has already validated against the standard-font list.
cpp_font_load_standard <- function(doc_ptr, font_name) {
.Call(`_pdfium_cpp_font_load_standard`, doc_ptr, font_name)
}

# Forwards its four arguments unchanged to the native symbol
# `_pdfium_cpp_font_load_truetype` via .Call() and returns that call's
# result. Despite the name, pdf_font_load() uses this entry point for
# both font kinds: it passes `font_type` 1L for "type1" and 2L for
# "truetype", raw font bytes as `font_data`, and a logical `cid` flag.
cpp_font_load_truetype <- function(doc_ptr, font_data, font_type, cid) {
.Call(`_pdfium_cpp_font_load_truetype`, doc_ptr, font_data, font_type, cid)
}

# Forwards `font_ptr` to the native symbol `_pdfium_cpp_font_close` via
# .Call(). The native result is wrapped in invisible(), so nothing is
# auto-printed at the console. pdf_font_close() calls this with the
# `ptr` element of a `pdfium_font`.
cpp_font_close <- function(font_ptr) {
invisible(.Call(`_pdfium_cpp_font_close`, font_ptr))
}

# Forwards the document, page and font pointers together with
# `font_size`, `text_utf8`, `x` and `y` unchanged to the native symbol
# `_pdfium_cpp_text_new_with_font` via .Call() and returns that call's
# result. No validation happens at this level.
cpp_text_new_with_font <- function(doc_ptr, page_ptr, font_ptr, font_size, text_utf8, x, y) {
.Call(`_pdfium_cpp_text_new_with_font`, doc_ptr, page_ptr, font_ptr, font_size, text_utf8, x, y)
}

# Forwards `doc_ptr` unchanged to the native symbol
# `_pdfium_cpp_form_field_handles` via .Call() and returns that call's
# result.
cpp_form_field_handles <- function(doc_ptr) {
.Call(`_pdfium_cpp_form_field_handles`, doc_ptr)
}
Expand Down Expand Up @@ -361,6 +377,14 @@ cpp_text_char_font_info <- function(page_ptr) {
.Call(`_pdfium_cpp_text_char_font_info`, page_ptr)
}

# Forwards `doc_ptr`, `page_ptr` and `jpeg_bytes` unchanged to the
# native symbol `_pdfium_cpp_image_new_from_jpeg` via .Call() and
# returns that call's result.
cpp_image_new_from_jpeg <- function(doc_ptr, page_ptr, jpeg_bytes) {
.Call(`_pdfium_cpp_image_new_from_jpeg`, doc_ptr, page_ptr, jpeg_bytes)
}

# Forwards `image_ptr` and the six values `a` .. `f` unchanged to the
# native symbol `_pdfium_cpp_image_set_matrix` via .Call() and returns
# that call's result.
# NOTE(review): `a` .. `f` are presumably the six entries of a PDF
# affine transformation matrix -- confirm against the C++ shim.
cpp_image_set_matrix <- function(image_ptr, a, b, c, d, e, f) {
.Call(`_pdfium_cpp_image_set_matrix`, image_ptr, a, b, c, d, e, f)
}

# Forwards `obj_ptr` and `page_ptr` unchanged to the native symbol
# `_pdfium_cpp_image_metadata` via .Call() and returns that call's
# result.
cpp_image_metadata <- function(obj_ptr, page_ptr) {
.Call(`_pdfium_cpp_image_metadata`, obj_ptr, page_ptr)
}
Expand Down
70 changes: 65 additions & 5 deletions R/classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ new_pdfium_doc <- function(ptr, path, readwrite = FALSE) {
state <- new.env(parent = emptyenv())
state$dirty_pages <- integer(0L)
state$ffl_env <- NULL
# `open_pages` maps "page index" → externalptr of an open
# pdfium_page handle. Updated by new_pdfium_page (when a page is
# loaded or newly created) and by pdf_page_close (when one is
# released). flush_dirty_pages uses the registered externalptr
# so FPDFPage_GenerateContent runs on the handle that actually
# accumulated the edits — a fresh FPDF_LoadPage returns a
# different page handle that hasn't seen the user's inserts.
state$open_pages <- list()
structure(
list(ptr = ptr, path = path,
readwrite = readwrite, state = state),
Expand Down Expand Up @@ -48,11 +56,13 @@ is_open <- function(x) {
if (inherits(x, c("pdfium_obj", "pdfium_annot"))) {
return(cpp_handle_is_valid(x$ptr) && is_open(x$page))
}
# Doc-children: attachment / signature / bookmark all carry no
# finalizer and live as long as the parent doc lives. The doc's
# is_open() is the authoritative liveness signal.
# Doc-children: attachment / signature / bookmark carry no
# finalizer and live as long as the parent doc lives. Font has
# its own finalizer (FPDFFont_Close), but its lifetime is still
# doc-bound; the own-ptr check is necessary alongside the doc
# check (same shape as the annot case above).
doc_owned_classes <- c("pdfium_attachment", "pdfium_signature",
"pdfium_bookmark")
"pdfium_bookmark", "pdfium_font")
if (inherits(x, doc_owned_classes)) {
return(cpp_handle_is_valid(x$ptr) && is_open(x$doc))
}
Expand Down Expand Up @@ -90,8 +100,19 @@ new_pdfium_page <- function(ptr, doc, index) {
checkmate::assert_class(ptr, "externalptr")
checkmate::assert_class(doc, "pdfium_doc")
checkmate::assert_number(index)
index <- as.integer(index)
# Register the externalptr in the doc's open-pages registry so
# pdf_save's flush_dirty_pages can call FPDFPage_GenerateContent
# on the actual user-modified handle. FPDF_LoadPage returns a
# different page handle for the same index than FPDFPage_New
# does — generating content on a fresh load wipes out the
# original handle's pending edits.
state <- doc$state
if (!is.null(state)) {
state$open_pages[[as.character(index)]] <- ptr
}
structure(
list(ptr = ptr, doc = doc, index = as.integer(index)),
list(ptr = ptr, doc = doc, index = index),
class = c("pdfium_page", "pdfium_handle")
)
}
Expand Down Expand Up @@ -217,6 +238,45 @@ print.pdfium_obj_list <- function(x, ...) {
invisible(x)
}

#' Wrap an `FPDF_FONT` external pointer as a `pdfium_font`
#'
#' Internal constructor. It validates its three inputs and bundles
#' them into a classed list; it makes no PDFium calls itself. The
#' externalptr is expected to arrive from `cpp_font_load_*` already
#' carrying a C finalizer that calls `FPDFFont_Close`, with the parent
#' doc pinned in its `prot` slot so R's GC cannot reclaim the doc
#' while any font handle is still reachable.
#'
#' `name` is purely a display label: the spec name for standard
#' fonts, the file's basename for TTF/Type1 fonts loaded from a path,
#' or `"<raw>"` for fonts loaded from in-memory bytes.
#'
#' @param ptr An `externalptr` to an `FPDF_FONT`.
#' @param doc The parent `pdfium_doc`.
#' @param name Character scalar used as the display label.
#' @return A list with elements `ptr`, `doc` and `name`, of class
#'   `c("pdfium_font", "pdfium_handle")`.
#' @keywords internal
#' @noRd
new_pdfium_font <- function(ptr, doc, name) {
  checkmate::assert_class(ptr, "externalptr")
  checkmate::assert_class(doc, "pdfium_doc")
  checkmate::assert_string(name)
  font <- list(ptr = ptr, doc = doc, name = name)
  class(font) <- c("pdfium_font", "pdfium_handle")
  font
}

# One-line summary of a font handle: its liveness as reported by
# is_open(), followed by the display name stored on the handle.
#' @export
format.pdfium_font <- function(x, ...) {
  sprintf(
    "<pdfium_font [%s] %s>",
    if (is_open(x)) "open" else "closed",
    x$name
  )
}

# Writes the format() rendering of the handle followed by a newline,
# then returns the handle invisibly so the call can sit in a pipeline.
#' @export
print.pdfium_font <- function(x, ...) {
  rendered <- format(x, ...)
  cat(rendered, "\n", sep = "")
  invisible(x)
}

#' Construct a `pdfium_annot` from an external pointer
#'
#' Internal helper. The `FPDF_ANNOTATION` handle has its own
Expand Down
168 changes: 168 additions & 0 deletions R/fonts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Font loading + handle class for the page-authoring API.
#
# Three constructors:
# * pdf_font_load_standard(doc, name) — one of the 14 PDF standard
# Type 1 fonts. No bytes needed.
# * pdf_font_load(doc, font_data, type) — TTF / Type1 bytes, embedded
# into the PDF.
# * (internal) new_pdfium_font(ptr, doc, name) wraps an externalptr.
#
# Lifetime: the C-side externalptr carries a finalizer that calls
# FPDFFont_Close. The `prot` slot pins the parent doc so the doc can't
# be GC'd before the font. Calling pdf_font_close() is idempotent.
#
# To draw with a custom font, pass the `pdfium_font` handle as the
# `font` argument of pdf_text_new(); R-side dispatch picks the
# custom-font code path automatically.

# Names of the 14 standard PDF Type 1 fonts, which every PDF reader is
# required to support; PDFium accepts any of them without font bytes
# being embedded. One row per family, ordered regular / bold /
# oblique-or-italic / bold-oblique-or-bold-italic.
.pdfium_standard_fonts <- c(
  "Helvetica", "Helvetica-Bold", "Helvetica-Oblique", "Helvetica-BoldOblique",
  "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic",
  "Courier", "Courier-Bold", "Courier-Oblique", "Courier-BoldOblique",
  "Symbol", "ZapfDingbats"
)

#' Load one of the 14 PDF standard fonts
#'
#' Wraps `FPDFText_LoadStandardFont`. Every PDF reader is required by
#' the PDF spec to support the 14 standard fonts (Helvetica,
#' Times-Roman and Courier, each in four weight/style variants, plus
#' Symbol and ZapfDingbats), so no font bytes are embedded.
#'
#' For arbitrary TrueType / Type1 fonts, use [pdf_font_load()].
#'
#' @param doc A `pdfium_doc` opened with `readwrite = TRUE`.
#' @param name Character scalar; must be one of the names listed in
#'   the **Standard fonts** section below, otherwise an error is
#'   raised before PDFium is called.
#' @return A `pdfium_font` handle whose display name is `name`. Pass
#'   it as the `font` argument of [pdf_text_new()].
#'
#' @section Standard fonts:
#' `"Helvetica"`, `"Helvetica-Bold"`, `"Helvetica-Oblique"`,
#' `"Helvetica-BoldOblique"`, `"Times-Roman"`, `"Times-Bold"`,
#' `"Times-Italic"`, `"Times-BoldItalic"`, `"Courier"`,
#' `"Courier-Bold"`, `"Courier-Oblique"`, `"Courier-BoldOblique"`,
#' `"Symbol"`, `"ZapfDingbats"`.
#'
#' @seealso [pdf_font_load()], [pdf_text_new()], [pdf_font_close()].
#' @examples
#' \dontrun{
#' doc <- pdf_doc_new()
#' page <- pdf_page_new(doc, width = 612, height = 792)
#' font <- pdf_font_load_standard(doc, "Times-Italic")
#' pdf_text_new(page, "hello", font = font, font_size = 24,
#'              x = 72, y = 720)
#' pdf_save(doc, tempfile(fileext = ".pdf"))
#' }
#' @export
pdf_font_load_standard <- function(doc, name) {
  assert_readwrite(doc)
  checkmate::assert_choice(name, choices = .pdfium_standard_fonts)
  handle <- cpp_font_load_standard(doc$ptr, name)
  new_pdfium_font(handle, doc, name)
}

#' Load a TrueType or Type1 font from bytes
#'
#' Wraps `FPDFText_LoadFont`. The font bytes are embedded into the PDF
#' up front; PDFium copies them, so the input may be garbage-collected
#' once the call returns. For the 14 PDF standard fonts, which need no
#' embedding, use [pdf_font_load_standard()] instead.
#'
#' @param doc A `pdfium_doc` opened with `readwrite = TRUE`.
#' @param font_data Either a raw vector holding the font bytes or a
#'   character path to a font file on disk.
#' @param type Character: `"truetype"` (the default) for TrueType /
#'   OTF fonts, or `"type1"` for Type1 fonts.
#' @param cid Logical flag. If `TRUE` (the default), the font is
#'   loaded as a CID (composite) font. CID encoding is required for
#'   fonts with more than 255 glyphs, i.e. anything that needs to
#'   render non-Latin-1 text. The non-CID path (`cid = FALSE`) is
#'   smaller on disk but limited to the standard PDF encodings. When
#'   in doubt, leave this as `TRUE`.
#' @return A `pdfium_font` handle. Its display name is the basename
#'   of `font_data` when a path was given, or `"<raw>"` when raw
#'   bytes were given. Pass it as the `font` argument of
#'   [pdf_text_new()].
#'
#' @seealso [pdf_font_load_standard()], [pdf_text_new()],
#'   [pdf_font_close()].
#' @examples
#' \dontrun{
#' doc <- pdf_doc_new()
#' page <- pdf_page_new(doc, width = 612, height = 792)
#' ttf <- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
#' if (file.exists(ttf)) {
#'   font <- pdf_font_load(doc, ttf)
#'   pdf_text_new(page, "hello", font = font, font_size = 24,
#'                x = 72, y = 720)
#' }
#' pdf_save(doc, tempfile(fileext = ".pdf"))
#' }
#' @export
pdf_font_load <- function(doc, font_data,
                          type = c("truetype", "type1"),
                          cid = TRUE) {
  assert_readwrite(doc)
  type <- match.arg(type)
  checkmate::assert_flag(cid)
  font_bytes <- coerce_font_bytes(font_data)
  # Integer code handed to the C++ shim: 1L selects Type1, 2L selects
  # TrueType. match.arg() above guarantees `type` is one of the two.
  type_code <- switch(type, type1 = 1L, truetype = 2L)
  handle <- cpp_font_load_truetype(doc$ptr, font_bytes, type_code, cid)
  # Label the handle with the file name when we have one.
  label <- "<raw>"
  if (is.character(font_data)) {
    label <- basename(font_data)
  }
  new_pdfium_font(handle, doc, label)
}

#' Close a font handle
#'
#' Releases the underlying PDFium font. The call is idempotent: a
#' second call on the same handle is a no-op. The finalizer attached
#' to the externalptr performs the same release when R
#' garbage-collects the `pdfium_font`, so calling this explicitly is
#' only needed when you want deterministic release, for example
#' after loading many large fonts.
#'
#' Closing a font does **not** invalidate text objects that already
#' used it, because PDFium keeps an internal reference. Only the
#' embedder's R-side handle is released.
#'
#' @param font A `pdfium_font` from [pdf_font_load_standard()] or
#'   [pdf_font_load()]. Any other class is rejected with an error.
#' @return `font`, invisibly.
#' @export
pdf_font_close <- function(font) {
  checkmate::assert_class(font, classes = "pdfium_font")
  handle <- font$ptr
  cpp_font_close(handle)
  invisible(font)
}

# Internal: coerce the `font_data` argument (raw vector or path) into
# a non-empty raw vector ready to hand to the C++ shim. Counterpart of
# coerce_jpeg_bytes() in R/image_authoring.R.
#
# * raw vector      -> returned as-is after asserting length >= 1.
# * character path  -> must be a single non-empty string naming an
#                      existing, non-directory, non-empty file; the
#                      whole file is read and returned as raw bytes.
# * anything else   -> error naming the offending class.
#
# The directory and empty-file checks keep the path branch in line
# with the raw branch's `min.len = 1L` guarantee: file.exists() is
# TRUE for directories, and a zero-byte file would otherwise yield
# raw(0) and be passed on to PDFium.
coerce_font_bytes <- function(font_data) {
  if (is.raw(font_data)) {
    checkmate::assert_raw(font_data, min.len = 1L)
    return(font_data)
  }
  if (is.character(font_data)) {
    checkmate::assert_string(font_data, min.chars = 1L)
    if (!file.exists(font_data)) {
      stop("Font file not found: ", font_data, call. = FALSE)
    }
    # One stat call supplies both the directory flag and the size.
    info <- file.info(font_data, extra_cols = FALSE)
    if (isTRUE(info$isdir)) {
      stop("Font path is a directory, not a file: ", font_data,
           call. = FALSE)
    }
    if (is.na(info$size) || info$size < 1) {
      stop("Font file is empty or unreadable: ", font_data,
           call. = FALSE)
    }
    return(readBin(font_data, what = "raw", n = info$size))
  }
  stop("`font_data` must be a raw vector of font bytes or a path to ",
       "a font file. Got: ", class(font_data)[[1L]], call. = FALSE)
}
Loading
Loading