diff --git a/DESCRIPTION b/DESCRIPTION index 1f4e5fbc..b51425f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Authors@R: c( Imports: checkmate, dplyr (>= 1.1.0), - digest, + digest, nlme, purrr, rlang, @@ -23,6 +23,7 @@ Suggests: covr, cowplot, ggplot2, + haven, knitr, labeling, pander, @@ -30,7 +31,10 @@ Suggests: spelling, testthat (>= 3.0.0), units, - withr + withr, + rio, + zoo, + janitor Description: Compute standard Non-Compartmental Analysis (NCA) parameters for typical pharmacokinetic analyses and summarize them. License: AGPL-3 diff --git a/NAMESPACE b/NAMESPACE index 34b4580e..a69fd304 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,8 @@ export(getDepVar) export(getGroups) export(getIndepVar) export(get_halflife_points) +export(get_mapped_column) +export(get_pk_patterns) export(group_by) export(inner_join) export(interp.extrap.conc) @@ -165,6 +167,7 @@ export(interp.extrap.conc.dose) export(interpolate.conc) export(is_sparse_pk) export(left_join) +export(load_pk_data) export(mutate) export(normalize) export(normalize_by_col) diff --git a/NEWS.md b/NEWS.md index c13f0a91..bf66a9d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,13 @@ the dosing including dose amount and route. # PKNCA 0.12.2 +# PKNCA (development version) + +## New features +* `load_pk_data()` now provides a unified pipeline for loading, classifying, + cleaning, and standardising PK data across multiple file formats, with + improved column detection, BLQ handling, and decimal formatting. + ## Bug Fixes * `normalize.data.frame()` no longer triggers a dplyr deprecation warning diff --git a/R/load_pk_data.R b/R/load_pk_data.R new file mode 100644 index 00000000..dd2f7a2b --- /dev/null +++ b/R/load_pk_data.R @@ -0,0 +1,911 @@ +# ============================================================================= +# PK Data Loader +# ============================================================================= +# Loads, classifies, cleans, and standardises pharmacokinetic data from +# multiple file formats (XPT, XLSX, XLS, CSV, TXT, SAS7BDAT). +# ============================================================================= + +# ============================================================================= +# 1. Public API +# ============================================================================= + +#' Load and Process Pharmacokinetic Data +#' +#' Streamlines loading, cleaning, and standardisation of PK data from multiple +#' file formats (XPT, XLSX, XLS, CSV, TXT, SAS7BDAT). +#' +#' @param path Character. Directory containing PK files. Default: \code{getwd()}. +#' @param file_types Character vector. File extensions to search for. Default: +#' \code{c("xpt","xlsx","xls","csv","txt","sas7bdat")}. +#' @param patterns Named list. Regex patterns for PK column roles. +#' See \code{\link{get_pk_patterns}}. +#' @param decimal_control Logical. Apply smart decimal formatting? Default \code{TRUE}. +#' @param blq_handling Logical. Apply BLQ interpolation? Default \code{TRUE}. +#' @param bind_rows Logical. Bind multiple files into one data frame? Default \code{TRUE}. +#' @param verbose Logical. Print detailed progress? Default \code{TRUE}. +#' +#' @return A list of class \code{pk_data_list}: +#' \item{conc}{Concentration data frame (if found)} +#' \item{dose}{Dose data frame (if found)} +#' +#' @export +#' @examples +#' \dontrun{ +#' pk <- load_pk_data(path = "path/to/data", verbose = TRUE) +#' conc <- pk$conc +#' dose <- pk$dose +#' } +load_pk_data <- function(path = getwd(), + file_types = NULL, + patterns = get_pk_patterns(), + decimal_control = TRUE, + blq_handling = TRUE, + bind_rows = TRUE, + verbose = TRUE) { + + # ---- argument validation -------------------------------------------------- + checkmate::assert_string(path, min.chars = 1) + checkmate::assert_directory_exists(path) + checkmate::assert_character(file_types, null.ok = TRUE, min.chars = 1) + checkmate::assert_list(patterns, min.len = 1, names = "named") + checkmate::assert_flag(decimal_control) + checkmate::assert_flag(blq_handling) + checkmate::assert_flag(bind_rows) + checkmate::assert_flag(verbose) + + lapply(patterns, function(p) { + if (!is.character(p)) + rlang::abort("All entries in `patterns` must be character vectors.") + }) + + # ---- 1. resolve files ----------------------------------------------------- + file_set <- resolve_pk_files( + path = path, + file_types = file_types, + patterns = patterns, + verbose = verbose + ) + + # ---- 2. load & (optionally) bind ------------------------------------------ + result <- list() + type_map <- c(conc = "concentration", dose = "dose") + + for (role in names(type_map)) { + files <- file_set[[role]] + if (length(files) == 0) next + + result[[role]] <- load_and_bind_pk_files( + paths = files, + type = type_map[[role]], + patterns = patterns, + verbose = verbose, + bind_rows = bind_rows + ) + } + + quick_validate(result) # lightweight safety net + + # ---- 3. post-processing --------------------------------------------------- + process_map <- list( + conc = function(x) process_conc_data(x, decimal_control, blq_handling, verbose), + dose = function(x) process_dose_data(x, decimal_control, verbose) + ) + + for (role in names(process_map)) { + if (!is.null(result[[role]])) + result[[role]] <- process_map[[role]](result[[role]]) + } + + class(result) <- c("pk_data_list", class(result)) + result +} + +#' Lightweight Result Validation +#' +#' @keywords internal +quick_validate <- function(result) { + if (is.null(result$conc) && is.null(result$dose)){ + rlang::abort("No valid PK data loaded \u2014 neither conc nor dose was found.") + } + + invisible(TRUE) +} + +# ============================================================================= +# 2. File Resolution +# ============================================================================= + +#' Resolve PK Files in a Directory +#' +#' Scans \code{path} for supported files and classifies each as +#' "conc", "dose", "combined", or "unknown". +#' +#' @keywords internal +resolve_pk_files <- function(path, + file_types = NULL, + patterns, + verbose = FALSE) { + + # path validity already guaranteed by load_pk_data(); skip redundant check + if (is.null(file_types)){ + file_types <- c("xpt", "xlsx", "xls", "csv", "txt", "sas7bdat") + } + + ext_pat <- paste0("\\.(", paste(file_types, collapse = "|"), ")$") + + if (verbose){message("Supported extensions: ", paste(file_types, collapse = ", "))} + + files <- list.files(path, pattern = ext_pat, + full.names = TRUE, ignore.case = TRUE) + + if (length(files) == 0){ + rlang::abort(sprintf("No supported files found in: %s", path)) + } + + # ---- role detection ------------------------------------------------------- + roles <- vapply( + X = files, + FUN = detect_role, + FUN.VALUE = character(1), + patterns = patterns, + verbose = verbose + ) + + if (verbose) { + message("File role detection:") + for (i in seq_along(files)) + message(sprintf(" %s \u2192 %s", basename(files[i]), roles[i])) + } + + combined_files <- files[roles == "combined"] + conc_only <- files[roles == "conc"] + dose_only <- files[roles == "dose"] + + # If any combined files exist, use them exclusively for both roles + if (length(combined_files) > 0) { + if (verbose) message("Combined conc+dose file(s) found \u2192 single-file mode") + return(structure( + list(conc = combined_files, dose = combined_files), + class = "pk_file_set" + )) + } + + # Warn when one role is absent + if (length(conc_only) == 0 && length(dose_only) == 0) + rlang::abort(sprintf( + "Neither concentration nor dose file detected.\nFiles found: %s\n%s", + paste(basename(files), collapse = ", "), + "Ensure files contain required columns (conc/concentration and/or dose/amt)." + )) + + if (length(conc_only) == 0) + rlang::warn(sprintf( + "No concentration file detected. Dose file(s): %s", + paste(basename(dose_only), collapse = ", ") + )) + + if (length(dose_only) == 0) + rlang::warn(sprintf( + "No dose file detected. Concentration file(s): %s", + paste(basename(conc_only), collapse = ", ") + )) + + structure(list(conc = conc_only, dose = dose_only), class = "pk_file_set") +} + + +# ============================================================================= +# 3. File Loading & Binding +# ============================================================================= + +#' Load and Bind Multiple PK Files +#' +#' @keywords internal +load_and_bind_pk_files <- function(paths, type, patterns, verbose, bind_rows) { + + missing_files <- paths[!file.exists(paths)] + if (length(missing_files) > 0){ + rlang::abort(sprintf("File(s) do not exist:\n %s", paste(missing_files, collapse = "\n "))) + } + + dfs <- lapply(paths, read_one_pk_file, patterns = patterns, verbose = verbose) + + if (length(dfs) == 1) return(dfs[[1]]) + + if (!bind_rows) return(dfs) + + # -- check mapping consistency across files --------------------------------- + mappings <- lapply(dfs, function(d) attr(d, "column_mapping")) + reference_mapping <- mappings[[1]] + + inconsistent <- vapply(mappings[-1], function(m) { + !identical( + unlist(m[!is.na(unlist(m))]), + unlist(reference_mapping[!is.na(unlist(reference_mapping))]) + ) + }, logical(1)) + + if (any(inconsistent)) + rlang::warn( + c("!" = "Column mappings differ across files being bound together.", + "i" = "Using the mapping from the first file.", + ">" = "Verify all files share the same column structure.") + ) + + # -- bind & restore attributes ---------------------------------------------- + bound <- dplyr::bind_rows(dfs) + attr(bound, "column_mapping") <- reference_mapping + class(bound) <- c("pk_data", class(bound)) + + if (verbose){ + message(sprintf(" \u2022 Bound %d %s file(s) \u2192 %d rows", length(dfs), type, nrow(bound))) + } + + bound +} + + +#' Read a Single PK File +#' +#' @keywords internal +read_one_pk_file <- function(filepath, patterns, verbose = TRUE) { + + if (verbose){rlang::inform(sprintf("Loading: %s", basename(filepath)))} + + df <- tryCatch( + rio::import(file = filepath, which = 1), + error = function(e) + rlang::abort(sprintf("Failed to read '%s': %s", filepath, e$message)) + ) + + if (nrow(df) == 0){rlang::abort(sprintf("Empty file: %s", basename(filepath)))} + + mapping <- create_column_mapping(names(df), patterns) + attr(df, "column_mapping") <- mapping + class(df) <- c("pk_data", class(df)) + df +} + + +# ============================================================================= +# 4. Role Detection +# ============================================================================= + +#' Detect the PK Role of a File +#' +#' @keywords internal +detect_role <- function(f, patterns, verbose = FALSE) { + + col_names <- read_column_names_only(f, verbose = verbose) + if (is.null(col_names)) return("unknown") + + if (verbose){message(sprintf(" Columns in %s: %s", basename(f), paste(col_names, collapse = ", ")))} + + role_matches <- resolve_pk_column_roles( + names_vec = col_names, + patterns = patterns, + mode = "detect_all", + stop_on_ambiguous = FALSE + ) + + has_conc <- isTRUE(role_matches["conc"]) + has_dose <- isTRUE(role_matches["dose"]) + + if (has_conc && has_dose) return("combined") + if (has_conc) return("conc") + if (has_dose) return("dose") + + if (verbose){message(sprintf(" %s \u2192 no PK columns found", basename(f)))} + + "unknown" +} + + +#' Read Only Column Names from a File +#' +#' Attempts a zero-row or one-row read to obtain column names without loading +#' the full dataset. Handles formats that ignore \code{n_max}. +#' +#' @keywords internal +read_column_names_only <- function(f, verbose = FALSE) { + + ext <- tolower(tools::file_ext(f)) + + col_names <- tryCatch({ + if (ext %in% c("xpt", "sas7bdat")) { + # haven is more reliable for these formats + if (requireNamespace("haven", quietly = TRUE)) { + reader <- if (ext == "xpt") haven::read_xpt else haven::read_sas + tmp <- reader(f, n_max = 1L) + names(tmp) + } else { + tmp <- rio::import(file = f, which = 1) + names(tmp) + } + } else { + tmp <- rio::import(file = f, which = 1, n_max = 1L) + names(tmp) + } + }, error = function(e) { + if (verbose){message(sprintf(" Could not read columns from '%s': %s", basename(f), e$message))} + + NULL + }) + + col_names +} + + +# ============================================================================= +# 5. Column Mapping +# ============================================================================= + +#' Get Default PK Column Patterns +#' +#' Returns a named list of regex patterns used to identify concentration, dose, +#' subject, and time columns. +#' +#' @return Named list with patterns for \code{conc}, \code{dose}, +#' \code{subject}, and \code{time}. +#' @export +#' @examples +#' patterns <- get_pk_patterns() +#' # Add SDTM PCORRES support: +#' # patterns$conc <- c(patterns$conc, "^pcorres$", "^pcstresc$") +get_pk_patterns <- function() { + list( + # FIX #7: tightened greedy patterns with word boundaries / anchors + conc = c( + "^conc$", "^aval$", "^pcstresn$", "^dv$", "^concentration$", + # "^pcorres$", # opt-in: SDTM PCORRES (character result) + # "^pcstresc$", # opt-in: SDTM PCSTRESC (standardised char result) + "^conc_", # conc_ prefix (e.g. conc_plasma) + "_conc$", # _conc suffix + "\\bconcentration\\b", + "\\bng[_/]?ml\\b", + "\\bmg[_/]?ml\\b", + "\\bug[_/]?ml\\b" + ), + dose = c( + "^dose$", "^amount$", "^exdose$", "^amt$", + "^dose_", # dose_ prefix + "_dose$", # _dose suffix + "\\bmg$", + "\\bug$" + ), + subject = c( + "^usubjid$", "^id$", "^subject$", "^subjectid$", "^ptno$", + "^subj$", "^subj_id$", "^subject_id$" + ), + time = c( + "^time$", "^pctptnum$", "^atptn$", "^tad$", "^tafd$", "^hr$", + "^hours$", "^time_h$", "^time_hr$", + "\\btime\\s*\\(.*\\)" # e.g. "Time (h)" + ) + ) +} + + +#' Create Column Mapping from Column Names +#' +#' @keywords internal +create_column_mapping <- function(original_names, patterns) { + matches <- resolve_pk_column_roles( + names_vec = tolower(original_names), + patterns = patterns, + mode = "match", + stop_on_ambiguous = FALSE + ) + mapping <- vector("list", length(patterns)) + names(mapping) <- names(patterns) + for (role in names(patterns)) { + idx <- which(matches[[role]]) + mapping[[role]] <- if (length(idx) > 0) original_names[idx[1L]] else NA_character_ + } + mapping +} + + +#' Get Mapped Column Name +#' +#' Returns the actual column name for a given PK role. +#' +#' @param data A data frame created by \code{load_pk_data()}. +#' @param role Character. One of \code{"subject"}, \code{"time"}, +#' \code{"conc"}, or \code{"dose"}. +#' +#' @return Character string - the matched column name. +#' @export +#' @examples +#' \dontrun{ +#' time_col <- get_mapped_column(pk$conc, "time") +#' } +get_mapped_column <- function(data, role) { + + mapping <- attr(data, "column_mapping") + if (is.null(mapping)){ + rlang::abort( + message = "Missing column mapping.", + body = c( + "i" = "This data frame was not produced by load_pk_data().", + ">" = "Use load_pk_data() to load and map your data." + ) + ) + } + + col <- mapping[[role]] + if (is.na(col)) { + available <- names(mapping)[!is.na(unlist(mapping))] + rlang::abort( + message = sprintf("Role '%s' not found in column mapping.", role), + body = c( + "i" = sprintf("Available roles: %s", paste(available, collapse = ", ")), + ">" = "Check your data or adjust column patterns via get_pk_patterns()." + ) + ) + } + col +} + + +#' Resolve PK Column Roles +#' +#' Matches column names against PK role patterns. +#' +#' @param file Character. Optional file path for error messages. +#' @param names_vec Character vector (or data frame) of column names. +#' @param patterns Named list of regex patterns. +#' @param mode One of \code{"match"}, \code{"detect"}, \code{"detect_all"}. +#' @param stop_on_ambiguous Logical. Abort if multiple columns match one role? +#' +#' @keywords internal +resolve_pk_column_roles <- function(file = NULL, + names_vec, + patterns, + mode = c("match", "detect", "detect_all"), + stop_on_ambiguous = TRUE) { + + mode <- match.arg(mode) + + if (is.data.frame(names_vec)) { + if (is.null(names_vec) || nrow(names_vec) == 0) { + return(switch(mode, + detect = FALSE, + detect_all = FALSE, + lapply(patterns, function(x) logical(0)))) + } + names_vec <- names(names_vec) + } + + lower_names <- tolower(names_vec) + + # Duplicate column check + dupes <- lower_names[duplicated(lower_names)] + if (length(dupes) > 0){ + rlang::abort(sprintf( + "%sDuplicate column names (case-insensitive): %s", + if (!is.null(file)) sprintf("File '%s': ", basename(file)) else "", + paste(unique(dupes), collapse = ", ") + )) + } + + if (length(lower_names) == 0) { + return(switch(mode, + detect = FALSE, + detect_all = FALSE, + lapply(patterns, function(x) logical(0)))) + } + + # Match each role + role_hits <- lapply(patterns, function(pats) { + combined_pat <- paste0("(", paste(pats, collapse = "|"), ")") + grepl(combined_pat, lower_names, ignore.case = TRUE, perl = TRUE) + }) + + # Ambiguity check + if (stop_on_ambiguous) { + ambiguous_roles <- names(role_hits)[vapply(role_hits, sum, integer(1)) > 1L] + if (length(ambiguous_roles) > 0) { + details <- vapply(ambiguous_roles, function(r) { + hits <- which(role_hits[[r]]) + sprintf("%s \u2190 %s", r, paste(names_vec[hits], collapse = ", ")) + }, character(1)) + rlang::abort(sprintf( + "%sAmbiguous column matches:\n %s\n\nFix: Rename columns or supply custom patterns.", + if (!is.null(file)) sprintf("File '%s': ", basename(file)) else "", + paste(details, collapse = "\n ") + )) + } + } + + switch(mode, + detect = any(vapply(role_hits, any, logical(1))), + detect_all = vapply(role_hits, any, logical(1)), + role_hits # "match" \u2014 return the full logical list + ) +} + + +# ============================================================================= +# 6. Post-Processing: Concentration & Dose +# ============================================================================= + +#' Process Concentration Data +#' +#' @keywords internal +process_conc_data <- function(df, decimal_control, blq_handling, verbose) { + + df <- remove_empty_data(df, verbose) + + mapping <- attr(df, "column_mapping") + if (is.na(mapping$subject)) + df <- auto_create_subject_id(df, verbose = verbose) + + if (blq_handling) + df <- clean_blq_values(df, verbose) + + if (decimal_control) + df <- decimal_formatter( + df = df, + col_max_map = list(time = 1L, conc = 3L), + verbose = verbose + ) + + df +} + + +#' Process Dose Data +#' +#' @keywords internal +process_dose_data <- function(df, decimal_control, verbose) { + + df <- remove_empty_data(df, verbose) + + mapping <- attr(df, "column_mapping") + if (is.na(mapping$subject)) + df <- auto_create_subject_id(df, verbose = verbose) + + if (decimal_control) + df <- decimal_formatter( + df = df, + col_max_map = list(dose = 2L), + verbose = verbose + ) + + df +} + + +# ============================================================================= +# 7. Data Cleaning Utilities +# ============================================================================= + +#' Remove Empty Rows and Columns +#' +#' Thin wrapper around \code{janitor::remove_empty()} that preserves +#' custom attributes. +#' +#' @keywords internal +remove_empty_data <- function(df, verbose) { + + orig_rows <- nrow(df) + orig_cols <- ncol(df) + + cleaned <- janitor::remove_empty(dat = df, which = c("rows", "cols")) + + removed_rows <- orig_rows - nrow(cleaned) + removed_cols <- orig_cols - ncol(cleaned) + + if ((removed_rows > 0 || removed_cols > 0) && verbose){ + message(sprintf(" \u2022 Removed %d empty row(s), %d empty col(s)", removed_rows, removed_cols)) + } + + # Preserve attributes + attr(cleaned, "column_mapping") <- attr(df, "column_mapping") + class(cleaned) <- class(df) + cleaned +} + + +#' Auto-Create Subject ID Column +#' +#' Creates a default subject ID (\code{"SUBJ001"}) when no subject column is +#' detected. Warns if duplicate time values suggest multiple subjects. +#' +#' @keywords internal +auto_create_subject_id <- function(df, verbose = FALSE) { + + mapping <- attr(df, "column_mapping") + + # Already mapped, nothing to do + if (!is.null(mapping$subject) && + !is.na(mapping$subject) && + mapping$subject %in% names(df)) { + return(df) + } + + warned <- FALSE + time_col <- mapping$time + + if (!is.na(time_col) && time_col %in% names(df)) { + time_vals <- df[[time_col]] + if (any(duplicated(stats::na.omit(time_vals)))) { + warned <- TRUE + rlang::warn(c( + "!" = "No subject ID column detected.", + "i" = "Duplicate time values found \u2014 possible multiple subjects.", + ">" = "All data assigned to single subject ID = 'SUBJ001'.", + ">" = "Add a subject column if multiple subjects are present." + )) + } + } + + df$ID <- "SUBJ001" + mapping$subject <- "ID" + attr(df, "column_mapping") <- mapping + + if (verbose && !warned){rlang::inform("No subject ID detected \u2014 created default column 'ID' = 'SUBJ001'.")} + + df +} + + +# ============================================================================= +# 8. BLQ Handling +# ============================================================================= + +#' Handle BLQ (Below Limit of Quantification) Values +#' +#' Replaces BLQ strings with \code{NA} then interpolates using linear +#' interpolation pre-Cmax and log-linear interpolation post-Cmax. +#' +#' Negative-time rows are flagged with a warning before removal. +#' +#' @keywords internal +clean_blq_values <- function(data, verbose) { + + subj_col <- get_mapped_column(data, "subject") + time_col <- get_mapped_column(data, "time") + conc_col <- get_mapped_column(data, "conc") + + blq_strings <- c("blq", "bloq", "bql", "lloq", "na", "nr", "", + "nd", " 0) { + rlang::warn(c( + "!" = sprintf("%d row(s) with negative time values will be removed.", + length(neg_time_rows)), + "i" = "Negative time indicates pre-dose sampling \u2014 handle separately if needed.", + ">" = "Rows removed from BLQ processing pipeline." + )) + } + + # ---- step 1: mutate ------------------------------------------------------- + + processed <- data + + # keep original + processed$conc_original <- processed[[conc_col]] + + # transform + tmp <- trimws(tolower(as.character(processed[[conc_col]]))) + + processed[[conc_col]] <- dplyr::case_when( + tmp %in% blq_strings ~ NA_real_, + tmp == "0" ~ 0, + TRUE ~ suppressWarnings(as.numeric(tmp)) + ) + + # ---- step 2: filter ------------------------------------------------------- + processed <- dplyr::filter( + processed, + !is.na(.data[[time_col]]), + is.finite(suppressWarnings(as.numeric(as.character(.data[[time_col]])))), + suppressWarnings(as.numeric(as.character(.data[[time_col]]))) >= 0 + ) + + # ---- step 3: arrange ------------------------------------------------------ + processed <- dplyr::arrange( + processed, + .data[[subj_col]], + .data[[time_col]] + ) + + # ---- step 4: group-wise interpolation ------------------------------------ + split_data <- split(processed, processed[[subj_col]]) + + interpolated_list <- lapply(split_data, function(sub_df) { + interpolate_subject( + sub_df = sub_df, + time_col = time_col, + conc_col = conc_col, + verbose = verbose + ) + }) + + cleaned <- dplyr::bind_rows(interpolated_list) + + # ---- final arrange -------------------------------------------------------- + cleaned <- dplyr::arrange( + cleaned, + .data[[subj_col]], + .data[[time_col]] + ) + + # ---- restore attributes --------------------------------------------------- + attr(cleaned, "column_mapping") <- attr(data, "column_mapping") + class(cleaned) <- class(data) + cleaned +} + + +#' Interpolate BLQ Values for a Single Subject +#' +#' @keywords internal +interpolate_subject <- function(sub_df, time_col, conc_col, verbose) { + + if (nrow(sub_df) == 0) return(sub_df) + + sub_df <- dplyr::arrange(sub_df, .data[[time_col]]) + t_vals <- sub_df[[time_col]] + conc_vals <- sub_df[[conc_col]] # FIX #2 + method <- rep("observed", length(t_vals)) + + obs_idx <- which(!is.na(conc_vals)) + + # All BLQ set everything to zero + if (length(obs_idx) == 0) { + sub_df[[conc_col]] <- 0 + sub_df$method <- "all-blq" + return(sub_df) + } + + if (length(obs_idx) >= 2) { + cmax_idx <- obs_idx[which.max(conc_vals[obs_idx])] + cmax_time <- t_vals[cmax_idx] + + first_obs <- min(t_vals[obs_idx]) + last_obs <- max(t_vals[obs_idx]) + middle_na <- which(is.na(conc_vals) & t_vals > first_obs & t_vals < last_obs) + + if (length(middle_na) > 0) { + conc_interp <- tryCatch( + zoo::na.approx(conc_vals, x = t_vals, na.rm = FALSE), + error = function(e) conc_vals + ) + + for (i in middle_na) { + if (t_vals[i] <= cmax_time) { + conc_vals[i] <- conc_interp[i] + method[i] <- "pre-cmax-linear" + } else { + before <- max(obs_idx[obs_idx < i]) + after <- min(obs_idx[obs_idx > i]) + if (!is.na(before) && !is.na(after) && + conc_vals[before] > 0 && conc_vals[after] > 0) { + lambda <- log(conc_vals[before] / conc_vals[after]) / + (t_vals[after] - t_vals[before]) + conc_vals[i] <- conc_vals[before] * exp(-lambda * (t_vals[i] - t_vals[before])) + method[i] <- "post-cmax-loglinear" + } else { + conc_vals[i] <- conc_interp[i] + method[i] <- "post-cmax-linear" + } + } + } + } + } + + # Pre-dose zeros (before first observed value) + first_obs_idx <- min(obs_idx) + if (first_obs_idx > 1L) { + conc_vals[seq_len(first_obs_idx - 1L)] <- 0 + method[seq_len(first_obs_idx - 1L)] <- "pre-dose-zero" + } + + sub_df[[conc_col]] <- conc_vals + sub_df$method <- method + sub_df +} + + +# ============================================================================= +# 9. Decimal Formatting +# ============================================================================= + +#' Count Decimal Places in a Numeric Value +#' +#' @keywords internal +count_decimal_places <- function(x) { + if (is.na(x) || !is.finite(x)) return(0L) + x_str <- sub("0+$", "", sprintf("%.15f", x)) + if (grepl("\\.", x_str, fixed = FALSE)) + nchar(sub(".*\\.", "", x_str)) + else + 0L +} + + +#' Apply Consistent Decimal Formatting to PK Columns +#' +#' Decimal information is stored as an attribute on the *data frame* +#' (\code{"decimal_info"}) rather than on a copy of an individual column, +#' ensuring the metadata actually persists. +#' +#' @keywords internal +decimal_formatter <- function(df, col_max_map, verbose) { + + mapping <- attr(df, "column_mapping") + if (is.null(mapping)) + rlang::abort("Internal error: column_mapping attribute is missing.") + + decimal_info <- attr(df, "decimal_info") %||% list() + + for (role in names(col_max_map)) { + col <- mapping[[role]] + if (is.null(col) || is.na(col) || !col %in% names(df)) next + if (!is.numeric(df[[col]])) next + + places <- vapply(df[[col]], count_decimal_places, integer(1)) + optimal <- min(max(places, na.rm = TRUE), col_max_map[[role]]) + + # store on the data frame, not on a discarded copy of the column + decimal_info[[col]] <- optimal + + if (verbose){rlang::inform(sprintf(" \u2022 [%s] %s: using %d decimal place(s)", role, col, optimal))} + } + + attr(df, "decimal_info") <- decimal_info + df +} + + +# ============================================================================= +# 10. Usage Example (wrapped in if (FALSE) so it never auto-runs) +# ============================================================================= +if (FALSE) { + + data_dir <- "../data-raw/test" + + pk <- load_pk_data( + path = data_dir, + bind_rows = TRUE, + decimal_control = TRUE, + blq_handling = TRUE, + verbose = TRUE + ) + + print(pk) + + conc <- pk$conc + dose <- pk$dose + + # Retrieve mapped column names + time_col <- get_mapped_column(conc, "time") + conc_col <- get_mapped_column(conc, "conc") + subj_col <- get_mapped_column(conc, "subject") + + # Ready for PKNCA + conc_obj <- PKNCAconc( + conc, + formula = as.formula( + sprintf("%s ~ %s | %s", conc_col, time_col, subj_col) + ) + ) + dose_obj <- PKNCAdose( + dose, + formula = as.formula( + sprintf("%s ~ %s | %s", + get_mapped_column(dose, "dose"), + get_mapped_column(dose, "time"), + get_mapped_column(dose, "subject")) + ) + ) + data_obj <- PKNCAdata(conc_obj, dose_obj) + result <- pk.nca(data_obj) +} \ No newline at end of file diff --git a/data-raw/test/EX.xpt b/data-raw/test/EX.xpt new file mode 100644 index 00000000..a94595c6 Binary files /dev/null and b/data-raw/test/EX.xpt differ diff --git a/data-raw/test/PC.xpt b/data-raw/test/PC.xpt new file mode 100644 index 00000000..e273118e Binary files /dev/null and b/data-raw/test/PC.xpt differ diff --git a/man/auto_create_subject_id.Rd b/man/auto_create_subject_id.Rd new file mode 100644 index 00000000..552c2278 --- /dev/null +++ b/man/auto_create_subject_id.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{auto_create_subject_id} +\alias{auto_create_subject_id} +\title{Auto-Create Subject ID Column} +\usage{ +auto_create_subject_id(df, verbose = FALSE) +} +\description{ +Creates a default subject ID (\code{"SUBJ001"}) when no subject column is +detected. Warns if duplicate time values suggest multiple subjects. +} +\keyword{internal} diff --git a/man/clean_blq_values.Rd b/man/clean_blq_values.Rd new file mode 100644 index 00000000..498e7783 --- /dev/null +++ b/man/clean_blq_values.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{clean_blq_values} +\alias{clean_blq_values} +\title{Handle BLQ (Below Limit of Quantification) Values} +\usage{ +clean_blq_values(data, verbose) +} +\description{ +Replaces BLQ strings with \code{NA} then interpolates using linear +interpolation pre-Cmax and log-linear interpolation post-Cmax. +} +\details{ +Negative-time rows are flagged with a warning before removal. +} +\keyword{internal} diff --git a/man/count_decimal_places.Rd b/man/count_decimal_places.Rd new file mode 100644 index 00000000..2dc5fbcb --- /dev/null +++ b/man/count_decimal_places.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{count_decimal_places} +\alias{count_decimal_places} +\title{Count Decimal Places in a Numeric Value} +\usage{ +count_decimal_places(x) +} +\description{ +Count Decimal Places in a Numeric Value +} +\keyword{internal} diff --git a/man/create_column_mapping.Rd b/man/create_column_mapping.Rd new file mode 100644 index 00000000..2b907732 --- /dev/null +++ b/man/create_column_mapping.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{create_column_mapping} +\alias{create_column_mapping} +\title{Create Column Mapping from Column Names} +\usage{ +create_column_mapping(original_names, patterns) +} +\description{ +Create Column Mapping from Column Names +} +\keyword{internal} diff --git a/man/decimal_formatter.Rd b/man/decimal_formatter.Rd new file mode 100644 index 00000000..7792c33f --- /dev/null +++ b/man/decimal_formatter.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{decimal_formatter} +\alias{decimal_formatter} +\title{Apply Consistent Decimal Formatting to PK Columns} +\usage{ +decimal_formatter(df, col_max_map, verbose) +} +\description{ +Decimal information is stored as an attribute on the \emph{data frame} +(\code{"decimal_info"}) rather than on a copy of an individual column, +ensuring the metadata actually persists. +} +\keyword{internal} diff --git a/man/detect_role.Rd b/man/detect_role.Rd new file mode 100644 index 00000000..e982fbc2 --- /dev/null +++ b/man/detect_role.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{detect_role} +\alias{detect_role} +\title{Detect the PK Role of a File} +\usage{ +detect_role(f, patterns, verbose = FALSE) +} +\description{ +Detect the PK Role of a File +} +\keyword{internal} diff --git a/man/get_mapped_column.Rd b/man/get_mapped_column.Rd new file mode 100644 index 00000000..9aea326d --- /dev/null +++ b/man/get_mapped_column.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{get_mapped_column} +\alias{get_mapped_column} +\title{Get Mapped Column Name} +\usage{ +get_mapped_column(data, role) +} +\arguments{ +\item{data}{A data frame created by \code{load_pk_data()}.} + +\item{role}{Character. One of \code{"subject"}, \code{"time"}, +\code{"conc"}, or \code{"dose"}.} +} +\value{ +Character string - the matched column name. +} +\description{ +Returns the actual column name for a given PK role. +} +\examples{ +\dontrun{ + time_col <- get_mapped_column(pk$conc, "time") +} +} diff --git a/man/get_pk_patterns.Rd b/man/get_pk_patterns.Rd new file mode 100644 index 00000000..c8dd9e6f --- /dev/null +++ b/man/get_pk_patterns.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{get_pk_patterns} +\alias{get_pk_patterns} +\title{Get Default PK Column Patterns} +\usage{ +get_pk_patterns() +} +\value{ +Named list with patterns for \code{conc}, \code{dose}, +\code{subject}, and \code{time}. +} +\description{ +Returns a named list of regex patterns used to identify concentration, dose, +subject, and time columns. +} +\examples{ +patterns <- get_pk_patterns() +# Add SDTM PCORRES support: +# patterns$conc <- c(patterns$conc, "^pcorres$", "^pcstresc$") +} diff --git a/man/interpolate_subject.Rd b/man/interpolate_subject.Rd new file mode 100644 index 00000000..5bcd75ef --- /dev/null +++ b/man/interpolate_subject.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{interpolate_subject} +\alias{interpolate_subject} +\title{Interpolate BLQ Values for a Single Subject} +\usage{ +interpolate_subject(sub_df, time_col, conc_col, verbose) +} +\description{ +Interpolate BLQ Values for a Single Subject +} +\keyword{internal} diff --git a/man/load_and_bind_pk_files.Rd b/man/load_and_bind_pk_files.Rd new file mode 100644 index 00000000..9856f57d --- /dev/null +++ b/man/load_and_bind_pk_files.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{load_and_bind_pk_files} +\alias{load_and_bind_pk_files} +\title{Load and Bind Multiple PK Files} +\usage{ +load_and_bind_pk_files(paths, type, patterns, verbose, bind_rows) +} +\description{ +Load and Bind Multiple PK Files +} +\keyword{internal} diff --git a/man/load_pk_data.Rd b/man/load_pk_data.Rd new file mode 100644 index 00000000..a98549f0 --- /dev/null +++ b/man/load_pk_data.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{load_pk_data} +\alias{load_pk_data} +\title{Load and Process Pharmacokinetic Data} +\usage{ +load_pk_data( + path = getwd(), + file_types = NULL, + patterns = get_pk_patterns(), + decimal_control = TRUE, + blq_handling = TRUE, + bind_rows = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{path}{Character. Directory containing PK files. Default: \code{getwd()}.} + +\item{file_types}{Character vector. File extensions to search for. Default: +\code{c("xpt","xlsx","xls","csv","txt","sas7bdat")}.} + +\item{patterns}{Named list. Regex patterns for PK column roles. +See \code{\link{get_pk_patterns}}.} + +\item{decimal_control}{Logical. Apply smart decimal formatting? Default \code{TRUE}.} + +\item{blq_handling}{Logical. Apply BLQ interpolation? Default \code{TRUE}.} + +\item{bind_rows}{Logical. Bind multiple files into one data frame? Default \code{TRUE}.} + +\item{verbose}{Logical. Print detailed progress? Default \code{TRUE}.} +} +\value{ +A list of class \code{pk_data_list}: +\item{conc}{Concentration data frame (if found)} +\item{dose}{Dose data frame (if found)} +} +\description{ +Streamlines loading, cleaning, and standardisation of PK data from multiple +file formats (XPT, XLSX, XLS, CSV, TXT, SAS7BDAT). +} +\examples{ +\dontrun{ + pk <- load_pk_data(path = "path/to/data", verbose = TRUE) + conc <- pk$conc + dose <- pk$dose +} +} diff --git a/man/process_conc_data.Rd b/man/process_conc_data.Rd new file mode 100644 index 00000000..1ee891ad --- /dev/null +++ b/man/process_conc_data.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{process_conc_data} +\alias{process_conc_data} +\title{Process Concentration Data} +\usage{ +process_conc_data(df, decimal_control, blq_handling, verbose) +} +\description{ +Process Concentration Data +} +\keyword{internal} diff --git a/man/process_dose_data.Rd b/man/process_dose_data.Rd new file mode 100644 index 00000000..df774d87 --- /dev/null +++ b/man/process_dose_data.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{process_dose_data} +\alias{process_dose_data} +\title{Process Dose Data} +\usage{ +process_dose_data(df, decimal_control, verbose) +} +\description{ +Process Dose Data +} +\keyword{internal} diff --git a/man/quick_validate.Rd b/man/quick_validate.Rd new file mode 100644 index 00000000..76518034 --- /dev/null +++ b/man/quick_validate.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{quick_validate} +\alias{quick_validate} +\title{Lightweight Result Validation} +\usage{ +quick_validate(result) +} +\description{ +Lightweight Result Validation +} +\keyword{internal} diff --git a/man/read_column_names_only.Rd b/man/read_column_names_only.Rd new file mode 100644 index 00000000..d6eb5602 --- /dev/null +++ b/man/read_column_names_only.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{read_column_names_only} +\alias{read_column_names_only} +\title{Read Only Column Names from a File} +\usage{ +read_column_names_only(f, verbose = FALSE) +} +\description{ +Attempts a zero-row or one-row read to obtain column names without loading +the full dataset. Handles formats that ignore \code{n_max}. +} +\keyword{internal} diff --git a/man/read_one_pk_file.Rd b/man/read_one_pk_file.Rd new file mode 100644 index 00000000..c9d300db --- /dev/null +++ b/man/read_one_pk_file.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{read_one_pk_file} +\alias{read_one_pk_file} +\title{Read a Single PK File} +\usage{ +read_one_pk_file(filepath, patterns, verbose = TRUE) +} +\description{ +Read a Single PK File +} +\keyword{internal} diff --git a/man/remove_empty_data.Rd b/man/remove_empty_data.Rd new file mode 100644 index 00000000..93d14f12 --- /dev/null +++ b/man/remove_empty_data.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{remove_empty_data} +\alias{remove_empty_data} +\title{Remove Empty Rows and Columns} +\usage{ +remove_empty_data(df, verbose) +} +\description{ +Thin wrapper around \code{janitor::remove_empty()} that preserves +custom attributes. +} +\keyword{internal} diff --git a/man/resolve_pk_column_roles.Rd b/man/resolve_pk_column_roles.Rd new file mode 100644 index 00000000..1851c760 --- /dev/null +++ b/man/resolve_pk_column_roles.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{resolve_pk_column_roles} +\alias{resolve_pk_column_roles} +\title{Resolve PK Column Roles} +\usage{ +resolve_pk_column_roles( + file = NULL, + names_vec, + patterns, + mode = c("match", "detect", "detect_all"), + stop_on_ambiguous = TRUE +) +} +\arguments{ +\item{file}{Character. Optional file path for error messages.} + +\item{names_vec}{Character vector (or data frame) of column names.} + +\item{patterns}{Named list of regex patterns.} + +\item{mode}{One of \code{"match"}, \code{"detect"}, \code{"detect_all"}.} + +\item{stop_on_ambiguous}{Logical. Abort if multiple columns match one role?} +} +\description{ +Matches column names against PK role patterns. +} +\keyword{internal} diff --git a/man/resolve_pk_files.Rd b/man/resolve_pk_files.Rd new file mode 100644 index 00000000..09e71824 --- /dev/null +++ b/man/resolve_pk_files.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_pk_data.R +\name{resolve_pk_files} +\alias{resolve_pk_files} +\title{Resolve PK Files in a Directory} +\usage{ +resolve_pk_files(path, file_types = NULL, patterns, verbose = FALSE) +} +\description{ +Scans \code{path} for supported files and classifies each as +"conc", "dose", "combined", or "unknown". +} +\keyword{internal} diff --git a/tests/testthat/test-load_pk_data.R b/tests/testthat/test-load_pk_data.R new file mode 100644 index 00000000..da780f3d --- /dev/null +++ b/tests/testthat/test-load_pk_data.R @@ -0,0 +1,540 @@ +# ============================================================================= +# HELPERS +# ============================================================================= + +make_conc_df <- function(n = 6) { + data.frame( + USUBJID = rep("SUBJ001", n), + TIME = seq(0, n - 1), + CONC = c(0, 10, 20, 15, 8, 3), + stringsAsFactors = FALSE + ) +} + +make_dose_df <- function() { + data.frame( + USUBJID = "SUBJ001", + TIME = 0, + DOSE = 100, + stringsAsFactors = FALSE + ) +} + +with_mapping <- function(df, mapping) { + attr(df, "column_mapping") <- mapping + class(df) <- c("pk_data", class(df)) + df +} + +std_mapping <- function(subj = "USUBJID", time = "TIME", + conc = "CONC", dose = NA_character_) { + list(subject = subj, time = time, conc = conc, dose = dose) +} + + +# ============================================================================= +# 1. get_pk_patterns() +# ============================================================================= + +test_that("get_pk_patterns() returns a named list", { + p <- get_pk_patterns() + expect_type(p, "list") + expect_named(p) +}) + +test_that("get_pk_patterns() contains required roles", { + p <- get_pk_patterns() + expect_true(all(c("conc", "dose", "subject", "time") %in% names(p))) +}) + +test_that("each pattern entry is a character vector", { + p <- get_pk_patterns() + for (role in names(p)) + expect_true(is.character(p[[role]]), info = paste("role:", role)) +}) + +test_that("get_pk_patterns() conc patterns include common column names", { + p <- get_pk_patterns() + expect_true(any(grepl("conc", p$conc, ignore.case = TRUE))) +}) + +test_that("get_pk_patterns() subject patterns include USUBJID", { + p <- get_pk_patterns() + expect_true(any(grepl("usubjid", p$subject, ignore.case = TRUE))) +}) + + +# ============================================================================= +# 2. resolve_pk_column_roles() +# ============================================================================= + +test_that("detect mode returns TRUE when any PK column found", { + result <- resolve_pk_column_roles( + names_vec = make_conc_df(), + patterns = get_pk_patterns(), + mode = "detect", + stop_on_ambiguous = FALSE + ) + expect_true(result) +}) + +test_that("detect_all mode returns named logical vector", { + result <- resolve_pk_column_roles( + names_vec = make_conc_df(), + patterns = get_pk_patterns(), + mode = "detect_all", + stop_on_ambiguous = FALSE + ) + expect_type(result, "logical") + expect_named(result) +}) + +test_that("match mode returns a named list of logical vectors", { + result <- resolve_pk_column_roles( + names_vec = make_conc_df(), + patterns = get_pk_patterns(), + mode = "match", + stop_on_ambiguous = FALSE + ) + expect_type(result, "list") + expect_named(result) + for (role in names(result)) + expect_type(result[[role]], "logical") +}) + +test_that("CONC column is detected as 'conc' role", { + result <- resolve_pk_column_roles( + names_vec = make_conc_df(), + patterns = get_pk_patterns(), + mode = "detect_all", + stop_on_ambiguous = FALSE + ) + expect_true(result["conc"]) +}) + +test_that("TIME column is detected as 'time' role", { + result <- resolve_pk_column_roles( + names_vec = make_conc_df(), + patterns = get_pk_patterns(), + mode = "detect_all", + stop_on_ambiguous = FALSE + ) + expect_true(result["time"]) +}) + +test_that("empty names_vec returns logical(0) in match mode", { + result <- resolve_pk_column_roles( + names_vec = character(0), + patterns = get_pk_patterns(), + mode = "match", + stop_on_ambiguous = FALSE + ) + expect_type(result, "list") + for (role in names(result)) + expect_length(result[[role]], 0) +}) + +test_that("empty names_vec returns FALSE in detect mode", { + result <- resolve_pk_column_roles( + names_vec = character(0), + patterns = get_pk_patterns(), + mode = "detect", + stop_on_ambiguous = FALSE + ) + expect_false(result) +}) + +test_that("duplicate column names abort with informative error", { + expect_error( + resolve_pk_column_roles( + names_vec = c("conc", "CONC"), + patterns = get_pk_patterns(), + mode = "match", + stop_on_ambiguous = FALSE + ), + regexp = "(?i)duplicate" + ) +}) + +test_that("ambiguous columns abort when stop_on_ambiguous = TRUE", { + expect_error( + resolve_pk_column_roles( + names_vec = c("conc", "concentration", "TIME", "USUBJID"), + patterns = get_pk_patterns(), + mode = "match", + stop_on_ambiguous = TRUE + ), + regexp = "(?i)ambiguous" + ) +}) + +test_that("ambiguous columns allowed when stop_on_ambiguous = FALSE", { + expect_no_error( + resolve_pk_column_roles( + names_vec = c("conc", "concentration", "TIME", "USUBJID"), + patterns = get_pk_patterns(), + mode = "match", + stop_on_ambiguous = FALSE + ) + ) +}) + + +# ============================================================================= +# 3. create_column_mapping() +# ============================================================================= + +test_that("create_column_mapping() returns a named list", { + mapping <- create_column_mapping(names(make_conc_df()), get_pk_patterns()) + expect_type(mapping, "list") + expect_named(mapping) +}) + +test_that("conc role is mapped to CONC column", { + mapping <- create_column_mapping(names(make_conc_df()), get_pk_patterns()) + expect_equal(mapping$conc, "CONC") +}) + +test_that("time role is mapped to TIME column", { + mapping <- create_column_mapping(names(make_conc_df()), get_pk_patterns()) + expect_equal(mapping$time, "TIME") +}) + +test_that("subject role is mapped to USUBJID", { + mapping <- create_column_mapping(names(make_conc_df()), get_pk_patterns()) + expect_equal(mapping$subject, "USUBJID") +}) + +test_that("unrecognised columns get NA mapping", { + mapping <- create_column_mapping(c("alpha", "beta", "gamma"), get_pk_patterns()) + for (role in names(mapping)) + expect_true(is.na(mapping[[role]]), info = paste("role:", role)) +}) + + +# ============================================================================= +# 4. get_mapped_column() +# ============================================================================= + +test_that("get_mapped_column() returns the correct column name", { + df <- with_mapping(make_conc_df(), std_mapping()) + expect_equal(get_mapped_column(df, "time"), "TIME") + expect_equal(get_mapped_column(df, "conc"), "CONC") +}) + +test_that("get_mapped_column() aborts when no mapping attribute", { + expect_error(get_mapped_column(make_conc_df(), "time"), class = "error") +}) + +test_that("get_mapped_column() aborts for unmapped role", { + df <- with_mapping(make_conc_df(), std_mapping(dose = NA_character_)) + expect_error(get_mapped_column(df, "dose"), regexp = "(?i)not found", perl = TRUE) +}) + +test_that("get_mapped_column() error lists available roles", { + df <- with_mapping(make_conc_df(), std_mapping()) + err <- tryCatch(get_mapped_column(df, "dose"), error = function(e) e) + expect_match(conditionMessage(err), "(?i)available", perl = TRUE) +}) + + +# ============================================================================= +# 5. quick_validate() +# ============================================================================= + +test_that("quick_validate() returns TRUE invisibly when conc present", { + result <- list(conc = make_conc_df(), dose = NULL) + expect_true(quick_validate(result)) +}) + +test_that("quick_validate() returns TRUE invisibly when dose present", { + result <- list(conc = NULL, dose = make_dose_df()) + expect_true(quick_validate(result)) +}) + +test_that("quick_validate() aborts when both conc and dose are NULL", { + expect_error( + quick_validate(list(conc = NULL, dose = NULL)), + regexp = "(?i)no valid PK data" + ) +}) + + +# ============================================================================= +# 6. remove_empty_data() +# ============================================================================= + +test_that("remove_empty_data() preserves column_mapping attribute", { + m <- std_mapping() + df <- with_mapping(make_conc_df(), m) + out <- remove_empty_data(df, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + +test_that("remove_empty_data() removes all-NA rows", { + df <- make_conc_df() + df[7, ] <- NA + df <- with_mapping(df, std_mapping()) + out <- remove_empty_data(df, verbose = FALSE) + expect_lt(nrow(out), nrow(df)) +}) + +test_that("remove_empty_data() removes all-NA columns", { + df <- make_conc_df() + df$EMPTY <- NA + df <- with_mapping(df, std_mapping()) + out <- remove_empty_data(df, verbose = FALSE) + expect_false("EMPTY" %in% names(out)) +}) + +test_that("remove_empty_data() preserves class", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- remove_empty_data(df, verbose = FALSE) + expect_s3_class(out, "pk_data") +}) + + +# ============================================================================= +# 7. auto_create_subject_id() +# ============================================================================= + +test_that("auto_create_subject_id() adds ID column when subject is NA", { + df <- make_conc_df() + df$USUBJID <- NULL + df <- with_mapping(df, std_mapping(subj = NA_character_)) + out <- auto_create_subject_id(df) + expect_true("ID" %in% names(out)) + expect_true(all(out$ID == "SUBJ001")) +}) + +test_that("auto_create_subject_id() does not add ID when subject already mapped", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- auto_create_subject_id(df) + expect_false("ID" %in% names(out)) +}) + +test_that("auto_create_subject_id() updates column_mapping$subject to 'ID'", { + df <- make_conc_df() + df$USUBJID <- NULL + df <- with_mapping(df, std_mapping(subj = NA_character_)) + out <- auto_create_subject_id(df) + expect_equal(attr(out, "column_mapping")$subject, "ID") +}) + +test_that("auto_create_subject_id() warns about duplicate times when no subject", { + df <- data.frame(TIME = c(0, 0, 1, 2), CONC = c(5, 10, 8, 3), + stringsAsFactors = FALSE) + df <- with_mapping(df, std_mapping(subj = NA_character_)) + expect_warning(auto_create_subject_id(df), regexp = "(?i)duplicate", perl = TRUE) +}) + + +# ============================================================================= +# 8. count_decimal_places() +# ============================================================================= + +test_that("count_decimal_places() returns 0 for integer", { + expect_equal(count_decimal_places(5), 0) +}) + +test_that("count_decimal_places() returns 2 for 1.23", { + expect_equal(count_decimal_places(1.23), 2) +}) + +test_that("count_decimal_places() returns 3 for 0.001", { + expect_equal(count_decimal_places(0.001), 3) +}) + +test_that("count_decimal_places() returns 0 for NA", { + expect_equal(count_decimal_places(NA_real_), 0) +}) + +test_that("count_decimal_places() returns 0 for Inf", { + expect_equal(count_decimal_places(Inf), 0) +}) + +test_that("count_decimal_places() returns 0 for NaN", { + expect_equal(count_decimal_places(NaN), 0) +}) + + +# ============================================================================= +# 9. decimal_formatter() +# ============================================================================= + +test_that("decimal_formatter() returns data frame unchanged if role column missing", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- decimal_formatter(df, col_max_map = list(dose = 2), verbose = FALSE) + expect_equal(nrow(out), nrow(df)) + expect_equal(ncol(out), ncol(df)) +}) + +test_that("decimal_formatter() stores decimal_info attribute on data frame", { + # FIX: decimal precision is stored on the df (decimal_info), not on the column + df <- with_mapping(make_conc_df(), std_mapping()) + out <- decimal_formatter(df, col_max_map = list(conc = 3), verbose = FALSE) + expect_false(is.null(attr(out, "decimal_info"))) + expect_true("CONC" %in% names(attr(out, "decimal_info"))) +}) + +test_that("decimal_formatter() respects col_max_map cap", { + df <- make_conc_df() + df$CONC <- c(0.12345, 1.23456, 2.34567, 3.45678, 4.56789, 5.67890) + df <- with_mapping(df, std_mapping()) + out <- decimal_formatter(df, col_max_map = list(conc = 2), verbose = FALSE) + dp <- attr(out, "decimal_info")[["CONC"]] + expect_lte(dp, 2) +}) + +test_that("decimal_formatter() aborts with missing column_mapping", { + expect_error( + decimal_formatter(make_conc_df(), col_max_map = list(conc = 3), verbose = FALSE), + regexp = "(?i)mapping" + ) +}) + + +# ============================================================================= +# 10. interpolate_subject() +# ============================================================================= + +test_that("interpolate_subject() returns same number of rows", { + sub <- make_conc_df() + out <- interpolate_subject(sub, time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_equal(nrow(out), nrow(sub)) +}) + +test_that("interpolate_subject() adds method column", { + out <- interpolate_subject(make_conc_df(), time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_true("method" %in% names(out)) +}) + +test_that("interpolate_subject() fills middle NA with interpolated values", { + sub <- make_conc_df() + sub$CONC[3] <- NA + out <- interpolate_subject(sub, time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_false(any(is.na(out$CONC))) +}) + +test_that("interpolate_subject() sets pre-dose values to zero", { + sub <- make_conc_df() + sub$CONC[1] <- NA + out <- interpolate_subject(sub, time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_equal(out$CONC[1], 0) +}) + +test_that("interpolate_subject() handles all-NA concentration gracefully", { + sub <- make_conc_df() + sub$CONC <- NA_real_ + out <- interpolate_subject(sub, time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_true(all(out$CONC == 0)) + expect_true(all(out$method == "all-blq")) +}) + +test_that("interpolate_subject() method is 'observed' for non-BLQ rows", { + out <- interpolate_subject(make_conc_df(), time_col = "TIME", conc_col = "CONC", + verbose = FALSE) + expect_true(any(out$method == "observed")) +}) + +test_that("interpolate_subject() returns empty df unchanged", { + out <- interpolate_subject(make_conc_df()[0, ], time_col = "TIME", + conc_col = "CONC", verbose = FALSE) + expect_equal(nrow(out), 0) +}) + + +# ============================================================================= +# 11. clean_blq_values() +# ============================================================================= + +test_that("clean_blq_values() converts BLQ strings to NA then interpolates", { + df <- make_conc_df() + df$CONC <- as.character(df$CONC) + df$CONC[3] <- "BLQ" + df <- with_mapping(df, std_mapping()) + expect_no_error(clean_blq_values(df, verbose = FALSE)) +}) + +test_that("clean_blq_values() preserves column_mapping", { + m <- std_mapping() + df <- with_mapping(make_conc_df(), m) + out <- clean_blq_values(df, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + +test_that("clean_blq_values() adds method column", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- clean_blq_values(df, verbose = FALSE) + expect_true("method" %in% names(out)) +}) + +test_that("clean_blq_values() filters negative time rows", { + df <- make_conc_df() + df$TIME[1] <- -1 + df <- with_mapping(df, std_mapping()) + out <- suppressWarnings(clean_blq_values(df, verbose = FALSE)) + expect_true(all(out$TIME >= 0)) +}) + +test_that("clean_blq_values() preserves pk_data class", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- clean_blq_values(df, verbose = FALSE) + expect_s3_class(out, "pk_data") +}) + + +# ============================================================================= +# 12. process_conc_data() & process_dose_data() +# ============================================================================= + +test_that("process_conc_data() returns a data frame", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- process_conc_data(df, decimal_control = TRUE, + blq_handling = TRUE, verbose = FALSE) + expect_s3_class(out, "data.frame") +}) + +test_that("process_conc_data() preserves column_mapping", { + m <- std_mapping() + df <- with_mapping(make_conc_df(), m) + out <- process_conc_data(df, decimal_control = FALSE, + blq_handling = FALSE, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + +test_that("process_dose_data() returns a data frame", { + df <- with_mapping(make_dose_df(), + list(subject = "USUBJID", time = "TIME", + conc = NA_character_, dose = "DOSE")) + out <- process_dose_data(df, decimal_control = TRUE, verbose = FALSE) + expect_s3_class(out, "data.frame") +}) + +test_that("process_dose_data() preserves column_mapping", { + m <- list(subject = "USUBJID", time = "TIME", + conc = NA_character_, dose = "DOSE") + df <- with_mapping(make_dose_df(), m) + out <- process_dose_data(df, decimal_control = FALSE, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + + +# ============================================================================= +# 13. quick_validate() edge cases +# ============================================================================= + +test_that("quick_validate() is invisible", { + result <- list(conc = make_conc_df(), dose = NULL) + expect_invisible(quick_validate(result)) +}) + +test_that("quick_validate() passes when both conc and dose present", { + result <- list(conc = make_conc_df(), dose = make_dose_df()) + expect_true(quick_validate(result)) +}) \ No newline at end of file