diff --git a/.Rbuildignore b/.Rbuildignore index 7f222f31..ba4e2a35 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -31,3 +31,4 @@ develop ^vignettes/original$ ^vignettes/Makefile$ ^node_modules$ +^\.claude$ diff --git a/.lintr b/.lintr index 34473d27..02ab4e74 100644 --- a/.lintr +++ b/.lintr @@ -1,5 +1,12 @@ linters: linters_with_defaults( line_length_linter = line_length_linter(120), - cyclocomp_linter = NULL, - object_usage_linter = NULL + object_usage_linter = NULL, + return_linter = NULL, + object_name_linter = NULL, + object_length_linter = NULL, + pipe_consistency_linter = NULL, + indentation_linter = NULL + ) +exclusions: list( + "vignettes/original" ) diff --git a/DESCRIPTION b/DESCRIPTION index b123753d..92cbc7fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: psborrow2 Title: Bayesian Dynamic Borrowing Analysis and Simulation -Version: 0.0.4.0 +Version: 0.0.5.1 Authors@R: c( person( given = "Matt", diff --git a/NEWS.md b/NEWS.md index 51c5948d..0c4e823d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# psborrow2 0.0.5 + +- Fixed a bug where operating characteristics in no-borrowing scenarios + incorrectly varied by external drift +- Fixed a bug with the fixed power prior +- Additional test cases + # psborrow2 0.0.4 - Additional bug fixes impacting continuous endpoints diff --git a/R/analysis_class.R b/R/analysis_class.R index 4ec6057d..f08971d1 100644 --- a/R/analysis_class.R +++ b/R/analysis_class.R @@ -67,20 +67,24 @@ setMethod( cat("Data: Matrix with", nrow(object@data_matrix), "observations \n") cat( - " - ", sum(object@data_matrix[, get_vars(object@treatment)] == 0 & + " - ", + sum(object@data_matrix[, get_vars(object@treatment)] == 0 & object@data_matrix[, get_vars(object@borrowing)["ext_flag_col"]] == 0), " internal controls\n" ) cat( - " - ", sum(object@data_matrix[, get_vars(object@treatment)] == 0 & + " - ", + sum(object@data_matrix[, get_vars(object@treatment)] == 0 & object@data_matrix[, get_vars(object@borrowing)["ext_flag_col"]] == 1), - " external controls", ifelse(is(object@borrowing, "BorrowingNone"), + " external controls", + ifelse(is(object@borrowing, "BorrowingNone"), " (ignored in this analysis)\n", "\n" ) ) cat( - " - ", sum(object@data_matrix[, get_vars(object@treatment)] == 1 & + " - ", + sum(object@data_matrix[, get_vars(object@treatment)] == 1 & object@data_matrix[, get_vars(object@borrowing)["ext_flag_col"]] == 0), " internal experimental\n\n" ) diff --git a/R/borrowing_details.R b/R/borrowing_details.R index d3b63208..5c2bcc60 100644 --- a/R/borrowing_details.R +++ b/R/borrowing_details.R @@ -2,13 +2,13 @@ #' #' Please use one of `borrowing_hierarchical_commensurate()`, `borrowing_none()`, or `borrowing_full()` instead. #' @export -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `borrowing_details()` is deprecated and that +#' This function does not return a value. When called, it triggers an error +#' message indicating that `borrowing_details()` is deprecated and that #' one of `borrowing_hierarchical_commensurate()`, `borrowing_none()`, or #' `borrowing_full()` should be used instead. -#' +#' #' @param ... Deprecated arguments to `borrowing_details`. borrowing_details <- function(...) { .Defunct( diff --git a/R/borrowing_hierarchical_commensurate.R b/R/borrowing_hierarchical_commensurate.R index f901e0ee..76ceedc3 100644 --- a/R/borrowing_hierarchical_commensurate.R +++ b/R/borrowing_hierarchical_commensurate.R @@ -74,7 +74,8 @@ #' __Pharmaceut. Statist., 13: 41--54__. \doi{10.1002/pst.1589} #' #' Hobbes, B.P., Carlin, B.P., Mandrekar, S.J. and Sargent, D.J. (2011), -#' Hierarchical commensurate and power prior models for adaptive incorporation of historical information in clinical trials. +#' Hierarchical commensurate and power prior models for adaptive +#' incorporation of historical information in clinical trials. #' __Biometrics, 67: 1047--1056__. \doi{10.1111/j.1541-0420.2011.01564.x} #' #' @return Object of class [`BorrowingHierarchicalCommensurate`][BorrowingHierarchicalCommensurate-class]. diff --git a/R/cast_mat_to_long_pem.R b/R/cast_mat_to_long_pem.R index ad06f671..f39b0cb9 100644 --- a/R/cast_mat_to_long_pem.R +++ b/R/cast_mat_to_long_pem.R @@ -20,7 +20,7 @@ #' "time", #' "cnsr", #' baseline_prior = prior_normal(0, 1000), -#' cut_points = c(1,2,3) +#' cut_points = c(1, 2, 3) #' ), #' borrowing = borrowing_hierarchical_commensurate( #' "ext", @@ -35,34 +35,43 @@ #' anls_obj <- psborrow2:::cast_mat_to_long_pem(anls) #' cast_mat_to_long_pem <- function(analysis_obj) { - ## Start with data.frame df <- as.data.frame(analysis_obj@data_matrix) cn <- colnames(df) ## Check cut points cut_points <- analysis_obj@outcome@cut_points - max_fup <- max(df[,analysis_obj@outcome@time_var]) + max_fup <- max(df[, analysis_obj@outcome@time_var]) cut_points_keep <- cut_points[cut_points < max_fup] if (length(cut_points_keep) < length(cut_points)) { - warning(paste0("Some cut points are greater than the maximum follow-up time of ", max_fup, ". These will be ignored.")) + warning( + paste0( + "Some cut points are greater than the maximum follow-up time of ", + max_fup, ". These will be ignored." + ) + ) } ## Did they use a protected name? if (any(c("psb2__period", "psb2__start", "psb2__status", "__period__") %in% cn)) { - stop("The column names 'psb2__period', 'psb2__status', 'psb2__start', and '__period__' are reserved when using PEM. Please rename your columns.") + stop( + "The column names 'psb2__period', 'psb2__status', 'psb2__start', ", + "and '__period__' are reserved when using PEM. Please rename your columns." + ) } ## Censorship flag -> event flag df$psb2__status <- 1 - df[, analysis_obj@outcome@cens_var] ## Create long data - long_df <- survival::survSplit(data = df, - cut = cut_points_keep, - event = "psb2__status", - episode = "psb2__period", - start = "psb2__tstart", - end = analysis_obj@outcome@time_var) + long_df <- survival::survSplit( + data = df, + cut = cut_points_keep, + event = "psb2__status", + episode = "psb2__period", + start = "psb2__tstart", + end = analysis_obj@outcome@time_var + ) names(long_df)[which(names(long_df) == "psb2__period")] <- "__period__" long_df[, analysis_obj@outcome@cens_var] <- 1 - long_df[, "psb2__status"] long_df[, "time"] <- long_df[, "time"] - long_df[, "psb2__tstart"] @@ -71,10 +80,9 @@ cast_mat_to_long_pem <- function(analysis_obj) { # Update data matrix analysis_obj@data_matrix <- long_mat - + # Update periods - analysis_obj@outcome@n_periods <- NROW(unique(long_df[,"__period__"])) + analysis_obj@outcome@n_periods <- NROW(unique(long_df[, "__period__"])) return(analysis_obj) - } diff --git a/R/cmdstan.R b/R/cmdstan.R index dafb855d..c3e8a717 100644 --- a/R/cmdstan.R +++ b/R/cmdstan.R @@ -34,7 +34,6 @@ check_cmdstanr <- function(check_sampling = FALSE) { } - #' @return `check_cmdstan()` returns `TRUE` if `CmdStan` seems to be installed, otherwise `FALSE` #' @describeIn check_cmdstanr Check if the `CmdStan` command line tools are available. #' @export diff --git a/R/helpers.R b/R/helpers.R index b44dd7b3..71dfda38 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -6,11 +6,11 @@ #' #' Plots the density values as a curve with the lower vertical limit set to 0. #' -#' +#' #' @return No return value, this function generates a plot in the current graphics device. #' #' @export -#' +#' #' @examples #' x <- seq(-2, 2, len = 100) #' y <- dnorm(x) diff --git a/R/load_and_interpolate_stan_model.R b/R/load_and_interpolate_stan_model.R index 40020c8b..a2c1c37a 100644 --- a/R/load_and_interpolate_stan_model.R +++ b/R/load_and_interpolate_stan_model.R @@ -8,7 +8,6 @@ #' @return String containing the interpolated Stan model #' @include outcome_surv_pem.R build_model_string <- function(template_domain, template_filename, outcome, borrowing, analysis_obj, ...) { - # Load the Stan template template <- load_stan_file(template_domain, template_filename) diff --git a/R/load_stan_file.R b/R/load_stan_file.R index 2ae510d7..e41519eb 100644 --- a/R/load_stan_file.R +++ b/R/load_stan_file.R @@ -1,19 +1,19 @@ #' Load a Stan `psborrow2` template #' #' This function loads a Stan template file from the package's 'inst/stan' directory. -#' @param ... subidrectories +#' @param ... subdirectories #' @return template string load_stan_file <- function(...) { # Construct the path within the package template_path <- system.file("stan", ..., package = "psborrow2") - + # Check if the file exists if (template_path == "") { stop("Template file not found at path: ", file.path("inst/stan", ...)) } - + # Read the content of the file template_content <- paste(readLines(template_path), collapse = "\n") - + return(template_content) } diff --git a/R/outcome_bin_logistic.R b/R/outcome_bin_logistic.R index 996d1847..ce0aedc9 100644 --- a/R/outcome_bin_logistic.R +++ b/R/outcome_bin_logistic.R @@ -43,9 +43,11 @@ #' The `baseline_prior` argument specifies the prior distribution for the #' baseline log odds. The interpretation of the `baseline_prior` differs #' slightly between borrowing methods selected. -#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: the `baseline_prior` for Bayesian Dynamic Borrowing refers +#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: +#' the `baseline_prior` for Bayesian Dynamic Borrowing refers #' to the log odds of the external control arm. -#' - \emph{Full borrowing} or \emph{No borrowing} using `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for +#' - \emph{Full borrowing} or \emph{No borrowing} using +#' `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for #' these borrowing methods refers to the log odds for the #' internal control arm. #' diff --git a/R/outcome_surv_exponential.R b/R/outcome_surv_exponential.R index 2a7d82ec..4a8ba903 100644 --- a/R/outcome_surv_exponential.R +++ b/R/outcome_surv_exponential.R @@ -44,9 +44,11 @@ #' The `baseline_prior` argument specifies the prior distribution for the #' baseline log hazard rate. The interpretation of the `baseline_prior` differs #' slightly between borrowing methods selected. -#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: the `baseline_prior` for Bayesian Dynamic Borrowing +#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: +#' the `baseline_prior` for Bayesian Dynamic Borrowing #' refers to the log hazard rate of the external control arm. -#' - \emph{Full borrowing} or \emph{No borrowing} using `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for +#' - \emph{Full borrowing} or \emph{No borrowing} using +#' `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for #' these borrowing methods refers to the log hazard rate for the #' internal control arm. #' @return Object of class [`OutcomeSurvExponential`][OutcomeSurvExponential-class]. diff --git a/R/outcome_surv_pem.R b/R/outcome_surv_pem.R index 60712182..82e0e69f 100644 --- a/R/outcome_surv_pem.R +++ b/R/outcome_surv_pem.R @@ -49,8 +49,10 @@ #' See `Details` for more information. #' @param cut_points numeric. Vector of internal cut points for the piecewise exponential model. Note: the choice of #' cut points will impact the amount of borrowing between arms when dynamic borrowing methods are selected. It is -#' recommended to choose cut points that contain an equal number of events within each interval. Please include only internal -#' cut points in the vector. For instance, for cut points of \[0, 15], (15, 20\], (20, Inf), the vector should be c(15, 20). +#' recommended to choose cut points that contain an equal number of events +#' within each interval. Please include only internal +#' cut points in the vector. For instance, for cut points of +#' \[0, 15], (15, 20\], (20, Inf), the vector should be c(15, 20). #' If you pass cut-points beyond the follow-up of the data, you will receive an informative warning when calling #' `create_analysis_object()` and these cut points will be ignored. #' @@ -62,10 +64,13 @@ #' consider different baseline priors within each cut point. #' The interpretation of the `baseline_prior` differs #' slightly between borrowing methods selected. -#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: the `baseline_prior` for Bayesian Dynamic Borrowing +#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: +#' the `baseline_prior` for Bayesian Dynamic Borrowing #' refers to the log hazard rate of the external control arm. -#' - \emph{Full borrowing} or \emph{No borrowing} using `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for -#' these borrowing methods refers to the log hazard rate for the internal control arm. +#' - \emph{Full borrowing} or \emph{No borrowing} using +#' `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for +#' these borrowing methods refers to the log hazard rate for the +#' internal control arm. #' @return Object of class [`OutcomeSurvPEM`][OutcomeSurvPEM-class]. #' @export #' @family outcome models @@ -78,7 +83,6 @@ #' cut_points = c(10, 15, 30) #' ) outcome_surv_pem <- function(time_var, cens_var, baseline_prior, weight_var = "", cut_points) { - # Standard input checks assert_string(time_var) assert_string(cens_var) @@ -94,8 +98,11 @@ outcome_surv_pem <- function(time_var, cens_var, baseline_prior, weight_var = "" cut_points_neg0 <- any(cut_points <= 0) cut_points_inf <- any(cut_points == Inf) - if (cut_points_neg0 | cut_points_inf) { - stop("`cut_points` must be positive, non-infinite and exclude 0. Just put internal cutpoints, the model will automatically add 0 and Inf.") + if (cut_points_neg0 || cut_points_inf) { + stop( + "`cut_points` must be positive, non-infinite and exclude 0. ", + "Just put internal cutpoints, the model will automatically add 0 and Inf." + ) } n_cuts <- length(cut_points) diff --git a/R/outcome_surv_weibull_ph.R b/R/outcome_surv_weibull_ph.R index add599c6..5cc8edc9 100644 --- a/R/outcome_surv_weibull_ph.R +++ b/R/outcome_surv_weibull_ph.R @@ -53,9 +53,11 @@ #' The `baseline_prior` argument specifies the prior distribution for the #' baseline log hazard rate. The interpretation of the `baseline_prior` differs #' slightly between borrowing methods selected. -#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: the `baseline_prior` for Bayesian Dynamic Borrowing +#' - \emph{Dynamic borrowing using `borrowing_hierarchical_commensurate()`}: +#' the `baseline_prior` for Bayesian Dynamic Borrowing #' refers to the log hazard rate of the external control arm. -#' - \emph{Full borrowing} or \emph{No borrowing} using `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for +#' - \emph{Full borrowing} or \emph{No borrowing} using +#' `borrowing_full()` or `borrowing_none()`: the `baseline_prior` for #' these borrowing methods refers to the log hazard rate for the #' internal control arm. #' diff --git a/R/prior_bernoulli.R b/R/prior_bernoulli.R index 8b25a0e1..04d4188c 100644 --- a/R/prior_bernoulli.R +++ b/R/prior_bernoulli.R @@ -85,10 +85,10 @@ setMethod( #' @param ... Deprecated arguments to `bernoulli_prior()`. #' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `bernoulli_prior()` is deprecated and that +#' This function does not return a value. When called, it triggers an error +#' message indicating that `bernoulli_prior()` is deprecated and that #' `prior_bernoulli()` should be used instead. -#' +#' #' @export bernoulli_prior <- function(...) { .Defunct( diff --git a/R/prior_beta.R b/R/prior_beta.R index 2f4ce346..8b15f6c8 100644 --- a/R/prior_beta.R +++ b/R/prior_beta.R @@ -90,12 +90,12 @@ setMethod( #' #' Please use `prior_beta()` instead. #' @param ... Deprecated arguments to `beta_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `beta_prior()` is deprecated and that +#' This function does not return a value. When called, it triggers an error +#' message indicating that `beta_prior()` is deprecated and that #' `prior_beta()` should be used instead. -#' +#' #' @export beta_prior <- function(...) { .Defunct( diff --git a/R/prior_cauchy.R b/R/prior_cauchy.R index 79fd8aab..faff84a4 100644 --- a/R/prior_cauchy.R +++ b/R/prior_cauchy.R @@ -87,17 +87,16 @@ setMethod( ) - #' Legacy function for the cauchy prior #' #' Please use `prior_cauchy()` instead. #' @param ... Deprecated arguments to `cauchy_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `cauchy_prior()` is deprecated and that +#' This function does not return a value. When called, it triggers an error +#' message indicating that `cauchy_prior()` is deprecated and that #' `prior_cauchy()` should be used instead. -#' +#' #' @export cauchy_prior <- function(...) { .Defunct( diff --git a/R/prior_exponential.R b/R/prior_exponential.R index ba8dca89..d67d3196 100644 --- a/R/prior_exponential.R +++ b/R/prior_exponential.R @@ -85,12 +85,12 @@ setMethod( #' #' Please use `prior_exponential()` instead. #' @param ... Deprecated arguments to `exponential_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `exponential_prior()` is deprecated and that +#' This function does not return a value. When called, it triggers an error +#' message indicating that `exponential_prior()` is deprecated and that #' `prior_exponential()` should be used instead. -#' +#' #' @export exponential_prior <- function(...) { .Defunct( diff --git a/R/prior_gamma.R b/R/prior_gamma.R index 2ed645b3..e2e7d269 100644 --- a/R/prior_gamma.R +++ b/R/prior_gamma.R @@ -90,11 +90,11 @@ setMethod( #' #' Please use `prior_gamma()` instead. #' @param ... Deprecated arguments to `gamma_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `gamma_prior()` is deprecated and that -#' `prior_gamma()` should be used instead. +#' This function does not return a value. When called, it triggers an error +#' message indicating that `gamma_prior()` is deprecated and that +#' `prior_gamma()` should be used instead. #' #' @export gamma_prior <- function(...) { diff --git a/R/prior_half_cauchy.R b/R/prior_half_cauchy.R index 4d319c65..925a5549 100644 --- a/R/prior_half_cauchy.R +++ b/R/prior_half_cauchy.R @@ -97,11 +97,11 @@ setMethod( #' #' Please use `prior_half_cauchy()` instead. #' @param ... Deprecated arguments to `half_cauchy_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `half_cauchy_prior()` is deprecated and that -#' `prior_half_cauchy()` should be used instead. +#' This function does not return a value. When called, it triggers an error +#' message indicating that `half_cauchy_prior()` is deprecated and that +#' `prior_half_cauchy()` should be used instead. #' #' @export half_cauchy_prior <- function(...) { diff --git a/R/prior_normal.R b/R/prior_normal.R index cf6683ae..45f22c7f 100644 --- a/R/prior_normal.R +++ b/R/prior_normal.R @@ -86,17 +86,16 @@ setMethod( ) - #' Legacy function for the normal prior #' #' Please use `prior_normal()` instead. #' @param ... Deprecated arguments to `normal_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `normal_prior()` is deprecated and that -#' `prior_normal()` should be used instead. -#' +#' This function does not return a value. When called, it triggers an error +#' message indicating that `normal_prior()` is deprecated and that +#' `prior_normal()` should be used instead. +#' #' @export normal_prior <- function(...) { .Defunct( @@ -111,12 +110,12 @@ normal_prior <- function(...) { #' #' Please use `prior_half_normal()` instead. #' @param ... Deprecated arguments to `half_normal_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `half_normal_prior()` is deprecated and that -#' `prior_half_normal()` should be used instead. -#' +#' This function does not return a value. When called, it triggers an error +#' message indicating that `half_normal_prior()` is deprecated and that +#' `prior_half_normal()` should be used instead. +#' #' @export half_normal_prior <- function(...) { .Defunct( diff --git a/R/prior_poisson.R b/R/prior_poisson.R index 7e535027..4663d2b8 100644 --- a/R/prior_poisson.R +++ b/R/prior_poisson.R @@ -82,17 +82,16 @@ setMethod( ) - #' Legacy function for the poisson prior #' #' Please use `prior_poisson()` instead. #' @param ... Deprecated arguments to `poisson_prior()`. -#' +#' #' @return -#' This function does not return a value. When called, it triggers an error -#' message indicating that `poisson_prior()` is deprecated and that -#' `prior_poisson()` should be used instead. -#' +#' This function does not return a value. When called, it triggers an error +#' message indicating that `poisson_prior()` is deprecated and that +#' `prior_poisson()` should be used instead. +#' #' @export poisson_prior <- function(...) { .Defunct( diff --git a/R/sim_borrowing_list.R b/R/sim_borrowing_list.R index 62bd56a5..c9fee170 100644 --- a/R/sim_borrowing_list.R +++ b/R/sim_borrowing_list.R @@ -18,7 +18,8 @@ guide = "data.frame" ), validity = function(object) { - if (!all(vapply(object@borrowing_list, + if (!all(vapply( + object@borrowing_list, function(item) is(item, "Borrowing"), FUN.VALUE = logical(1) ))) { @@ -46,7 +47,7 @@ #' by `borrowing_full()`, `borrowing_none()`, or `borrowing_hierarchical_commensurate()`. #' #' @return Object of class [`SimBorrowingList`][SimBorrowingList-class]. -#' +#' #' @export #' #' @family simulation classes diff --git a/R/sim_covariate_list.R b/R/sim_covariate_list.R index ce9407f8..ee7c30b4 100644 --- a/R/sim_covariate_list.R +++ b/R/sim_covariate_list.R @@ -17,7 +17,8 @@ guide = "data.frame" ), validity = function(object) { - if (!all(vapply(object@covariate_list, + if (!all(vapply( + object@covariate_list, function(item) is(item, "CovariatesOrNULL"), FUN.VALUE = logical(1) ))) { diff --git a/R/sim_data_list.R b/R/sim_data_list.R index 41d96130..7d6a60c2 100644 --- a/R/sim_data_list.R +++ b/R/sim_data_list.R @@ -172,9 +172,9 @@ #' drift = "driftOR", #' index = "ind" #' ) -#' +#' #' @return Object of class [`SimDataList`][SimDataList-class]. -#' +#' #' @export sim_data_list <- function(data_list, guide, @@ -212,7 +212,7 @@ setMethod( # c ---- #' @rdname c -#' +#' #' @return list of [`SimDataList`][SimDataList-class] objects. #' @export setMethod( diff --git a/R/sim_outcome_list.R b/R/sim_outcome_list.R index 78093017..4b741a22 100644 --- a/R/sim_outcome_list.R +++ b/R/sim_outcome_list.R @@ -18,7 +18,8 @@ guide = "data.frame" ), validity = function(object) { - if (!all(vapply(object@outcome_list, + if (!all(vapply( + object@outcome_list, function(item) is(item, "Outcome"), FUN.VALUE = logical(1) ))) { @@ -45,7 +46,7 @@ #' by `outcome_details()`. #' #' @family simulation classes -#' +#' #' @return Object of class [`SimOutcomeList`][SimOutcomeList-class]. #' #' @export diff --git a/R/sim_treatment_list.R b/R/sim_treatment_list.R index 429a1157..9b371baf 100644 --- a/R/sim_treatment_list.R +++ b/R/sim_treatment_list.R @@ -16,7 +16,8 @@ guide = "data.frame" ), validity = function(object) { - if (!all(vapply(object@treatment_list, + if (!all(vapply( + object@treatment_list, function(item) is(item, "Treatment"), FUN.VALUE = logical(1) ))) { diff --git a/R/simulate_data.R b/R/simulate_data.R index 11b8d6c5..b5ca3735 100644 --- a/R/simulate_data.R +++ b/R/simulate_data.R @@ -147,7 +147,6 @@ create_event_dist <- function(dist = NULL, } - #' No specified event distribution #' #' @return `null_event_dist` returns an object with no parameters specified that does not simulate event times. diff --git a/R/simulation_class.R b/R/simulation_class.R index f2e7c83c..3e60234c 100644 --- a/R/simulation_class.R +++ b/R/simulation_class.R @@ -53,10 +53,12 @@ setMethod( definition = function(object) { ready_to_sample <- all(vapply( object@analysis_obj_list, - \(level_1) all(vapply( - level_1, - \(level_2) level_2@ready_to_sample, logical(1) - )), logical(1) + \(level_1) { + all(vapply( + level_1, + \(level_2) level_2@ready_to_sample, logical(1) + )) + }, logical(1) )) if (!ready_to_sample) { cat( diff --git a/man/borrowing_hierarchical_commensurate.Rd b/man/borrowing_hierarchical_commensurate.Rd index b85f5510..e2f680a1 100644 --- a/man/borrowing_hierarchical_commensurate.Rd +++ b/man/borrowing_hierarchical_commensurate.Rd @@ -65,6 +65,7 @@ Use of historical control data for assessing treatment effects in clinical trial \strong{Pharmaceut. Statist., 13: 41--54}. \doi{10.1002/pst.1589} Hobbes, B.P., Carlin, B.P., Mandrekar, S.J. and Sargent, D.J. (2011), -Hierarchical commensurate and power prior models for adaptive incorporation of historical information in clinical trials. +Hierarchical commensurate and power prior models for adaptive +incorporation of historical information in clinical trials. \strong{Biometrics, 67: 1047--1056}. \doi{10.1111/j.1541-0420.2011.01564.x} } diff --git a/man/load_stan_file.Rd b/man/load_stan_file.Rd index d73c9851..5e59fc19 100644 --- a/man/load_stan_file.Rd +++ b/man/load_stan_file.Rd @@ -7,7 +7,7 @@ load_stan_file(...) } \arguments{ -\item{...}{subidrectories} +\item{...}{subdirectories} } \value{ template string diff --git a/man/outcome_bin_logistic.Rd b/man/outcome_bin_logistic.Rd index 4b820e11..81ec0c99 100644 --- a/man/outcome_bin_logistic.Rd +++ b/man/outcome_bin_logistic.Rd @@ -29,9 +29,11 @@ The \code{baseline_prior} argument specifies the prior distribution for the baseline log odds. The interpretation of the \code{baseline_prior} differs slightly between borrowing methods selected. \itemize{ -\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: the \code{baseline_prior} for Bayesian Dynamic Borrowing refers +\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: +the \code{baseline_prior} for Bayesian Dynamic Borrowing refers to the log odds of the external control arm. -\item \emph{Full borrowing} or \emph{No borrowing} using \code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for +\item \emph{Full borrowing} or \emph{No borrowing} using +\code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for these borrowing methods refers to the log odds for the internal control arm. } diff --git a/man/outcome_surv_exponential.Rd b/man/outcome_surv_exponential.Rd index 19940211..e3e52c75 100644 --- a/man/outcome_surv_exponential.Rd +++ b/man/outcome_surv_exponential.Rd @@ -30,9 +30,11 @@ The \code{baseline_prior} argument specifies the prior distribution for the baseline log hazard rate. The interpretation of the \code{baseline_prior} differs slightly between borrowing methods selected. \itemize{ -\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: the \code{baseline_prior} for Bayesian Dynamic Borrowing +\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: +the \code{baseline_prior} for Bayesian Dynamic Borrowing refers to the log hazard rate of the external control arm. -\item \emph{Full borrowing} or \emph{No borrowing} using \code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for +\item \emph{Full borrowing} or \emph{No borrowing} using +\code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for these borrowing methods refers to the log hazard rate for the internal control arm. } diff --git a/man/outcome_surv_pem.Rd b/man/outcome_surv_pem.Rd index dd1008bc..e49c7d6c 100644 --- a/man/outcome_surv_pem.Rd +++ b/man/outcome_surv_pem.Rd @@ -25,8 +25,10 @@ See \code{Details} for more information.} \item{cut_points}{numeric. Vector of internal cut points for the piecewise exponential model. Note: the choice of cut points will impact the amount of borrowing between arms when dynamic borrowing methods are selected. It is -recommended to choose cut points that contain an equal number of events within each interval. Please include only internal -cut points in the vector. For instance, for cut points of [0, 15], (15, 20], (20, Inf), the vector should be c(15, 20). +recommended to choose cut points that contain an equal number of events +within each interval. Please include only internal +cut points in the vector. For instance, for cut points of +[0, 15], (15, 20], (20, Inf), the vector should be c(15, 20). If you pass cut-points beyond the follow-up of the data, you will receive an informative warning when calling \code{create_analysis_object()} and these cut points will be ignored.} } @@ -45,10 +47,13 @@ consider different baseline priors within each cut point. The interpretation of the \code{baseline_prior} differs slightly between borrowing methods selected. \itemize{ -\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: the \code{baseline_prior} for Bayesian Dynamic Borrowing +\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: +the \code{baseline_prior} for Bayesian Dynamic Borrowing refers to the log hazard rate of the external control arm. -\item \emph{Full borrowing} or \emph{No borrowing} using \code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for -these borrowing methods refers to the log hazard rate for the internal control arm. +\item \emph{Full borrowing} or \emph{No borrowing} using +\code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for +these borrowing methods refers to the log hazard rate for the +internal control arm. } } } diff --git a/man/outcome_surv_weibull_ph.Rd b/man/outcome_surv_weibull_ph.Rd index 4a4634cf..669cf367 100644 --- a/man/outcome_surv_weibull_ph.Rd +++ b/man/outcome_surv_weibull_ph.Rd @@ -39,9 +39,11 @@ The \code{baseline_prior} argument specifies the prior distribution for the baseline log hazard rate. The interpretation of the \code{baseline_prior} differs slightly between borrowing methods selected. \itemize{ -\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: the \code{baseline_prior} for Bayesian Dynamic Borrowing +\item \emph{Dynamic borrowing using \code{borrowing_hierarchical_commensurate()}}: +the \code{baseline_prior} for Bayesian Dynamic Borrowing refers to the log hazard rate of the external control arm. -\item \emph{Full borrowing} or \emph{No borrowing} using \code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for +\item \emph{Full borrowing} or \emph{No borrowing} using +\code{borrowing_full()} or \code{borrowing_none()}: the \code{baseline_prior} for these borrowing methods refers to the log hazard rate for the internal control arm. } diff --git a/tests/testthat/test-cast_mat_to_long_pem.R b/tests/testthat/test-cast_mat_to_long_pem.R index d089e609..100223c5 100644 --- a/tests/testthat/test-cast_mat_to_long_pem.R +++ b/tests/testthat/test-cast_mat_to_long_pem.R @@ -1,4 +1,3 @@ - anls <- psborrow2:::.analysis_obj( data_matrix = example_matrix, outcome = outcome_surv_pem( @@ -28,8 +27,13 @@ test_that("cast_mat_to_long_pem handles cut points correctly", { borrowing = borrowing_hierarchical_commensurate("ext", prior_exponential(.001)), treatment = treatment_details("trt", prior_normal(0, 1000)) ) - expect_warning(result <- cast_mat_to_long_pem(anls_cut), "Some cut points are greater than the maximum follow-up time") - cut_points_keep <- anls_cut@outcome@cut_points[anls_cut@outcome@cut_points < max(anls_cut@data_matrix[, anls_cut@outcome@time_var])] + expect_warning( + result <- cast_mat_to_long_pem(anls_cut), + "Some cut points are greater than the maximum follow-up time" + ) + cut_points_keep <- anls_cut@outcome@cut_points[ + anls_cut@outcome@cut_points < max(anls_cut@data_matrix[, anls_cut@outcome@time_var]) + ] expect_equal(length(cut_points_keep) + 1, result@outcome@n_periods) }) diff --git a/tests/testthat/test-create_simulation_obj.R b/tests/testthat/test-create_simulation_obj.R index e33f5fe7..3f497bcb 100644 --- a/tests/testthat/test-create_simulation_obj.R +++ b/tests/testthat/test-create_simulation_obj.R @@ -380,7 +380,6 @@ test_that("`create_simulation_obj()` does not create deep copies", { }) test_that("`show_guide()` gives correct output", { - valid_simulation_obj <- create_simulation_obj( data_matrix_list = valid_data_list, covariate = valid_covariate, @@ -410,10 +409,10 @@ test_that("`show_guide()` gives correct output", { 1, NROW(guide[ guide$true_hr == 0.6 & - guide$drift_hr == 1.0 & - guide$borrowing_scenario == "bdb" & - guide$covariate_scenario == "cov1" & - guide$treatment_scenario == "standard_tx", + guide$drift_hr == 1.0 & + guide$borrowing_scenario == "bdb" & + guide$covariate_scenario == "cov1" & + guide$treatment_scenario == "standard_tx", ]) ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-mcmc_sample-analysis.R b/tests/testthat/test-mcmc_sample-analysis.R index b034d0fc..eecc0314 100644 --- a/tests/testthat/test-mcmc_sample-analysis.R +++ b/tests/testthat/test-mcmc_sample-analysis.R @@ -603,7 +603,11 @@ test_that("mcmc_sample for Analysis works for full borrowing, piecewise exponent skip_on_ci() library(eha) cuts <- c(1, 5, 10) - pem_eha <- eha::pchreg(survival::Surv(time, status) ~ trt + cov1 + cov2, data = as.data.frame(psborrow2::example_matrix), cuts = c(0, cuts, 1000)) + pem_eha <- eha::pchreg( + survival::Surv(time, status) ~ trt + cov1 + cov2, + data = as.data.frame(psborrow2::example_matrix), + cuts = c(0, cuts, 1000) + ) full_pem_bayes_ao <- create_analysis_obj( data_matrix = example_matrix, diff --git a/tests/testthat/test-prepare_stan_data_inputs.R b/tests/testthat/test-prepare_stan_data_inputs.R index a44d60c3..a56e51e3 100644 --- a/tests/testthat/test-prepare_stan_data_inputs.R +++ b/tests/testthat/test-prepare_stan_data_inputs.R @@ -127,7 +127,6 @@ test_that("prepare_stan_data_inputs works with weights", { test_that("prepare_stan_data_inputs works with PEM", { - object <- psborrow2:::.analysis_obj( data_matrix = example_matrix, outcome = outcome_surv_pem( @@ -148,12 +147,10 @@ test_that("prepare_stan_data_inputs works with PEM", { expect_equal(names(result), c("N", "trt", "time", "cens", "N_periods", "Z0", "Z1")) expect_equal(dim(result[["Z0"]])[2], result[["N_periods"]]) expect_equal(dim(result[["Z1"]])[2], result[["N_periods"]]) - }) test_that("prepare_stan_data_inputs works with a fixed power prior", { - object <- psborrow2:::.analysis_obj( data_matrix = example_matrix, outcome = outcome_surv_exponential( @@ -172,6 +169,5 @@ test_that("prepare_stan_data_inputs works with a fixed power prior", { power <- result$power expect_atomic_vector(power) - expect_equal(sum(power==1), sum(1-example_matrix[,'ext'])) - + expect_equal(sum(power == 1), sum(1 - example_matrix[, "ext"])) }) diff --git a/tests/testthat/test-sim_covariate_list.R b/tests/testthat/test-sim_covariate_list.R index 7d728212..c5adfec8 100644 --- a/tests/testthat/test-sim_covariate_list.R +++ b/tests/testthat/test-sim_covariate_list.R @@ -117,7 +117,6 @@ test_that("Covariate `guide` is produced correctly", { }) - test_that("get_vars for `sim_covariate_list` works", { covariate_obj <- sim_covariate_list( list( diff --git a/vignettes/data_simulation.Rmd b/vignettes/data_simulation.Rmd index b35ec3a0..51d3261c 100644 --- a/vignettes/data_simulation.Rmd +++ b/vignettes/data_simulation.Rmd @@ -193,8 +193,8 @@ generate(with_age_score) #> [ reached 'max' / getOption("max.print") -- omitted 4 rows ] #> External Control #> patid ext trt age score score_high score_rounded -#> 151 1 0 58.40067 4.247311 0 4 -#> 152 1 0 57.90136 3.947487 0 4 +#> 151 1 0 58.37822 4.154307 0 4 +#> 152 1 0 57.86995 3.819286 0 4 #> [ reached 'max' / getOption("max.print") -- omitted 4 rows ] ``` diff --git a/vignettes/dataset.Rmd b/vignettes/dataset.Rmd index e870d509..5aa57930 100644 --- a/vignettes/dataset.Rmd +++ b/vignettes/dataset.Rmd @@ -409,12 +409,12 @@ results$summary() # # A tibble: 6 × 10 # variable mean median sd mad q5 q95 rhat ess_bulk # -# 1 lp__ -1618. -1618. 1.49 1.29 -1621. -1.62e+3 1.00 73444. -# 2 beta_trt -0.158 -0.159 0.198 0.197 -0.478 1.71e-1 1.00 86730. -# 3 alpha[1] -3.36 -3.35 0.161 0.160 -3.63 -3.10e+0 1.00 85882. -# 4 alpha[2] -2.40 -2.40 0.0557 0.0557 -2.49 -2.31e+0 1.00 129722. -# 5 tau 1.22 0.502 1.95 0.688 0.00433 4.79e+0 1.00 83471. -# 6 HR_trt 0.871 0.853 0.175 0.167 0.620 1.19e+0 1.00 86730. +# 1 lp__ -1618. -1618. 1.49 1.30 -1621. -1.62e+3 1.00 73716. +# 2 beta_trt -0.157 -0.159 0.199 0.198 -0.481 1.74e-1 1.00 85201. +# 3 alpha[1] -3.36 -3.35 0.162 0.161 -3.63 -3.10e+0 1.00 85683. +# 4 alpha[2] -2.40 -2.40 0.0554 0.0556 -2.49 -2.31e+0 1.00 124051. +# 5 tau 1.21 0.502 1.93 0.689 0.00442 4.81e+0 1.00 82311. +# 6 HR_trt 0.872 0.853 0.177 0.168 0.618 1.19e+0 1.00 85201. # # ℹ 1 more variable: ess_tail ``` @@ -448,41 +448,41 @@ print(draws) # # chain # iteration 1 2 3 4 -# 1 -1618 -1619 -1616 -1621 -# 2 -1620 -1620 -1623 -1621 -# 3 -1620 -1620 -1619 -1621 -# 4 -1618 -1620 -1618 -1619 -# 5 -1617 -1620 -1616 -1621 +# 1 -1617 -1618 -1622 -1618 +# 2 -1618 -1617 -1620 -1617 +# 3 -1619 -1617 -1619 -1616 +# 4 -1619 -1617 -1620 -1617 +# 5 -1619 -1617 -1616 -1619 # # , , variable = beta_trt # # chain -# iteration 1 2 3 4 -# 1 -0.22 -0.41 -0.21 -0.64 -# 2 -0.18 -0.46 -0.67 -0.68 -# 3 -0.13 -0.44 -0.56 0.52 -# 4 -0.27 -0.36 -0.37 0.23 -# 5 -0.44 -0.38 -0.18 0.04 +# iteration 1 2 3 4 +# 1 -0.246 -0.39 0.432 0.204 +# 2 -0.398 -0.12 0.280 0.140 +# 3 -0.073 -0.27 0.124 -0.015 +# 4 0.013 -0.33 0.074 0.111 +# 5 0.077 -0.18 -0.169 0.146 # # , , variable = alpha[1] # # chain # iteration 1 2 3 4 -# 1 -3.4 -3.0 -3.3 -3.0 -# 2 -3.3 -3.1 -2.9 -3.0 -# 3 -3.4 -3.0 -3.0 -3.9 -# 4 -3.3 -3.2 -3.3 -3.6 -# 5 -3.2 -3.2 -3.4 -3.4 +# 1 -3.2 -3.2 -3.8 -3.6 +# 2 -3.1 -3.5 -3.8 -3.5 +# 3 -3.5 -3.2 -3.7 -3.4 +# 4 -3.6 -3.1 -3.7 -3.5 +# 5 -3.5 -3.5 -3.3 -3.6 # # , , variable = alpha[2] # # chain # iteration 1 2 3 4 -# 1 -2.5 -2.3 -2.4 -2.4 -# 2 -2.5 -2.3 -2.5 -2.4 -# 3 -2.5 -2.3 -2.4 -2.4 -# 4 -2.4 -2.3 -2.3 -2.3 -# 5 -2.4 -2.3 -2.4 -2.4 +# 1 -2.4 -2.5 -2.3 -2.4 +# 2 -2.5 -2.5 -2.4 -2.4 +# 3 -2.4 -2.3 -2.3 -2.4 +# 4 -2.4 -2.4 -2.3 -2.4 +# 5 -2.5 -2.4 -2.4 -2.5 # # # ... with 49995 more iterations, and 2 more variables ``` @@ -499,12 +499,12 @@ summary(draws) # # A tibble: 6 × 10 # variable mean median sd mad q5 q95 rhat ess_bulk # -# 1 lp__ -1.62e+3 -1.62e+3 1.49 1.29 -1.62e+3 -1.62e+3 1.00 73444. -# 2 treatment lo… -1.58e-1 -1.59e-1 0.198 0.197 -4.78e-1 1.71e-1 1.00 86730. -# 3 baseline log… -3.36e+0 -3.35e+0 0.161 0.160 -3.63e+0 -3.10e+0 1.00 85882. -# 4 baseline log… -2.40e+0 -2.40e+0 0.0557 0.0557 -2.49e+0 -2.31e+0 1.00 129722. -# 5 commensurabi… 1.22e+0 5.02e-1 1.95 0.688 4.33e-3 4.79e+0 1.00 83471. -# 6 treatment HR 8.71e-1 8.53e-1 0.175 0.167 6.20e-1 1.19e+0 1.00 86730. +# 1 lp__ -1.62e+3 -1.62e+3 1.49 1.30 -1.62e+3 -1.62e+3 1.00 73716. +# 2 treatment lo… -1.57e-1 -1.59e-1 0.199 0.198 -4.81e-1 1.74e-1 1.00 85201. +# 3 baseline log… -3.36e+0 -3.35e+0 0.162 0.161 -3.63e+0 -3.10e+0 1.00 85683. +# 4 baseline log… -2.40e+0 -2.40e+0 0.0554 0.0556 -2.49e+0 -2.31e+0 1.00 124051. +# 5 commensurabi… 1.21e+0 5.02e-1 1.93 0.689 4.42e-3 4.81e+0 1.00 82311. +# 6 treatment HR 8.72e-1 8.53e-1 0.177 0.168 6.18e-1 1.19e+0 1.00 85201. # # ℹ 1 more variable: ess_tail ``` @@ -563,11 +563,11 @@ summarize_draws(draws, ~ quantile(.x, probs = c(0.1, 0.9))) # variable `10%` `90%` # # 1 lp__ -1620. -1616. -# 2 treatment log HR -0.409 0.0966 -# 3 baseline log hazard rate, internal -3.56 -3.15 +# 2 treatment log HR -0.410 0.0993 +# 3 baseline log hazard rate, internal -3.57 -3.15 # 4 baseline log hazard rate, external -2.47 -2.33 -# 5 commensurability parameter 0.0177 3.25 -# 6 treatment HR 0.664 1.10 +# 5 commensurability parameter 0.0174 3.23 +# 6 treatment HR 0.663 1.10 ``` Another useful application of the `posterior` package is the evaluation of the Monte Carlo @@ -577,8 +577,8 @@ standard error for quantiles of a variable of interest: ``` r vm <- extract_variable_matrix(draws, "treatment HR") mcse_quantile(x = vm, probs = c(0.1, 0.5, 0.9)) -# mcse_q10 mcse_q50 mcse_q90 -# 0.0006515 0.0006575 0.0012550 +# mcse_q10 mcse_q50 mcse_q90 +# 0.000636940 0.000662795 0.001266500 ``` `posterior` contains many other helpful functions, as outlined in their diff --git a/vignettes/figure-data_simulation-unnamed-chunk-6-1.png b/vignettes/figure-data_simulation-unnamed-chunk-6-1.png index 214b1545..38e95e72 100644 Binary files a/vignettes/figure-data_simulation-unnamed-chunk-6-1.png and b/vignettes/figure-data_simulation-unnamed-chunk-6-1.png differ diff --git a/vignettes/figure-dataset-mcmc-hist-1.png b/vignettes/figure-dataset-mcmc-hist-1.png index ff7dd688..fdb811ee 100644 Binary files a/vignettes/figure-dataset-mcmc-hist-1.png and b/vignettes/figure-dataset-mcmc-hist-1.png differ diff --git a/vignettes/figure-dataset-mcmc-trace-1.png b/vignettes/figure-dataset-mcmc-trace-1.png index adae99c3..ad9bfef7 100644 Binary files a/vignettes/figure-dataset-mcmc-trace-1.png and b/vignettes/figure-dataset-mcmc-trace-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png index 464be35d..bbc3a844 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png deleted file mode 100644 index 89cad4e6..00000000 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png and /dev/null differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png index 5df9cc8b..21a43c2a 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png index 3179a28c..a779b2ec 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png index 855e1349..cb9a4451 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png index d4df7b7b..7185a07f 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png index 6a8a315b..f44ada32 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png index 05a4ed09..9463e5c2 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png index 977dae80..e7c3bab6 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png index 8a244ff5..1d83b7eb 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png index 70988854..89f3386c 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png index f57d1079..5430e07b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png index bbeae47b..a2f9d9ac 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png index 6fcfc670..630d1570 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png index 38b0ee75..f0a87fc6 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png index 2d12cfa8..039d6da2 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png index c789aa34..9e150fe9 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png index bb5297a0..0a90cdda 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png index 47ac790b..fd228858 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png index c7a0a386..6bd3b9ab 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png index b7ba9a0a..e6f3d154 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png index 1a60823b..fe2a5ef9 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png index ecf8694d..445fa65e 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png differ diff --git a/vignettes/figure-prior_distributions-unnamed-chunk-3-1.png b/vignettes/figure-prior_distributions-unnamed-chunk-3-1.png index 3c0e3c2c..d664a741 100644 Binary files a/vignettes/figure-prior_distributions-unnamed-chunk-3-1.png and b/vignettes/figure-prior_distributions-unnamed-chunk-3-1.png differ diff --git a/vignettes/figure-prior_distributions-unnamed-chunk-4-1.png b/vignettes/figure-prior_distributions-unnamed-chunk-4-1.png index 40a6e898..620f7174 100644 Binary files a/vignettes/figure-prior_distributions-unnamed-chunk-4-1.png and b/vignettes/figure-prior_distributions-unnamed-chunk-4-1.png differ diff --git a/vignettes/figure-propensity_scores-balplot-1.png b/vignettes/figure-propensity_scores-balplot-1.png index 2e495398..fa725c23 100644 Binary files a/vignettes/figure-propensity_scores-balplot-1.png and b/vignettes/figure-propensity_scores-balplot-1.png differ diff --git a/vignettes/figure-propensity_scores-loveplot-1.png b/vignettes/figure-propensity_scores-loveplot-1.png index 114bd95d..e7f8dd6b 100644 Binary files a/vignettes/figure-propensity_scores-loveplot-1.png and b/vignettes/figure-propensity_scores-loveplot-1.png differ diff --git a/vignettes/figure-propensity_scores-matchit_jitter-1.png b/vignettes/figure-propensity_scores-matchit_jitter-1.png index a97d49ed..b8c3f338 100644 Binary files a/vignettes/figure-propensity_scores-matchit_jitter-1.png and b/vignettes/figure-propensity_scores-matchit_jitter-1.png differ diff --git a/vignettes/figure-propensity_scores-plots-1.png b/vignettes/figure-propensity_scores-plots-1.png index 1c176de3..979d1953 100644 Binary files a/vignettes/figure-propensity_scores-plots-1.png and b/vignettes/figure-propensity_scores-plots-1.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-18-1.png b/vignettes/figure-simple_overview-unnamed-chunk-18-1.png index e902d84a..304f3ff0 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-18-1.png and b/vignettes/figure-simple_overview-unnamed-chunk-18-1.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-5-1.png b/vignettes/figure-simple_overview-unnamed-chunk-5-1.png index 66a10245..40a3c400 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-5-1.png and b/vignettes/figure-simple_overview-unnamed-chunk-5-1.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-7-1.png b/vignettes/figure-simple_overview-unnamed-chunk-7-1.png index 2d8f6963..dc4317ea 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-7-1.png and b/vignettes/figure-simple_overview-unnamed-chunk-7-1.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-7-2.png b/vignettes/figure-simple_overview-unnamed-chunk-7-2.png index 9debcdd3..61bd425d 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-7-2.png and b/vignettes/figure-simple_overview-unnamed-chunk-7-2.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-7-3.png b/vignettes/figure-simple_overview-unnamed-chunk-7-3.png index 8d23bc09..6b387656 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-7-3.png and b/vignettes/figure-simple_overview-unnamed-chunk-7-3.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-23-1.png b/vignettes/figure-simulation_study-unnamed-chunk-23-1.png index 3e55d9ff..1e69efd1 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-23-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-23-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-24-1.png b/vignettes/figure-simulation_study-unnamed-chunk-24-1.png index 1382ff45..33e45752 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-24-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-24-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-25-1.png b/vignettes/figure-simulation_study-unnamed-chunk-25-1.png index bcb9327b..08222bae 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-25-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-25-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-26-1.png b/vignettes/figure-simulation_study-unnamed-chunk-26-1.png index bac60e8e..6a8328b1 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-26-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-26-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-27-1.png b/vignettes/figure-simulation_study-unnamed-chunk-27-1.png index e4251ceb..39c8b8d7 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-27-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-27-1.png differ diff --git a/vignettes/figure-weighting-exp_plot-1.png b/vignettes/figure-weighting-exp_plot-1.png index dd43f5a4..afd9060d 100644 Binary files a/vignettes/figure-weighting-exp_plot-1.png and b/vignettes/figure-weighting-exp_plot-1.png differ diff --git a/vignettes/figure-weighting-logistic_plot-1.png b/vignettes/figure-weighting-logistic_plot-1.png index 71456b7e..bef4c132 100644 Binary files a/vignettes/figure-weighting-logistic_plot-1.png and b/vignettes/figure-weighting-logistic_plot-1.png differ diff --git a/vignettes/match_weight_01_methods.Rmd b/vignettes/match_weight_01_methods.Rmd index a393f241..225d25f2 100644 --- a/vignettes/match_weight_01_methods.Rmd +++ b/vignettes/match_weight_01_methods.Rmd @@ -4,7 +4,7 @@ bibliography: references.bib author: - Manoj Khanal - Eli Lilly & Company -date: "2025-05-07" +date: "2026-05-12" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{1. Matching and Weighting Methods} @@ -14,13 +14,13 @@ vignette: > -Using external control in the analysis of clinical trial data can be challenging as the two data source may be heterogenous. In this document, we demonstrate the use of various matching and weighting techniques using readily available packages in R to adjust for imbalance in baseline covariates between the data sets. +Using external control in the analysis of clinical trial data can be challenging as the two data source may be heterogeneous. In this document, we demonstrate the use of various matching and weighting techniques using readily available packages in R to adjust for imbalance in baseline covariates between the data sets. We first load all of the libraries used in this tutorial. ``` r -#Loading the libraries +# Loading the libraries library(SimMultiCorrData) library(ebal) library(ggplot2) @@ -70,30 +70,32 @@ Other variables in the simulated data include To generate the covariates, we first specify the sample sizes, number of continuous and categorical variables, the marginal moments of the covariates, and the correlation matrix. Note that rcorrvar2 requires the correlation matrix to be ordered as ordinal, continuous, Poisson, and Negative Binomial. ``` r -#Simulate correlated covariates -n_t <- 600 #Sample size in trial data -n_ec <- 500 #Sample size in external control data -k_cont <- 5 #Number of continuous variables -k_cat <- 3 #Number of categorical variables -means_cont_tc <- c(55,150,65,35,50) #Vector of means of continuous variables -vars_cont_tc <- c(15,10,6,5,6) -marginal_tc = list(0.2,0.8,0.3) +# Simulate correlated covariates +n_t <- 600 # Sample size in trial data +n_ec <- 500 # Sample size in external control data +k_cont <- 5 # Number of continuous variables +k_cat <- 3 # Number of categorical variables +means_cont_tc <- c(55, 150, 65, 35, 50) # Vector of means of continuous variables +vars_cont_tc <- c(15, 10, 6, 5, 6) +marginal_tc <- list(0.2, 0.8, 0.3) rho_tc <- matrix(0.2, 8, 8) diag(rho_tc) <- 1 -skews_tc <- rep(0.2,5) -skurts_tc <- rep(0.1,5) +skews_tc <- rep(0.2, 5) +skurts_tc <- rep(0.1, 5) ``` # Simulating trial data After specifying the moments of the covariate distribution, we simulate the covariates using `rcorrvar2` function from `SimMultiCorrData` package. ``` r -#Simulating covariates -trial.data <- rcorrvar2(n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, - method = "Fleishman", seed=1, - means = means_cont_tc, vars = vars_cont_tc, # if continuous variables - skews = skews_tc, skurts = skurts_tc, - marginal=marginal_tc, rho = rho_tc) +# Simulating covariates +trial.data <- rcorrvar2( + n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, + method = "Fleishman", seed = 1, + means = means_cont_tc, vars = vars_cont_tc, # if continuous variables + skews = skews_tc, skurts = skurts_tc, + marginal = marginal_tc, rho = rho_tc +) #> #> Constants: Distribution 1 #> @@ -105,18 +107,22 @@ trial.data <- rcorrvar2(n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, #> #> Constants: Distribution 5 #> -#> Constants calculation time: 0.003 minutes +#> Constants calculation time: 0.001 minutes #> Intercorrelation calculation time: 0.001 minutes #> Error loop calculation time: 0 minutes -#> Total Simulation time: 0.003 minutes +#> Total Simulation time: 0.002 minutes ``` ``` r -trial.data <- data.frame(cbind(id=paste("TRIAL",1:n_t,sep=""), trial.data$continuous_variables, - ifelse(trial.data$ordinal_variables==1,1,0))) -colnames(trial.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", - "Sex", "ECOG1") +trial.data <- data.frame(cbind( + id = paste("TRIAL", 1:n_t, sep = ""), trial.data$continuous_variables, + ifelse(trial.data$ordinal_variables == 1, 1, 0) +)) +colnames(trial.data) <- c( + "ID", "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", + "Sex", "ECOG1" +) ``` We now simulate the survival outcome data using the Cox proportional hazards model [@cox1972regression] in which the hazard function $\lambda(t|\boldsymbol X, Z)$ is given by $$\lambda(t|\boldsymbol X, Z)=\lambda_0(t) \exp\{\boldsymbol X \boldsymbol \beta_{Trial} + Z \gamma\},$$ @@ -138,48 +144,48 @@ where $\lambda_0(t)=2$ is the baseline hazard function and assumed to be constan The survival function is given by $$S(t|\boldsymbol X, Z)=exp\{-\Lambda(t|\boldsymbol X, Z)\},$$ where $\Lambda(t|\boldsymbol X, Z)=\int_0^t\lambda(u|\boldsymbol X, Z)du$. The time to event is generated using a inverse CDF method. The censoring time is generated independently from an exponential distribution with `rate=1/4`. ``` r -#Simulate survival outcome using Cox proportional hazards regression model +# Simulate survival outcome using Cox proportional hazards regression model set.seed(1) u <- runif(1) -lambda0 <- 2 #constant baseline hazard -#Simulate treatment indicator in the trial data -trial.data$group <- rbinom(n_t,1,prob=0.7) -beta <- c(0.3,0.1,-0.3,-0.2,-0.12,0.3,1,-1,-0.4) -times <- -log(u)/(lambda0*exp(as.matrix(trial.data[,-1])%*%beta)) #Inverse CDF method -cens.time <- rexp(n_t,rate=1/4) #Censoring time from exponential distribution -event <- as.numeric(times <= cens.time) #Event indicator. 0 is censored. -time <- pmin(times,cens.time) +lambda0 <- 2 # constant baseline hazard +# Simulate treatment indicator in the trial data +trial.data$group <- rbinom(n_t, 1, prob = 0.7) +beta <- c(0.3, 0.1, -0.3, -0.2, -0.12, 0.3, 1, -1, -0.4) +times <- -log(u) / (lambda0 * exp(as.matrix(trial.data[, -1]) %*% beta)) # Inverse CDF method +cens.time <- rexp(n_t, rate = 1 / 4) # Censoring time from exponential distribution +event <- as.numeric(times <= cens.time) # Event indicator. 0 is censored. +time <- pmin(times, cens.time) ``` ``` r -#Combine trial data -trial.data <- data.frame(trial.data,time,event,data="TRIAL") +# Combine trial data +trial.data <- data.frame(trial.data, time, event, data = "TRIAL") ``` The first 10 observations in the RCT data is shown below. |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:-------|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:-----| -|TRIAL1 | 54.59830| 145.8227| 67.60183| 36.69103| 52.73474| 0| 1| 0| 1| 0.4089043| 0|TRIAL | -|TRIAL2 | 50.71835| 151.0235| 63.55137| 32.19172| 45.61235| 1| 1| 1| 1| 0.9721405| 0|TRIAL | -|TRIAL3 | 59.66273| 150.3079| 68.91083| 40.98128| 53.25314| 0| 1| 0| 0| 0.2956919| 0|TRIAL | -|TRIAL4 | 48.05746| 142.5048| 65.83782| 37.38280| 51.73119| 0| 1| 1| 1| 4.3111823| 0|TRIAL | -|TRIAL5 | 51.36553| 150.3589| 61.96020| 33.71680| 49.69150| 0| 1| 0| 0| 0.2644848| 0|TRIAL | -|TRIAL6 | 55.05345| 148.9918| 64.62284| 36.49923| 45.23599| 0| 1| 0| 0| 0.3103544| 0|TRIAL | -|TRIAL7 | 60.73487| 154.1394| 70.10825| 37.82151| 48.60835| 0| 0| 0| 1| 1.7177436| 0|TRIAL | -|TRIAL8 | 51.16550| 147.4829| 64.71295| 34.43417| 47.80836| 0| 1| 1| 1| 5.2652367| 0|TRIAL | -|TRIAL9 | 54.18983| 148.4076| 67.79324| 32.34567| 49.26449| 1| 1| 1| 1| 3.7070752| 1|TRIAL | -|TRIAL10 | 55.79565| 150.5489| 65.86911| 32.92278| 46.61912| 0| 1| 0| 1| 0.4210952| 1|TRIAL | +|TRIAL1 | 55.51419| 148.5092| 65.63899| 33.71898| 55.26178| 0| 1| 0| 1| 0.4089043| 0|TRIAL | +|TRIAL2 | 49.93326| 146.9441| 63.44883| 35.65828| 45.50928| 1| 1| 1| 1| 0.9721405| 0|TRIAL | +|TRIAL3 | 58.51588| 153.5435| 70.53193| 35.53710| 55.89283| 0| 1| 0| 0| 0.2956919| 0|TRIAL | +|TRIAL4 | 48.61226| 147.2873| 64.10615| 31.17663| 55.80523| 0| 1| 0| 1| 4.3111823| 0|TRIAL | +|TRIAL5 | 51.84943| 151.0312| 62.16361| 33.99677| 48.01981| 0| 1| 0| 0| 0.2644848| 0|TRIAL | +|TRIAL6 | 51.81743| 148.1115| 68.69766| 33.82398| 47.32652| 1| 1| 0| 0| 0.3103544| 0|TRIAL | +|TRIAL7 | 58.04648| 150.2572| 71.35831| 38.71276| 50.68694| 0| 1| 0| 1| 1.7177436| 0|TRIAL | +|TRIAL8 | 50.72777| 147.1815| 64.55223| 33.86884| 49.31299| 0| 1| 1| 1| 5.2652367| 0|TRIAL | +|TRIAL9 | 55.16073| 145.0733| 63.84238| 36.28519| 50.27354| 0| 1| 1| 1| 3.9595461| 1|TRIAL | +|TRIAL10 | 54.85184| 146.4757| 65.59890| 36.21950| 47.12495| 1| 1| 0| 1| 1.1788494| 1|TRIAL | The censoring and event rate in the RCT data is ``` r -table(trial.data$event)/nrow(trial.data) +table(trial.data$event) / nrow(trial.data) #> -#> 0 1 -#> 0.385 0.615 +#> 0 1 +#> 0.3883333 0.6116667 ``` The distribution of the outcome time is @@ -187,36 +193,38 @@ The distribution of the outcome time is ``` r summary(trial.data$time) #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 0.00771 0.31937 0.91603 1.61943 2.00923 15.21981 +#> 0.00771 0.35048 0.89904 1.61350 2.04652 21.27312 ``` The summary of each of the baseline covariates and their standardized mean difference between treatment arms is shown below. ``` r -myVars <- c("Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", - "ECOG1") +myVars <- c( + "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", + "ECOG1" +) -## Vector of categorical variables +## Vector of categorical variables catVars <- c("Smoker", "Sex", "ECOG1", "group") -tab1 <- CreateTableOne(vars = myVars, strata = "group" , data = trial.data, factorVars = catVars) +tab1 <- CreateTableOne(vars = myVars, strata = "group", data = trial.data, factorVars = catVars) ``` ``` r -print(tab1,smd=TRUE) +print(tab1, smd = TRUE) #> Stratified by group #> 0 1 p test SMD #> n 178 422 -#> Age (mean (SD)) 54.89 (3.84) 55.05 (3.91) 0.651 0.041 -#> Weight (mean (SD)) 150.29 (3.12) 149.88 (3.18) 0.151 0.129 -#> Height (mean (SD)) 65.29 (2.56) 64.88 (2.40) 0.060 0.166 -#> Biomarker1 (mean (SD)) 35.28 (2.24) 34.88 (2.24) 0.048 0.177 -#> Biomarker2 (mean (SD)) 50.02 (2.52) 49.99 (2.41) 0.902 0.011 -#> Smoker = 1 (%) 34 (19.1) 87 (20.6) 0.756 0.038 -#> Sex = 1 (%) 148 (83.1) 332 (78.7) 0.254 0.114 -#> ECOG1 = 1 (%) 45 (25.3) 137 (32.5) 0.099 0.159 +#> Age (mean (SD)) 54.82 (4.00) 55.08 (3.83) 0.451 0.067 +#> Weight (mean (SD)) 150.18 (3.02) 149.92 (3.24) 0.357 0.084 +#> Height (mean (SD)) 65.30 (2.39) 64.88 (2.49) 0.056 0.173 +#> Biomarker1 (mean (SD)) 35.26 (2.38) 34.89 (2.16) 0.066 0.161 +#> Biomarker2 (mean (SD)) 50.21 (2.52) 49.91 (2.43) 0.181 0.119 +#> Smoker = 1 (%) 32 (18.0) 87 (20.6) 0.530 0.067 +#> Sex = 1 (%) 132 (74.2) 344 (81.5) 0.054 0.178 +#> ECOG1 = 1 (%) 44 (24.7) 139 (32.9) 0.057 0.182 ``` # Simulating external control data @@ -224,14 +232,16 @@ The same set of covariates $X$ were simulated for the external control data as t ``` r -means_cont_ec <- c(55+2,150-2,65-2,35+2,50-2) #Vector of means of continuous variables -vars_cont_ec <- c(14,10,5,5,5) -marginal_ec = list(0.3,0.7,0.4) -ext.cont.data <- rcorrvar2(n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, - method = "Fleishman", seed=3, - means = means_cont_ec, vars = vars_cont_ec, # if continuous variables - skews = skews_tc, skurts = skurts_tc, - marginal=marginal_ec, rho = rho_tc) +means_cont_ec <- c(55 + 2, 150 - 2, 65 - 2, 35 + 2, 50 - 2) # Vector of means of continuous variables +vars_cont_ec <- c(14, 10, 5, 5, 5) +marginal_ec <- list(0.3, 0.7, 0.4) +ext.cont.data <- rcorrvar2( + n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, + method = "Fleishman", seed = 3, + means = means_cont_ec, vars = vars_cont_ec, # if continuous variables + skews = skews_tc, skurts = skurts_tc, + marginal = marginal_ec, rho = rho_tc +) #> #> Constants: Distribution 1 #> @@ -244,35 +254,37 @@ ext.cont.data <- rcorrvar2(n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, #> Constants: Distribution 5 #> #> Constants calculation time: 0.001 minutes -#> Intercorrelation calculation time: 0 minutes +#> Intercorrelation calculation time: 0.001 minutes #> Error loop calculation time: 0 minutes #> Total Simulation time: 0.002 minutes ``` ``` r -ext.cont.data <- data.frame(cbind(id=paste("EC",1:n_ec,sep=""), ext.cont.data$continuous_variables, - ifelse(ext.cont.data$ordinal_variables==1,1,0))) -colnames(ext.cont.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", "Sex", "ECOG1") +ext.cont.data <- data.frame(cbind( + id = paste("EC", 1:n_ec, sep = ""), ext.cont.data$continuous_variables, + ifelse(ext.cont.data$ordinal_variables == 1, 1, 0) +)) +colnames(ext.cont.data) <- c("ID", "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", "ECOG1") ``` The same generating mechanism used for the RCT data was used to simulate survival time for the external controls. ``` r -#Simulate survival outcome using Cox proportional hazards regression model +# Simulate survival outcome using Cox proportional hazards regression model set.seed(1111) u <- runif(1) -lambda0 <- 6 #constant baseline hazard -beta <- c(-0.27,-0.1,0.3,0.2,0.1,-0.31,-1,1) -times <- -log(u)/(lambda0*exp(as.matrix(ext.cont.data[,-1])%*%beta)) #Inverse CDF method -cens.time <- rexp(n_ec,rate=3) #Censoring time from exponential distribution -event <- as.numeric(times <= cens.time) #Event indicator. 0 is censored. -time <- pmin(times,cens.time) - -#Simulate treatment indicator in the trial data +lambda0 <- 6 # constant baseline hazard +beta <- c(-0.27, -0.1, 0.3, 0.2, 0.1, -0.31, -1, 1) +times <- -log(u) / (lambda0 * exp(as.matrix(ext.cont.data[, -1]) %*% beta)) # Inverse CDF method +cens.time <- rexp(n_ec, rate = 3) # Censoring time from exponential distribution +event <- as.numeric(times <= cens.time) # Event indicator. 0 is censored. +time <- pmin(times, cens.time) + +# Simulate treatment indicator in the trial data group <- 0 -ext.cont.data <- data.frame(ext.cont.data,group,time,event,data="EC") +ext.cont.data <- data.frame(ext.cont.data, group, time, event, data = "EC") ``` The first 10 observations in the external control data is shown below. @@ -284,26 +296,26 @@ knitr::kable(head(ext.cont.data, 10)) |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:----|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:----| -|EC1 | 56.22299| 147.2548| 68.01663| 37.43258| 46.26655| 0| 1| 0| 0| 0.0254106| 1|EC | -|EC2 | 51.53777| 142.9052| 59.87302| 37.63385| 45.57254| 1| 1| 1| 0| 0.0275893| 1|EC | -|EC3 | 61.84501| 146.4668| 61.10423| 37.99792| 47.80536| 0| 0| 0| 0| 0.0269829| 0|EC | -|EC4 | 51.65163| 142.8451| 63.08088| 38.97152| 45.82025| 0| 0| 1| 0| 0.0021760| 1|EC | -|EC5 | 51.60718| 150.1815| 62.41377| 36.57902| 47.97870| 1| 1| 1| 0| 0.0263622| 1|EC | -|EC6 | 53.71580| 146.6556| 61.32083| 36.67672| 48.62139| 1| 1| 1| 0| 0.0417934| 1|EC | -|EC7 | 62.89995| 144.0576| 63.05230| 38.20200| 48.28190| 0| 1| 0| 0| 0.0368207| 0|EC | -|EC8 | 58.22797| 149.8744| 63.76035| 35.20858| 48.10369| 0| 1| 0| 0| 0.0380045| 0|EC | -|EC9 | 54.67203| 142.4845| 61.59342| 37.91245| 47.51907| 0| 1| 0| 0| 0.0379876| 0|EC | -|EC10 | 50.52381| 140.2529| 60.53011| 31.34492| 42.98157| 0| 1| 1| 0| 0.0441761| 1|EC | +|EC1 | 53.56019| 146.2147| 61.51467| 40.84616| 49.72928| 1| 1| 1| 0| 0.0140666| 1|EC | +|EC2 | 63.77695| 152.2142| 62.02556| 38.95175| 49.78448| 0| 0| 0| 0| 0.0316781| 0|EC | +|EC3 | 56.99771| 149.1450| 62.24326| 36.20215| 44.16568| 0| 1| 0| 0| 0.0269829| 0|EC | +|EC4 | 59.12390| 149.8345| 59.44829| 39.34725| 49.72269| 0| 1| 0| 0| 0.4541084| 1|EC | +|EC5 | 61.69661| 147.6965| 64.49722| 37.18740| 52.32832| 0| 1| 0| 0| 0.1916924| 1|EC | +|EC6 | 61.68715| 151.9682| 64.44483| 37.41585| 51.00122| 0| 1| 0| 0| 0.0889376| 0|EC | +|EC7 | 55.67649| 152.5314| 62.15795| 38.39580| 45.32034| 0| 1| 1| 0| 0.0368207| 0|EC | +|EC8 | 53.59508| 145.5144| 64.03617| 36.10634| 47.25504| 0| 1| 1| 0| 0.0150613| 1|EC | +|EC9 | 56.72507| 150.8987| 59.97046| 36.71559| 47.23557| 0| 0| 0| 0| 0.0379876| 0|EC | +|EC10 | 53.92706| 148.9728| 63.72740| 39.32683| 48.73124| 0| 0| 0| 0| 0.0115712| 1|EC | The censoring and event rate in the trial data is ``` r -table(ext.cont.data$event)/nrow(ext.cont.data) +table(ext.cont.data$event) / nrow(ext.cont.data) #> -#> 0 1 -#> 0.306 0.694 +#> 0 1 +#> 0.33 0.67 ``` The distribution of the outcome time is @@ -311,7 +323,7 @@ The distribution of the outcome time is ``` r summary(ext.cont.data$time) #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 1.655e-05 2.286e-02 5.027e-02 9.448e-02 1.115e-01 8.698e-01 +#> 1.655e-05 2.108e-02 5.736e-02 9.317e-02 1.281e-01 8.529e-01 ``` # Merging trial and external control data @@ -331,14 +343,14 @@ names(trial.data) #> [1] "ID" "Age" "Weight" "Height" "Biomarker1" #> [6] "Biomarker2" "Smoker" "Sex" "ECOG1" "group" #> [11] "time" "event" "data" -#MS I recommend adding a statement that forces the colnames to be the same, like names(ex.cont.data)[1:10]=names(trial.data)[1:10] +# MS I recommend adding a statement that forces the colnames to be the same, like names(ex.cont.data)[1:10]=names(trial.data)[1:10] ``` Now, we merge the two datasets. ``` r -final.data <- data.frame(bind_rows(trial.data,ext.cont.data)) +final.data <- data.frame(bind_rows(trial.data, ext.cont.data)) ``` @@ -351,16 +363,16 @@ knitr::kable(head(final.data, 10)) |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:-------|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:-----| -|TRIAL1 | 54.59830| 145.8227| 67.60183| 36.69103| 52.73474| 0| 1| 0| 1| 0.4089043| 0|TRIAL | -|TRIAL2 | 50.71835| 151.0235| 63.55137| 32.19172| 45.61235| 1| 1| 1| 1| 0.9721405| 0|TRIAL | -|TRIAL3 | 59.66273| 150.3079| 68.91083| 40.98128| 53.25314| 0| 1| 0| 0| 0.2956919| 0|TRIAL | -|TRIAL4 | 48.05746| 142.5048| 65.83782| 37.38280| 51.73119| 0| 1| 1| 1| 4.3111823| 0|TRIAL | -|TRIAL5 | 51.36553| 150.3589| 61.96020| 33.71680| 49.69150| 0| 1| 0| 0| 0.2644848| 0|TRIAL | -|TRIAL6 | 55.05345| 148.9918| 64.62284| 36.49923| 45.23599| 0| 1| 0| 0| 0.3103544| 0|TRIAL | -|TRIAL7 | 60.73487| 154.1394| 70.10825| 37.82151| 48.60835| 0| 0| 0| 1| 1.7177436| 0|TRIAL | -|TRIAL8 | 51.16550| 147.4829| 64.71295| 34.43417| 47.80836| 0| 1| 1| 1| 5.2652367| 0|TRIAL | -|TRIAL9 | 54.18983| 148.4076| 67.79324| 32.34567| 49.26449| 1| 1| 1| 1| 3.7070752| 1|TRIAL | -|TRIAL10 | 55.79565| 150.5489| 65.86911| 32.92278| 46.61912| 0| 1| 0| 1| 0.4210952| 1|TRIAL | +|TRIAL1 | 55.51419| 148.5092| 65.63899| 33.71898| 55.26178| 0| 1| 0| 1| 0.4089043| 0|TRIAL | +|TRIAL2 | 49.93326| 146.9441| 63.44883| 35.65828| 45.50928| 1| 1| 1| 1| 0.9721405| 0|TRIAL | +|TRIAL3 | 58.51588| 153.5435| 70.53193| 35.53710| 55.89283| 0| 1| 0| 0| 0.2956919| 0|TRIAL | +|TRIAL4 | 48.61226| 147.2873| 64.10615| 31.17663| 55.80523| 0| 1| 0| 1| 4.3111823| 0|TRIAL | +|TRIAL5 | 51.84943| 151.0312| 62.16361| 33.99677| 48.01981| 0| 1| 0| 0| 0.2644848| 0|TRIAL | +|TRIAL6 | 51.81743| 148.1115| 68.69766| 33.82398| 47.32652| 1| 1| 0| 0| 0.3103544| 0|TRIAL | +|TRIAL7 | 58.04648| 150.2572| 71.35831| 38.71276| 50.68694| 0| 1| 0| 1| 1.7177436| 0|TRIAL | +|TRIAL8 | 50.72777| 147.1815| 64.55223| 33.86884| 49.31299| 0| 1| 1| 1| 5.2652367| 0|TRIAL | +|TRIAL9 | 55.16073| 145.0733| 63.84238| 36.28519| 50.27354| 0| 1| 1| 1| 3.9595461| 1|TRIAL | +|TRIAL10 | 54.85184| 146.4757| 65.59890| 36.21950| 47.12495| 1| 1| 0| 1| 1.1788494| 1|TRIAL | @@ -373,16 +385,16 @@ knitr::kable(tail(final.data, 10)) | |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:----|:-----|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:----| -|1091 |EC491 | 53.18940| 143.3551| 57.69727| 35.72714| 47.69671| 1| 1| 1| 0| 0.1025154| 1|EC | -|1092 |EC492 | 56.51178| 155.0675| 66.71788| 41.77618| 48.43950| 0| 0| 0| 0| 0.0110014| 1|EC | -|1093 |EC493 | 63.14821| 143.9180| 60.76872| 36.69923| 47.32070| 0| 1| 1| 0| 0.3981900| 1|EC | -|1094 |EC494 | 57.53298| 143.4756| 61.48325| 36.09593| 45.08466| 1| 1| 1| 0| 0.1298631| 1|EC | -|1095 |EC495 | 53.64071| 146.6340| 65.04088| 35.80434| 44.99437| 0| 1| 1| 0| 0.0168014| 1|EC | -|1096 |EC496 | 49.79612| 149.9081| 60.27894| 33.37305| 47.14132| 1| 1| 1| 0| 0.0616193| 1|EC | -|1097 |EC497 | 56.79361| 147.6758| 62.65351| 42.68411| 44.67903| 0| 1| 0| 0| 0.0633529| 1|EC | -|1098 |EC498 | 59.93031| 147.8859| 66.40048| 37.26859| 47.07702| 0| 1| 1| 0| 0.0419240| 1|EC | -|1099 |EC499 | 52.88813| 148.9676| 60.61804| 36.08088| 51.92952| 0| 1| 1| 0| 0.0295196| 0|EC | -|1100 |EC500 | 55.23686| 148.4539| 62.51881| 36.02603| 49.34816| 0| 1| 1| 0| 0.0409076| 1|EC | +|1091 |EC491 | 69.53940| 159.5926| 67.71536| 39.26545| 52.12297| 0| 1| 0| 0| 0.8529239| 0|EC | +|1092 |EC492 | 53.47004| 138.5339| 57.64346| 33.99965| 45.82407| 1| 1| 1| 0| 0.0578393| 0|EC | +|1093 |EC493 | 56.55503| 152.2442| 63.43464| 37.62576| 44.14769| 1| 0| 0| 0| 0.1079433| 1|EC | +|1094 |EC494 | 60.55909| 152.8971| 64.70891| 41.42971| 48.20160| 0| 1| 0| 0| 0.1439600| 1|EC | +|1095 |EC495 | 57.59334| 147.7045| 63.79287| 41.56532| 51.17375| 1| 1| 0| 0| 0.0498988| 1|EC | +|1096 |EC496 | 61.84329| 147.6566| 67.33102| 36.45325| 52.91298| 0| 1| 0| 0| 0.0927341| 1|EC | +|1097 |EC497 | 63.15644| 146.0702| 59.24182| 39.62219| 46.17372| 1| 1| 0| 0| 0.1711318| 0|EC | +|1098 |EC498 | 55.08102| 148.2339| 63.41347| 40.50234| 48.40309| 1| 0| 0| 0| 0.0179590| 1|EC | +|1099 |EC499 | 57.80213| 149.0080| 63.27848| 32.43753| 49.81902| 1| 1| 0| 0| 0.0295196| 0|EC | +|1100 |EC500 | 56.92428| 148.5391| 63.78767| 35.68987| 49.34849| 0| 1| 0| 0| 0.0514581| 0|EC | @@ -390,23 +402,23 @@ We examine the standardized mean difference for covariates between the RCT and e ``` r -tab2 <- CreateTableOne(vars = myVars, strata = "data" , data = final.data, factorVars = catVars) +tab2 <- CreateTableOne(vars = myVars, strata = "data", data = final.data, factorVars = catVars) ``` ``` r -print(tab2,smd=TRUE) +print(tab2, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 500 600 -#> Age (mean (SD)) 57.00 (3.75) 55.00 (3.89) <0.001 0.524 -#> Weight (mean (SD)) 148.00 (3.16) 150.00 (3.17) <0.001 0.633 -#> Height (mean (SD)) 63.00 (2.23) 65.00 (2.45) <0.001 0.854 -#> Biomarker1 (mean (SD)) 37.00 (2.23) 35.00 (2.25) <0.001 0.894 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.00 (2.44) <0.001 0.856 -#> Smoker = 1 (%) 160 (32.0) 121 (20.2) <0.001 0.272 -#> Sex = 1 (%) 347 (69.4) 480 (80.0) <0.001 0.246 -#> ECOG1 = 1 (%) 197 (39.4) 182 (30.3) 0.002 0.191 +#> Age (mean (SD)) 57.00 (3.75) 55.00 (3.88) <0.001 0.524 +#> Weight (mean (SD)) 148.00 (3.17) 150.00 (3.17) <0.001 0.631 +#> Height (mean (SD)) 63.00 (2.24) 65.00 (2.47) <0.001 0.848 +#> Biomarker1 (mean (SD)) 37.00 (2.24) 35.00 (2.24) <0.001 0.895 +#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.00 (2.46) <0.001 0.853 +#> Smoker = 1 (%) 161 (32.2) 119 (19.8) <0.001 0.285 +#> Sex = 1 (%) 346 (69.2) 476 (79.3) <0.001 0.233 +#> ECOG1 = 1 (%) 193 (38.6) 183 (30.5) 0.006 0.171 ``` Note that the standardized mean difference for all covariates is large. Next, we will conduct matching/weighting approach to reduce difference in baseline characteristics. @@ -415,23 +427,23 @@ We also examine the standardized mean difference for covariates between the trea ``` r -tab2 <- CreateTableOne(vars = myVars, strata = "group" , data = final.data, factorVars = catVars) +tab2 <- CreateTableOne(vars = myVars, strata = "group", data = final.data, factorVars = catVars) ``` ``` r -print(tab2,smd=TRUE) +print(tab2, smd = TRUE) #> Stratified by group #> 0 1 p test SMD #> n 678 422 -#> Age (mean (SD)) 56.45 (3.88) 55.05 (3.91) <0.001 0.359 -#> Weight (mean (SD)) 148.60 (3.30) 149.88 (3.18) <0.001 0.395 -#> Height (mean (SD)) 63.60 (2.53) 64.88 (2.40) <0.001 0.518 -#> Biomarker1 (mean (SD)) 36.55 (2.36) 34.88 (2.24) <0.001 0.725 -#> Biomarker2 (mean (SD)) 48.53 (2.47) 49.99 (2.41) <0.001 0.599 -#> Smoker = 1 (%) 194 (28.6) 87 (20.6) 0.004 0.186 -#> Sex = 1 (%) 495 (73.0) 332 (78.7) 0.041 0.133 -#> ECOG1 = 1 (%) 242 (35.7) 137 (32.5) 0.303 0.068 +#> Age (mean (SD)) 56.43 (3.94) 55.08 (3.83) <0.001 0.347 +#> Weight (mean (SD)) 148.57 (3.27) 149.92 (3.24) <0.001 0.415 +#> Height (mean (SD)) 63.60 (2.50) 64.88 (2.49) <0.001 0.510 +#> Biomarker1 (mean (SD)) 36.54 (2.40) 34.89 (2.16) <0.001 0.723 +#> Biomarker2 (mean (SD)) 48.58 (2.50) 49.91 (2.43) <0.001 0.541 +#> Smoker = 1 (%) 193 (28.5) 87 (20.6) 0.005 0.183 +#> Sex = 1 (%) 478 (70.5) 344 (81.5) <0.001 0.260 +#> ECOG1 = 1 (%) 237 (35.0) 139 (32.9) 0.535 0.043 ``` # Propensity scores overlap @@ -440,19 +452,22 @@ Before applying the matching/weighting methods, we investigate the overlapping o ``` r -final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) -ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - family=binomial) -psfit=predict(ps.logit,type = "response",data=final.data) +final.data$indicator <- ifelse(final.data$data == "TRIAL", 1, 0) +ps.logit <- glm( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + family = binomial +) +psfit <- predict(ps.logit, type = "response", data = final.data) -ps_trial <- psfit[final.data$indicator==1] -ps_extcont <- psfit[final.data$indicator==0] +ps_trial <- psfit[final.data$indicator == 1] +ps_extcont <- psfit[final.data$indicator == 0] ``` ``` r -overlap(list(ps_trial=ps_trial, ps_extcont=ps_extcont),plot=TRUE) +overlap(list(ps_trial = ps_trial, ps_extcont = ps_extcont), plot = TRUE) ```
@@ -462,7 +477,7 @@ overlap(list(ps_trial=ps_trial, ps_extcont=ps_extcont),plot=TRUE) ``` #> $OV -#> [1] 0.3808273 +#> [1] 0.3854194 ``` # Matching Methods @@ -480,17 +495,20 @@ All of the matching methods can be conducted using the `MatchIt` package. The ma ``` r -final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) +final.data$indicator <- ifelse(final.data$data == "TRIAL", 1, 0) ``` ## PSML This matching method is a variation of nearest neighbour or greedy matching that selects matches based on the difference in the logit of the propensity score, up to a certain distance (caliper) [@austin2011optimal]. We selected a caliper width of 0.2 of the standard deviation of the logit of the propensity score, where the propensity score is estimated using a logistic regression. ``` r -m.out.nearest.ratio1.caliper.lps <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, estimand="ATT",data = final.data, - method="nearest",ratio=1,caliper=0.20, - distance="glm",link="linear.logit",replace=FALSE, - m.order="largest") +m.out.nearest.ratio1.caliper.lps <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", data = final.data, + method = "nearest", ratio = 1, caliper = 0.20, + distance = "glm", link = "linear.logit", replace = FALSE, + m.order = "largest" +) #> Warning: Fewer control units than treated units; not all treated units will get #> a match. summary(m.out.nearest.ratio1.caliper.lps) @@ -503,83 +521,83 @@ summary(m.out.nearest.ratio1.caliper.lps) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 2.0938 -1.7165 1.8941 1.0635 0.4152 -#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 -#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 -#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 -#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 -#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 -#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 -#> Sex 0.8000 0.6940 0.2650 . 0.1060 -#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 +#> distance 2.0399 -1.6492 1.8996 1.0714 0.4135 +#> Age 55.0001 57.0003 -0.5155 1.0679 0.1491 +#> Weight 150.0006 148.0001 0.6303 1.0033 0.1714 +#> Height 65.0010 63.0002 0.8102 1.2116 0.2249 +#> Biomarker1 34.9998 36.9998 -0.8948 0.9993 0.2396 +#> Biomarker2 50.0003 47.9994 0.8139 1.2201 0.2240 +#> Smoker 0.1983 0.3220 -0.3101 . 0.1237 +#> Sex 0.7933 0.6920 0.2503 . 0.1013 +#> ECOG1 0.3050 0.3860 -0.1759 . 0.0810 #> eCDF Max -#> distance 0.6703 -#> Age 0.2350 -#> Weight 0.2717 -#> Height 0.3237 -#> Biomarker1 0.3640 -#> Biomarker2 0.3387 -#> Smoker 0.1183 -#> Sex 0.1060 -#> ECOG1 0.0907 +#> distance 0.6710 +#> Age 0.2287 +#> Weight 0.2627 +#> Height 0.3300 +#> Biomarker1 0.3837 +#> Biomarker2 0.3457 +#> Smoker 0.1237 +#> Sex 0.1013 +#> ECOG1 0.0810 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.2951 -0.0827 0.1878 1.2711 0.0485 -#> Age 56.0086 56.0781 -0.0179 1.4496 0.0281 -#> Weight 148.8976 148.9481 -0.0159 1.0788 0.0129 -#> Height 63.9709 63.8057 0.0673 1.2080 0.0235 -#> Biomarker1 35.7594 36.0287 -0.1200 1.3080 0.0436 -#> Biomarker2 48.9661 48.6500 0.1293 1.0174 0.0335 -#> Smoker 0.2374 0.2831 -0.1138 . 0.0457 -#> Sex 0.7717 0.7671 0.0114 . 0.0046 -#> ECOG1 0.3836 0.3927 -0.0199 . 0.0091 +#> distance 0.3235 -0.0285 0.1812 1.2964 0.0459 +#> Age 55.9123 56.1154 -0.0523 1.2509 0.0276 +#> Weight 149.0808 148.6717 0.1289 0.7273 0.0419 +#> Height 63.9712 63.8891 0.0332 0.9906 0.0251 +#> Biomarker1 35.6912 36.1098 -0.1873 1.1815 0.0645 +#> Biomarker2 48.8531 48.9186 -0.0266 1.0961 0.0167 +#> Smoker 0.2731 0.2454 0.0697 . 0.0278 +#> Sex 0.7593 0.7361 0.0572 . 0.0231 +#> ECOG1 0.3750 0.3611 0.0302 . 0.0139 #> eCDF Max Std. Pair Dist. -#> distance 0.1872 0.1881 -#> Age 0.0776 1.0502 -#> Weight 0.0411 1.1119 -#> Height 0.0685 0.8779 -#> Biomarker1 0.0959 0.9835 -#> Biomarker2 0.0822 0.9921 -#> Smoker 0.0457 0.9332 -#> Sex 0.0046 0.9018 -#> ECOG1 0.0091 1.0728 +#> distance 0.1806 0.1818 +#> Age 0.0880 1.0436 +#> Weight 0.0972 1.0238 +#> Height 0.0833 0.9004 +#> Biomarker1 0.1667 1.0620 +#> Biomarker2 0.0602 0.8814 +#> Smoker 0.0278 0.9753 +#> Sex 0.0231 0.9490 +#> ECOG1 0.0139 0.9955 #> #> Sample Sizes: #> Control Treated #> All 500 600 -#> Matched 219 219 -#> Unmatched 281 381 +#> Matched 216 216 +#> Unmatched 284 384 #> Discarded 0 0 ``` ``` r -final.data$ratio1_caliper_weights_lps = m.out.nearest.ratio1.caliper.lps$weights +final.data$ratio1_caliper_weights_lps <- m.out.nearest.ratio1.caliper.lps$weights ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights_lps) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~ratio1_caliper_weights_lps) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 219.00 219.00 -#> Age (mean (SD)) 56.08 (3.37) 56.01 (4.06) 0.845 0.019 -#> Weight (mean (SD)) 148.95 (2.99) 148.90 (3.11) 0.863 0.017 -#> Height (mean (SD)) 63.81 (2.07) 63.97 (2.27) 0.426 0.076 -#> Biomarker1 (mean (SD)) 36.03 (1.97) 35.76 (2.26) 0.183 0.127 -#> Biomarker2 (mean (SD)) 48.65 (2.33) 48.97 (2.35) 0.157 0.135 -#> Smoker = 1 (%) 62.0 (28.3) 52.0 (23.7) 0.277 0.104 -#> Sex = 1 (%) 168.0 (76.7) 169.0 (77.2) 0.910 0.011 -#> ECOG1 = 1 (%) 86.0 (39.3) 84.0 (38.4) 0.845 0.019 +#> n 216.00 216.00 +#> Age (mean (SD)) 56.12 (3.50) 55.91 (3.92) 0.570 0.055 +#> Weight (mean (SD)) 148.67 (3.23) 149.08 (2.76) 0.157 0.136 +#> Height (mean (SD)) 63.89 (2.07) 63.97 (2.06) 0.680 0.040 +#> Biomarker1 (mean (SD)) 36.11 (2.04) 35.69 (2.21) 0.041 0.197 +#> Biomarker2 (mean (SD)) 48.92 (2.08) 48.85 (2.18) 0.750 0.031 +#> Smoker = 1 (%) 53.0 (24.5) 59.0 (27.3) 0.511 0.063 +#> Sex = 1 (%) 159.0 (73.6) 164.0 (75.9) 0.580 0.053 +#> ECOG1 = 1 (%) 78.0 (36.1) 81.0 (37.5) 0.765 0.029 ``` @@ -603,9 +621,12 @@ This matching method is similar to PSML except the caliper width of 0.2 is based ``` r -m.out.nearest.ratio1.caliper <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, estimand="ATT",data = final.data, - method = "nearest", ratio = 1, caliper=0.2, replace=FALSE) +m.out.nearest.ratio1.caliper <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", data = final.data, + method = "nearest", ratio = 1, caliper = 0.2, replace = FALSE +) #> Warning: Fewer control units than treated units; not all treated units will get #> a match. summary(m.out.nearest.ratio1.caliper) @@ -617,84 +638,84 @@ summary(m.out.nearest.ratio1.caliper) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 -#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 -#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 -#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 -#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 -#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 -#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 -#> Sex 0.8000 0.6940 0.2650 . 0.1060 -#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 +#> distance 0.7838 0.2594 2.1718 0.8578 0.4135 +#> Age 55.0001 57.0003 -0.5155 1.0679 0.1491 +#> Weight 150.0006 148.0001 0.6303 1.0033 0.1714 +#> Height 65.0010 63.0002 0.8102 1.2116 0.2249 +#> Biomarker1 34.9998 36.9998 -0.8948 0.9993 0.2396 +#> Biomarker2 50.0003 47.9994 0.8139 1.2201 0.2240 +#> Smoker 0.1983 0.3220 -0.3101 . 0.1237 +#> Sex 0.7933 0.6920 0.2503 . 0.1013 +#> ECOG1 0.3050 0.3860 -0.1759 . 0.0810 #> eCDF Max -#> distance 0.6703 -#> Age 0.2350 -#> Weight 0.2717 -#> Height 0.3237 -#> Biomarker1 0.3640 -#> Biomarker2 0.3387 -#> Smoker 0.1183 -#> Sex 0.1060 -#> ECOG1 0.0907 +#> distance 0.6710 +#> Age 0.2287 +#> Weight 0.2627 +#> Height 0.3300 +#> Biomarker1 0.3837 +#> Biomarker2 0.3457 +#> Smoker 0.1237 +#> Sex 0.1013 +#> ECOG1 0.0810 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.5419 0.4959 0.1904 1.2062 0.0390 -#> Age 55.9874 56.0864 -0.0254 1.4798 0.0323 -#> Weight 149.1182 148.8877 0.0728 1.0497 0.0204 -#> Height 64.1411 63.8307 0.1265 1.4769 0.0302 -#> Biomarker1 35.8381 35.9810 -0.0636 1.2859 0.0301 -#> Biomarker2 49.0981 48.7663 0.1357 1.0359 0.0368 -#> Smoker 0.2350 0.2950 -0.1495 . 0.0600 -#> Sex 0.7400 0.7700 -0.0750 . 0.0300 -#> ECOG1 0.3400 0.3900 -0.1088 . 0.0500 +#> distance 0.5484 0.5043 0.1825 1.2207 0.0393 +#> Age 55.9702 56.0739 -0.0267 1.3404 0.0303 +#> Weight 149.3103 148.8024 0.1600 0.9051 0.0484 +#> Height 64.1223 63.8898 0.0942 1.3449 0.0286 +#> Biomarker1 36.1294 36.1398 -0.0046 1.1428 0.0211 +#> Biomarker2 49.2931 48.9730 0.1302 1.3606 0.0287 +#> Smoker 0.2338 0.2338 0.0000 . 0.0000 +#> Sex 0.7662 0.7413 0.0614 . 0.0249 +#> ECOG1 0.3184 0.3532 -0.0756 . 0.0348 #> eCDF Max Std. Pair Dist. -#> distance 0.110 0.1909 -#> Age 0.080 1.0475 -#> Weight 0.060 1.0672 -#> Height 0.085 0.9194 -#> Biomarker1 0.075 0.9429 -#> Biomarker2 0.085 0.9834 -#> Smoker 0.060 0.8723 -#> Sex 0.030 0.9250 -#> ECOG1 0.050 1.0442 +#> distance 0.1194 0.1834 +#> Age 0.0846 1.0346 +#> Weight 0.0995 1.0964 +#> Height 0.0846 0.9505 +#> Biomarker1 0.0796 1.0343 +#> Biomarker2 0.0896 0.9370 +#> Smoker 0.0000 0.3582 +#> Sex 0.0249 0.9461 +#> ECOG1 0.0348 1.0266 #> #> Sample Sizes: #> Control Treated #> All 500 600 -#> Matched 200 200 -#> Unmatched 300 400 +#> Matched 201 201 +#> Unmatched 299 399 #> Discarded 0 0 ``` ``` r -final.data$ratio1_caliper_weights = m.out.nearest.ratio1.caliper$weights +final.data$ratio1_caliper_weights <- m.out.nearest.ratio1.caliper$weights ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~ratio1_caliper_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 200.00 200.00 -#> Age (mean (SD)) 56.09 (3.33) 55.99 (4.05) 0.789 0.027 -#> Weight (mean (SD)) 148.89 (2.95) 149.12 (3.02) 0.440 0.077 -#> Height (mean (SD)) 63.83 (2.08) 64.14 (2.52) 0.179 0.134 -#> Biomarker1 (mean (SD)) 35.98 (2.00) 35.84 (2.26) 0.503 0.067 -#> Biomarker2 (mean (SD)) 48.77 (2.34) 49.10 (2.38) 0.159 0.141 -#> Smoker = 1 (%) 59.0 (29.5) 47.0 (23.5) 0.174 0.136 -#> Sex = 1 (%) 154.0 (77.0) 148.0 (74.0) 0.486 0.070 -#> ECOG1 = 1 (%) 78.0 (39.0) 68.0 (34.0) 0.299 0.104 +#> n 201.00 201.00 +#> Age (mean (SD)) 56.07 (3.53) 55.97 (4.09) 0.785 0.027 +#> Weight (mean (SD)) 148.80 (3.24) 149.31 (3.09) 0.108 0.160 +#> Height (mean (SD)) 63.89 (2.07) 64.12 (2.40) 0.297 0.104 +#> Biomarker1 (mean (SD)) 36.14 (2.05) 36.13 (2.19) 0.961 0.005 +#> Biomarker2 (mean (SD)) 48.97 (2.10) 49.29 (2.45) 0.160 0.140 +#> Smoker = 1 (%) 47.0 (23.4) 47.0 (23.4) 1.000 <0.001 +#> Sex = 1 (%) 149.0 (74.1) 154.0 (76.6) 0.563 0.058 +#> ECOG1 = 1 (%) 71.0 (35.3) 64.0 (31.8) 0.460 0.074 ``` @@ -735,98 +756,52 @@ If $w_k=1$ for all $k$ then the distance is the standard Mahalanobis distance. H ``` r -m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 ,replace=TRUE, estimand="ATT", - data = final.data, method = "genetic", - ratio = 1,pop.size=200) +m.out.genetic.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + replace = TRUE, estimand = "ATT", + data = final.data, method = "genetic", + ratio = 1, pop.size = 200 +) +#> Error in `matchit2genetic()`: +#> ! The packages "Matching" and "rgenoud" are required. summary(m.out.genetic.ratio1) -#> -#> Call: -#> matchit(formula = indicator ~ Age + Weight + Height + Biomarker1 + -#> Biomarker2 + Smoker + Sex + ECOG1, data = final.data, method = "genetic", -#> estimand = "ATT", replace = TRUE, ratio = 1, pop.size = 200) -#> -#> Summary of Balance for All Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 -#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 -#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 -#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 -#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 -#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 -#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 -#> Sex 0.8000 0.6940 0.2650 . 0.1060 -#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 -#> eCDF Max -#> distance 0.6703 -#> Age 0.2350 -#> Weight 0.2717 -#> Height 0.3237 -#> Biomarker1 0.3640 -#> Biomarker2 0.3387 -#> Smoker 0.1183 -#> Sex 0.1060 -#> ECOG1 0.0907 -#> -#> Summary of Balance for Matched Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7861 0.7314 0.2266 1.0791 0.0735 -#> Age 55.0006 55.6050 -0.1554 1.6735 0.0569 -#> Weight 150.0000 149.3017 0.2206 1.3229 0.0536 -#> Height 65.0002 64.8737 0.0515 1.4409 0.0301 -#> Biomarker1 35.0004 35.4694 -0.2089 1.0202 0.0594 -#> Biomarker2 49.9996 49.5484 0.1846 1.5596 0.0482 -#> Smoker 0.2017 0.1933 0.0208 . 0.0083 -#> Sex 0.8000 0.8000 -0.0000 . 0.0000 -#> ECOG1 0.3033 0.3033 -0.0000 . 0.0000 -#> eCDF Max Std. Pair Dist. -#> distance 0.2583 0.3565 -#> Age 0.1667 0.5881 -#> Weight 0.1683 0.7942 -#> Height 0.0983 0.2757 -#> Biomarker1 0.1417 0.7685 -#> Biomarker2 0.1633 0.7310 -#> Smoker 0.0083 0.0208 -#> Sex 0.0000 0.0000 -#> ECOG1 0.0000 0.0000 -#> -#> Sample Sizes: -#> Control Treated -#> All 500. 600 -#> Matched (ESS) 48.69 600 -#> Matched 153. 600 -#> Unmatched 347. 0 -#> Discarded 0. 0 +#> Error in `h()`: +#> ! error in evaluating the argument 'object' in selecting a method for function 'summary': object 'm.out.genetic.ratio1' not found ``` ``` r -final.data$genetic_ratio1_weights = m.out.genetic.ratio1$weights +final.data$genetic_ratio1_weights <- m.out.genetic.ratio1$weights +#> Error: +#> ! object 'm.out.genetic.ratio1' not found ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~genetic_ratio1_weights) +#> Error: +#> ! object 'genetic_ratio1_weights' not found +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 153.00 600.00 -#> Age (mean (SD)) 55.61 (2.99) 55.00 (3.89) 0.126 0.174 -#> Weight (mean (SD)) 149.30 (2.73) 150.00 (3.17) 0.056 0.236 -#> Height (mean (SD)) 64.87 (2.03) 65.00 (2.45) 0.671 0.056 -#> Biomarker1 (mean (SD)) 35.47 (2.21) 35.00 (2.25) 0.165 0.211 -#> Biomarker2 (mean (SD)) 49.55 (1.94) 50.00 (2.44) 0.128 0.204 -#> Smoker = 1 (%) 29.6 (19.3) 121.0 (20.2) 0.872 0.021 -#> Sex = 1 (%) 122.4 (80.0) 480.0 (80.0) 1.000 <0.001 -#> ECOG1 = 1 (%) 46.4 (30.3) 182.0 (30.3) 1.000 <0.001 +#> n 201.00 201.00 +#> Age (mean (SD)) 56.07 (3.53) 55.97 (4.09) 0.785 0.027 +#> Weight (mean (SD)) 148.80 (3.24) 149.31 (3.09) 0.108 0.160 +#> Height (mean (SD)) 63.89 (2.07) 64.12 (2.40) 0.297 0.104 +#> Biomarker1 (mean (SD)) 36.14 (2.05) 36.13 (2.19) 0.961 0.005 +#> Biomarker2 (mean (SD)) 48.97 (2.10) 49.29 (2.45) 0.160 0.140 +#> Smoker = 1 (%) 47.0 (23.4) 47.0 (23.4) 1.000 <0.001 +#> Sex = 1 (%) 149.0 (74.1) 154.0 (76.6) 0.563 0.058 +#> ECOG1 = 1 (%) 71.0 (35.3) 64.0 (31.8) 0.460 0.074 ``` ## GMW @@ -835,99 +810,52 @@ We now consider genetic matching without replacement. ``` r -m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 ,replace=FALSE, estimand="ATT", - data = final.data, method = "genetic", - ratio = 1,pop.size=200) -#> Warning: Fewer control units than treated units; not all treated units will get -#> a match. +m.out.genetic.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + replace = FALSE, estimand = "ATT", + data = final.data, method = "genetic", + ratio = 1, pop.size = 200 +) +#> Error in `matchit2genetic()`: +#> ! The packages "Matching" and "rgenoud" are required. summary(m.out.genetic.ratio1) -#> -#> Call: -#> matchit(formula = indicator ~ Age + Weight + Height + Biomarker1 + -#> Biomarker2 + Smoker + Sex + ECOG1, data = final.data, method = "genetic", -#> estimand = "ATT", replace = FALSE, ratio = 1, pop.size = 200) -#> -#> Summary of Balance for All Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 -#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 -#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 -#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 -#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 -#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 -#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 -#> Sex 0.8000 0.6940 0.2650 . 0.1060 -#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 -#> eCDF Max -#> distance 0.6703 -#> Age 0.2350 -#> Weight 0.2717 -#> Height 0.3237 -#> Biomarker1 0.3640 -#> Biomarker2 0.3387 -#> Smoker 0.1183 -#> Sex 0.1060 -#> ECOG1 0.0907 -#> -#> Summary of Balance for Matched Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.8782 0.2566 2.5745 0.2286 0.4845 -#> Age 54.6141 57.0000 -0.6134 1.0025 0.1778 -#> Weight 150.3142 147.9994 0.7314 0.9790 0.2000 -#> Height 65.3268 62.9993 0.9483 1.1390 0.2592 -#> Biomarker1 34.7270 36.9995 -1.0122 0.9747 0.2732 -#> Biomarker2 50.3290 47.9993 0.9530 1.1270 0.2626 -#> Smoker 0.1900 0.3200 -0.3240 . 0.1300 -#> Sex 0.8100 0.6940 0.2900 . 0.1160 -#> ECOG1 0.2800 0.3940 -0.2480 . 0.1140 -#> eCDF Max Std. Pair Dist. -#> distance 0.834 2.5745 -#> Age 0.272 0.6838 -#> Weight 0.310 0.7728 -#> Height 0.376 0.9634 -#> Biomarker1 0.426 1.0313 -#> Biomarker2 0.388 1.1486 -#> Smoker 0.130 0.8823 -#> Sex 0.116 0.6700 -#> ECOG1 0.114 0.6483 -#> -#> Sample Sizes: -#> Control Treated -#> All 500 600 -#> Matched 500 500 -#> Unmatched 0 100 -#> Discarded 0 0 +#> Error in `h()`: +#> ! error in evaluating the argument 'object' in selecting a method for function 'summary': object 'm.out.genetic.ratio1' not found ``` ``` r -final.data$genetic_ratio1_weights_no_replace = m.out.genetic.ratio1$weights +final.data$genetic_ratio1_weights_no_replace <- m.out.genetic.ratio1$weights +#> Error: +#> ! object 'm.out.genetic.ratio1' not found ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights_no_replace) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~genetic_ratio1_weights_no_replace) +#> Error: +#> ! object 'genetic_ratio1_weights_no_replace' not found +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 500.00 500.00 -#> Age (mean (SD)) 57.00 (3.75) 54.61 (3.75) <0.001 0.637 -#> Weight (mean (SD)) 148.00 (3.16) 150.31 (3.12) <0.001 0.738 -#> Height (mean (SD)) 63.00 (2.23) 65.33 (2.38) <0.001 1.011 -#> Biomarker1 (mean (SD)) 37.00 (2.23) 34.73 (2.20) <0.001 1.026 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.33 (2.36) <0.001 1.015 -#> Smoker = 1 (%) 160.0 (32.0) 95.0 (19.0) <0.001 0.302 -#> Sex = 1 (%) 347.0 (69.4) 405.0 (81.0) <0.001 0.271 -#> ECOG1 = 1 (%) 197.0 (39.4) 140.0 (28.0) <0.001 0.243 +#> n 201.00 201.00 +#> Age (mean (SD)) 56.07 (3.53) 55.97 (4.09) 0.785 0.027 +#> Weight (mean (SD)) 148.80 (3.24) 149.31 (3.09) 0.108 0.160 +#> Height (mean (SD)) 63.89 (2.07) 64.12 (2.40) 0.297 0.104 +#> Biomarker1 (mean (SD)) 36.14 (2.05) 36.13 (2.19) 0.961 0.005 +#> Biomarker2 (mean (SD)) 48.97 (2.10) 49.29 (2.45) 0.160 0.140 +#> Smoker = 1 (%) 47.0 (23.4) 47.0 (23.4) 1.000 <0.001 +#> Sex = 1 (%) 149.0 (74.1) 154.0 (76.6) 0.563 0.058 +#> ECOG1 = 1 (%) 71.0 (35.3) 64.0 (31.8) 0.460 0.074 ``` @@ -951,99 +879,52 @@ The optimal matching algorithm performs a global minimization of propensity scor ``` r -m.out.optimal.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 , estimand="ATT", - data = final.data, method = "optimal", - ratio = 1) -#> Warning: Fewer control units than treated units; not all treated units will get -#> a match. +m.out.optimal.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", + data = final.data, method = "optimal", + ratio = 1 +) +#> Error in `matchit2optimal()`: +#> ! The package "optmatch" is required. summary(m.out.optimal.ratio1) -#> -#> Call: -#> matchit(formula = indicator ~ Age + Weight + Height + Biomarker1 + -#> Biomarker2 + Smoker + Sex + ECOG1, data = final.data, method = "optimal", -#> estimand = "ATT", ratio = 1) -#> -#> Summary of Balance for All Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 -#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 -#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 -#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 -#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 -#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 -#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 -#> Sex 0.8000 0.6940 0.2650 . 0.1060 -#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 -#> eCDF Max -#> distance 0.6703 -#> Age 0.2350 -#> Weight 0.2717 -#> Height 0.3237 -#> Biomarker1 0.3640 -#> Biomarker2 0.3387 -#> Smoker 0.1183 -#> Sex 0.1060 -#> ECOG1 0.0907 -#> -#> Summary of Balance for Matched Data: -#> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7450 0.2566 2.0229 0.9127 0.3621 -#> Age 55.2223 57.0000 -0.4571 1.0612 0.1347 -#> Weight 149.6287 147.9994 0.5148 0.9235 0.1424 -#> Height 64.6082 62.9993 0.6555 1.0563 0.1834 -#> Biomarker1 35.2911 36.9995 -0.7609 0.9710 0.2065 -#> Biomarker2 49.6665 47.9993 0.6820 1.1171 0.1910 -#> Smoker 0.2200 0.3200 -0.2492 . 0.1000 -#> Sex 0.7960 0.6940 0.2550 . 0.1020 -#> ECOG1 0.3180 0.3940 -0.1653 . 0.0760 -#> eCDF Max Std. Pair Dist. -#> distance 0.644 2.0229 -#> Age 0.210 1.1380 -#> Weight 0.236 1.1969 -#> Height 0.290 1.1318 -#> Biomarker1 0.310 1.2242 -#> Biomarker2 0.298 1.1685 -#> Smoker 0.100 0.9471 -#> Sex 0.102 1.0250 -#> ECOG1 0.076 1.0442 -#> -#> Sample Sizes: -#> Control Treated -#> All 500 600 -#> Matched 500 500 -#> Unmatched 0 100 -#> Discarded 0 0 +#> Error in `h()`: +#> ! error in evaluating the argument 'object' in selecting a method for function 'summary': object 'm.out.optimal.ratio1' not found ``` ``` r -final.data$optimal_ratio1_weights = m.out.optimal.ratio1$weights +final.data$optimal_ratio1_weights <- m.out.optimal.ratio1$weights +#> Error: +#> ! object 'm.out.optimal.ratio1' not found ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~optimal_ratio1_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~optimal_ratio1_weights) +#> Error: +#> ! object 'optimal_ratio1_weights' not found +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 500.00 500.00 -#> Age (mean (SD)) 57.00 (3.75) 55.22 (3.86) <0.001 0.468 -#> Weight (mean (SD)) 148.00 (3.16) 149.63 (3.03) <0.001 0.527 -#> Height (mean (SD)) 63.00 (2.23) 64.61 (2.29) <0.001 0.713 -#> Biomarker1 (mean (SD)) 37.00 (2.23) 35.29 (2.20) <0.001 0.772 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 49.67 (2.35) <0.001 0.728 -#> Smoker = 1 (%) 160.0 (32.0) 110.0 (22.0) <0.001 0.227 -#> Sex = 1 (%) 347.0 (69.4) 398.0 (79.6) <0.001 0.236 -#> ECOG1 = 1 (%) 197.0 (39.4) 159.0 (31.8) 0.012 0.159 +#> n 201.00 201.00 +#> Age (mean (SD)) 56.07 (3.53) 55.97 (4.09) 0.785 0.027 +#> Weight (mean (SD)) 148.80 (3.24) 149.31 (3.09) 0.108 0.160 +#> Height (mean (SD)) 63.89 (2.07) 64.12 (2.40) 0.297 0.104 +#> Biomarker1 (mean (SD)) 36.14 (2.05) 36.13 (2.19) 0.961 0.005 +#> Biomarker2 (mean (SD)) 48.97 (2.10) 49.29 (2.45) 0.160 0.140 +#> Smoker = 1 (%) 47.0 (23.4) 47.0 (23.4) 1.000 <0.001 +#> Sex = 1 (%) 149.0 (74.1) 154.0 (76.6) 0.563 0.058 +#> ECOG1 = 1 (%) 71.0 (35.3) 64.0 (31.8) 0.460 0.074 ``` @@ -1077,61 +958,64 @@ set.seed(1) # Toolkit for Weighting and Analysis of Nonequivalent Groups # https://cran.r-project.org/web/packages/twang/vignettes/twang.pdf -# Model includes non-linear effects and interactions with shrinkage to +# Model includes non-linear effects and interactions with shrinkage to # avoid overfitting - -ps.AOD.ATT <- ps(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - estimand = "ATT", interaction.depth=3, - shrinkage=0.01, verbose = FALSE, n.trees = 7000, - stop.method = c("es.mean","ks.max")) - -# interaction.dept is the tree depth used in gradient boosting; loosely interpreted as + +ps.AOD.ATT <- ps( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + estimand = "ATT", interaction.depth = 3, + shrinkage = 0.01, verbose = FALSE, n.trees = 7000, + stop.method = c("es.mean", "ks.max") +) + +# interaction.dept is the tree depth used in gradient boosting; loosely interpreted as # the maximum number of variables that can be included in an interaction # n.trees is the maximum number of gradient boosting iterations to be considered. The # more iterations allows for more nonlienarity and interactions to be considered. -# shrinkage is a numeric value between 0 and 1 denoting the learning rate. Smaller -# values restrict the complexity that is added at each iteration of the gradient -# boosting algorithm. A smaller learning rate requires more iterations (n.trees), but +# shrinkage is a numeric value between 0 and 1 denoting the learning rate. Smaller +# values restrict the complexity that is added at each iteration of the gradient +# boosting algorithm. A smaller learning rate requires more iterations (n.trees), but # adds some protection against model overfit. The default value is 0.01. # windows() # plot(ps.AOD.ATT, plot=5) -# +# # summary(ps.AOD.ATT) -# +# # # Relative influence # summary(ps.AOD.ATT$gbm.obj, n.trees=ps.AOD.ATT$desc$ks.max.ATT$n.trees, plot=FALSE) -# +# # bal.table(ps.AOD.ATT) -final.data$weights_gbm <- ps.AOD.ATT$w[,1] +final.data$weights_gbm <- ps.AOD.ATT$w[, 1] ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~weights_gbm) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~weights_gbm) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 278.08 600.00 -#> Age (mean (SD)) 55.73 (3.32) 55.00 (3.89) 0.040 0.201 -#> Weight (mean (SD)) 149.24 (2.67) 150.00 (3.17) 0.006 0.259 -#> Height (mean (SD)) 64.25 (2.05) 65.00 (2.45) 0.001 0.334 -#> Biomarker1 (mean (SD)) 35.36 (2.07) 35.00 (2.25) 0.154 0.165 -#> Biomarker2 (mean (SD)) 49.28 (2.20) 50.00 (2.44) 0.011 0.308 -#> Smoker = 1 (%) 54.9 (19.7) 121.0 (20.2) 0.909 0.011 -#> Sex = 1 (%) 227.7 (81.9) 480.0 (80.0) 0.597 0.048 -#> ECOG1 = 1 (%) 111.1 (40.0) 182.0 (30.3) 0.094 0.203 +#> n 149.20 600.00 +#> Age (mean (SD)) 55.95 (3.43) 55.00 (3.88) 0.002 0.260 +#> Weight (mean (SD)) 149.42 (3.10) 150.00 (3.17) 0.053 0.186 +#> Height (mean (SD)) 64.08 (2.02) 65.00 (2.47) <0.001 0.406 +#> Biomarker1 (mean (SD)) 35.77 (2.16) 35.00 (2.24) <0.001 0.353 +#> Biomarker2 (mean (SD)) 49.22 (2.09) 50.00 (2.46) <0.001 0.344 +#> Smoker = 1 (%) 34.5 (23.1) 119.0 (19.8) 0.389 0.081 +#> Sex = 1 (%) 112.8 (75.6) 476.0 (79.3) 0.344 0.089 +#> ECOG1 = 1 (%) 45.8 (30.7) 183.0 (30.5) 0.960 0.005 ``` ## EB @@ -1139,10 +1023,9 @@ print(t1,smd=TRUE) Entropy balancing is a weighting method to balance the covariates by assigning a scalar weight to each external control observations such that the reweighted groups satisfy a set of balance constraints that are imposed on the sample moments of the covariate distributions [@hainmueller2012entropy]. ``` r -eb.out <- ebalance(final.data$indicator,final.data[,c(2:9)],max.iterations = 300) -#> Converged within tolerance -final.data$eb_weights <- rep(1,nrow(final.data)) -final.data$eb_weights[final.data$indicator==0] <- eb.out$w +eb.out <- ebalance(final.data$indicator, final.data[, c(2:9)], max.iterations = 300) +final.data$eb_weights <- rep(1, nrow(final.data)) +final.data$eb_weights[final.data$indicator == 0] <- eb.out$w ``` Note that the entropy balancing method failed to converge. @@ -1157,24 +1040,24 @@ We now compare the SMD between the two datasets. By definition, the SMD after EB ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~eb_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~eb_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 600.00 600.00 -#> Age (mean (SD)) 55.00 (3.29) 55.00 (3.89) 1.000 <0.001 -#> Weight (mean (SD)) 150.00 (2.43) 150.00 (3.17) 1.000 <0.001 -#> Height (mean (SD)) 65.00 (2.36) 65.00 (2.45) 1.000 <0.001 -#> Biomarker1 (mean (SD)) 35.00 (2.37) 35.00 (2.25) 1.000 <0.001 -#> Biomarker2 (mean (SD)) 50.00 (2.00) 50.00 (2.44) 1.000 <0.001 -#> Smoker = 1 (%) 121.0 (20.2) 121.0 (20.2) 1.000 <0.001 -#> Sex = 1 (%) 480.0 (80.0) 480.0 (80.0) 1.000 <0.001 -#> ECOG1 = 1 (%) 182.0 (30.3) 182.0 (30.3) 1.000 <0.001 +#> Age (mean (SD)) 55.00 (3.42) 55.00 (3.88) 1.000 <0.001 +#> Weight (mean (SD)) 150.00 (2.87) 150.00 (3.17) 1.000 <0.001 +#> Height (mean (SD)) 65.00 (2.12) 65.00 (2.47) 1.000 <0.001 +#> Biomarker1 (mean (SD)) 35.00 (2.03) 35.00 (2.24) 1.000 <0.001 +#> Biomarker2 (mean (SD)) 50.00 (2.32) 50.00 (2.46) 1.000 <0.001 +#> Smoker = 1 (%) 119.0 (19.8) 119.0 (19.8) 1.000 <0.001 +#> Sex = 1 (%) 476.0 (79.3) 476.0 (79.3) 1.000 <0.001 +#> ECOG1 = 1 (%) 183.0 (30.5) 183.0 (30.5) 1.000 <0.001 ``` @@ -1191,41 +1074,44 @@ print(t1,smd=TRUE) The propensity score is defined as the probability of a patient being in a trial given the observed baseline covariates. We utilized the ATT weights, which are defined for the IPW as fixing the trial patients weight at unity, and external control patients as $\hat{e}(x)/(1-\hat{e}(x))$ where $\hat{e}(x)$ is estimated using a logistic regression model [@amusa2019examination]. ``` r -ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - family=binomial) -psfit=predict(ps.logit,type = "response",data=final.data) +ps.logit <- glm( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + family = binomial +) +psfit <- predict(ps.logit, type = "response", data = final.data) -ps_trial <- psfit[final.data$indicator==1] -ps_extcont <- psfit[final.data$indicator==0] +ps_trial <- psfit[final.data$indicator == 1] +ps_extcont <- psfit[final.data$indicator == 0] final.data$invprob_weights <- NA -final.data$invprob_weights[final.data$indicator==0] <- ps_extcont/(1-ps_extcont) -final.data$invprob_weights[final.data$indicator==1] <- ps_trial/ps_trial +final.data$invprob_weights[final.data$indicator == 0] <- ps_extcont / (1 - ps_extcont) +final.data$invprob_weights[final.data$indicator == 1] <- ps_trial / ps_trial ``` We now compare the SMD between the two datasets. ``` r -svy <- svydesign(id = ~0, data=final.data,weights = ~invprob_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~invprob_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ``` r -print(t1,smd=TRUE) +print(t1, smd = TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 519.03 600.00 -#> Age (mean (SD)) 55.59 (3.19) 55.00 (3.89) 0.270 0.166 -#> Weight (mean (SD)) 149.27 (2.58) 150.00 (3.17) 0.014 0.254 -#> Height (mean (SD)) 64.80 (2.23) 65.00 (2.45) 0.646 0.084 -#> Biomarker1 (mean (SD)) 34.92 (2.27) 35.00 (2.25) 0.853 0.036 -#> Biomarker2 (mean (SD)) 49.39 (2.09) 50.00 (2.44) 0.041 0.270 -#> Smoker = 1 (%) 87.6 (16.9) 121.0 (20.2) 0.467 0.085 -#> Sex = 1 (%) 456.8 (88.0) 480.0 (80.0) 0.019 0.220 -#> ECOG1 = 1 (%) 187.0 (36.0) 182.0 (30.3) 0.404 0.121 +#> n 502.38 600.00 +#> Age (mean (SD)) 55.57 (3.45) 55.00 (3.88) 0.237 0.154 +#> Weight (mean (SD)) 150.14 (3.34) 150.00 (3.17) 0.803 0.042 +#> Height (mean (SD)) 64.90 (2.28) 65.00 (2.47) 0.756 0.044 +#> Biomarker1 (mean (SD)) 35.61 (2.07) 35.00 (2.24) 0.063 0.282 +#> Biomarker2 (mean (SD)) 49.92 (2.39) 50.00 (2.46) 0.848 0.035 +#> Smoker = 1 (%) 94.3 (18.8) 119.0 (19.8) 0.816 0.027 +#> Sex = 1 (%) 336.1 (66.9) 476.0 (79.3) 0.100 0.283 +#> ECOG1 = 1 (%) 124.7 (24.8) 183.0 (30.5) 0.324 0.127 ``` @@ -1242,7 +1128,7 @@ Next we will investigate balance plots. ``` r -covs <- data.frame(final.data[,c("Age","Weight","Height","Biomarker1","Biomarker2","Smoker","Sex","ECOG1")]) +covs <- data.frame(final.data[, c("Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", "ECOG1")]) data_with_weights <- final.data ``` @@ -1252,46 +1138,55 @@ We now conduct a balance diagnostic by considering SMD plot. The x-axis of the p ``` r -love.plot(covs, - treat=data_with_weights$data, - weights = list(NNMPS=data_with_weights$ratio1_caliper_weights, - NNMLPS=data_with_weights$ratio1_caliper_weights_lps, - OPTM=data_with_weights$optimal_ratio1_weights, - GENMATCH=data_with_weights$genetic_ratio1_weights, - GENMATCHW=data_with_weights$genetic_ratio1_weights_no_replace), - thresholds=0.1 ,binary="std",shapes = c("circle filled"), - line=FALSE,estimand="ATT",abs=TRUE, - sample.names = c("PSMR", - "PSML", - "OM", - "GM", - "GMW"), - title="Covariate Balance after matching methods", - s.d.denom="pooled") +love.plot(covs, + treat = data_with_weights$data, + weights = list( + NNMPS = data_with_weights$ratio1_caliper_weights, + NNMLPS = data_with_weights$ratio1_caliper_weights_lps, + OPTM = data_with_weights$optimal_ratio1_weights, + GENMATCH = data_with_weights$genetic_ratio1_weights, + GENMATCHW = data_with_weights$genetic_ratio1_weights_no_replace + ), + thresholds = 0.1, binary = "std", shapes = c("circle filled"), + line = FALSE, estimand = "ATT", abs = TRUE, + sample.names = c( + "PSMR", + "PSML", + "OM", + "GM", + "GMW" + ), + title = "Covariate Balance after matching methods", + s.d.denom = "pooled" +) +#> Error in `love.plot()`: +#> ! The argument supplied to `weights` must be a named list of weights, names of +#> variables containing weights in an available data set, or objects with a +#> `get.w()` method. ``` -
-plot of chunk unnamed-chunk-61 -

plot of chunk unnamed-chunk-61

-
- # Balance Plots for Weighting Methods ``` r -love.plot(covs, - treat=data_with_weights$data, - weights = list(EB=data_with_weights$eb_weights, - IPW=data_with_weights$invprob_weights, - GBM=data_with_weights$weights_gbm), - thresholds=0.1 ,binary="std",shapes = c("circle filled"), - line=FALSE,estimand="ATT",abs=TRUE, - sample.names = c("EB", - "IPW", - "GBM"), - title="Covariate Balance after weighting methods", - s.d.denom="pooled") +love.plot(covs, + treat = data_with_weights$data, + weights = list( + EB = data_with_weights$eb_weights, + IPW = data_with_weights$invprob_weights, + GBM = data_with_weights$weights_gbm + ), + thresholds = 0.1, binary = "std", shapes = c("circle filled"), + line = FALSE, estimand = "ATT", abs = TRUE, + sample.names = c( + "EB", + "IPW", + "GBM" + ), + title = "Covariate Balance after weighting methods", + s.d.denom = "pooled" +) ```
@@ -1304,78 +1199,80 @@ We now investigate the effective sample size (ESS) in the trial and external con ``` r -#External control -ess.PSML.extcont <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0])^2) +# External control +ess.PSML.extcont <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 0])^2) -ess.PSMR.extcont <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==0])^2) +ess.PSMR.extcont <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 0])^2) -ess.OM.extcont <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==0])^2) +ess.OM.extcont <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.GM.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0])^2) +ess.GM.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.eb.extcont <- (sum(data_with_weights$eb_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$eb_weights[data_with_weights$indicator==0])^2) +ess.eb.extcont <- (sum(data_with_weights$eb_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$eb_weights[data_with_weights$indicator == 0])^2) -ess.ipw.extcont <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$invprob_weights[data_with_weights$indicator==0])^2) +ess.ipw.extcont <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$invprob_weights[data_with_weights$indicator == 0])^2) -ess.gbm.extcont <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$weights_gbm[data_with_weights$indicator==0])^2) +ess.gbm.extcont <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$weights_gbm[data_with_weights$indicator == 0])^2) -ess.genetic.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0])^2) +ess.genetic.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.genetic.no.replace.extcont <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==0])^2) +ess.genetic.no.replace.extcont <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 0])^2) ``` ``` r -#Trial -ess.PSML.trial <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1])^2) +# Trial +ess.PSML.trial <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 1])^2) -ess.PSMR.trial <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==1])^2) +ess.PSMR.trial <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 1])^2) -ess.OM.trial <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==1])^2) +ess.OM.trial <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 1])^2) -ess.GM.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1])^2) +ess.GM.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1])^2) -ess.eb.trial <- (sum(data_with_weights$eb_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$eb_weights[data_with_weights$indicator==1])^2) +ess.eb.trial <- (sum(data_with_weights$eb_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$eb_weights[data_with_weights$indicator == 1])^2) -ess.ipw.trial <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$invprob_weights[data_with_weights$indicator==1])^2) +ess.ipw.trial <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$invprob_weights[data_with_weights$indicator == 1])^2) -ess.gbm.trial <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$weights_gbm[data_with_weights$indicator==1])^2) +ess.gbm.trial <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$weights_gbm[data_with_weights$indicator == 1])^2) -ess.genetic.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1])^2) -ess.genetic.no.replace.trial <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==1])^2) +ess.genetic.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1])^2) +ess.genetic.no.replace.trial <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 1])^2) ``` ``` r -out.ess <- data.frame(Unadjusted=c(length(which(final.data$data=="TRIAL")),length(which(final.data$data=="EC"))), - PSML=c(ess.PSML.trial,ess.PSML.extcont), - PSMR=c(ess.PSMR.trial,ess.PSMR.extcont), - OM=c(ess.OM.trial,ess.OM.extcont), - GM=c(ess.genetic.trial,ess.genetic.extcont), - GMW=c(ess.genetic.no.replace.trial,ess.genetic.no.replace.extcont), - EB=c(ess.eb.trial,ess.eb.extcont), - IPW=c(ess.ipw.trial,ess.ipw.extcont), - GBM=c(ess.gbm.trial,ess.gbm.extcont)) +out.ess <- data.frame( + Unadjusted = c(length(which(final.data$data == "TRIAL")), length(which(final.data$data == "EC"))), + PSML = c(ess.PSML.trial, ess.PSML.extcont), + PSMR = c(ess.PSMR.trial, ess.PSMR.extcont), + OM = c(ess.OM.trial, ess.OM.extcont), + GM = c(ess.genetic.trial, ess.genetic.extcont), + GMW = c(ess.genetic.no.replace.trial, ess.genetic.no.replace.extcont), + EB = c(ess.eb.trial, ess.eb.extcont), + IPW = c(ess.ipw.trial, ess.ipw.extcont), + GBM = c(ess.gbm.trial, ess.gbm.extcont) +) -rownames(out.ess) <- c("Trial","External Control") +rownames(out.ess) <- c("Trial", "External Control") ``` Note: After applying the matching methods, some patients in RCT were excluded. For demonstration purpose in this article, we will also exclude RCT patients that were not matched in the Bayesian outcome model. However, in reality we may keep the full sample size in RCT and discounting could be done at the second stage with power prior and commensurate prior. Before, moving to the next stage of Bayesian borrowing, ESS also needs to be taken into account. @@ -1384,23 +1281,22 @@ The ESS for each cohort using different methods are shown below. ``` r out.ess -#> Unadjusted PSML PSMR OM GM GMW EB IPW -#> Trial 600 219 200 500 600.00000 500 600.00000 600.00000 -#> External Control 500 219 200 500 48.68813 500 23.87066 42.41241 -#> GBM -#> Trial 600.00000 -#> External Control 74.15286 +#> Unadjusted PSML PSMR OM GM GMW EB IPW GBM +#> Trial 600 216 201 NaN NaN NaN 600.00000 600.00000 600.0000 +#> External Control 500 216 201 NaN NaN NaN 27.66206 48.08584 111.8361 ``` We also investigate the histogram of the weights for external control patients and the effective sample size (ESS). ``` r -par(mfrow=c(2,2)) -hist(data_with_weights$eb_weights[data_with_weights$indicator==0],main=paste("EB \n ESS=",round(ess.eb.extcont),sep=""),xlab="Weight") -hist(data_with_weights$invprob_weights[data_with_weights$indicator==0],main=paste("IPW\n ESS=",round(ess.ipw.extcont),sep=""),xlab="Weight") -hist(data_with_weights$weights_gbm[data_with_weights$indicator==0],main=paste("GBM \n ESS=",round(ess.gbm.extcont),sep=""),xlab="Weight") -hist(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0],main=paste("GM \n ESS=",round(ess.genetic.extcont),sep=""),xlab="Weight") +par(mfrow = c(2, 2)) +hist(data_with_weights$eb_weights[data_with_weights$indicator == 0], main = paste("EB \n ESS=", round(ess.eb.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$invprob_weights[data_with_weights$indicator == 0], main = paste("IPW\n ESS=", round(ess.ipw.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$weights_gbm[data_with_weights$indicator == 0], main = paste("GBM \n ESS=", round(ess.gbm.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0], main = paste("GM \n ESS=", round(ess.genetic.extcont), sep = ""), xlab = "Weight") +#> Error in `hist.default()`: +#> ! 'x' must be numeric ```
diff --git a/vignettes/match_weight_02_application.Rmd b/vignettes/match_weight_02_application.Rmd index 0b15c421..82fb84e7 100644 --- a/vignettes/match_weight_02_application.Rmd +++ b/vignettes/match_weight_02_application.Rmd @@ -4,7 +4,7 @@ bibliography: references.bib author: - Manoj Khanal - Eli Lilly & Company -date: "2025-05-07" +date: "2026-05-12" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{2. Matching and Weighting Application with Dynamic Borrowing} @@ -36,7 +36,7 @@ We will also consider several Bayesian dynamic borrowing methods * Power prior # Hazard function -The hazard function of $T$ at time $t$ is defined as the instantaneous rate of the event occurence that is still at risk at time $t$. It is defined as +The hazard function of $T$ at time $t$ is defined as the instantaneous rate of the event occurrence that is still at risk at time $t$. It is defined as \begin{equation} h(t)=\lim_\limits{\Delta \rightarrow 0 } P\frac{(t\leq T Error in library(kableExtra): there is no package called 'kableExtra' library(survival) ``` @@ -263,7 +262,7 @@ outcome ``` -Next, the borrowing method is implemented as shown below. We consider Bayesian Dynamic Borrowing (BDB) in which gamma prior is assigned for the commensurability parameter. The `tau_prior` shown below is the hyperparameter of the commensurate prior which determines the degree of borrowing. We assign a gamma prior for this hyperparameter. Furhtermore, we also need to specify a flag for external data which is denoted by `ext`. +Next, the borrowing method is implemented as shown below. We consider Bayesian Dynamic Borrowing (BDB) in which gamma prior is assigned for the commensurability parameter. The `tau_prior` shown below is the hyperparameter of the commensurate prior which determines the degree of borrowing. We assign a gamma prior for this hyperparameter. Furthermore, we also need to specify a flag for external data which is denoted by `ext`. ``` r #Borrowing @@ -337,13 +336,13 @@ results.exp.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 5.9 seconds. -#> Chain 2 finished in 5.2 seconds. -#> Chain 3 finished in 6.3 seconds. +#> Chain 1 finished in 7.2 seconds. +#> Chain 2 finished in 7.1 seconds. +#> Chain 3 finished in 9.4 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 5.8 seconds. -#> Total execution time: 17.6 seconds. +#> Mean chain execution time: 7.9 seconds. +#> Total execution time: 23.9 seconds. draws1 <- results.exp.psborrow$draws() draws1 <- rename_draws_covariates(draws1, anls_obj) @@ -356,12 +355,12 @@ results.exp.psborrow$summary() #> # A tibble: 6 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -718. -718. 1.48 1.32 -721. -716. 1.00 11600. -#> 2 beta_trt -0.358 -0.360 0.110 0.109 -0.537 -0.173 1.00 12088. -#> 3 alpha[1] -0.620 -0.618 0.0916 0.0915 -0.775 -0.474 1.00 11788. -#> 4 alpha[2] 2.36 2.38 0.371 0.367 1.71 2.93 1.00 17931. -#> 5 tau 0.119 0.0511 0.182 0.0701 0.000400 0.468 1.00 12852. -#> 6 HR_trt 0.704 0.698 0.0777 0.0758 0.584 0.841 1.00 12088. +#> 1 lp__ -718. -718. 1.48 1.28 -721. -716. 1.00 10917. +#> 2 beta_trt -0.358 -0.358 0.109 0.108 -0.537 -0.179 1.00 13767. +#> 3 alpha[1] -0.621 -0.620 0.0904 0.0902 -0.771 -0.475 1.00 13896. +#> 4 alpha[2] 2.36 2.38 0.368 0.363 1.72 2.92 1.00 16892. +#> 5 tau 0.119 0.0507 0.181 0.0697 0.000480 0.463 1.00 12986. +#> 6 HR_trt 0.704 0.699 0.0768 0.0756 0.585 0.836 1.00 13767. #> # ℹ 1 more variable: ess_tail ``` @@ -395,14 +394,14 @@ The $95\%$ credible interval can be calculated as follows. ``` r summarize_draws(draws1, ~ quantile(.x, probs = c(0.025, 0.975))) #> # A tibble: 6 × 3 -#> variable `2.5%` `97.5%` -#> -#> 1 lp__ -722. -716. -#> 2 treatment log HR -0.571 -0.138 -#> 3 baseline log hazard rate, internal -0.806 -0.445 -#> 4 baseline log hazard rate, external 1.56 3.02 -#> 5 commensurability parameter 0.0000996 0.621 -#> 6 treatment HR 0.565 0.871 +#> variable `2.5%` `97.5%` +#> +#> 1 lp__ -722. -716. +#> 2 treatment log HR -0.571 -0.145 +#> 3 baseline log hazard rate, internal -0.802 -0.447 +#> 4 baseline log hazard rate, external 1.58 3.02 +#> 5 commensurability parameter 0.000120 0.619 +#> 6 treatment HR 0.565 0.865 ``` We can graph other plots that help us evaluate convergence and diagnose problems with the MCMC sampler, such as trace plot. @@ -537,13 +536,13 @@ results.weib.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 14.7 seconds. -#> Chain 2 finished in 14.8 seconds. -#> Chain 3 finished in 11.5 seconds. +#> Chain 1 finished in 11.9 seconds. +#> Chain 2 finished in 12.8 seconds. +#> Chain 3 finished in 12.6 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 13.6 seconds. -#> Total execution time: 41.1 seconds. +#> Mean chain execution time: 12.4 seconds. +#> Total execution time: 37.5 seconds. draws2 <- results.weib.psborrow$draws() draws2 <- rename_draws_covariates(draws2, anls_obj) @@ -556,13 +555,13 @@ results.weib.psborrow$summary() #> # A tibble: 7 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -708. -7.07e+2 1.66 1.48 -7.11e+2 -705. 1.00 11420. -#> 2 beta_trt -0.301 -3.01e-1 0.111 0.111 -4.82e-1 -0.118 1.00 16859. -#> 3 alpha[1] -0.510 -5.09e-1 0.0944 0.0940 -6.68e-1 -0.357 1.00 16239. -#> 4 alpha[2] 1.97 1.99e+0 0.382 0.377 1.30e+0 2.56 1.00 22836. -#> 5 tau 0.177 7.53e-2 0.281 0.103 6.87e-4 0.691 1.00 14556. -#> 6 shape_weibull 0.836 8.36e-1 0.0323 0.0327 7.83e-1 0.889 1.00 22322. -#> 7 HR_trt 0.744 7.40e-1 0.0828 0.0815 6.18e-1 0.888 1.00 16859. +#> 1 lp__ -708. -7.07e+2 1.66 1.47 -7.11e+2 -705. 1.00 11655. +#> 2 beta_trt -0.300 -3.01e-1 0.110 0.110 -4.79e-1 -0.117 1.00 15789. +#> 3 alpha[1] -0.511 -5.09e-1 0.0936 0.0946 -6.67e-1 -0.361 1.00 15831. +#> 4 alpha[2] 1.97 1.99e+0 0.382 0.378 1.31e+0 2.55 1.00 21480. +#> 5 tau 0.182 7.54e-2 0.320 0.104 6.30e-4 0.710 1.00 14273. +#> 6 shape_weibull 0.836 8.36e-1 0.0319 0.0319 7.84e-1 0.889 1.00 21440. +#> 7 HR_trt 0.745 7.40e-1 0.0827 0.0817 6.20e-1 0.890 1.00 15789. #> # ℹ 1 more variable: ess_tail ``` @@ -599,12 +598,12 @@ summarize_draws(draws2, ~ quantile(.x, probs = c(0.025, 0.975))) #> variable `2.5%` `97.5%` #> #> 1 lp__ -712. -705. -#> 2 treatment log HR -0.516 -0.0847 -#> 3 baseline log hazard rate, internal -0.699 -0.328 -#> 4 baseline log hazard rate, external 1.15 2.66 -#> 5 commensurability parameter 0.000179 0.933 -#> 6 Weibull shape parameter 0.773 0.900 -#> 7 treatment HR 0.597 0.919 +#> 2 treatment log HR -0.513 -0.0816 +#> 3 baseline log hazard rate, internal -0.699 -0.334 +#> 4 baseline log hazard rate, external 1.16 2.64 +#> 5 commensurability parameter 0.000165 0.957 +#> 6 Weibull shape parameter 0.775 0.900 +#> 7 treatment HR 0.599 0.922 ``` We can see other plots that help us evaluate convergence and diagnose problems with the MCMC sampler, such as trace plot. @@ -1121,7 +1120,7 @@ traceplot(model2,varname=c("alpha","alphaR","beta","tau"))
# Exponential distribution (constant hazard) and no borrowing: Using `psborrow2` R package -In addition to specifying an exponential distribution using Bayesian dynamic borrowing, naive no-borrowing and full-borrowing analysis can be conducted using the exponential distribution following similar steps as above. These ype of analysis could be used as a sensitivity or exploratory analysis. +In addition to specifying an exponential distribution using Bayesian dynamic borrowing, naive no-borrowing and full-borrowing analysis can be conducted using the exponential distribution following similar steps as above. These type of analysis could be used as a sensitivity or exploratory analysis. The `psborrow2` package will be used to conduct the analysis. Following are the steps. @@ -1215,13 +1214,13 @@ results.no.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 2.0 seconds. +#> Chain 1 finished in 1.7 seconds. #> Chain 2 finished in 1.7 seconds. #> Chain 3 finished in 1.8 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 1.8 seconds. -#> Total execution time: 5.7 seconds. +#> Mean chain execution time: 1.7 seconds. +#> Total execution time: 5.4 seconds. draws3 <- results.no.psborrow$draws() draws3 <- rename_draws_covariates(draws3, anls_obj) @@ -1234,10 +1233,10 @@ results.no.psborrow$summary() #> # A tibble: 4 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -727. -727. 1.02 0.728 -729. -726. 1.00 9041. -#> 2 beta_trt -0.357 -0.357 0.111 0.111 -0.537 -0.172 1.00 8473. -#> 3 alpha -0.622 -0.621 0.0929 0.0926 -0.778 -0.472 1.00 8588. -#> 4 HR_trt 0.704 0.699 0.0788 0.0773 0.584 0.842 1.00 8473. +#> 1 lp__ -727. -727. 1.03 0.719 -729. -726. 1.00 10367. +#> 2 beta_trt -0.357 -0.358 0.111 0.111 -0.537 -0.172 1.00 8928. +#> 3 alpha -0.622 -0.621 0.0926 0.0929 -0.775 -0.473 1.00 8752. +#> 4 HR_trt 0.704 0.699 0.0788 0.0775 0.585 0.842 1.00 8928. #> # ℹ 1 more variable: ess_tail ``` @@ -1265,9 +1264,9 @@ summarize_draws(draws3, ~ quantile(.x, probs = c(0.025, 0.975))) #> variable `2.5%` `97.5%` #> #> 1 lp__ -730. -726. -#> 2 treatment log HR -0.576 -0.138 -#> 3 baseline log hazard rate -0.807 -0.444 -#> 4 treatment HR 0.562 0.871 +#> 2 treatment log HR -0.571 -0.137 +#> 3 baseline log hazard rate -0.807 -0.445 +#> 4 treatment HR 0.565 0.872 ``` We can see other plots that help us understand and diagnose problems with the MCMC sampler, such as trace plot. @@ -1378,13 +1377,13 @@ results.full.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 2.8 seconds. -#> Chain 2 finished in 2.5 seconds. -#> Chain 3 finished in 2.8 seconds. +#> Chain 1 finished in 3.0 seconds. +#> Chain 2 finished in 2.4 seconds. +#> Chain 3 finished in 2.5 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 2.7 seconds. -#> Total execution time: 8.4 seconds. +#> Mean chain execution time: 2.6 seconds. +#> Total execution time: 8.1 seconds. draws4 <- results.full.psborrow$draws() draws4 <- rename_draws_covariates(draws4, anls_obj) @@ -1397,10 +1396,10 @@ results.full.psborrow$summary() #> # A tibble: 4 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -747. -747. 0.992 0.698 -749. -746. 1.00 13207. -#> 2 beta_trt -1.51 -1.51 0.0761 0.0761 -1.64 -1.39 1.00 11791. -#> 3 alpha 0.534 0.534 0.0464 0.0459 0.456 0.609 1.00 11410. -#> 4 HR_trt 0.221 0.221 0.0169 0.0168 0.194 0.250 1.00 11791. +#> 1 lp__ -747. -747. 1.00 0.709 -749. -746. 1.00 13222. +#> 2 beta_trt -1.51 -1.51 0.0763 0.0755 -1.64 -1.39 1.00 11880. +#> 3 alpha 0.535 0.535 0.0465 0.0465 0.458 0.610 1.00 11478. +#> 4 HR_trt 0.221 0.220 0.0169 0.0166 0.194 0.249 1.00 11880. #> # ℹ 1 more variable: ess_tail ``` @@ -1429,7 +1428,7 @@ summarize_draws(draws4, ~ quantile(.x, probs = c(0.025, 0.975))) #> #> 1 lp__ -750. -746. #> 2 treatment log HR -1.66 -1.36 -#> 3 baseline log hazard rate 0.441 0.624 +#> 3 baseline log hazard rate 0.442 0.623 #> 4 treatment HR 0.190 0.256 ``` @@ -1554,9 +1553,22 @@ colnames(out)[2:4] = c("Hazard Ratio","Lower 95% CI","Upper 95% CI") The hazard ratio estimates and $95\%$ credible intervals for all the methods are shown below. -``` -#> Error in add_footnote(x, c("CI: Credible Interval for Bayesian methods and Confidence interval for Frequentist method"), : could not find function "add_footnote" -``` + +|Method | Hazard Ratio| Lower 95% CI| Upper 95% CI| +|:-------------------------------------------------------------------------------|------------:|------------:|------------:| +|Exponential distribution (constant hazard) and gamma prior | 0.7035| 0.5652| 0.8648| +|Piecewise exponential distribution (proportional hazard) and power prior | 0.6594| 0.5342| 0.8055| +|Weibull distribution (proportional hazard) and gamma prior | 0.7453| 0.5987| 0.9216| +|Piecewise exponential distribution (proportional hazard) and commensurate prior | 0.7039| 0.5634| 0.8713| +|Exponential distribution (constant hazard): No borrowing | 0.7043| 0.5651| 0.8718| +|Exponential distribution (constant hazard): Full borrowing | 0.2210| 0.1898| 0.2558| +|Cox model (Frequentist approach): No borrowing | 0.7757| 0.6246| 0.9633| +|Cox model (Frequentist approach): Full borrowing | 0.3327| 0.2845| 0.3890| + +__Note:__ +CI: Credible Interval for Bayesian methods and Confidence interval for Frequentist method + + # References diff --git a/vignettes/original/_match_weight_01_methods.Rmd b/vignettes/original/_match_weight_01_methods.Rmd index 8f63fb5c..d1572273 100644 --- a/vignettes/original/_match_weight_01_methods.Rmd +++ b/vignettes/original/_match_weight_01_methods.Rmd @@ -20,12 +20,12 @@ knitr::opts_chunk$set( ) ``` -Using external control in the analysis of clinical trial data can be challenging as the two data source may be heterogenous. In this document, we demonstrate the use of various matching and weighting techniques using readily available packages in R to adjust for imbalance in baseline covariates between the data sets. +Using external control in the analysis of clinical trial data can be challenging as the two data source may be heterogeneous. In this document, we demonstrate the use of various matching and weighting techniques using readily available packages in R to adjust for imbalance in baseline covariates between the data sets. We first load all of the libraries used in this tutorial. ```{r message=FALSE, warning=FALSE} -#Loading the libraries +# Loading the libraries library(SimMultiCorrData) library(ebal) library(ggplot2) @@ -74,36 +74,42 @@ Other variables in the simulated data include To generate the covariates, we first specify the sample sizes, number of continuous and categorical variables, the marginal moments of the covariates, and the correlation matrix. Note that rcorrvar2 requires the correlation matrix to be ordered as ordinal, continuous, Poisson, and Negative Binomial. ```{r message=FALSE, warning=FALSE} -#Simulate correlated covariates -n_t <- 600 #Sample size in trial data -n_ec <- 500 #Sample size in external control data -k_cont <- 5 #Number of continuous variables -k_cat <- 3 #Number of categorical variables -means_cont_tc <- c(55,150,65,35,50) #Vector of means of continuous variables -vars_cont_tc <- c(15,10,6,5,6) -marginal_tc = list(0.2,0.8,0.3) +# Simulate correlated covariates +n_t <- 600 # Sample size in trial data +n_ec <- 500 # Sample size in external control data +k_cont <- 5 # Number of continuous variables +k_cat <- 3 # Number of categorical variables +means_cont_tc <- c(55, 150, 65, 35, 50) # Vector of means of continuous variables +vars_cont_tc <- c(15, 10, 6, 5, 6) +marginal_tc <- list(0.2, 0.8, 0.3) rho_tc <- matrix(0.2, 8, 8) diag(rho_tc) <- 1 -skews_tc <- rep(0.2,5) -skurts_tc <- rep(0.1,5) +skews_tc <- rep(0.2, 5) +skurts_tc <- rep(0.1, 5) ``` # Simulating trial data After specifying the moments of the covariate distribution, we simulate the covariates using `rcorrvar2` function from `SimMultiCorrData` package. ```{r message=FALSE} -#Simulating covariates -trial.data <- rcorrvar2(n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, - method = "Fleishman", seed=1, - means = means_cont_tc, vars = vars_cont_tc, # if continuous variables - skews = skews_tc, skurts = skurts_tc, - marginal=marginal_tc, rho = rho_tc) +# Simulating covariates +trial.data <- rcorrvar2( + n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, + method = "Fleishman", seed = 1, + means = means_cont_tc, vars = vars_cont_tc, # if continuous variables + skews = skews_tc, skurts = skurts_tc, + marginal = marginal_tc, rho = rho_tc +) ``` ```{r message = FALSE} -trial.data <- data.frame(cbind(id=paste("TRIAL",1:n_t,sep=""), trial.data$continuous_variables, - ifelse(trial.data$ordinal_variables==1,1,0))) -colnames(trial.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", - "Sex", "ECOG1") +trial.data <- data.frame(cbind( + id = paste("TRIAL", 1:n_t, sep = ""), trial.data$continuous_variables, + ifelse(trial.data$ordinal_variables == 1, 1, 0) +)) +colnames(trial.data) <- c( + "ID", "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", + "Sex", "ECOG1" +) ``` We now simulate the survival outcome data using the Cox proportional hazards model [@cox1972regression] in which the hazard function $\lambda(t|\boldsymbol X, Z)$ is given by $$\lambda(t|\boldsymbol X, Z)=\lambda_0(t) \exp\{\boldsymbol X \boldsymbol \beta_{Trial} + Z \gamma\},$$ @@ -124,22 +130,22 @@ where $\lambda_0(t)=2$ is the baseline hazard function and assumed to be constan The survival function is given by $$S(t|\boldsymbol X, Z)=exp\{-\Lambda(t|\boldsymbol X, Z)\},$$ where $\Lambda(t|\boldsymbol X, Z)=\int_0^t\lambda(u|\boldsymbol X, Z)du$. The time to event is generated using a inverse CDF method. The censoring time is generated independently from an exponential distribution with `rate=1/4`. ```{r message=FALSE, warning=FALSE} -#Simulate survival outcome using Cox proportional hazards regression model +# Simulate survival outcome using Cox proportional hazards regression model set.seed(1) u <- runif(1) -lambda0 <- 2 #constant baseline hazard -#Simulate treatment indicator in the trial data -trial.data$group <- rbinom(n_t,1,prob=0.7) -beta <- c(0.3,0.1,-0.3,-0.2,-0.12,0.3,1,-1,-0.4) -times <- -log(u)/(lambda0*exp(as.matrix(trial.data[,-1])%*%beta)) #Inverse CDF method -cens.time <- rexp(n_t,rate=1/4) #Censoring time from exponential distribution -event <- as.numeric(times <= cens.time) #Event indicator. 0 is censored. -time <- pmin(times,cens.time) +lambda0 <- 2 # constant baseline hazard +# Simulate treatment indicator in the trial data +trial.data$group <- rbinom(n_t, 1, prob = 0.7) +beta <- c(0.3, 0.1, -0.3, -0.2, -0.12, 0.3, 1, -1, -0.4) +times <- -log(u) / (lambda0 * exp(as.matrix(trial.data[, -1]) %*% beta)) # Inverse CDF method +cens.time <- rexp(n_t, rate = 1 / 4) # Censoring time from exponential distribution +event <- as.numeric(times <= cens.time) # Event indicator. 0 is censored. +time <- pmin(times, cens.time) ``` ```{r message=FALSE} -#Combine trial data -trial.data <- data.frame(trial.data,time,event,data="TRIAL") +# Combine trial data +trial.data <- data.frame(trial.data, time, event, data = "TRIAL") ``` The first 10 observations in the RCT data is shown below. ```{r, echo=FALSE, results='asis'} @@ -147,7 +153,7 @@ knitr::kable(head(trial.data, 10)) ``` The censoring and event rate in the RCT data is ```{r message=FALSE} -table(trial.data$event)/nrow(trial.data) +table(trial.data$event) / nrow(trial.data) ``` The distribution of the outcome time is @@ -158,56 +164,61 @@ summary(trial.data$time) The summary of each of the baseline covariates and their standardized mean difference between treatment arms is shown below. ```{r message=FALSE} -myVars <- c("Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", - "ECOG1") +myVars <- c( + "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", + "ECOG1" +) -## Vector of categorical variables +## Vector of categorical variables catVars <- c("Smoker", "Sex", "ECOG1", "group") -tab1 <- CreateTableOne(vars = myVars, strata = "group" , data = trial.data, factorVars = catVars) +tab1 <- CreateTableOne(vars = myVars, strata = "group", data = trial.data, factorVars = catVars) ``` ```{r message=FALSE} -print(tab1,smd=TRUE) +print(tab1, smd = TRUE) ``` # Simulating external control data The same set of covariates $X$ were simulated for the external control data as the RCT. The means, variances for continuous variables and proportion for categorical variables are modified for the external controls compared to the RCT according to the code below. ```{r message=FALSE} -means_cont_ec <- c(55+2,150-2,65-2,35+2,50-2) #Vector of means of continuous variables -vars_cont_ec <- c(14,10,5,5,5) -marginal_ec = list(0.3,0.7,0.4) -ext.cont.data <- rcorrvar2(n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, - method = "Fleishman", seed=3, - means = means_cont_ec, vars = vars_cont_ec, # if continuous variables - skews = skews_tc, skurts = skurts_tc, - marginal=marginal_ec, rho = rho_tc) - +means_cont_ec <- c(55 + 2, 150 - 2, 65 - 2, 35 + 2, 50 - 2) # Vector of means of continuous variables +vars_cont_ec <- c(14, 10, 5, 5, 5) +marginal_ec <- list(0.3, 0.7, 0.4) +ext.cont.data <- rcorrvar2( + n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, + method = "Fleishman", seed = 3, + means = means_cont_ec, vars = vars_cont_ec, # if continuous variables + skews = skews_tc, skurts = skurts_tc, + marginal = marginal_ec, rho = rho_tc +) ``` ```{r message = FALSE} -ext.cont.data <- data.frame(cbind(id=paste("EC",1:n_ec,sep=""), ext.cont.data$continuous_variables, - ifelse(ext.cont.data$ordinal_variables==1,1,0))) -colnames(ext.cont.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", "Sex", "ECOG1") +ext.cont.data <- data.frame(cbind( + id = paste("EC", 1:n_ec, sep = ""), ext.cont.data$continuous_variables, + ifelse(ext.cont.data$ordinal_variables == 1, 1, 0) +)) +colnames(ext.cont.data) <- c("ID", "Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", "ECOG1") ``` The same generating mechanism used for the RCT data was used to simulate survival time for the external controls. ```{r message=FALSE} -#Simulate survival outcome using Cox proportional hazards regression model +# Simulate survival outcome using Cox proportional hazards regression model set.seed(1111) u <- runif(1) -lambda0 <- 6 #constant baseline hazard -beta <- c(-0.27,-0.1,0.3,0.2,0.1,-0.31,-1,1) -times <- -log(u)/(lambda0*exp(as.matrix(ext.cont.data[,-1])%*%beta)) #Inverse CDF method -cens.time <- rexp(n_ec,rate=3) #Censoring time from exponential distribution -event <- as.numeric(times <= cens.time) #Event indicator. 0 is censored. -time <- pmin(times,cens.time) - -#Simulate treatment indicator in the trial data +lambda0 <- 6 # constant baseline hazard +beta <- c(-0.27, -0.1, 0.3, 0.2, 0.1, -0.31, -1, 1) +times <- -log(u) / (lambda0 * exp(as.matrix(ext.cont.data[, -1]) %*% beta)) # Inverse CDF method +cens.time <- rexp(n_ec, rate = 3) # Censoring time from exponential distribution +event <- as.numeric(times <= cens.time) # Event indicator. 0 is censored. +time <- pmin(times, cens.time) + +# Simulate treatment indicator in the trial data group <- 0 -ext.cont.data <- data.frame(ext.cont.data,group,time,event,data="EC") +ext.cont.data <- data.frame(ext.cont.data, group, time, event, data = "EC") ``` The first 10 observations in the external control data is shown below. ```{r message=FALSE} @@ -216,7 +227,7 @@ knitr::kable(head(ext.cont.data, 10)) The censoring and event rate in the trial data is ```{r message=FALSE} -table(ext.cont.data$event)/nrow(ext.cont.data) +table(ext.cont.data$event) / nrow(ext.cont.data) ``` The distribution of the outcome time is @@ -233,13 +244,13 @@ names(ext.cont.data) ```{r message=FALSE} names(trial.data) -#MS I recommend adding a statement that forces the colnames to be the same, like names(ex.cont.data)[1:10]=names(trial.data)[1:10] +# MS I recommend adding a statement that forces the colnames to be the same, like names(ex.cont.data)[1:10]=names(trial.data)[1:10] ``` Now, we merge the two datasets. ```{r message=FALSE} -final.data <- data.frame(bind_rows(trial.data,ext.cont.data)) +final.data <- data.frame(bind_rows(trial.data, ext.cont.data)) ``` @@ -254,11 +265,11 @@ knitr::kable(tail(final.data, 10)) We examine the standardized mean difference for covariates between the RCT and external control data before conducting matching/weighting. Note that `strata="data"` in the following code. ```{r message=FALSE} -tab2 <- CreateTableOne(vars = myVars, strata = "data" , data = final.data, factorVars = catVars) +tab2 <- CreateTableOne(vars = myVars, strata = "data", data = final.data, factorVars = catVars) ``` ```{r message=FALSE} -print(tab2,smd=TRUE) +print(tab2, smd = TRUE) ``` Note that the standardized mean difference for all covariates is large. Next, we will conduct matching/weighting approach to reduce difference in baseline characteristics. @@ -266,11 +277,11 @@ Note that the standardized mean difference for all covariates is large. Next, we We also examine the standardized mean difference for covariates between the treated and control patients before conducting matching/weighting. Note that `strata="group"` in the following code. ```{r message=FALSE} -tab2 <- CreateTableOne(vars = myVars, strata = "group" , data = final.data, factorVars = catVars) +tab2 <- CreateTableOne(vars = myVars, strata = "group", data = final.data, factorVars = catVars) ``` ```{r message=FALSE} -print(tab2,smd=TRUE) +print(tab2, smd = TRUE) ``` # Propensity scores overlap @@ -278,18 +289,21 @@ print(tab2,smd=TRUE) Before applying the matching/weighting methods, we investigate the overlapping of propensity scores. The overlapping coefficient is only $0.19$ indicating a very small overlap. ```{r message=FALSE} -final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) -ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - family=binomial) -psfit=predict(ps.logit,type = "response",data=final.data) +final.data$indicator <- ifelse(final.data$data == "TRIAL", 1, 0) +ps.logit <- glm( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + family = binomial +) +psfit <- predict(ps.logit, type = "response", data = final.data) -ps_trial <- psfit[final.data$indicator==1] -ps_extcont <- psfit[final.data$indicator==0] +ps_trial <- psfit[final.data$indicator == 1] +ps_extcont <- psfit[final.data$indicator == 0] ``` ```{r, fig.width=10,fig.height=8, fig.align = "center"} -overlap(list(ps_trial=ps_trial, ps_extcont=ps_extcont),plot=TRUE) +overlap(list(ps_trial = ps_trial, ps_extcont = ps_extcont), plot = TRUE) ``` # Matching Methods @@ -306,32 +320,35 @@ We will explore several matching methods to balance the balance the baseline cha All of the matching methods can be conducted using the `MatchIt` package. The matching is conducted between the RCT subjects and external control subjects. Hence, we introduce a variable named `indicator` in `final.data` to represent the data source indicator. ```{r message=FALSE} -final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) +final.data$indicator <- ifelse(final.data$data == "TRIAL", 1, 0) ``` ## PSML This matching method is a variation of nearest neighbour or greedy matching that selects matches based on the difference in the logit of the propensity score, up to a certain distance (caliper) [@austin2011optimal]. We selected a caliper width of 0.2 of the standard deviation of the logit of the propensity score, where the propensity score is estimated using a logistic regression. ```{r message=FALSE} -m.out.nearest.ratio1.caliper.lps <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, estimand="ATT",data = final.data, - method="nearest",ratio=1,caliper=0.20, - distance="glm",link="linear.logit",replace=FALSE, - m.order="largest") +m.out.nearest.ratio1.caliper.lps <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", data = final.data, + method = "nearest", ratio = 1, caliper = 0.20, + distance = "glm", link = "linear.logit", replace = FALSE, + m.order = "largest" +) summary(m.out.nearest.ratio1.caliper.lps) ``` ```{r message = FALSE} -final.data$ratio1_caliper_weights_lps = m.out.nearest.ratio1.caliper.lps$weights +final.data$ratio1_caliper_weights_lps <- m.out.nearest.ratio1.caliper.lps$weights ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights_lps) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~ratio1_caliper_weights_lps) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -354,26 +371,29 @@ print(t1,smd=TRUE) This matching method is similar to PSML except the caliper width of 0.2 is based on the standard deviation of the propensity score scale [@stuart2011nonparametric]. ```{r message=FALSE} -m.out.nearest.ratio1.caliper <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, estimand="ATT",data = final.data, - method = "nearest", ratio = 1, caliper=0.2, replace=FALSE) +m.out.nearest.ratio1.caliper <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", data = final.data, + method = "nearest", ratio = 1, caliper = 0.2, replace = FALSE +) summary(m.out.nearest.ratio1.caliper) ``` ```{r message=FALSE} -final.data$ratio1_caliper_weights = m.out.nearest.ratio1.caliper$weights +final.data$ratio1_caliper_weights <- m.out.nearest.ratio1.caliper$weights ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~ratio1_caliper_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -413,27 +433,30 @@ For a treated unit $i$ and a control unit $j$, genetic matching uses the general If $w_k=1$ for all $k$ then the distance is the standard Mahalanobis distance. However, genetic matching estimates the optimal $w_k$s. The default is to maximize the smallest p-value among balance tests for the covariates in the matched sample (both Kolmogorov-Smirnov tests and t-tests for each covariate) [@greifer2020update]. ```{r message=FALSE} -m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 ,replace=TRUE, estimand="ATT", - data = final.data, method = "genetic", - ratio = 1,pop.size=200) +m.out.genetic.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + replace = TRUE, estimand = "ATT", + data = final.data, method = "genetic", + ratio = 1, pop.size = 200 +) summary(m.out.genetic.ratio1) ``` ```{r message=FALSE} -final.data$genetic_ratio1_weights = m.out.genetic.ratio1$weights +final.data$genetic_ratio1_weights <- m.out.genetic.ratio1$weights ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~genetic_ratio1_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` ## GMW @@ -441,27 +464,30 @@ print(t1,smd=TRUE) We now consider genetic matching without replacement. ```{r message=FALSE} -m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 ,replace=FALSE, estimand="ATT", - data = final.data, method = "genetic", - ratio = 1,pop.size=200) +m.out.genetic.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + replace = FALSE, estimand = "ATT", + data = final.data, method = "genetic", + ratio = 1, pop.size = 200 +) summary(m.out.genetic.ratio1) ``` ```{r message=FALSE} -final.data$genetic_ratio1_weights_no_replace = m.out.genetic.ratio1$weights +final.data$genetic_ratio1_weights_no_replace <- m.out.genetic.ratio1$weights ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights_no_replace) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~genetic_ratio1_weights_no_replace) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -484,27 +510,30 @@ print(t1,smd=TRUE) The optimal matching algorithm performs a global minimization of propensity score distance between the RCT subjects and matched external controls [@harris2016brief]. The criterion used is the sum of the absolute pair distances in the matched sample. Optimal pair matching and nearest neighbor matching often yield the same or very similar matched samples and some research has indicated that optimal pair matching is not much better than nearest neighbor matching at yielding balanced matched samples [@greifer2020update]. ```{r message=FALSE} -m.out.optimal.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1 , estimand="ATT", - data = final.data, method = "optimal", - ratio = 1) +m.out.optimal.ratio1 <- matchit( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + estimand = "ATT", + data = final.data, method = "optimal", + ratio = 1 +) summary(m.out.optimal.ratio1) ``` ```{r message=FALSE} -final.data$optimal_ratio1_weights = m.out.optimal.ratio1$weights +final.data$optimal_ratio1_weights <- m.out.optimal.ratio1$weights ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~optimal_ratio1_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~optimal_ratio1_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -537,57 +566,60 @@ set.seed(1) # Toolkit for Weighting and Analysis of Nonequivalent Groups # https://cran.r-project.org/web/packages/twang/vignettes/twang.pdf -# Model includes non-linear effects and interactions with shrinkage to +# Model includes non-linear effects and interactions with shrinkage to # avoid overfitting - -ps.AOD.ATT <- ps(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - estimand = "ATT", interaction.depth=3, - shrinkage=0.01, verbose = FALSE, n.trees = 7000, - stop.method = c("es.mean","ks.max")) - -# interaction.dept is the tree depth used in gradient boosting; loosely interpreted as + +ps.AOD.ATT <- ps( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + estimand = "ATT", interaction.depth = 3, + shrinkage = 0.01, verbose = FALSE, n.trees = 7000, + stop.method = c("es.mean", "ks.max") +) + +# interaction.dept is the tree depth used in gradient boosting; loosely interpreted as # the maximum number of variables that can be included in an interaction # n.trees is the maximum number of gradient boosting iterations to be considered. The # more iterations allows for more nonlienarity and interactions to be considered. -# shrinkage is a numeric value between 0 and 1 denoting the learning rate. Smaller -# values restrict the complexity that is added at each iteration of the gradient -# boosting algorithm. A smaller learning rate requires more iterations (n.trees), but +# shrinkage is a numeric value between 0 and 1 denoting the learning rate. Smaller +# values restrict the complexity that is added at each iteration of the gradient +# boosting algorithm. A smaller learning rate requires more iterations (n.trees), but # adds some protection against model overfit. The default value is 0.01. # windows() # plot(ps.AOD.ATT, plot=5) -# +# # summary(ps.AOD.ATT) -# +# # # Relative influence # summary(ps.AOD.ATT$gbm.obj, n.trees=ps.AOD.ATT$desc$ks.max.ATT$n.trees, plot=FALSE) -# +# # bal.table(ps.AOD.ATT) -final.data$weights_gbm <- ps.AOD.ATT$w[,1] +final.data$weights_gbm <- ps.AOD.ATT$w[, 1] ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~weights_gbm) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~weights_gbm) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` ## EB Entropy balancing is a weighting method to balance the covariates by assigning a scalar weight to each external control observations such that the reweighted groups satisfy a set of balance constraints that are imposed on the sample moments of the covariate distributions [@hainmueller2012entropy]. ```{r message=FALSE} -eb.out <- ebalance(final.data$indicator,final.data[,c(2:9)],max.iterations = 300) -final.data$eb_weights <- rep(1,nrow(final.data)) -final.data$eb_weights[final.data$indicator==0] <- eb.out$w +eb.out <- ebalance(final.data$indicator, final.data[, c(2:9)], max.iterations = 300) +final.data$eb_weights <- rep(1, nrow(final.data)) +final.data$eb_weights[final.data$indicator == 0] <- eb.out$w ``` Note that the entropy balancing method failed to converge. @@ -599,12 +631,12 @@ eb.out$converged We now compare the SMD between the two datasets. By definition, the SMD after EB should be zero. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~eb_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~eb_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -620,28 +652,31 @@ print(t1,smd=TRUE) ## IPW The propensity score is defined as the probability of a patient being in a trial given the observed baseline covariates. We utilized the ATT weights, which are defined for the IPW as fixing the trial patients weight at unity, and external control patients as $\hat{e}(x)/(1-\hat{e}(x))$ where $\hat{e}(x)$ is estimated using a logistic regression model [@amusa2019examination]. ```{r message=FALSE} -ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + - Sex +ECOG1, data = final.data, - family=binomial) -psfit=predict(ps.logit,type = "response",data=final.data) +ps.logit <- glm( + indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + + Sex + ECOG1, + data = final.data, + family = binomial +) +psfit <- predict(ps.logit, type = "response", data = final.data) -ps_trial <- psfit[final.data$indicator==1] -ps_extcont <- psfit[final.data$indicator==0] +ps_trial <- psfit[final.data$indicator == 1] +ps_extcont <- psfit[final.data$indicator == 0] final.data$invprob_weights <- NA -final.data$invprob_weights[final.data$indicator==0] <- ps_extcont/(1-ps_extcont) -final.data$invprob_weights[final.data$indicator==1] <- ps_trial/ps_trial +final.data$invprob_weights[final.data$indicator == 0] <- ps_extcont / (1 - ps_extcont) +final.data$invprob_weights[final.data$indicator == 1] <- ps_trial / ps_trial ``` We now compare the SMD between the two datasets. ```{r message=FALSE} -svy <- svydesign(id = ~0, data=final.data,weights = ~invprob_weights) -t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) +svy <- svydesign(id = ~0, data = final.data, weights = ~invprob_weights) +t1 <- svyCreateTableOne(vars = myVars, strata = "data", data = svy, factorVars = catVars) ``` ```{r message=FALSE} -print(t1,smd=TRUE) +print(t1, smd = TRUE) ``` @@ -657,7 +692,7 @@ print(t1,smd=TRUE) Next we will investigate balance plots. ```{r message=FALSE} -covs <- data.frame(final.data[,c("Age","Weight","Height","Biomarker1","Biomarker2","Smoker","Sex","ECOG1")]) +covs <- data.frame(final.data[, c("Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", "ECOG1")]) data_with_weights <- final.data ``` @@ -666,118 +701,128 @@ data_with_weights <- final.data We now conduct a balance diagnostic by considering SMD plot. The x-axis of the plot represent the absolute value of the SMD and y-axis represent the list of all covariates. SMD greater than $0.1$ can be considered a sign of imbalance [@zhang2019balance]. Hence, we put a threshold of $0.1$ in the plot with a vertical dashed line. ```{r, fig.width=10,fig.height=8,fig.align = "center"} -love.plot(covs, - treat=data_with_weights$data, - weights = list(NNMPS=data_with_weights$ratio1_caliper_weights, - NNMLPS=data_with_weights$ratio1_caliper_weights_lps, - OPTM=data_with_weights$optimal_ratio1_weights, - GENMATCH=data_with_weights$genetic_ratio1_weights, - GENMATCHW=data_with_weights$genetic_ratio1_weights_no_replace), - thresholds=0.1 ,binary="std",shapes = c("circle filled"), - line=FALSE,estimand="ATT",abs=TRUE, - sample.names = c("PSMR", - "PSML", - "OM", - "GM", - "GMW"), - title="Covariate Balance after matching methods", - s.d.denom="pooled") - +love.plot(covs, + treat = data_with_weights$data, + weights = list( + NNMPS = data_with_weights$ratio1_caliper_weights, + NNMLPS = data_with_weights$ratio1_caliper_weights_lps, + OPTM = data_with_weights$optimal_ratio1_weights, + GENMATCH = data_with_weights$genetic_ratio1_weights, + GENMATCHW = data_with_weights$genetic_ratio1_weights_no_replace + ), + thresholds = 0.1, binary = "std", shapes = c("circle filled"), + line = FALSE, estimand = "ATT", abs = TRUE, + sample.names = c( + "PSMR", + "PSML", + "OM", + "GM", + "GMW" + ), + title = "Covariate Balance after matching methods", + s.d.denom = "pooled" +) ``` # Balance Plots for Weighting Methods ```{r, fig.width=10,fig.height=8,fig.align = "center"} -love.plot(covs, - treat=data_with_weights$data, - weights = list(EB=data_with_weights$eb_weights, - IPW=data_with_weights$invprob_weights, - GBM=data_with_weights$weights_gbm), - thresholds=0.1 ,binary="std",shapes = c("circle filled"), - line=FALSE,estimand="ATT",abs=TRUE, - sample.names = c("EB", - "IPW", - "GBM"), - title="Covariate Balance after weighting methods", - s.d.denom="pooled") - +love.plot(covs, + treat = data_with_weights$data, + weights = list( + EB = data_with_weights$eb_weights, + IPW = data_with_weights$invprob_weights, + GBM = data_with_weights$weights_gbm + ), + thresholds = 0.1, binary = "std", shapes = c("circle filled"), + line = FALSE, estimand = "ATT", abs = TRUE, + sample.names = c( + "EB", + "IPW", + "GBM" + ), + title = "Covariate Balance after weighting methods", + s.d.denom = "pooled" +) ``` We now investigate the effective sample size (ESS) in the trial and external control cohort. ```{r message=FALSE} -#External control -ess.PSML.extcont <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0])^2) +# External control +ess.PSML.extcont <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 0])^2) -ess.PSMR.extcont <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==0])^2) +ess.PSMR.extcont <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 0])^2) -ess.OM.extcont <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==0])^2) +ess.OM.extcont <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.GM.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0])^2) +ess.GM.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.eb.extcont <- (sum(data_with_weights$eb_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$eb_weights[data_with_weights$indicator==0])^2) +ess.eb.extcont <- (sum(data_with_weights$eb_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$eb_weights[data_with_weights$indicator == 0])^2) -ess.ipw.extcont <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$invprob_weights[data_with_weights$indicator==0])^2) +ess.ipw.extcont <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$invprob_weights[data_with_weights$indicator == 0])^2) -ess.gbm.extcont <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$weights_gbm[data_with_weights$indicator==0])^2) +ess.gbm.extcont <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$weights_gbm[data_with_weights$indicator == 0])^2) -ess.genetic.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0])^2) +ess.genetic.extcont <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0])^2) -ess.genetic.no.replace.extcont <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==0]))^2/ - sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==0])^2) +ess.genetic.no.replace.extcont <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 0]))^2 / + sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 0])^2) ``` ```{r message=FALSE} -#Trial -ess.PSML.trial <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1])^2) +# Trial +ess.PSML.trial <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator == 1])^2) -ess.PSMR.trial <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator==1])^2) +ess.PSMR.trial <- (sum(data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$ratio1_caliper_weights[data_with_weights$indicator == 1])^2) -ess.OM.trial <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator==1])^2) +ess.OM.trial <- (sum(data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$optimal_ratio1_weights[data_with_weights$indicator == 1])^2) -ess.GM.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1])^2) +ess.GM.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1])^2) -ess.eb.trial <- (sum(data_with_weights$eb_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$eb_weights[data_with_weights$indicator==1])^2) +ess.eb.trial <- (sum(data_with_weights$eb_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$eb_weights[data_with_weights$indicator == 1])^2) -ess.ipw.trial <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$invprob_weights[data_with_weights$indicator==1])^2) +ess.ipw.trial <- (sum(data_with_weights$invprob_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$invprob_weights[data_with_weights$indicator == 1])^2) -ess.gbm.trial <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$weights_gbm[data_with_weights$indicator==1])^2) +ess.gbm.trial <- (sum(data_with_weights$weights_gbm[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$weights_gbm[data_with_weights$indicator == 1])^2) -ess.genetic.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==1])^2) -ess.genetic.no.replace.trial <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==1]))^2/ - sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator==1])^2) +ess.genetic.trial <- (sum(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 1])^2) +ess.genetic.no.replace.trial <- (sum(data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 1]))^2 / + sum((data_with_weights$genetic_ratio1_weights_no_replace[data_with_weights$indicator == 1])^2) ``` ```{r message=FALSE} -out.ess <- data.frame(Unadjusted=c(length(which(final.data$data=="TRIAL")),length(which(final.data$data=="EC"))), - PSML=c(ess.PSML.trial,ess.PSML.extcont), - PSMR=c(ess.PSMR.trial,ess.PSMR.extcont), - OM=c(ess.OM.trial,ess.OM.extcont), - GM=c(ess.genetic.trial,ess.genetic.extcont), - GMW=c(ess.genetic.no.replace.trial,ess.genetic.no.replace.extcont), - EB=c(ess.eb.trial,ess.eb.extcont), - IPW=c(ess.ipw.trial,ess.ipw.extcont), - GBM=c(ess.gbm.trial,ess.gbm.extcont)) +out.ess <- data.frame( + Unadjusted = c(length(which(final.data$data == "TRIAL")), length(which(final.data$data == "EC"))), + PSML = c(ess.PSML.trial, ess.PSML.extcont), + PSMR = c(ess.PSMR.trial, ess.PSMR.extcont), + OM = c(ess.OM.trial, ess.OM.extcont), + GM = c(ess.genetic.trial, ess.genetic.extcont), + GMW = c(ess.genetic.no.replace.trial, ess.genetic.no.replace.extcont), + EB = c(ess.eb.trial, ess.eb.extcont), + IPW = c(ess.ipw.trial, ess.ipw.extcont), + GBM = c(ess.gbm.trial, ess.gbm.extcont) +) -rownames(out.ess) <- c("Trial","External Control") +rownames(out.ess) <- c("Trial", "External Control") ``` Note: After applying the matching methods, some patients in RCT were excluded. For demonstration purpose in this article, we will also exclude RCT patients that were not matched in the Bayesian outcome model. However, in reality we may keep the full sample size in RCT and discounting could be done at the second stage with power prior and commensurate prior. Before, moving to the next stage of Bayesian borrowing, ESS also needs to be taken into account. @@ -790,11 +835,11 @@ out.ess We also investigate the histogram of the weights for external control patients and the effective sample size (ESS). ```{r, fig.width=10,fig.height=10, fig.align = "center" } -par(mfrow=c(2,2)) -hist(data_with_weights$eb_weights[data_with_weights$indicator==0],main=paste("EB \n ESS=",round(ess.eb.extcont),sep=""),xlab="Weight") -hist(data_with_weights$invprob_weights[data_with_weights$indicator==0],main=paste("IPW\n ESS=",round(ess.ipw.extcont),sep=""),xlab="Weight") -hist(data_with_weights$weights_gbm[data_with_weights$indicator==0],main=paste("GBM \n ESS=",round(ess.gbm.extcont),sep=""),xlab="Weight") -hist(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator==0],main=paste("GM \n ESS=",round(ess.genetic.extcont),sep=""),xlab="Weight") +par(mfrow = c(2, 2)) +hist(data_with_weights$eb_weights[data_with_weights$indicator == 0], main = paste("EB \n ESS=", round(ess.eb.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$invprob_weights[data_with_weights$indicator == 0], main = paste("IPW\n ESS=", round(ess.ipw.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$weights_gbm[data_with_weights$indicator == 0], main = paste("GBM \n ESS=", round(ess.gbm.extcont), sep = ""), xlab = "Weight") +hist(data_with_weights$genetic_ratio1_weights[data_with_weights$indicator == 0], main = paste("GM \n ESS=", round(ess.genetic.extcont), sep = ""), xlab = "Weight") ``` diff --git a/vignettes/original/_match_weight_02_application.Rmd b/vignettes/original/_match_weight_02_application.Rmd index c3bc7793..3778aebb 100644 --- a/vignettes/original/_match_weight_02_application.Rmd +++ b/vignettes/original/_match_weight_02_application.Rmd @@ -42,7 +42,7 @@ We will also consider several Bayesian dynamic borrowing methods * Power prior # Hazard function -The hazard function of $T$ at time $t$ is defined as the instantaneous rate of the event occurence that is still at risk at time $t$. It is defined as +The hazard function of $T$ at time $t$ is defined as the instantaneous rate of the event occurrence that is still at risk at time $t$. It is defined as \begin{equation} h(t)=\lim_\limits{\Delta \rightarrow 0 } P\frac{(t\leq T - %\VignetteIndexEntry{4. Conduct a simulation study} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, echo = FALSE, message = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#", - fig.width = 5, - fig.height = 3, - dpi = 120, - fig.align = "center" -) -options(rmarkdown.html_vignette.check_title = FALSE) -output_hook <- knitr::knit_hooks$get("output") -output_null <- function(x, options) { - return(NULL) -} -``` - -In this article, you'll learn how to use `psborrow2` to create a -simulation study with the goal of informing trial design. - -Note that we'll need `cmdstanr` to run the simulation study. Please install -`cmdstanr` if you have not done so already following -[this guide](https://mc-stan.org/cmdstanr/articles/cmdstanr.html). - -Let's load `psborrow2` to start: - -```{r setup, message = FALSE} -library(psborrow2) -library(cmdstanr) -``` - -# Bringing your own simulated data - -We'll start by showing how to conduct a simulation study when you bring your -own simulated data. To learn how to use `psborrow2` for the data generation -as well, please refer to the [data simulation article (https://genentech.github.io/psborrow2/articles/data_simulation.html)](https://genentech.github.io/psborrow2/articles/data_simulation.html). - -To execute a simulation study with your own data, -we need to build an object of class `Simulation` using the function -`create_simulation_obj()`. Let's look at the arguments to -`create_simulation_obj()` and consider them one-by-one below: - -``` -create_simulation_obj( - data_matrix_list, - outcome, - borrowing, - covariate, - treatment -) - -``` - -## `data_matrix_list` - -`data_matrix_list` is where you input the data you will be using for the -simulation study using the function `sim_data_list()`. - -The first argument is a list of lists of matrices. At the highest level, -we'll index different data generation parameters. At the lowest level, -we'll index different matrices generated with these parameters. - -## `data_list` - -Figure 1 below depicts an example `data_list` object. This object is a -list of lists with two data generation scenarios (e.g., true -HR of 1.0 and true HR of 0.8). Each scenario is arranged as a list of -matrices that were generated according to that data generation scenario. - -
-![Figure 1](data_list.jpg){width=80%} - -
- -We'll use the `simsurv` package to generate survival data and we'll -then put it in a similar format. In this example, we'll vary -two data generation parameters: true HR and drift HR (the HR comparing -external to internal controls). Suppose we have a function, -`sim_single_matrix()` which can simulate data for a single matrix. - -That is: - -```{r class.source = "fold-hide"} -library(simsurv) -# function to create a single matrix -sim_single_matrix <- function(n = 500, # n simulated pts - prob = c( - 0.1, # proportion internal control - 0.2, # proportion internal treated - 0.7 - ), # proportion external control - hr = 0.70, # true HR for the treatment - drift_hr = 1.0, # HR of external/internal - ...) { - # checks - if (sum(prob) != 1.0) { - stop("prob must sum to 1") - } - - # data frame with the subject IDs and treatment group - df_ids <- data.frame( - id = 1:n, - ext = c( - rep(0L, n * (prob[1] + prob[2])), - rep(1L, n * prob[3]) - ), - trt = c( - rep(0L, n * prob[1]), - rep(1L, n * prob[2]), - rep(0L, n * prob[3]) - ) - ) - - # simulated event times - df_surv <- simsurv( - lambdas = 0.1, - dist = "exponential", - betas = c( - trt = log(hr), - ext = log(drift_hr) - ), - x = df_ids, - maxt = 50, - ... - ) - - df_surv$censor <- 1 - df_surv$status - - # merge the simulated event times into data frame - df <- merge(df_ids, df_surv) - df <- df[, c("id", "ext", "trt", "eventtime", "status", "censor")] - colnames(df) <- c("id", "ext", "trt", "time", "status", "cnsr") - return(as.matrix(df)) -} -``` - - -```{r} -set.seed(123) -head(sim_single_matrix(n = 500, hr = 0.5, drift_hr = 1.2)) -``` - -Using this function, let's simulate a list of lists of matrices with four -scenarios: - -- True HR = 0.6, drift HR = 1.0 -- True HR = 1.0, drift HR = 1.0 -- True HR = 0.6, drift HR = 1.5 -- True HR = 1.0, drift HR = 1.5 - -```{r} -# Seed for reproducibility -set.seed(123) - -# Number of simulations per scenario -n <- 100 - -# Create list of lists of data -my_data_list <- list( - replicate(n, - sim_single_matrix(n = 250, hr = 0.6, drift_hr = 1.0), - simplify = FALSE - ), - replicate(n, - sim_single_matrix(n = 250, hr = 1.0, drift_hr = 1.0), - simplify = FALSE - ), - replicate(n, - sim_single_matrix(n = 250, hr = 0.6, drift_hr = 1.5), - simplify = FALSE - ), - replicate(n, - sim_single_matrix(n = 250, hr = 1.0, drift_hr = 1.5), - simplify = FALSE - ) -) -``` - -There are `r NROW(my_data_list)` scenarios. - -```{r} -NROW(my_data_list) -``` - -Each scenario has `r n` matrices. - -```{r} -NROW(my_data_list[[1]]) -``` - -The lowest level of the list of lists is a data matrix. - -```{r} -head(my_data_list[[1]][[1]]) -``` - -### `guide` - -In order to summarize the results from the different parameters in -your simulation study, `psborrow2` needs to know how the simulation parameters -differ. That is the purpose of the argument `guide`, which is a `data.frame` -that distinguishes the simulation study parameters. Three columns are required -in `guide`, though many more can be provided. The three required columns are: - -* The true treatment effect (in our case a HR) -* The true drift effect (in our case a HR). Drift effects >1 will mean -that the external control arm experiences greater hazard than the internal -control arm. -* The name of a column that indexes the `data_list` - -In this example, the `r NROW(my_data_list)` scenarios are -summarized with the below `guide`: - -```{r} -my_sim_data_guide <- expand.grid( - true_hr = c(0.6, 1.0), - drift_hr = c("No drift HR", "Moderate drift HR") -) - -my_sim_data_guide$id <- seq(1, NROW(my_sim_data_guide)) - -my_sim_data_guide -``` - -This guide implies that `my_sim_data_guide[[1]]` is a list of -matrices where the treatment HR was 0.6 and the drift HR was 1.0. - -### `effect`, `drift`, and `index` - -The last three inputs to `sim_data_list()`, `effect`, `drift`, and `index` -are the column names in `guide` that correspond to the true treatment effect, -true drift effect, and index of the `data_list` items, respectively. -For our study, these are `"true_hr"`, `"drift_hr"`, and `"id"`. - -Putting it all together, we can create an object of class `SimDataList`: - -```{r} -my_sim_data_list <- sim_data_list( - data_list = my_data_list, - guide = my_sim_data_guide, - effect = "true_hr", - drift = "drift_hr", - index = "id" -) - -my_sim_data_list -``` - -## `outcome` - -`outcome` is where you pass information on the study outcomes. You can pass -either a single `Outcome` class object (e.g., as produced by `outcome_surv_exponential()`), -or a list of `Outcome` class objects passed to `sim_outcome_list()`. For our -example, let's just use a single exponential distribution. - -```{r} -my_sim_out <- outcome_surv_exponential( - time_var = "time", - cens_var = "cnsr", - baseline_prior = prior_normal(0, 1000) -) - -my_sim_out -``` - -## `borrowing` - -`borrowing` is where we input information on the type of borrowing we want to -evaluate. This can be either a single object of class `Borrowing` or -a list of objects created with `sim_borrowing_list()`. For the sake of example, -let's assume we are interested in comparing four borrowing scenarios: - -* No borrowing -* BDB, conservative hyperprior -* BDB, aggressive hyperprior -* Full borrowing - -How do we specify that we want to evaluate multiple borrowing methods? -We'll use a special list of `Borrowing` objects, -which we'll create through the function `sim_borrowing_list()`. - -```{r} -my_borrowing_list <- sim_borrowing_list( - list( - "No borrowing" = borrowing_none("ext"), - "Full borrowing" = borrowing_full("ext"), - "BDB - conservative" = borrowing_hierarchical_commensurate("ext", prior_gamma(0.001, 0.001)), - "BDB - aggressive" = borrowing_hierarchical_commensurate("ext", prior_gamma(1, 0.001)) - ) -) - -my_borrowing_list -``` - -## `covariate` - -`covariate` is for information on covariate adjustment details. This can be -a single instance of class `Covariates` from `add_covariates()` or a list of -`Covariates` objects created by `sim_covariate_list()` . This is also the only -argument that is not required in `create_simulation_obj`. Let's leave -this argument empty (i.e., let's not adjust for any covariates). - -## `treatment` - -`treatment` is where we input the treatment details for our simulation study. -As with other inputs, this can be a single instance of a class `Treatment`, -or a list of these classes, created with `sim_treatment_list()`. Let's -just use a single instance: - -```{r} -my_sim_treat <- treatment_details("trt", prior_normal(0, 1000)) - -my_sim_treat -``` - -## `create_simulation_obj()` - -Now that we have all of the relevant inputs for `create_simulation_obj()`, -let's call the function, which will generate and compile Stan models -ready to sample on our behalf. - -***Important:*** `psborrow2` will simulate the Cartesian product of all -unique list elements in `data_matrix_list`, `outcome`, `borrowing`, -`covariate`, and `treatment`. We have `r NROW(my_sim_data_guide)` data -generation scenarios and `r NROW(my_borrowing_list@guide)` borrowing -scenarios. The other inputs just have one scenario (or 0 for `covariate`, -which is equivalent to one scenario). This means we should expect -`r NROW(my_sim_data_guide)` $\times$ `r NROW(my_borrowing_list@guide)` -$=$ `r NROW(my_sim_data_guide) * NROW(my_borrowing_list@guide)` -combinations of parameters. Let's create a simulation object of class -`Simulation`: - -```{r echo = FALSE} -knitr::knit_hooks$set(output = output_null) -``` - -```{r} -simulation_obj <- create_simulation_obj( - my_sim_data_list, - outcome = my_sim_out, - borrowing = my_borrowing_list, - treatment = my_sim_treat, - quiet = TRUE -) - -simulation_obj -``` -```{r echo = FALSE} -knitr::knit_hooks$set(output = output_hook) -``` - -While we get a warning about the size of the simulation study, we are not worried -because we are limiting our MCMC samples below in this example. - -We can access the guide to see the specific scenarios that will be -simulated with `show_guide()`: - -```{r} -show_guide(simulation_obj) -``` - -## `mcmc_sample()` - -Now that we've created a simulation object, we're ready to call `mcmc_sample()` -and generate draws for our model. - -***Note*** there is one important additional argument to `mcmc_sample()` for -simulation objects: `posterior_quantiles`. This numeric vector of length 2 -specifies the quantiles for null coverage and true coverage. For instance, -95% credible coverage would be estimated with -`posterior_quantiles = c(0.025, 0.975)`, the default argument. - -```{r echo = FALSE} -knitr::knit_hooks$set(output = output_null) -``` - -```{r} -simulation_res <- mcmc_sample( - simulation_obj, - posterior_quantiles = c(0.025, 0.975), - iter_warmup = 400, - iter_sampling = 1000, - chains = 1L, - seed = 112233 -) -``` - -```{r echo = FALSE} -knitr::knit_hooks$set(output = output_hook) -``` - -***Note*** unlike an analysis on a single dataset, `mcmc_sample()` does not -return a `CmdStanModel` object when applied to a simulation study object. -Instead, it returns a class unique to simulation study results: -`MCMCSimulationResult`. - -```{r} -class(simulation_res) -``` - -Let's look at the performance of our simulation study by extracting the data.frame -that summarizes results, `get_results()`: - -```{r} -simulation_res_df <- get_results(simulation_res) -head(simulation_res_df) -``` - -Let's quickly visualize the results using `ggplot2`. We will first -load `ggplot2` and factorize our borrowing scenarios: - -```{r} -# Load ggplot2 -library(ggplot2) - -# Factorize -simulation_res_df$borrowing_scenario <- factor(simulation_res_df$borrowing_scenario, - levels = c( - "No borrowing", - "BDB - conservative", - "BDB - aggressive", - "Full borrowing" - ) -) -``` - -### MSE - -```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} -ggplot(simulation_res_df) + - geom_bar(aes(x = factor(true_hr), fill = borrowing_scenario, y = mse_mean), - stat = "identity", position = "dodge" - ) + - labs( - fill = "Borrowing scenario", - x = "True HR", - y = "MSE" - ) + - facet_wrap(~drift_hr) + - scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) -``` - -### Type I error - -Because we included a true HR of 1.0, we can evaluate type I error by looking -at the compliment to the true parameter coverage: - -```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} -ggplot(simulation_res_df[simulation_res_df$true_hr == 1.0, ]) + - geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = 1 - true_coverage), - stat = "identity", position = "dodge" - ) + - labs( - fill = "Borrowing scenario", - x = "drift HR", - y = "Type I error" - ) + - scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) + - scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) + - geom_hline(aes(yintercept = 0.05), linetype = 2) -``` - -### Power - -We can include power by looking at the results for our true simulation of 0.6. - -```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} -ggplot(simulation_res_df[simulation_res_df$true_hr == 0.6, ]) + - geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = 1 - null_coverage), - stat = "identity", position = "dodge" - ) + - labs( - fill = "Borrowing scenario", - x = "drift HR", - y = "Power" - ) + - scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) + - scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) + - geom_hline(aes(yintercept = 0.80), linetype = 2) -``` - - -### EHSS - -We can calculate the external historical sample size (EHSS) based on the simulation results. - -```{r class.source = "fold-hide", fig.dim = c(8, 4), dpi = 140} -var_mat <- do.call(rbind, simulation_res@results$trt_var) -N_internalcontrol <- 250*0.3 - -simulation_res_df$EHSS <- rowMeans(var_mat[rep(1:4, 4), ]/var_mat-1)*N_internalcontrol -simulation_res_df2 <- simulation_res_df[simulation_res_df$borrowing_scenario!="No borrowing", ] - -ggplot(simulation_res_df2) + - geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = EHSS), - stat = "identity", position = "dodge" - ) + - labs( - fill = "Borrowing scenario", - x = "Drift HR", - y = "EHSS" - ) + - facet_grid(~true_hr)+ - scale_fill_manual(values = c("#F7A9A8", "#7D82B8", "#613F75")) + - scale_y_continuous(breaks = c(0,30,60,N_internalcontrol, 90,120)) + - geom_hline(aes(yintercept = N_internalcontrol), linetype = 2) -``` - - -### Optimal Accuracy Design - -We can also compare designs for accuracy on the basis of their Euclidean distance to the ideal design by minimizing the type I and type II errors [@zabor2022optimal]. We can visualize the accuracy of any design using a scatterplot displaying type I error rate on the x-axis and power (1 -type II error rate) on the y-axis. A perfectly accurate design approaches the point (0, 1), representing no type I error and power of one. - -In the real design of clinical trial, we can designate probability to scenarios of drift hazard ratio and obtain weighted Euclidean distance to the ideal design. An example shows the probability of moderate drift hazard ratio is 0.2 and the probability of no drift hazard ratio is 0.8. - -*results weighted by (0.8, 0.2) for no drift and moderate drift - - -```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} -df_accuracy <- data.frame(simulation_res_df2[simulation_res_df2$true_hr == 1.0, - c("drift_hr", "borrowing_scenario")], - typeI = 1 - simulation_res_df2[simulation_res_df2$true_hr == 1.0, - c("true_coverage")], - Power = 1 - simulation_res_df2[simulation_res_df2$true_hr == 0.6, - "null_coverage"]) -df_accuracy$weights <- rep(c(0.8, 0.2), 3) - -df_sum <- data.frame(aggregate(df_accuracy$typeI*df_accuracy$weights, - list(df_accuracy$borrowing_scenario), sum), - aggregate(df_accuracy$Power*df_accuracy$weights, - list(df_accuracy$borrowing_scenario), sum)) -df_sum <- df_sum[, c(1,2,4)] -colnames(df_sum) <- c("borrowing_scenario", "typeI", "Power") - -ggplot(df_sum) + - geom_point(aes(x = typeI, color = borrowing_scenario, y = Power), size = 4) + - xlim(c(0,1))+ - ylim(c(0,1))+ - labs( - color = "Borrowing scenario", - x = "Type I Error Rate", - y = "Power" - ) + - scale_color_manual(values = c("#F7A9A8", "#7D82B8", "#613F75")) + - ggtitle("Optimal Accuracy Design") - -``` - - -# References +--- +title: "4. Conduct a simulation study" +bibliography: references.bib +author: "Matt Secrest, Isaac Gravestock, Kexin Jin and Zhi Yang" +output: + rmarkdown::html_vignette: + toc: true + toc_depth: 3 + code_folding: show +vignette: > + %\VignetteIndexEntry{4. Conduct a simulation study} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, echo = FALSE, message = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#", + fig.width = 5, + fig.height = 3, + dpi = 120, + fig.align = "center" +) +options(rmarkdown.html_vignette.check_title = FALSE) +output_hook <- knitr::knit_hooks$get("output") +output_null <- function(x, options) { + return(NULL) +} +``` + +In this article, you'll learn how to use `psborrow2` to create a +simulation study with the goal of informing trial design. + +Note that we'll need `cmdstanr` to run the simulation study. Please install +`cmdstanr` if you have not done so already following +[this guide](https://mc-stan.org/cmdstanr/articles/cmdstanr.html). + +Let's load `psborrow2` to start: + +```{r setup, message = FALSE} +library(psborrow2) +library(cmdstanr) +``` + +# Bringing your own simulated data + +We'll start by showing how to conduct a simulation study when you bring your +own simulated data. To learn how to use `psborrow2` for the data generation +as well, please refer to the [data simulation article (https://genentech.github.io/psborrow2/articles/data_simulation.html)](https://genentech.github.io/psborrow2/articles/data_simulation.html). + +To execute a simulation study with your own data, +we need to build an object of class `Simulation` using the function +`create_simulation_obj()`. Let's look at the arguments to +`create_simulation_obj()` and consider them one-by-one below: + +``` +create_simulation_obj( + data_matrix_list, + outcome, + borrowing, + covariate, + treatment +) + +``` + +## `data_matrix_list` + +`data_matrix_list` is where you input the data you will be using for the +simulation study using the function `sim_data_list()`. + +The first argument is a list of lists of matrices. At the highest level, +we'll index different data generation parameters. At the lowest level, +we'll index different matrices generated with these parameters. + +## `data_list` + +Figure 1 below depicts an example `data_list` object. This object is a +list of lists with two data generation scenarios (e.g., true +HR of 1.0 and true HR of 0.8). Each scenario is arranged as a list of +matrices that were generated according to that data generation scenario. + +
+![Figure 1](data_list.jpg){width=80%} + +
+ +We'll use the `simsurv` package to generate survival data and we'll +then put it in a similar format. In this example, we'll vary +two data generation parameters: true HR and drift HR (the HR comparing +external to internal controls). Suppose we have a function, +`sim_single_matrix()` which can simulate data for a single matrix. + +That is: + +```{r class.source = "fold-hide"} +library(simsurv) +# function to create a single matrix +sim_single_matrix <- function(n = 500, # n simulated pts + prob = c( + 0.1, # proportion internal control + 0.2, # proportion internal treated + 0.7 + ), # proportion external control + hr = 0.70, # true HR for the treatment + drift_hr = 1.0, # HR of external/internal + ...) { + # checks + if (sum(prob) != 1.0) { + stop("prob must sum to 1") + } + + # data frame with the subject IDs and treatment group + df_ids <- data.frame( + id = 1:n, + ext = c( + rep(0L, n * (prob[1] + prob[2])), + rep(1L, n * prob[3]) + ), + trt = c( + rep(0L, n * prob[1]), + rep(1L, n * prob[2]), + rep(0L, n * prob[3]) + ) + ) + + # simulated event times + df_surv <- simsurv( + lambdas = 0.1, + dist = "exponential", + betas = c( + trt = log(hr), + ext = log(drift_hr) + ), + x = df_ids, + maxt = 50, + ... + ) + + df_surv$censor <- 1 - df_surv$status + + # merge the simulated event times into data frame + df <- merge(df_ids, df_surv) + df <- df[, c("id", "ext", "trt", "eventtime", "status", "censor")] + colnames(df) <- c("id", "ext", "trt", "time", "status", "cnsr") + return(as.matrix(df)) +} +``` + + +```{r} +set.seed(123) +head(sim_single_matrix(n = 500, hr = 0.5, drift_hr = 1.2)) +``` + +Using this function, let's simulate a list of lists of matrices with four +scenarios: + +- True HR = 0.6, drift HR = 1.0 +- True HR = 1.0, drift HR = 1.0 +- True HR = 0.6, drift HR = 1.5 +- True HR = 1.0, drift HR = 1.5 + +```{r} +# Seed for reproducibility +set.seed(123) + +# Number of simulations per scenario +n <- 100 + +# Create list of lists of data +my_data_list <- list( + replicate(n, + sim_single_matrix(n = 250, hr = 0.6, drift_hr = 1.0), + simplify = FALSE + ), + replicate(n, + sim_single_matrix(n = 250, hr = 1.0, drift_hr = 1.0), + simplify = FALSE + ), + replicate(n, + sim_single_matrix(n = 250, hr = 0.6, drift_hr = 1.5), + simplify = FALSE + ), + replicate(n, + sim_single_matrix(n = 250, hr = 1.0, drift_hr = 1.5), + simplify = FALSE + ) +) +``` + +There are `r NROW(my_data_list)` scenarios. + +```{r} +NROW(my_data_list) +``` + +Each scenario has `r n` matrices. + +```{r} +NROW(my_data_list[[1]]) +``` + +The lowest level of the list of lists is a data matrix. + +```{r} +head(my_data_list[[1]][[1]]) +``` + +### `guide` + +In order to summarize the results from the different parameters in +your simulation study, `psborrow2` needs to know how the simulation parameters +differ. That is the purpose of the argument `guide`, which is a `data.frame` +that distinguishes the simulation study parameters. Three columns are required +in `guide`, though many more can be provided. The three required columns are: + +* The true treatment effect (in our case a HR) +* The true drift effect (in our case a HR). Drift effects >1 will mean +that the external control arm experiences greater hazard than the internal +control arm. +* The name of a column that indexes the `data_list` + +In this example, the `r NROW(my_data_list)` scenarios are +summarized with the below `guide`: + +```{r} +my_sim_data_guide <- expand.grid( + true_hr = c(0.6, 1.0), + drift_hr = c("No drift HR", "Moderate drift HR") +) + +my_sim_data_guide$id <- seq(1, NROW(my_sim_data_guide)) + +my_sim_data_guide +``` + +This guide implies that `my_sim_data_guide[[1]]` is a list of +matrices where the treatment HR was 0.6 and the drift HR was 1.0. + +### `effect`, `drift`, and `index` + +The last three inputs to `sim_data_list()`, `effect`, `drift`, and `index` +are the column names in `guide` that correspond to the true treatment effect, +true drift effect, and index of the `data_list` items, respectively. +For our study, these are `"true_hr"`, `"drift_hr"`, and `"id"`. + +Putting it all together, we can create an object of class `SimDataList`: + +```{r} +my_sim_data_list <- sim_data_list( + data_list = my_data_list, + guide = my_sim_data_guide, + effect = "true_hr", + drift = "drift_hr", + index = "id" +) + +my_sim_data_list +``` + +## `outcome` + +`outcome` is where you pass information on the study outcomes. You can pass +either a single `Outcome` class object (e.g., as produced by `outcome_surv_exponential()`), +or a list of `Outcome` class objects passed to `sim_outcome_list()`. For our +example, let's just use a single exponential distribution. + +```{r} +my_sim_out <- outcome_surv_exponential( + time_var = "time", + cens_var = "cnsr", + baseline_prior = prior_normal(0, 1000) +) + +my_sim_out +``` + +## `borrowing` + +`borrowing` is where we input information on the type of borrowing we want to +evaluate. This can be either a single object of class `Borrowing` or +a list of objects created with `sim_borrowing_list()`. For the sake of example, +let's assume we are interested in comparing four borrowing scenarios: + +* No borrowing +* BDB, conservative hyperprior +* BDB, aggressive hyperprior +* Full borrowing + +How do we specify that we want to evaluate multiple borrowing methods? +We'll use a special list of `Borrowing` objects, +which we'll create through the function `sim_borrowing_list()`. + +```{r} +my_borrowing_list <- sim_borrowing_list( + list( + "No borrowing" = borrowing_none("ext"), + "Full borrowing" = borrowing_full("ext"), + "BDB - conservative" = borrowing_hierarchical_commensurate("ext", prior_gamma(0.001, 0.001)), + "BDB - aggressive" = borrowing_hierarchical_commensurate("ext", prior_gamma(1, 0.001)) + ) +) + +my_borrowing_list +``` + +## `covariate` + +`covariate` is for information on covariate adjustment details. This can be +a single instance of class `Covariates` from `add_covariates()` or a list of +`Covariates` objects created by `sim_covariate_list()` . This is also the only +argument that is not required in `create_simulation_obj`. Let's leave +this argument empty (i.e., let's not adjust for any covariates). + +## `treatment` + +`treatment` is where we input the treatment details for our simulation study. +As with other inputs, this can be a single instance of a class `Treatment`, +or a list of these classes, created with `sim_treatment_list()`. Let's +just use a single instance: + +```{r} +my_sim_treat <- treatment_details("trt", prior_normal(0, 1000)) + +my_sim_treat +``` + +## `create_simulation_obj()` + +Now that we have all of the relevant inputs for `create_simulation_obj()`, +let's call the function, which will generate and compile Stan models +ready to sample on our behalf. + +***Important:*** `psborrow2` will simulate the Cartesian product of all +unique list elements in `data_matrix_list`, `outcome`, `borrowing`, +`covariate`, and `treatment`. We have `r NROW(my_sim_data_guide)` data +generation scenarios and `r NROW(my_borrowing_list@guide)` borrowing +scenarios. The other inputs just have one scenario (or 0 for `covariate`, +which is equivalent to one scenario). This means we should expect +`r NROW(my_sim_data_guide)` $\times$ `r NROW(my_borrowing_list@guide)` +$=$ `r NROW(my_sim_data_guide) * NROW(my_borrowing_list@guide)` +combinations of parameters. Let's create a simulation object of class +`Simulation`: + +```{r echo = FALSE} +knitr::knit_hooks$set(output = output_null) +``` + +```{r} +simulation_obj <- create_simulation_obj( + my_sim_data_list, + outcome = my_sim_out, + borrowing = my_borrowing_list, + treatment = my_sim_treat, + quiet = TRUE +) + +simulation_obj +``` +```{r echo = FALSE} +knitr::knit_hooks$set(output = output_hook) +``` + +While we get a warning about the size of the simulation study, we are not worried +because we are limiting our MCMC samples below in this example. + +We can access the guide to see the specific scenarios that will be +simulated with `show_guide()`: + +```{r} +show_guide(simulation_obj) +``` + +## `mcmc_sample()` + +Now that we've created a simulation object, we're ready to call `mcmc_sample()` +and generate draws for our model. + +***Note*** there is one important additional argument to `mcmc_sample()` for +simulation objects: `posterior_quantiles`. This numeric vector of length 2 +specifies the quantiles for null coverage and true coverage. For instance, +95% credible coverage would be estimated with +`posterior_quantiles = c(0.025, 0.975)`, the default argument. + +```{r echo = FALSE} +knitr::knit_hooks$set(output = output_null) +``` + +```{r} +simulation_res <- mcmc_sample( + simulation_obj, + posterior_quantiles = c(0.025, 0.975), + iter_warmup = 400, + iter_sampling = 1000, + chains = 1L, + seed = 112233 +) +``` + +```{r echo = FALSE} +knitr::knit_hooks$set(output = output_hook) +``` + +***Note*** unlike an analysis on a single dataset, `mcmc_sample()` does not +return a `CmdStanModel` object when applied to a simulation study object. +Instead, it returns a class unique to simulation study results: +`MCMCSimulationResult`. + +```{r} +class(simulation_res) +``` + +Let's look at the performance of our simulation study by extracting the data.frame +that summarizes results, `get_results()`: + +```{r} +simulation_res_df <- get_results(simulation_res) +head(simulation_res_df) +``` + +Let's quickly visualize the results using `ggplot2`. We will first +load `ggplot2` and factorize our borrowing scenarios: + +```{r} +# Load ggplot2 +library(ggplot2) + +# Factorize +simulation_res_df$borrowing_scenario <- factor(simulation_res_df$borrowing_scenario, + levels = c( + "No borrowing", + "BDB - conservative", + "BDB - aggressive", + "Full borrowing" + ) +) +``` + +### MSE + +```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} +ggplot(simulation_res_df) + + geom_bar(aes(x = factor(true_hr), fill = borrowing_scenario, y = mse_mean), + stat = "identity", position = "dodge" + ) + + labs( + fill = "Borrowing scenario", + x = "True HR", + y = "MSE" + ) + + facet_wrap(~drift_hr) + + scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) +``` + +### Type I error + +Because we included a true HR of 1.0, we can evaluate type I error by looking +at the compliment to the true parameter coverage: + +```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} +ggplot(simulation_res_df[simulation_res_df$true_hr == 1.0, ]) + + geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = 1 - true_coverage), + stat = "identity", position = "dodge" + ) + + labs( + fill = "Borrowing scenario", + x = "drift HR", + y = "Type I error" + ) + + scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) + + scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) + + geom_hline(aes(yintercept = 0.05), linetype = 2) +``` + +### Power + +We can include power by looking at the results for our true simulation of 0.6. + +```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} +ggplot(simulation_res_df[simulation_res_df$true_hr == 0.6, ]) + + geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = 1 - null_coverage), + stat = "identity", position = "dodge" + ) + + labs( + fill = "Borrowing scenario", + x = "drift HR", + y = "Power" + ) + + scale_fill_manual(values = c("#EF798A", "#F7A9A8", "#7D82B8", "#613F75")) + + scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) + + geom_hline(aes(yintercept = 0.80), linetype = 2) +``` + + +### EHSS + +We can calculate the external historical sample size (EHSS) based on the simulation results. + +```{r class.source = "fold-hide", fig.dim = c(8, 4), dpi = 140} +var_mat <- do.call(rbind, simulation_res@results$trt_var) +N_internalcontrol <- 250 * 0.3 + +simulation_res_df$EHSS <- rowMeans(var_mat[rep(1:4, 4), ] / var_mat - 1) * N_internalcontrol +simulation_res_df2 <- simulation_res_df[simulation_res_df$borrowing_scenario != "No borrowing", ] + +ggplot(simulation_res_df2) + + geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = EHSS), + stat = "identity", position = "dodge" + ) + + labs( + fill = "Borrowing scenario", + x = "Drift HR", + y = "EHSS" + ) + + facet_grid(~true_hr) + + scale_fill_manual(values = c("#F7A9A8", "#7D82B8", "#613F75")) + + scale_y_continuous(breaks = c(0, 30, 60, N_internalcontrol, 90, 120)) + + geom_hline(aes(yintercept = N_internalcontrol), linetype = 2) +``` + + +### Optimal Accuracy Design + +We can also compare designs for accuracy on the basis of their Euclidean distance to the ideal design by minimizing the type I and type II errors [@zabor2022optimal]. We can visualize the accuracy of any design using a scatterplot displaying type I error rate on the x-axis and power (1 -type II error rate) on the y-axis. A perfectly accurate design approaches the point (0, 1), representing no type I error and power of one. + +In the real design of clinical trial, we can designate probability to scenarios of drift hazard ratio and obtain weighted Euclidean distance to the ideal design. An example shows the probability of moderate drift hazard ratio is 0.2 and the probability of no drift hazard ratio is 0.8. + +*results weighted by (0.8, 0.2) for no drift and moderate drift + + +```{r class.source = "fold-hide", fig.dim = c(5, 3), dpi = 140} +df_accuracy <- data.frame( + simulation_res_df2[ + simulation_res_df2$true_hr == 1.0, + c("drift_hr", "borrowing_scenario") + ], + typeI = 1 - simulation_res_df2[ + simulation_res_df2$true_hr == 1.0, + c("true_coverage") + ], + Power = 1 - simulation_res_df2[ + simulation_res_df2$true_hr == 0.6, + "null_coverage" + ] +) +df_accuracy$weights <- rep(c(0.8, 0.2), 3) + +df_sum <- data.frame( + aggregate( + df_accuracy$typeI * df_accuracy$weights, + list(df_accuracy$borrowing_scenario), sum + ), + aggregate( + df_accuracy$Power * df_accuracy$weights, + list(df_accuracy$borrowing_scenario), sum + ) +) +df_sum <- df_sum[, c(1, 2, 4)] +colnames(df_sum) <- c("borrowing_scenario", "typeI", "Power") + +ggplot(df_sum) + + geom_point(aes(x = typeI, color = borrowing_scenario, y = Power), size = 4) + + xlim(c(0, 1)) + + ylim(c(0, 1)) + + labs( + color = "Borrowing scenario", + x = "Type I Error Rate", + y = "Power" + ) + + scale_color_manual(values = c("#F7A9A8", "#7D82B8", "#613F75")) + + ggtitle("Optimal Accuracy Design") +``` + + +# References diff --git a/vignettes/propensity_scores.Rmd b/vignettes/propensity_scores.Rmd index 5d5c99e7..204de98f 100644 --- a/vignettes/propensity_scores.Rmd +++ b/vignettes/propensity_scores.Rmd @@ -50,28 +50,28 @@ summary(weightit_model) #> #> - Weight ranges: #> -#> Min Max -#> treated 1.0000 || 1.0000 -#> control 0.0742 |---------------------------| 5.7137 +#> Min Max +#> treated 1. || 1. +#> control 0.083 |---------------------------| 5.897 #> #> - Units with the 5 most extreme weights by group: -#> -#> 5 4 3 2 1 -#> treated 1 1 1 1 1 -#> 195 158 465 438 371 -#> control 2.7859 2.7859 5.7137 5.7137 5.7137 +#> +#> 5 4 3 2 1 +#> treated 1 1 1 1 1 +#> 195 158 465 438 371 +#> control 2.792 2.792 5.897 5.897 5.897 #> #> - Weight statistics: #> -#> Coef of Var MAD Entropy # Zeros -#> treated 0.000 0.000 0.000 0 -#> control 1.564 0.831 0.603 0 +#> Coef of Var MAD Entropy # Zeros +#> treated 0. 0. 0. 0 +#> control 1.582 0.82 0.6 0 #> #> - Effective Sample Sizes: #> #> Control Treated #> Unweighted 350. 150 -#> Weighted 101.76 150 +#> Weighted 100.09 150 ``` Another useful package is [cobalt](https://cran.r-project.org/package=cobalt), which provides tools for @@ -384,13 +384,13 @@ knitr::kable(cbind(models = names(models), results_table), digits = 3) |models |variable | mean| median| sd| q5| q95| |:--------------|:--------|-----:|------:|-----:|-----:|-----:| -|No borrowing |HR_trt | 0.898| 0.878| 0.180| 0.638| 1.223| -|Full borrowing |HR_trt | 0.388| 0.386| 0.049| 0.310| 0.471| -|Power Prior 01 |HR_trt | 0.634| 0.625| 0.106| 0.478| 0.824| -|ATT Weights |HR_trt | 0.891| 0.882| 0.126| 0.697| 1.110| -|IPTW + BDB |HR_trt | 0.893| 0.877| 0.165| 0.653| 1.185| -|Matching 1:1 |HR_trt | 0.819| 0.808| 0.133| 0.620| 1.054| -|BDB |HR_trt | 0.875| 0.857| 0.178| 0.619| 1.194| +|No borrowing |HR_trt | 0.896| 0.878| 0.178| 0.638| 1.216| +|Full borrowing |HR_trt | 0.388| 0.385| 0.049| 0.311| 0.473| +|Power Prior 01 |HR_trt | 0.635| 0.626| 0.105| 0.479| 0.821| +|ATT Weights |HR_trt | 0.892| 0.884| 0.128| 0.697| 1.114| +|IPTW + BDB |HR_trt | 0.894| 0.879| 0.164| 0.653| 1.183| +|Matching 1:1 |HR_trt | 0.818| 0.808| 0.134| 0.619| 1.056| +|BDB |HR_trt | 0.875| 0.857| 0.176| 0.622| 1.187| diff --git a/vignettes/simple_overview.Rmd b/vignettes/simple_overview.Rmd index ecf9f5aa..eb1eec03 100644 --- a/vignettes/simple_overview.Rmd +++ b/vignettes/simple_overview.Rmd @@ -303,12 +303,12 @@ results ``` ## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -## lp__ -1618.00 -1617.68 1.51 1.38 -1620.85 -1616.19 1.00 1747 2189 -## beta_trt -0.16 -0.17 0.20 0.21 -0.48 0.18 1.00 2400 2911 -## alpha[1] -3.35 -3.34 0.16 0.17 -3.64 -3.10 1.00 2292 2763 -## alpha[2] -2.40 -2.40 0.06 0.06 -2.49 -2.30 1.00 2848 2880 -## tau 1.21 0.46 2.02 0.64 0.00 4.88 1.00 1917 1265 -## HR_trt 0.87 0.85 0.18 0.17 0.62 1.20 1.00 2400 2911 +## lp__ -1617.98 -1617.62 1.52 1.30 -1620.95 -1616.18 1.00 1933 1780 +## beta_trt -0.15 -0.16 0.20 0.20 -0.48 0.18 1.00 2167 2493 +## alpha[1] -3.36 -3.36 0.16 0.16 -3.62 -3.10 1.00 2129 2970 +## alpha[2] -2.40 -2.40 0.05 0.05 -2.49 -2.31 1.00 4276 3255 +## tau 1.12 0.46 1.79 0.64 0.00 4.43 1.00 1650 1382 +## HR_trt 0.88 0.86 0.18 0.17 0.62 1.19 1.00 2167 2493 ``` # Interpret results @@ -353,11 +353,11 @@ posterior::summarize_draws(draws, ~ quantile(.x, probs = c(0.025, 0.50, 0.975))) ## variable `2.5%` `50%` `97.5%` ## ## 1 lp__ -1622. -1618. -1616. -## 2 treatment log HR -0.548 -0.165 0.256 -## 3 baseline log hazard rate, internal -3.68 -3.34 -3.06 +## 2 treatment log HR -0.545 -0.156 0.234 +## 3 baseline log hazard rate, internal -3.68 -3.36 -3.05 ## 4 baseline log hazard rate, external -2.51 -2.40 -2.29 -## 5 commensurability parameter 0.000673 0.459 6.90 -## 6 treatment HR 0.578 0.848 1.29 +## 5 commensurability parameter 0.000452 0.456 6.14 +## 6 treatment HR 0.580 0.856 1.26 ``` Look at histogram of draws with bayesplot package @@ -367,7 +367,13 @@ bayesplot::mcmc_hist(draws, c("treatment HR")) ``` ``` -## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. +## Registered S3 method overwritten by 'plyr': +## method from +## [.indexed table1 +``` + +``` +## `stat_bin()` using `bins = 30`. Pick better value `binwidth`. ``` ![plot of chunk unnamed-chunk-18](figure-simple_overview-unnamed-chunk-18-1.png) @@ -398,16 +404,16 @@ table1( -Not treated
(N=50)
-Treated
(N=100)
-Not treated
(N=350)
-Not treated
(N=400)
-Treated
(N=100)
+Not treated
(N=50)
+Treated
(N=100)
+Not treated
(N=350)
+Not treated
(N=400)
+Treated
(N=100)
-cov1 +cov1 @@ -431,7 +437,7 @@ table1( 1.00 [0, 1.00] -cov2 +cov2 @@ -455,7 +461,7 @@ table1( 0 [0, 1.00] -cov3 +cov3 @@ -479,7 +485,7 @@ table1( 1.00 [0, 1.00] -cov4 +cov4 @@ -584,13 +590,13 @@ summarize_draws(draws_ps_no_borrow, ~ quantile(.x, probs = c(0.025, 0.50, 0.975) ## variable `2.5%` `50%` `97.5%` ## ## 1 lp__ -466. -462. -459. -## 2 treatment log HR -0.738 -0.347 0.0689 -## 3 baseline log hazard rate -4.76 -4.20 -3.72 -## 4 ps_cat_low -0.344 0.408 1.13 -## 5 ps_cat_low_med 0.460 0.998 1.61 -## 6 ps_cat_high_med 1.44 2.10 2.80 -## 7 ps_cat_high 2.42 3.01 3.66 -## 8 treatment HR 0.478 0.707 1.07 +## 2 treatment log HR -0.732 -0.355 0.0462 +## 3 baseline log hazard rate -4.73 -4.19 -3.73 +## 4 ps_cat_low -0.314 0.413 1.13 +## 5 ps_cat_low_med 0.436 1.01 1.58 +## 6 ps_cat_high_med 1.43 2.10 2.80 +## 7 ps_cat_high 2.42 3.02 3.64 +## 8 treatment HR 0.481 0.701 1.05 ``` @@ -632,16 +638,16 @@ summarize_draws(draws_ps_bdb, ~ quantile(.x, probs = c(0.025, 0.50, 0.975))) ## # A tibble: 10 × 4 ## variable `2.5%` `50%` `97.5%` ## -## 1 lp__ -1426. -1420. -1418. -## 2 treatment log HR -0.671 -0.350 -0.0350 -## 3 baseline log hazard rate, internal -4.65 -4.19 -3.77 -## 4 baseline log hazard rate, external -4.64 -4.21 -3.80 -## 5 commensurability parameter 0.107 58.0 1659. -## 6 ps_cat_low -0.300 0.274 0.851 -## 7 ps_cat_low_med 0.639 1.07 1.52 -## 8 ps_cat_high_med 1.65 2.10 2.58 -## 9 ps_cat_high 2.54 2.95 3.40 -## 10 treatment HR 0.511 0.705 0.966 +## 1 lp__ -1426. -1421. -1418. +## 2 treatment log HR -0.702 -0.355 -0.0330 +## 3 baseline log hazard rate, internal -4.65 -4.17 -3.75 +## 4 baseline log hazard rate, external -4.66 -4.19 -3.77 +## 5 commensurability parameter 0.119 54.3 1212. +## 6 ps_cat_low -0.364 0.250 0.853 +## 7 ps_cat_low_med 0.589 1.04 1.56 +## 8 ps_cat_high_med 1.62 2.08 2.59 +## 9 ps_cat_high 2.51 2.93 3.43 +## 10 treatment HR 0.496 0.701 0.968 ``` It looks like PS + BDB allowed us to most accurately recover the diff --git a/vignettes/simulation_study.Rmd b/vignettes/simulation_study.Rmd index f3b7ab74..57fc2ba0 100644 --- a/vignettes/simulation_study.Rmd +++ b/vignettes/simulation_study.Rmd @@ -513,19 +513,19 @@ head(simulation_res_df) # 5 0.6 No drift HR 1 100 default # 6 1.0 No drift HR 2 100 default # borrowing_scenario covariate_scenario treatment_scenario trt_var -# 1 No borrowing No adjustment default 0.06359053 -# 2 No borrowing No adjustment default 0.06276921 -# 3 No borrowing No adjustment default 0.06305379 -# 4 No borrowing No adjustment default 0.06286691 -# 5 Full borrowing No adjustment default 0.02635029 -# 6 Full borrowing No adjustment default 0.02537450 +# 1 No borrowing No adjustment default 0.06325259 +# 2 No borrowing No adjustment default 0.06248074 +# 3 No borrowing No adjustment default 0.06315199 +# 4 No borrowing No adjustment default 0.06261805 +# 5 Full borrowing No adjustment default 0.02637975 +# 6 Full borrowing No adjustment default 0.02520886 # mse_mean bias_mean null_coverage true_coverage -# 1 0.05917938 0.06011393 0.59 0.96 -# 2 0.13170714 0.03556530 0.95 0.95 -# 3 0.04730839 0.01311444 0.41 0.96 -# 4 0.12775243 0.02715064 0.96 0.96 -# 5 0.02024202 0.02088567 0.10 0.96 -# 6 0.05035481 0.01011514 0.96 0.96 +# 1 0.05786728 0.05914705 0.56 0.97 +# 2 0.12951999 0.03316452 0.96 0.96 +# 3 0.04697662 0.01242635 0.44 0.96 +# 4 0.12886423 0.02776839 0.96 0.96 +# 5 0.02018223 0.02087946 0.10 0.96 +# 6 0.05019592 0.01050302 0.96 0.96 ``` Let's quickly visualize the results using `ggplot2`. We will first @@ -628,10 +628,10 @@ We can calculate the external historical sample size (EHSS) based on the simulat ```{.r .fold-hide} var_mat <- do.call(rbind, simulation_res@results$trt_var) -N_internalcontrol <- 250*0.3 +N_internalcontrol <- 250 * 0.3 -simulation_res_df$EHSS <- rowMeans(var_mat[rep(1:4, 4), ]/var_mat-1)*N_internalcontrol -simulation_res_df2 <- simulation_res_df[simulation_res_df$borrowing_scenario!="No borrowing", ] +simulation_res_df$EHSS <- rowMeans(var_mat[rep(1:4, 4), ] / var_mat - 1) * N_internalcontrol +simulation_res_df2 <- simulation_res_df[simulation_res_df$borrowing_scenario != "No borrowing", ] ggplot(simulation_res_df2) + geom_bar(aes(x = factor(drift_hr), fill = borrowing_scenario, y = EHSS), @@ -642,9 +642,9 @@ ggplot(simulation_res_df2) + x = "Drift HR", y = "EHSS" ) + - facet_grid(~true_hr)+ + facet_grid(~true_hr) + scale_fill_manual(values = c("#F7A9A8", "#7D82B8", "#613F75")) + - scale_y_continuous(breaks = c(0,30,60,N_internalcontrol, 90,120)) + + scale_y_continuous(breaks = c(0, 30, 60, N_internalcontrol, 90, 120)) + geom_hline(aes(yintercept = N_internalcontrol), linetype = 2) ``` @@ -665,25 +665,39 @@ In the real design of clinical trial, we can designate probability to scenarios ```{.r .fold-hide} -df_accuracy <- data.frame(simulation_res_df2[simulation_res_df2$true_hr == 1.0, - c("drift_hr", "borrowing_scenario")], - typeI = 1 - simulation_res_df2[simulation_res_df2$true_hr == 1.0, - c("true_coverage")], - Power = 1 - simulation_res_df2[simulation_res_df2$true_hr == 0.6, - "null_coverage"]) +df_accuracy <- data.frame( + simulation_res_df2[ + simulation_res_df2$true_hr == 1.0, + c("drift_hr", "borrowing_scenario") + ], + typeI = 1 - simulation_res_df2[ + simulation_res_df2$true_hr == 1.0, + c("true_coverage") + ], + Power = 1 - simulation_res_df2[ + simulation_res_df2$true_hr == 0.6, + "null_coverage" + ] +) df_accuracy$weights <- rep(c(0.8, 0.2), 3) -df_sum <- data.frame(aggregate(df_accuracy$typeI*df_accuracy$weights, - list(df_accuracy$borrowing_scenario), sum), - aggregate(df_accuracy$Power*df_accuracy$weights, - list(df_accuracy$borrowing_scenario), sum)) -df_sum <- df_sum[, c(1,2,4)] +df_sum <- data.frame( + aggregate( + df_accuracy$typeI * df_accuracy$weights, + list(df_accuracy$borrowing_scenario), sum + ), + aggregate( + df_accuracy$Power * df_accuracy$weights, + list(df_accuracy$borrowing_scenario), sum + ) +) +df_sum <- df_sum[, c(1, 2, 4)] colnames(df_sum) <- c("borrowing_scenario", "typeI", "Power") ggplot(df_sum) + geom_point(aes(x = typeI, color = borrowing_scenario, y = Power), size = 4) + - xlim(c(0,1))+ - ylim(c(0,1))+ + xlim(c(0, 1)) + + ylim(c(0, 1)) + labs( color = "Borrowing scenario", x = "Type I Error Rate", diff --git a/vignettes/weighting.Rmd b/vignettes/weighting.Rmd index bc8ca155..1337c121 100644 --- a/vignettes/weighting.Rmd +++ b/vignettes/weighting.Rmd @@ -173,26 +173,26 @@ knitr::kable(wide[new_order, ], digits = 3, row.names = FALSE) | borrowing|variable |est_ci.glm |est_ci.BayesPPD |est_ci.psborrow2 | |---------:|:-----------|:-----------------------|:-----------------------|:-----------------------| -| 0.00|(Intercept) |0.646 (-0.038, 1.357) |0.691 (-0.014, 1.399) |0.662 (-0.015, 1.383) | -| 0.25|(Intercept) |0.394 (-0.131, 0.931) |0.396 (-0.131, 0.941) |0.404 (-0.128, 0.948) | -| 0.50|(Intercept) |0.293 (-0.158, 0.751) |0.297 (-0.184, 0.767) |0.300 (-0.149, 0.767) | -| 0.75|(Intercept) |0.235 (-0.168, 0.642) |0.240 (-0.175, 0.665) |0.242 (-0.153, 0.643) | -| 1.00|(Intercept) |0.196 (-0.172, 0.567) |0.202 (-0.172, 0.578) |0.200 (-0.176, 0.580) | -| 0.00|cov1 |-0.771 (-1.465, -0.095) |-0.809 (-1.515, -0.126) |-0.790 (-1.488, -0.098) | -| 0.25|cov1 |-0.781 (-1.340, -0.231) |-0.793 (-1.350, -0.236) |-0.797 (-1.363, -0.237) | -| 0.50|cov1 |-0.769 (-1.252, -0.291) |-0.776 (-1.271, -0.270) |-0.783 (-1.275, -0.306) | -| 0.75|cov1 |-0.758 (-1.191, -0.329) |-0.769 (-1.212, -0.310) |-0.770 (-1.195, -0.352) | -| 1.00|cov1 |-0.749 (-1.145, -0.357) |-0.761 (-1.152, -0.369) |-0.756 (-1.154, -0.357) | -| 0.00|cov2 |-0.730 (-1.472, -0.008) |-0.745 (-1.496, -0.004) |-0.751 (-1.505, 0.008) | -| 0.25|cov2 |-0.559 (-1.114, -0.014) |-0.568 (-1.124, -0.023) |-0.574 (-1.127, -0.027) | -| 0.50|cov2 |-0.459 (-0.926, 0.003) |-0.471 (-0.953, -0.003) |-0.461 (-0.931, -0.000) | -| 0.75|cov2 |-0.398 (-0.811, 0.011) |-0.402 (-0.814, 0.006) |-0.403 (-0.813, 0.008) | -| 1.00|cov2 |-0.358 (-0.731, 0.013) |-0.358 (-0.736, 0.022) |-0.361 (-0.738, 0.014) | -| 0.00|trt |0.154 (-0.558, 0.871) |0.137 (-0.572, 0.864) |0.156 (-0.555, 0.876) | -| 0.25|trt |0.349 (-0.183, 0.885) |0.361 (-0.170, 0.899) |0.357 (-0.180, 0.912) | -| 0.50|trt |0.405 (-0.082, 0.894) |0.415 (-0.068, 0.916) |0.411 (-0.067, 0.907) | -| 0.75|trt |0.434 (-0.031, 0.900) |0.436 (-0.031, 0.913) |0.438 (-0.024, 0.910) | -| 1.00|trt |0.452 (-0.000, 0.905) |0.456 (-0.010, 0.909) |0.456 (-0.002, 0.930) | +| 0.00|(Intercept) |0.646 (-0.038, 1.357) |0.691 (-0.014, 1.399) |0.661 (-0.036, 1.359) | +| 0.25|(Intercept) |0.394 (-0.131, 0.931) |0.396 (-0.131, 0.941) |0.407 (-0.126, 0.938) | +| 0.50|(Intercept) |0.293 (-0.158, 0.751) |0.297 (-0.184, 0.767) |0.297 (-0.165, 0.760) | +| 0.75|(Intercept) |0.235 (-0.168, 0.642) |0.240 (-0.175, 0.665) |0.244 (-0.157, 0.649) | +| 1.00|(Intercept) |0.196 (-0.172, 0.567) |0.202 (-0.172, 0.578) |0.198 (-0.162, 0.560) | +| 0.00|cov1 |-0.771 (-1.465, -0.095) |-0.809 (-1.515, -0.126) |-0.795 (-1.488, -0.107) | +| 0.25|cov1 |-0.781 (-1.340, -0.231) |-0.793 (-1.350, -0.236) |-0.797 (-1.360, -0.239) | +| 0.50|cov1 |-0.769 (-1.252, -0.291) |-0.776 (-1.271, -0.270) |-0.774 (-1.262, -0.289) | +| 0.75|cov1 |-0.758 (-1.191, -0.329) |-0.769 (-1.212, -0.310) |-0.771 (-1.215, -0.350) | +| 1.00|cov1 |-0.749 (-1.145, -0.357) |-0.761 (-1.152, -0.369) |-0.755 (-1.148, -0.368) | +| 0.00|cov2 |-0.730 (-1.472, -0.008) |-0.745 (-1.496, -0.004) |-0.756 (-1.512, -0.029) | +| 0.25|cov2 |-0.559 (-1.114, -0.014) |-0.568 (-1.124, -0.023) |-0.572 (-1.130, -0.012) | +| 0.50|cov2 |-0.459 (-0.926, 0.003) |-0.471 (-0.953, -0.003) |-0.467 (-0.920, -0.012) | +| 0.75|cov2 |-0.398 (-0.811, 0.011) |-0.402 (-0.814, 0.006) |-0.404 (-0.826, -0.000) | +| 1.00|cov2 |-0.358 (-0.731, 0.013) |-0.358 (-0.736, 0.022) |-0.359 (-0.726, 0.006) | +| 0.00|trt |0.154 (-0.558, 0.871) |0.137 (-0.572, 0.864) |0.164 (-0.555, 0.898) | +| 0.25|trt |0.349 (-0.183, 0.885) |0.361 (-0.170, 0.899) |0.358 (-0.172, 0.890) | +| 0.50|trt |0.405 (-0.082, 0.894) |0.415 (-0.068, 0.916) |0.407 (-0.089, 0.907) | +| 0.75|trt |0.434 (-0.031, 0.900) |0.436 (-0.031, 0.913) |0.434 (-0.031, 0.898) | +| 1.00|trt |0.452 (-0.000, 0.905) |0.456 (-0.010, 0.909) |0.454 (0.006, 0.918) | @@ -376,26 +376,26 @@ knitr::kable(wide[new_order, ], digits = 3, row.names = FALSE) | borrowing|variable |est_ci.glm |est_ci.BayesPPD |est_ci.psborrow2 | |---------:|:-----------|:----------------------|:----------------------|:----------------------| -| 0.00|(Intercept) |1.930 (1.597, 2.263) |1.915 (1.572, 2.236) |1.913 (1.573, 2.230) | -| 0.25|(Intercept) |1.581 (1.323, 1.838) |1.567 (1.308, 1.814) |1.570 (1.313, 1.821) | -| 0.50|(Intercept) |1.473 (1.251, 1.695) |1.466 (1.247, 1.685) |1.466 (1.238, 1.681) | -| 0.75|(Intercept) |1.414 (1.215, 1.613) |1.407 (1.208, 1.601) |1.409 (1.203, 1.597) | -| 1.00|(Intercept) |1.376 (1.194, 1.558) |1.369 (1.183, 1.551) |1.372 (1.187, 1.550) | -| 0.00|cov1 |0.630 (0.300, 0.959) |0.630 (0.304, 0.960) |0.632 (0.306, 0.964) | -| 0.25|cov1 |0.722 (0.453, 0.991) |0.729 (0.459, 1.013) |0.726 (0.457, 0.996) | -| 0.50|cov1 |0.786 (0.552, 1.020) |0.789 (0.559, 1.026) |0.790 (0.558, 1.026) | -| 0.75|cov1 |0.827 (0.616, 1.037) |0.829 (0.622, 1.044) |0.828 (0.617, 1.042) | -| 1.00|cov1 |0.854 (0.662, 1.046) |0.859 (0.668, 1.057) |0.856 (0.661, 1.051) | -| 0.00|cov2 |0.043 (-0.309, 0.395) |0.039 (-0.318, 0.382) |0.038 (-0.318, 0.383) | -| 0.25|cov2 |-0.009 (-0.273, 0.255) |-0.008 (-0.283, 0.252) |-0.010 (-0.267, 0.247) | -| 0.50|cov2 |0.009 (-0.213, 0.232) |0.007 (-0.218, 0.226) |0.006 (-0.221, 0.232) | -| 0.75|cov2 |0.023 (-0.173, 0.220) |0.024 (-0.172, 0.216) |0.023 (-0.176, 0.217) | -| 1.00|cov2 |0.033 (-0.144, 0.211) |0.033 (-0.145, 0.206) |0.034 (-0.149, 0.214) | -| 0.00|trt |1.256 (0.911, 1.601) |1.260 (0.911, 1.629) |1.262 (0.913, 1.612) | -| 0.25|trt |1.564 (1.306, 1.822) |1.563 (1.302, 1.816) |1.566 (1.301, 1.821) | -| 0.50|trt |1.622 (1.386, 1.859) |1.620 (1.383, 1.851) |1.621 (1.381, 1.851) | -| 0.75|trt |1.649 (1.422, 1.875) |1.646 (1.414, 1.871) |1.646 (1.411, 1.876) | -| 1.00|trt |1.664 (1.443, 1.884) |1.660 (1.438, 1.874) |1.659 (1.435, 1.879) | +| 0.00|(Intercept) |1.930 (1.597, 2.263) |1.915 (1.572, 2.236) |1.916 (1.563, 2.245) | +| 0.25|(Intercept) |1.581 (1.323, 1.838) |1.567 (1.308, 1.814) |1.568 (1.311, 1.811) | +| 0.50|(Intercept) |1.473 (1.251, 1.695) |1.466 (1.247, 1.685) |1.466 (1.237, 1.684) | +| 0.75|(Intercept) |1.414 (1.215, 1.613) |1.407 (1.208, 1.601) |1.412 (1.211, 1.604) | +| 1.00|(Intercept) |1.376 (1.194, 1.558) |1.369 (1.183, 1.551) |1.370 (1.188, 1.547) | +| 0.00|cov1 |0.630 (0.300, 0.959) |0.630 (0.304, 0.960) |0.631 (0.298, 0.966) | +| 0.25|cov1 |0.722 (0.453, 0.991) |0.729 (0.459, 1.013) |0.728 (0.461, 1.003) | +| 0.50|cov1 |0.786 (0.552, 1.020) |0.789 (0.559, 1.026) |0.790 (0.556, 1.029) | +| 0.75|cov1 |0.827 (0.616, 1.037) |0.829 (0.622, 1.044) |0.825 (0.618, 1.037) | +| 1.00|cov1 |0.854 (0.662, 1.046) |0.859 (0.668, 1.057) |0.856 (0.667, 1.052) | +| 0.00|cov2 |0.043 (-0.309, 0.395) |0.039 (-0.318, 0.382) |0.036 (-0.322, 0.378) | +| 0.25|cov2 |-0.009 (-0.273, 0.255) |-0.008 (-0.283, 0.252) |-0.011 (-0.272, 0.249) | +| 0.50|cov2 |0.009 (-0.213, 0.232) |0.007 (-0.218, 0.226) |0.006 (-0.213, 0.223) | +| 0.75|cov2 |0.023 (-0.173, 0.220) |0.024 (-0.172, 0.216) |0.022 (-0.175, 0.216) | +| 1.00|cov2 |0.033 (-0.144, 0.211) |0.033 (-0.145, 0.206) |0.034 (-0.140, 0.212) | +| 0.00|trt |1.256 (0.911, 1.601) |1.260 (0.911, 1.629) |1.260 (0.909, 1.613) | +| 0.25|trt |1.564 (1.306, 1.822) |1.563 (1.302, 1.816) |1.564 (1.303, 1.823) | +| 0.50|trt |1.622 (1.386, 1.859) |1.620 (1.383, 1.851) |1.618 (1.382, 1.851) | +| 0.75|trt |1.649 (1.422, 1.875) |1.646 (1.414, 1.871) |1.644 (1.413, 1.867) | +| 1.00|trt |1.664 (1.443, 1.884) |1.660 (1.438, 1.874) |1.660 (1.438, 1.878) |