From 388ef8c7663753bad09180e19805f2fb87f5ed2b Mon Sep 17 00:00:00 2001 From: coffeecookey Date: Mon, 29 Dec 2025 03:14:23 +0530 Subject: [PATCH 01/20] implementing NSE to cube --- R/groupingsets.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/groupingsets.R b/R/groupingsets.R index f5fc2101f1..f04c506fc7 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,6 +29,25 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") + #implementing NSE in cube + jj = substitute(j) + usesSD = any(all.vars(jj) == ".SD") + if (usesSD) { + if (missing(.SDcols)) { + .SDcols = names(x)[vapply(x, is.numeric, logical(1L))] + } else { + sub.result = substitute(.SDcols) + if (is.call(sub.result)) { + #.SDcols = eval_with_cols(sub.result, names(x)) + check_var = eval_with_cols(sub.result, names(x)) + if (!is.null(check_var)) { + .SDcols = eval_with_cols(sub.result, names(x)) + } + } + } + } else { + .SDcols = NULL + } # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) From 3e96dfc95a595139f6b6486a6a2924f8aeac6d0f Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Mon, 29 Dec 2025 03:41:39 +0530 Subject: [PATCH 02/20] implementing NSE in cube --- R/groupingsets.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index f04c506fc7..54ddd26194 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,7 +29,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - #implementing NSE in cube + #implementing NSE in cube jj = substitute(j) usesSD = any(all.vars(jj) == ".SD") if (usesSD) { @@ -48,6 +48,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { } else { .SDcols = NULL } + # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) From e1eb87a5f60d8292579c326dcdcfbd5b6193903f Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Mon, 29 Dec 2025 03:49:27 +0530 Subject: [PATCH 03/20] removed trailing whitespace --- R/groupingsets.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 54ddd26194..93e36899e1 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,7 +29,8 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - #implementing NSE in cube + + #implementing NSE in cube jj = substitute(j) usesSD = any(all.vars(jj) == ".SD") if (usesSD) { @@ -48,7 +49,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { } else { .SDcols = NULL } - + # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) From b6adef94fabc78213edb4099594abd5b44fd195b Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Mon, 29 Dec 2025 03:53:21 +0530 Subject: [PATCH 04/20] removed trailing whitespace --- R/groupingsets.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 93e36899e1..21ec65bd42 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,7 +29,6 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - #implementing NSE in cube jj = substitute(j) usesSD = any(all.vars(jj) == ".SD") From 47bb2c390b7800a6f712b71a2e23988ba1fae3e1 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 31 Dec 2025 15:13:26 +0530 Subject: [PATCH 05/20] revised implementation of NSE in cube --- ..Rcheck/00check.log | 13 +++++++++++++ Makevars | 3 +++ R/groupingsets.R | 42 +++++++++++++++++++++++++++++++++++------- 3 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 ..Rcheck/00check.log create mode 100644 Makevars diff --git a/..Rcheck/00check.log b/..Rcheck/00check.log new file mode 100644 index 0000000000..2d743cd4d3 --- /dev/null +++ b/..Rcheck/00check.log @@ -0,0 +1,13 @@ +* using log directory ‘/Users/tanishaojha/Desktop/Code-folder/data.table/..Rcheck’ +* using R version 4.5.1 (2025-06-13) +* using platform: aarch64-apple-darwin20 +* R was compiled by + Apple clang version 16.0.0 (clang-1600.0.26.6) + GNU Fortran (GCC) 14.2.0 +* running under: macOS Sequoia 15.5 +* using session charset: UTF-8 +* checking for file ‘./DESCRIPTION’ ... ERROR +Required fields missing or empty: + ‘Author’ ‘Maintainer’ +* DONE +Status: 1 ERROR diff --git a/Makevars b/Makevars new file mode 100644 index 0000000000..dd8d74e972 --- /dev/null +++ b/Makevars @@ -0,0 +1,3 @@ +CPPFLAGS += -I/opt/homebrew/opt/gettext/include +LDFLAGS += -L/opt/homebrew/opt/gettext/lib + diff --git a/R/groupingsets.R b/R/groupingsets.R index 21ec65bd42..26e01e2a3c 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,26 +29,54 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - #implementing NSE in cube + + # Implementing NSE in cube jj = substitute(j) + bysub = substitute(by) + names_x = names(x) + + allbyvars = intersect(all.vars(bysub), names_x) usesSD = any(all.vars(jj) == ".SD") + if (usesSD) { if (missing(.SDcols)) { - .SDcols = names(x)[vapply(x, is.numeric, logical(1L))] + ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) + ansvals = match(ansvars, names_x) } else { sub.result = substitute(.SDcols) if (is.call(sub.result)) { - #.SDcols = eval_with_cols(sub.result, names(x)) - check_var = eval_with_cols(sub.result, names(x)) - if (!is.null(check_var)) { - .SDcols = eval_with_cols(sub.result, names(x)) + call_name = as.character(sub.result[[1L]]) + if (call_name %in% c("patterns", "is.numeric", "is.character", "is.factor")) { + .SDcols = eval_with_cols(sub.result, names_x) + } else { + .SDcols = eval(sub.result, parent.frame()) } + } else { + .SDcols = eval(sub.result, parent.frame()) + } + if (is.character(.SDcols)) { + if (!all(idx = .SDcols %chin% names_x)) + stopf("Some items of .SDcols are not column names: %s", + paste(.SDcols[!idx], collapse = ", ")) + ansvars = sdvars = .SDcols + ansvals = match(ansvars, names_x) + } else if (is.numeric(.SDcols)) { + ansvals = as.integer(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else if (is.logical(.SDcols)) { + if (length(.SDcols) != length(names_x)) + stopf(".SDcols is a logical vector of length %d but there are %d columns", + length(.SDcols), length(names_x)) + ansvals = which(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else { + stopf(".SDcols must be character, numeric, or logical") } } } else { .SDcols = NULL } - + # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) From e9876cb8af7ad1f4df58607542263428efd3c5b4 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 31 Dec 2025 15:31:52 +0530 Subject: [PATCH 06/20] cleaning up the code --- R/groupingsets.R | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 26e01e2a3c..4890bf5652 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,15 +29,12 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - # Implementing NSE in cube jj = substitute(j) bysub = substitute(by) names_x = names(x) - allbyvars = intersect(all.vars(bysub), names_x) usesSD = any(all.vars(jj) == ".SD") - if (usesSD) { if (missing(.SDcols)) { ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) @@ -55,9 +52,9 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { .SDcols = eval(sub.result, parent.frame()) } if (is.character(.SDcols)) { - if (!all(idx = .SDcols %chin% names_x)) - stopf("Some items of .SDcols are not column names: %s", - paste(.SDcols[!idx], collapse = ", ")) + idx = .SDcols %chin% names_x + if (any(!idx)) + stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) ansvars = sdvars = .SDcols ansvals = match(ansvars, names_x) } else if (is.numeric(.SDcols)) { @@ -65,8 +62,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { ansvars = sdvars = names_x[ansvals] } else if (is.logical(.SDcols)) { if (length(.SDcols) != length(names_x)) - stopf(".SDcols is a logical vector of length %d but there are %d columns", - length(.SDcols), length(names_x)) + stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) ansvals = which(.SDcols) ansvars = sdvars = names_x[ansvals] } else { @@ -76,7 +72,6 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { } else { .SDcols = NULL } - # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) From 2a15cb9d4cbbad25e27a240b4fe83c05da43502b Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 31 Dec 2025 15:37:35 +0530 Subject: [PATCH 07/20] more cleaning --- R/groupingsets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 4890bf5652..da0e8fe3d0 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -53,7 +53,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { } if (is.character(.SDcols)) { idx = .SDcols %chin% names_x - if (any(!idx)) + if (!all(idx)) stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) ansvars = sdvars = .SDcols ansvals = match(ansvars, names_x) From 0ae97fabd9660b306bc5c2abe9c7a45cf07ac8e7 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 31 Dec 2025 20:23:27 +0530 Subject: [PATCH 08/20] removed unnecessary changes to code --- R/groupingsets.R | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index da0e8fe3d0..faeebc59fe 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -29,7 +29,7 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") - # Implementing NSE in cube +# Implementing NSE in cube jj = substitute(j) bysub = substitute(by) names_x = names(x) @@ -41,13 +41,8 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { ansvals = match(ansvars, names_x) } else { sub.result = substitute(.SDcols) - if (is.call(sub.result)) { - call_name = as.character(sub.result[[1L]]) - if (call_name %in% c("patterns", "is.numeric", "is.character", "is.factor")) { - .SDcols = eval_with_cols(sub.result, names_x) - } else { - .SDcols = eval(sub.result, parent.frame()) - } + if (is.call(sub.result) && as.character(sub.result[[1L]]) == "patterns") { + .SDcols = eval_with_cols(sub.result, names_x) } else { .SDcols = eval(sub.result, parent.frame()) } From 7215a4c857c8f9953557af8ebfa1a5a3e227189c Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 1 Jan 2026 11:50:56 +0530 Subject: [PATCH 09/20] adding some tests to the code --- inst/tests/tests.Rraw | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8cad916e3b..12440c968e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11503,6 +11503,31 @@ test(1750.34, character(0)), id = TRUE) ) +test(1750.35, + cube(dt, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols=patterns("value")), + groupingsets(dt, j = lapply(.SD, sum), by = c("color","year","status"), .SDcols = "value", + sets = list(c("color","year","status"), + c("color","year"), + c("color","status"), + "color", + c("year","status"), + "year", + "status", + character(0)), + id = TRUE) +) +test(1750.36, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("value", "BADCOL")), + error = "Some items of \\.SDcols are not column names" +) +test(1750.37, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)), + error = "\\.SDcols is a logical vector of length" +) +test(1750.38, + cube(dt, j = sum(value), by = "year", .SDcols = "value", id = TRUE), + cube(dt, j = sum(value), by = "year", id = TRUE) +) # grouping sets with integer64 if (test_bit64) { set.seed(26) From 2b7b7ff796c632b75187d15202b5119de2c1676c Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 1 Jan 2026 12:21:08 +0530 Subject: [PATCH 10/20] more tests --- inst/tests/tests.Rraw | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 12440c968e..93ecf96d97 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11528,6 +11528,18 @@ test(1750.38, cube(dt, j = sum(value), by = "year", .SDcols = "value", id = TRUE), cube(dt, j = sum(value), by = "year", id = TRUE) ) +test(1750.39, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = 5L, id = TRUE), + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = "value", id = TRUE) +) +test(1750.40, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = names(dt) == "value", id = TRUE), + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = "value", id = TRUE) +) +test( + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = list("value")), + error = "\\.SDcols must be character, numeric, or logical" +) # grouping sets with integer64 if (test_bit64) { set.seed(26) From 431ca81914b3f2319cbc7369c440eec10d42f99d Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 1 Jan 2026 12:26:39 +0530 Subject: [PATCH 11/20] Revert "more tests" This reverts commit 2b7b7ff796c632b75187d15202b5119de2c1676c. --- inst/tests/tests.Rraw | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 93ecf96d97..12440c968e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11528,18 +11528,6 @@ test(1750.38, cube(dt, j = sum(value), by = "year", .SDcols = "value", id = TRUE), cube(dt, j = sum(value), by = "year", id = TRUE) ) -test(1750.39, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = 5L, id = TRUE), - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = "value", id = TRUE) -) -test(1750.40, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = names(dt) == "value", id = TRUE), - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = "value", id = TRUE) -) -test( - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = list("value")), - error = "\\.SDcols must be character, numeric, or logical" -) # grouping sets with integer64 if (test_bit64) { set.seed(26) From 3bec96c4b6135912bb0f1e48ea34fac80a17b95e Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 1 Jan 2026 13:06:34 +0530 Subject: [PATCH 12/20] more tests --- inst/tests/tests.Rraw | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 12440c968e..6c4128a5a3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11525,8 +11525,18 @@ test(1750.37, error = "\\.SDcols is a logical vector of length" ) test(1750.38, - cube(dt, j = sum(value), by = "year", .SDcols = "value", id = TRUE), - cube(dt, j = sum(value), by = "year", id = TRUE) + cube(dt, j = lapply(.SD, mean), by = "color", .SDcols = c(FALSE, FALSE, FALSE, TRUE, FALSE), id=TRUE), + groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", + sets = list("color", character(0)), + id = TRUE) +) +test(1750.39, + cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = list("amount")), + error = ".SDcols must be character, numeric, or logical" +) +test(1750.40, + cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(1, 99)), + error = "out of bounds" ) # grouping sets with integer64 if (test_bit64) { From c16f64dc34e73381eae53778f3cc0bfbffa5f100 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken <69812646+sisyphuswastaken@users.noreply.github.com> Date: Thu, 1 Jan 2026 15:30:11 +0530 Subject: [PATCH 13/20] Delete ..Rcheck/00check.log --- ..Rcheck/00check.log | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 ..Rcheck/00check.log diff --git a/..Rcheck/00check.log b/..Rcheck/00check.log deleted file mode 100644 index 2d743cd4d3..0000000000 --- a/..Rcheck/00check.log +++ /dev/null @@ -1,13 +0,0 @@ -* using log directory ‘/Users/tanishaojha/Desktop/Code-folder/data.table/..Rcheck’ -* using R version 4.5.1 (2025-06-13) -* using platform: aarch64-apple-darwin20 -* R was compiled by - Apple clang version 16.0.0 (clang-1600.0.26.6) - GNU Fortran (GCC) 14.2.0 -* running under: macOS Sequoia 15.5 -* using session charset: UTF-8 -* checking for file ‘./DESCRIPTION’ ... ERROR -Required fields missing or empty: - ‘Author’ ‘Maintainer’ -* DONE -Status: 1 ERROR From 4119cee6f1097f5242875f75ccc601f75d5d4e37 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken <69812646+sisyphuswastaken@users.noreply.github.com> Date: Thu, 1 Jan 2026 15:30:22 +0530 Subject: [PATCH 14/20] Delete Makevars --- Makevars | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 Makevars diff --git a/Makevars b/Makevars deleted file mode 100644 index dd8d74e972..0000000000 --- a/Makevars +++ /dev/null @@ -1,3 +0,0 @@ -CPPFLAGS += -I/opt/homebrew/opt/gettext/include -LDFLAGS += -L/opt/homebrew/opt/gettext/lib - From f792b159c749bee9636a2160d198bfd024920e76 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Fri, 2 Jan 2026 16:15:14 +0530 Subject: [PATCH 15/20] converting the NSE code into helper function --- R/groupingsets.R | 85 ++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index faeebc59fe..14e70bd634 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -16,6 +16,48 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) } +# Helper function to process SDcols +.processSDcols = function(SDcols_sub, SDcols_missing, x, jsub, by, enclos = parent.frame()) { + names_x = names(x) + bysub = substitute(by) + allbyvars = intersect(all.vars(bysub), names_x) + usesSD = any(all.vars(jsub) == ".SD") + if (!usesSD) { + return(NULL) + } + if (SDcols_missing) { + ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) + ansvals = match(ansvars, names_x) + return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)) + } + sub.result = SDcols_sub + if (is.call(sub.result) && as.character(sub.result[[1L]]) == "patterns") { + .SDcols = eval_with_cols(sub.result, names_x) + } else { + .SDcols = eval(sub.result, enclos) + } + if (is.character(.SDcols)) { + idx = .SDcols %chin% names_x + if (!all(idx)) + stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) + ansvars = sdvars = .SDcols + ansvals = match(ansvars, names_x) + } else if (is.numeric(.SDcols)) { + ansvals = as.integer(.SDcols) + if (any(ansvals < 1L | ansvals > length(names_x))) + stopf(".SDcols contains indices out of bounds") + ansvars = sdvars = names_x[ansvals] + } else if (is.logical(.SDcols)) { + if (length(.SDcols) != length(names_x)) + stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) + ansvals = which(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else { + stopf(".SDcols must be character, numeric, or logical") + } + list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals) +} + cube = function(x, ...) { UseMethod("cube") } @@ -29,43 +71,16 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { stopf("Argument 'id' must be a logical scalar.") if (missing(j)) stopf("Argument 'j' is required") -# Implementing NSE in cube + # Implementing NSE in cube using the helper, .processSDcols jj = substitute(j) - bysub = substitute(by) - names_x = names(x) - allbyvars = intersect(all.vars(bysub), names_x) - usesSD = any(all.vars(jj) == ".SD") - if (usesSD) { - if (missing(.SDcols)) { - ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) - ansvals = match(ansvars, names_x) - } else { - sub.result = substitute(.SDcols) - if (is.call(sub.result) && as.character(sub.result[[1L]]) == "patterns") { - .SDcols = eval_with_cols(sub.result, names_x) - } else { - .SDcols = eval(sub.result, parent.frame()) - } - if (is.character(.SDcols)) { - idx = .SDcols %chin% names_x - if (!all(idx)) - stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) - ansvars = sdvars = .SDcols - ansvals = match(ansvars, names_x) - } else if (is.numeric(.SDcols)) { - ansvals = as.integer(.SDcols) - ansvars = sdvars = names_x[ansvals] - } else if (is.logical(.SDcols)) { - if (length(.SDcols) != length(names_x)) - stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) - ansvals = which(.SDcols) - ansvars = sdvars = names_x[ansvals] - } else { - stopf(".SDcols must be character, numeric, or logical") - } - } - } else { + sdcols_result = .processSDcols(SDcols_sub = substitute(.SDcols), SDcols_missing = missing(.SDcols), x = x, jsub = jj, by = by, enclos = parent.frame()) + if (is.null(sdcols_result)) { .SDcols = NULL + } else { + ansvars = sdcols_result$ansvars + sdvars = sdcols_result$sdvars + ansvals = sdcols_result$ansvals + .SDcols = sdvars } # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) From 97b4536b44504b0af24a322be6599c1bf7b4111e Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 4 Jan 2026 00:25:06 +0530 Subject: [PATCH 16/20] including helper in [.data.table --- R/data.table.R | 114 +++++++++++++++++++++++++----------------- R/groupingsets.R | 2 + inst/tests/tests.Rraw | 5 ++ 3 files changed, 76 insertions(+), 45 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 27c985e44c..5fc9bfa1ce 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1036,56 +1036,80 @@ replace_dot_alias = function(e) { while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] # fix for R-Forge #5190. colsub[[1L]] gave error when it's a symbol. # NB: _unary_ '-', not _binary_ '-' (#5826). Test for '!' length-2 should be redundant but low-cost & keeps code concise. - if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { - negate_sdcols = TRUE - colsub = colsub[[2L]] - } else negate_sdcols = FALSE - # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) - while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] - if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) { - # .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231 - .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) - } else { - if (colsub %iscall% 'patterns') { - patterns_list_or_vector = eval_with_cols(colsub, names_x) - .SDcols = if (is.list(patterns_list_or_vector)) { - # each pattern gives a new filter condition, intersect the end result - Reduce(intersect, patterns_list_or_vector) + try_processSDcols = !(colsub %iscall% c("!", "-") && length(colsub) == 2L) && !(colsub %iscall% ':') && !(colsub %iscall% 'patterns') + if (try_processSDcols) { + tryCatch({ + sdcols_result = .processSDcols( + SDcols_sub = colsub, + SDcols_missing = FALSE, + x = x, + jsub = jsub, + by = union(bynames, allbyvars), + enclos = parent.frame() + ) + if (!is.null(sdcols_result)) { + ansvars = sdvars = sdcols_result$ansvars + ansvals = sdcols_result$ansvals } else { - patterns_list_or_vector + try_processSDcols = FALSE } + }, error = function(e) { + try_processSDcols <<- FALSE + }) + } + if (!try_processSDcols) { + + if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { + negate_sdcols = TRUE + colsub = colsub[[2L]] + } else negate_sdcols = FALSE + # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) + while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] + if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) { + # .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231 + .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) } else { - .SDcols = eval(colsub, parent.frame(), parent.frame()) - # allow filtering via function in .SDcols, #3950 - if (is.function(.SDcols)) { - .SDcols = lapply(x, .SDcols) - if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA))) - stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx])) - .SDcols = unlist(.SDcols, use.names = FALSE) + if (colsub %iscall% 'patterns') { + patterns_list_or_vector = eval_with_cols(colsub, names_x) + .SDcols = if (is.list(patterns_list_or_vector)) { + # each pattern gives a new filter condition, intersect the end result + Reduce(intersect, patterns_list_or_vector) + } else { + patterns_list_or_vector + } + } else { + .SDcols = eval(colsub, parent.frame(), parent.frame()) + # allow filtering via function in .SDcols, #3950 + if (is.function(.SDcols)) { + .SDcols = lapply(x, .SDcols) + if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA))) + stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx])) + .SDcols = unlist(.SDcols, use.names = FALSE) + } } } - } - if (anyNA(.SDcols)) - stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) - if (is.logical(.SDcols)) { - if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x)) - ansvals = which_(.SDcols, !negate_sdcols) - ansvars = sdvars = names_x[ansvals] - } else if (is.numeric(.SDcols)) { - .SDcols = as.integer(.SDcols) - # if .SDcols is numeric, use 'dupdiff' instead of 'setdiff' - if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices") - if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L)) - stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx))) - ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols] - ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols - } else { - if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names") - if (!all(idx <- .SDcols %chin% names_x)) - stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx])) - ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols - # dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove) - ansvals = chmatch(ansvars, names_x) + if (anyNA(.SDcols)) + stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) + if (is.logical(.SDcols)) { + if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x)) + ansvals = which_(.SDcols, !negate_sdcols) + ansvars = sdvars = names_x[ansvals] + } else if (is.numeric(.SDcols)) { + .SDcols = as.integer(.SDcols) + # if .SDcols is numeric, use 'dupdiff' instead of 'setdiff' + if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices") + if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L)) + stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx))) + ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols] + ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols + } else { + if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names") + if (!all(idx <- .SDcols %chin% names_x)) + stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx])) + ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols + # dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove) + ansvals = chmatch(ansvars, names_x) + } } } # fix for long standing FR/bug, #495 and #484 diff --git a/R/groupingsets.R b/R/groupingsets.R index 14e70bd634..d18d0def62 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -36,6 +36,8 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { } else { .SDcols = eval(sub.result, enclos) } + if (anyNA(.SDcols)) + stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) if (is.character(.SDcols)) { idx = .SDcols %chin% names_x if (!all(idx)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6c4128a5a3..6248cf2ca5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11468,6 +11468,11 @@ sets = local({ by=c("color","year","status") lapply(length(by):0, function(i) by[0:i]) }) +test(1750.25, + cube(copy(dt), j = lapply(.SD, mean), by = "color", .SDcols = 4, id=TRUE), + groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", + sets = list("color", character(0)), id = TRUE) +) test(1750.31, rollup(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), id=TRUE), groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), sets=sets, id=TRUE) From 159559564056740b153eb12c10b8fe28ac2684d3 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 4 Jan 2026 00:28:52 +0530 Subject: [PATCH 17/20] removing trailing spaces --- R/data.table.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5fc9bfa1ce..0924b1b7c5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1054,11 +1054,10 @@ replace_dot_alias = function(e) { try_processSDcols = FALSE } }, error = function(e) { - try_processSDcols <<- FALSE + try_processSDcols <<- FALSE }) } if (!try_processSDcols) { - if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { negate_sdcols = TRUE colsub = colsub[[2L]] From 25fbd53d0bc4e9dac56e96bc1eb58a6dda5615b3 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 4 Jan 2026 00:48:08 +0530 Subject: [PATCH 18/20] removed super assignment --- R/data.table.R | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 0924b1b7c5..156a813079 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1038,24 +1038,25 @@ replace_dot_alias = function(e) { # NB: _unary_ '-', not _binary_ '-' (#5826). Test for '!' length-2 should be redundant but low-cost & keeps code concise. try_processSDcols = !(colsub %iscall% c("!", "-") && length(colsub) == 2L) && !(colsub %iscall% ':') && !(colsub %iscall% 'patterns') if (try_processSDcols) { - tryCatch({ - sdcols_result = .processSDcols( - SDcols_sub = colsub, - SDcols_missing = FALSE, - x = x, - jsub = jsub, - by = union(bynames, allbyvars), - enclos = parent.frame() - ) + sdcols_result = tryCatch({ + .processSDcols( + SDcols_sub = colsub, + SDcols_missing = FALSE, + x = x, + jsub = jsub, + by = union(bynames, allbyvars), + enclos = parent.frame() + ) + }, error = function(e) { + NULL + }) if (!is.null(sdcols_result)) { ansvars = sdvars = sdcols_result$ansvars ansvals = sdcols_result$ansvals + try_processSDcols = TRUE } else { try_processSDcols = FALSE } - }, error = function(e) { - try_processSDcols <<- FALSE - }) } if (!try_processSDcols) { if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { From 32d078d606cf8e82acf810696241d697969ebc9b Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 4 Jan 2026 00:52:00 +0530 Subject: [PATCH 19/20] removed trailing whitespace --- R/data.table.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 156a813079..87f6a5cc5a 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1040,15 +1040,15 @@ replace_dot_alias = function(e) { if (try_processSDcols) { sdcols_result = tryCatch({ .processSDcols( - SDcols_sub = colsub, - SDcols_missing = FALSE, - x = x, - jsub = jsub, - by = union(bynames, allbyvars), + SDcols_sub = colsub, + SDcols_missing = FALSE, + x = x, + jsub = jsub, + by = union(bynames, allbyvars), enclos = parent.frame() ) }, error = function(e) { - NULL + NULL }) if (!is.null(sdcols_result)) { ansvars = sdvars = sdcols_result$ansvars From b2e6171d7a027a143a1b264cdb19f0e6fe733e9e Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 4 Jan 2026 02:11:22 +0530 Subject: [PATCH 20/20] review changes --- R/data.table.R | 2 +- R/groupingsets.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 87f6a5cc5a..9c6eaa8478 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1044,7 +1044,7 @@ replace_dot_alias = function(e) { SDcols_missing = FALSE, x = x, jsub = jsub, - by = union(bynames, allbyvars), + by = substitute(by), enclos = parent.frame() ) }, error = function(e) { diff --git a/R/groupingsets.R b/R/groupingsets.R index d18d0def62..29105e3163 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -21,7 +21,7 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { names_x = names(x) bysub = substitute(by) allbyvars = intersect(all.vars(bysub), names_x) - usesSD = any(all.vars(jsub) == ".SD") + usesSD = ".SD" %chin% all.vars(jsub) if (!usesSD) { return(NULL) } @@ -31,7 +31,7 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)) } sub.result = SDcols_sub - if (is.call(sub.result) && as.character(sub.result[[1L]]) == "patterns") { + if (sub.result %iscall% "patterns") { .SDcols = eval_with_cols(sub.result, names_x) } else { .SDcols = eval(sub.result, enclos)