Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
388ef8c
implementing NSE to cube
coffeecookey Dec 28, 2025
3e96dfc
implementing NSE in cube
sisyphuswastaken Dec 28, 2025
e1eb87a
removed trailing whitespace
sisyphuswastaken Dec 28, 2025
b6adef9
removed trailing whitespace
sisyphuswastaken Dec 28, 2025
47bb2c3
revised implementation of NSE in cube
sisyphuswastaken Dec 31, 2025
33572b5
Merge branch 'master' into master
sisyphuswastaken Dec 31, 2025
e9876cb
cleaning up the code
sisyphuswastaken Dec 31, 2025
2a15cb9
more cleaning
sisyphuswastaken Dec 31, 2025
0ae97fa
removed unnecessary changes to code
sisyphuswastaken Dec 31, 2025
7215a4c
adding some tests to the code
sisyphuswastaken Jan 1, 2026
2b7b7ff
more tests
sisyphuswastaken Jan 1, 2026
431ca81
Revert "more tests"
sisyphuswastaken Jan 1, 2026
3bec96c
more tests
sisyphuswastaken Jan 1, 2026
c16f64d
Delete ..Rcheck/00check.log
sisyphuswastaken Jan 1, 2026
4119cee
Delete Makevars
sisyphuswastaken Jan 1, 2026
f792b15
converting the NSE code into helper function
sisyphuswastaken Jan 2, 2026
97b4536
including helper in [.data.table
sisyphuswastaken Jan 3, 2026
1595595
removing trailing spaces
sisyphuswastaken Jan 3, 2026
25fbd53
removed super assignment
sisyphuswastaken Jan 3, 2026
32d078d
removed trailing whitespace
sisyphuswastaken Jan 3, 2026
b2e6171
review changes
sisyphuswastaken Jan 3, 2026
a6d00fa
moved helper to data.table.R
sisyphuswastaken Jan 6, 2026
ea3ddf9
Merge remote-tracking branch 'upstream/master' into fix-7543
sisyphuswastaken Jan 6, 2026
cde91ae
removed try catch block
sisyphuswastaken Jan 7, 2026
f2f2d9f
removed whitespace
sisyphuswastaken Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 121 additions & 46 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,61 @@ replace_dot_alias = function(e) {
}
}

# Helper function to process SDcols
.processSDcols = function(SDcols_sub, SDcols_missing, x, jsub, by, enclos = parent.frame()) {
names_x = names(x)
bysub = substitute(by)
allbyvars = intersect(all.vars(bysub), names_x)
usesSD = ".SD" %chin% all.vars(jsub)
if (!usesSD) {
return(NULL)
}
if (SDcols_missing) {
ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars))
ansvals = match(ansvars, names_x)
return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals))
}
sub.result = SDcols_sub
if (sub.result %iscall% ':' && length(sub.result) == 3L) {
return(NULL)
}
if (sub.result %iscall% c("!", "-") && length(sub.result) == 2L) {
negate_sdcols = TRUE
sub.result = sub.result[[2L]]
} else negate_sdcols = FALSE
if (sub.result %iscall% "patterns") {
.SDcols = eval_with_cols(sub.result, names_x)
} else {
.SDcols = eval(sub.result, enclos)
}
if (!is.character(.SDcols) && !is.numeric(.SDcols) && !is.logical(.SDcols)) {
return(NULL)
}
if (anyNA(.SDcols))
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
if (is.character(.SDcols)) {
idx = .SDcols %chin% names_x
if (!all(idx))
stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx]))
ansvars = sdvars = .SDcols
ansvals = match(ansvars, names_x)
} else if (is.numeric(.SDcols)) {
ansvals = as.integer(.SDcols)
if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices")
if (any(idx <- abs(.SDcols) > ncol(x) | abs(.SDcols) < 1L)) stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx)))
ansvars = sdvars = names_x[ansvals]
ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols
} else if (is.logical(.SDcols)) {
if (length(.SDcols) != length(names_x))
stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x))
ansvals = which(.SDcols)
ansvars = sdvars = names_x[ansvals]
} else {
stopf(".SDcols must be character, numeric, or logical")
}
list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)
}

"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=getOption("datatable.showProgress", interactive()))
{
# ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could
Expand Down Expand Up @@ -1036,57 +1091,77 @@ replace_dot_alias = function(e) {
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
# fix for R-Forge #5190. colsub[[1L]] gave error when it's a symbol.
# NB: _unary_ '-', not _binary_ '-' (#5826). Test for '!' length-2 should be redundant but low-cost & keeps code concise.
if (colsub %iscall% c("!", "-") && length(colsub) == 2L) {
negate_sdcols = TRUE
colsub = colsub[[2L]]
} else negate_sdcols = FALSE
# fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) {
# .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231
.SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
} else {
if (colsub %iscall% 'patterns') {
patterns_list_or_vector = eval_with_cols(colsub, names_x)
.SDcols = if (is.list(patterns_list_or_vector)) {
# each pattern gives a new filter condition, intersect the end result
Reduce(intersect, patterns_list_or_vector)
try_processSDcols = !(colsub %iscall% c("!", "-") && length(colsub) == 2L) && !(colsub %iscall% ':') && !(colsub %iscall% 'patterns')
if (try_processSDcols) {
sdcols_result = .processSDcols(
SDcols_sub = colsub,
SDcols_missing = FALSE,
x = x,
jsub = jsub,
by = substitute(by),
enclos = parent.frame()
)
if (!is.null(sdcols_result)) {
ansvars = sdvars = sdcols_result$ansvars
ansvals = sdcols_result$ansvals
}
else {
try_processSDcols = FALSE
}
}
if (!try_processSDcols) {
if (colsub %iscall% c("!", "-") && length(colsub) == 2L) {
negate_sdcols = TRUE
colsub = colsub[[2L]]
} else negate_sdcols = FALSE
# fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]]
if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) {
# .SDcols is of the format a:b, ensure none of : arguments is a call data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1] #4231
.SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
} else {
if (colsub %iscall% 'patterns') {
patterns_list_or_vector = eval_with_cols(colsub, names_x)
.SDcols = if (is.list(patterns_list_or_vector)) {
# each pattern gives a new filter condition, intersect the end result
Reduce(intersect, patterns_list_or_vector)
} else {
patterns_list_or_vector
}
} else {
patterns_list_or_vector
.SDcols = eval(colsub, parent.frame(), parent.frame())
# allow filtering via function in .SDcols, #3950
if (is.function(.SDcols)) {
.SDcols = lapply(x, .SDcols)
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
.SDcols = unlist(.SDcols, use.names = FALSE)
}
}
}
if (anyNA(.SDcols))
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
if (is.logical(.SDcols)) {
if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x))
ansvals = which_(.SDcols, !negate_sdcols)
ansvars = sdvars = names_x[ansvals]
} else if (is.numeric(.SDcols)) {
.SDcols = as.integer(.SDcols)
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices")
if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L))
stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx)))
ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols]
ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols
} else {
.SDcols = eval(colsub, parent.frame(), parent.frame())
# allow filtering via function in .SDcols, #3950
if (is.function(.SDcols)) {
.SDcols = lapply(x, .SDcols)
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
.SDcols = unlist(.SDcols, use.names = FALSE)
}
if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names")
if (!all(idx <- .SDcols %chin% names_x))
stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx]))
ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
ansvals = chmatch(ansvars, names_x)
}
}
if (anyNA(.SDcols))
stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols))))
if (is.logical(.SDcols)) {
if (length(.SDcols)!=length(x)) stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(x))
ansvals = which_(.SDcols, !negate_sdcols)
ansvars = sdvars = names_x[ansvals]
} else if (is.numeric(.SDcols)) {
.SDcols = as.integer(.SDcols)
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices")
if (any(idx <- abs(.SDcols)>ncol(x) | abs(.SDcols)<1L))
stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx)))
ansvars = sdvars = if (negate_sdcols) dupdiff(names_x[-.SDcols], bynames) else names_x[.SDcols]
ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else .SDcols
} else {
if (!is.character(.SDcols)) stopf(".SDcols should be column numbers or names")
if (!all(idx <- .SDcols %chin% names_x))
stopf("Some items of .SDcols are not column names: %s", brackify(.SDcols[!idx]))
ansvars = sdvars = if (negate_sdcols) setdiff(names_x, c(.SDcols, bynames)) else .SDcols
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
ansvals = chmatch(ansvars, names_x)
}
}
# fix for long standing FR/bug, #495 and #484
allcols = c(names_x, xdotprefix, names_i, idotprefix)
Expand Down
11 changes: 11 additions & 0 deletions R/groupingsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
stopf("Argument 'id' must be a logical scalar.")
if (missing(j))
stopf("Argument 'j' is required")
# Implementing NSE in cube using the helper, .processSDcols
jj = substitute(j)
sdcols_result = .processSDcols(SDcols_sub = substitute(.SDcols), SDcols_missing = missing(.SDcols), x = x, jsub = jj, by = by, enclos = parent.frame())
if (is.null(sdcols_result)) {
.SDcols = NULL
} else {
ansvars = sdcols_result$ansvars
sdvars = sdcols_result$sdvars
ansvals = sdcols_result$ansvals
.SDcols = sdvars
}
# generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497
n = length(by)
keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k))))
Expand Down
36 changes: 36 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -11468,6 +11468,11 @@ sets = local({
by=c("color","year","status")
lapply(length(by):0, function(i) by[0:i])
})
test(1750.25,
cube(copy(dt), j = lapply(.SD, mean), by = "color", .SDcols = 4, id=TRUE),
groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount",
sets = list("color", character(0)), id = TRUE)
)
test(1750.31,
rollup(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), id=TRUE),
groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), sets=sets, id=TRUE)
Expand Down Expand Up @@ -11503,6 +11508,37 @@ test(1750.34,
character(0)),
id = TRUE)
)
test(1750.35,
cube(dt, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols=patterns("value")),
groupingsets(dt, j = lapply(.SD, sum), by = c("color","year","status"), .SDcols = "value",
sets = list(c("color","year","status"),
c("color","year"),
c("color","status"),
"color",
c("year","status"),
"year",
"status",
character(0)),
id = TRUE)
)
test(1750.36,
cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("value", "BADCOL")),
error = "Some items of \\.SDcols are not column names"
)
test(1750.37,
cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)),
error = "\\.SDcols is a logical vector of length"
)
test(1750.38,
cube(dt, j = lapply(.SD, mean), by = "color", .SDcols = c(FALSE, FALSE, FALSE, TRUE, FALSE), id=TRUE),
groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount",
sets = list("color", character(0)),
id = TRUE)
)
test(1750.39,
cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(1, 99)),
error = "out of bounds"
)
# grouping sets with integer64
if (test_bit64) {
set.seed(26)
Expand Down
Loading