Rdatatable · manmita · Jan 5, 2026 · Jan 5, 2026 · Jan 8, 2026 · Jan 8, 2026
@@ -106,5 +106,6 @@ Authors@R: c(
   person(given="@badasahog",       role="ctb", comment="GitHub user"),
   person("Vinit", "Thakur",        role="ctb"),
   person("Mukul", "Kumar",         role="ctb"),
-  person("Ildikó", "Czeller",      role="ctb")
+  person("Ildikó", "Czeller",      role="ctb"),
+  person("Manmita", "Das",         role="ctb")
   )
@@ -16,6 +16,10 @@
 
 1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR.
 
+2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR.
+
+3. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `list_search=TRUE`, with `list_len_threshold` to avoid scanning extremely long lists, [#2606](https://github.com/Rdatatable/data.table/issues/2606).
+
 ### Notes
 
 1. {data.table} now depends on R 3.5.0 (2018).
@@ -32,6 +36,8 @@
 
 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR.
 
+4. `sum(<int64 column>)` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results!
+
 ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1)  23 December 2025
 
 ### BREAKING CHANGE

@@ -244,7 +244,7 @@ replace_dot_alias = function(e) {
   if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which)
   if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. Please change or remove either which or nomatch.")
   if (!with && missing(j)) stopf("j must be provided when with=FALSE")
-  if (!missing(by) && !isTRUEorFALSE(showProgress)) stopf("%s must be TRUE or FALSE", "showProgress")
+  if (!missing(by) && !(isTRUEorFALSE(showProgress) || (is.numeric(showProgress) && length(showProgress)==1L && showProgress >= 0))) stopf("showProgress must be TRUE, FALSE, or a single non-negative number") # nocov
   irows = NULL  # Meaning all rows. We avoid creating 1:nrow(x) for efficiency.
   notjoin = FALSE
   rightcols = leftcols = integer()
@@ -1972,7 +1972,7 @@ replace_dot_alias = function(e) {
     }
     ans = c(g, ans)
   } else {
-    ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, showProgress)
+    ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, as.integer(showProgress))
   }
   # unlock any locked data.table components of the answer, #4159
   # MAX_DEPTH prevents possible infinite recursion from truly recursive object, #4173

@@ -19,28 +19,98 @@ type_size = function(DT) {
 }
 
 tables = function(mb=type_size, order.col="NAME", width=80L,
-                  env=parent.frame(), silent=FALSE, index=FALSE)
+                  env=parent.frame(), silent=FALSE, index=FALSE,
+                  list_search=FALSE, list_len_threshold=100000L)
 {
   # Prints name, size and colnames of all data.tables in the calling environment by default
   mb_name = as.character(substitute(mb))
   if (isTRUE(mb)) { mb=type_size; mb_name="type_size" }
   names = ls(envir=env, all.names=TRUE)  # include "hidden" objects (starting with .)
   obj = mget(names, envir=env)  # doesn't copy; mget is ok with ... unlike get, #5197
   w = which(vapply_1b(obj, is.data.table))
-  if (!length(w)) {
+
+  list_info = NULL
+  if (list_search) {
+    is_list = vapply_1b(obj, is.list)
+    dt_mask = logical(length(obj))
+    if (length(w)) dt_mask[w] = TRUE
+    is_df = vapply_1b(obj, is.data.frame)
+    list_candidates = which(is_list & !dt_mask & !is_df)
+
+    if (length(list_candidates)) {
+      n_extra = 0L
+      list_locations = vector("list", length(list_candidates))
+      for (j in seq_along(list_candidates)) {
+        obj_idx = list_candidates[j]
+        L = obj[[obj_idx]]
+        if (!is.list(L)) next
+        lenL = length(L)
+        if (is.finite(list_len_threshold) && lenL > list_len_threshold) next
+        elem_is_dt = vapply_1b(L, is.data.table)
+        idx = which(elem_is_dt)
+        if (length(idx)) {
+          list_locations[[j]] = idx
+          n_extra = n_extra + length(idx)
+        }
+      }
+
+      if (n_extra) {
+        list_info = data.table(NAME=character(n_extra), NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
+        extra_row = 0L
+        for (j in seq_along(list_candidates)) {
+          obj_idx = list_candidates[j]
+          idx = list_locations[[j]]
+          if (is.null(idx) || !length(idx)) next
+          L = obj[[obj_idx]]
+          obj_name = names[obj_idx]
+          elem_names = names(L)
+          for (k in idx) {
+            extra_row = extra_row + 1L
+            DT = L[[k]]
+            if (!is.null(elem_names) && length(elem_names) >= k && nzchar(elem_names[k])) {
+              nm = paste0(obj_name, "$", elem_names[k])
+            } else {
+              nm = paste0(obj_name, "[[", k, "]]")
+            }
+            set(list_info, extra_row, "NAME", nm)
+            set(list_info, extra_row, "NROW", nrow(DT))
+            set(list_info, extra_row, "NCOL", ncol(DT))
+            if (is.function(mb)) set(list_info, extra_row, "MB", as.integer(mb(DT)/1048576L))
+            if (!is.null(tt<-names(DT))) set(list_info, extra_row, "COLS", tt)
+            if (!is.null(tt<-key(DT))) set(list_info, extra_row, "KEY", tt)
+            if (index && !is.null(tt<-indices(DT))) set(list_info, extra_row, "INDICES", tt)
+          }
+        }
+      }
+    }
+  }
+
+  info = NULL
+  if (length(w)) {
+    info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
+    for (i in seq_along(w)) {  # avoid rbindlist(lapply(DT_names)) in case of a large number of tables
+      DT = obj[[w[i]]]
+      set(info, i, "NROW", nrow(DT))
+      set(info, i, "NCOL", ncol(DT))
+      if (is.function(mb)) set(info, i, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2
+      if (!is.null(tt<-names(DT))) set(info, i, "COLS", tt)  # TODO: don't need these if()s when #5526 is done
+      if (!is.null(tt<-key(DT))) set(info, i, "KEY", tt)
+      if (index && !is.null(tt<-indices(DT))) set(info, i, "INDICES", tt)
+    }
+  }
+
+  if (!is.null(list_info) && nrow(list_info)) {
+    if (is.null(info)) {
+      info = list_info
+    } else {
+      info = rbind(info, list_info)
+    }
+  }
+
+  if (is.null(info) || !nrow(info)) {
     if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
     return(invisible(data.table(NULL)))
   }
-  info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
-  for (i in seq_along(w)) {  # avoid rbindlist(lapply(DT_names)) in case of a large number of tables
-    DT = obj[[w[i]]]
-    set(info, i, "NROW", nrow(DT))
-    set(info, i, "NCOL", ncol(DT))
-    if (is.function(mb)) set(info, i, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2
-    if (!is.null(tt<-names(DT))) set(info, i, "COLS", tt)  # TODO: don't need these if()s when #5526 is done
-    if (!is.null(tt<-key(DT))) set(info, i, "KEY", tt)
-    if (index && !is.null(tt<-indices(DT))) set(info, i, "INDICES", tt)
-  }
   if (!is.function(mb)) info[,MB:=NULL]
   if (!index)           info[,INDICES:=NULL]
   if (!order.col %chin% names(info)) stopf("order.col='%s' not a column name of info", order.col)

@@ -21978,3 +21978,71 @@ local({
   test(2357.1, fread(f), DT)
   test(2357.2, fread(paste0("file://", f)), DT)
 })
+
+#7571 issue for na.rm on int64
+if (test_bit64) local({
+  # integer64 + GForce grouped sum with na.rm = FALSE
+  # Example 1 from issue: ids 1:8, 9, 9; three leading NAs then 4:10
+  dt_short = data.table(
+    id = c(1:8, 9, 9),
+    value = c(rep(NA_integer64_, 3L), as.integer64(4:10))
+  )
+  test(2358.1, options=c(datatable.optimize=2L),
+    dt_short[, sum(value, na.rm = FALSE), by = id]$V1,
+    as.integer64(c(NA, NA, NA, 4:8, 19))
+  )
+
+  # Example 2 from issue: ids in pairs, same values; checks multi-row groups
+  dt_short2 = data.table(
+    id = rep(1:5, each = 2L),
+    value = c(rep(NA_integer64_, 3L), as.integer64(4:10))
+  )
+  test(2358.2, options=c(datatable.optimize=2L),
+    dt_short2[, sum(value, na.rm = FALSE), by = id]$V1,
+    as.integer64(c(NA, NA, 11, 15, 19))
+  )
+
+  # Test mean for integer64 with NA
+  dt_mean = data.table(
+    id = c(1,1,2,2,3,3),
+    value = as.integer64(c(NA, NA, NA, 20000000, 5, 3))
+  )
+  test(2358.3, options=c(datatable.optimize=2L),
+    dt_mean[, mean(value, na.rm=FALSE), by = id]$V1,
+    c(NA, NA, 4)
+  )
+
+  # GForce sum vs base::sum for integer64
+  DT = data.table(id = sample(letters, 1000, TRUE), value = as.integer64(sample(c(1:100, NA), 1000, TRUE)))
+  gforce = DT[, .(gforce_sum = sum(value)), by=id]
+  base = DT[, .(true_sum = base::sum(value)), by=id]
+  merged = merge(gforce, base, by="id", all=TRUE)
+  test(2358.4, options=c(datatable.optimize=2L),
+    merged$gforce_sum, merged$true_sum
+  )
+
+  # GForce mean vs base::mean for integer64
+  DTm = data.table(id = sample(letters, 1000, TRUE), value = as.integer64(sample(c(1:100, NA), 1000, TRUE)))
+  gforce_m = DTm[, .(gforce_mean = mean(value)), by=id]
+  base_m = DTm[, .(true_mean = base::mean(value)), by=id]
+  merged_m = merge(gforce_m, base_m, by="id", all=TRUE)
+  test(2358.5, options=c(datatable.optimize=2L),
+    merged$gforce_mean, merged$true_mean
+  )
+})
+
+#2606 tables() list_search finds nested data.tables in lists
+xenv2 = new.env()
+xenv2$L = list(
+  data.table(a = 1L),
+  data.table(a = 1:2)
+)
+test(2360.1,
+  tables(env = xenv2, silent = TRUE, list_search = TRUE)[, .(NAME, NROW, NCOL)],
+  data.table(NAME = c("L[[1]]", "L[[2]]"), NROW = c(1L, 2L), NCOL = c(1L, 1L))
+)
+test(2360.2,
+  nrow(tables(env = xenv2, silent = TRUE, list_search = TRUE, list_len_threshold = 1L)),
+  0L
+)
+rm(xenv2)
@@ -181,7 +181,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
   \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. For more details see \href{../doc/datatable-programming.html}{\code{vignette("datatable-programming")}}. }
 
-  \item{showProgress}{ \code{TRUE} shows progress indicator with estimated time to completion for lengthy "by" operations. }
+  \item{showProgress}{ \code{TRUE} (default when \code{interactive()}) shows a progress indicator with estimated time to completion for lengthy "by" operations, updating every 3 seconds. An integer value controls the update interval in seconds (minimum 3). \code{FALSE} disables the progress indicator. }
 }
 \details{
 \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr

@@ -86,9 +86,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   SEXP SDall = PROTECT(findVar(install(".SDall"), env)); nprotect++;  // PROTECT for rchk
   SEXP SD = PROTECT(findVar(install(".SD"), env)); nprotect++;
 
-  const bool showProgress = LOGICAL(showProgressArg)[0]==1 && ngrp > 1; // showProgress only if more than 1 group
+  int updateTime = INTEGER(showProgressArg)[0];
+  const bool showProgress = updateTime > 0 && ngrp > 1; // showProgress only if more than 1 group
   double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning
-  double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress
+  double nextTime = (showProgress) ? startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before starting to print progress
 
   hashtab * specials = hash_create(3 + ngrpcols + xlength(SDall)); // .I, .N, .GRP plus columns of .BY plus SDall
   PROTECT(specials->prot); nprotect++;
@@ -451,17 +452,17 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     // could potentially refactor to use fread's progress() function, however we would lose some information in favor of simplicity.
     double now;
     if (showProgress && (now=wallclock())>=nextTime) {
+      // # nocov start. Requires long-running test case
       double avgTimePerGroup = (now-startTime)/(i+1);
       int ETA = (int)(avgTimePerGroup*(ngrp-i-1));
       if (hasPrinted || ETA >= 0) {
-        // # nocov start. Requires long-running test case
         if (verbose && !hasPrinted) Rprintf(_("\n"));
         Rprintf("\r"); // # notranslate. \r is not internationalizable
         Rprintf(_("Processed %d groups out of %d. %.0f%% done. Time elapsed: %ds. ETA: %ds."), i+1, ngrp, 100.0*(i+1)/ngrp, (int)(now-startTime), ETA);
-        // # nocov end
       }
-      nextTime = now+1;
+      nextTime = now+updateTime;
       hasPrinted = true;
+      // # nocov end
     }
     ansloc += maxn;
     if (firstalloc) {

@@ -502,13 +502,13 @@ SEXP gsum(SEXP x, SEXP narmArg)
               const int64_t *my_gx = gx + b*batchSize + pos;
               const uint16_t *my_low = low + b*batchSize + pos;
               for (int i=0; i<howMany; i++) {
-                const int64_t elem = my_gx[i];
-                if (elem!=INT64_MIN) {
-                  _ans[my_low[i]] += elem;
-                } else {
-                  _ans[my_low[i]] = INT64_MIN;
-                  break;
+                if (_ans[my_low[i]] == INT64_MIN) continue;
+                const int64_t b = my_gx[i];
+                if (b == INT64_MIN) {
+                  if (!narm) _ans[my_low[i]] = INT64_MIN;
+                  continue;
                 }
+                _ans[my_low[i]] += b;
               }
             }
           }