From 45dc9e708c8db7b45b3004823c51746166a3b87e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 18:43:10 +0200 Subject: [PATCH 01/21] add requires_utf8 argument to tests --- R/test.data.table.R | 25 +++++- inst/tests/tests.Rraw | 187 ++++++++++++++++++++++++------------------ 2 files changed, 131 insertions(+), 81 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 6e264c871f..af7283e9bd 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -361,7 +361,7 @@ gc_mem = function() { # nocov end } -test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL) { +test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL,requires_utf8=FALSE) { if (!is.null(env)) { old = Sys.getenv(names(env), names=TRUE, unset=NA) to_unset = !lengths(env) @@ -375,6 +375,29 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no Sys.unsetenv(names(old)[!is_preset]) }, add=TRUE) } + # Check UTF-8 requirement + if (requires_utf8) { + utf8_available = l10n_info()$`UTF-8` || { + lc_ctype = Sys.getlocale('LC_CTYPE') + lc_wantctype = 'en_US.UTF-8' + # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance. + # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English. + # Avoid the warning upon possible failure, #7210. + lc_newctype = suppressWarnings(Sys.setlocale('LC_CTYPE', lc_wantctype)) + if (identical(lc_newctype, lc_wantctype)) { + on.exit(Sys.setlocale('LC_CTYPE', lc_ctype), add=TRUE) + TRUE + } else FALSE + } + if (!utf8_available) { + last_utf8_skip = get0("last_utf8_skip", parent.frame(), ifnotfound=0, inherits=TRUE) + if (num - last_utf8_skip >= 1) { + catf("Test %s skipped because it needs a UTF-8 locale.\n", num) + } + assign("last_utf8_skip", num, parent.frame(), inherits=TRUE) + return(invisible(TRUE)) + } + } # Usage: # i) tests that x equals y when both x and y are supplied, the most common usage # ii) tests that x is TRUE when y isn't supplied diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 609977b991..9edc00a7d3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3568,7 +3568,28 @@ DT[,`:=`(last.x=tail(x,1L),last.x1=tail(x1,1L)),by=y] test(1086, class(DT$last.x), c("POSIXct", "POSIXt")) test(1087, class(DT$last.x1), "ITime") -# Tests 1088-1093 were non-ASCII. Now in DtNonAsciiTests +# chmatch on 'unknown' encoding (e.g. 
as.character(as.symbol("\u00E4")) )falling back to match, #2538 and #4818 +x1 <- c("al\u00E4", "ala", "\u00E4allc", "coep") +x2 <- c("ala", "al\u00E4") +test(1088.1, requires_utf8=TRUE, chmatch(x1, x2), match(x1, x2)) # should not fallback to "match" +test(1088.2, requires_utf8=TRUE, x1 %chin% x2, x1 %in% x2) +# change x1 to symbol to character +x3 <- unlist(lapply(x1, function(x) as.character(as.name(x))), use.names=FALSE) +test(1089.1, requires_utf8=TRUE, chmatch(x3, x2), match(x3, x2)) # should fallback to match in "x" +test(1089.2, requires_utf8=TRUE, x3 %chin% x2, x3 %in% x2) # should fallback to match in "x" +# change x2 to symbol to character +x4 <- unlist(lapply(x2, function(x) as.character(as.name(x))), use.names=FALSE) +test(1090.1, requires_utf8=TRUE, chmatch(x1,x4), match(x1, x4)) # should fallback to match in "table" +test(1090.2, requires_utf8=TRUE, x1 %chin% x4, x1 %in% x4) +# both are symbols to characters +test(1091.1, requires_utf8=TRUE, chmatch(x3, x4), match(x3, x4)) # should fallback to "match" in "x" as well. +test(1091.2, requires_utf8=TRUE, x3 %chin% x4, x3 %in% x4) +# for completness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) + +DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA)) +setnames(DT, "pas", "p\u00E4s") +test(1092, requires_utf8=TRUE, eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) +test(1093, requires_utf8=TRUE, eval(parse(text="DT[, p\u00E4s := 34L]")), data.table("p\u00E4s" = 34L, good=c(1:10,NA))) # print of unnamed DT with >20 <= 100 rows, #97 (RF#4934) DT <- data.table(x=1:25, y=letters[1:25]) @@ -4320,7 +4341,10 @@ test(1162.24, is.sorted(rep(NA_character_, 2))) x <- character(0) test(1163, last(x), character(0)) -# Test 1164 was a non-ASCII test, now in DtNonAsciiTests +# Bug fix for #5159 - chmatch and character encoding (for some reason this seems to pass the test on a mac as well) +a<-c("a","\u00E4","\u00DF","z") +au<-iconv(a,"UTF8","latin1") +test(1164.1, requires_utf8=TRUE, chmatch(a, au), match(a, au)) # Bug fix for #73 - segfault when rbindlist on empty data.tables x <- as.data.table(BOD) @@ -4606,7 +4630,22 @@ test(1228.4, class(DT), class(DT[, sum(b), by=a])) test(1228.5, class(DT), class(DT[a>1, sum(b), by=a])) test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a])) -# test 1229 was non-ASCII, now in package DtNonAsciiTests +# savetl_init error after error, in v1.9.2, thanks Arun +DT <- data.table(x=1:5, y=10:6) +test(1229.1, DT[forderv(DT, -1)], error="non-existing column") +test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) +# umlaut in column names (red herring I think, but testing anyway) +sentEx = data.table(abend = c(1, 1, 0, 0, 2), + aber = c(0, 1, 0, 0, 0), + "\u00FCber" = c(1, 0, 0, 0, 0), + "\u00FCberall" = c(0, 0, 0, 0, 0), + "\u00FCberlegt" = c(0, 0, 0, 0, 0), + ID = structure(c(1L, 1L, 2L, 2L, 2L), .Label = c("0019", "0021"), class = "factor"), + abgeandert = c(1, 1, 1, 0, 0), + abgebildet = c(0, 0, 1, 1, 0), + abgelegt = c(0, 0, 0, 0, 3)) +test(1229.3, requires_utf8=TRUE, sentEx[, lapply(.SD, sum), by=ID], data.table(ID=factor(c("0019","0021")), abend=c(2,2), aber=c(1,0), "\u00FCber"=c(1,0), + "\u00FCberall"=c(0,0), "\u00FCberlegt" = c(0,0), abgeandert=c(2,1), abgebildet = c(0,2), abgelegt=c(0,3))) # Test that ad hoc by detects if ordered and dogroups switches to memcpy if contiguous, #1050 DT = data.table(a=1:3,b=1:6,key="a") @@ -17655,11 +17694,7 @@ 
test(2194.5, endsWithAny(NA_character_, 'a'), FALSE) test(2194.6, endsWithAny(character(), 'a'), error="Internal error.*types or lengths incorrect") # file used in encoding tests txt = readLines(testDir("issue_563_fread.txt")) -local(if (eval(utf8_check_expr)) { - test(2194.7, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5 -} else { - cat("Test 2194.7 skipped because it needs a UTF-8 locale.\n") -}) +test(2194.7, requires_utf8=TRUE, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5 test(2194.8, endsWith('abcd', 'd'), error="Internal error.*use endsWithAny") # uniqueN(x, by=character()) was internal error, #4594 @@ -18641,59 +18676,55 @@ test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data. rm(.datatable.aware) # tests for trunc.char handling wide characters #5096 -local(if (eval(utf8_check_expr)) { - accented_a = "\u0061\u0301" - ja_ichi = "\u4E00" - ja_ni = "\u4E8C" - ja_ko = "\u3053" - ja_n = "\u3093" - dots = "..." - clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output - # Tests for combining character latin a and acute accent, single row - DT = data.table(strrep(accented_a, 4L)) - test(2253.01, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) - test(2253.02, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) - test(2253.03, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) - # Tests for full-width japanese character ichi, single row - DT = data.table(strrep(ja_ichi, 4L)) - test(2253.04, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) - test(2253.05, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) - test(2253.06, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) - # Tests for multiple, different length combining character rows - DT = data.table(strrep(accented_a, 1L:4L)) - test(2253.07, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) - test(2253.08, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) - test(2253.09, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) - # Tests for multiple, different length full-width characters - DT = data.table(strrep(ja_ichi, 1L:4L)) - test(2253.10, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) - test(2253.11, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) - test(2253.12, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) - # Tests for combined characters, multiple columns - DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa") - test(2253.13, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) - test(2253.14, 
options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) - test(2253.15, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) - test(2253.16, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) - # Tests for multiple columns, multiple rows - DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3)) - test(2253.17, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), - paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), - paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L)))) - test(2253.18, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), - paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), - paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L)))) - test(2253.19, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)), - paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "), - paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) - # test for data.table with NA, #6441 - test(2253.20, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") -} else { - cat("Tests 2253* skipped because they need a UTF-8 locale.\n") -}) +accented_a = "\u0061\u0301" +ja_ichi = "\u4E00" +ja_ni = "\u4E8C" +ja_ko = "\u3053" +ja_n = "\u3093" +dots = "..." 
+clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output +# Tests for combining character latin a and acute accent, single row +DT = data.table(strrep(accented_a, 4L)) +test(2253.01, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) +test(2253.02, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) +test(2253.03, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) +# Tests for full-width japanese character ichi, single row +DT = data.table(strrep(ja_ichi, 4L)) +test(2253.04, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) +test(2253.05, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) +test(2253.06, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) +# Tests for multiple, different length combining character rows +DT = data.table(strrep(accented_a, 1L:4L)) +test(2253.07, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) +test(2253.08, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) +test(2253.09, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) +# Tests for multiple, different length full-width characters +DT = data.table(strrep(ja_ichi, 1L:4L)) +test(2253.10, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) +test(2253.11, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) +test(2253.12, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) +# Tests for combined characters, multiple columns +DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa") +test(2253.13, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) +test(2253.14, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) +test(2253.15, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) +test(2253.16, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) +# Tests for multiple columns, multiple rows +DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3)) +test(2253.17, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), 
gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), + paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), + paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L)))) +test(2253.18, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), + paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), + paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L)))) +test(2253.19, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)), + paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "), + paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) +# test for data.table with NA, #6441 +test(2253.20, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") # allow 1-D matrix in j for consistency, #783 DT=data.table(a = rep(1:2, 3), b = 1:6) @@ -20830,21 +20861,17 @@ x = data.table(a=1, b=2L) y = data.table(c=1.5, d=1L) test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b") -local(if (eval(utf8_check_expr)) { - # rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452 - x = data.table(a = 1, b = 2, c = 3) - y = data.table(x = 4, y = 5, z = 6) - # a-umlaut, o-umlaut, u-umlaut - setnames(x , c("\u00e4", "\u00f6", "\u00fc")) - setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) - test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) - test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1))) - set(y, j="\u00e4", value=NULL) - test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) - test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1))) -} else { - cat("Tests 2298.* skipped because they need a UTF-8 locale.\n") -}) +# rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452 +x = data.table(a = 1, b = 2, c = 3) +y = data.table(x = 4, y = 5, z = 6) +# a-umlaut, o-umlaut, u-umlaut +setnames(x , c("\u00e4", "\u00f6", "\u00fc")) +setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) +test(2298.1, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) +test(2298.2, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1))) +set(y, j="\u00e4", value=NULL) +test(2298.3, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) +test(2298.4, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1))) # #6592: printing nested single-column frames test(2299.01, format_list_item(data.frame(a=1)), output="") @@ -21615,13 +21642,13 @@ if (base::getRversion() >= "4.3.0") { ## follow up of #7213, see #7321 } # fwrite: allow dec=',' with single 
column, #7227 -test(2337.1, fwrite(data.table(1), dec=","), NULL) +test(2337.1, fwrite(data.table(1), dec=","), output = "V1\n1") if (base::getRversion() >= "4.0.0") { # rely on stopifnot(named = ...) for correct message test(2337.2, fwrite(data.table(0.1, 0.2), dec=",", sep=","), error = "dec and sep must be distinct") } -test(2337.3, is.null(fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t"))) -test(2337.4, is.null(fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=","))) -test(2337.5, is.null(fwrite(data.table(a=numeric()), dec=",", sep=","))) +test(2337.3, fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t"), output = "V1\n0,1\n0,2") +test(2337.4, fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=","), output = "a,b") +test(2337.5, fwrite(data.table(a=numeric()), dec=",", sep=","), output = "a") # 2864 force decimal points for whole numbers in numeric columns dd = data.table(x=c(1, 2, 3)) From e9ecf6900384cd4c402b88c53e23fcc20cf9e60b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 20:48:00 +0200 Subject: [PATCH 02/21] fix warnings of sys.source --- R/test.data.table.R | 25 ++++--- inst/tests/tests.Rraw | 149 +++++++++++++++++++++++------------------- 2 files changed, 93 insertions(+), 81 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index af7283e9bd..61078a2996 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -361,6 +361,8 @@ gc_mem = function() { # nocov end } +utf8_check = function(test_str) identical(test_str, enc2native(test_str)) + test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL,requires_utf8=FALSE) { if (!is.null(env)) { old = Sys.getenv(names(env), names=TRUE, unset=NA) @@ -376,23 +378,18 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no }, add=TRUE) } # Check UTF-8 requirement - if (requires_utf8) { - utf8_available = l10n_info()$`UTF-8` || { - lc_ctype = Sys.getlocale('LC_CTYPE') - lc_wantctype = 'en_US.UTF-8' - # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance. - # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English. - # Avoid the warning upon possible failure, #7210. 
- lc_newctype = suppressWarnings(Sys.setlocale('LC_CTYPE', lc_wantctype)) - if (identical(lc_newctype, lc_wantctype)) { - on.exit(Sys.setlocale('LC_CTYPE', lc_ctype), add=TRUE) - TRUE - } else FALSE + if (!isFALSE(requires_utf8)) { + # Test string with common UTF-8 symbols that appear in tests: ñ (test 2266), ü (test 1229.3), ん (Japanese) + # If requires_utf8 is TRUE, use the default test string; if it's a string, use that string + if (isTRUE(requires_utf8)) { + test_str = "a\u00F1o \u00FCber \u3093" + } else { + test_str = requires_utf8 } - if (!utf8_available) { + if (!utf8_check(test_str)) { last_utf8_skip = get0("last_utf8_skip", parent.frame(), ifnotfound=0, inherits=TRUE) if (num - last_utf8_skip >= 1) { - catf("Test %s skipped because it needs a UTF-8 locale.\n", num) + catf("Test %s skipped because required UTF-8 symbols cannot be represented in native encoding.\n", num) } assign("last_utf8_skip", num, parent.frame(), inherits=TRUE) return(invisible(TRUE)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9edc00a7d3..58f5687b09 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3569,27 +3569,30 @@ test(1086, class(DT$last.x), c("POSIXct", "POSIXt")) test(1087, class(DT$last.x1), "ITime") # chmatch on 'unknown' encoding (e.g. as.character(as.symbol("\u00E4")) )falling back to match, #2538 and #4818 -x1 <- c("al\u00E4", "ala", "\u00E4allc", "coep") -x2 <- c("ala", "al\u00E4") -test(1088.1, requires_utf8=TRUE, chmatch(x1, x2), match(x1, x2)) # should not fallback to "match" -test(1088.2, requires_utf8=TRUE, x1 %chin% x2, x1 %in% x2) +x1 = c("al\u00E4", "ala", "\u00E4allc", "coep") +x2 = c("ala", "al\u00E4") +tstc = function(y) unlist(lapply(y, function(x) as.character(as.name(x))), use.names=FALSE) +test(1088.1, requires_utf8="\u00E4", chmatch(x1, x2), match(x1, x2)) # should not fallback to "match" +test(1088.2, requires_utf8="\u00E4", x1 %chin% x2, x1 %in% x2) # change x1 to symbol to character -x3 <- unlist(lapply(x1, function(x) as.character(as.name(x))), use.names=FALSE) -test(1089.1, requires_utf8=TRUE, chmatch(x3, x2), match(x3, x2)) # should fallback to match in "x" -test(1089.2, requires_utf8=TRUE, x3 %chin% x2, x3 %in% x2) # should fallback to match in "x" +test(1089.1, requires_utf8="\u00E4", chmatch(tstc(x1), x2), match(tstc(x1), x2)) # should fallback to match in "x" +test(1089.2, requires_utf8="\u00E4", tstc(x1) %chin% x2, tstc(x1) %in% x2) # should fallback to match in "x" # change x2 to symbol to character -x4 <- unlist(lapply(x2, function(x) as.character(as.name(x))), use.names=FALSE) -test(1090.1, requires_utf8=TRUE, chmatch(x1,x4), match(x1, x4)) # should fallback to match in "table" -test(1090.2, requires_utf8=TRUE, x1 %chin% x4, x1 %in% x4) +test(1090.1, requires_utf8="\u00E4", chmatch(x1,tstc(x2)), match(x1, tstc(x2))) # should fallback to match in "table" +test(1090.2, requires_utf8="\u00E4", x1 %chin% tstc(x2), x1 %in% tstc(x2)) # both are symbols to characters -test(1091.1, requires_utf8=TRUE, chmatch(x3, x4), match(x3, x4)) # should fallback to "match" in "x" as well. -test(1091.2, requires_utf8=TRUE, x3 %chin% x4, x3 %in% x4) +test(1091.1, requires_utf8="\u00E4", chmatch(tstc(x1), tstc(x2)), match(tstc(x1), tstc(x2))) # should fallback to "match" in "x" as well. 
+test(1091.2, requires_utf8="\u00E4", tstc(x1) %chin% tstc(x2), tstc(x1) %in% tstc(x2)) # for completness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) -DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA)) -setnames(DT, "pas", "p\u00E4s") -test(1092, requires_utf8=TRUE, eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) -test(1093, requires_utf8=TRUE, eval(parse(text="DT[, p\u00E4s := 34L]")), data.table("p\u00E4s" = 34L, good=c(1:10,NA))) +local(if (utf8_check("\u00E4")) { +eval(parse(text=' + DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA)) + setnames(DT, "pas", "p\u00E4s") + test(1092, requires_utf8="\u00E4", eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) + test(1093, requires_utf8="\u00E4", eval(parse(text="DT[, p\u00E4s := 34L]")), data.table("p\u00E4s" = 34L, good=c(1:10,NA))) +')) +} else cat("Tests 1092+1093 skipped because required UTF-8 symbols cannot be represented in native encoding.\n")) # print of unnamed DT with >20 <= 100 rows, #97 (RF#4934) DT <- data.table(x=1:25, y=letters[1:25]) @@ -4343,8 +4346,8 @@ test(1163, last(x), character(0)) # Bug fix for #5159 - chmatch and character encoding (for some reason this seems to pass the test on a mac as well) a<-c("a","\u00E4","\u00DF","z") -au<-iconv(a,"UTF8","latin1") -test(1164.1, requires_utf8=TRUE, chmatch(a, au), match(a, au)) + au<-iconv(a,"UTF8","latin1") + test(1164.1, requires_utf8=c("\u00E4", "\u00DF"), chmatch(a, au), match(a, au)) # Bug fix for #73 - segfault when rbindlist on empty data.tables x <- as.data.table(BOD) @@ -4634,18 +4637,24 @@ test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a])) DT <- data.table(x=1:5, y=10:6) test(1229.1, DT[forderv(DT, -1)], error="non-existing column") test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) -# umlaut in column names (red herring I think, but testing anyway) -sentEx = data.table(abend = c(1, 1, 0, 0, 2), - aber = c(0, 1, 0, 0, 0), - "\u00FCber" = c(1, 0, 0, 0, 0), - "\u00FCberall" = c(0, 0, 0, 0, 0), - "\u00FCberlegt" = c(0, 0, 0, 0, 0), - ID = structure(c(1L, 1L, 2L, 2L, 2L), .Label = c("0019", "0021"), class = "factor"), - abgeandert = c(1, 1, 1, 0, 0), - abgebildet = c(0, 0, 1, 1, 0), - abgelegt = c(0, 0, 0, 0, 3)) -test(1229.3, requires_utf8=TRUE, sentEx[, lapply(.SD, sum), by=ID], data.table(ID=factor(c("0019","0021")), abend=c(2,2), aber=c(1,0), "\u00FCber"=c(1,0), - "\u00FCberall"=c(0,0), "\u00FCberlegt" = c(0,0), abgeandert=c(2,1), abgebildet = c(0,2), abgelegt=c(0,3))) +# umlaut in column names (red herring I think, but testing anyway +local(if (utf8_check("\u00e4\u00f6\u00fc")) { + eval(parse(text = ' + sentEx = data.table(abend = c(1, 1, 0, 0, 2), + aber = c(0, 1, 0, 0, 0), + "\u00FCber" = c(1, 0, 0, 0, 0), + "\u00FCberall" = c(0, 0, 0, 0, 0), + "\u00FCberlegt" = c(0, 0, 0, 0, 0), + ID = structure(c(1L, 1L, 2L, 2L, 2L), .Label = c("0019", "0021"), class = "factor"), + abgeandert = c(1, 1, 1, 0, 0), + abgebildet = c(0, 0, 1, 1, 0), + abgelegt = c(0, 0, 0, 0, 3)) + test(1229.3, sentEx[, lapply(.SD, sum), by=ID], data.table(ID=factor(c("0019","0021")), abend=c(2,2), aber=c(1,0), "\u00FCber"=c(1,0), + "\u00FCberall"=c(0,0), "\u00FCberlegt" = c(0,0), abgeandert=c(2,1), abgebildet = c(0,2), abgelegt=c(0,3))) + ')) +} else { + cat("Tests 1229.3 skipped because required UTF-8 symbols cannot be represented in native 
encoding.\n") +}) # Test that ad hoc by detects if ordered and dogroups switches to memcpy if contiguous, #1050 DT = data.table(a=1:3,b=1:6,key="a") @@ -7938,10 +7947,8 @@ test(1547, foo(1L, 5L, a=2L, "c"), c("2", "c")) # Fix for encoding issues in windows, #563 f = testDir("issue_563_fread.txt") -ans1 <- fread(f, sep=",", header=TRUE) -ans2 <- fread(f, sep=",", header=TRUE, encoding="UTF-8") -test(1548.1, unique(unlist(lapply(ans1, Encoding))), "unknown") -test(1548.2, unique(unlist(lapply(ans2, Encoding))), "UTF-8") +test(1548.1, requires_utf8=TRUE, unique(unlist(lapply(fread(f, sep=",", header=TRUE), Encoding))), "unknown") +test(1548.2, requires_utf8=TRUE, unique(unlist(lapply(fread(f, sep=",", header=TRUE, encoding="UTF-8"), Encoding))), "UTF-8") # 1549 moved to benchmark.Rraw, #5517 @@ -17693,8 +17700,9 @@ test(2194.4, endsWithAny(letters, 'e'), error="Internal error.*types or lengths test(2194.5, endsWithAny(NA_character_, 'a'), FALSE) test(2194.6, endsWithAny(character(), 'a'), error="Internal error.*types or lengths incorrect") # file used in encoding tests -txt = readLines(testDir("issue_563_fread.txt")) -test(2194.7, requires_utf8=TRUE, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5 +needed_chars = "\u0105\u017E\u016B\u012F\u0173\u0117\u0161\u0119" +txt = parse(text='readLines(testDir("issue_563_fread.txt"))') +test(2194.7, requires_utf8=needed_chars, endsWithAny(eval(txt), 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5 test(2194.8, endsWith('abcd', 'd'), error="Internal error.*use endsWithAny") # uniqueN(x, by=character()) was internal error, #4594 @@ -18681,50 +18689,51 @@ ja_ichi = "\u4E00" ja_ni = "\u4E8C" ja_ko = "\u3053" ja_n = "\u3093" +nc = paste0(accented_a, ja_ichi, ja_ni, ja_ko, ja_n) dots = "..." 
clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output # Tests for combining character latin a and acute accent, single row DT = data.table(strrep(accented_a, 4L)) -test(2253.01, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) -test(2253.02, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) -test(2253.03, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) +test(2253.01, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) +test(2253.02, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) +test(2253.03, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) # Tests for full-width japanese character ichi, single row DT = data.table(strrep(ja_ichi, 4L)) -test(2253.04, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) -test(2253.05, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) -test(2253.06, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) +test(2253.04, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) +test(2253.05, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) +test(2253.06, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) # Tests for multiple, different length combining character rows DT = data.table(strrep(accented_a, 1L:4L)) -test(2253.07, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) -test(2253.08, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) -test(2253.09, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) +test(2253.07, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) +test(2253.08, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) +test(2253.09, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) # Tests for multiple, different length full-width characters DT = data.table(strrep(ja_ichi, 1L:4L)) -test(2253.10, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) -test(2253.11, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) -test(2253.12, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", 
capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) +test(2253.10, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) +test(2253.11, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) +test(2253.12, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) # Tests for combined characters, multiple columns DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa") -test(2253.13, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) -test(2253.14, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) -test(2253.15, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) -test(2253.16, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) +test(2253.13, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) +test(2253.14, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) +test(2253.15, requires_utf8=nc, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) +test(2253.16, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) # Tests for multiple columns, multiple rows DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3)) -test(2253.17, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), +test(2253.17, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L)))) -test(2253.18, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), +test(2253.18, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L)))) 
-test(2253.19, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), +test(2253.19, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)), paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "), paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) # test for data.table with NA, #6441 -test(2253.20, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") +test(2253.20, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") # allow 1-D matrix in j for consistency, #783 DT=data.table(a = rep(1:2, 3), b = 1:6) @@ -20861,17 +20870,23 @@ x = data.table(a=1, b=2L) y = data.table(c=1.5, d=1L) test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b") -# rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452 -x = data.table(a = 1, b = 2, c = 3) -y = data.table(x = 4, y = 5, z = 6) -# a-umlaut, o-umlaut, u-umlaut -setnames(x , c("\u00e4", "\u00f6", "\u00fc")) -setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) -test(2298.1, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) -test(2298.2, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1))) -set(y, j="\u00e4", value=NULL) -test(2298.3, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) -test(2298.4, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1))) +local(if (utf8_check("\u00e4\u00f6\u00fc")) { + # rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452 + x = data.table(a = 1, b = 2, c = 3) + y = data.table(x = 4, y = 5, z = 6) + # a-umlaut, o-umlaut, u-umlaut + eval(parse(text = ' + setnames(x , c("\u00e4", "\u00f6", "\u00fc")) + setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) + test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) + test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1))) + set(y, j="\u00e4", value=NULL) + test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) + test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1))) + ')) +} else { + cat("Tests 2298.* skipped because they need a UTF-8 locale.\n") +}) # #6592: printing nested single-column frames test(2299.01, format_list_item(data.frame(a=1)), output="") From 8161adf24ff9ccf326674af5bb5961f327605cfb Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 20:53:19 +0200 Subject: [PATCH 03/21] change typos --- R/test.data.table.R | 8 +------- inst/tests/tests.Rraw | 6 +++--- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 61078a2996..91656d8576 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -379,13 +379,7 @@ test = 
function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no } # Check UTF-8 requirement if (!isFALSE(requires_utf8)) { - # Test string with common UTF-8 symbols that appear in tests: ñ (test 2266), ü (test 1229.3), ん (Japanese) - # If requires_utf8 is TRUE, use the default test string; if it's a string, use that string - if (isTRUE(requires_utf8)) { - test_str = "a\u00F1o \u00FCber \u3093" - } else { - test_str = requires_utf8 - } + test_str = if (isTRUE(requires_utf8)) "\u00F1\u00FC\u3093" else requires_utf8 if (!utf8_check(test_str)) { last_utf8_skip = get0("last_utf8_skip", parent.frame(), ifnotfound=0, inherits=TRUE) if (num - last_utf8_skip >= 1) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 58f5687b09..d6c3adace6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4346,8 +4346,8 @@ test(1163, last(x), character(0)) # Bug fix for #5159 - chmatch and character encoding (for some reason this seems to pass the test on a mac as well) a<-c("a","\u00E4","\u00DF","z") - au<-iconv(a,"UTF8","latin1") - test(1164.1, requires_utf8=c("\u00E4", "\u00DF"), chmatch(a, au), match(a, au)) +au<-iconv(a,"UTF8","latin1") +test(1164.1, requires_utf8=c("\u00E4", "\u00DF"), chmatch(a, au), match(a, au)) # Bug fix for #73 - segfault when rbindlist on empty data.tables x <- as.data.table(BOD) @@ -4653,7 +4653,7 @@ local(if (utf8_check("\u00e4\u00f6\u00fc")) { "\u00FCberall"=c(0,0), "\u00FCberlegt" = c(0,0), abgeandert=c(2,1), abgebildet = c(0,2), abgelegt=c(0,3))) ')) } else { - cat("Tests 1229.3 skipped because required UTF-8 symbols cannot be represented in native encoding.\n") + cat("Test 1229.3 skipped because required UTF-8 symbols cannot be represented in native encoding.\n") }) # Test that ad hoc by detects if ordered and dogroups switches to memcpy if contiguous, #1050 From 1a963900326a83d2030b467448c10dc794005ecb Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 21:25:04 +0200 Subject: [PATCH 04/21] register utf8_check function --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d6c3adace6..26cec19b74 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -78,6 +78,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { test = data.table:::test uniqlengths = data.table:::uniqlengths uniqlist = data.table:::uniqlist + utf8_check = data.table:::utf8_check warningf = data.table:::warningf which_ = data.table:::which_ which.first = data.table:::which.first From 0e82cf0befe386c5ea3c71df8a31884f32693326 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 22:04:05 +0200 Subject: [PATCH 05/21] add documentation and NEWS --- NEWS.md | 2 ++ man/test.Rd | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3face7519b..db69c7ef80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -363,6 +363,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 7. In rare situations a data.table object may lose its internal attribute that holds a self-reference. New helper function `.selfref.ok()` tests just that. It is only intended for technical use cases. See manual for examples. +8. `test()` gains new argument `requires_utf8` to skip tests when UTF-8 support is not available, [#7336](https://github.com/Rdatatable/data.table/issues/7336). Thanks @MichaelChirico for the suggestion and @ben-schwen for the implementation. 
+ ## data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025) 1. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070). diff --git a/man/test.Rd b/man/test.Rd index 594040aca9..fcbbab3f79 100644 --- a/man/test.Rd +++ b/man/test.Rd @@ -8,7 +8,7 @@ test(num, x, y = TRUE, error = NULL, warning = NULL, message = NULL, output = NULL, notOutput = NULL, ignore.warning = NULL, - options = NULL, env = NULL) + options = NULL, env = NULL, requires_utf8 = FALSE) } \arguments{ \item{num}{ A unique identifier for a test, helpful in identifying the source of failure when testing is not working. Currently, we use a manually-incremented system with tests formatted as \code{n.m}, where essentially \code{n} indexes an issue and \code{m} indexes aspects of that issue. For the most part, your new PR should only have one value of \code{n} (scroll to the end of \code{inst/tests/tests.Rraw} to see the next available ID) and then index the tests within your PR by increasing \code{m}. Note -- \code{n.m} is interpreted as a number, so \code{123.4} and \code{123.40} are actually the same -- please \code{0}-pad as appropriate. Test identifiers are checked to be in increasing order at runtime to prevent duplicates being possible. } @@ -22,6 +22,7 @@ test(num, x, y = TRUE, \item{ignore.warning}{ A single character string. Any warnings emitted by \code{x} that contain this string are dropped. Remaining warnings are compared to the expected \code{warning} as normal. } \item{options}{ A named list of options to set for the duration of the test. Any code evaluated during this call to \code{test()} (usually, \code{x}, or maybe \code{y}) will run with the named options set, and the original options will be restored on return. This is a named list since different options can have different types in general, but in typical usage, only one option is set at a time, in which case a named vector is also accepted. } \item{env}{ A named list of environment variables to set for the duration of the test, much like \code{options}. A list entry set to \code{NULL} will unset (i.e., \code{\link{Sys.unsetenv}}) the corresponding variable. } +\item{requires_utf8}{ \code{FALSE} (default), \code{TRUE}, or a character string. When set, the test is skipped if UTF-8 characters cannot be represented in the native encoding. Use \code{TRUE} for default UTF-8 test characters or provide a custom string of test characters. } } \note{ \code{NA_real_} and \code{NaN} are treated as equal, use \code{identical} if distinction is needed. See examples below. 
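For reviewers, a minimal usage sketch to go with the man/test.Rd entry above. The test numbers (9999.x) and the strings passed here are invented for illustration only and are not tests added by this patch series; the behaviour described follows the skip logic and the built-in check string introduced in patches 01-03:

  x = c("al\u00E4", "ala")
  # skipped, with a console note, when "\u00E4" cannot be represented in the native encoding
  test(9999.1, requires_utf8="\u00E4", chmatch(x, rev(x)), match(x, rev(x)))
  # requires_utf8=TRUE falls back to the built-in check string "\u00F1\u00FC\u3093"
  test(9999.2, requires_utf8=TRUE, toupper("a"), "A")

When the check fails, test() returns invisible(TRUE) without evaluating its arguments, so skipped tests do not count as failures.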
From fa5967f054ed6d45eeb77f8ccffd194e1fcf0501 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 25 Oct 2025 22:35:06 +0200 Subject: [PATCH 06/21] add nocov for region that only hits on non UTF8 systems --- R/test.data.table.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/test.data.table.R b/R/test.data.table.R index 91656d8576..e3148220ba 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -381,12 +381,14 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no if (!isFALSE(requires_utf8)) { test_str = if (isTRUE(requires_utf8)) "\u00F1\u00FC\u3093" else requires_utf8 if (!utf8_check(test_str)) { + # nocov start last_utf8_skip = get0("last_utf8_skip", parent.frame(), ifnotfound=0, inherits=TRUE) if (num - last_utf8_skip >= 1) { catf("Test %s skipped because required UTF-8 symbols cannot be represented in native encoding.\n", num) } assign("last_utf8_skip", num, parent.frame(), inherits=TRUE) return(invisible(TRUE)) + # nocov end } } # Usage: From 05b3923ee817b136dcbed9fddb54379fa2071c30 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 30 Dec 2025 18:25:38 +0100 Subject: [PATCH 07/21] be more specific in NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index d9776abd5f..4f030ed850 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,7 +24,7 @@ 3. Vignettes are now built using `litedown` instead of `knitr`, [#6394](https://github.com/Rdatatable/data.table/issues/6394). Thanks @jangorecki for the suggestion and @ben-schwen and @aitap for the implementation. -4. `test()` gains new argument `requires_utf8` to skip tests when UTF-8 support is not available, [#7336](https://github.com/Rdatatable/data.table/issues/7336). Thanks @MichaelChirico for the suggestion and @ben-schwen for the implementation. +4. The data.table test suite is a bit more robust to lacking UTF-8 support via a new `requires_utf8` argument to `test()` to skip tests when UTF-8 support is not available, [#7336](https://github.com/Rdatatable/data.table/issues/7336). Thanks @MichaelChirico for the suggestion and @ben-schwen for the implementation. ### BUG FIXES From 47a1aa00c2fd1b6ab4835e78b8239ae56c31448a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 30 Dec 2025 18:25:56 +0100 Subject: [PATCH 08/21] use combine instead of paste --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 28a3eb45d8..d20c567042 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18699,7 +18699,7 @@ ja_ichi = "\u4E00" ja_ni = "\u4E8C" ja_ko = "\u3053" ja_n = "\u3093" -nc = paste0(accented_a, ja_ichi, ja_ni, ja_ko, ja_n) +nc = c(accented_a, ja_ichi, ja_ni, ja_ko, ja_n) dots = "..." 
clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output # Tests for combining character latin a and acute accent, single row From e11d36d331f1ae8fd12721294905d933a01af62d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 30 Dec 2025 18:27:03 +0100 Subject: [PATCH 09/21] use vector form --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d20c567042..1795d22d73 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17700,7 +17700,7 @@ test(2194.4, endsWithAny(letters, 'e'), error="Internal error.*types or lengths test(2194.5, endsWithAny(NA_character_, 'a'), FALSE) test(2194.6, endsWithAny(character(), 'a'), error="Internal error.*types or lengths incorrect") # file used in encoding tests -needed_chars = "\u0105\u017E\u016B\u012F\u0173\u0117\u0161\u0119" +needed_chars = c("\u0105", "\u017E", "\u016B", "\u012F", "\u0173", "\u0117", "\u0161", "\u0119") txt = parse(text='readLines(testDir("issue_563_fread.txt"))') test(2194.7, requires_utf8=needed_chars, endsWithAny(eval(txt), 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5 test(2194.8, endsWith('abcd', 'd'), error="Internal error.*use endsWithAny") From 18c1722b28ae174b6eea6f277d98ce945f9b2cc5 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:22:05 +0100 Subject: [PATCH 10/21] use local --- inst/tests/tests.Rraw | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1795d22d73..e21d264e5f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3571,20 +3571,24 @@ test(1086, class(DT$last.x), c("POSIXct", "POSIXt")) test(1087, class(DT$last.x1), "ITime") # chmatch on 'unknown' encoding (e.g. as.character(as.symbol("\u00E4")) )falling back to match, #2538 and #4818 +local({ x1 = c("al\u00E4", "ala", "\u00E4allc", "coep") x2 = c("ala", "al\u00E4") -tstc = function(y) unlist(lapply(y, function(x) as.character(as.name(x))), use.names=FALSE) -test(1088.1, requires_utf8="\u00E4", chmatch(x1, x2), match(x1, x2)) # should not fallback to "match" -test(1088.2, requires_utf8="\u00E4", x1 %chin% x2, x1 %in% x2) -# change x1 to symbol to character -test(1089.1, requires_utf8="\u00E4", chmatch(tstc(x1), x2), match(tstc(x1), x2)) # should fallback to match in "x" -test(1089.2, requires_utf8="\u00E4", tstc(x1) %chin% x2, tstc(x1) %in% x2) # should fallback to match in "x" -# change x2 to symbol to character -test(1090.1, requires_utf8="\u00E4", chmatch(x1,tstc(x2)), match(x1, tstc(x2))) # should fallback to match in "table" -test(1090.2, requires_utf8="\u00E4", x1 %chin% tstc(x2), x1 %in% tstc(x2)) -# both are symbols to characters -test(1091.1, requires_utf8="\u00E4", chmatch(tstc(x1), tstc(x2)), match(tstc(x1), tstc(x2))) # should fallback to "match" in "x" as well. 
-test(1091.2, requires_utf8="\u00E4", tstc(x1) %chin% tstc(x2), tstc(x1) %in% tstc(x2)) +if (utf8_check(c(x1,x2))) { + tstc = function(y) unlist(lapply(y, function(x) as.character(as.name(x))), use.names=FALSE) + test(1088.1, chmatch(x1, x2), match(x1, x2)) # should not fallback to "match" + test(1088.2, x1 %chin% x2, x1 %in% x2) + # change x1 to symbol to character + test(1089.1, chmatch(tstc(x1), x2), match(tstc(x1), x2)) # should fallback to match in "x" + test(1089.2, tstc(x1) %chin% x2, tstc(x1) %in% x2) # should fallback to match in "x" + # change x2 to symbol to character + test(1090.1, chmatch(x1,tstc(x2)), match(x1, tstc(x2))) # should fallback to match in "table" + test(1090.2, x1 %chin% tstc(x2), x1 %in% tstc(x2)) + # both are symbols to characters + test(1091.1, chmatch(tstc(x1), tstc(x2)), match(tstc(x1), tstc(x2))) # should fallback to "match" in "x" as well. + test(1091.2, tstc(x1) %chin% tstc(x2), tstc(x1) %in% tstc(x2)) +} else cat("Tests 1088-1091 skipped because required UTF-8 symbols cannot be represented in native encoding.\n") +}) # for completness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) local(if (utf8_check("\u00E4")) { From b4d49ee8df0627e7e48803a14719a9e21b0490b7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:28:54 +0100 Subject: [PATCH 11/21] explain eval parse for utf8 check --- R/test.data.table.R | 5 +++++ inst/tests/tests.Rraw | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 99b48abc3c..ea34d6c8bc 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -370,6 +370,11 @@ gc_mem = function() { # nocov end } +# Check if UTF-8 symbols can be represented in native encoding +# R's parser requires symbol names (PRINTNAME in LANGSXP) to be in native encoding. In non-UTF-8 +# locales, parsing Unicode escapes like \u00FC fails with a warning and substitutes . +# Tests using requires_utf8 are skipped when UTF-8 cannot be represented. Using eval(parse(text=...)) +# defers parsing to runtime, allowing the encoding check to run first and avoid source() warnings. 
utf8_check = function(test_str) identical(test_str, enc2native(test_str)) test = function(num, x, y=TRUE, diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e21d264e5f..cfe952453d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3592,7 +3592,7 @@ if (utf8_check(c(x1,x2))) { # for completness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) local(if (utf8_check("\u00E4")) { -eval(parse(text=' +eval(parse(text=' # eval(parse()) defers parsing to runtime; see utf8_check description DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA)) setnames(DT, "pas", "p\u00E4s") test(1092, requires_utf8="\u00E4", eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) @@ -4645,7 +4645,7 @@ test(1229.1, DT[forderv(DT, -1)], error="non-existing column") test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) # umlaut in column names (red herring I think, but testing anyway local(if (utf8_check("\u00e4\u00f6\u00fc")) { - eval(parse(text = ' + eval(parse(text = ' # eval(parse()) defers parsing to runtime; see utf8_check description sentEx = data.table(abend = c(1, 1, 0, 0, 2), aber = c(0, 1, 0, 0, 0), "\u00FCber" = c(1, 0, 0, 0, 0), @@ -20908,7 +20908,7 @@ local(if (utf8_check("\u00e4\u00f6\u00fc")) { x = data.table(a = 1, b = 2, c = 3) y = data.table(x = 4, y = 5, z = 6) # a-umlaut, o-umlaut, u-umlaut - eval(parse(text = ' + eval(parse(text = ' # eval(parse()) defers parsing to runtime; see utf8_check description setnames(x , c("\u00e4", "\u00f6", "\u00fc")) setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) From 9f19186fa288e23b02143cb78e5adb3a0e4a909a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:36:43 +0100 Subject: [PATCH 12/21] remove nested eval parse --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cfe952453d..09d16af221 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3595,8 +3595,8 @@ local(if (utf8_check("\u00E4")) { eval(parse(text=' # eval(parse()) defers parsing to runtime; see utf8_check description DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA)) setnames(DT, "pas", "p\u00E4s") - test(1092, requires_utf8="\u00E4", eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) - test(1093, requires_utf8="\u00E4", eval(parse(text="DT[, p\u00E4s := 34L]")), data.table("p\u00E4s" = 34L, good=c(1:10,NA))) + test(1092, DT[is.na(p\u00E4s), p\u00E4s := 99L], data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA))) + test(1093, DT[, p\u00E4s := 34L], data.table("p\u00E4s" = 34L, good=c(1:10,NA))) ')) } else cat("Tests 1092+1093 skipped because required UTF-8 symbols cannot be represented in native encoding.\n")) From 96db2c149653c9565921e7b4eb07fa30cdbcaa4c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:42:49 +0100 Subject: [PATCH 13/21] coding style --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 09d16af221..346b031a89 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4351,8 +4351,8 @@ x <- character(0) test(1163, 
last(x), character(0)) # Bug fix for #5159 - chmatch and character encoding (for some reason this seems to pass the test on a mac as well) -a<-c("a","\u00E4","\u00DF","z") -au<-iconv(a,"UTF8","latin1") +a = c("a","\u00E4","\u00DF","z") +au = iconv(a,"UTF8","latin1") test(1164.1, requires_utf8=c("\u00E4", "\u00DF"), chmatch(a, au), match(a, au)) # Bug fix for #73 - segfault when rbindlist on empty data.tables @@ -4640,7 +4640,7 @@ test(1228.5, class(DT), class(DT[a>1, sum(b), by=a])) test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a])) # savetl_init error after error, in v1.9.2, thanks Arun -DT <- data.table(x=1:5, y=10:6) +DT = data.table(x=1:5, y=10:6) test(1229.1, DT[forderv(DT, -1)], error="non-existing column") test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) # umlaut in column names (red herring I think, but testing anyway From 5fcaaa56c84908ffa71f7e7b06a00d97bc1fdf2a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:51:35 +0100 Subject: [PATCH 14/21] use local instead of multiple requires_utf8 --- inst/tests/tests.Rraw | 92 ++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c8b155ef13..2d21804f49 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18698,56 +18698,60 @@ test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data. rm(.datatable.aware) # tests for trunc.char handling wide characters #5096 +local({ accented_a = "\u0061\u0301" ja_ichi = "\u4E00" ja_ni = "\u4E8C" ja_ko = "\u3053" ja_n = "\u3093" nc = c(accented_a, ja_ichi, ja_ni, ja_ko, ja_n) -dots = "..." -clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output -# Tests for combining character latin a and acute accent, single row -DT = data.table(strrep(accented_a, 4L)) -test(2253.01, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) -test(2253.02, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) -test(2253.03, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) -# Tests for full-width japanese character ichi, single row -DT = data.table(strrep(ja_ichi, 4L)) -test(2253.04, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) -test(2253.05, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) -test(2253.06, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) -# Tests for multiple, different length combining character rows -DT = data.table(strrep(accented_a, 1L:4L)) -test(2253.07, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) -test(2253.08, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) -test(2253.09, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) -# Tests for multiple, different length full-width characters -DT = data.table(strrep(ja_ichi, 1L:4L)) -test(2253.10, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), 
gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) -test(2253.11, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) -test(2253.12, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) -# Tests for combined characters, multiple columns -DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa") -test(2253.13, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) -test(2253.14, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) -test(2253.15, requires_utf8=nc, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) -test(2253.16, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) -# Tests for multiple columns, multiple rows -DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3)) -test(2253.17, requires_utf8=nc, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), - paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), - paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L)))) -test(2253.18, requires_utf8=nc, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), - paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), - paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L)))) -test(2253.19, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), - c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)), - paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "), - paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) -# test for data.table with NA, #6441 -test(2253.20, requires_utf8=nc, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") +if (utf8_check(nc)) { + dots = "..." 
+ clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output + # Tests for combining character latin a and acute accent, single row + DT = data.table(strrep(accented_a, 4L)) + test(2253.01, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L)) + test(2253.02, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots)) + test(2253.03, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots)) + # Tests for full-width japanese character ichi, single row + DT = data.table(strrep(ja_ichi, 4L)) + test(2253.04, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L)) + test(2253.05, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots)) + test(2253.06, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots)) + # Tests for multiple, different length combining character rows + DT = data.table(strrep(accented_a, 1L:4L)) + test(2253.07, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L)) + test(2253.08, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots))) + test(2253.09, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L))) + # Tests for multiple, different length full-width characters + DT = data.table(strrep(ja_ichi, 1L:4L)) + test(2253.10, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L)) + test(2253.11, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots))) + test(2253.12, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L))) + # Tests for combined characters, multiple columns + DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa") + test(2253.13, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) + test(2253.14, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")) + test(2253.15, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa...")) + test(2253.16, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a...")) + # Tests for multiple columns, multiple rows + DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3)) + test(2253.17, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), + paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), + paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L)))) + test(2253.18, 
options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)), + paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)), + paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L)))) + test(2253.19, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), + c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)), + paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "), + paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) + # test for data.table with NA, #6441 + test(2253.20, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") +} else cat("Tests 2253.* skipped because required UTF-8 symbols cannot be represented in native encoding.\n") +}) # allow 1-D matrix in j for consistency, #783 DT=data.table(a = rep(1:2, 3), b = 1:6) From d48b26953f461f893f54ad2b7f3437fd36158607 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 13:54:04 +0100 Subject: [PATCH 15/21] restore cat --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2d21804f49..8606ec219c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18750,7 +18750,8 @@ if (utf8_check(nc)) { paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "))) # test for data.table with NA, #6441 test(2253.20, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") -} else cat("Tests 2253.* skipped because required UTF-8 symbols cannot be represented in native encoding.\n") +} else { + cat("Tests 2253* skipped because they need a UTF-8 locale.\n") }) # allow 1-D matrix in j for consistency, #783 From 8d8597213be383c4c00965987b78613fb9ad39f5 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 3 Jan 2026 14:06:00 +0100 Subject: [PATCH 16/21] fix bracket --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8606ec219c..884818bee6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18752,7 +18752,7 @@ if (utf8_check(nc)) { test(2253.20, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: ") } else { cat("Tests 2253* skipped because they need a UTF-8 locale.\n") -}) +}}) # allow 1-D matrix in j for consistency, #783 DT=data.table(a = rep(1:2, 3), b = 1:6) From c9c843f0689714988c84344f7ebdf1c7ae3e9c5e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 10 Jan 2026 23:35:05 -0800 Subject: [PATCH 17/21] nit: typo --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4d7ef4c170..d517dfde83 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3589,7 +3589,7 @@ if (utf8_check(c(x1,x2))) { test(1091.2, tstc(x1) %chin% tstc(x2), tstc(x1) %in% tstc(x2)) } else cat("Tests 1088-1091 skipped because required UTF-8 symbols cannot be represented in native encoding.\n") }) -# for completness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) +# for completeness, include test from #2528 of non ascii LHS of := (it could feasibly fail in future due to something other than chmatch) local(if 
(utf8_check("\u00E4")) { eval(parse(text=' # eval(parse()) defers parsing to runtime; see utf8_check description From 6d10dc977e9fb259220e017075096b856a49b306 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 10 Jan 2026 23:37:13 -0800 Subject: [PATCH 18/21] nit: match ')' --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d517dfde83..2606a4c80c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4644,7 +4644,7 @@ test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a])) DT = data.table(x=1:5, y=10:6) test(1229.1, DT[forderv(DT, -1)], error="non-existing column") test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) -# umlaut in column names (red herring I think, but testing anyway +# umlaut in column names (red herring I think, but testing anyway) local(if (utf8_check("\u00e4\u00f6\u00fc")) { eval(parse(text = ' # eval(parse()) defers parsing to runtime; see utf8_check description sentEx = data.table(abend = c(1, 1, 0, 0, 2), From d4c3fa8a5230879bf766b2faf57db9805a3259cf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 10 Jan 2026 23:41:18 -0800 Subject: [PATCH 19/21] prefer passing characters individually --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2606a4c80c..2c1d782c51 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20909,7 +20909,7 @@ x = data.table(a=1, b=2L) y = data.table(c=1.5, d=1L) test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b") -local(if (utf8_check("\u00e4\u00f6\u00fc")) { +local(if (utf8_check(c("\u00e4", "\u00f6", "\u00fc"))) { # rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452 x = data.table(a = 1, b = 2, c = 3) y = data.table(x = 4, y = 5, z = 6) From 0d352ef67c799ffd22f41b104fc3dc2243fa0139 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 11 Jan 2026 17:04:34 +0100 Subject: [PATCH 20/21] simplify check --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2c1d782c51..1fb831c2bc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4645,7 +4645,7 @@ DT = data.table(x=1:5, y=10:6) test(1229.1, DT[forderv(DT, -1)], error="non-existing column") test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y")) # umlaut in column names (red herring I think, but testing anyway) -local(if (utf8_check("\u00e4\u00f6\u00fc")) { +local(if (utf8_check("\u00fc")) { eval(parse(text = ' # eval(parse()) defers parsing to runtime; see utf8_check description sentEx = data.table(abend = c(1, 1, 0, 0, 2), aber = c(0, 1, 0, 0, 0), From 1d18cff17f7ce62f267d5c1bf7c9d2a877b9769b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 11 Jan 2026 17:10:54 +0100 Subject: [PATCH 21/21] add comment about default test_str --- R/test.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index ea34d6c8bc..282d71bca0 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -396,7 +396,7 @@ test = function(num, x, y=TRUE, } # Check UTF-8 requirement if (!isFALSE(requires_utf8)) { - test_str = if (isTRUE(requires_utf8)) "\u00F1\u00FC\u3093" else requires_utf8 + test_str = if (isTRUE(requires_utf8)) "\u00F1\u00FC\u3093" else requires_utf8 # the default test_str are UTF-8 symbols 
we found over time; TODO: harden this default
     if (!utf8_check(test_str)) {
       # nocov start
       last_utf8_skip = get0("last_utf8_skip", parent.frame(), ifnotfound=0, inherits=TRUE)