From 6ce7288ab54660de4a96188689ab17f07fd5e54c Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Fri, 1 May 2026 21:26:49 +0000 Subject: [PATCH] Use size_t for getLine column accumulator to prevent int overflow `getLine` (src/parseSyntaxErrors.h) walks `src` to find the column of a syntax error. The column accumulator was a signed `int col` and the subsequent allocation was `R_Calloc(col + 1, char)`. On a source line wider than INT_MAX bytes, `col` wraps to a negative int; the allocation either fails or is undersized, and the following `memcpy(buf, src + i, col)` writes past the buffer. Switch the accumulator to `size_t`, add an explicit `col == INT_MAX` guard inside the column-counting loop, and add an `i + col > INT_MAX` overflow check before the cast back to `int` for the R_Calloc call. Adds tests/testthat/test-mem-getline-col-overflow.R as a regression test. The boundary test is `skip()`ed because it requires constructing an INT_MAX-byte single-line input. Co-Authored-By: Claude Opus 4.7 (1M context) --- NEWS.md | 11 +++++ src/parseSyntaxErrors.h | 16 ++++++-- .../testthat/test-mem-getline-col-overflow.R | 40 +++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-mem-getline-col-overflow.R diff --git a/NEWS.md b/NEWS.md index 2f38cf6..dc5925a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# monolix2rx 0.0.7 + +* Fix `int col` overflow in `getLine` (`src/parseSyntaxErrors.h`). When + reporting a syntax error, `getLine` walks the source string to locate + the offending line. The column accumulator was a signed `int` that + could wrap on lines wider than `INT_MAX` bytes. After the wrap, + `R_Calloc(col + 1, char)` received a tiny (or negative) size, and the + subsequent `memcpy(buf, src + i, col)` then wrote past the + allocation. The fix uses `size_t` for the accumulator and adds + explicit bounds checks before the cast back to `int`. + # monolix2rx 0.0.6 * Updated to add types for rstudio completion diff --git a/src/parseSyntaxErrors.h b/src/parseSyntaxErrors.h index acd4594..a4b7926 100644 --- a/src/parseSyntaxErrors.h +++ b/src/parseSyntaxErrors.h @@ -21,13 +21,21 @@ #define syntaxErrorExtra monolix2rx_syntaxErrorExtra static inline char *getLine (char *src, int line, int *lloc) { - int cur = 1, col=0, i; + int cur = 1, i; + size_t col = 0; for(i = 0; src[i] != '\0' && cur != line; i++){ if(src[i] == '\n') cur++; } - for(col = 0; src[i + col] != '\n' && src[i + col] != '\0'; col++); - *lloc=i+col; - char *buf = R_Calloc(col + 1, char); + for(col = 0; src[i + col] != '\n' && src[i + col] != '\0'; col++){ + if (col == (size_t)INT_MAX) { + Rf_error(_("line too long in getLine")); + } + } + if ((size_t)i + col > (size_t)INT_MAX) { + Rf_error(_("source offset overflow in getLine")); + } + *lloc = i + (int)col; + char *buf = R_Calloc((int)col + 1, char); memcpy(buf, src + i, col); buf[col] = '\0'; return buf; diff --git a/tests/testthat/test-mem-getline-col-overflow.R b/tests/testthat/test-mem-getline-col-overflow.R new file mode 100644 index 0000000..6364ca6 --- /dev/null +++ b/tests/testthat/test-mem-getline-col-overflow.R @@ -0,0 +1,40 @@ +test_that("getLine handles normal-sized lines without error", { + # Sanity check: regular syntax errors still report cleanly after the + # `int col` -> `size_t col` change in getLine. + result <- tryCatch( + .Call(`_monolix2rx_trans_equation`, + "[LONGITUDINAL] EQUATION:\nf = !!!syntax error here!!!\n", + "[LONGITUDINAL] EQUATION:"), + error = function(e) conditionMessage(e), + warning = function(w) conditionMessage(w) + ) + # Either an R error/warning is raised or a value is returned; + # specifically, the new "line too long in getLine" must NOT appear. + expect_false(grepl("line too long in getLine|source offset overflow", as.character(result))) +}) + +test_that("getLine col overflow guard triggers on >INT_MAX-byte line (skipped: requires ~2GB RAM)", { + skip("Requires ~2GB free RAM to construct an >INT_MAX-byte single-line input") + # --- What this test checks --- + # `getLine` (src/parseSyntaxErrors.h) walks `src` to find the column of + # a syntax error. The column accumulator was `int col` and the + # subsequent allocation was `R_Calloc(col + 1, char)`. When the line + # is wider than INT_MAX bytes, `col` wraps to a negative int; the + # allocation either fails or is undersized, and the following + # `memcpy(buf, src + i, col)` writes past the buffer. + # + # The fix changes `col` to `size_t` and bounds-checks before the cast + # back to `int` for the R_Calloc call. + # + # --- How to trigger manually --- + # Construct a single-line input that is wider than INT_MAX bytes and + # ends with a syntax error, so that getLine is actually exercised on + # the giant line. + + giant_line <- strrep("x_var_abc", 200000000L) # ~1.8 GB on a single line + bad <- paste0(giant_line, " = !!!bad") + expect_error( + .Call(`_monolix2rx_trans_equation`, bad, "[LONGITUDINAL]"), + "line too long in getLine|source offset overflow" + ) +})