diff --git a/NEWS.md b/NEWS.md index 2f38cf6..6128c3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# monolix2rx 0.0.7 + +* Fix implicit `ptrdiff_t` to `int` truncation in `rc_dup_str` + (`src/shared.c`). When the parser passes a string segment longer than + `INT_MAX` bytes (or a NUL-terminated string of that length), the + pointer difference / `strlen` result was silently cast to `int`, + truncating the length to a wrong (often negative) value. The new + guard rejects such inputs with an informative R error. Also adds a + thread-safety comment documenting that the parser globals are + intentionally not mutex-protected (R is single-threaded). + # monolix2rx 0.0.6 * Updated to add types for rstudio completion diff --git a/src/shared.c b/src/shared.c index d7e48d1..7699aab 100644 --- a/src/shared.c +++ b/src/shared.c @@ -19,6 +19,10 @@ dparserPtrIni #include "parseSyntaxErrors.h" // These are the shared variables +// NOTE: These globals are intentionally not mutex-protected. +// R's interpreter is single-threaded; its memory allocator (R_Calloc, R_Free) +// and error handling (Rf_error) are not safe to call from multiple threads. +// All parse operations must occur on the R main thread. const char *record; int _rxode2_reallyHasAfter = 0; @@ -41,8 +45,20 @@ int lastStrLoc=0; vLines _dupStrs; char * rc_dup_str(const char *s, const char *e) { lastStr=s; - int l = e ? e-s : (int)strlen(s); - //syntaxErrorExtra=min(l-1, 40); + int l; + if (e) { + ptrdiff_t diff = e - s; + if (diff < 0 || diff > (ptrdiff_t)INT_MAX) { + Rf_error(_("string segment too long in rc_dup_str")); + } + l = (int)diff; + } else { + size_t sLen = strlen(s); + if (sLen > (size_t)INT_MAX) { + Rf_error(_("string too long in rc_dup_str")); + } + l = (int)sLen; + } addLine(&_dupStrs, "%.*s", l, s); return _dupStrs.line[_dupStrs.n-1]; } diff --git a/tests/testthat/test-mem-rc-dup-str.R b/tests/testthat/test-mem-rc-dup-str.R new file mode 100644 index 0000000..ce8a779 --- /dev/null +++ b/tests/testthat/test-mem-rc-dup-str.R @@ -0,0 +1,44 @@ +test_that("rc_dup_str handles normal-sized inputs without error", { + # Sanity check: regular Mlxtran fragments must continue to parse cleanly + # after the INT_MAX guards added to rc_dup_str. + expect_no_error( + tryCatch( + .Call(`_monolix2rx_trans_equation`, + "[LONGITUDINAL] EQUATION:\nf = exp(-k*t)\n", + "[LONGITUDINAL] EQUATION:"), + error = function(e) { + if (grepl("rc_dup_str", conditionMessage(e))) stop(e) + # Other parse errors (e.g., from synthetic test input) are fine. + NULL + } + ) + ) +}) + +test_that("rc_dup_str int truncation guard triggers on huge inputs (skipped: requires ~2GB RAM)", { + skip("Requires ~2GB free RAM to construct a >INT_MAX-byte string and exercise the rc_dup_str path") + # --- What this test checks --- + # `rc_dup_str` (src/shared.c) computes the length as + # int l = e ? e-s : (int)strlen(s); + # When the source string segment is larger than INT_MAX bytes, the + # `(int)` cast silently truncates to a wrong value, which propagates + # into `addLine(&_dupStrs, "%.*s", l, s)` and either reads past the + # buffer (OOB) or wraps to a tiny copy. + # + # The guard added by this fix range-checks the ptrdiff_t / size_t + # length and raises an R error before the truncation can happen. + # + # --- How to run manually (outside devtools::test()) --- + # Start a fresh R session with at least 3 GB of available RAM, then: + # + # library(monolix2rx) + # big <- strrep("a", 2147483647L) # exactly INT_MAX bytes + # # Before fix: silent truncation; potential heap corruption + # # After fix: error "string too long in rc_dup_str" + + big <- strrep("a", 2147483647L) + expect_error( + .Call(`_monolix2rx_trans_equation`, big, "[LONGITUDINAL]"), + "rc_dup_str|too long" + ) +})