From 5e335822f0951b7923581ef9ab96d5c27430a5f0 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Sun, 7 Sep 2025 15:40:38 +0200 Subject: [PATCH 1/4] fix out-of-bounds error for quick check --- R/methylDBFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/methylDBFunctions.R b/R/methylDBFunctions.R index dd8e8ec..825ac84 100644 --- a/R/methylDBFunctions.R +++ b/R/methylDBFunctions.R @@ -704,7 +704,7 @@ unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL, destrandFun <- function(obj){ ## if resolution is not base or if strand is * then return object - if(obj@resolution != "base" || any(obj[1:100]$strand == "*")) {return(obj)} + if(obj@resolution != "base" || any(headTabix(obj@dbpath)$strand == "*")) {return(obj)} dir <- dirname(obj@dbpath) filename <- paste(gsub(".txt.bgz","",obj@dbpath), From e0f64a522f8953170ab9d0a2d025e23b4a1c8c7d Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Sun, 7 Sep 2025 15:56:28 +0200 Subject: [PATCH 2/4] add tests for destranding functionality and reproduce issue #296 --- tests/testthat/test-destrand.R | 266 +++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 tests/testthat/test-destrand.R diff --git a/tests/testthat/test-destrand.R b/tests/testthat/test-destrand.R new file mode 100644 index 0000000..8f765c5 --- /dev/null +++ b/tests/testthat/test-destrand.R @@ -0,0 +1,266 @@ +context("test for known destranding issues") + +## reproduce issue 296 +write(file = "ctrl_cytosineReport.tst",x = + "chr12__pp 3000261 - 0 11 CHH CCT +chr12__pp 3000262 - 0 12 CHH CCC +chr12__pp 3000264 + 0 17 CHH CCA +chr12__pp 3000265 + 0 17 CHG CAG +chr12__pp 3000267 - 0 13 CHG CTG +chr12__pp 3000270 + 0 17 CHG CCG +chr12__pp 3000271 + 17 0 CG CGG +chr12__pp 3000286 - 0 14 CHH CCC +chr12__pp 3000289 - 0 13 CHH CTT +chr1__pp 3000286 - 0 14 CHH CCC +chr1__pp 3000289 - 0 13 CHH CTT +chr1__pp 3000290 - 0 13 CHH CCT +chr1__pp 3000291 - 0 13 CHH CCC +chr1__pp 3000292 - 0 12 CHH CCC +chr1__pp 3000293 + 0 16 CHH CTC +chr1__pp 3000295 + 15 1 CG CGG +chr1__pp 3000296 - 11 1 CG CGA +chr1__pp 3000297 - 0 12 CHG CCG +chr1__pp 3000301 - 0 11 CHH CAA +chr1__pp 3000303 - 0 11 CHH CAC +chr1__pp 3000310 + 0 16 CHH CAC +chr12__pp 3000290 - 0 13 CHH CCT +chr12__pp 3000291 - 0 13 CHH CCC +chr12__pp 3000292 - 0 12 CHH CCC +chr12__pp 3000293 + 0 16 CHH CTC +chr12__pp 3000295 + 15 1 CG CGG +chr12__pp 3000296 - 11 1 CG CGA +chr12__pp 3000297 - 0 12 CHG CCG +chr12__pp 3000301 - 0 11 CHH CAA +chr12__pp 3000303 - 0 11 CHH CAC +chr12__pp 3000310 + 0 16 CHH CAC +chr12__pp 3000312 + 0 16 CHH CAA +chr12__pp 3000315 - 0 12 CHH CTT +chr12__pp 3000316 - 0 12 CHH CCT +chr1__pp0 3000261 - 0 11 CHH CCT +chr1__pp0 3000262 - 0 12 CHH CCC +chr1__pp0 3000264 + 0 17 CHH CCA +chr1__pp0 3000284 - 0 14 CHH CCA +chr1__pp0 3000285 - 0 14 CHH CCC +chr1__pp0 3000286 - 0 14 CHH CCC +chr1__pp0 3000289 - 0 13 CHH CTT +chr1__pp0 3000290 - 0 13 CHH CCT +chr1__pp0 3000291 - 0 13 CHH CCC +chr1__pp0 3000292 - 0 12 CHH CCC +chr1__pp0 3000293 + 0 16 CHH CTC +chr1__pp0 3000295 + 15 1 CG CGG +chr1__pp0 3000296 - 11 1 CG CGA +chr1__pp0 3000297 - 0 12 CHG CCG +chr1__pp0 3000301 - 0 11 CHH CAA +chr1__pp0 3000303 - 0 11 CHH CAC +chr1__pp0 3000310 + 0 16 CHH CAC +chr1__pp0 3000312 + 0 16 CHH CAA +chr1__pp0 3000315 - 0 12 CHH CTT +chr1__pp0 3000316 - 0 12 CHH CCT +chr9__pp 3000261 - 0 11 CHH CCT +chr9__pp 3000262 - 0 12 CHH CCC +chr9__pp 3000264 + 0 17 CHH CCA +chr9__pp 3000265 + 0 17 CHG CAG +chr9__pp 3000267 - 0 13 CHG CTG +chr9__pp 3000270 + 0 17 CHG CCG +chr9__pp 3000271 + 17 0 CG CGG +chr9__pp 3000272 - 12 1 CG CGG +chr9__pp 3000273 - 0 13 CHG CCG +chr9__pp 3000276 + 0 17 CHH CTA +chr9__pp 3000281 + 1 16 CHG CTG +chr9__pp 3000283 - 0 13 CHG CAG +chr9__pp 3000284 - 0 14 CHH CCA +chr9__pp 3000285 - 0 14 CHH CCC +chr9__pp 3000286 - 0 14 CHH CCC +chr9__pp 3000289 - 0 13 CHH CTT +chr9__pp 3000290 - 0 13 CHH CCT +chr9__pp 3000291 - 0 13 CHH CCC +chr9__pp 3000292 - 0 12 CHH CCC +chr1__pp0 3000265 + 0 17 CHG CAG +chr1__pp0 3000267 - 0 13 CHG CTG +chr1__pp0 3000270 + 0 17 CHG CCG +chr1__pp0 3000271 + 17 0 CG CGG +chr1__pp0 3000272 - 12 1 CG CGG +chr1__pp0 3000273 - 0 13 CHG CCG +chr1__pp0 3000276 + 0 17 CHH CTA +chr1__pp0 3000281 + 1 16 CHG CTG +chr1__pp0 3000283 - 0 13 CHG CAG +chr1__pp 3000262 - 0 12 CHH CCC +chr1__pp 3000264 + 0 17 CHH CCA +chr1__pp 3000265 + 0 17 CHG CAG +chr1__pp 3000267 - 0 13 CHG CTG +chr1__pp 3000270 + 0 17 CHG CCG +chr1__pp 3000271 + 17 0 CG CGG +chr1__pp 3000272 - 12 1 CG CGG +chr1__pp 3000273 - 0 13 CHG CCG +chr1__pp 3000276 + 0 17 CHH CTA +chr1__pp 3000281 + 1 16 CHG CTG +chr1__pp 3000283 - 0 13 CHG CAG +chr1__pp 3000284 - 0 14 CHH CCA +chr1__pp 3000285 - 0 14 CHH CCC +chr1__pp 3000312 + 0 16 CHH CAA +chr1__pp 3000315 - 0 12 CHH CTT +chr1__pp 3000316 - 0 12 CHH CCT +chr12__pp 3000272 - 12 1 CG CGG +chr12__pp 3000273 - 0 13 CHG CCG +chr12__pp 3000276 + 0 17 CHH CTA +chr12__pp 3000281 + 1 16 CHG CTG +chr12__pp 3000283 - 0 13 CHG CAG +chr12__pp 3000284 - 0 14 CHH CCA +chr12__pp 3000285 - 0 14 CHH CCC +chr9__pp 3000293 + 0 16 CHH CTC +chr9__pp 3000295 + 15 1 CG CGG +chr9__pp 3000296 - 11 1 CG CGA +chr9__pp 3000297 - 0 12 CHG CCG +chr9__pp 3000301 - 0 11 CHH CAA +chr9__pp 3000303 - 0 11 CHH CAC +chr9__pp 3000310 + 0 16 CHH CAC +chr9__pp 3000312 + 0 16 CHH CAA +chr9__pp 3000315 - 0 12 CHH CTT +chr9__pp 3000316 - 0 12 CHH CCT +chr1__pp 3000261 - 0 11 CHH CCT" +) + +write(file="test_cytosineReport.tst", x = + "chr1__pp2__pp 3000261 - 0 11 CHH CCT +chr1__pp2__pp 3000262 - 0 12 CHH CCC +chr1__pp2__pp 3000264 + 0 17 CHH CCA +chr1__pp2__pp 3000265 + 0 17 CHG CAG +chr1__pp2__pp 3000267 - 0 13 CHG CTG +chr1__pp2__pp 3000270 + 0 17 CHG CCG +chr1__pp2__pp 3000271 + 17 0 CG CGG +chr1__pp2__pp 3000272 - 12 1 CG CGG +chr1__pp2__pp 3000273 - 0 13 CHG CCG +chr1__pp2__pp 3000276 + 0 17 CHH CTA +chr1__pp2__pp 3000281 + 1 16 CHG CTG +chr1__pp2__pp 3000283 - 0 13 CHG CAG +chr1__pp2__pp 3000284 - 0 14 CHH CCA +chr1__pp2__pp 3000285 - 0 14 CHH CCC +chr1__pp2__pp 3000286 - 0 14 CHH CCC +chr1__pp2__pp 3000289 - 0 13 CHH CTT +chr1__pp2__pp 3000290 - 0 13 CHH CCT +chr1__pp2__pp 3000291 - 0 13 CHH CCC +chr1__pp2__pp 3000292 - 0 12 CHH CCC +chr1__pp2__pp 3000293 + 0 16 CHH CTC +chr1__pp2__pp 3000295 + 15 1 CG CGG +chr1__pp2__pp 3000296 - 11 1 CG CGA +chr1__pp2__pp 3000297 - 0 12 CHG CCG +chr1__pp2__pp 3000301 - 0 11 CHH CAA +chr1__pp2__pp 3000303 - 0 11 CHH CAC +chr1__pp2__pp 3000310 + 0 16 CHH CAC +chr1__pp2__pp 3000312 + 0 16 CHH CAA +chr1__pp2__pp 3000315 - 0 12 CHH CTT +chr1__pp2__pp 3000316 - 0 12 CHH CCT +chr1__pp0 3000261 - 0 11 CHH CCT +chr1__pp0 3000262 - 0 12 CHH CCC +chr1__pp0 3000264 + 0 17 CHH CCA +chr1__pp0 3000265 + 0 17 CHG CAG +chr1__pp0 3000267 - 0 13 CHG CTG +chr1__pp0 3000270 + 0 17 CHG CCG +chr1__pp0 3000271 + 17 0 CG CGG +chr1__pp0 3000272 - 12 1 CG CGG +chr1__pp0 3000273 - 0 13 CHG CCG +chr1__pp0 3000276 + 0 17 CHH CTA +chr1__pp0 3000281 + 1 16 CHG CTG +chr1__pp0 3000283 - 0 13 CHG CAG +chr1__pp0 3000284 - 0 14 CHH CCA +chr1__pp0 3000285 - 0 14 CHH CCC +chr1__pp0 3000286 - 0 14 CHH CCC +chr1__pp0 3000289 - 0 13 CHH CTT +chr1__pp0 3000290 - 0 13 CHH CCT +chr1__pp0 3000291 - 0 13 CHH CCC +chr1__pp0 3000292 - 0 12 CHH CCC +chr1__pp0 3000293 + 0 16 CHH CTC +chr1__pp0 3000295 + 15 1 CG CGG +chr1__pp0 3000296 - 11 1 CG CGA +chr1__pp0 3000297 - 0 12 CHG CCG +chr1__pp0 3000301 - 0 11 CHH CAA +chr1__pp0 3000303 - 0 11 CHH CAC +chr1__pp0 3000310 + 0 16 CHH CAC +chr1__pp0 3000312 + 0 16 CHH CAA +chr1__pp0 3000315 - 0 12 CHH CTT +chr1__pp0 3000316 - 0 12 CHH CCT +chr9__pp 3000261 - 0 11 CHH CCT +chr9__pp 3000262 - 0 12 CHH CCC +chr9__pp 3000264 + 0 17 CHH CCA +chr9__pp 3000265 + 0 17 CHG CAG +chr9__pp 3000267 - 0 13 CHG CTG +chr9__pp 3000270 + 0 17 CHG CCG +chr9__pp 3000271 + 17 0 CG CGG +chr9__pp 3000272 - 12 1 CG CGG +chr9__pp 3000273 - 0 13 CHG CCG +chr9__pp 3000276 + 0 17 CHH CTA +chr9__pp 3000281 + 1 16 CHG CTG +chr9__pp 3000283 - 0 13 CHG CAG +chr9__pp 3000284 - 0 14 CHH CCA +chr9__pp 3000285 - 0 14 CHH CCC +chr9__pp 3000286 - 0 14 CHH CCC +chr9__pp 3000289 - 0 13 CHH CTT +chr9__pp 3000290 - 0 13 CHH CCT +chr9__pp 3000291 - 0 13 CHH CCC +chr9__pp 3000292 - 0 12 CHH CCC +chr9__pp 3000293 + 0 16 CHH CTC +chr9__pp 3000295 + 15 1 CG CGG +chr9__pp 3000296 - 11 1 CG CGA +chr9__pp 3000297 - 0 12 CHG CCG +chr9__pp 3000301 - 0 11 CHH CAA +chr9__pp 3000303 - 0 11 CHH CAC +chr9__pp 3000310 + 0 16 CHH CAC +chr9__pp 3000312 + 0 16 CHH CAA +chr9__pp 3000315 - 0 12 CHH CTT +chr9__pp 3000316 - 0 12 CHH CCT +chr1__pp 3000261 - 0 11 CHH CCT +chr1__pp 3000262 - 0 12 CHH CCC +chr1__pp 3000264 + 0 17 CHH CCA +chr1__pp 3000265 + 0 17 CHG CAG +chr1__pp 3000267 - 0 13 CHG CTG +chr1__pp 3000270 + 0 17 CHG CCG +chr1__pp 3000271 + 17 0 CG CGG +chr1__pp 3000272 - 12 1 CG CGG +chr1__pp 3000273 - 0 13 CHG CCG +chr1__pp 3000276 + 0 17 CHH CTA +chr1__pp 3000281 + 1 16 CHG CTG +chr1__pp 3000283 - 0 13 CHG CAG +chr1__pp 3000284 - 0 14 CHH CCA +chr1__pp 3000285 - 0 14 CHH CCC +chr1__pp 3000286 - 0 14 CHH CCC +chr1__pp 3000289 - 0 13 CHH CTT +chr1__pp 3000290 - 0 13 CHH CCT +chr1__pp 3000291 - 0 13 CHH CCC +chr1__pp 3000292 - 0 12 CHH CCC +chr1__pp 3000293 + 0 16 CHH CTC +chr1__pp 3000295 + 15 1 CG CGG +chr1__pp 3000296 - 11 1 CG CGA +chr1__pp 3000297 - 0 12 CHG CCG +chr1__pp 3000301 - 0 11 CHH CAA +chr1__pp 3000303 - 0 11 CHH CAC +chr1__pp 3000310 + 0 16 CHH CAC +chr1__pp 3000312 + 0 16 CHH CAA +chr1__pp 3000315 - 0 12 CHH CTT +chr1__pp 3000316 - 0 12 CHH CCT") + + + +files <- list("test_cytosineReport.tst","ctrl_cytosineReport.tst") + +myobj_test <- methRead(files, + sample.id = list("test","ctrl"), + assembly = "misc", + dbtype="tabix", + pipeline = "bismarkCytosineReport", + header=TRUE, + sep="\t", + context="CpG", + resolution="base", + dbdir="CpG_methylDB_test", + treatment=c(2,1), + mincov=1) + +test_that("test if unite with destranding works as tabix", { + expect_is(meth_test_db <- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = TRUE), "methylBaseDB") +}) +test_that("test if unite with destranding works in memory and as tabix", { + expect_is(meth_test_mem <- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = FALSE), "methylBase") +}) +test_that("test if output of unite with destranding is the same in memory and as tabix", { + expect_identical(getData(meth_test_mem), getData(meth_test_db)) +}) From 019afb811a88573ff9e7d486849a6723f15cbfe3 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Sun, 7 Sep 2025 16:23:37 +0200 Subject: [PATCH 3/4] apply destranding per chromosome --- R/methylDBFunctions.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/methylDBFunctions.R b/R/methylDBFunctions.R index 825ac84..9a0000f 100644 --- a/R/methylDBFunctions.R +++ b/R/methylDBFunctions.R @@ -701,7 +701,7 @@ unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL, message("destranding...") - destrandFun <- function(obj){ + destrandFun <- function(obj, mc.cores=1){ ## if resolution is not base or if strand is * then return object if(obj@resolution != "base" || any(headTabix(obj@dbpath)$strand == "*")) {return(obj)} @@ -726,15 +726,16 @@ unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL, tabixHead = tabixHead) # need to use .CpG.dinuc.unifyOld because output needs to be ordered - newdbpath <- applyTbxByChunk(obj@dbpath, - chunk.size = chunk.size, - dir=dir,filename = filename, + newdbpath <- applyTbxByChr(obj@dbpath, + dir=dir, + filename = filename, return.type = "tabix", FUN = function(x) { .CpG.dinuc.unify( .setMethylDBNames(x,"methylRawDB") )}, - tabixHead = tabixHeadString) + tabixHead = tabixHeadString, + mc.cores = mc.cores) readMethylRawDB(dbpath = newdbpath,dbtype = "tabix", sample.id = obj@sample.id, @@ -742,7 +743,7 @@ unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL, resolution = obj@resolution) } - new.list=suppressMessages(lapply(object,destrandFun)) + new.list=suppressMessages(lapply(object,destrandFun, mc.cores = mc.cores)) object <- new("methylRawListDB", new.list,treatment=object@treatment) ## on exit remove all destrand files From b2a0dd740f47659426650b421b59bd395fd78cbc Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Sun, 7 Sep 2025 16:40:23 +0200 Subject: [PATCH 4/4] store test results in parent environment for reuse in later tests --- tests/testthat/test-destrand.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-destrand.R b/tests/testthat/test-destrand.R index 8f765c5..8f22c63 100644 --- a/tests/testthat/test-destrand.R +++ b/tests/testthat/test-destrand.R @@ -256,10 +256,10 @@ myobj_test <- methRead(files, mincov=1) test_that("test if unite with destranding works as tabix", { - expect_is(meth_test_db <- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = TRUE), "methylBaseDB") + expect_is(meth_test_db <<- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = TRUE), "methylBaseDB") }) test_that("test if unite with destranding works in memory and as tabix", { - expect_is(meth_test_mem <- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = FALSE), "methylBase") + expect_is(meth_test_mem <<- unite(myobj_test, destrand=TRUE, min.per.group=1L, chunk.size = 1e1, save.db = FALSE), "methylBase") }) test_that("test if output of unite with destranding is the same in memory and as tabix", { expect_identical(getData(meth_test_mem), getData(meth_test_db))