From 65edd1857e0ca98c586dc3f3748308980780373e Mon Sep 17 00:00:00 2001
From: Alexander Blume
Date: Sun, 7 Sep 2025 13:14:50 +0200
Subject: [PATCH 1/2] add test for .setMethylDBNames function to verify correct
 name assignment

---
 tests/testthat/test-setMethylDBNames.R | 50 +++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 tests/testthat/test-setMethylDBNames.R

diff --git a/tests/testthat/test-setMethylDBNames.R b/tests/testthat/test-setMethylDBNames.R
new file mode 100644
index 0000000..9333c06
--- /dev/null
+++ b/tests/testthat/test-setMethylDBNames.R
@@ -0,0 +1,50 @@
+context("make sure setting names of known classes works")
+
+
+# https://github.com/al2na/methylKit/issues/360
+
+
+data("methylKit")
+
+mdb_diff <- methylDiff.obj |> makeMethylDB(dbdir = tempdir())
+
+test_that("check if .setMethylDBNames works", {
+  expect_equal(
+    mdb_diff |>
+      getDBPath() |>
+      headTabix() |>
+      (function(x) {
+        x$V5 <- as.integer(x$V5)
+        x$V6 <- as.integer(x$V6)
+        return(x)
+      })() |>
+      .setMethylDBNames() |> names(),
+    names(methylDiff.obj)
+  )
+})
+
+mdb_diff |> getDBPath() |> unlink()
+
+# one-row stand-in for methylRaw data: integer coverage, numCs and numTs
+raw_like <- function() {
+  data.frame(V1 = "chr1", V2 = 1L, V3 = 1L, V4 = "+",
+             V5 = 10L, V6 = 4L, V7 = 6L)
+}
+
+test_that(".setMethylDBNames guesses methylRawDB from integer columns", {
+  raw_names <- c("chr", "start", "end", "strand",
+                 "coverage", "numCs", "numTs")
+  # data.frame and data.table treat a single `[` index differently, so the
+  # guess is checked on both
+  expect_equal(names(.setMethylDBNames(raw_like())), raw_names)
+  expect_equal(
+    names(.setMethylDBNames(data.table::as.data.table(raw_like()))),
+    raw_names
+  )
+})
+
+test_that(".setMethylDBNames rejects malformed input", {
+  expect_error(.setMethylDBNames(raw_like()[1:6]))
+  expect_error(.setMethylDBNames(raw_like(), methylDBclass = "methylFoo"))
+  expect_error(.setMethylDBNames(cbind(raw_like(), V8 = 1L)))
+})

From 48a495a9a9a21ddf2b4e3e5a4dae955a40c66eb9 Mon Sep 17 00:00:00 2001
From: Alexander Blume
Date: Sun, 7 Sep 2025 13:15:22 +0200
Subject: [PATCH 2/2] refactor .setMethylDBNames function to improve column
 name assignment and add validation for input data frame

this fixes https://github.com/al2na/methylKit/issues/360
---
 R/methylDBClasses.R | 126 ++++++++++++++++++++++----------------------------
 1 file changed, 62 insertions(+), 64 deletions(-)

diff --git a/R/methylDBClasses.R b/R/methylDBClasses.R
index 31609b7..0c310a2 100644
--- a/R/methylDBClasses.R
+++ b/R/methylDBClasses.R
@@ -7,77 +7,75 @@
 ## flat file database
 ## @param df data.frame containing methylRaw or methylBase data
 ## @param methylDBclass
-.setMethylDBNames <- function(df,
-                              methylDBclass=c("methylRawDB","methylBaseDB",
-                                              "methylDiffDB")){
+##   one of "methylRawDB", "methylBaseDB" or "methylDiffDB"; if NULL the
+##   class is guessed from the number and types of the columns
+## @return df with its columns renamed (an empty df is returned unchanged)
+.setMethylDBNames <- function(df,
+                              methylDBclass = NULL) {
+
+  # an empty table has nothing to rename
+  if (nrow(df) == 0) return(df)
 
-  if(nrow(df) == 0) return(df)
+  if (length(df) < 7) {
+    stop("Expected data frame must have at least 7 columns")
+  }
+
+  valid_classes <- c("methylRawDB", "methylBaseDB", "methylDiffDB")
+
+  if (is.null(methylDBclass)) {
+    # more than 7 columns is treated as methylBase data; with exactly 7
+    # columns, integer columns 5 to 7 mean methylRaw, anything else methylDiff
+    if (length(df) > 7) {
+      methylDBclass <- "methylBaseDB"
+    } else {
+      # index columns with [[ ]]: on a data.table, df[5:7] would select
+      # rows 5 to 7 instead of columns 5 to 7
+      cols_are_int <- vapply(5:7, function(i) is.integer(df[[i]]),
+                             logical(1))
+      if (all(cols_are_int)) {
+        methylDBclass <- "methylRawDB"
+      } else {
+        methylDBclass <- "methylDiffDB"
+      }
+    }
+  } else if (length(methylDBclass) != 1 ||
+               !methylDBclass %in% valid_classes) {
+    stop("Unknown methylDBclass provided, ",
+         "allowed values are: methylRawDB, methylBaseDB, methylDiffDB")
+  }
 
-  if(missing(methylDBclass)){
-
-    if( length(df) == 7 & unique(sapply(df,class)[5:7])=="integer"){
-      setnames(x = df,old = names(df),
-               new = c("chr","start","end","strand",
-                       "coverage","numCs","numTs"))
-
-    } else if( length(df) == 7 & unique(sapply(df,class)[5:7])=="numeric"){
-      setnames(x = df,old = names(df),
-               new = c("chr","start","end","strand",
-                       "pvalue","qvalue","meth.diff"))
-
-    } else if( length(df) > 7){
-      setnames(x = df,old = names(df)[1:4],
-               new = c("chr","start","end","strand"))
-      # get indices of coverage,numCs and numTs in the data frame
-      numsamples = (length(df)-4)/3
-      coverage.ind=seq(5,by=3,length.out=numsamples)
-      numCs.ind   =coverage.ind+1
-      numTs.ind   =coverage.ind+2
-
-      # change column names
-      setnames(df,names(df)[coverage.ind],
-               paste(c("coverage"),1:numsamples,sep="" ))
-      setnames(df,names(df)[numCs.ind],
-               paste(c("numCs"),1:numsamples,sep="" ))
-      setnames(df,names(df)[numTs.ind],
-               paste(c("numTs"),1:numsamples,sep="" ))
-
-    }
-
-    #return(df)
+
+  if (methylDBclass == "methylRawDB") {
+    setnames(df, new = c("chr", "start", "end", "strand",
+                         "coverage", "numCs", "numTs"))
 
-  } else {
+  } else if (methylDBclass == "methylBaseDB") {
+    # each sample contributes three columns after the four position columns
+    if ((length(df) - 4) %% 3 != 0) {
+      stop("methylBaseDB data must have 4 + 3 * n columns, got ", length(df))
+    }
+    # get indices of coverage, numCs and numTs in the data frame
+    numsamples <- (length(df) - 4) / 3
+    coverage.ind <- seq(5, by = 3, length.out = numsamples)
+    numCs.ind <- coverage.ind + 1
+    numTs.ind <- coverage.ind + 2
 
-    if( methylDBclass == "methylRawDB" ){
-      setnames(x = df,old = names(df),
-               new = c("chr","start","end","strand",
-                       "coverage","numCs","numTs"))
+    # change column names
+    setnames(df, old = names(df)[1:4],
+             new = c("chr", "start", "end", "strand"))
+    setnames(df, old = names(df)[coverage.ind],
+             new = paste0("coverage", seq_len(numsamples)))
+    setnames(df, old = names(df)[numCs.ind],
+             new = paste0("numCs", seq_len(numsamples)))
+    setnames(df, old = names(df)[numTs.ind],
+             new = paste0("numTs", seq_len(numsamples)))
 
-    } else if ( methylDBclass == "methylBaseDB"){
-      setnames(x = df,old = names(df)[1:4],
-               new = c("chr","start","end","strand"))
-      # get indices of coverage,numCs and numTs in the data frame
-      numsamples = (length(df)-4)/3
-      coverage.ind=seq(5,by=3,length.out=numsamples)
-      numCs.ind   =coverage.ind+1
-      numTs.ind   =coverage.ind+2
-
-      # change column names
-      setnames(df,names(df)[coverage.ind],
-               paste(c("coverage"),1:numsamples,sep="" ))
-      setnames(df,names(df)[numCs.ind],
-               paste(c("numCs"),1:numsamples,sep="" ))
-      setnames(df,names(df)[numTs.ind],
-               paste(c("numTs"),1:numsamples,sep="" ))
-
-    } else if( methylDBclass == "methylDiffDB" ){
-      setnames(x = df,old = names(df),
-               new = c("chr","start","end","strand",
-                       "pvalue","qvalue","meth.diff"))
+  } else if (methylDBclass == "methylDiffDB") {
+    setnames(df, new = c("chr", "start", "end", "strand",
+                         "pvalue", "qvalue", "meth.diff"))
 
-      #return(df)
-    }
   }
+
   return(df)
 }
 