From 7b7c22c94d655d20929021314be28805d0712521 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Fri, 5 Sep 2025 12:30:22 +0000 Subject: [PATCH 1/2] fix(ENGKNOW-2803): Fix case insensitivity in calcifmissing, add warning if duplicate columns. --- .../scala/gorsat/Commands/CalcIfMissing.scala | 4 ++-- .../gorsat/Utilities/IteratorUtilities.scala | 5 ++++- .../test/java/gorsat/UTestHeaderFlags.java | 22 +++++++++++++++++++ .../gorsat/parser/UTestCalcIfMissing.java | 8 +++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/gortools/src/main/scala/gorsat/Commands/CalcIfMissing.scala b/gortools/src/main/scala/gorsat/Commands/CalcIfMissing.scala index 02e60dc4e..c5b0b8f9c 100644 --- a/gortools/src/main/scala/gorsat/Commands/CalcIfMissing.scala +++ b/gortools/src/main/scala/gorsat/Commands/CalcIfMissing.scala @@ -39,11 +39,11 @@ class CalcIfMissing extends CommandInfo("CALCIFMISSING", val columnNames = Calc.getColumnsFromArgs(args) val exprSrc = Calc.getExpressionsFromArgs(args, columnNames) - val existingColumns = forcedInputHeader.split('\t') + val existingColumns = forcedInputHeader.toLowerCase.split('\t') val newColumns = ArrayBuffer[String]() val expressions = ArrayBuffer[String]() for (i <- columnNames.indices) { - if (!existingColumns.contains(columnNames(i))) { + if (!existingColumns.contains(columnNames(i).toLowerCase)) { newColumns += columnNames(i) expressions += exprSrc(i) } diff --git a/gortools/src/main/scala/gorsat/Utilities/IteratorUtilities.scala b/gortools/src/main/scala/gorsat/Utilities/IteratorUtilities.scala index ccdbe84c4..79350286f 100644 --- a/gortools/src/main/scala/gorsat/Utilities/IteratorUtilities.scala +++ b/gortools/src/main/scala/gorsat/Utilities/IteratorUtilities.scala @@ -92,8 +92,11 @@ object IteratorUtilities { if (ALLOW_DUPLICATE_COLUMNS || allowDuplicates) { column = column + "x" colToUp = column.toUpperCase + if (!allowDuplicates) { + logger.warn(s"Duplicate column name '$col' detected in header: $header. Renaming to '$column'") + } } else { - throw new GorDataException(f"Error: Duplicate column name '$column%s' detected in header: $header%s") + throw new GorDataException(f"Error: Duplicate column name '$column' detected in header: $header") } } usedCols.add(colToUp) diff --git a/gortools/src/test/java/gorsat/UTestHeaderFlags.java b/gortools/src/test/java/gorsat/UTestHeaderFlags.java index cbdbb8bb5..fffc7ffe9 100644 --- a/gortools/src/test/java/gorsat/UTestHeaderFlags.java +++ b/gortools/src/test/java/gorsat/UTestHeaderFlags.java @@ -25,7 +25,10 @@ import gorsat.Utilities.IteratorUtilities; import org.gorpipe.exceptions.GorDataException; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.contrib.java.lang.system.RestoreSystemProperties; +import org.junit.contrib.java.lang.system.SystemErrRule; import org.slf4j.LoggerFactory; import java.io.File; @@ -35,6 +38,13 @@ public class UTestHeaderFlags { private static final org.slf4j.Logger log = LoggerFactory.getLogger(UTestHeaderFlags.class); + + @Rule + public final RestoreSystemProperties restoreSystemProperties = new RestoreSystemProperties(); + + @Rule + public final SystemErrRule systemErrRule = new SystemErrRule().enableLog(); + /** * If an excluded char becomes included after the initial state in gorsat.Utilities.IteratorUtilities#validHeader * this test will throw an exception. @@ -68,6 +78,8 @@ public void testValidHeaderUsedKeywordsWithDupAllowingDup() { String testHeader = "#abc\tstart\tfrom\tselect\tmax\tmin\tfrom\tgroup\trange\torder\trank\torder"; String resultingHeader = IteratorUtilities.validHeader(testHeader, true); Assert.assertEquals("#abc\tstart\tfrom\tselect\tmax\tmin\tfromx\tgroup\trange\torder\trank\torderx", resultingHeader); + Assert.assertFalse(systemErrRule.getLog().contains("Duplicate column name 'from'")); + Assert.assertFalse(systemErrRule.getLog().contains("Duplicate column name 'order'")); } @Test @@ -76,6 +88,16 @@ public void testValidHeaderUsedKeywordsWithDupNotAllowingDup() { Assert.assertThrows(GorDataException.class, () -> IteratorUtilities.validHeader(testHeader, false)); } + @Test + public void testValidHeaderUsedKeywordsWithDupAllowingDupOnlyGlobal() { + System.setProperty("gor.iterators.allowDuplicateColumns", "true"); + String testHeader = "#abc\tstart\tfrom\tselect\tmax\tmin\tfrom\tgroup\trange\torder\trank\torder"; + String resultingHeader = IteratorUtilities.validHeader(testHeader, false); + Assert.assertEquals("#abc\tstart\tfrom\tselect\tmax\tmin\tfromx\tgroup\trange\torder\trank\torderx", resultingHeader); + Assert.assertTrue(systemErrRule.getLog().contains("Duplicate column name 'from'")); + Assert.assertTrue(systemErrRule.getLog().contains("Duplicate column name 'order'")); + } + @Test public void testHeaderParsing() { String testStr = "test$He*aDer"; diff --git a/gortools/src/test/java/gorsat/parser/UTestCalcIfMissing.java b/gortools/src/test/java/gorsat/parser/UTestCalcIfMissing.java index 4dfa26295..68d455126 100644 --- a/gortools/src/test/java/gorsat/parser/UTestCalcIfMissing.java +++ b/gortools/src/test/java/gorsat/parser/UTestCalcIfMissing.java @@ -58,4 +58,12 @@ public void addsOnlyMissingColumns() { "chr1\t1\t64\t3.14\tbingo\n"; Assert.assertEquals(expected, result); } + + @Test + public void ignoresColumnWhenItExistsWithCaseDiff() { + final String result = TestUtils.runGorPipe("gorrow 1,1 | calc data1 64 | calcifmissing DATA1,data2,data3 42,3.14,'bingo'"); + final String expected = "chrom\tpos\tdata1\tdata2\tdata3\n" + + "chr1\t1\t64\t3.14\tbingo\n"; + Assert.assertEquals(expected, result); + } } From e9c6bb1e9a324fc9d8649761ca5fe9e201b9749b Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Fri, 5 Sep 2025 23:24:19 +0000 Subject: [PATCH 2/2] fix(ENGKNOW-2803): Fix case insensitivity in calcifmissing, add warning if duplicate columns. --- gortools/src/test/java/gorsat/UTestHeaderFlags.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gortools/src/test/java/gorsat/UTestHeaderFlags.java b/gortools/src/test/java/gorsat/UTestHeaderFlags.java index fffc7ffe9..5eb5dc625 100644 --- a/gortools/src/test/java/gorsat/UTestHeaderFlags.java +++ b/gortools/src/test/java/gorsat/UTestHeaderFlags.java @@ -25,6 +25,7 @@ import gorsat.Utilities.IteratorUtilities; import org.gorpipe.exceptions.GorDataException; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.contrib.java.lang.system.RestoreSystemProperties; @@ -88,6 +89,7 @@ public void testValidHeaderUsedKeywordsWithDupNotAllowingDup() { Assert.assertThrows(GorDataException.class, () -> IteratorUtilities.validHeader(testHeader, false)); } + @Ignore("Only works if run alone, as the property is read only on class load") @Test public void testValidHeaderUsedKeywordsWithDupAllowingDupOnlyGlobal() { System.setProperty("gor.iterators.allowDuplicateColumns", "true");