From ffee3d827e4f685a07b043b0c5385b60b24d5d7a Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Mon, 27 Oct 2025 19:24:44 +0000 Subject: [PATCH 1/4] fix(ENGKNOW-2891): Remove -vlink WRITE option, all link files are versioned when written by W.ITE> --- documentation/src/command/TSVAPPEND.rst | 5 +- documentation/src/command/WRITE.rst | 5 +- .../scala/gorsat/Commands/TsvAppend.scala | 9 +-- .../main/scala/gorsat/Commands/Write.scala | 9 +-- .../src/test/java/gorsat/UTestGorWrite.java | 76 ++++--------------- .../stream/sources/file/FileSource.java | 7 ++ .../gor/driver/linkfile/UTestTimeTravel.java | 4 +- 7 files changed, 29 insertions(+), 86 deletions(-) diff --git a/documentation/src/command/TSVAPPEND.rst b/documentation/src/command/TSVAPPEND.rst index bf5d715e..1726e0ff 100644 --- a/documentation/src/command/TSVAPPEND.rst +++ b/documentation/src/command/TSVAPPEND.rst @@ -25,10 +25,7 @@ Options +-------------------+-----------------------------------------------------------------+ | ``-noheader`` | Don't write a header lines. Not valid with gor/gorz/nor/norz. | +-------------------+-----------------------------------------------------------------+ -| ``-link `` | Writes a link file pointing to the the . | -| | | -+-------------------+-----------------------------------------------------------------+ -| ``-vlink `` | Writes a versioned link file pointing to the the . | +| ``-link `` | Writes a versioned link file pointing to the the . | | | The should not be overwritten if it has previously | | | been used in a link file. | +-------------------+-----------------------------------------------------------------+ diff --git a/documentation/src/command/WRITE.rst b/documentation/src/command/WRITE.rst index ca0ee087..8ca31814 100644 --- a/documentation/src/command/WRITE.rst +++ b/documentation/src/command/WRITE.rst @@ -62,10 +62,7 @@ Options | ``-card ''``| Calculate cardinality of columns in '' and adds to the | | | outputs meta data. 
| +-------------------+-----------------------------------------------------------------+ -| ``-link `` | Writes a link file pointing to the the . | -| | | -+-------------------+-----------------------------------------------------------------+ -| ``-vlink `` | Writes a versioned link file pointing to the the . | +| ``-link `` | Writes a versioned link file pointing to the the . | | | The should not be overwritten if it has previously | | | been used in a link file. | +-------------------+-----------------------------------------------------------------+ diff --git a/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala b/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala index b342cd62..518af8e8 100644 --- a/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala +++ b/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala @@ -32,7 +32,7 @@ import org.gorpipe.gor.util.StringUtil class TsvAppend extends CommandInfo("TSVAPPEND", - CommandArguments("-noheader", "-prefix -link -vlink", 0), + CommandArguments("-noheader", "-prefix -link", 0), CommandOptions(gorCommand = false, norCommand = true, verifyCommand = true)) { override def processArguments(context: GorContext, argString: String, iargs: Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { @@ -72,13 +72,8 @@ class TsvAppend extends CommandInfo("TSVAPPEND", else prefixFile = Option(prfx) } - if (hasOption(args, "-link") && hasOption(args, "-vlink")) { - throw new GorParsingException("Options -link and -vlink are mutually exclusive") - } val (link, linkVersion) = if (hasOption(args, "-link")) { - (stringValueOfOption(args, "-link"), 0) - } else if (hasOption(args, "-vlink")) { - (stringValueOfOption(args, "-vlink"), 1) + (stringValueOfOption(args, "-link"), 1) } else { ("", 0) } diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 4527408e..6e24e958 100644 --- 
a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -34,7 +34,7 @@ import org.gorpipe.gor.util.DataUtil class Write extends CommandInfo("WRITE", - CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link -vlink", 0), + CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link", 0), CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { override def processArguments(context: GorContext, argString: String, iargs: Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { @@ -63,13 +63,8 @@ class Write extends CommandInfo("WRITE", md5 = hasOption(args, "-m") if (hasOption(args, "-l")) compressionLevel = stringValueOfOptionWithErrorCheck(args, "-l", Array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")).toInt - if (hasOption(args, "-link") && hasOption(args, "-vlink")) { - throw new GorParsingException("Options -link and -vlink are mutually exclusive") - } val (link, linkVersion) = if (hasOption(args, "-link")) { - (stringValueOfOption(args, "-link"), 0) - } else if (hasOption(args, "-vlink")) { - (stringValueOfOption(args, "-vlink"), 1) + (stringValueOfOption(args, "-link"), 1) } else { ("", 0) } diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 686d3f2c..564ef5a7 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -74,23 +74,11 @@ public void setupTest() throws IOException { defaultV1LinkFileHeader = meta.formatHeader(); } - @Test - public void testWritePathWithLinkFile() throws IOException { - Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); - Files.copy(p, workDirPath.resolve("dbsnp.gor")); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", 
workDirPath.toString()); - - Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString() + "\n", Files.readString(workDirPath.resolve("dbsnp3.gor.link"))); - - String linkresult = TestUtils.runGorPipe("gor dbsnp3.gor | top 1", "-gorroot", workDirPath.toString()); - Assert.assertEquals(testdbsnpTestLine1, linkresult); - } - @Test public void testWritePathWithVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -vlink dbsnp3.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); Assert.assertTrue( Files.readString(workDirPath.resolve("dbsnp3.gor.link")).startsWith( defaultV1LinkFileHeader + workDirPath.resolve("dbsnp2.gor") + "\t")); @@ -99,34 +87,11 @@ public void testWritePathWithVersionedLinkFile() throws IOException { Assert.assertEquals(testdbsnpTestLine1, linkresult); } - @Test - public void testWritePathWithBothLinkTypes() throws IOException { - Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); - Files.copy(p, workDirPath.resolve("dbsnp.gor")); - var e = Assert.assertThrows(GorParsingException.class, - () -> TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp4.gor -vlink dbsnp3.gor", - "-gorroot", workDirPath.toString())); - Assert.assertTrue(e.getMessage().contains("Options -link and -vlink are mutually exclusive")); - } - - @Test - public void testWritePathWithServerLinkFile() throws IOException { - Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); - Files.copy(p, workDirPath.resolve("dbsnp.gor")); - TestUtils.runGorPipe(new String[] {"gor dbsnp.gor | write user_data/dbsnp2.gor -link user_data/dbsnp3.gor", "-gorroot", workDirPath.toString()}, true); - - Assert.assertEquals(workDirPath.resolve("user_data").resolve(DataUtil.toFile("dbsnp2", DataType.GOR)).toString() + "\n", - 
Files.readString(workDirPath.resolve("user_data").resolve(DataUtil.toLinkFile("dbsnp3", DataType.GOR)))); - - String linkresult = TestUtils.runGorPipe("gor user_data/dbsnp3.gor | top 1", "-gorroot", workDirPath.toString()); - Assert.assertEquals(testdbsnpTestLine1, linkresult); - } - @Test public void testWritePathWithServerVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); - TestUtils.runGorPipe(new String[] {"gor dbsnp.gor | write user_data/dbsnp2.gor -vlink user_data/dbsnp3.gor", "-gorroot", workDirPath.toString()}, true); + TestUtils.runGorPipe(new String[] {"gor dbsnp.gor | write user_data/dbsnp2.gor -link user_data/dbsnp3.gor", "-gorroot", workDirPath.toString()}, true); Assert.assertTrue( Files.readString(workDirPath.resolve("user_data").resolve(DataUtil.toLinkFile("dbsnp3", DataType.GOR))).startsWith( defaultV1LinkFileHeader + workDirPath.resolve("user_data").resolve(DataUtil.toFile("dbsnp2", DataType.GOR)).toString() + "\t")); @@ -160,8 +125,8 @@ public void testWritePathWithExistingLinkFile() throws IOException { Files.copy(p, workDirPath.resolve("dbsnp.gor")); Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), workDirPath.resolve("dbsnp.gor").toString() + "\n"); TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); - - Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString() + "\n", Files.readString(workDirPath.resolve("dbsnp3.gor.link"))); + var linkUrl = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gor.link"))).getLatestEntryUrl(); + Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString(), linkUrl); } @Test @@ -169,7 +134,7 @@ public void testWritePathWithExistingVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), 
workDirPath.resolve("dbsnp.gor").toString() + "\n"); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -vlink dbsnp3.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); Assert.assertTrue(Files.readString(workDirPath.resolve("dbsnp3.gor.link")).startsWith( defaultV1LinkFileHeader @@ -179,12 +144,12 @@ public void testWritePathWithExistingVersionedLinkFile() throws IOException { @Test public void testWritePathWithExistingBadLinkFile() throws IOException { - Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); - Files.copy(p, workDirPath.resolve("dbsnp.gor")); - Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), ""); + Path link = workDirPath.resolve("dbsnp3.gor.link"); + Files.copy(Paths.get("../tests/data/gor/dbsnp_test.gor"), workDirPath.resolve("dbsnp.gor")); + Files.writeString(link, ""); TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); - - Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString() + "\n", Files.readString(workDirPath.resolve("dbsnp3.gor.link"))); + var linkUrl = LinkFile.load(new FileSource(link)).getLatestEntryUrl(); + Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString(), linkUrl); } @Test @@ -192,43 +157,30 @@ public void testWritePathWithExistingBadVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), ""); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -vlink dbsnp3.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); Assert.assertTrue(Files.readString(workDirPath.resolve("dbsnp3.gor.link")).startsWith( defaultV1LinkFileHeader + workDirPath.resolve("dbsnp2.gor") + "\t")); } - @Test - 
public void testOverwritePathWithExistingLinkFile() throws IOException { - Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); - Files.copy(p, workDirPath.resolve("dbsnp.gor")); - - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); - - Assert.assertEquals(workDirPath.resolve("dbsnp2.gor").toString() + "\n", - Files.readString(workDirPath.resolve("dbsnp3.gor.link"))); - - } - @Test public void testOverwritePathWithExistingVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); - TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -vlink dbsnp3.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); LinkFile linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gor.link").toString())); Assert.assertEquals(1, linkFile.getEntriesCount()); // Test with same file. 
- TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -vlink dbsnp3.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gor.link").toString())); Assert.assertEquals(2, linkFile.getEntriesCount()); // Test with different file Assert.assertThrows( "Overwriting link with same path, throws exception", GorSystemException.class, - () -> TestUtils.runGorPipe("gor dbsnp.gor | top 1 | write dbsnp2.gor -vlink dbsnp3.gor", + () -> TestUtils.runGorPipe("gor dbsnp.gor | top 1 | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString())); } diff --git a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSource.java b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSource.java index a623b1b1..4382df0f 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSource.java +++ b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSource.java @@ -67,6 +67,13 @@ public FileSource(String fileName) { this(new SourceReferenceBuilder(fileName).build()); } + /** + * Name of file. This should be the full path to the file. + */ + public FileSource(Path fileName) { + this(new SourceReferenceBuilder(fileName.toString()).build()); + } + /** * Name of file. This should be the full path to the file. */ diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/UTestTimeTravel.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/UTestTimeTravel.java index 190c7b8a..fac8a9de 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/UTestTimeTravel.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/UTestTimeTravel.java @@ -80,7 +80,7 @@ public void testQueryIntegrityChangeWhileRunning() { // Use the timestamp when query started. 
assertEquals(anew, TestUtils.runGorPipe(""" create before = gor A.gor.link; - create update = gor B.gor.link | join -snpsnp [before] | select 1-3 | write Alatest.gor -vlink A.gor.link; + create update = gor B.gor.link | join -snpsnp [before] | select 1-3 | write Alatest.gor -link A.gor.link; create after = gor A.gor.link | join -snpsnp [update] | select 1-3; gor [after] """, "-gorroot", workPath.toString())); @@ -88,7 +88,7 @@ public void testQueryIntegrityChangeWhileRunning() { // Force use latest. assertEquals(bnew, TestUtils.runGorPipe(String.format(""" create before = gor A.gor.link; - create update = gor B.gor.link | join -snpsnp [before] | select 1-3 | write Alatest.gor -vlink A.gor.link; + create update = gor B.gor.link | join -snpsnp [before] | select 1-3 | write Alatest.gor -link A.gor.link; create after = gor -time %d A.gor.link | join -snpsnp [update] | select 1-3; gor [after] """, Long.MAX_VALUE), "-gorroot", workPath.toString())); From 60cd45d0fd14835b37b4d7964d0b3cadd35d1147 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Tue, 28 Oct 2025 17:31:41 +0000 Subject: [PATCH 2/4] fix(ENGKNOW-2891): Add -linkmeta option to write and tsvappend. Make link file version 1 default. 
--- documentation/src/command/TSVAPPEND.rst | 7 ++-- documentation/src/command/WRITE.rst | 5 ++- .../scala/gorsat/Analysis/ForkWrite.scala | 28 ++++++++------- .../scala/gorsat/Commands/TsvAppend.scala | 13 +++---- .../main/scala/gorsat/Commands/Write.scala | 13 +++---- .../src/test/java/gorsat/UTestGorWrite.java | 27 ++++++++++++++ .../gorsat/monitors/StatsMonitorTest.java | 2 +- .../gorpipe/gor/driver/linkfile/LinkFile.java | 35 ++++++++++++++++--- .../gor/driver/linkfile/LinkFileMeta.java | 5 +++ 9 files changed, 97 insertions(+), 38 deletions(-) diff --git a/documentation/src/command/TSVAPPEND.rst b/documentation/src/command/TSVAPPEND.rst index 1726e0ff..576d18f1 100644 --- a/documentation/src/command/TSVAPPEND.rst +++ b/documentation/src/command/TSVAPPEND.rst @@ -14,7 +14,7 @@ Usage .. code-block:: gor - nor ... | tsvappend [-noheader] [-prefix ] [-link ] + nor ... | tsvappend [-noheader] [-prefix ] [-link [-lmeta ] Options ======= @@ -25,10 +25,13 @@ Options +-------------------+-----------------------------------------------------------------+ | ``-noheader`` | Don't write a header lines. Not valid with gor/gorz/nor/norz. | +-------------------+-----------------------------------------------------------------+ -| ``-link `` | Writes a versioned link file pointing to the the . | +| ``-link `` | Writes a versioned link file pointing to the the . | | | The should not be overwritten if it has previously | | | been used in a link file. | +-------------------+-----------------------------------------------------------------+ +| ``-linkmeta `` | Writes as meta data to the . is string for | +| | of comma separated key=value elements. 
| ++-------------------+-----------------------------------------------------------------+ Examples ======== diff --git a/documentation/src/command/WRITE.rst b/documentation/src/command/WRITE.rst index 8ca31814..49904e9d 100644 --- a/documentation/src/command/WRITE.rst +++ b/documentation/src/command/WRITE.rst @@ -62,10 +62,13 @@ Options | ``-card ''``| Calculate cardinality of columns in '' and adds to the | | | outputs meta data. | +-------------------+-----------------------------------------------------------------+ -| ``-link `` | Writes a versioned link file pointing to the the . | +| ``-link `` | Writes a versioned link file pointing to the the . | | | The should not be overwritten if it has previously | | | been used in a link file. | +-------------------+-----------------------------------------------------------------+ +| ``-linkmeta `` | Writes as meta data to the . is string for | +| | of comma separated key=value elements. | ++-------------------+-----------------------------------------------------------------+ Examples ======== diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 940f0eaa..742c739a 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -23,9 +23,10 @@ package gorsat.Analysis import java.util.zip.Deflater -import gorsat.Commands.{Analysis, Output, RowHeader} +import gorsat.Commands.{Analysis, CommandParseUtilities, Output, RowHeader} import gorsat.Outputs.OutFile import org.apache.commons.io.FilenameUtils +import org.apache.commons.lang3.StringUtils import org.gorpipe.exceptions.GorResourceException import org.gorpipe.gor.binsearch.GorIndexType import org.gorpipe.gor.driver.linkfile.LinkFile @@ -36,6 +37,7 @@ import org.gorpipe.gor.session.{GorSession, ProjectContext} import org.gorpipe.gor.table.util.PathUtils import org.gorpipe.gor.util.DataUtil import 
org.gorpipe.model.gor.RowObj +import org.gorpipe.util.Strings import java.util.UUID import scala.collection.mutable @@ -57,7 +59,7 @@ case class OutputOptions(remove: Boolean = false, writeMeta: Boolean = true, cardCol: String = null, linkFile: String = "", - linkFileVersion: Int = 1, + linkFileMeta: String = "", command: String = null, infer: Boolean = false, maxseg: Boolean = false @@ -274,17 +276,17 @@ case class ForkWrite(forkCol: Int, if (useFork) { forkMap.values.foreach(sh => { - val (linkFile, linkFileContent, linkFileVersion) = extractLink(sh.fileName) + val (linkFile, linkFileUrl, linkFileMeta) = extractLink(sh.fileName) if (linkFile.nonEmpty) { - writeLinkFile(linkFile, linkFileContent, linkFileVersion) + writeLinkFile(linkFile, linkFileUrl, linkFileMeta) } }) } else { - val (linkFile, linkFileContent, linkFileVersion) = extractLink(fullFileName, options.linkFile, options.linkFileVersion) + val (linkFile, linkFileUrl, linkFileMeta) = extractLink(fullFileName, options.linkFile, options.linkFileMeta) if (linkFile.nonEmpty) { - writeLinkFile(linkFile, linkFileContent, linkFileVersion, getMd5) + writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5) } } } @@ -298,9 +300,8 @@ case class ForkWrite(forkCol: Int, } } - private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileVersion: Int = 0) : (String, String, Int) = { + private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String) = { var linkFile = optLinkFile - var linkFileVersion = optLinkFileVersion var linkFileContent = "" if (fileName.nonEmpty) { if (linkFile.isEmpty) { @@ -308,18 +309,18 @@ case class ForkWrite(forkCol: Int, if (dataSource != null && dataSource.forceLink()) { linkFile = dataSource.getProjectLinkFile linkFileContent = dataSource.getProjectLinkFileContent - linkFileVersion = 0 } } else { linkFileContent = PathUtils.resolve(session.getProjectContext.getProjectRoot, fileName) } } - 
(linkFile,linkFileContent,linkFileVersion) + val meta = if (!Strings.isNullOrEmpty(optLinkFileMeta)) CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',').map(s => "## " + s.trim).mkString("\n") else "" + (linkFile,linkFileContent, meta) } private def writeLinkFile(linkFilePath: String, linkFileContent: String, - linkFileVersion: Int = 0, md5: String = null) : Unit = { - val linkFileToWrite = LinkFile.validateAndUpdateLinkFileName(linkFilePath, linkFileVersion) + linkFileMeta: String = "", md5: String = null) : Unit = { + val linkFileToWrite = LinkFile.validateAndUpdateLinkFileName(linkFilePath) // Validate that we can write to the location (skip link extension as writing links is always forbidden). session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFileToWrite), true) @@ -328,7 +329,8 @@ case class ForkWrite(forkCol: Int, val fileReader = new DriverBackedFileReader(session.getProjectContext.getFileReader.getSecurityContext, session.getProjectContext.getProjectRoot) - LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource], linkFileVersion) + LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource]) + .appendMeta(linkFileMeta) .appendEntry(linkFileContent, md5, fileReader) .save() } diff --git a/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala b/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala index 518af8e8..90956e6d 100644 --- a/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala +++ b/gortools/src/main/scala/gorsat/Commands/TsvAppend.scala @@ -32,7 +32,7 @@ import org.gorpipe.gor.util.StringUtil class TsvAppend extends CommandInfo("TSVAPPEND", - CommandArguments("-noheader", "-prefix -link", 0), + CommandArguments("-noheader", "-prefix -link -linkmeta", 0), CommandOptions(gorCommand = false, norCommand = true, verifyCommand = true)) { override def processArguments(context: GorContext, argString: String, iargs: 
Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { @@ -72,11 +72,8 @@ class TsvAppend extends CommandInfo("TSVAPPEND", else prefixFile = Option(prfx) } - val (link, linkVersion) = if (hasOption(args, "-link")) { - (stringValueOfOption(args, "-link"), 1) - } else { - ("", 0) - } + val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" + val linkMetaOpt = if (hasOption(args, "-linkmeta")) stringValueOfOption(args, "-linkmeta") else "" val fixedHeader = forcedInputHeader.split("\t").slice(0, 2).mkString("\t") @@ -90,8 +87,8 @@ class TsvAppend extends CommandInfo("TSVAPPEND", prefix=prefix, prefixFile=prefixFile, skipHeader=skipHeader, - linkFile=link, - linkFileVersion=linkVersion, + linkFile=linkOpt, + linkFileMeta=linkMetaOpt, command=argString ) ), diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 6e24e958..e065097c 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -34,7 +34,7 @@ import org.gorpipe.gor.util.DataUtil class Write extends CommandInfo("WRITE", - CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link", 0), + CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link -linkmeta", 0), CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { override def processArguments(context: GorContext, argString: String, iargs: Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { @@ -63,11 +63,8 @@ class Write extends CommandInfo("WRITE", md5 = hasOption(args, "-m") if (hasOption(args, "-l")) compressionLevel = stringValueOfOptionWithErrorCheck(args, "-l", Array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")).toInt - val (link, linkVersion) = if (hasOption(args, 
"-link")) { - (stringValueOfOption(args, "-link"), 1) - } else { - ("", 0) - } + val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" + val linkMetaOpt = if (hasOption(args, "-linkmeta")) stringValueOfOption(args, "-linkmeta") else "" if(fileName.isEmpty && useFolder.isEmpty) throw new GorResourceException("No file or folder specified",""); @@ -143,8 +140,8 @@ class Write extends CommandInfo("WRITE", useFolder, skipHeader, cardCol = card, - linkFile = link, - linkFileVersion = linkVersion, + linkFile = linkOpt, + linkFileMeta = linkMetaOpt, command = argString, infer = infer, maxseg = maxseg diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 564ef5a7..fd6a301f 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -184,6 +184,33 @@ public void testOverwritePathWithExistingVersionedLinkFile() throws IOException "-gorroot", workDirPath.toString())); } + @Test + public void testWriteLinkFileAndMeta() throws IOException { + TestUtils.runGorPipe("gorrow chr1,1,100 | write test.gor -link ltest.gor -linkmeta 'TEST1=T1,TEST2=T2, TEST3=T3'", "-gorroot", workDirPath.toString()); + + String linkresult = TestUtils.runGorPipe("gor ltest.gor | top 1", "-gorroot", workDirPath.toString()); + Assert.assertEquals("chrom\tbpStart\tbpStop\nchr1\t1\t100\n", linkresult); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals("T1", linkFile.getMeta().getProperty("TEST1")); + Assert.assertEquals("T2", linkFile.getMeta().getProperty("TEST2")); + Assert.assertEquals("T3", linkFile.getMeta().getProperty("TEST3")); + } + + @Test + public void testWriteLinkFileAndUnqotedMeta() throws IOException { + TestUtils.runGorPipe("gorrow chr1,1,100 | write test.gor -link ltest.gor -linkmeta TEST1=T1,TEST2=T2", 
"-gorroot", workDirPath.toString()); + + String linkresult = TestUtils.runGorPipe("gor ltest.gor | top 1", "-gorroot", workDirPath.toString()); + Assert.assertEquals("chrom\tbpStart\tbpStop\nchr1\t1\t100\n", linkresult); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals("T1", linkFile.getMeta().getProperty("TEST1")); + Assert.assertEquals("T2", linkFile.getMeta().getProperty("TEST2")); + } + @Test public void testTxtWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); diff --git a/gortools/src/test/java/gorsat/monitors/StatsMonitorTest.java b/gortools/src/test/java/gorsat/monitors/StatsMonitorTest.java index 42802c86..ca042622 100644 --- a/gortools/src/test/java/gorsat/monitors/StatsMonitorTest.java +++ b/gortools/src/test/java/gorsat/monitors/StatsMonitorTest.java @@ -87,7 +87,7 @@ public void testRowCountAndBytesCountForAddedWrite() { var outputOptions = new OutputOptions(false, false, true, false, false, GorIndexType.NONE, new String[0], new String[0], Option.empty(), Option.empty(), Deflater.BEST_SPEED, - Option.empty(), false, false, null, "", 0, null, false, false); + Option.empty(), false, false, null, "", "", null, false, false); forkWrite = new ForkWrite(-1, workDirPath.resolve("test.gor").toString(), pipe.getSession(), pipe.getHeader(), outputOptions); pipe.lastStep().$bar(forkWrite); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index c96b752e..012a5eb5 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -10,6 +10,7 @@ import org.gorpipe.gor.model.FileReader; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; +import org.gorpipe.util.Strings; import 
java.io.IOException; import java.io.InputStream; @@ -20,7 +21,7 @@ /** * Class to work with link files, read, write and access metadata. * - * Link file format, a valid nor format. Note, the required fields form the current link file format. + * Link file format, a valid nor format. Example: * * ## VERSION= * ## ENTRIES_COUNT_MAX= @@ -33,10 +34,27 @@ * 1. No timestamp or serial is treated as 0 (older). * 2. Entries are added to the bottom. * 3. If entries have the same timestamp, the appearing later in the file is picked. + * 4. Required fields. + * - URL + * 5, Optional fields. + * - TIMESTAMP - in ISO data format or milliseconds since epoch, active time. + * - MD5 - md5 checksum of the file or data the link points to. + * - SERIAL - incrementing serial number for the link file entry. + * 6, Required meta fields. + * - VERSION - Link file format version. + * 7. Optional meta fields. + * - ENTRIES_COUNT_MAX - max entries to store in this file. + * - ENTRIES_AGE_MAX - max age of entries in milliseconds. + * - * */ public abstract class LinkFile { + // TODO: + // 1. Remove source from the link file. Instead just passed into the load and save static methods. + // Offload to its own helper calss LineFilePersister/LinkFileLifeCycle or similar. + // 2. Add Info field add the end, to set external version info etc. 
+ public static final int LINK_FILE_MAX_SIZE = 10000; private static final boolean USE_LINK_CACHE = Boolean.parseBoolean(System.getProperty("gor.driver.cache.link", "true")); @@ -52,10 +70,10 @@ public static LinkFile load(StreamSource source) throws IOException { public static LinkFile load(StreamSource source, String content) { var meta = LinkFileMeta.createAndLoad(content); - if ("1".equals(meta.getVersion())) { - return new LinkFileV1(source, meta, content); - } else { + if ("0".equals(meta.getVersion())) { return new LinkFileV0(source, meta, content); + } else { + return new LinkFileV1(source, meta, content); } } @@ -69,7 +87,7 @@ public static LinkFile load(StreamSource source, int linkVersion) throws IOExcep } } - public static String validateAndUpdateLinkFileName(String linkFilePath, int linkVersion) { + public static String validateAndUpdateLinkFileName(String linkFilePath) { if (DataUtil.isLink(linkFilePath)) { return linkFilePath; } else { @@ -186,6 +204,13 @@ public LinkFile appendEntry(String link, String md5) { public abstract LinkFile appendEntry(String link, String md5, FileReader reader); + public LinkFile appendMeta(String meta) { + if (!Strings.isNullOrEmpty(meta)) { + this.meta.loadAndMergeMeta(meta); + } + return this; + } + public void save() { try (OutputStream os = source.getOutputStream()) { save(os); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java index 1664683a..5619f982 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java @@ -54,6 +54,11 @@ public void setEntriesAgeMax(int entriesAgeMax) { setProperty(HEADER_ENTRIES_AGE_MAX_KEY, String.valueOf(entriesAgeMax)); } + @Override + public String getVersion() { + return getProperty(HEADER_VERSION_KEY, "1"); + } + public static String getDefaultMetaContent() { return String.format(""" 
## SERIAL = 0 From 5e9dcd8d3dfab76ed55b1e566c724d0da7b3e3a8 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Wed, 29 Oct 2025 22:47:00 +0000 Subject: [PATCH 3/4] fix(ENGKNOW-2891): Minor refactoring. Add support for link file entry info. --- .../scala/gorsat/Analysis/ForkWrite.scala | 33 +++++++++++++------ .../src/test/java/gorsat/UTestGorWrite.java | 15 +++++++-- .../gorpipe/gor/driver/linkfile/LinkFile.java | 32 ++++++------------ .../gor/driver/linkfile/LinkFileEntry.java | 2 ++ .../gor/driver/linkfile/LinkFileEntryV0.java | 4 +++ .../gor/driver/linkfile/LinkFileEntryV1.java | 14 +++++--- .../gor/driver/linkfile/LinkFileMeta.java | 4 +-- .../gor/driver/linkfile/LinkFileV0.java | 2 +- .../gor/driver/linkfile/LinkFileV1.java | 4 +-- .../org/gorpipe/gor/model/FileReader.java | 2 +- .../gor/driver/linkfile/LinkFileTest.java | 20 +++++------ 11 files changed, 78 insertions(+), 54 deletions(-) diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 742c739a..2b6bb7c5 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -29,7 +29,7 @@ import org.apache.commons.io.FilenameUtils import org.apache.commons.lang3.StringUtils import org.gorpipe.exceptions.GorResourceException import org.gorpipe.gor.binsearch.GorIndexType -import org.gorpipe.gor.driver.linkfile.LinkFile +import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1} import org.gorpipe.gor.driver.meta.DataType import org.gorpipe.gor.driver.providers.stream.sources.StreamSource import org.gorpipe.gor.model.{DriverBackedFileReader, GorMeta, GorOptions, Row} @@ -276,17 +276,17 @@ case class ForkWrite(forkCol: Int, if (useFork) { forkMap.values.foreach(sh => { - val (linkFile, linkFileUrl, linkFileMeta) = extractLink(sh.fileName) + val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(sh.fileName) if (linkFile.nonEmpty) { - 
writeLinkFile(linkFile, linkFileUrl, linkFileMeta) + writeLinkFile(linkFile, linkFileUrl, linkFileMeta, linkFileInfo) } }) } else { - val (linkFile, linkFileUrl, linkFileMeta) = extractLink(fullFileName, options.linkFile, options.linkFileMeta) + val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(fullFileName, options.linkFile, options.linkFileMeta) if (linkFile.nonEmpty) { - writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5) + writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5, linkFileInfo) } } } @@ -300,7 +300,7 @@ case class ForkWrite(forkCol: Int, } } - private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String) = { + private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = { var linkFile = optLinkFile var linkFileContent = "" if (fileName.nonEmpty) { @@ -314,12 +314,25 @@ case class ForkWrite(forkCol: Int, linkFileContent = PathUtils.resolve(session.getProjectContext.getProjectRoot, fileName) } } - val meta = if (!Strings.isNullOrEmpty(optLinkFileMeta)) CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',').map(s => "## " + s.trim).mkString("\n") else "" - (linkFile,linkFileContent, meta) + + var linkFileMeta = "" + var linkFileInfo = "" + if (!Strings.isNullOrEmpty(optLinkFileMeta)) { + for (s <- CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) { + val l = s.trim + if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) { + linkFileInfo = StringUtils.strip(l.substring(LinkFileEntryV1.ENTRY_INFO_KEY.length + 1), "\"\'") + } else { + linkFileMeta += "## " + l + "\n" + } + } + } + + (linkFile, linkFileContent, linkFileMeta, linkFileInfo) } private def writeLinkFile(linkFilePath: String, linkFileContent: String, - linkFileMeta: String = "", md5: String = null) : Unit = { + linkFileMeta: String = "", md5: String = null, 
linkFileInfo: String = null) : Unit = { val linkFileToWrite = LinkFile.validateAndUpdateLinkFileName(linkFilePath) // Validate that we can write to the location (skip link extension as writing links is always forbidden). @@ -331,7 +344,7 @@ case class ForkWrite(forkCol: Int, LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource]) .appendMeta(linkFileMeta) - .appendEntry(linkFileContent, md5, fileReader) + .appendEntry(linkFileContent, md5, linkFileInfo, fileReader) .save() } } diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index fd6a301f..b60a67ba 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -133,12 +133,12 @@ public void testWritePathWithExistingLinkFile() throws IOException { public void testWritePathWithExistingVersionedLinkFile() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); Files.copy(p, workDirPath.resolve("dbsnp.gor")); - Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), workDirPath.resolve("dbsnp.gor").toString() + "\n"); + Files.writeString(workDirPath.resolve("dbsnp3.gor.link"), defaultV1LinkFileHeader + workDirPath.resolve("dbsnp.gor").toString() + "\n"); TestUtils.runGorPipe("gor dbsnp.gor | write dbsnp2.gor -link dbsnp3.gor", "-gorroot", workDirPath.toString()); Assert.assertTrue(Files.readString(workDirPath.resolve("dbsnp3.gor.link")).startsWith( defaultV1LinkFileHeader - + workDirPath.resolve("dbsnp.gor") + "\t1970-01-01T00:00:00Z\t\t0\n" + + workDirPath.resolve("dbsnp.gor") + "\t1970-01-01T00:00:00Z\t\t0\t\n" + workDirPath.resolve("dbsnp2.gor") + "\t")); } @@ -211,6 +211,17 @@ public void testWriteLinkFileAndUnqotedMeta() throws IOException { Assert.assertEquals("T2", linkFile.getMeta().getProperty("TEST2")); } + @Test + public void testWriteLinkFileAndMetaWithInfo() throws IOException { + TestUtils.runGorPipe("gorrow chr1,1,100 | write 
test.gor -link ltest.gor -linkmeta TEST1=T1,ENTRY_INFO='Some file info'", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals("T1", linkFile.getMeta().getProperty("TEST1")); + Assert.assertEquals("NOTFOUND", linkFile.getMeta().getProperty("ENTRY_INFO", "NOTFOUND")); + Assert.assertEquals("Some file info", linkFile.getLatestEntry().info()); + } + @Test public void testTxtWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index 012a5eb5..3c3fb5c6 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -50,11 +50,6 @@ */ public abstract class LinkFile { - // TODO: - // 1. Remove source from the link file. Instead just passed into the load and save static methods. - // Offload to its own helper calss LineFilePersister/LinkFileLifeCycle or similar. - // 2. Add Info field add the end, to set external version info etc. 
- public static final int LINK_FILE_MAX_SIZE = 10000; private static final boolean USE_LINK_CACHE = Boolean.parseBoolean(System.getProperty("gor.driver.cache.link", "true")); @@ -64,10 +59,10 @@ public abstract class LinkFile { public static LinkFile load(StreamSource source) throws IOException { var content = loadContentFromSource(source); - return load(source, content); + return create(source, content); } - public static LinkFile load(StreamSource source, String content) { + public static LinkFile create(StreamSource source, String content) { var meta = LinkFileMeta.createAndLoad(content); if ("0".equals(meta.getVersion())) { @@ -77,21 +72,10 @@ public static LinkFile load(StreamSource source, String content) { } } - public static LinkFile load(StreamSource source, int linkVersion) throws IOException { - switch (linkVersion) { - case 0: - return new LinkFileV0(source); - case 1: - default: - return new LinkFileV1(source); - } - } - public static String validateAndUpdateLinkFileName(String linkFilePath) { if (DataUtil.isLink(linkFilePath)) { return linkFilePath; } else { - //return linkVersion == 0 ? DataUtil.toLink(linkFilePath) : DataUtil.toVersionedLink(linkFilePath); return DataUtil.toLink(linkFilePath); } } @@ -166,7 +150,7 @@ public int getEntriesCount() { * @param timestamp timestamp to match * @return best match entry or null if no entries. */ - LinkFileEntry getEntry(long timestamp) { + public LinkFileEntry getEntry(long timestamp) { int index = entries.size() - 1; while (index >= 0 && entries.get(index).timestamp() > timestamp) { index--; @@ -178,7 +162,7 @@ LinkFileEntry getEntry(long timestamp) { * Get the latest entry. * @return the latest entry */ - LinkFileEntry getLatestEntry() { + public LinkFileEntry getLatestEntry() { return entries != null && !entries.isEmpty() ? 
entries.get(entries.size() - 1) : null; } @@ -199,10 +183,14 @@ public long getEntriesAgeMax() { } public LinkFile appendEntry(String link, String md5) { - return appendEntry(link, md5, null); + return appendEntry(link, md5, null, null); + } + + public LinkFile appendEntry(String link, String md5, String info) { + return appendEntry(link, md5, info, null); } - public abstract LinkFile appendEntry(String link, String md5, FileReader reader); + public abstract LinkFile appendEntry(String link, String md5, String info, FileReader reader); public LinkFile appendMeta(String meta) { if (!Strings.isNullOrEmpty(meta)) { diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntry.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntry.java index 51b1cb9a..2add3949 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntry.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntry.java @@ -10,4 +10,6 @@ public interface LinkFileEntry { String md5(); int serial(); + + String info(); } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV0.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV0.java index 0b31b1ce..34a207fc 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV0.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV0.java @@ -42,4 +42,8 @@ public String md5() { public int serial() { return 0; // No serial in V0 } + + public String info() { + return ""; // No info in V0 + } } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV1.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV1.java index 0d9c556c..a8be5d8b 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV1.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileEntryV1.java @@ -19,10 +19,14 @@ * @param timestamp timestamp of the link file entry. 
Optional. * @param md5 md5 of file or data the link points to. Optional. * @param serial incrementing serial number for the link file entry. Optional. + * @param info extra info about this version */ -public record LinkFileEntryV1(String url, long timestamp, String md5, int serial) implements LinkFileEntry { +public record LinkFileEntryV1(String url, long timestamp, String md5, int serial, String info) implements LinkFileEntry { - public LinkFileEntryV1(String url, long timestamp, String md5, int serial) { + // Special key to store entry info in link file metadata. + public static final String ENTRY_INFO_KEY = "ENTRY_INFO"; + + public LinkFileEntryV1(String url, long timestamp, String md5, int serial, String info) { if (Strings.isNullOrBlank(url)) { throw new IllegalArgumentException("URL cannot be null or empty"); } @@ -37,6 +41,7 @@ public LinkFileEntryV1(String url, long timestamp, String md5, int serial) { this.timestamp = timestamp; this.md5 = md5; this.serial = serial; + this.info = info; } public static List parse(String content) { @@ -59,11 +64,12 @@ private static LinkFileEntryV1 parseLine(String line) { parts[0].trim(), parts.length > 1 && !Strings.isNullOrEmpty(parts[1]) ? DateUtils.parseDateISOEpoch(parts[1], true).toEpochMilli() : 0, parts.length > 2 ? parts[2] : "", - parts.length > 3 && !Strings.isNullOrEmpty(parts[3]) ? Integer.parseInt(parts[3]) : 0 + parts.length > 3 && !Strings.isNullOrEmpty(parts[3]) ? Integer.parseInt(parts[3]) : 0, + parts.length > 4 ? 
parts[4] : "" ); } public String format() { - return url + "\t" + Instant.ofEpochMilli(timestamp) + "\t" + md5 + "\t" + serial; + return url + "\t" + Instant.ofEpochMilli(timestamp) + "\t" + md5 + "\t" + serial+ "\t" + info; } } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java index 5619f982..860195a0 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java @@ -9,7 +9,7 @@ public class LinkFileMeta extends BaseMeta { public static final String HEADER_ENTRIES_COUNT_MAX_KEY = "ENTRIES_COUNT_MAX"; public static final String HEADER_ENTRIES_AGE_MAX_KEY = "ENTRIES_AGE_MAX"; - public static final String[] DEFAULT_TABLE_HEADER = new String[] {"File", "Timestamp", "MD5", "Serial"}; + public static final String[] DEFAULT_TABLE_HEADER = new String[] {"File", "Timestamp", "MD5", "Serial", "Info"}; public static final int DEFAULT_ENTRIES_COUNT_MAX = 100; public static final long DEFAULT_ENTRIES_AGE_MAX = Long.MAX_VALUE; @@ -56,7 +56,7 @@ public void setEntriesAgeMax(int entriesAgeMax) { @Override public String getVersion() { - return getProperty(HEADER_VERSION_KEY, "1"); + return getProperty(HEADER_VERSION_KEY, "0"); } public static String getDefaultMetaContent() { diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java index d3cea533..ff277725 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java @@ -35,7 +35,7 @@ protected List parseEntries(String content) { } @Override - public LinkFile appendEntry(String link, String md5, FileReader reader) { + public LinkFile appendEntry(String link, String md5, String info, FileReader reader) { entries.clear(); // V0 does not support multiple 
entries, so we clear the list entries.add(new LinkFileEntryV0(link)); return this; diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java index f3a36cdd..271a83ec 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java @@ -36,9 +36,9 @@ protected List parseEntries(String content) { } @Override - public LinkFile appendEntry(String link, String md5, FileReader reader) { + public LinkFile appendEntry(String link, String md5, String info, FileReader reader) { var latestEntry = getLatestEntry(); - var entry = new LinkFileEntryV1(link, System.currentTimeMillis(), md5, latestEntry != null ? latestEntry.serial() + 1 : 1); + var entry = new LinkFileEntryV1(link, System.currentTimeMillis(), md5, latestEntry != null ? latestEntry.serial() + 1 : 1, info); validateEntry(entry, reader); entries.add(entry); return this; diff --git a/model/src/main/java/org/gorpipe/gor/model/FileReader.java b/model/src/main/java/org/gorpipe/gor/model/FileReader.java index 10b625ac..11591e2d 100644 --- a/model/src/main/java/org/gorpipe/gor/model/FileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/FileReader.java @@ -343,7 +343,7 @@ public void writeLinkIfNeeded(String url) throws IOException { DataSource dataSource = resolveUrl(url, true); if (dataSource.forceLink()) { DataSource linkDataSource = resolveUrl(dataSource.getProjectLinkFile(), true); - LinkFile.load((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(); + LinkFile.create((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(); } } diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index cdae8fd6..6a3836d4 100644 --- 
a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -24,9 +24,9 @@ public class LinkFileTest { private final String v1LinkFileContent = """ ## SERIAL = 0 ## VERSION = 1 - #FILE\tTIMESTAMP\tMD5\tSERIAL - source/v1/ver1.gorz\t2024-12-15T11:21:30.790Z\tABCDEAF13422\t1 - source/v1/ver2.gorz\t2024-12-15T23:25:24.533Z\t334DEAF13422\t2 + #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO + source/v1/ver1.gorz\t2024-12-15T11:21:30.790Z\tABCDEAF13422\t1\t + source/v1/ver2.gorz\t2024-12-15T23:25:24.533Z\t334DEAF13422\t2\t """; // 2024-12-15T11:21:30.790Z = 1734261690790 // 2024-12-15T23:25:24.533Z = 1734305124533L @@ -43,7 +43,7 @@ public void setUp() { @Test public void testCreateLinkFile() { - LinkFile linkFile = LinkFile.load(mockSource, v1LinkFileContent); + LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); assertNotNull(linkFile); assertEquals(2, linkFile.getEntries().size()); assertEquals(100, linkFile.getEntriesCountMax()); @@ -61,24 +61,24 @@ public void testLoadLinkFile() throws IOException { @Test public void testAppendEntry() { - LinkFile linkFile = LinkFile.load(mockSource, v1LinkFileContent); - linkFile.appendEntry(simpleFile, "NEWMD5SUM"); + LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); + linkFile.appendEntry(simpleFile, "NEWMD5SUM", "Test1"); assertEquals(3, linkFile.getEntries().size()); } @Test public void testGetLatestPath() { when(mockSource.getFullPath()).thenReturn("/mnt/csa/projects/test/x.link"); - LinkFile linkFile = LinkFile.load(mockSource, v1LinkFileContent); + LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); assertEquals("/mnt/csa/projects/test/source/v1/ver2.gorz", linkFile.getLatestEntryUrl()); - linkFile.appendEntry(simpleFile, "NEWMD5SUM"); + linkFile.appendEntry(simpleFile, "NEWMD5SUM", ""); assertEquals("/mnt/csa/projects/test/" + simpleFile, linkFile.getLatestEntryUrl()); } @Test public void 
testGetTimedPath() { when(mockSource.getFullPath()).thenReturn("/mnt/csa/projects/test/x.link"); - LinkFile linkFile = LinkFile.load(mockSource, v1LinkFileContent); + LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); assertEquals(null, linkFile.getEntryUrl(1734261690790L - 1000)); @@ -125,7 +125,7 @@ public void testSaveLinkFileV1ToV1() throws IOException { public void testSaveLinkFileV0ToV0() throws IOException { var linkPath = workPath.resolve("test.link"); Files.writeString(linkPath, "a/b/c.gorz"); - LinkFile linkFile = LinkFile.load(new FileSource(linkPath.toString())); + LinkFile linkFile = new LinkFileV0(new FileSource(linkPath.toString())); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); linkFile.save(); String savedContent = Files.readString(linkPath); From 890c9eb9b590e44a865d04fa72712e9708688c39 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Thu, 30 Oct 2025 12:32:37 +0000 Subject: [PATCH 4/4] fix(ENGKNOW-2891): Change default query time. 
--- .../org/gorpipe/s3/table/ITestS3Table.java | 4 +++- .../main/scala/gorsat/Analysis/ForkWrite.scala | 4 ++-- .../scala/gorsat/process/PipeInstance.scala | 12 +----------- .../gorpipe/gor/driver/PluggableGorDriver.java | 6 ++++-- .../gorpipe/gor/driver/linkfile/LinkFile.java | 18 +++++++++++++++--- .../gor/model/DriverBackedFileReader.java | 8 ++++---- .../model/DriverBackedSecureFileReader.java | 2 +- .../java/org/gorpipe/gor/model/FileReader.java | 2 +- test/src/main/java/gorsat/TestUtils.java | 3 ++- 9 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/src/test/java/org/gorpipe/s3/table/ITestS3Table.java b/drivers/src/test/java/org/gorpipe/s3/table/ITestS3Table.java index 5926b0af..2a93e133 100644 --- a/drivers/src/test/java/org/gorpipe/s3/table/ITestS3Table.java +++ b/drivers/src/test/java/org/gorpipe/s3/table/ITestS3Table.java @@ -3,7 +3,9 @@ import gorsat.TestUtils; import org.gorpipe.base.security.BundledCredentials; import org.gorpipe.base.security.Credentials; +import org.gorpipe.gor.driver.linkfile.LinkFile; import org.gorpipe.gor.driver.meta.DataType; +import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource; import org.gorpipe.gor.model.DriverBackedFileReader; import org.gorpipe.gor.table.dictionary.DictionaryTable; import org.gorpipe.gor.table.lock.NoTableLock; @@ -224,7 +226,7 @@ public void testBucketizeLocalTableS3DataS3DataBucketsRelative() throws IOExcept String localBucketFile = PathUtils.resolve(table.getRootPath(), DataUtil.toFile( bucket, DataType.LINK)); - Assert.assertEquals("s3data://project/" + bucket + "\n", Files.readString(Path.of(localBucketFile))); + Assert.assertEquals("s3data://project/" + bucket, LinkFile.load(new FileSource(localBucketFile)).getLatestEntryUrl()); String[] bucketResult = runGorPipeServer("gor " + localBucketFile, workDirPath.resolve("some_project").toString(), fileReader.getSecurityContext()).split("\n"); diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala 
b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 2b6bb7c5..115bc351 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -340,11 +340,11 @@ case class ForkWrite(forkCol: Int, // Use the nonsecure driver file reader as this is an exception from the write no links rule. val fileReader = new DriverBackedFileReader(session.getProjectContext.getFileReader.getSecurityContext, - session.getProjectContext.getProjectRoot) + session.getProjectContext.getProjectRoot, session.getProjectContext.getFileReader.getQueryTime) LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource]) .appendMeta(linkFileMeta) .appendEntry(linkFileContent, md5, linkFileInfo, fileReader) - .save() + .save(session.getProjectContext.getFileReader.getQueryTime) } } diff --git a/gortools/src/main/scala/gorsat/process/PipeInstance.scala b/gortools/src/main/scala/gorsat/process/PipeInstance.scala index 602c6a7a..e25bbff2 100644 --- a/gortools/src/main/scala/gorsat/process/PipeInstance.scala +++ b/gortools/src/main/scala/gorsat/process/PipeInstance.scala @@ -37,7 +37,7 @@ import gorsatGorIterator.{MemoryMonitorUtil, gorsatGorIterator} import process.GorJavaUtilities.CmdParams import process.GorPipe.brsConfig import org.gorpipe.exceptions.{GorParsingException, GorResourceException, GorSystemException, GorUserException} -import org.gorpipe.gor.model.{DriverBackedFileReader, FileReader, GenomicIterator} +import org.gorpipe.gor.model.GenomicIterator import org.gorpipe.gor.monitor.GorMonitor import org.gorpipe.gor.session.{GorContext, GorSession, ProjectContext} import org.gorpipe.gor.util.{CommandSubstitutions, StringUtil} @@ -49,8 +49,6 @@ object PipeInstance { private val logger = LoggerFactory.getLogger(this.getClass) - val DEFAULT_REQUEST_ID: String = "" - // Set the dyniterator iterator create function DynIterator.createGorIterator = (context: GorContext) => createGorIterator(context) 
@@ -222,14 +220,6 @@ class PipeInstance(context: GorContext, outputValidateOrder: Boolean = false) ex def getPipeStep : Analysis = thePipeStep - def createFileReader(gorRoot: String): FileReader = { - if (!StringUtil.isEmpty(gorRoot)) { - new DriverBackedFileReader(null, gorRoot) - } else { - ProjectContext.DEFAULT_READER - } - } - @Deprecated def subProcessArguments(pipeOptions: PipeOptions): GenomicIterator = { init(pipeOptions.query, pipeOptions.stdIn, "") diff --git a/model/src/main/java/org/gorpipe/gor/driver/PluggableGorDriver.java b/model/src/main/java/org/gorpipe/gor/driver/PluggableGorDriver.java index 1f2c5abe..68af0e17 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/PluggableGorDriver.java +++ b/model/src/main/java/org/gorpipe/gor/driver/PluggableGorDriver.java @@ -40,6 +40,7 @@ import org.gorpipe.gor.model.GenomicIteratorBase; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; +import org.gorpipe.util.Strings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -227,9 +228,10 @@ private DataSource handleLinks(DataSource source) throws IOException { var sourceRef = getSourceRef(source, LinkFile.load((StreamSource)source).getEntryUrl(source.getSourceReference().queryTime), null); sourceRef.setLinkLastModified(source.getSourceMetadata().getLastModified()); source.close(); - DataSource rawLinkSource = resolveDataSource(sourceRef); + DataSource rawLinkSource = !Strings.isNullOrEmpty(sourceRef.getUrl()) ? resolveDataSource(sourceRef) : null; if (rawLinkSource == null) { - throw new GorResourceException("Link file content: " + sourceRef.getUrl() + " can not be resolved", sourceRef.getUrl()); + throw new GorResourceException("Link file content can not be resolved. 
Link file: %s, content: %s" + .formatted(source.getName(), sourceRef.getUrl()), sourceRef.getUrl()); } DataSource fromLinkSource = wrap(rawLinkSource); fromLinkSource.getSourceReference().setCreatedFromLink(true); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index 3c3fb5c6..b7f94d3c 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -50,6 +50,8 @@ */ public abstract class LinkFile { + private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(LinkFile.class); + public static final int LINK_FILE_MAX_SIZE = 10000; private static final boolean USE_LINK_CACHE = Boolean.parseBoolean(System.getProperty("gor.driver.cache.link", "true")); @@ -155,6 +157,12 @@ public LinkFileEntry getEntry(long timestamp) { while (index >= 0 && entries.get(index).timestamp() > timestamp) { index--; } + if (index < 0) { + log.warn("No entry found for timestamp: %d in link file: %s".formatted(timestamp, source.getFullPath())); + for (var entry : entries) { + log.warn(" Entry: " + entry.url() + " ts: " + entry.timestamp()); + } + } return index >= 0 ? entries.get(index) : null; } @@ -200,18 +208,22 @@ public LinkFile appendMeta(String meta) { } public void save() { + save(-1); + } + + public void save(long timestamp) { try (OutputStream os = source.getOutputStream()) { - save(os); + save(os, timestamp); } catch (IOException e) { throw new GorResourceException("Could not save: " + source.getFullPath(), source.getFullPath(), e); } } - private void save(OutputStream os) { + private void save(OutputStream os, long timestamp) { var content = new StringBuilder(getHeader()); if (!entries.isEmpty()) { - var currentTimestamp = System.currentTimeMillis(); + var currentTimestamp = timestamp > 0 ? 
timestamp : System.currentTimeMillis(); entries.stream() .skip(Math.max(0, entries.size() - getEntriesCountMax())) .filter(entry -> entry.timestamp() <= 0 || currentTimestamp - entry.timestamp() <= getEntriesAgeMax()) diff --git a/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java b/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java index 97c14f41..888e4b50 100644 --- a/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java @@ -72,14 +72,14 @@ public class DriverBackedFileReader extends FileReader { private final String securityContext; protected final String commonRoot; - protected long queryTime; + protected long queryTime = -1; // -1 means not set, in which case we use current time at time when we query for the queryTime. public DriverBackedFileReader(String securityContext) { - this(securityContext, null, System.currentTimeMillis()); + this(securityContext, null, -1); } public DriverBackedFileReader(String securityContext, String commonRoot) { - this(securityContext, commonRoot, System.currentTimeMillis()); + this(securityContext, commonRoot, -1); } public DriverBackedFileReader(String securityContext, String commonRoot, long queryTime) { @@ -99,7 +99,7 @@ public String getCommonRoot() { @Override public long getQueryTime() { - return queryTime; + return queryTime >= 0 ? 
queryTime : System.currentTimeMillis(); } @Override diff --git a/model/src/main/java/org/gorpipe/gor/model/DriverBackedSecureFileReader.java b/model/src/main/java/org/gorpipe/gor/model/DriverBackedSecureFileReader.java index ebe9eb5d..5820b4e0 100644 --- a/model/src/main/java/org/gorpipe/gor/model/DriverBackedSecureFileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/DriverBackedSecureFileReader.java @@ -62,7 +62,7 @@ public class DriverBackedSecureFileReader extends DriverBackedFileReader { */ public DriverBackedSecureFileReader(String commonRoot, String securityContext, AccessControlContext accessControlContext) { - this(commonRoot, securityContext, accessControlContext, System.currentTimeMillis()); + this(commonRoot, securityContext, accessControlContext, -1); } public DriverBackedSecureFileReader(String commonRoot, diff --git a/model/src/main/java/org/gorpipe/gor/model/FileReader.java b/model/src/main/java/org/gorpipe/gor/model/FileReader.java index 11591e2d..87a8214d 100644 --- a/model/src/main/java/org/gorpipe/gor/model/FileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/FileReader.java @@ -343,7 +343,7 @@ public void writeLinkIfNeeded(String url) throws IOException { DataSource dataSource = resolveUrl(url, true); if (dataSource.forceLink()) { DataSource linkDataSource = resolveUrl(dataSource.getProjectLinkFile(), true); - LinkFile.create((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(); + LinkFile.create((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(getQueryTime()); } } diff --git a/test/src/main/java/gorsat/TestUtils.java b/test/src/main/java/gorsat/TestUtils.java index 8a94e228..5b869488 100644 --- a/test/src/main/java/gorsat/TestUtils.java +++ b/test/src/main/java/gorsat/TestUtils.java @@ -46,6 +46,7 @@ import java.io.*; import java.lang.management.ManagementFactory; import java.lang.management.OperatingSystemMXBean; +import java.lang.reflect.Array; import 
java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -134,7 +135,7 @@ public static String runGorPipe(String... args) { return runGorPipe(args, () -> { PipeOptions options = new PipeOptions(); options.parseOptions(args); - return new CLISessionFactory(options, null).create(); + return new TestSessionFactory(options, null, false, null, new String[]{"user_data"}).create(); }); }