From cc6da458e6b7cb43ee67ee65c5d65bb14e045b3e Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 2 Nov 2025 22:52:27 +0000 Subject: [PATCH 1/7] fix(ENGKNOW-2779): Add support for managed data location if -link is used. --- .../Script/BaseScriptExecutionEngine.java | 17 ++- .../scala/gorsat/Analysis/ForkWrite.scala | 128 +++++++++++++----- .../main/scala/gorsat/Commands/Write.scala | 4 +- .../src/test/java/gorsat/UTestAppend.java | 1 - .../src/test/java/gorsat/UTestGorWrite.java | 28 +++- .../gorpipe/gor/driver/GorDriverConfig.java | 6 + .../gorpipe/gor/driver/linkfile/LinkFile.java | 53 +++++++- .../gor/driver/linkfile/LinkFileMeta.java | 7 + 8 files changed, 195 insertions(+), 49 deletions(-) diff --git a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java index ccfe247d..8ae1b656 100644 --- a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java +++ b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java @@ -15,6 +15,7 @@ import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; import org.gorpipe.gor.util.Tuple; +import org.gorpipe.util.Strings; import java.io.IOException; import java.util.*; @@ -68,15 +69,19 @@ private Optional> resolveCache(GorContext context, String var split = CommandParseUtilities.quoteSafeSplit(lastCommand.substring(6).trim(), ' '); var args = write.validateArguments(split); String lastField; - if (args.length==0) { - var writeFilePath = context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix); - writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath); - queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath); - lastField = writeFilePath; + if (args.length == 0) { + if (queryBlock.signature() != null) { + var writeFilePath = 
context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix); + writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath); + queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath); + lastField = writeFilePath; + } else { + lastField = null; + } } else { lastField = args[0].trim(); } - return !lastField.startsWith("-") ? resolveForkPathParent(lastField) : Optional.empty(); + return !Strings.isNullOrEmpty(lastField) && !lastField.startsWith("-") ? resolveForkPathParent(lastField) : Optional.empty(); } public Optional> getExplicitWrite(GorContext context, ExecutionBlock queryBlock) { diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 115bc351..6f8cb5b5 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -32,7 +32,7 @@ import org.gorpipe.gor.binsearch.GorIndexType import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1} import org.gorpipe.gor.driver.meta.DataType import org.gorpipe.gor.driver.providers.stream.sources.StreamSource -import org.gorpipe.gor.model.{DriverBackedFileReader, GorMeta, GorOptions, Row} +import org.gorpipe.gor.model.{DriverBackedFileReader, Row} import org.gorpipe.gor.session.{GorSession, ProjectContext} import org.gorpipe.gor.table.util.PathUtils import org.gorpipe.gor.util.DataUtil @@ -43,6 +43,60 @@ import java.util.UUID import scala.collection.mutable import scala.collection.mutable.ArrayBuffer +/* + +NOTES: +1. Write for pgor is generally forbidden, except when writing gord files. +2. Explicit link file writing is not allowed; link files to given data files are allowed using the -link option. + +The GOR write has several different "modes" of operation: + +1. Single file write + gor ... 
| write output.gor + + In this mode a single file is created with the name output.gor. If the file already exists it will be overwritten. + +2. Forked write with variable in filename + gor ... | write -f col output_#{fork}.gor + + In this mode a file is created for each fork value with the fork value replacing the #{fork} variable in the filename. + +3. GOR dictionary write + pgor ... | write output.gord + + In this mode a GOR dictionary file is created (if it does not already exist) and a file for each part is created, + using fingerprints for the file names. An additional gord file, thedict.gord, is created within the folder. + +4. Forked directory write + gor ... | write -f col -d output_dir/ + + In this mode a directory is created (if it does not already exist) and subfolders are created for each fork value. + +5. Link file write + gor ... | write -link output.link + + In this mode a data file with a unique name is created in the default data location and a link file with the specified name + is created pointing to the data file. + +Modes that do not work: + +6. Directory write + gor ... | write -d output_dir/ + + In this mode a directory is created (if it does not already exist) and a file with a unique name is created + inside the directory. If the directory already exists the file will be created inside the existing directory. + + This works for gor but is kind of pointless. + Does not work for pgor, which could make sense to allow gor write. + + 7. Forked link file write + gor ... | write -f col -link output_#{fork}.gor + + In this mode a data file with a unique name is created in the default data location for each fork value and link files with the specified name + are created pointing to the data files. + This mode is not supported. 
+ */ + case class OutputOptions(remove: Boolean = false, columnCompress: Boolean = false, md5: Boolean = false, @@ -80,13 +134,14 @@ case class ForkWrite(forkCol: Int, if(options.useFolder.nonEmpty) { val folder = options.useFolder.get ensureDir(projectContext, folder) - val fn = if(fullFileName.isEmpty) { - val uuid = UUID.randomUUID().toString - val ending = folder.substring(folder.lastIndexOf('.')) - s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}" - } else { - fullFileName - } + val fn = if (fullFileName.isEmpty) { + val uuid = UUID.randomUUID().toString + val folderEnding = FilenameUtils.getExtension(folder) + val ending = if (folderEnding.nonEmpty) "." + folderEnding else (if (options.nor) DataType.NOR.suffix else DataType.GORZ.suffix) + s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}" + } else { + fullFileName + } val dir = if(folder.endsWith("/")) folder else folder + "/" @@ -100,9 +155,20 @@ case class ForkWrite(forkCol: Int, fileName = dir + fn } } else { - fileName = fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue) + fileName = if (forkCol >= 0) { + fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue) + } else { + if (fullFileName.isEmpty && options.linkFile.nonEmpty) { + // Infer the full file name from the link (and defautl locations) + LinkFile.inferDataFileNameFromLinkFile(projectContext.getFileReader.resolveUrl(options.linkFile).asInstanceOf[StreamSource]) + } else { + fullFileName + } + } + ensureDir(projectContext, fileName, parent = true) } + var fileOpen = false var headerWritten = false var rowBuffer = new ArrayBuffer[Row] @@ -112,11 +178,11 @@ case class ForkWrite(forkCol: Int, def ensureDir(projectContext: ProjectContext, path: String, parent: Boolean = false): Unit = { val fileReader = projectContext.getFileReader val dir = if (parent) { - val parent = PathUtils.getParent(path) - if (parent != null) parent else null - } else { - path - } + val 
parent = PathUtils.getParent(path) + if (parent != null) parent else null + } else { + path + } if (dir != null && !fileReader.exists(dir)) { fileReader.createDirectories(dir) @@ -283,7 +349,7 @@ case class ForkWrite(forkCol: Int, } }) } else { - val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(fullFileName, options.linkFile, options.linkFileMeta) + val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(singleFileHolder.fileName, options.linkFile, options.linkFileMeta) if (linkFile.nonEmpty) { writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5, linkFileInfo) @@ -300,24 +366,22 @@ case class ForkWrite(forkCol: Int, } } - private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = { - var linkFile = optLinkFile - var linkFileContent = "" - if (fileName.nonEmpty) { - if (linkFile.isEmpty) { - val dataSource = session.getProjectContext.getFileReader.resolveUrl(fileName, true) - if (dataSource != null && dataSource.forceLink()) { - linkFile = dataSource.getProjectLinkFile - linkFileContent = dataSource.getProjectLinkFileContent - } - } else { - linkFileContent = PathUtils.resolve(session.getProjectContext.getProjectRoot, fileName) + private def extractLink(source: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = { + var linkFile = LinkFile.validateAndUpdateLinkFileName(optLinkFile) + var linkFileContent = if (linkFile.nonEmpty) PathUtils.resolve(session.getProjectContext.getProjectRoot, source) else "" + + if (linkFile.isEmpty && source.nonEmpty) { + // Check if link file is forced from the source + val dataSource = session.getProjectContext.getFileReader.resolveUrl(source, true) + if (dataSource != null && dataSource.forceLink()) { + linkFile = dataSource.getProjectLinkFile + linkFileContent = dataSource.getProjectLinkFileContent } } - var linkFileMeta = "" var linkFileInfo = "" - if 
(!Strings.isNullOrEmpty(optLinkFileMeta)) { + + if (linkFile.nonEmpty && !Strings.isNullOrEmpty(optLinkFileMeta)) { for (s <- CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) { val l = s.trim if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) { @@ -333,16 +397,14 @@ case class ForkWrite(forkCol: Int, private def writeLinkFile(linkFilePath: String, linkFileContent: String, linkFileMeta: String = "", md5: String = null, linkFileInfo: String = null) : Unit = { - val linkFileToWrite = LinkFile.validateAndUpdateLinkFileName(linkFilePath) - // Validate that we can write to the location (skip link extension as writing links is always forbidden). - session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFileToWrite), true) + session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFilePath), true) // Use the nonsecure driver file reader as this is an exception from the write no links rule. val fileReader = new DriverBackedFileReader(session.getProjectContext.getFileReader.getSecurityContext, session.getProjectContext.getProjectRoot, session.getProjectContext.getFileReader.getQueryTime) - LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource]) + LinkFile.load(fileReader.resolveUrl(linkFilePath, true).asInstanceOf[StreamSource]) .appendMeta(linkFileMeta) .appendEntry(linkFileContent, md5, linkFileInfo, fileReader) .save(session.getProjectContext.getFileReader.getQueryTime) diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index e065097c..bf4920ee 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -66,7 +66,9 @@ class Write extends CommandInfo("WRITE", val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" val linkMetaOpt = if (hasOption(args, "-linkmeta")) 
stringValueOfOption(args, "-linkmeta") else "" - if(fileName.isEmpty && useFolder.isEmpty) throw new GorResourceException("No file or folder specified",""); + if (fileName.isEmpty && useFolder.isEmpty && linkOpt.isEmpty) { + throw new GorResourceException("No file or folder specified","") + }; val infer = hasOption(args, "-inferschema") val maxseg = hasOption(args, "-maxseg") diff --git a/gortools/src/test/java/gorsat/UTestAppend.java b/gortools/src/test/java/gorsat/UTestAppend.java index 9bd6b33b..4f1ec896 100644 --- a/gortools/src/test/java/gorsat/UTestAppend.java +++ b/gortools/src/test/java/gorsat/UTestAppend.java @@ -107,7 +107,6 @@ public void testWriteNewNoHeaderNor() throws IOException { } } - @Test public void testWriteAppendNorz() { Path file = workPath.resolve("file.norz"); diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index b60a67ba..21e3d7e4 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -26,14 +26,16 @@ import org.gorpipe.exceptions.GorParsingException; import org.gorpipe.exceptions.GorSecurityException; import org.gorpipe.exceptions.GorSystemException; +import org.gorpipe.gor.driver.GorDriverConfig; import org.gorpipe.gor.driver.linkfile.LinkFile; import org.gorpipe.gor.driver.linkfile.LinkFileMeta; -import org.gorpipe.gor.driver.linkfile.LinkFileV1; import org.gorpipe.gor.driver.meta.DataType; import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource; import org.gorpipe.gor.model.BaseMeta; import org.gorpipe.gor.util.DataUtil; import org.junit.*; +import org.junit.contrib.java.lang.system.EnvironmentVariables; +import org.junit.contrib.java.lang.system.RestoreSystemProperties; import org.junit.rules.TemporaryFolder; import java.io.*; @@ -57,6 +59,13 @@ public class UTestGorWrite { public TemporaryFolder tempRoot = new TemporaryFolder(); private Path tempRootPath; + @Rule + public final 
RestoreSystemProperties restoreSystemProperties = new RestoreSystemProperties(); + + @Rule + public final EnvironmentVariables environmentVariables + = new EnvironmentVariables(); + private String defaultV1LinkFileHeader; private String testdbsnpTestLine1 = """ Chrom\tPOS\treference\tallele\tdifferentrsIDs @@ -222,6 +231,22 @@ public void testWriteLinkFileAndMetaWithInfo() throws IOException { Assert.assertEquals("Some file info", linkFile.getLatestEntry().info()); } + @Test + public void testWriteLinkFileWithInferFileName() throws IOException { + + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, workDirPath.resolve("managed_data").toString()); + TestUtils.runGorPipe("gorrow chr1,1,100 | write -link ltest.gor", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data").toString())); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + Assert.assertEquals("#chrom\tbpStart\tbpStop\nchr1\t1\t100\n", + Files.readString(Path.of(linkFile.getLatestEntry().url()))); + + } + @Test public void testTxtWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); @@ -239,7 +264,6 @@ public void testTxtFolderWriteServer() throws IOException { TestUtils.runGorPipeCount(args, true); } - @Ignore @Test public void testFolderWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); diff --git a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java index 345723ff..315a8793 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java +++ b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java @@ -181,4 +181,10 @@ public interface GorDriverConfig 
extends Config { @Key("org.gorpipe.gor.driver.plink.executable") @DefaultValue("plink2") String plinkExecutable(); + + String GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL = "GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL"; + @Documentation("Whether to enable link folders.") + @Key(GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL) + @DefaultValue("") + String managedLinkDataFilesURL(); } \ No newline at end of file diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index b7f94d3c..0fa85ede 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -4,6 +4,7 @@ import com.github.benmanes.caffeine.cache.Caffeine; import com.google.common.util.concurrent.UncheckedExecutionException; import org.gorpipe.exceptions.GorResourceException; +import org.gorpipe.gor.driver.GorDriverConfig; import org.gorpipe.gor.driver.meta.SourceReference; import org.gorpipe.gor.driver.providers.stream.StreamUtils; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; @@ -24,11 +25,12 @@ * Link file format, a valid nor format. Example: * * ## VERSION= + * ## SERIAL= * ## ENTRIES_COUNT_MAX= * ## ENTRIES_AGE_MAX= - * # FILE\tTIMESTAMP\tMD5\tSERIAL - * source/var/var.gorz\t1734304890790\tABCDEAF13422\t1 - * source/var/var.gorz\t1734305124533\t334DEAF13422\t2 + * # FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO + * source/var/var.gorz\t1734304890790\tABCDEAF13422\t1\tSome info + * source/var/var.gorz\t1734305124533\t334DEAF13422\t2\tSome other info * * Notes: * 1. No timestamp or serial is treated as 0 (older). @@ -40,12 +42,13 @@ * - TIMESTAMP - in ISO data format or milliseconds since epoch, active time. * - MD5 - md5 checksum of the file or data the link points to. * - SERIAL - incrementing serial number for the link file entry. + * - INFO - free text info field. * 6, Required meta fields. 
* - VERSION - Link file format version. - * 7. Optional meta fields. + * 7. Optional meta fields. See: LinkFileMeta for complete list. + * - SERIAL - serial number of this link file. * - ENTRIES_COUNT_MAX - max entries to store in this file. * - ENTRIES_AGE_MAX - max age of entries in milliseconds. - * - * */ public abstract class LinkFile { @@ -75,13 +78,51 @@ public static LinkFile create(StreamSource source, String content) { } public static String validateAndUpdateLinkFileName(String linkFilePath) { - if (DataUtil.isLink(linkFilePath)) { + if (Strings.isNullOrEmpty(linkFilePath) || DataUtil.isLink(linkFilePath)) { return linkFilePath; } else { return DataUtil.toLink(linkFilePath); } } + /** + * Infer the data file name from the link file name. + * + * @param linkSource the link file path with the link extension + * @return the data file path + */ + public static String inferDataFileNameFromLinkFile(StreamSource linkSource) throws IOException { + if (linkSource == null || Strings.isNullOrEmpty(linkSource.getFullPath())) { + throw new IllegalArgumentException("Link file path is null or empty. Can not infer data file name."); + } + + var linkPath = linkSource.getSourceReference().getUrl(); + + if (PathUtils.isAbsolutePath(linkPath)) { + throw new IllegalArgumentException("Link file path is absolute. Can not infer data file name: " + linkSource.getFullPath()); + } + + var dataFileRootPath = ""; + + if (linkSource.exists()) { + var link = load(linkSource); + var linkDataFileRootPath = link.getMeta().getProperty(LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY); + if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { + dataFileRootPath = linkDataFileRootPath; + } + } + + if (Strings.isNullOrEmpty(dataFileRootPath)) { + dataFileRootPath = System.getenv(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL); + } + + if (Strings.isNullOrEmpty(dataFileRootPath)) { + throw new IllegalArgumentException("Link file data root path is not set. 
Can not infer data file name from link file: " + linkSource.getFullPath()); + } + + return PathUtils.resolve(dataFileRootPath, linkPath); + } + protected final StreamSource source; protected final LinkFileMeta meta; protected final List entries; // Entries sorted by time (oldest first) diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java index 860195a0..b37abb2f 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java @@ -6,8 +6,15 @@ import org.gorpipe.util.Strings; public class LinkFileMeta extends BaseMeta { + + // Max number of entries to keep track of in the link file. public static final String HEADER_ENTRIES_COUNT_MAX_KEY = "ENTRIES_COUNT_MAX"; + // Max age of entries to keep track of in the link file. public static final String HEADER_ENTRIES_AGE_MAX_KEY = "ENTRIES_AGE_MAX"; + // Root location for this link file's managed content data; when set it is used instead of the GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL default. + public static final String HEADER_CONTENT_LOCATION_MANAGED_KEY = "CONTENT_LOCATION_MANAGED"; + // Should the content lifecycle be managed (data deleted if the link is removed from the link file). + public static final String HEADER_CONTENT_LIFECYCLE_MANAGED_KEY = "CONTENT_LIFECYCLE_MANAGED"; public static final String[] DEFAULT_TABLE_HEADER = new String[] {"File", "Timestamp", "MD5", "Serial", "Info"}; From 4139d45bdb56b9d5ede6bb20524444ec9c6e7545 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Wed, 19 Nov 2025 17:24:10 +0000 Subject: [PATCH 2/7] fix(ENGKNOW-2779): Add support for managed data location if -link is used. 
--- gortools/src/test/java/gorsat/UTestGorWrite.java | 3 ++- .../org/gorpipe/gor/driver/GorDriverConfig.java | 2 +- .../org/gorpipe/gor/driver/linkfile/LinkFile.java | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 21e3d7e4..57f518e4 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -240,7 +240,8 @@ public void testWriteLinkFileWithInferFileName() throws IOException { var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); Assert.assertEquals(1, linkFile.getEntriesCount()); - Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data").toString())); + Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data/ltest").toString())); + Assert.assertTrue(linkFile.getLatestEntry().url().endsWith(".gor")); Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); Assert.assertEquals("#chrom\tbpStart\tbpStop\nchr1\t1\t100\n", Files.readString(Path.of(linkFile.getLatestEntry().url()))); diff --git a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java index 315a8793..d97c1496 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java +++ b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java @@ -183,7 +183,7 @@ public interface GorDriverConfig extends Config { String plinkExecutable(); String GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL = "GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL"; - @Documentation("Whether to enable link folders.") + @Documentation("Root location of managed data files for link files.") @Key(GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL) @DefaultValue("") String managedLinkDataFilesURL(); diff --git 
a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index 0fa85ede..5e12466b 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -3,6 +3,8 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.google.common.util.concurrent.UncheckedExecutionException; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.gorpipe.exceptions.GorResourceException; import org.gorpipe.gor.driver.GorDriverConfig; import org.gorpipe.gor.driver.meta.SourceReference; @@ -120,6 +122,19 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource) thro throw new IllegalArgumentException("Link file data root path is not set. Can not infer data file name from link file: " + linkSource.getFullPath()); } + String randomString = RandomStringUtils.random(8, true, true); + var linkPathSplit = linkPath.indexOf('.'); + if (linkPathSplit > 0) { + linkPath = "%s.%s.%s".formatted( + linkPath.substring(0, linkPathSplit), + randomString, + linkPath.substring(linkPathSplit + 1)); + } else { + linkPath = "%s.%s".formatted(linkPath, randomString); + } + + linkPath = linkPath.replaceAll("\\.link$", ""); + return PathUtils.resolve(dataFileRootPath, linkPath); } From 0b371e2962f600fa174798db5649bd1ac0e7949c Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Thu, 20 Nov 2025 18:09:33 +0000 Subject: [PATCH 3/7] feat(ENGKNOW-2903): Add support for multiple MDR servers. Add project to path. 
--- .../gorpipe/gor/driver/linkfile/LinkFile.java | 6 +++ .../gor/driver/linkfile/LinkFileTest.java | 51 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index cc04649f..816e20df 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -135,6 +135,12 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource) thro linkPath = linkPath.replaceAll("\\.link$", ""); + var project = linkSource.getSourceReference().getCommonRoot() != null + ? PathUtils.getFileName(linkSource.getSourceReference().getCommonRoot()) : ""; + if (!Strings.isNullOrEmpty(project)) { + dataFileRootPath = PathUtils.resolve(dataFileRootPath, project); + } + return PathUtils.resolve(dataFileRootPath, linkPath); } diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index b64d7a9a..f2bb7ff6 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -1,16 +1,22 @@ package org.gorpipe.gor.driver.linkfile; +import org.gorpipe.gor.driver.GorDriverConfig; +import org.gorpipe.gor.driver.meta.SourceReference; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource; import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import org.junit.contrib.java.lang.system.EnvironmentVariables; import org.junit.rules.TemporaryFolder; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; import static 
org.junit.Assert.*; import static org.mockito.Mockito.*; @@ -20,6 +26,10 @@ public class LinkFileTest { @Rule public TemporaryFolder workDir = new TemporaryFolder(); + @Rule + public final EnvironmentVariables environmentVariables + = new EnvironmentVariables(); + private StreamSource mockSource; private final String v1LinkFileContent = """ ## SERIAL = 1 @@ -154,4 +164,45 @@ public void testSaveLinkFileV1ToV0() throws IOException { String savedContent = Files.readString(linkPath); assertEquals(simpleFile, savedContent.trim()); } + + + @Test(expected = IllegalArgumentException.class) + public void testInferDataFileNameFromLinkFile_NullOrEmptyPath() throws Exception { + LinkFile.inferDataFileNameFromLinkFile(new FileSource("")); + } + + @Test(expected = IllegalArgumentException.class) + public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { + LinkFile.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.link")); + } + + @Test(expected = IllegalArgumentException.class) + public void testInferDataFileNameFromLinkFile_NoRootConfigured() throws Exception { + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, null); + LinkFile.inferDataFileNameFromLinkFile(new FileSource("x.link")); + } + + @Test + public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() throws Exception { + String root = "/managed/root"; + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + + String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource(new SourceReference("x.gor.link", null, "/projects/test", -1, null, null, false, false))); + assertNotNull(result); + assertTrue(result.startsWith(root + "/test/x")); + assertFalse(result.endsWith(".gor.link")); + assertNotEquals(result, root + "/test/x.gor.link"); + } + + @Test + public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithOutProject() throws Exception { + String root = "/managed/root"; + 
environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + + String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource("x.gor.link")); + assertNotNull(result); + assertTrue(result.startsWith(root + "/x")); + assertFalse(result.endsWith(".gor.link")); + assertNotEquals(result, root + "/x.gor.link"); + } } From f84b82e3cfa4ecbd31cbd7654ee1d1e29d857fd8 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Thu, 20 Nov 2025 18:30:09 +0000 Subject: [PATCH 4/7] feat(ENGKNOW-2903): Add support for multiple MDR servers. Add project to path. --- gortools/src/test/java/gorsat/UTestGorWrite.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index ca5b7c0f..4c0470d2 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -23,6 +23,7 @@ package gorsat; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.file.PathUtils; import org.gorpipe.exceptions.GorParsingException; import org.gorpipe.exceptions.GorSecurityException; import org.gorpipe.exceptions.GorSystemException; @@ -240,7 +241,7 @@ public void testWriteLinkFileWithInferFileName() throws IOException { var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); Assert.assertEquals(1, linkFile.getEntriesCount()); - Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data/ltest").toString())); + Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data/" + workDirPath.getFileName() + "/ltest").toString())); Assert.assertTrue(linkFile.getLatestEntry().url().endsWith(".gor")); Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); Assert.assertEquals("#chrom\tbpStart\tbpStop\nchr1\t1\t100\n", From c1daafa751034d938a896e10d9d846df43bf6955 Mon Sep 17 00:00:00 
2001 From: Gisli Magnusson Date: Sat, 22 Nov 2025 00:01:23 +0000 Subject: [PATCH 5/7] fix(ENGKNOW-2779): Add support for managed data location if -link is used. --- .../main/scala/gorsat/Analysis/ForkWrite.scala | 6 ++++-- .../src/test/java/gorsat/UTestGorWrite.java | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 6f8cb5b5..92835e32 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -30,7 +30,7 @@ import org.apache.commons.lang3.StringUtils import org.gorpipe.exceptions.GorResourceException import org.gorpipe.gor.binsearch.GorIndexType import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1} -import org.gorpipe.gor.driver.meta.DataType +import org.gorpipe.gor.driver.meta.{DataType, SourceReference} import org.gorpipe.gor.driver.providers.stream.sources.StreamSource import org.gorpipe.gor.model.{DriverBackedFileReader, Row} import org.gorpipe.gor.session.{GorSession, ProjectContext} @@ -159,8 +159,10 @@ case class ForkWrite(forkCol: Int, fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue) } else { if (fullFileName.isEmpty && options.linkFile.nonEmpty) { + val linkSourceRef = new SourceReference(options.linkFile, null, projectContext.getFileReader.getCommonRoot, null, null, true); // Infer the full file name from the link (and defautl locations) - LinkFile.inferDataFileNameFromLinkFile(projectContext.getFileReader.resolveUrl(options.linkFile).asInstanceOf[StreamSource]) + LinkFile.inferDataFileNameFromLinkFile( + projectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource]); } else { fullFileName } diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 4c0470d2..3e78f21b 100644 --- 
a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -249,6 +249,24 @@ public void testWriteLinkFileWithInferFileName() throws IOException { } + @Test + public void testWriteLinkFileWithInferFileNameForExistingLink() throws IOException { + + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, workDirPath.resolve("managed_data").toString()); + TestUtils.runGorPipe("gorrow chr1,1,100 | write -link ltest.gor", "-gorroot", workDirPath.toString()); + TestUtils.runGorPipe("gorrow chr1,1,101 | write -link ltest.gor", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); + + Assert.assertEquals(2, linkFile.getEntriesCount()); + Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data/" + workDirPath.getFileName() + "/ltest").toString())); + Assert.assertTrue(linkFile.getLatestEntry().url().endsWith(".gor")); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + Assert.assertEquals("#chrom\tbpStart\tbpStop\nchr1\t1\t101\n", + Files.readString(Path.of(linkFile.getLatestEntry().url()))); + + } + @Test public void testTxtWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); From 330e77b096f628e52ebd25af3ca29127e146b194 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 22 Nov 2025 00:14:55 +0000 Subject: [PATCH 6/7] fix(ENGKNOW-2779): Add support for managed data location if -link is used. 
--- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6b2aa47d..5c19bcfb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,7 +142,7 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - if: ${{ github.ref == 'refs/heads/main' }} + #if: ${{ github.ref == 'refs/heads/main' }} needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: From e43c355e70d58a8e7d19b9a5fc2bd39e7d758432 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 22 Nov 2025 01:01:38 +0000 Subject: [PATCH 7/7] fix(ENGKNOW-2779): Add support for managed data location if -link is used. --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5c19bcfb..6b2aa47d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,7 +142,7 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - #if: ${{ github.ref == 'refs/heads/main' }} + if: ${{ github.ref == 'refs/heads/main' }} needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: