Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.gorpipe.gor.table.util.PathUtils;
import org.gorpipe.gor.util.DataUtil;
import org.gorpipe.gor.util.Tuple;
import org.gorpipe.util.Strings;

import java.io.IOException;
import java.util.*;
Expand Down Expand Up @@ -68,15 +69,19 @@ private Optional<Tuple<String,Boolean>> resolveCache(GorContext context, String
var split = CommandParseUtilities.quoteSafeSplit(lastCommand.substring(6).trim(), ' ');
var args = write.validateArguments(split);
String lastField;
if (args.length==0) {
var writeFilePath = context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix);
writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath);
queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath);
lastField = writeFilePath;
if (args.length == 0) {
if (queryBlock.signature() != null) {
var writeFilePath = context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix);
writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath);
queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath);
lastField = writeFilePath;
} else {
lastField = null;
}
} else {
lastField = args[0].trim();
}
return !lastField.startsWith("-") ? resolveForkPathParent(lastField) : Optional.empty();
return !Strings.isNullOrEmpty(lastField) && !lastField.startsWith("-") ? resolveForkPathParent(lastField) : Optional.empty();
}

public Optional<Tuple<String,Boolean>> getExplicitWrite(GorContext context, ExecutionBlock queryBlock) {
Expand Down
128 changes: 95 additions & 33 deletions gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import org.gorpipe.gor.binsearch.GorIndexType
import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1}
import org.gorpipe.gor.driver.meta.DataType
import org.gorpipe.gor.driver.providers.stream.sources.StreamSource
import org.gorpipe.gor.model.{DriverBackedFileReader, GorMeta, GorOptions, Row}
import org.gorpipe.gor.model.{DriverBackedFileReader, Row}
import org.gorpipe.gor.session.{GorSession, ProjectContext}
import org.gorpipe.gor.table.util.PathUtils
import org.gorpipe.gor.util.DataUtil
Expand All @@ -43,6 +43,60 @@ import java.util.UUID
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

/*

NOTES:
1. Write for pgor is generally forbidden, except when writing gord files.
2. Explict link file writing is not allowed, link files to given data files are allowed using the -link option.

The GOR write has several different "modes" of operation:

1. Single file write
gor ... | write output.gor

In this mode a single file is created with the name output.gor. If the file already exists it will be overwritten.

2. Forked write with variable in filename
gor ... | write -f col output_#{fork}.gor

In this mode a file is created for each fork value with the fork value replacing the #{fork} variable in the filename.

3. GOR dictionary write
pgor ... | write output.gord

In this mode a GOR dictionary file is created (if it does not already exist) and file for each part is created,
using fingerprints for the file names. Additional gord file, thedict.gord, is creaate within the folder.

4. Forked directory write
gor ... | write -f col -d output_dir/

In this mode a directory is created (if it does not already exist) and a subfolders are created for each fork value.

5. Link file write
gor ... | write -link output.link

In this mode a data file with a unique name is created in the default data location and a link file with the specified name
is created pointing to the data file.

Modes that do not work:

6. Directory write
gor ... | write -d output_dir/

In this mode a directory is created (if it does not already exist) and a file with a unique name is created
inside the directory. If the directory already exists the file will be created inside the existing directory.

This works for gor but is kind of pointless.
Does not work for pgor, which could make sense to allow gor write.

7. Forked link file write
gor ... | write -f col -link output_#{fork}.gor

In this mode a data file with a unique name is created in the default data location for each fork value and link files with the specified name
are created pointing to the data files.
This mode is not supported.
*/

case class OutputOptions(remove: Boolean = false,
columnCompress: Boolean = false,
md5: Boolean = false,
Expand Down Expand Up @@ -80,13 +134,14 @@ case class ForkWrite(forkCol: Int,
if(options.useFolder.nonEmpty) {
val folder = options.useFolder.get
ensureDir(projectContext, folder)
val fn = if(fullFileName.isEmpty) {
val uuid = UUID.randomUUID().toString
val ending = folder.substring(folder.lastIndexOf('.'))
s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}"
} else {
fullFileName
}
val fn = if (fullFileName.isEmpty) {
val uuid = UUID.randomUUID().toString
val folderEnding = FilenameUtils.getExtension(folder)
val ending = if (folderEnding.nonEmpty) "." + folderEnding else (if (options.nor) DataType.NOR.suffix else DataType.GORZ.suffix)
s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}"
} else {
fullFileName
}

val dir = if(folder.endsWith("/")) folder else folder + "/"

Expand All @@ -100,9 +155,20 @@ case class ForkWrite(forkCol: Int,
fileName = dir + fn
}
} else {
fileName = fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue)
fileName = if (forkCol >= 0) {
fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue)
} else {
if (fullFileName.isEmpty && options.linkFile.nonEmpty) {
// Infer the full file name from the link (and defautl locations)
LinkFile.inferDataFileNameFromLinkFile(projectContext.getFileReader.resolveUrl(options.linkFile).asInstanceOf[StreamSource])
} else {
fullFileName
}
}

ensureDir(projectContext, fileName, parent = true)
}

var fileOpen = false
var headerWritten = false
var rowBuffer = new ArrayBuffer[Row]
Expand All @@ -112,11 +178,11 @@ case class ForkWrite(forkCol: Int,
def ensureDir(projectContext: ProjectContext, path: String, parent: Boolean = false): Unit = {
val fileReader = projectContext.getFileReader
val dir = if (parent) {
val parent = PathUtils.getParent(path)
if (parent != null) parent else null
} else {
path
}
val parent = PathUtils.getParent(path)
if (parent != null) parent else null
} else {
path
}

if (dir != null && !fileReader.exists(dir)) {
fileReader.createDirectories(dir)
Expand Down Expand Up @@ -283,7 +349,7 @@ case class ForkWrite(forkCol: Int,
}
})
} else {
val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(fullFileName, options.linkFile, options.linkFileMeta)
val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(singleFileHolder.fileName, options.linkFile, options.linkFileMeta)

if (linkFile.nonEmpty) {
writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5, linkFileInfo)
Expand All @@ -300,24 +366,22 @@ case class ForkWrite(forkCol: Int,
}
}

private def extractLink(fileName: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = {
var linkFile = optLinkFile
var linkFileContent = ""
if (fileName.nonEmpty) {
if (linkFile.isEmpty) {
val dataSource = session.getProjectContext.getFileReader.resolveUrl(fileName, true)
if (dataSource != null && dataSource.forceLink()) {
linkFile = dataSource.getProjectLinkFile
linkFileContent = dataSource.getProjectLinkFileContent
}
} else {
linkFileContent = PathUtils.resolve(session.getProjectContext.getProjectRoot, fileName)
private def extractLink(source: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = {
var linkFile = LinkFile.validateAndUpdateLinkFileName(optLinkFile)
var linkFileContent = if (linkFile.nonEmpty) PathUtils.resolve(session.getProjectContext.getProjectRoot, source) else ""

if (linkFile.isEmpty && source.nonEmpty) {
// Check if link file is forced from the source
val dataSource = session.getProjectContext.getFileReader.resolveUrl(source, true)
if (dataSource != null && dataSource.forceLink()) {
linkFile = dataSource.getProjectLinkFile
linkFileContent = dataSource.getProjectLinkFileContent
}
}

var linkFileMeta = ""
var linkFileInfo = ""
if (!Strings.isNullOrEmpty(optLinkFileMeta)) {

if (linkFile.nonEmpty && !Strings.isNullOrEmpty(optLinkFileMeta)) {
for (s <- CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) {
val l = s.trim
if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) {
Expand All @@ -333,16 +397,14 @@ case class ForkWrite(forkCol: Int,

private def writeLinkFile(linkFilePath: String, linkFileContent: String,
linkFileMeta: String = "", md5: String = null, linkFileInfo: String = null) : Unit = {
val linkFileToWrite = LinkFile.validateAndUpdateLinkFileName(linkFilePath)

// Validate that we can write to the location (skip link extension as writing links is always forbidden).
session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFileToWrite), true)
session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFilePath), true)

// Use the nonsecure driver file reader as this is an exception from the write no links rule.
val fileReader = new DriverBackedFileReader(session.getProjectContext.getFileReader.getSecurityContext,
session.getProjectContext.getProjectRoot, session.getProjectContext.getFileReader.getQueryTime)

LinkFile.load(fileReader.resolveUrl(linkFileToWrite, true).asInstanceOf[StreamSource])
LinkFile.load(fileReader.resolveUrl(linkFilePath, true).asInstanceOf[StreamSource])
.appendMeta(linkFileMeta)
.appendEntry(linkFileContent, md5, linkFileInfo, fileReader)
.save(session.getProjectContext.getFileReader.getQueryTime)
Expand Down
4 changes: 3 additions & 1 deletion gortools/src/main/scala/gorsat/Commands/Write.scala
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ class Write extends CommandInfo("WRITE",
val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else ""
val linkMetaOpt = if (hasOption(args, "-linkmeta")) stringValueOfOption(args, "-linkmeta") else ""

if(fileName.isEmpty && useFolder.isEmpty) throw new GorResourceException("No file or folder specified","");
if (fileName.isEmpty && useFolder.isEmpty && linkOpt.isEmpty) {
throw new GorResourceException("No file or folder specified","")
};

val infer = hasOption(args, "-inferschema")
val maxseg = hasOption(args, "-maxseg")
Expand Down
1 change: 0 additions & 1 deletion gortools/src/test/java/gorsat/UTestAppend.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ public void testWriteNewNoHeaderNor() throws IOException {
}
}


@Test
public void testWriteAppendNorz() {
Path file = workPath.resolve("file.norz");
Expand Down
30 changes: 28 additions & 2 deletions gortools/src/test/java/gorsat/UTestGorWrite.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,20 @@
package gorsat;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.file.PathUtils;
import org.gorpipe.exceptions.GorParsingException;
import org.gorpipe.exceptions.GorSecurityException;
import org.gorpipe.exceptions.GorSystemException;
import org.gorpipe.gor.driver.GorDriverConfig;
import org.gorpipe.gor.driver.linkfile.LinkFile;
import org.gorpipe.gor.driver.linkfile.LinkFileMeta;
import org.gorpipe.gor.driver.linkfile.LinkFileV1;
import org.gorpipe.gor.driver.meta.DataType;
import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource;
import org.gorpipe.gor.model.BaseMeta;
import org.gorpipe.gor.util.DataUtil;
import org.junit.*;
import org.junit.contrib.java.lang.system.EnvironmentVariables;
import org.junit.contrib.java.lang.system.RestoreSystemProperties;
import org.junit.rules.TemporaryFolder;

import java.io.*;
Expand All @@ -57,6 +60,13 @@ public class UTestGorWrite {
public TemporaryFolder tempRoot = new TemporaryFolder();
private Path tempRootPath;

@Rule
public final RestoreSystemProperties restoreSystemProperties = new RestoreSystemProperties();

@Rule
public final EnvironmentVariables environmentVariables
= new EnvironmentVariables();

private String defaultV1LinkFileHeader;
private String testdbsnpTestLine1 = """
Chrom\tPOS\treference\tallele\tdifferentrsIDs
Expand Down Expand Up @@ -222,6 +232,23 @@ public void testWriteLinkFileAndMetaWithInfo() throws IOException {
Assert.assertEquals("Some file info", linkFile.getLatestEntry().info());
}

@Test
public void testWriteLinkFileWithInferFileName() throws IOException {

environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, workDirPath.resolve("managed_data").toString());
TestUtils.runGorPipe("gorrow chr1,1,100 | write -link ltest.gor", "-gorroot", workDirPath.toString());

var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString()));

Assert.assertEquals(1, linkFile.getEntriesCount());
Assert.assertTrue(linkFile.getLatestEntry().url().startsWith(workDirPath.resolve("managed_data/" + workDirPath.getFileName() + "/ltest").toString()));
Assert.assertTrue(linkFile.getLatestEntry().url().endsWith(".gor"));
Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url())));
Assert.assertEquals("#chrom\tbpStart\tbpStop\nchr1\t1\t100\n",
Files.readString(Path.of(linkFile.getLatestEntry().url())));

}

@Test
public void testTxtWriteServer() throws IOException {
Path p = Paths.get("../tests/data/nor/simple.nor");
Expand All @@ -239,7 +266,6 @@ public void testTxtFolderWriteServer() throws IOException {
TestUtils.runGorPipeCount(args, true);
}

@Ignore
@Test
public void testFolderWriteServer() throws IOException {
Path p = Paths.get("../tests/data/nor/simple.nor");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,10 @@ public interface GorDriverConfig extends Config {
@Key("org.gorpipe.gor.driver.plink.executable")
@DefaultValue("plink2")
String plinkExecutable();

String GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL = "GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL";
@Documentation("Root location of managed data files for link files.")
@Key(GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL)
@DefaultValue("")
String managedLinkDataFilesURL();
}
Loading
Loading