From 46d83eefec24ee96275720ab74f157324f27da80 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Sun, 1 Mar 2026 13:13:24 +0200 Subject: [PATCH 01/28] oap-logstream: row-binary --- .../main/java/oap/logstream/disk/AbstractWriter.java | 2 ++ .../java/oap/logstream/disk/DiskLoggerBackend.java | 10 +--------- pom.xml | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java index 5d052f1e5..6e2348f8a 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java @@ -88,6 +88,8 @@ static String currentPattern( LogFormat logFormat, String filePattern, LogId log String pattern = logId.filePrefixPattern + suffix; if( pattern.startsWith( "/" ) ) pattern = pattern.substring( 1 ); + pattern = pattern + ".rb.gz"; + LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); logIdTemplate diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java index 4d4341e7e..c771db5d1 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java @@ -116,15 +116,7 @@ public AbstractWriter load( LogId id ) { log.trace( "new writer id '{}' filePattern '{}'", id, fp ); - LogFormat logFormat = LogFormat.parse( fp.path ); - return switch( logFormat ) { - case PARQUET -> new ParquetLogWriter( logDirectory, fp.path, id, - writerConfiguration.parquet, bufferSize, timestamp, maxVersions ); - case TSV_GZ, TSV_ZSTD -> new TsvWriter( logDirectory, fp.path, id, - writerConfiguration.tsv, bufferSize, timestamp, maxVersions ); - case ROW_BINARY_GZ -> new RowBinaryWriter( logDirectory, fp.path, id, - bufferSize, timestamp, maxVersions ); - }; + return new RowBinaryWriter( logDirectory, fp.path, id, bufferSize, timestamp, maxVersions ); } } ); Metrics.gauge( "logstream_logging_disk_writers", List.of( Tag.of( "path", logDirectory.toString() ) ), diff --git a/pom.xml b/pom.xml index 3ac94d7c6..0282b2823 100644 --- a/pom.xml +++ b/pom.xml @@ -57,7 +57,7 @@ - 25.4.11 + 25.5.0 25.0.1 25.0.0 From 745d67b8dc9186624f2533116e1e1d464a8744a2 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Sun, 1 Mar 2026 15:44:51 +0200 Subject: [PATCH 02/28] oap-logstream: row-binary --- .../logstream/formats/RowBinaryAssertion.java | 72 +++ .../logstream/disk/DiskLoggerBackendTest.java | 55 +- .../oap/logstream/disk/ParquetWriterTest.java | 178 ------- .../oap/logstream/disk/TsvWriterTest.java | 468 ------------------ .../rowbinary/RowBinaryInputStream.java | 2 +- .../src/main/java/oap/testng/Asserts.java | 6 + 6 files changed, 104 insertions(+), 677 deletions(-) create mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/ParquetWriterTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/TsvWriterTest.java diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java new file mode 100644 index 000000000..2f7b7a214 --- /dev/null +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -0,0 +1,72 @@ +package oap.logstream.formats; + +import lombok.SneakyThrows; +import lombok.ToString; +import oap.io.Files; +import oap.io.IoStreams; +import oap.logstream.formats.rowbinary.RowBinaryInputStream; +import org.apache.commons.lang3.ArrayUtils; +import org.assertj.core.api.AbstractAssert; +import org.assertj.core.api.Assertions; +import org.assertj.core.api.ListAssert; + +import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import static oap.io.content.ContentReader.ofBytes; +import static org.assertj.core.api.Assertions.assertThat; + +public class RowBinaryAssertion extends AbstractAssert { + protected RowBinaryAssertion( RowBinaryData rowBinaryData ) { + super( rowBinaryData, RowBinaryAssertion.class ); + } + + public static RowBinaryAssertion assertRowBinaryFile( Path file, byte[][] types, IoStreams.Encoding encoding ) { + return assertRowBinaryFile( file, null, types, encoding ); + } + + public static RowBinaryAssertion assertRowBinaryFile( Path file, @Nullable String[] headers, byte[][] types, IoStreams.Encoding encoding ) { + Assertions.assertThatPath( file ).exists(); + + return new RowBinaryAssertion( new RowBinaryData( headers, types, Files.read( file, encoding, ofBytes() ) ) ); + } + + @SneakyThrows + public ListAssert> content( String... header ) { + List> ret = new ArrayList<>(); + + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream( this.actual.data ); + RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( byteArrayInputStream, this.actual.headers == null, this.actual.headers, this.actual.types ); + + List objects; + while( ( objects = rowBinaryInputStream.readRow() ) != null ) { + ArrayList filtered = new ArrayList<>(); + for( int i = 0; i < rowBinaryInputStream.headers.length; i++ ) { + if( header.length == 0 || ArrayUtils.contains( rowBinaryInputStream.headers, header[i] ) ) { + filtered.add( objects.get( i ) ); + } + } + + + ret.add( filtered ); + } + + return assertThat( ret ); + } + + @ToString + public static class RowBinaryData { + public final String[] headers; + public final byte[][] types; + public final byte[] data; + + public RowBinaryData( String[] headers, byte[][] types, byte[] data ) { + this.headers = headers; + this.types = types; + this.data = data; + } + } +} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index 3cb7cd40d..2642f070e 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -24,7 +24,7 @@ package oap.logstream.disk; -import oap.io.IoStreams; +import oap.compression.Compression; import oap.logstream.Logger; import oap.logstream.Timestamp; import oap.logstream.formats.rowbinary.RowBinaryUtils; @@ -38,13 +38,11 @@ import java.util.List; import java.util.Map; +import static oap.io.IoStreams.Encoding.GZIP; import static oap.logstream.Timestamp.BPH_12; import static oap.logstream.disk.DiskLoggerBackend.DEFAULT_BUFFER; -import static oap.logstream.formats.parquet.ParquetAssertion.assertParquet; -import static oap.logstream.formats.parquet.ParquetAssertion.row; +import static oap.logstream.formats.RowBinaryAssertion.assertRowBinaryFile; import static oap.net.Inet.HOSTNAME; -import static oap.testng.Asserts.assertFile; -import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -73,7 +71,7 @@ public void testPatternByType() throws IOException { Dates.setTimeFixed( 2015, 10, 10, 1, 16 ); String[] headers = new String[] { "REQUEST_ID", "REQUEST_ID2" }; byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - byte[] lines = RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ); + byte[] lines = Compression.gzip( RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ) ); try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), Timestamp.BPH_12, 4000 ) ) { backend.filePattern = "${LOG_TYPE}_${LOG_VERSION}_${INTERVAL}.tsv.gz"; @@ -88,25 +86,27 @@ public void testPatternByType() throws IOException { backend.refresh( true ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz" ) ) - .hasContent( """ - REQUEST_ID\tREQUEST_ID2 - 12345678\trrrr5678 - 1\t2 - """, IoStreams.Encoding.GZIP ); - assertParquet( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet" ) ) - .containOnlyHeaders( "REQUEST_ID", "REQUEST_ID2" ) - .contains( row( "12345678", "rrrr5678" ), - row( "1", "2" ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz.rb.gz" ), types, GZIP ) + .content() + .isEqualTo( List.of( + List.of( "12345678", "rrrr5678" ), + List.of( "1", "2" ) + ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet.rb.gz" ), types, GZIP ) + .content() + .isEqualTo( List.of( + List.of( "12345678", "rrrr5678" ), + List.of( "1", "2" ) + ) ); } } @Test - public void testRefreshForceSync() throws IOException { + public void testWriteSync() throws IOException { Dates.setTimeFixed( 2015, 10, 10, 1 ); String[] headers = new String[] { "REQUEST_ID", "REQUEST_ID2" }; byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - byte[] lines = RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ); + byte[] lines = Compression.gzip( RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ) ); //init new logger try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ) ) { backend.start(); @@ -114,18 +114,13 @@ public void testRefreshForceSync() throws IOException { Logger logger = new Logger( backend ); //log a line to lfn1 logger.log( "lfn1", Map.of(), "log", headers, types, lines ); - //check file size - assertThat( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasSize( 10 ); - //call refresh() with forceSync flag = true -> trigger flush() - backend.refresh( true ); - //check file size once more after flush() -> now the size is larger - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( """ - REQUEST_ID\tREQUEST_ID2 - 12345678\trrrr5678 - 1\t2 - """, IoStreams.Encoding.GZIP ); + + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), types, GZIP ) + .content() + .isEqualTo( List.of( + List.of( "12345678", "rrrr5678" ), + List.of( "1", "2" ) + ) ); } } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/ParquetWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/ParquetWriterTest.java deleted file mode 100644 index 701984573..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/ParquetWriterTest.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.disk; - -import oap.logstream.LogId; -import oap.logstream.formats.rowbinary.RowBinaryUtils; -import oap.template.BinaryUtils; -import oap.template.Types; -import oap.testng.Fixtures; -import oap.testng.TestDirectoryFixture; -import oap.util.Dates; -import org.joda.time.DateTime; -import org.testng.annotations.Test; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; - -import static oap.logstream.LogStreamProtocol.CURRENT_PROTOCOL_VERSION; -import static oap.logstream.LogStreamProtocol.ProtocolVersion.BINARY_V2; -import static oap.logstream.Timestamp.BPH_12; -import static oap.logstream.formats.parquet.ParquetAssertion.assertParquet; -import static oap.logstream.formats.parquet.ParquetAssertion.row; -import static org.joda.time.DateTimeZone.UTC; - -public class ParquetWriterTest extends Fixtures { - private static final String FILE_PATTERN = "${p}-file-${INTERVAL}-${LOG_VERSION}.parquet"; - private final TestDirectoryFixture testDirectoryFixture; - - public ParquetWriterTest() { - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void testWrite() throws IOException { - Dates.setTimeFixed( 2022, 3, 8, 21, 11 ); - - byte[] content1 = RowBinaryUtils.lines( List.of( - List.of( "s11", 21L, List.of( "1" ), new DateTime( 2022, 3, 11, 15, 16, 12, UTC ) ), - List.of( "s12", 22L, List.of( "1", "2" ), new DateTime( 2022, 3, 11, 15, 16, 13, UTC ) ) - ) ); - - byte[] content2 = RowBinaryUtils.lines( List.of( - List.of( "s111", 121L, List.of( "rr" ), new DateTime( 2022, 3, 11, 15, 16, 14, UTC ) ), - List.of( "s112", 122L, List.of( "zz", "66" ), new DateTime( 2022, 3, 11, 15, 16, 15, UTC ) ) - ) ); - - - String[] headers = new String[] { "COL1", "COL2", "COL3", "DATETIME" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, - new byte[] { Types.LONG.id }, - new byte[] { Types.LIST.id, Types.STRING.id }, - new byte[] { Types.DATETIME.id } - }; - LogId logId = new LogId( "", "log", "log", - Map.of( "p", "1" ), headers, types ); - Path logs = testDirectoryFixture.testPath( "logs" ); - try( ParquetLogWriter writer = new ParquetLogWriter( logs, FILE_PATTERN, logId, new WriterConfiguration.ParquetConfiguration(), 1024, BPH_12, 20 ) ) { - writer.write( CURRENT_PROTOCOL_VERSION, content1 ); - writer.write( CURRENT_PROTOCOL_VERSION, content2 ); - } - - assertParquet( logs.resolve( "1-file-02-4cd64dae-1.parquet" ) ) - .containOnlyHeaders( "COL1", "COL2", "COL3", "DATETIME" ) - .containsExactly( - row( "s11", 21L, List.of( "1" ), s( 2022, 3, 11, 15, 16, 12 ) ), - row( "s12", 22L, List.of( "1", "2" ), s( 2022, 3, 11, 15, 16, 13 ) ), - row( "s111", 121L, List.of( "rr" ), s( 2022, 3, 11, 15, 16, 14 ) ), - row( "s112", 122L, List.of( "zz", "66" ), s( 2022, 3, 11, 15, 16, 15 ) ) - ); - - assertParquet( logs.resolve( "1-file-02-4cd64dae-1.parquet" ), "COL3", "COL2" ) - .containOnlyHeaders( "COL3", "COL2" ) - .contains( row( List.of( "1" ), 21L ) ); - } - - @Test - public void testWriteV2() throws IOException { - Dates.setTimeFixed( 2022, 3, 8, 21, 11 ); - - byte[] content1 = BinaryUtils.lines( List.of( - List.of( "s11", 21L, List.of( "1" ), new DateTime( 2022, 3, 11, 15, 16, 12, UTC ) ), - List.of( "s12", 22L, List.of( "1", "2" ), new DateTime( 2022, 3, 11, 15, 16, 13, UTC ) ) - ) ); - - byte[] content2 = BinaryUtils.lines( List.of( - List.of( "s111", 121L, List.of( "rr" ), new DateTime( 2022, 3, 11, 15, 16, 14, UTC ) ), - List.of( "s112", 122L, List.of( "zz", "66" ), new DateTime( 2022, 3, 11, 15, 16, 15, UTC ) ) - ) ); - - - String[] headers = new String[] { "COL1", "COL2", "COL3", "DATETIME" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, - new byte[] { Types.LONG.id }, - new byte[] { Types.LIST.id, Types.STRING.id }, - new byte[] { Types.DATETIME.id } - }; - LogId logId = new LogId( "", "log", "log", - Map.of( "p", "1" ), headers, types ); - Path logs = testDirectoryFixture.testPath( "logs" ); - try( ParquetLogWriter writer = new ParquetLogWriter( logs, FILE_PATTERN, logId, new WriterConfiguration.ParquetConfiguration(), 1024, BPH_12, 20 ) ) { - writer.write( BINARY_V2, content1 ); - writer.write( BINARY_V2, content2 ); - } - - assertParquet( logs.resolve( "1-file-02-4cd64dae-1.parquet" ) ) - .containOnlyHeaders( "COL1", "COL2", "COL3", "DATETIME" ) - .containsExactly( - row( "s11", 21L, List.of( "1" ), s( 2022, 3, 11, 15, 16, 12 ) ), - row( "s12", 22L, List.of( "1", "2" ), s( 2022, 3, 11, 15, 16, 13 ) ), - row( "s111", 121L, List.of( "rr" ), s( 2022, 3, 11, 15, 16, 14 ) ), - row( "s112", 122L, List.of( "zz", "66" ), s( 2022, 3, 11, 15, 16, 15 ) ) - ); - - assertParquet( logs.resolve( "1-file-02-4cd64dae-1.parquet" ), "COL3", "COL2" ) - .containOnlyHeaders( "COL3", "COL2" ) - .contains( row( List.of( "1" ), 21L ) ); - } - - @Test - public void testWriteExcludeFields() throws IOException { - Dates.setTimeFixed( 2022, 3, 8, 21, 11 ); - - byte[] content1 = RowBinaryUtils.lines( List.of( - List.of( "1", 21L, List.of( "1" ), new DateTime( 2022, 3, 11, 15, 16, 12, UTC ) ), - List.of( "1", 22L, List.of( "1", "2" ), new DateTime( 2022, 3, 11, 15, 16, 13, UTC ) ) - ) ); - - String[] headers = new String[] { "COL1", "COL2", "COL3", "DATETIME" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, - new byte[] { Types.LONG.id }, - new byte[] { Types.LIST.id, Types.STRING.id }, - new byte[] { Types.DATETIME.id } - }; - LogId logId = new LogId( "", "log", "log", - Map.of( "p", "1", "COL1_property_name", "1" ), headers, types ); - Path logs = testDirectoryFixture.testPath( "logs" ); - WriterConfiguration.ParquetConfiguration parquetConfiguration = new WriterConfiguration.ParquetConfiguration(); - parquetConfiguration.excludeFieldsIfPropertiesExists.put( "COL1", "COL1_property_name" ); - try( ParquetLogWriter writer = new ParquetLogWriter( logs, FILE_PATTERN, logId, parquetConfiguration, 1024, BPH_12, 20 ) ) { - writer.write( CURRENT_PROTOCOL_VERSION, content1 ); - } - - assertParquet( logs.resolve( "1-file-02-4cd64dae-1.parquet" ) ) - .containOnlyHeaders( "COL2", "COL3", "DATETIME" ) - .containsExactly( - row( 21L, List.of( "1" ), s( 2022, 3, 11, 15, 16, 12 ) ), - row( 22L, List.of( "1", "2" ), s( 2022, 3, 11, 15, 16, 13 ) ) - ); - } - - private long s( int year, int monthOfYear, int dayOfMonth, int hourOfDay, int minuteOfHour, int secondOfMinute ) { - return new DateTime( year, monthOfYear, dayOfMonth, hourOfDay, minuteOfHour, secondOfMinute, UTC ).getMillis() / 1000; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/TsvWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/TsvWriterTest.java deleted file mode 100644 index e650f31c7..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/TsvWriterTest.java +++ /dev/null @@ -1,468 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.disk; - -import oap.io.Files; -import oap.io.content.ContentWriter; -import oap.logstream.LogId; -import oap.logstream.formats.rowbinary.RowBinaryUtils; -import oap.template.BinaryUtils; -import oap.template.Types; -import oap.testng.Fixtures; -import oap.testng.TestDirectoryFixture; -import oap.util.Dates; -import oap.util.LinkedHashMaps; -import org.testng.annotations.Test; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; - -import static oap.io.IoStreams.Encoding.GZIP; -import static oap.io.IoStreams.Encoding.PLAIN; -import static oap.logstream.LogStreamProtocol.CURRENT_PROTOCOL_VERSION; -import static oap.logstream.LogStreamProtocol.ProtocolVersion.BINARY_V2; -import static oap.logstream.LogStreamProtocol.ProtocolVersion.TSV_V1; -import static oap.logstream.Timestamp.BPH_12; -import static oap.testng.Asserts.assertFile; - -public class TsvWriterTest extends Fixtures { - private static final String FILE_PATTERN = "${p}-file-${INTERVAL}-${LOG_VERSION}-#{if}($ORGANIZATION)${ORGANIZATION}#{else}UNKNOWN#{end}.log.gz"; - private final TestDirectoryFixture testDirectoryFixture; - - public TsvWriterTest() { - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void testEscape() throws IOException { - String[] headers = new String[] { "RAW" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - String content = "1\n2\n\r3\t4"; - byte[] bytes = RowBinaryUtils.line( List.of( content ) ); - Path logs = testDirectoryFixture.testPath( "logs" ); - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", LinkedHashMaps.of( "p", "1", "ORGANIZATION", "" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ) ) { - - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - } - - assertFile( logs.resolve( "1-file-00-198163-1-UNKNOWN.log.gz" ) ) - .hasContent( "RAW\n1\\n2\\n\\r3\\t4\n", GZIP ); - } - - @Test - public void testEscapeV2() throws IOException { - String[] headers = new String[] { "RAW" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - String content = "1\n2\n\r3\t4"; - byte[] bytes = BinaryUtils.line( List.of( content ) ); - Path logs = testDirectoryFixture.testPath( "logs" ); - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", LinkedHashMaps.of( "p", "1", "ORGANIZATION", "" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ) ) { - - writer.write( BINARY_V2, bytes ); - } - - assertFile( logs.resolve( "1-file-00-198163-1-UNKNOWN.log.gz" ) ) - .hasContent( "RAW\n1\\n2\\n\\r3\\t4\n", GZIP ); - } - - @Test - public void metadataChanged() throws IOException { - String[] headers = new String[] { "REQUEST_ID" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - String content = "1234567890"; - byte[] bytes = RowBinaryUtils.line( List.of( content ) ); - Path logs = testDirectoryFixture.testPath( "logs" ); - - TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", LinkedHashMaps.of( "p", "1" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ); - - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - writer.close(); - - writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", LinkedHashMaps.of( "p", "1", "p2", "2" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ); - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - writer.close(); - - assertFile( logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - assertFile( logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - 11 - p: "1" - VERSION: "80723ad6-1" - """ ); - - assertFile( logs.resolve( "1-file-00-80723ad6-2-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - assertFile( logs.resolve( "1-file-00-80723ad6-2-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - 11 - p: "1" - p2: "2" - VERSION: "80723ad6-2" - """ ); - - } - - @Test - public void write() throws IOException { - String[] headers = new String[] { "REQUEST_ID" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id } }; - String[] newHeaders = new String[] { "REQUEST_ID", "H2" }; - byte[][] newTypes = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - String content = "1234567890"; - byte[] bytes = RowBinaryUtils.line( List.of( content ) ); - List newContent = List.of( "1234567890", "" ); - byte[] newBytes = RowBinaryUtils.line( newContent ); - Path logs = testDirectoryFixture.testPath( "logs" ); - Files.write( - logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz" ), - PLAIN, "corrupted file", ContentWriter.ofString() ); - Files.write( - logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ), - PLAIN, """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: "REQUEST_ID" - p: "1" - VERSION: "80723ad6-1" - """, ContentWriter.ofString() ); - - TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", Map.of( "p", "1" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ); - - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 5 ); - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 10 ); - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - writer.close(); - - writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", Map.of( "p", "1" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 14 ); - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 59 ); - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - writer.close(); - - writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", Map.of( "p", "1" ), newHeaders, newTypes ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 14 ); - writer.write( CURRENT_PROTOCOL_VERSION, newBytes ); - writer.close(); - - - assertFile( logs.resolve( "1-file-01-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - assertFile( logs.resolve( "1-file-01-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - 11 - p: "1" - VERSION: "80723ad6-1" - """ ); - - assertFile( logs.resolve( "1-file-02-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - assertFile( logs.resolve( "1-file-02-80723ad6-2-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - assertFile( logs.resolve( "1-file-02-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - 11 - p: "1" - VERSION: "80723ad6-1" - """ ); - - assertFile( logs.resolve( "1-file-11-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - - assertFile( logs.resolve( "1-file-11-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content + "\n", GZIP ); - - assertFile( logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz" ) ) - .hasContent( "corrupted file" ); - assertFile( logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: "REQUEST_ID" - p: "1" - VERSION: "80723ad6-1" - """ ); - - assertFile( logs.resolve( "1-file-02-ab96b20e-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\tH2\n" + String.join( "\t", newContent ) + "\n", GZIP ); - } - - @Test - public void testVersions() throws IOException { - String[] headers = new String[] { "REQUEST_ID", "H2" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - - Path logs = testDirectoryFixture.testPath( "logs" ); - String metadata = """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - 11 - p: "1" - VERSION: "80723ad6-1" - """; - Files.write( - logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz" ), - PLAIN, "1\t2", ContentWriter.ofString() ); - Files.write( - logs.resolve( "1-file-00-80723ad6-1-UNKNOWN.log.gz.metadata.yaml" ), - PLAIN, metadata, ContentWriter.ofString() ); - - Files.write( - logs.resolve( "1-file-00-80723ad6-2-UNKNOWN.log.gz" ), - PLAIN, "11\t22", ContentWriter.ofString() ); - Files.write( - logs.resolve( "1-file-00-80723ad6-2-UNKNOWN.log.gz.metadata.yaml" ), - PLAIN, metadata, ContentWriter.ofString() ); - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", Map.of( "p", "1" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ) ) { - writer.write( CURRENT_PROTOCOL_VERSION, RowBinaryUtils.line( List.of( "111", "222" ) ) ); - } - - assertFile( logs.resolve( "1-file-00-ab96b20e-1-UNKNOWN.log.gz" ) ) - .hasContent( """ - REQUEST_ID\tH2 - 111\t222 - """, GZIP ); - - assertFile( logs.resolve( "1-file-00-ab96b20e-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - - "H2" - types: - - - 11 - - - 11 - p: "1" - VERSION: "ab96b20e-1" - """ ); - } - - @Test - public void testProtocolVersion1() { - String headers = "REQUEST_ID"; - String newHeaders = "REQUEST_ID\tH2"; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - - String content = "1234567890"; - byte[] bytes = content.getBytes(); - Path logs = testDirectoryFixture.testPath( "logs" ); - Files.write( - logs.resolve( "1-file-00-9042dc83-1-UNKNOWN.log.gz" ), - PLAIN, "corrupted file", ContentWriter.ofString() ); - Files.write( - logs.resolve( "1-file-00-9042dc83-1-UNKNOWN.log.gz.metadata.yaml" ), - PLAIN, """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: "REQUEST_ID" - p: "1" - """, ContentWriter.ofString() ); - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", Map.of( "p", "1" ), new String[] { headers }, new byte[][] { { -1 } } ), new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 10 ) ) { - writer.write( TSV_V1, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 5 ); - writer.write( TSV_V1, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 10 ); - writer.write( TSV_V1, bytes ); - } - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, new LogId( "", "type", "log", Map.of( "p", "1" ), new String[] { headers }, new byte[][] { { -1 } } ), new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 10 ) ) { - Dates.setTimeFixed( 2015, 10, 10, 1, 14 ); - writer.write( TSV_V1, bytes ); - - Dates.setTimeFixed( 2015, 10, 10, 1, 59 ); - writer.write( TSV_V1, bytes ); - } - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, new LogId( "", "type", "log", Map.of( "p", "1" ), new String[] { newHeaders }, new byte[][] { { -1 } } ), new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 10 ) ) { - Dates.setTimeFixed( 2015, 10, 10, 1, 14 ); - writer.write( TSV_V1, bytes ); - } - - assertFile( logs.resolve( "1-file-01-9042dc83-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content, GZIP ); - assertFile( logs.resolve( "1-file-01-9042dc83-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - -1 - p: "1" - VERSION: "9042dc83-1" - """ ); - - assertFile( logs.resolve( "1-file-02-9042dc83-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content, GZIP ); - assertFile( logs.resolve( "1-file-02-9042dc83-2-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content, GZIP ); - assertFile( logs.resolve( "1-file-02-9042dc83-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: - - "REQUEST_ID" - types: - - - -1 - p: "1" - VERSION: "9042dc83-1" - """ ); - - assertFile( logs.resolve( "1-file-11-9042dc83-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content, GZIP ); - - assertFile( logs.resolve( "1-file-11-9042dc83-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\n" + content, GZIP ); - - assertFile( logs.resolve( "1-file-00-9042dc83-1-UNKNOWN.log.gz" ) ) - .hasContent( "corrupted file" ); - assertFile( logs.resolve( "1-file-00-9042dc83-1-UNKNOWN.log.gz.metadata.yaml" ) ) - .hasContent( """ - --- - filePrefixPattern: "" - type: "type" - clientHostname: "log" - headers: "REQUEST_ID" - p: "1" - """ ); - - assertFile( logs.resolve( "1-file-02-e56ba426-1-UNKNOWN.log.gz" ) ) - .hasContent( "REQUEST_ID\tH2\n" + content, GZIP ); - } - - @Test - public void testEmpty() throws IOException { - String[] headers = new String[] { "T1", "T2" }; - byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - - Dates.setTimeFixed( 2015, 10, 10, 1, 0 ); - - byte[] bytes = RowBinaryUtils.lines( List.of( List.of( "", "a" ), List.of( "", "a" ) ) ); - Path logs = testDirectoryFixture.testPath( "logs" ); - - try( TsvWriter writer = new TsvWriter( logs, FILE_PATTERN, - new LogId( "", "type", "log", LinkedHashMaps.of( "p", "1" ), headers, types ), - new WriterConfiguration.TsvConfiguration(), 10, BPH_12, 20 ) ) { - - writer.write( CURRENT_PROTOCOL_VERSION, bytes ); - } - - assertFile( logs.resolve( "1-file-00-50137474-1-UNKNOWN.log.gz" ) ) - .hasContent( "T1\tT2\n\ta\n\ta\n", GZIP ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index ca0adf90b..cea6c740a 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -31,7 +31,7 @@ public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) this( in, false, headers, types ); } - protected RowBinaryInputStream( InputStream in, boolean readHeaders, String[] headers, byte[][] types ) throws IOException { + public RowBinaryInputStream( InputStream in, boolean readHeaders, String[] headers, byte[][] types ) throws IOException { this.in = in; if( readHeaders ) { diff --git a/oap-stdlib-test/src/main/java/oap/testng/Asserts.java b/oap-stdlib-test/src/main/java/oap/testng/Asserts.java index 3a3bc0404..3093fb8ba 100644 --- a/oap-stdlib-test/src/main/java/oap/testng/Asserts.java +++ b/oap-stdlib-test/src/main/java/oap/testng/Asserts.java @@ -36,6 +36,7 @@ import oap.util.Strings; import oap.util.function.Try; import org.apache.commons.lang3.StringUtils; +import org.assertj.core.api.AbstractByteArrayAssert; import org.assertj.core.api.AbstractCharSequenceAssert; import org.assertj.core.api.AbstractFileAssert; import org.testng.Assert; @@ -248,6 +249,11 @@ public FileAssertion hasSize( long size ) { return this; } + public AbstractByteArrayAssert binaryContent( IoStreams.Encoding encoding ) { + exists(); + return assertThat( Files.read( this.actual.toPath(), encoding, ofBytes() ) ); + } + @Override public FileAssertion hasContent( String expected ) { return hasContent( expected, IoStreams.Encoding.PLAIN ); From 6976e2f70e2b5b34abd28bea70cc2ef71e1b7b8f Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Sun, 1 Mar 2026 17:22:03 +0200 Subject: [PATCH 03/28] oap-logstream: row-binary --- .../logstream/formats/RowBinaryAssertion.java | 11 +- .../logstream/disk/DiskLoggerBackendTest.java | 6 +- .../logstream/disk/RowBinaryWriterTest.java | 4 +- .../formats/rowbinary/RowBinaryTest.java | 18 +- .../oap/logstream/disk/RowBinaryWriter.java | 2 +- .../rowbinary/RowBinaryInputStream.java | 159 +++++++++++------- .../rowbinary/RowBinaryOutputStream.java | 50 +++++- .../formats/rowbinary/RowBinaryUtils.java | 16 +- 8 files changed, 180 insertions(+), 86 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java index 2f7b7a214..fa3772472 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -10,7 +10,6 @@ import org.assertj.core.api.Assertions; import org.assertj.core.api.ListAssert; -import javax.annotation.Nullable; import java.io.ByteArrayInputStream; import java.nio.file.Path; import java.util.ArrayList; @@ -24,14 +23,10 @@ protected RowBinaryAssertion( RowBinaryData rowBinaryData ) { super( rowBinaryData, RowBinaryAssertion.class ); } - public static RowBinaryAssertion assertRowBinaryFile( Path file, byte[][] types, IoStreams.Encoding encoding ) { - return assertRowBinaryFile( file, null, types, encoding ); - } - - public static RowBinaryAssertion assertRowBinaryFile( Path file, @Nullable String[] headers, byte[][] types, IoStreams.Encoding encoding ) { + public static RowBinaryAssertion assertRowBinaryFile( Path file, IoStreams.Encoding encoding ) { Assertions.assertThatPath( file ).exists(); - return new RowBinaryAssertion( new RowBinaryData( headers, types, Files.read( file, encoding, ofBytes() ) ) ); + return new RowBinaryAssertion( new RowBinaryData( null, null, Files.read( file, encoding, ofBytes() ) ) ); } @SneakyThrows @@ -39,7 +34,7 @@ public ListAssert> content( String... header ) { List> ret = new ArrayList<>(); ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream( this.actual.data ); - RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( byteArrayInputStream, this.actual.headers == null, this.actual.headers, this.actual.types ); + RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( byteArrayInputStream, this.actual.headers, this.actual.types ); List objects; while( ( objects = rowBinaryInputStream.readRow() ) != null ) { diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index 2642f070e..dcd2db468 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -86,13 +86,13 @@ public void testPatternByType() throws IOException { backend.refresh( true ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz.rb.gz" ), types, GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz.rb.gz" ), GZIP ) .content() .isEqualTo( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet.rb.gz" ), types, GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet.rb.gz" ), GZIP ) .content() .isEqualTo( List.of( List.of( "12345678", "rrrr5678" ), @@ -115,7 +115,7 @@ public void testWriteSync() throws IOException { //log a line to lfn1 logger.log( "lfn1", Map.of(), "log", headers, types, lines ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), types, GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .content() .isEqualTo( List.of( List.of( "12345678", "rrrr5678" ), diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java index bc240d1d0..3700c212d 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java @@ -83,11 +83,11 @@ public void testWrite() throws IOException { writer.write( CURRENT_PROTOCOL_VERSION, content2 ); } - Path path = logs.resolve( "1-file-02-4cd64dae-1.rb.gz" ); + Path path = logs.resolve( "1-file-02-4cd64dae-1.rb.gz.rb.gz" ); byte[] rb = Compression.ungzip( Files.readAllBytes( path ) ); - Pair>, List> read = RowBinaryUtils.read( rb, headers, types, true ); + Pair>, List> read = RowBinaryUtils.read( rb, 0, rb.length, null, null ); assertThat( read._2 ).isEqualTo( List.of( headers ) ); assertThat( read._1 ) .isEqualTo( List.of( diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/rowbinary/RowBinaryTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/rowbinary/RowBinaryTest.java index 25fa0853e..9b88056c0 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/rowbinary/RowBinaryTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/rowbinary/RowBinaryTest.java @@ -5,6 +5,7 @@ import com.clickhouse.data.ClickHouseFormat; import com.github.dockerjava.api.command.CreateContainerCmd; import lombok.extern.slf4j.Slf4j; +import oap.template.Types; import oap.testng.Fixtures; import org.apache.commons.lang3.RandomStringUtils; import org.joda.time.DateTime; @@ -32,7 +33,7 @@ public class RowBinaryTest extends Fixtures { public static final int HTTP_PORT = 8123; - public static final String CLICKHOUSE_VERSION = "25.9.3.48-alpine"; + public static final String CLICKHOUSE_VERSION = "25.8.7.3-alpine"; public static final String CLICKHOUSE_REPOASITORY = "clickhouse/clickhouse-server"; private GenericContainer container; @@ -91,7 +92,9 @@ public void testFormat() { .succeedsWithin( Duration.ofSeconds( 10 ) ); assertThat( client.insert( "TEST", out -> { - RowBinaryOutputStream rowBinaryOutputStream = new RowBinaryOutputStream( out, List.of( "b", "bt", "i", "l", "f", "d", "dt", "date", "ls" ) ); + RowBinaryOutputStream rowBinaryOutputStream = new RowBinaryOutputStream( out, List.of( "b", "bt", "i", "l", "f", "d", "dt", "date", "ls" ), new byte[][] { + { Types.BOOLEAN.id }, { Types.BYTE.id }, { Types.INTEGER.id }, { Types.LONG.id }, { Types.FLOAT.id }, { Types.DOUBLE.id }, { Types.DATETIME.id }, { Types.DATE.id }, { Types.LIST.id, Types.STRING.id } + } ); rowBinaryOutputStream.writeBoolean( true ); rowBinaryOutputStream.writeByte( ( byte ) 134 ); rowBinaryOutputStream.writeInt( 12345 ); @@ -111,14 +114,17 @@ public void testFormat() { rowBinaryOutputStream.writeDateTime( new DateTime( 2025, 7, 10, 19, 21, 39, 124, UTC ) ); rowBinaryOutputStream.writeDate( new Date( new DateTime( 2025, 7, 10, 19, 21, 39, 123, UTC ).getMillis() ) ); rowBinaryOutputStream.writeList( List.of() ); - }, ClickHouseFormat.RowBinaryWithNames, new InsertSettings() ) ) + }, ClickHouseFormat.RowBinaryWithNamesAndTypes, new InsertSettings() ) ) .succeedsWithin( Duration.ofSeconds( 10 ) ); - assertThat( client.query( "SELECT * FROM TEST FORMAT " + ClickHouseFormat.RowBinaryWithNames ) ) + assertThat( client.query( "SELECT * FROM TEST FORMAT " + ClickHouseFormat.RowBinaryWithNamesAndTypes ) ) .succeedsWithin( Duration.ofSeconds( 10 ) ) .satisfies( resp -> { - RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( resp.getInputStream(), true ); - assertThat( List.of( rowBinaryInputStream.headers ) ).isEqualTo( List.of( "b", "bt", "i", "l", "f", "d", "dt", "date", "ls" ) ); + RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( resp.getInputStream() ); + assertThat( rowBinaryInputStream.headers ).isEqualTo( new String[] { "b", "bt", "i", "l", "f", "d", "dt", "date", "ls" } ); + assertThat( rowBinaryInputStream.types ).isEqualTo( new byte[][] { + { Types.BOOLEAN.id }, { Types.BYTE.id }, { Types.INTEGER.id }, { Types.LONG.id }, { Types.FLOAT.id }, { Types.DOUBLE.id }, { Types.DATETIME.id }, { Types.DATE.id }, { Types.LIST.id, Types.STRING.id } + } ); assertThat( rowBinaryInputStream.readBoolean() ).isTrue(); assertThat( rowBinaryInputStream.readByte() ).isEqualTo( ( byte ) 134 ); diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java index d1924f177..a55883e39 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java @@ -40,7 +40,7 @@ public String write( LogStreamProtocol.ProtocolVersion protocolVersion, byte[] b FastByteArrayOutputStream outputStream = new FastByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream( outputStream ); - RowBinaryOutputStream rbOut = new RowBinaryOutputStream( gzip, List.of( logId.headers ) ); + RowBinaryOutputStream rbOut = new RowBinaryOutputStream( gzip, List.of( logId.headers ), logId.types ); rbOut.close(); ByteBuffer byteBuffer = ByteBuffer.wrap( outputStream.array, 0, outputStream.length ); diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index cea6c740a..d3278f97b 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -1,6 +1,7 @@ package oap.logstream.formats.rowbinary; import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.bytes.ByteArrayList; import oap.template.Types; import org.joda.time.DateTime; @@ -17,36 +18,120 @@ /** * https://clickhouse.com/docs/interfaces/formats/RowBinary */ +@SuppressWarnings( "checkstyle:OverloadMethodsDeclarationOrder" ) public class RowBinaryInputStream extends InputStream { - private final InputStream in; public final String[] headers; - private final byte[][] types; + public final InputStream in; + public final byte[][] types; protected byte[] readBuffer = new byte[8]; - public RowBinaryInputStream( InputStream in, boolean readHeaders ) throws IOException { - this( in, readHeaders, null, null ); + public RowBinaryInputStream( InputStream in ) throws IOException { + this( in, null, null ); } - public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) throws IOException { - this( in, false, headers, types ); + public RowBinaryInputStream( InputStream in, byte[][] types ) throws IOException { + this( in, null, types ); } - public RowBinaryInputStream( InputStream in, boolean readHeaders, String[] headers, byte[][] types ) throws IOException { + public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) throws IOException { this.in = in; - if( readHeaders ) { - int count = readVarInt(); - this.headers = new String[count]; - for( int i = 0; i < count; i++ ) { - this.headers[i] = readString(); + this.headers = headers == null ? readHeaders() : null; + this.types = types == null ? readTypes() : null; + } + + private static void convertType( String rbType, ByteArrayList type ) { + if( rbType.startsWith( "Array(" ) ) { + type.add( Types.LIST.id ); + convertType( rbType.substring( "Array(".length(), rbType.length() - 1 ), type ); + } else { + type.add( switch( rbType ) { + case "Bool" -> Types.BOOLEAN.id; + case "UInt8" -> Types.BYTE.id; + case "Int16" -> Types.SHORT.id; + case "Int32" -> Types.INTEGER.id; + case "Int64" -> Types.LONG.id; + case "Float32" -> Types.FLOAT.id; + case "Float64" -> Types.DOUBLE.id; + case "String" -> Types.STRING.id; + case "Date" -> Types.DATE.id; + case "DateTime" -> Types.DATETIME.id; + case null, default -> throw new IllegalArgumentException( "unknown type " + type ); + } ); + } + } + + protected static void readFully( InputStream in, byte[] b, int off, int len ) throws IOException { + int n = 0; + while( n < len ) { + int count = in.read( b, off + n, len - n ); + if( count < 0 ) + throw new EOFException(); + n += count; + } + } + + private byte[][] readTypes() throws IOException { + int count = headers.length; + byte[][] types = new byte[count][]; + + ByteArrayList type = new ByteArrayList(); + + for( int i = 0; i < count; i++ ) { + String rbType = readString(); + + convertType( rbType, type ); + + types[i] = type.toByteArray(); + + type.clear(); + } + + return types; + } + + protected int readVarInt() throws IOException { + int value = 0; + + for( int i = 0; i < 10; i++ ) { + byte b = readByteOrEof(); + value |= ( b & 0x7F ) << ( 7 * i ); + + if( ( b & 0x80 ) == 0 ) { + break; } - } else if( types != null ) { - this.headers = headers; + } + + return value; + } + + private byte readByteOrEof() throws IOException { + int b = in.read(); + if( b < 0 ) { + throw new EOFException( "End of stream reached before reading all data" ); + } + return ( byte ) b; + } + + public String readString() throws IOException { + int length = readVarInt(); + if( length == 0 ) { + return ""; } else { - throw new IllegalArgumentException( "unknown headers" ); + byte[] buf = new byte[length]; + readFully( buf, 0, length ); + return new String( buf, UTF_8 ); } + } - this.types = types; + private String[] readHeaders() throws IOException { + int count = readVarInt(); + String[] headers = new String[count]; + for( int i = 0; i < count; i++ ) { + headers[i] = readString(); + } + + return headers; } public byte readByte() throws IOException { @@ -108,17 +193,6 @@ public Date readDate() throws IOException { return new Date( readShort() * 24L * 60L * 60L * 1000L ); } - public String readString() throws IOException { - int length = readVarInt(); - if( length == 0 ) { - return ""; - } else { - byte[] buf = new byte[length]; - readFully( buf, 0, length ); - return new String( buf, UTF_8 ); - } - } - public List readList( Class clazz ) throws IOException { int size = readVarInt(); @@ -170,36 +244,7 @@ private T readObject( Class clazz ) throws IOException { } protected void readFully( byte[] b, int off, int len ) throws IOException { - int n = 0; - while( n < len ) { - int count = in.read( b, off + n, len - n ); - if( count < 0 ) - throw new EOFException(); - n += count; - } - } - - protected int readVarInt() throws IOException { - int value = 0; - - for( int i = 0; i < 10; i++ ) { - byte b = readByteOrEof(); - value |= ( b & 0x7F ) << ( 7 * i ); - - if( ( b & 0x80 ) == 0 ) { - break; - } - } - - return value; - } - - private byte readByteOrEof() throws IOException { - int b = in.read(); - if( b < 0 ) { - throw new EOFException( "End of stream reached before reading all data" ); - } - return ( byte ) b; + readFully( in, b, off, len ); } public List readRow() throws IOException { diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryOutputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryOutputStream.java index c6ff018ab..9a28b4cf3 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryOutputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryOutputStream.java @@ -1,9 +1,12 @@ package oap.logstream.formats.rowbinary; +import com.google.common.base.Preconditions; import oap.dictionary.Dictionary; +import oap.template.Types; import oap.util.Strings; import org.joda.time.DateTime; +import javax.annotation.Nullable; import java.io.IOException; import java.io.OutputStream; import java.util.Collection; @@ -25,15 +28,60 @@ public RowBinaryOutputStream( OutputStream out ) { this.out = out; } - public RowBinaryOutputStream( OutputStream out, List headers ) throws IOException { + public RowBinaryOutputStream( OutputStream out, @Nullable List headers, @Nullable byte[][] types ) throws IOException { this( out ); + Preconditions.checkArgument( headers != null || types == null ); + + if( headers != null ) { + writeHeaders( headers ); + if( types != null ) { + writeTypes( types ); + } + } + } + + private void writeHeaders( List headers ) throws IOException { writeVarInt( headers.size() ); for( String header : headers ) { writeString( header ); } } + private void writeTypes( byte[][] types ) throws IOException { + for( byte[] type : types ) { + writeString( getTypeAsString( type, 0 ) ); + } + } + + public static String getTypeAsString( byte[] type, int offset ) throws IOException { + StringBuilder ret = new StringBuilder(); + + Types dataType = Types.valueOf( type[offset] ); + ret.append( switch( dataType ) { + case BOOLEAN -> "Bool"; + case BYTE -> "UInt8"; + case SHORT -> "Int16"; + case INTEGER -> "Int32"; + case LONG -> "Int64"; + case FLOAT -> "Float32"; + case DOUBLE -> "Float64"; + case STRING -> "String"; + case DATE -> "Date"; + case DATETIME -> "DateTime"; + case LIST -> "Array"; + case null, default -> throw new IllegalArgumentException( "unknown type " + dataType ); + } ); + + for( int i = offset + 1; i < type.length; i++ ) { + Preconditions.checkArgument( dataType == Types.LIST ); + + ret.append( "(" ).append( getTypeAsString( type, offset + 1 ) ).append( ")" ); + } + + return ret.toString(); + } + @SuppressWarnings( "checkstyle:ParameterAssignment" ) private void writeVarInt( long value ) throws IOException { for( int i = 0; i < 9; i++ ) { diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java index 8ad1a5292..d647d5dc5 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java @@ -11,20 +11,20 @@ import static oap.util.Pair.__; public class RowBinaryUtils { - public static List> read( byte[] bytes, String[] headers, byte[][] types ) throws IOException { - return read( bytes, 0, bytes.length, headers, types ); + public static List> read( byte[] bytes ) throws IOException { + return read( bytes, null, null ); } - public static Pair>, List> read( byte[] bytes, String[] headers, byte[][] types, boolean readHeaders ) throws IOException { - return read( bytes, 0, bytes.length, headers, types, readHeaders ); + public static List> read( byte[] bytes, String[] headers, byte[][] types ) throws IOException { + return read( bytes, 0, bytes.length, headers, types )._1; } - public static List> read( byte[] bytes, int offset, int length, String[] headers, byte[][] types ) throws IOException { - return read( bytes, offset, length, headers, types, false )._1; + public static Pair>, List> read( byte[] bytes, int offset, int length ) throws IOException { + return read( bytes, offset, length, null, null ); } - public static Pair>, List> read( byte[] bytes, int offset, int length, String[] headers, byte[][] types, boolean readHeaders ) throws IOException { - RowBinaryInputStream binaryInputStream = new RowBinaryInputStream( new ByteArrayInputStream( bytes, offset, length ), readHeaders, headers, types ); + public static Pair>, List> read( byte[] bytes, int offset, int length, String[] headers, byte[][] types ) throws IOException { + RowBinaryInputStream binaryInputStream = new RowBinaryInputStream( new ByteArrayInputStream( bytes, offset, length ), headers, types ); ArrayList> res = new ArrayList<>(); From 19380ecb6f3c1c4e9aa71d561ee95b1351ea3de5 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Sun, 1 Mar 2026 17:40:19 +0200 Subject: [PATCH 04/28] oap-logstream: row-binary --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index d3278f97b..317c360b6 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -36,8 +36,8 @@ public RowBinaryInputStream( InputStream in, byte[][] types ) throws IOException public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) throws IOException { this.in = in; - this.headers = headers == null ? readHeaders() : null; - this.types = types == null ? readTypes() : null; + this.headers = headers == null ? readHeaders() : headers; + this.types = types == null ? readTypes() : types; } private static void convertType( String rbType, ByteArrayList type ) { From 61880340fd928118388d380ab0c2a649f76afff8 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Sun, 1 Mar 2026 19:19:56 +0200 Subject: [PATCH 05/28] oap-logstream: row-binary --- .../data/object/BinaryObjectLogger.java | 188 ---------- .../data/object/BinaryObjectLoggerTest.java | 109 ------ .../data/dynamic/DynamicMapLogger.java | 82 ----- .../logstream/data/map/AbstractMapLogger.java | 54 --- .../oap/logstream/data/map/MapLogModel.java | 74 ---- .../logstream/data/map/MapLogRenderer.java | 81 ----- .../data/dynamic/DynamicMapLoggerTest.java | 98 ----- .../logstream/data/map/MapLogModelTest.java | 33 -- .../oap/logstream/data/map/MapLoggerTest.java | 78 ---- .../oap-logstream/oap-logstream-test/pom.xml | 29 +- .../formats/parquet/ParquetAssertion.java | 292 --------------- .../java/oap/logstream/LoggerJsonTest.java | 10 +- .../test/java/oap/logstream/LoggerTest.java | 65 ++-- .../logstream/disk/AbstractWriterTest.java | 24 +- .../formats/parquet/ParquetAssertionTest.java | 49 --- .../formats/parquet/ParquetTest.java | 144 -------- .../formats/parquet/ParquetUtilsTest.java | 39 -- .../src/test/resources/datamodel.conf | 69 ---- .../DynamicMapLoggerTest/datamodel.conf | 0 .../dynamic/DynamicMapLoggerTest/event.json | 0 .../oap-logstream/oap-logstream/pom.xml | 92 ----- .../oap/logstream/disk/ParquetLogWriter.java | 317 ---------------- .../java/oap/logstream/disk/TsvWriter.java | 242 ------------- .../logstream/disk/WriterConfiguration.java | 22 +- .../formats/MemoryInputStreamWrapper.java | 92 ----- .../formats/parquet/LocalInputFile.java | 178 --------- .../parquet/ParquetBufferedWriter.java | 94 ----- .../parquet/ParquetGroupRecordConverter.java | 60 --- .../formats/parquet/ParquetInputFile.java | 55 --- .../formats/parquet/ParquetNumberValue.java | 82 ----- .../formats/parquet/ParquetSimpleGroup.java | 250 ------------- .../parquet/ParquetSimpleGroupConverter.java | 73 ---- .../parquet/ParquetSimpleGroupFactory.java | 43 --- .../ParquetSimplePrimitiveConverter.java | 99 ----- .../formats/parquet/ParquetUtils.java | 341 ------------------ .../formats/parquet/ParquetWriteBuilder.java | 50 --- .../TemplateAccumulatorRowBinary.java | 2 - oap-formats/oap-logstream/pom.xml | 4 - .../java/oap/template/BinaryInputStream.java | 329 ----------------- .../java/oap/template/BinaryOutputStream.java | 219 ----------- .../main/java/oap/template/BinaryUtils.java | 79 ---- .../template/TemplateAccumulatorBinary.java | 252 ------------- .../oap/template/TemplateAccumulators.java | 1 - .../src/main/java/oap/template/Types.java | 2 +- .../java/oap/template/BinaryStreamTest.java | 113 ------ .../TemplateEngineConcatenationTest.java | 13 - .../oap/template/TemplateEngineOrTest.java | 19 - .../java/oap/template/TemplateEngineTest.java | 22 -- 48 files changed, 58 insertions(+), 4605 deletions(-) delete mode 100644 oap-formats/oap-logstream/oap-logstream-data-object/src/main/java/oap/logstream/data/object/BinaryObjectLogger.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data-object/src/test/java/oap/logstream/data/object/BinaryObjectLoggerTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/dynamic/DynamicMapLogger.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/AbstractMapLogger.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogModel.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogRenderer.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/dynamic/DynamicMapLoggerTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLogModelTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLoggerTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/parquet/ParquetAssertion.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetAssertionTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetUtilsTest.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/test/resources/datamodel.conf rename oap-formats/oap-logstream/{oap-logstream-data => oap-logstream-test}/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/datamodel.conf (100%) rename oap-formats/oap-logstream/{oap-logstream-data => oap-logstream-test}/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/event.json (100%) delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/ParquetLogWriter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/TsvWriter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/MemoryInputStreamWrapper.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/LocalInputFile.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetBufferedWriter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetGroupRecordConverter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetInputFile.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetNumberValue.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroup.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupConverter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupFactory.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimplePrimitiveConverter.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetUtils.java delete mode 100644 oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetWriteBuilder.java delete mode 100644 oap-formats/oap-template/src/main/java/oap/template/BinaryInputStream.java delete mode 100644 oap-formats/oap-template/src/main/java/oap/template/BinaryOutputStream.java delete mode 100644 oap-formats/oap-template/src/main/java/oap/template/BinaryUtils.java delete mode 100644 oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulatorBinary.java delete mode 100644 oap-formats/oap-template/src/test/java/oap/template/BinaryStreamTest.java diff --git a/oap-formats/oap-logstream/oap-logstream-data-object/src/main/java/oap/logstream/data/object/BinaryObjectLogger.java b/oap-formats/oap-logstream/oap-logstream-data-object/src/main/java/oap/logstream/data/object/BinaryObjectLogger.java deleted file mode 100644 index f179c5743..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data-object/src/main/java/oap/logstream/data/object/BinaryObjectLogger.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.object; - -import com.google.common.base.Preconditions; -import oap.dictionary.Dictionary; -import oap.dictionary.DictionaryRoot; -import oap.logstream.AbstractLoggerBackend; -import oap.logstream.AvailabilityReport; -import oap.logstream.LogStreamProtocol; -import oap.net.Inet; -import oap.reflect.TypeRef; -import oap.template.Template; -import oap.template.TemplateAccumulatorBinary; -import oap.template.TemplateEngine; -import oap.template.TemplateException; -import oap.template.Types; -import oap.util.FastByteArrayOutputStream; -import org.apache.commons.lang3.StringUtils; - -import javax.annotation.Nonnull; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import static java.util.Objects.requireNonNull; -import static oap.template.ErrorStrategy.ERROR; - -/** - * Class for beans described in the datamodel via oap-logstream protocol. - */ -public class BinaryObjectLogger { - public static final String COLLECTION_SUFFIX = "_ARRAY"; - public static final HashMap types = new HashMap<>(); - - static { - types.put( "DATETIME", new TypeConfiguration( "org.joda.time.DateTime", Types.DATETIME ) ); - types.put( "BOOLEAN", new TypeConfiguration( "java.lang.Boolean", Types.BOOLEAN ) ); - types.put( "ENUM", new TypeConfiguration( "java.lang.Enum", Types.STRING ) ); - types.put( "STRING", new TypeConfiguration( "java.lang.String", Types.STRING ) ); - types.put( "LONG", new TypeConfiguration( "java.lang.Long", Types.LONG ) ); - types.put( "INTEGER", new TypeConfiguration( "java.lang.Integer", Types.INTEGER ) ); - types.put( "SHORT", new TypeConfiguration( "java.lang.Short", Types.SHORT ) ); - types.put( "FLOAT", new TypeConfiguration( "java.lang.Float", Types.FLOAT ) ); - types.put( "DOUBLE", new TypeConfiguration( "java.lang.Double", Types.DOUBLE ) ); - } - - public final DictionaryRoot model; - public final TemplateEngine engine; - public final AbstractLoggerBackend backend; - - public BinaryObjectLogger( DictionaryRoot model, AbstractLoggerBackend backend, TemplateEngine engine ) { - this.model = model; - this.backend = backend; - this.engine = engine; - } - - public BinaryObjectLogger( DictionaryRoot model, AbstractLoggerBackend backend, @Nonnull Path diskCache, long ttl ) { - this( model, backend, new TemplateEngine( diskCache, ttl ) ); - } - - private static String checkStringAndGet( Dictionary dictionary, String fieldName ) { - Object fieldObject = dictionary.getProperty( fieldName ).orElseThrow( () -> new TemplateException( dictionary.getId() + ": type is required" ) ); - Preconditions.checkArgument( fieldObject instanceof String, dictionary.getId() + ": type must be String, but is " + fieldObject.getClass() ); - return ( String ) fieldObject; - } - - public TypedBinaryLogger typed( TypeRef typeRef, String id ) { - var value = requireNonNull( model.getValue( id ), "configuration for " + id + " is not found" ); - - var headers = new ArrayList(); - var rowTypes = new ArrayList(); - var expressions = new ArrayList(); - - for( var field : value.getValues( d -> d.containsProperty( "path" ) ) ) { - var name = field.getId(); - var path = checkStringAndGet( field, "path" ); - var fieldType = checkStringAndGet( field, "type" ); - var format = field.getProperty( "format" ).orElse( null ); - - boolean collection = false; - var idType = fieldType; - if( idType.endsWith( COLLECTION_SUFFIX ) ) { - collection = true; - idType = idType.substring( 0, idType.length() - COLLECTION_SUFFIX.length() ); - } - - TypeConfiguration rowType = types.get( idType ); - Preconditions.checkNotNull( rowType, "unknown type " + idType ); - - var defaultValue = field.getProperty( "default" ) - .orElseThrow( () -> new IllegalStateException( "default not found for " + id + "/" + name ) ); - - var templateFunction = format != null ? "; format(\"" + format + "\")" : ""; - var comment = "model " + id + " id " + name + " path " + path + " type " + fieldType + " defaultValue '" + defaultValue + "'"; - var pDefaultValue = - defaultValue instanceof String ? "\"" + ( ( String ) defaultValue ).replace( "\"", "\\\"" ) + '"' - : defaultValue; - - expressions.add( "{{ /* " + comment + " */" + toJavaType( rowType.javaType, collection ) + path + " ?? " + pDefaultValue + templateFunction + " }}" ); - headers.add( name ); - if( collection ) { - rowTypes.add( new byte[] { Types.LIST.id, rowType.templateType.id } ); - } else { - rowTypes.add( new byte[] { rowType.templateType.id } ); - } - } - - var template = String.join( "", expressions ); - var renderer = engine.getTemplate( - "Log" + StringUtils.capitalize( id ), - typeRef, - template, - new TemplateAccumulatorBinary(), - ERROR, - null ); - return new TypedBinaryLogger<>( renderer, headers.toArray( new String[0] ), rowTypes.toArray( new byte[0][] ) ); - - } - - public boolean isLoggingAvailable() { - return backend.isLoggingAvailable(); - } - - public AvailabilityReport availabilityReport() { - return backend.availabilityReport(); - } - - private String toJavaType( String javaType, boolean collection ) { - StringBuilder sb = new StringBuilder( "<" ); - if( collection ) sb.append( "java.util.Collection<" ); - sb.append( javaType ); - if( collection ) sb.append( ">" ); - sb.append( ">" ); - return sb.toString(); - } - - public static class TypeConfiguration { - public final String javaType; - public final Types templateType; - - public TypeConfiguration( String javaType, Types templateType ) { - this.javaType = javaType; - this.templateType = templateType; - } - } - - public class TypedBinaryLogger { - public final String[] headers; - public final byte[][] types; - private final Template renderer; - - public TypedBinaryLogger( Template renderer, String[] headers, byte[][] types ) { - this.renderer = renderer; - - this.headers = headers; - this.types = types; - } - - public void log( D data, String filePreffix, Map properties, String logType ) { - byte[] bytes = renderer.render( data, true ).getBytes(); - backend.log( LogStreamProtocol.ProtocolVersion.BINARY_V2, Inet.HOSTNAME, filePreffix, properties, logType, headers, types, bytes ); - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data-object/src/test/java/oap/logstream/data/object/BinaryObjectLoggerTest.java b/oap-formats/oap-logstream/oap-logstream-data-object/src/test/java/oap/logstream/data/object/BinaryObjectLoggerTest.java deleted file mode 100644 index 1370ca83d..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data-object/src/test/java/oap/logstream/data/object/BinaryObjectLoggerTest.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.object; - -import oap.dictionary.DictionaryLeaf; -import oap.dictionary.DictionaryRoot; -import oap.dictionary.DictionaryValue; -import oap.logstream.LogId; -import oap.logstream.MemoryLoggerBackend; -import oap.net.Inet; -import oap.reflect.TypeRef; -import oap.template.BinaryUtils; -import oap.template.Types; -import oap.testng.Fixtures; -import oap.testng.TestDirectoryFixture; -import oap.util.Dates; -import org.testng.annotations.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import static org.assertj.core.api.Assertions.assertThat; - -public class BinaryObjectLoggerTest extends Fixtures { - private final TestDirectoryFixture testDirectoryFixture; - - public BinaryObjectLoggerTest() { - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void testLog() throws IOException { - MemoryLoggerBackend memoryLoggerBackend = new MemoryLoggerBackend(); - BinaryObjectLogger binaryObjectLogger = new BinaryObjectLogger( new DictionaryRoot( "model", List.of( - new DictionaryValue( "MODEL1", true, 1, List.of( - new DictionaryLeaf( "a", true, 2, Map.of( "path", "a", "type", "STRING", "default", "" ) ), - new DictionaryLeaf( "b", true, 2, Map.of( "path", "b", "type", "INTEGER", "default", 123 ) ), - new DictionaryLeaf( "aaa", true, 2, Map.of( "path", "a | default aa", "type", "STRING", "default", "" ) ), - new DictionaryLeaf( "list", true, 2, Map.of( "path", "data1.list | default data2.list", "type", "STRING_ARRAY", "default", "[]" ) ), - new DictionaryLeaf( "x", true, 2, Map.of( "type", "INTEGER", "default", 1 ) ) - ) ) - ) ), memoryLoggerBackend, testDirectoryFixture.testPath( "tmp" ), Dates.d( 10 ) ); - - BinaryObjectLogger.TypedBinaryLogger logger = binaryObjectLogger.typed( new TypeRef<>() {}, "MODEL1" ); - - logger.log( new TestData( "ff", "cc", 12, List.of( "1" ), null ), "prefix", Map.of(), "mylog" ); - logger.log( new TestData( null, "dd", 44, null, List.of( "2" ) ), "prefix", Map.of(), "mylog" ); - - byte[] bytes = memoryLoggerBackend.loggedBytes( new LogId( "prefix", "mylog", Inet.HOSTNAME, Map.of(), - new String[] { "a", "b", "aaa", "list" }, - new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.INTEGER.id }, new byte[] { Types.STRING.id }, new byte[] { Types.LIST.id, Types.STRING.id } } ) ); - - assertThat( BinaryUtils.read( bytes ) ).isEqualTo( List.of( List.of( "ff", 12, "ff", List.of( "1" ) ), List.of( "", 44, "dd", List.of( "2" ) ) ) ); - } - - public static class TestData { - public String a; - public String aa; - public int b; - - public Optional data1 = Optional.empty(); - public Optional data2 = Optional.empty(); - - public TestData() { - } - - public TestData( String a, String aa, int b, List data1, List data2 ) { - this.a = a; - this.aa = aa; - this.b = b; - - if( data1 != null ) this.data1 = Optional.of( new TestData1( data1 ) ); - if( data2 != null ) this.data2 = Optional.of( new TestData1( data2 ) ); - } - - public static class TestData1 { - public final ArrayList list = new ArrayList<>(); - - public TestData1( List list ) { - this.list.addAll( list ); - } - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/dynamic/DynamicMapLogger.java b/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/dynamic/DynamicMapLogger.java deleted file mode 100644 index 25c25fb2d..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/dynamic/DynamicMapLogger.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.dynamic; - -import lombok.extern.slf4j.Slf4j; -import oap.dictionary.DictionaryRoot; -import oap.logstream.AbstractLoggerBackend; -import oap.logstream.LogStreamProtocol; -import oap.logstream.Logger; -import oap.logstream.data.map.MapLogModel; -import oap.logstream.data.map.MapLogRenderer; -import oap.util.AssocList; - -import javax.annotation.Nonnull; -import java.util.Map; - -@Slf4j -public class DynamicMapLogger extends Logger { - private final Extractors extractors = new Extractors(); - - public DynamicMapLogger( AbstractLoggerBackend backend ) { - super( backend, LogStreamProtocol.ProtocolVersion.TSV_V1 ); - } - - public void addExtractor( AbstractExtractor extractor ) { - this.extractors.add( extractor ); - } - - public void log( String name, Map data ) { - AbstractExtractor extractor = extractors.get( name ) - .orElseThrow( () -> new IllegalStateException( "not extractor for " + name ) ); - log.trace( "name: {}, extractor: {}, data: {}, ", name, extractor, data ); - log( extractor.prefix( data ), extractor.substitutions( data ), name, - extractor.renderer.headers(), extractor.renderer.types(), extractor.renderer.render( data ) ); - } - - public abstract static class AbstractExtractor { - private final MapLogRenderer renderer; - - public AbstractExtractor( DictionaryRoot model, String id, String tag ) { - renderer = new MapLogModel( model ).renderer( id, tag ); - } - - @Nonnull - public abstract String prefix( @Nonnull Map data ); - - @Nonnull - public abstract Map substitutions( @Nonnull Map data ); - - @Nonnull - public abstract String name(); - } - - private static class Extractors extends AssocList { - @Override - protected String keyOf( AbstractExtractor extractor ) { - return extractor.name(); - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/AbstractMapLogger.java b/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/AbstractMapLogger.java deleted file mode 100644 index 955e4ac60..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/AbstractMapLogger.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.map; - -import oap.dictionary.DictionaryRoot; -import oap.logstream.AbstractLoggerBackend; -import oap.logstream.Logger; - -import javax.annotation.Nonnull; -import java.util.Map; - -public abstract class AbstractMapLogger extends Logger { - private final MapLogRenderer renderer; - private final String name; - - public AbstractMapLogger( AbstractLoggerBackend backend, DictionaryRoot datamodel, String id, String tag, String name ) { - super( backend ); - this.name = name; - this.renderer = new MapLogModel( datamodel ).renderer( id, tag ); - } - - public void log( @Nonnull Map data ) { - this.log( prefix( data ), substitutions( data ), name, renderer.headers(), renderer.types(), renderer.render( data ) ); - } - - @Nonnull - public abstract String prefix( @Nonnull Map data ); - - @Nonnull - public abstract Map substitutions( @Nonnull Map data ); - -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogModel.java b/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogModel.java deleted file mode 100644 index c80f805d6..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogModel.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.map; - -import oap.dictionary.Dictionary; -import oap.dictionary.DictionaryRoot; -import oap.logstream.data.AbstractLogModel; -import oap.logstream.data.LogRenderer; -import oap.reflect.TypeRef; -import oap.template.TemplateAccumulatorString; -import oap.template.Types; -import org.apache.parquet.Preconditions; - -import javax.annotation.Nonnull; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.StringJoiner; - -import static java.util.Objects.requireNonNull; - -public class MapLogModel extends AbstractLogModel { - public MapLogModel( @Nonnull DictionaryRoot model ) { - super( model, new TemplateAccumulatorString() ); - } - - public MapLogRenderer renderer( String id, String tag ) { - return renderer( new TypeRef<>() {}, id, tag ); - } - - @Override - public > LD renderer( TypeRef typeRef, String id, String tag ) { - return renderer( typeRef, new TemplateAccumulatorString(), id, tag ); - } - - @SuppressWarnings( "unchecked" ) - @Override - public > LD renderer( TypeRef typeRef, TemplateAccumulatorString accumulator, String id, String tag ) { - Preconditions.checkArgument( typeRef.type().equals( new TypeRef>() {}.type() ), "Map" ); - - Dictionary dictionary = requireNonNull( this.model.getValue( id ), id + " not found" ); - var headers = new StringJoiner( "\t" ); - List expressions = new ArrayList<>(); - headers.add( "TIMESTAMP" ); - for( Dictionary field : dictionary.getValues( d -> d.getTags().contains( tag ) ) ) { - headers.add( field.getId() ); - expressions.add( field.getProperty( "path" ) - .orElseThrow( () -> new IllegalArgumentException( "undefined property path for " + field.getId() ) ) ); - } - return ( LD ) new MapLogRenderer( new String[] { headers.toString() }, new byte[][] { new byte[] { Types.RAW.id } }, expressions ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogRenderer.java b/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogRenderer.java deleted file mode 100644 index 97576dc06..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/main/java/oap/logstream/data/map/MapLogRenderer.java +++ /dev/null @@ -1,81 +0,0 @@ -package oap.logstream.data.map; - -import lombok.EqualsAndHashCode; -import lombok.ToString; -import oap.logstream.data.LogRenderer; -import oap.reflect.Reflect; -import oap.template.BinaryOutputStream; -import oap.template.TemplateAccumulatorString; -import oap.util.Dates; -import org.joda.time.DateTime; - -import javax.annotation.Nonnull; -import java.io.ByteArrayOutputStream; -import java.util.List; -import java.util.Map; -import java.util.StringJoiner; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static oap.logstream.data.TsvDataTransformer.ofBoolean; -import static oap.logstream.data.TsvDataTransformer.ofString; - -@ToString -@EqualsAndHashCode -public class MapLogRenderer implements LogRenderer, String, StringBuilder, TemplateAccumulatorString> { - private final String[] headers; - private final List expressions; - private final byte[][] types; - - public MapLogRenderer( String[] headers, byte[][] types, List expressions ) { - this.headers = headers; - this.types = types; - this.expressions = expressions; - } - - @Nonnull - @Override - public String[] headers() { - return headers; - } - - @Nonnull - @Override - public byte[] render( @Nonnull Map data ) { - var bos = new BinaryOutputStream( new ByteArrayOutputStream() ); - StringJoiner joiner = new StringJoiner( "\t" ); - joiner.add( Dates.FORMAT_SIMPLE_CLEAN.print( DateTime.now() ) ); - - render( data, joiner ); - - String line = joiner + "\n"; - return line.getBytes( UTF_8 ); - } - - @Override - public byte[] render( @Nonnull Map data, StringBuilder sb ) { - StringJoiner joiner = new StringJoiner( "\t" ); - - render( data, joiner ); - - sb.append( joiner ); - sb.append( "\n" ); - return sb.toString().getBytes( UTF_8 ); - } - - private void render( @Nonnull Map data, StringJoiner joiner ) { - for( String expression : expressions ) { - Object v = Reflect.get( data, expression ); - joiner.add( switch( v ) { - case null -> ""; - case String str -> ofString( str ); - case Boolean b -> ofBoolean( b ); - default -> String.valueOf( v ); - } ); - } - } - - @Override - public byte[][] types() { - return types; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/dynamic/DynamicMapLoggerTest.java b/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/dynamic/DynamicMapLoggerTest.java deleted file mode 100644 index fd78e4a56..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/dynamic/DynamicMapLoggerTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.dynamic; - -import oap.dictionary.DictionaryRoot; -import oap.io.IoStreams; -import oap.logstream.Timestamp; -import oap.logstream.disk.DiskLoggerBackend; -import oap.net.Inet; -import oap.reflect.TypeRef; -import oap.testng.Fixtures; -import oap.testng.SystemTimerFixture; -import oap.testng.TestDirectoryFixture; -import oap.util.Dates; -import org.testng.annotations.Test; - -import javax.annotation.Nonnull; -import java.util.Map; - -import static oap.json.testng.JsonAsserts.objectOfTestJsonResource; -import static oap.testng.Asserts.assertFile; -import static oap.testng.Asserts.objectOfTestResource; - -public class DynamicMapLoggerTest extends Fixtures { - - private final TestDirectoryFixture testDirectoryFixture; - - public DynamicMapLoggerTest() { - fixture( new SystemTimerFixture( true ) ); - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void log() { - Dates.setTimeFixed( 2021, 1, 1, 1 ); - var backend = new DiskLoggerBackend( testDirectoryFixture.testDirectory(), Timestamp.BPH_12, 1024 ); - DynamicMapLogger logger = new DynamicMapLogger( backend ); - logger.addExtractor( new TestExtractor( objectOfTestResource( DictionaryRoot.class, getClass(), "datamodel.conf" ) ) ); - logger.log( "EVENT", objectOfTestJsonResource( getClass(), new TypeRef>() {}.clazz(), "event.json" ) ); - - backend.refresh( true ); - - assertFile( testDirectoryFixture.testPath( "EVENT/event/2021-01/01/EVENT_v7c18022a-1_" + Inet.HOSTNAME + "-2021-01-01-01-00.tsv.gz" ) ) - .hasContent( """ - TIMESTAMP\tNAME\tVALUE1\tVALUE2\tVALUE3 - 2021-01-01 01:00:00\tevent\tvalue1\t222\t333 - """, IoStreams.Encoding.GZIP ); - } - - public static class TestExtractor extends DynamicMapLogger.AbstractExtractor { - public static final String ID = "EVENT"; - - public TestExtractor( DictionaryRoot model ) { - super( model, ID, "LOG" ); - } - - @Override - @Nonnull - public String prefix( @Nonnull Map data ) { - return "/EVENT/${NAME}"; - } - - @Nonnull - @Override - public Map substitutions( @Nonnull Map data ) { - return Map.of( "NAME", String.valueOf( data.get( "name" ) ) ); - } - - @Override - @Nonnull - public String name() { - return ID; - } - - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLogModelTest.java b/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLogModelTest.java deleted file mode 100644 index 7c53e698a..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLogModelTest.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * - * * Copyright (c) Xenoss - * * Unauthorized copying of this file, via any medium is strictly prohibited - * * Proprietary and confidential - * - * - */ - -package oap.logstream.data.map; - -import oap.dictionary.DictionaryRoot; -import oap.util.Dates; -import org.testng.annotations.Test; - -import java.util.Map; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static oap.testng.Asserts.objectOfTestResource; -import static org.assertj.core.api.AssertionsForClassTypes.assertThat; - -public class MapLogModelTest { - @Test - public void render() { - Dates.setTimeFixed( 2022, 12, 16, 18, 1, 1 ); - - MapLogModel dataModel = new MapLogModel( objectOfTestResource( DictionaryRoot.class, getClass(), "datamodel.conf" ) ); - MapLogRenderer renderer = dataModel.renderer( "EVENT1", "LOG" ); - assertThat( renderer.headers() ).isEqualTo( new String[] { "TIMESTAMP\tNAME\tVALUE1\tVALUE2" } ); - assertThat( new String( renderer.render( Map.of( "name", "n", "value1", "v", "value2", 2 ) ), UTF_8 ) ) - .isEqualTo( "2022-12-16 18:01:01\tn\tv\t2\n" ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLoggerTest.java b/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLoggerTest.java deleted file mode 100644 index 73ce80003..000000000 --- a/oap-formats/oap-logstream/oap-logstream-data/src/test/java/oap/logstream/data/map/MapLoggerTest.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.data.map; - -import oap.dictionary.DictionaryRoot; -import oap.logstream.AbstractLoggerBackend; -import oap.logstream.LogId; -import oap.logstream.MemoryLoggerBackend; -import oap.reflect.TypeRef; -import oap.template.Types; -import oap.testng.Fixtures; -import oap.util.Dates; -import org.testng.annotations.Test; - -import javax.annotation.Nonnull; -import java.util.Map; - -import static oap.json.testng.JsonAsserts.objectOfTestJsonResource; -import static oap.net.Inet.HOSTNAME; -import static oap.testng.Asserts.objectOfTestResource; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.entry; - -public class MapLoggerTest extends Fixtures { - @Test - public void log() { - Dates.setTimeFixed( 2021, 1, 1, 1 ); - MemoryLoggerBackend backend = new MemoryLoggerBackend(); - AbstractMapLogger logger = new EventMapLogger( backend, objectOfTestResource( DictionaryRoot.class, getClass(), "datamodel.conf" ) ); - logger.log( objectOfTestJsonResource( getClass(), new TypeRef>() {}.clazz(), "event.json" ) ); - assertThat( backend.logs() ).containsExactly( entry( - new LogId( "/EVENT/${NAME}", "EVENT", HOSTNAME, Map.of( "NAME", "event" ), - new String[] { "TIMESTAMP\tNAME\tVALUE1\tVALUE2\tVALUE3" }, - new byte[][] { new byte[] { Types.RAW.id } } ), - "2021-01-01 01:00:00\tevent\tvalue1\t222\t333\n" - ) ); - } - - static class EventMapLogger extends AbstractMapLogger { - EventMapLogger( AbstractLoggerBackend backend, DictionaryRoot model ) { - super( backend, model, "EVENT", "LOG", "EVENT" ); - } - - @Nonnull - @Override - public String prefix( @Nonnull Map data ) { - return "/EVENT/${NAME}"; - } - - @Nonnull - @Override - public Map substitutions( @Nonnull Map data ) { - return Map.of( "NAME", String.valueOf( data.get( "name" ) ) ); - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/pom.xml b/oap-formats/oap-logstream/oap-logstream-test/pom.xml index 78c8171df..fb8cbef5a 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/pom.xml +++ b/oap-formats/oap-logstream/oap-logstream-test/pom.xml @@ -1,28 +1,4 @@ - - @@ -43,6 +19,11 @@ oap-logstream-net-server ${project.version} + + oap + oap-logstream-data + ${project.version} + oap oap-logstream-net-client diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/parquet/ParquetAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/parquet/ParquetAssertion.java deleted file mode 100644 index 1f7b7a00d..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/parquet/ParquetAssertion.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import lombok.EqualsAndHashCode; -import lombok.ToString; -import oap.util.Lists; -import oap.util.Throwables; -import org.apache.commons.io.IOUtils; -import org.apache.parquet.column.page.PageReadStore; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.io.ColumnIOFactory; -import org.apache.parquet.io.MessageColumnIO; -import org.apache.parquet.io.RecordReader; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.Type; -import org.apache.parquet.schema.Types; -import org.assertj.core.api.AbstractAssert; -import org.joda.time.DateTime; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.DateTimeZone.UTC; - -public class ParquetAssertion extends AbstractAssert { - protected ParquetAssertion( ParquetData data ) { - super( data, ParquetAssertion.class ); - } - - public static ParquetAssertion assertParquet( Path path, String... headers ) { - try { - byte[] buffer = Files.readAllBytes( path ); - return assertParquet( buffer, headers ); - } catch( IOException e ) { - throw Throwables.propagate( e ); - } - } - - public static ParquetAssertion assertParquet( InputStream inputStream, String... headers ) { - try { - var out = new ByteArrayOutputStream(); - IOUtils.copy( inputStream, out ); - return assertParquet( out.toByteArray(), headers ); - } catch( IOException e ) { - throw Throwables.propagate( e ); - } - } - - public static ParquetAssertion assertParquet( String data, String... headers ) { - return assertParquet( data.getBytes( UTF_8 ), headers ); - } - - public static ParquetAssertion assertParquet( byte[] data, String... headers ) { - try { - return new ParquetAssertion( new ParquetData( data, 0, data.length, List.of( headers ) ) ); - } catch( IOException e ) { - throw Throwables.propagate( e ); - } - } - - public static Row row( Object... cols ) { - return new Row( cols ); - } - - public ParquetAssertion hasHeaders( String... headers ) { - assertThat( actual.headers ).contains( headers ); - return this; - } - - public ParquetAssertion hasHeaders( Iterable headers ) { - assertThat( actual.headers ).containsAll( headers ); - return this; - } - - public ParquetAssertion containOnlyHeaders( String... headers ) { - assertThat( actual.headers ).containsOnly( headers ); - return this; - } - - public final ParquetAssertion containsExactlyInAnyOrder( Row... rows ) { - assertThat( actual.data ).containsExactlyInAnyOrder( rows ); - - return this; - } - - public final ParquetAssertion contains( Row... rows ) { - assertThat( actual.data ).contains( rows ); - - return this; - } - - public final ParquetAssertion containsLogicalTypes( List>... types ) { - List>> list = Lists.map( actual.types, tl -> Lists.map( tl, t -> { - LogicalTypeAnnotation logicalTypeAnnotation = t.getLogicalTypeAnnotation(); - if( logicalTypeAnnotation == null ) return null; - return logicalTypeAnnotation.getClass(); - } ) ); - assertThat( list ).contains( types ); - - return this; - } - - public final ParquetAssertion containsExactly( Row... rows ) { - assertThat( actual.data ).containsExactly( rows ); - - return this; - } - - public final ParquetAssertion containsOnly( Row... rows ) { - assertThat( actual.data ).containsOnly( rows ); - - return this; - } - - public final ParquetAssertion containsOnlyOnce( Row... rows ) { - assertThat( actual.data ).containsOnlyOnce( rows ); - - return this; - } - - public final ParquetAssertion containsAnyOf( Row... rows ) { - assertThat( actual.data ).containsAnyOf( rows ); - - return this; - } - - @ToString - @EqualsAndHashCode - public static class Row { - private final ArrayList cols = new ArrayList<>(); - - public Row( int size ) { - for( var i = 0; i < size; i++ ) cols.add( null ); - } - - public Row( Object... cols ) { - this.cols.addAll( List.of( cols ) ); - } - - public Row( List cols ) { - this.cols.addAll( cols ); - } - } - - @ToString - public static class ParquetData { - public final ArrayList headers = new ArrayList<>(); - public final ArrayList data = new ArrayList<>(); - public final ArrayList> types = new ArrayList<>(); - - @SuppressWarnings( "checkstyle:ModifiedControlVariable" ) - public ParquetData( byte[] buffer, int offset, int length, List includeCols ) throws IOException { - try( ParquetFileReader reader = ParquetFileReader.open( new ParquetInputFile( new ByteArrayInputStream( buffer, offset, length ) ) ) ) { - MessageType messageType = reader.getFileMetaData().getSchema(); - - this.headers.addAll( - includeCols.isEmpty() ? Lists.map( messageType.getFields(), Type::getName ) : includeCols ); - - Types.MessageTypeBuilder select = Types.buildMessage(); - - var id = 0; - for( var header : this.headers ) { - int fieldIndex = messageType.getFieldIndex( header ); - select.addField( messageType.getType( fieldIndex ).withId( id ) ); - id++; - } - - MessageType selectSchema = select.named( "selected" ); - reader.setRequestedSchema( selectSchema ); - - PageReadStore pages; - while( ( pages = reader.readNextRowGroup() ) != null ) { - long rows = pages.getRowCount(); - - MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO( selectSchema ); - RecordReader recordReader = columnIO.getRecordReader( pages, new ParquetGroupRecordConverter( selectSchema ) ); - - for( int i = 0; i < rows; i++ ) { - var row = new Row( this.headers.size() ); - var types = Arrays.asList( new Type[this.headers.size()] ); - ParquetSimpleGroup simpleGroup = ( ParquetSimpleGroup ) recordReader.read(); - - for( var x = 0; x < this.headers.size(); x++ ) { - int index = selectSchema.getFieldIndex( this.headers.get( x ) ); - Type type = selectSchema.getType( index ); - int idx = this.headers.indexOf( type.getName() ); - row.cols.set( idx, toJavaObject( type, simpleGroup, index ) ); - types.set( idx, type ); - } - this.data.add( row ); - this.types.add( types ); - } - } - - } - } - - private Object toJavaObject( Type type, Group group, int col ) { - return toJavaObject( type, group, col, 0 ); - } - - private Object toJavaObject( Type type, Group group, int col, int y ) { - LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation(); - if( logicalTypeAnnotation == null ) { - if( type.isPrimitive() ) { - PrimitiveType.PrimitiveTypeName primitiveTypeName = type.asPrimitiveType().getPrimitiveTypeName(); - if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.INT64 ) { - return group.getLong( col, y ); - } else if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.INT32 ) { - return group.getInteger( col, y ); - } else if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.BINARY ) { - return group.getString( col, y ); - } else if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.DOUBLE ) { - return group.getDouble( col, y ); - } else if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.FLOAT ) { - return group.getFloat( col, y ); - } else if( primitiveTypeName == PrimitiveType.PrimitiveTypeName.BOOLEAN ) { - return group.getBoolean( col, y ); - } - } - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation ) { - int bitWidth = ( ( LogicalTypeAnnotation.IntLogicalTypeAnnotation ) logicalTypeAnnotation ).getBitWidth(); - return switch( bitWidth ) { - case 8 -> ( byte ) group.getInteger( col, y ); - case 16 -> ( short ) group.getInteger( col, y ); - case 32 -> group.getInteger( col, y ); - default -> group.getLong( col, y ); - }; - - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation ) { - if( type.asPrimitiveType().getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.DOUBLE ) { - return group.getDouble( col, y ); - } else - return group.getFloat( col, y ); - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation ) { - return group.getString( col, y ); - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation ) { - return new DateTime( group.getInteger( col, y ) * 24L * 60 * 60 * 1000, UTC ); - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation ) { - return new DateTime( group.getLong( col, y ), UTC ); - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation ) { - var listGroup = group.getGroup( col, 0 ); - Type elementType = ( ( GroupType ) ( ( GroupType ) type ).getType( 0 ) ).getType( 0 ); - var count = listGroup.getFieldRepetitionCount( 0 ); - var list = new ArrayList<>(); - for( var yy = 0; yy < count; yy++ ) { - list.add( toJavaObject( elementType, listGroup.getGroup( 0, yy ), 0, 0 ) ); - } - return list; - } - throw new IllegalStateException( "Unknown type: " + type + ", logical: " + type.getLogicalTypeAnnotation() ); - } - - - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java index f0aff52f1..b58a6408d 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java @@ -24,9 +24,11 @@ package oap.logstream; +import oap.compression.Compression; import oap.io.IoStreams.Encoding; import oap.json.Binder; import oap.logstream.disk.DiskLoggerBackend; +import oap.logstream.formats.RowBinaryAssertion; import oap.logstream.formats.rowbinary.RowBinaryUtils; import oap.template.Types; import oap.testng.Fixtures; @@ -42,7 +44,6 @@ import static oap.logstream.Timestamp.BPH_12; import static oap.logstream.disk.DiskLoggerBackend.DEFAULT_BUFFER; import static oap.net.Inet.HOSTNAME; -import static oap.testng.Asserts.assertFile; import static oap.testng.Asserts.contentOfTestResource; import static org.assertj.core.api.Assertions.assertThat; @@ -67,11 +68,12 @@ public void diskJSON() throws IOException { String jsonContent = Binder.json.marshal( o ); assertThat( jsonContent ).isEqualTo( content ); - logger.log( "open_rtb_json", Map.of(), "request_response", headers, types, RowBinaryUtils.line( List.of( jsonContent ) ) ); + logger.log( "open_rtb_json", Map.of(), "request_response", headers, types, Compression.gzip( RowBinaryUtils.line( List.of( jsonContent ) ) ) ); } - assertFile( testDirectoryFixture.testPath( "logs/open_rtb_json/2015-10/10/request_response_v3b5d9e1b-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( String.join( "\t", headers ) + '\n' + content + "\n", Encoding.GZIP ); + RowBinaryAssertion.assertRowBinaryFile( testDirectoryFixture.testPath( "logs/open_rtb_json/2015-10/10/request_response_v3b5d9e1b-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), Encoding.GZIP ) + .content() + .contains( List.of( content ) ); } public static class SimpleJson { diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java index 812e53b3f..b54c5021d 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java @@ -25,6 +25,7 @@ package oap.logstream; import lombok.extern.slf4j.Slf4j; +import oap.compression.Compression; import oap.http.server.nio.NioHttpServer; import oap.logstream.disk.DiskLoggerBackend; import oap.logstream.formats.rowbinary.RowBinaryUtils; @@ -49,6 +50,7 @@ import static oap.logstream.Timestamp.BPH_12; import static oap.logstream.disk.DiskLoggerBackend.DEFAULT_BUFFER; import static oap.logstream.disk.DiskLoggerBackend.DEFAULT_FREE_SPACE_REQUIRED; +import static oap.logstream.formats.RowBinaryAssertion.assertRowBinaryFile; import static oap.net.Inet.HOSTNAME; import static oap.testng.Asserts.assertEventually; import static oap.testng.Asserts.assertFile; @@ -68,16 +70,14 @@ public LoggerTest() { public void disk() throws IOException { Dates.setTimeFixed( 2015, 10, 10, 1 ); - byte[] line1 = RowBinaryUtils.line( List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678", "12345678" ) ); - String loggedLine1 = "2015-10-10 01:00:00\t12345678\t12345678\n"; + List lineData1 = List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678", "12345678" ); + byte[] line1 = Compression.gzip( RowBinaryUtils.line( lineData1 ) ); String[] headers1 = new String[] { "TIMESTAMP", "REQUEST_ID", "REQUEST_ID2" }; byte[][] types1 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - String loggedHeaders1 = String.join( "\t", headers1 ) + "\n"; - byte[] line2 = RowBinaryUtils.line( List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678" ) ); - String loggedLine2 = "2015-10-10 01:00:00\t12345678\n"; + List lineData2 = List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678" ); + byte[] line2 = Compression.gzip( RowBinaryUtils.line( lineData2 ) ); String[] headers2 = new String[] { "TIMESTAMP", "REQUEST_ID2" }; byte[][] types2 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id } }; - String loggedHeaders2 = String.join( "\t", headers2 ) + "\n"; try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ) ) { Logger logger = new Logger( backend ); logger.log( "lfn1", Map.of(), "log", headers1, types1, line1 ); @@ -88,14 +88,18 @@ public void disk() throws IOException { logger.log( "lfn1", Map.of(), "log", headers2, types2, line2 ); } - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( loggedHeaders1 + loggedLine1 + loggedLine1, GZIP ); - assertFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( loggedHeaders1 + loggedLine1, GZIP ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( loggedHeaders2 + loggedLine2, GZIP ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( loggedHeaders2 + loggedLine2, GZIP ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData1, lineData1 ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData1 ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData2 ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData2 ) ); } @Test @@ -105,10 +109,12 @@ public void net() throws IOException { int port = Ports.getFreePort( getClass() ); Path controlStatePath = testDirectoryFixture.testPath( "controlStatePath.st" ); - byte[] line1 = RowBinaryUtils.line( List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678", "12345678" ) ); + List lineData1 = List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678", "12345678" ); + byte[] line1 = Compression.gzip( RowBinaryUtils.line( lineData1 ) ); String[] headers1 = new String[] { "TIMESTAMP", "REQUEST_ID", "REQUEST_ID2" }; byte[][] types1 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; - byte[] line2 = RowBinaryUtils.line( List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678" ) ); + List lineData2 = List.of( new DateTime( 2015, 10, 10, 1, 0, UTC ), "12345678" ); + byte[] line2 = Compression.gzip( RowBinaryUtils.line( lineData2 ) ); String[] headers2 = new String[] { "TIMESTAMP", "REQUEST_ID2" }; byte[][] types2 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id } }; @@ -132,7 +138,7 @@ public void net() throws IOException { client.syncMemory(); assertEventually( 50, 100, () -> assertFalse( logger.isLoggingAvailable() ) ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) + assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ) ) .doesNotExist(); serverBackend.requiredFreeSpace = DEFAULT_FREE_SPACE_REQUIRED; @@ -156,21 +162,14 @@ public void net() throws IOException { } assertEventually( 10, 1000, () -> - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( """ - TIMESTAMP\tREQUEST_ID\tREQUEST_ID2 - 2015-10-10 01:00:00 12345678\t12345678 - 2015-10-10 01:00:00 12345678\t12345678 - """.stripIndent(), GZIP ) ); - assertFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( """ - TIMESTAMP\tREQUEST_ID\tREQUEST_ID2 - 2015-10-10 01:00:00\t12345678\t12345678 - """, GZIP ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz" ) ) - .hasContent( """ - TIMESTAMP\tREQUEST_ID2 - 2015-10-10 01:00:00\t12345678 - """, GZIP ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData1, lineData1 ) ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData1 ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + .content() + .isEqualTo( List.of( lineData2 ) ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java index a02bbd53e..372b5be1d 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java @@ -37,38 +37,38 @@ public class AbstractWriterTest { @Test public void testFileName() { - var h1Headers = new String[] { "h1" }; - var strTypes = new byte[][] { new byte[] { Types.STRING.id } }; + String[] h1Headers = new String[] { "h1" }; + byte[][] strTypes = new byte[][] { new byte[] { Types.STRING.id } }; - var lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); + LogId lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_FORMAT_TSV_GZ}-${INTERVAL} -${LOG_VERSION}-#{if}(${ORGANIZATION})${ORGANIZATION}#{else}UNKNOWN#{end}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/tsv.gz-01-85594397-1-UNKNOWN.tsv.gz" ); + .isEqualTo( "ln/tsv.gz-01-85594397-1-UNKNOWN.tsv.gz.rb.gz" ); assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/01-85594397-1-.tsv.gz" ); + .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/01-85594397-1-.tsv.gz" ); + .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_TIME_INTERVAL}.log.gz", lid1, Timestamp.BPH_6, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/10.log.gz" ); + .isEqualTo( "ln/10.log.gz.rb.gz" ); } @Test public void testFileNameConditional() { - var h1Headers = new String[] { "h1" }; - var strTypes = new byte[][] { new byte[] { Types.STRING.id } }; + String[] h1Headers = new String[] { "h1" }; + byte[][] strTypes = new byte[][] { new byte[] { Types.STRING.id } }; Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); - var lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); + LogId lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/01-85594397-1.tsv.gz" ); + .isEqualTo( "ln/01-85594397-1.tsv.gz.rb.gz" ); lid1 = new LogId( "ln", "lt", "chn", Map.of( "ORGANIZATION", "org1", "ACCOUNT", "acc1" ), h1Headers, strTypes ); assertThat( AbstractWriter.currentPattern( LogFormat.PARQUET, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) - .isEqualTo( "ln/org1/acc1/01-85594397-1.parquet" ); + .isEqualTo( "ln/org1/acc1/01-85594397-1.parquet.rb.gz" ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetAssertionTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetAssertionTest.java deleted file mode 100644 index ad17d7ee4..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetAssertionTest.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import oap.testng.Fixtures; -import oap.testng.TestDirectoryFixture; -import org.testng.annotations.Test; - -import static oap.logstream.formats.parquet.ParquetAssertion.assertParquet; -import static oap.logstream.formats.parquet.ParquetAssertion.row; - -public class ParquetAssertionTest extends Fixtures { - private final TestDirectoryFixture testDirectoryFixture; - - public ParquetAssertionTest() { - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void testWithoutLogicalTypes() { - testDirectoryFixture.deployTestData( getClass() ); - - assertParquet( testDirectoryFixture.testPath( "test.parquet" ) ) - .containOnlyHeaders( "DATETIME", "BID_ID", "TEST_3", "AGR", "REPORT_SOURCE" ) - .contains( row( 1551112200L, "val1", "", 3L, "GR" ) ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetTest.java deleted file mode 100644 index cb2babc98..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetTest.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import oap.dictionary.DictionaryParser; -import oap.dictionary.DictionaryRoot; -import oap.testng.Fixtures; -import oap.testng.TestDirectoryFixture; -import oap.util.Lists; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.parquet.column.page.PageReadStore; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.hadoop.example.GroupWriteSupport; -import org.apache.parquet.hadoop.util.HadoopInputFile; -import org.apache.parquet.io.ColumnIOFactory; -import org.apache.parquet.io.MessageColumnIO; -import org.apache.parquet.io.RecordReader; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.Type; -import org.testng.annotations.Test; - -import java.io.BufferedOutputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.sql.Timestamp; -import java.util.List; - -public class ParquetTest extends Fixtures { - private final TestDirectoryFixture testDirectoryFixture; - - public ParquetTest() { - testDirectoryFixture = fixture( new TestDirectoryFixture() ); - } - - @Test - public void testRW() throws IOException { - DictionaryRoot dictionaryRoot = DictionaryParser.parse( "/datamodel.conf", new DictionaryParser.IncrementalIdStrategy() ); - ParquetUtils schema = new ParquetUtils( dictionaryRoot.getValue( "TEST" ) ); - - long time = 1653579985423L; - System.out.println( "time = " + new Timestamp( time ) ); - - - Configuration conf = new Configuration(); - // https://issues.apache.org/jira/browse/HADOOP-19212 - conf.setBoolean( "fs.file.impl.disable.cache", true ); - MessageType messageType = ( MessageType ) schema.schema.named( "group" ); - GroupWriteSupport.setSchema( messageType, conf ); - - java.nio.file.Path file = testDirectoryFixture.testPath( "test.parquet" ); - - try( ParquetWriter writer = new ParquetWriteBuilder( new ParquetBufferedWriter( new BufferedOutputStream( new FileOutputStream( file.toFile() ) ) ) ) - .withConf( conf ) - .build() ) { - - for( long i = 0; i < 3; i++ ) { - ParquetSimpleGroup simpleGroup = new ParquetSimpleGroup( messageType ); - - simpleGroup.add( 0, time + i ); - simpleGroup.add( 1, "ID_SOURCE" ); - simpleGroup.add( 2, "ID_STRING_WITH_LENGTH" ); - simpleGroup.add( 3, i ); - - writer.write( simpleGroup ); - } - } - - try( ParquetFileReader reader = ParquetFileReader.open( HadoopInputFile.fromPath( new Path( file.toString() ), conf ) ) ) { - read( reader ); - } - - try( FileInputStream fis = new FileInputStream( file.toString() ); - ParquetFileReader reader = ParquetFileReader.open( new ParquetInputFile( fis ) ) ) { - - read( reader ); - } - - ParquetAssertion.assertParquet( file ) - .hasHeaders( "ID_DATETIME", "ID_SOURCE", "ID_STRING_WITH_LENGTH", "ID_LONG" ) - .containsExactly( - ParquetAssertion.row( 1653579985423L, "ID_SOURCE", "ID_STRING_WITH_LENGTH", 0L ), - ParquetAssertion.row( 1653579985424L, "ID_SOURCE", "ID_STRING_WITH_LENGTH", 1L ), - ParquetAssertion.row( 1653579985425L, "ID_SOURCE", "ID_STRING_WITH_LENGTH", 2L ) - ); - - ParquetAssertion.assertParquet( file, "ID_SOURCE", "ID_DATETIME", "ID_STRING_WITH_LENGTH" ) - .containsExactly( - ParquetAssertion.row( "ID_SOURCE", 1653579985423L, "ID_STRING_WITH_LENGTH" ), - ParquetAssertion.row( "ID_SOURCE", 1653579985424L, "ID_STRING_WITH_LENGTH" ), - ParquetAssertion.row( "ID_SOURCE", 1653579985425L, "ID_STRING_WITH_LENGTH" ) - ); - - } - - private void read( ParquetFileReader reader ) throws IOException { - MessageType messageType = reader.getFooter().getFileMetaData().getSchema(); - - List fieldNames = Lists.map( messageType.getFields(), Type::getName ); - System.out.println( fieldNames ); - - PageReadStore pages; - while( ( pages = reader.readNextRowGroup() ) != null ) { - long rows = pages.getRowCount(); - - MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO( messageType ); - RecordReader recordReader = columnIO.getRecordReader( pages, new ParquetGroupRecordConverter( messageType ) ); - - for( int i = 0; i < rows; i++ ) { - ParquetSimpleGroup simpleGroup = ( ParquetSimpleGroup ) recordReader.read(); - - for( int x = 0; x < fieldNames.size(); x++ ) { - System.out.print( " " + fieldNames.get( x ) + " = " ); - System.out.println( simpleGroup.getValueToString( x, 0 ) ); - } - } - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetUtilsTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetUtilsTest.java deleted file mode 100644 index e6616b491..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/formats/parquet/ParquetUtilsTest.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.joda.time.DateTime; -import org.testng.annotations.Test; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.DateTimeZone.UTC; - -public class ParquetUtilsTest { - @Test - public void testToDateTime() { - assertThat( new DateTime( ParquetUtils.toTimestamp( "2021-01-01T01:00:00" ), UTC ) ) - .isEqualTo( new DateTime( 2021, 1, 1, 1, 0, 0, UTC ) ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/resources/datamodel.conf b/oap-formats/oap-logstream/oap-logstream-test/src/test/resources/datamodel.conf deleted file mode 100644 index 42b927e1a..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/resources/datamodel.conf +++ /dev/null @@ -1,69 +0,0 @@ -name = datamodel -values = [ - { - id = TEST - values = [ - { - id = ID_DATETIME - type = DATETIME - default = "1970-01-01 00:00:00" - } - { - id = ID_SOURCE - type = STRING - default = "" - } - { - id = ID_STRING_WITH_LENGTH - type = STRING - length = 23 - default = "" - } - { - id = ID_LONG - type = LONG - default = 0 - } - ] - } - { - id = PERF - values = [ - { - id = DATETIME - type = DATETIME - default = "1970-01-01 00:00:00" - } - { - id = L - type = LONG - default = 0 - } - { - id = L1 - type = LONG - default = 0 - } - { - id = BL - type = LONG - default = 0 - } - { - id = FI - type = INTEGER - default = 0 - } - { - id = V1 - type = STRING - default = "" - } - { - id = V2 - type = STRING - default = "" - } - ] - } -] diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/datamodel.conf b/oap-formats/oap-logstream/oap-logstream-test/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/datamodel.conf similarity index 100% rename from oap-formats/oap-logstream/oap-logstream-data/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/datamodel.conf rename to oap-formats/oap-logstream/oap-logstream-test/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/datamodel.conf diff --git a/oap-formats/oap-logstream/oap-logstream-data/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/event.json b/oap-formats/oap-logstream/oap-logstream-test/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/event.json similarity index 100% rename from oap-formats/oap-logstream/oap-logstream-data/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/event.json rename to oap-formats/oap-logstream/oap-logstream-test/src/test/resources/oap/logstream/data/dynamic/DynamicMapLoggerTest/event.json diff --git a/oap-formats/oap-logstream/oap-logstream/pom.xml b/oap-formats/oap-logstream/oap-logstream/pom.xml index a2d87b8b3..e4455ab20 100644 --- a/oap-formats/oap-logstream/oap-logstream/pom.xml +++ b/oap-formats/oap-logstream/oap-logstream/pom.xml @@ -40,98 +40,6 @@ ${project.version} - - org.apache.parquet - parquet-hadoop - ${oap.deps.parquet.version} - - - org.apache.hadoop - hadoop-aws - ${oap.deps.hadoop.version} - - - software.amazon.awssdk - bundle - - - - - org.apache.hadoop - hadoop-client - ${oap.deps.hadoop.version} - - - com.fasterxml.jackson.core - jackson-databind - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - org.slf4j - slf4j-reload4j - - - org.eclipse.jetty - jetty-security - - - org.eclipse.jetty - jetty-client - - - org.eclipse.jetty - jetty-servlet - - - org.eclipse.jetty - jetty-util-ajax - - - org.eclipse.jetty - jetty-webapp - - - org.eclipse.jetty - jetty-xml - - - org.eclipse.jetty - jetty-http - - - org.eclipse.jetty - jetty-io - - - org.eclipse.jetty - jetty-util - - - org.eclipse.jetty.websocket - websocket-client - - - org.eclipse.jetty.websocket - websocket-common - - - org.eclipse.jetty.websocket - websocket-api - - - com.google.code.gson - gson - - - - org.apache.velocity velocity-engine-core diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/ParquetLogWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/ParquetLogWriter.java deleted file mode 100644 index 90ab24e3c..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/ParquetLogWriter.java +++ /dev/null @@ -1,317 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.disk; - -import lombok.extern.slf4j.Slf4j; -import oap.logstream.InvalidProtocolVersionException; -import oap.logstream.LogId; -import oap.logstream.LogIdTemplate; -import oap.logstream.LogStreamProtocol.ProtocolVersion; -import oap.logstream.LoggerException; -import oap.logstream.Timestamp; -import oap.logstream.formats.parquet.ParquetSimpleGroup; -import oap.logstream.formats.parquet.ParquetWriteBuilder; -import oap.logstream.formats.rowbinary.RowBinaryInputStream; -import oap.template.BinaryInputStream; -import oap.template.BinaryUtils; -import oap.util.Lists; -import org.apache.commons.io.FilenameUtils; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.Preconditions; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.hadoop.example.GroupWriteSupport; -import org.apache.parquet.hadoop.util.HadoopOutputFile; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.Type; -import org.apache.parquet.schema.Types; -import org.joda.time.DateTime; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.function.Function; - -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; - -@Slf4j -public class ParquetLogWriter extends AbstractWriter> { - private static final HashMap>, Types.Builder>> types = new HashMap<>(); - - static { - types.put( oap.template.Types.BOOLEAN.id, _ -> org.apache.parquet.schema.Types.required( BOOLEAN ) ); - types.put( oap.template.Types.BYTE.id, _ -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 8, true ) ) ); - types.put( oap.template.Types.SHORT.id, _ -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 16, true ) ) ); - types.put( oap.template.Types.INTEGER.id, _ -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 32, true ) ) ); - types.put( oap.template.Types.LONG.id, _ -> org.apache.parquet.schema.Types.required( INT64 ).as( LogicalTypeAnnotation.intType( 64, true ) ) ); - types.put( oap.template.Types.FLOAT.id, _ -> org.apache.parquet.schema.Types.required( FLOAT ) ); - types.put( oap.template.Types.DOUBLE.id, _ -> org.apache.parquet.schema.Types.required( DOUBLE ) ); - types.put( oap.template.Types.RAW.id, _ -> org.apache.parquet.schema.Types.required( BINARY ).as( LogicalTypeAnnotation.stringType() ) ); - types.put( oap.template.Types.STRING.id, _ -> org.apache.parquet.schema.Types.required( BINARY ).as( LogicalTypeAnnotation.stringType() ) ); - types.put( oap.template.Types.DATE.id, _ -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.dateType() ) ); - types.put( oap.template.Types.DATETIME.id, _ -> org.apache.parquet.schema.Types.required( INT64 ) ); -// types.put( Types.DADATETIME64.id, _ -> org.apache.parquet.schema.Types.required( INT64 ).as( LogicalTypeAnnotation.timestampType( true, MILLIS ) ) ); - types.put( oap.template.Types.LIST.id, children -> org.apache.parquet.schema.Types.requiredList().element( ( Type ) children.get( 0 ).named( "element" ) ) ); -// types.put( Types.ENUM.id, _ -> org.apache.parquet.schema.Types.required( BINARY ).as( LogicalTypeAnnotation.stringType() ) ); - } - - private final MessageType messageType; - private final WriterConfiguration.ParquetConfiguration configuration; - private final LinkedHashSet excludeFields = new LinkedHashSet<>(); - - public ParquetLogWriter( Path logDirectory, String filePattern, LogId logId, WriterConfiguration.ParquetConfiguration configuration, - int bufferSize, Timestamp timestamp, int maxVersions ) - throws IllegalArgumentException { - super( LogFormat.PARQUET, logDirectory, filePattern, logId, bufferSize, timestamp, maxVersions ); - this.configuration = configuration; - - - configuration.excludeFieldsIfPropertiesExists.forEach( ( field, property ) -> { - if( logId.properties.containsKey( property ) ) { - excludeFields.add( field ); - } - } ); - - log.debug( "exclude fields {}", excludeFields ); - - Types.MessageTypeBuilder messageTypeBuilder = Types.buildMessage(); - - for( var i = 0; i < logId.headers.length; i++ ) { - var header = logId.headers[i]; - var type = logId.types[i]; - - if( excludeFields.contains( header ) ) { - continue; - } - - Types.Builder fieldType = null; - for( var idx = type.length - 1; idx >= 0; idx-- ) { - Function>, Types.Builder> builderFunction = types.get( type[idx] ); - Preconditions.checkArgument( builderFunction != null, "" ); - fieldType = builderFunction.apply( fieldType != null ? List.of( fieldType ) : List.of() ); - } - - com.google.common.base.Preconditions.checkNotNull( fieldType ); - messageTypeBuilder.addField( ( Type ) fieldType.named( header ) ); - } - - log.debug( "writer path '{}' logType '{}' headers {} filePrefixPattern '{}' properties {} configuration '{}' bufferSize '{}'", - currentPattern(), logId.logType, Arrays.asList( logId.headers ), logId.filePrefixPattern, - logId.properties, configuration, bufferSize - ); - - messageType = messageTypeBuilder.named( "logger" ); - } - - private static void addValue( int col, Object obj, byte[] colType, int typeIdx, Group group ) { - var type = colType[typeIdx]; - if( type == oap.template.Types.BOOLEAN.id ) { - group.add( col, ( boolean ) obj ); - } else if( type == oap.template.Types.BYTE.id ) { - group.add( col, ( byte ) obj ); - } else if( type == oap.template.Types.SHORT.id ) { - group.add( col, ( short ) obj ); - } else if( type == oap.template.Types.INTEGER.id ) { - group.add( col, ( int ) obj ); - } else if( type == oap.template.Types.LONG.id ) { - group.add( col, ( long ) obj ); - } else if( type == oap.template.Types.FLOAT.id ) { - group.add( col, ( float ) obj ); - } else if( type == oap.template.Types.DOUBLE.id ) { - group.add( col, ( double ) obj ); - } else if( type == oap.template.Types.STRING.id ) { - group.add( col, ( String ) obj ); - } else if( type == oap.template.Types.DATETIME.id ) { - group.add( col, ( ( DateTime ) obj ).getMillis() / 1000 ); - } else if( type == oap.template.Types.LIST.id ) { - var listGroup = group.addGroup( col ); - for( var item : ( List ) obj ) { - addValue( 0, item, colType, typeIdx + 1, listGroup.addGroup( "list" ) ); - } - } else { - throw new IllegalStateException( "Unknown type:" + type ); - } - } - - @Override - public String write( ProtocolVersion protocolVersion, byte[] buffer, int offset, int length ) throws LoggerException { - if( protocolVersion.version < ProtocolVersion.BINARY_V2.version ) { - throw new InvalidProtocolVersionException( "parquet", protocolVersion.version ); - } - - if( closed ) { - throw new LoggerException( "writer is already closed!" ); - } - lock.lock(); - try { - refresh(); - Path filename = filename(); - if( out == null ) - if( !java.nio.file.Files.exists( filename ) ) { - log.info( "[{}] open new file v{}", filename, fileVersion ); - outFilename = filename; - - Configuration conf = new Configuration(); - GroupWriteSupport.setSchema( messageType, conf ); - // https://issues.apache.org/jira/browse/HADOOP-19212 - conf.setBoolean( "fs.file.impl.disable.cache", true ); - - out = new ParquetWriteBuilder( HadoopOutputFile.fromPath( new org.apache.hadoop.fs.Path( filename.toUri() ), conf ) ) - .withConf( conf ) - .withCompressionCodec( configuration.compressionCodecName ) - .build(); - - LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); - new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion ) ).writeFor( filename ); - } else { - log.info( "[{}] file exists v{}", filename, fileVersion ); - fileVersion += 1; - if( fileVersion > maxVersions ) throw new IllegalStateException( "version > " + maxVersions ); - return write( protocolVersion, buffer, offset, length ); - } - log.trace( "writing {} bytes to {}", length, this ); - if( protocolVersion == ProtocolVersion.BINARY_V2 ) { - convertToParquetV2( buffer, offset, length, logId.types, logId.headers ); - } else if( protocolVersion == ProtocolVersion.ROW_BINARY_V3 ) { - convertToParquetV3( buffer, offset, length, logId.types, logId.headers ); - } else { - throw new IllegalArgumentException( "Unknown protocol version: " + protocolVersion ); - } - - return filename.toString(); - } catch( IOException e ) { - log.error( e.getMessage(), e ); - try { - closeOutput(); - } finally { - outFilename = null; - out = null; - } - throw new LoggerException( e ); - } finally { - lock.unlock(); - } - } - - private void convertToParquetV2( byte[] buffer, int offset, int length, byte[][] types, String[] headers ) throws IOException { - BinaryInputStream bis = new BinaryInputStream( new ByteArrayInputStream( buffer, offset, length ) ); - int col = 0; - ParquetSimpleGroup group = new ParquetSimpleGroup( messageType ); - Object obj = bis.readObject(); - while( obj != null ) { - int parquetCol = 0; - while( obj != null && obj != BinaryInputStream.EOL ) { - byte[] colType = types[col]; - String header = headers[col]; - if( !excludeFields.contains( header ) ) { - try { - addValue( parquetCol, obj, colType, 0, group ); - } catch( Exception e ) { - log.error( "header {} class {} type {} col {}", header, obj.getClass().getName(), - Lists.map( List.of( ArrayUtils.toObject( types[col] ) ), oap.template.Types::valueOf ), - parquetCol ); - - var data = BinaryUtils.read( buffer, offset, length ); - log.error( "object data {}", data ); - - throw e; - } - parquetCol++; - } - obj = bis.readObject(); - col++; - } - out.write( group ); - col = 0; - group = new ParquetSimpleGroup( messageType ); - obj = bis.readObject(); - } - } - - private void convertToParquetV3( byte[] buffer, int offset, int length, byte[][] types, String[] headers ) throws IOException { - RowBinaryInputStream bis = new RowBinaryInputStream( new ByteArrayInputStream( buffer, offset, length ), headers, types ); - ParquetSimpleGroup group = new ParquetSimpleGroup( messageType ); - - List row = bis.readRow(); - while( row != null ) { - int col = 0; - for( int i = 0; i < row.size(); i++ ) { - Object obj = row.get( i ); - byte[] colType = types[i]; - String header = headers[i]; - if( !excludeFields.contains( header ) ) { - try { - addValue( col, obj, colType, 0, group ); - } catch( Exception e ) { - log.error( "header {} class {} type {} col {}", header, obj.getClass().getName(), - Lists.map( List.of( ArrayUtils.toObject( types[i] ) ), oap.template.Types::valueOf ), - col ); - - throw e; - } - col++; - } - } - out.write( group ); - group = new ParquetSimpleGroup( messageType ); - - row = bis.readRow(); - } - } - - @Override - protected void closeOutput() throws LoggerException { - Path parquetFile = outFilename; - - try { - super.closeOutput(); - } finally { - if( parquetFile != null ) { - var name = FilenameUtils.getName( parquetFile.toString() ); - var parent = FilenameUtils.getFullPathNoEndSeparator( parquetFile.toString() ); - java.nio.file.Path crcPath = Paths.get( parent + "/." + name + ".crc" ); - - if( Files.exists( crcPath ) ) - try { - Files.delete( crcPath ); - } catch( IOException e ) { - log.error( e.getMessage(), e ); - } - } - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/TsvWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/TsvWriter.java deleted file mode 100644 index d6eb2fd00..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/TsvWriter.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.disk; - -import com.google.common.io.CountingOutputStream; -import lombok.extern.slf4j.Slf4j; -import oap.io.IoStreams; -import oap.logstream.LogId; -import oap.logstream.LogIdTemplate; -import oap.logstream.LogStreamProtocol.ProtocolVersion; -import oap.logstream.LoggerException; -import oap.logstream.Timestamp; -import oap.logstream.formats.rowbinary.RowBinaryInputStream; -import oap.template.BinaryInputStream; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; - -import static java.nio.charset.StandardCharsets.UTF_8; - -@Slf4j -public class TsvWriter extends AbstractWriter { - private final WriterConfiguration.TsvConfiguration configuration; - - public TsvWriter( Path logDirectory, String filePattern, LogId logId, - WriterConfiguration.TsvConfiguration configuration, - int bufferSize, Timestamp timestamp, - int maxVersions ) { - super( LogFormat.TSV_GZ, logDirectory, filePattern, logId, bufferSize, timestamp, maxVersions ); - - this.configuration = configuration; - } - - @Override - public String write( ProtocolVersion protocolVersion, byte[] buffer, int offset, int length ) throws LoggerException { - lock.lock(); - try { - if( closed ) { - throw new LoggerException( "writer is already closed!" ); - } - - return switch( protocolVersion ) { - case TSV_V1 -> writeTsvV1( protocolVersion, buffer, offset, length ); - case BINARY_V2 -> writeBinaryV2( protocolVersion, buffer, offset, length ); - case ROW_BINARY_V3 -> writeBinaryV3( protocolVersion, buffer, offset, length ); - }; - } finally { - lock.unlock(); - } - } - - private String writeTsvV1( ProtocolVersion protocolVersion, byte[] buffer, int offset, int length ) { - try { - refresh(); - Path filename = filename(); - if( out == null ) - if( !java.nio.file.Files.exists( filename ) ) { - log.debug( "[{}] open new file v{}", filename, fileVersion ); - outFilename = filename; - out = new CountingOutputStream( IoStreams.out( filename, IoStreams.Encoding.from( filename ), bufferSize ) ); - LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); - new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion ) ).writeFor( filename ); - - out.write( logId.headers[0].getBytes( UTF_8 ) ); - out.write( '\n' ); - log.trace( "[{}] write headers {}", filename, logId.headers ); - } else { - log.debug( "[{}] file exists v{}", filename, fileVersion ); - fileVersion += 1; - if( fileVersion > maxVersions ) throw new IllegalStateException( "version > " + maxVersions ); - return write( protocolVersion, buffer, offset, length ); - } - log.trace( "writing {} bytes to {}", length, this ); - - out.write( buffer, offset, length ); - - return filename.toString(); - - } catch( IOException e ) { - log.error( e.getMessage(), e ); - try { - closeOutput(); - } finally { - outFilename = null; - out = null; - } - throw new LoggerException( e ); - } - - } - - private String writeBinaryV2( ProtocolVersion protocolVersion, byte[] buffer, int offset, int length ) { - try { - refresh(); - Path filename = filename(); - if( out == null ) - if( !java.nio.file.Files.exists( filename ) ) { - log.debug( "[{}] open new file v{}", filename, fileVersion ); - outFilename = filename; - out = new CountingOutputStream( IoStreams.out( filename, IoStreams.Encoding.from( filename ), bufferSize ) ); - LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); - new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion ) ).writeFor( filename ); - - out.write( String.join( "\t", logId.headers ).getBytes( UTF_8 ) ); - out.write( '\n' ); - log.trace( "[{}] write headers {}", filename, logId.headers ); - } else { - log.debug( "[{}] file exists v{}", filename, fileVersion ); - fileVersion += 1; - if( fileVersion > maxVersions ) throw new IllegalStateException( "version > " + maxVersions ); - return write( protocolVersion, buffer, offset, length ); - } - log.trace( "writing {} bytes to {}", length, this ); - - convertToTsvV2( buffer, offset, length, line -> out.write( line ) ); - - return filename.toString(); - } catch( IOException e ) { - log.error( e.getMessage(), e ); - try { - closeOutput(); - } finally { - outFilename = null; - out = null; - } - throw new LoggerException( e ); - } - } - - private String writeBinaryV3( ProtocolVersion protocolVersion, byte[] buffer, int offset, int length ) { - try { - refresh(); - Path filename = filename(); - if( out == null ) - if( !java.nio.file.Files.exists( filename ) ) { - log.debug( "[{}] open new file v{}", filename, fileVersion ); - outFilename = filename; - out = new CountingOutputStream( IoStreams.out( filename, IoStreams.Encoding.from( filename ), bufferSize ) ); - LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); - new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion ) ).writeFor( filename ); - - out.write( String.join( "\t", logId.headers ).getBytes( UTF_8 ) ); - out.write( '\n' ); - log.trace( "[{}] write headers {}", filename, logId.headers ); - } else { - log.debug( "[{}] file exists v{}", filename, fileVersion ); - fileVersion += 1; - if( fileVersion > maxVersions ) throw new IllegalStateException( "version > " + maxVersions ); - return write( protocolVersion, buffer, offset, length ); - } - log.trace( "writing {} bytes to {}", length, this ); - - convertToTsvV3( buffer, offset, length, line -> out.write( line ), logId.headers, logId.types ); - - return filename.toString(); - } catch( IOException e ) { - log.error( e.getMessage(), e ); - try { - closeOutput(); - } finally { - outFilename = null; - out = null; - } - throw new LoggerException( e ); - } - } - - private void convertToTsvV2( byte[] buffer, int offset, int length, IOExceptionConsumer cons ) throws IOException { - BinaryInputStream bis = new BinaryInputStream( new ByteArrayInputStream( buffer, offset, length ) ); - - StringBuilder sb = new StringBuilder(); - TemplateAccumulatorTsv ta = new TemplateAccumulatorTsv( sb, configuration.dateTime32Format ); - Object obj = bis.readObject(); - while( obj != null ) { - boolean first = true; - while( obj != null && obj != BinaryInputStream.EOL ) { - if( !first ) { - sb.append( '\t' ); - } else { - first = false; - } - ta.accept( obj ); - obj = bis.readObject(); - } - cons.accept( ta.addEol( obj == BinaryInputStream.EOL ).getBytes() ); - sb.setLength( 0 ); - obj = bis.readObject(); - } - } - - private void convertToTsvV3( byte[] buffer, int offset, int length, IOExceptionConsumer cons, String[] headers, byte[][] types ) throws IOException { - RowBinaryInputStream bis = new RowBinaryInputStream( new ByteArrayInputStream( buffer, offset, length ), headers, types ); - - StringBuilder sb = new StringBuilder(); - TemplateAccumulatorTsv ta = new TemplateAccumulatorTsv( sb, configuration.dateTime32Format ); - - List row = bis.readRow(); - while( row != null ) { - boolean first = true; - for( Object item : row ) { - if( !first ) { - sb.append( '\t' ); - } else { - first = false; - } - ta.accept( item ); - } - cons.accept( ta.addEol( true ).getBytes() ); - sb.setLength( 0 ); - row = bis.readRow(); - } - } - - @FunctionalInterface - public interface IOExceptionConsumer { - void accept( T t ) throws IOException; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/WriterConfiguration.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/WriterConfiguration.java index 8ce8c8d39..8b9ddc955 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/WriterConfiguration.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/WriterConfiguration.java @@ -26,12 +26,11 @@ import lombok.ToString; import oap.util.Dates; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; - -import java.util.LinkedHashMap; @ToString public class WriterConfiguration { + public final TsvConfiguration tsv = new TsvConfiguration(); + @ToString public static class TsvConfiguration { public final String dateTime32Format; @@ -44,21 +43,4 @@ public TsvConfiguration( String dateTime32Format ) { this.dateTime32Format = dateTime32Format; } } - - @ToString - public static class ParquetConfiguration { - public final CompressionCodecName compressionCodecName; - public final LinkedHashMap excludeFieldsIfPropertiesExists = new LinkedHashMap<>(); - - public ParquetConfiguration() { - this( CompressionCodecName.ZSTD ); - } - - public ParquetConfiguration( CompressionCodecName compressionCodecName ) { - this.compressionCodecName = compressionCodecName; - } - } - - public final TsvConfiguration tsv = new TsvConfiguration(); - public final ParquetConfiguration parquet = new ParquetConfiguration(); } diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/MemoryInputStreamWrapper.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/MemoryInputStreamWrapper.java deleted file mode 100644 index ba047f455..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/MemoryInputStreamWrapper.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats; - -import it.unimi.dsi.fastutil.io.FastByteArrayInputStream; -import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.PositionedReadable; -import org.apache.hadoop.fs.Seekable; - -import java.io.IOException; -import java.io.InputStream; - -public class MemoryInputStreamWrapper extends FastByteArrayInputStream implements Seekable, PositionedReadable { - protected MemoryInputStreamWrapper( InputStream is, int size ) throws IOException { - super( new byte[size] ); - - IOUtils.readFully( is, array ); - } - - protected MemoryInputStreamWrapper( byte[] array, int offset, int length ) { - super( array, offset, length ); - } - - public static MemoryInputStreamWrapper wrap( InputStream is, int size ) throws IOException { - return new MemoryInputStreamWrapper( is, size ); - } - - public static MemoryInputStreamWrapper wrap( InputStream is ) throws IOException { - var out = new FastByteArrayOutputStream(); - - IOUtils.copy( is, out ); - - return new MemoryInputStreamWrapper( out.array, 0, out.length ); - } - - @Override - public int read( long position, byte[] buffer, int offset, int length ) { - int availableLength = Math.min( length, length - ( int ) position ); - - System.arraycopy( array, ( int ) position, buffer, offset, availableLength ); - - return availableLength; - } - - @Override - public void readFully( long position, byte[] buffer, int offset, int length ) { - System.arraycopy( array, ( int ) position, buffer, offset, length ); - } - - @Override - public void readFully( long position, byte[] buffer ) { - System.arraycopy( array, ( int ) position, buffer, 0, buffer.length ); - } - - @Override - public void seek( long pos ) { - position( pos ); - } - - @Override - public long getPos() { - return position(); - } - - @Override - public boolean seekToNewSource( long targetPos ) { - return false; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/LocalInputFile.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/LocalInputFile.java deleted file mode 100644 index ba7df34ad..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/LocalInputFile.java +++ /dev/null @@ -1,178 +0,0 @@ -package oap.logstream.formats.parquet; - -import org.apache.parquet.io.InputFile; -import org.apache.parquet.io.SeekableInputStream; - -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.file.Path; - -/** - * from - */ -public class LocalInputFile implements InputFile { - private static final int COPY_BUFFER_SIZE = 8192; - - private final RandomAccessFile raf; - - public LocalInputFile( Path path ) throws FileNotFoundException { - this( path.toFile() ); - } - - public LocalInputFile( File file ) throws FileNotFoundException { - raf = new RandomAccessFile( file, "r" ); - } - - private static int readDirectBuffer( ByteBuffer byteBuffer, byte[] tmpBuf, ByteBufReader rdr ) - throws IOException { - // copy all the bytes that return immediately, stopping at the first - // read that doesn't return a full buffer. - int nextReadLength = Math.min( byteBuffer.remaining(), tmpBuf.length ); - int totalBytesRead = 0; - int bytesRead; - - while( ( bytesRead = rdr.read( tmpBuf, 0, nextReadLength ) ) == tmpBuf.length ) { - byteBuffer.put( tmpBuf ); - totalBytesRead += bytesRead; - nextReadLength = Math.min( byteBuffer.remaining(), tmpBuf.length ); - } - - if( bytesRead < 0 ) { - // return -1 if nothing was read - return totalBytesRead == 0 ? -1 : totalBytesRead; - } else { - // copy the last partial buffer - byteBuffer.put( tmpBuf, 0, bytesRead ); - totalBytesRead += bytesRead; - return totalBytesRead; - } - } - - private static void readFullyDirectBuffer( ByteBuffer byteBuffer, byte[] tmpBuf, ByteBufReader rdr ) - throws IOException { - int nextReadLength = Math.min( byteBuffer.remaining(), tmpBuf.length ); - int bytesRead = 0; - - while( nextReadLength > 0 && ( bytesRead = rdr.read( tmpBuf, 0, nextReadLength ) ) >= 0 ) { - byteBuffer.put( tmpBuf, 0, bytesRead ); - nextReadLength = Math.min( byteBuffer.remaining(), tmpBuf.length ); - } - - if( bytesRead < 0 && byteBuffer.remaining() > 0 ) { - throw new EOFException( - "Reached the end of stream with " + byteBuffer.remaining() + " bytes left to read" ); - } - } - - @Override - public long getLength() throws IOException { - return raf.length(); - } - - @Override - public SeekableInputStream newStream() { - return new SeekableInputStream() { - private final byte[] tmpBuf = new byte[COPY_BUFFER_SIZE]; - private long markPos = 0; - - @Override - public long getPos() throws IOException { - return raf.getFilePointer(); - } - - @Override - public void seek( long l ) throws IOException { - raf.seek( l ); - } - - @Override - public void readFully( byte[] bytes ) throws IOException { - raf.readFully( bytes ); - } - - @Override - public void readFully( byte[] bytes, int i, int i1 ) throws IOException { - raf.readFully( bytes, i, i1 ); - } - - @Override - public void readFully( ByteBuffer byteBuffer ) throws IOException { - readFullyDirectBuffer( byteBuffer, tmpBuf, raf::read ); - } - - @Override - public int read( ByteBuffer byteBuffer ) throws IOException { - return readDirectBuffer( byteBuffer, tmpBuf, raf::read ); - } - - @Override - public int read() throws IOException { - return raf.read(); - } - - @Override - public int read( byte[] b ) throws IOException { - return raf.read( b ); - } - - @Override - public int read( byte[] b, int off, int len ) throws IOException { - return raf.read( b, off, len ); - } - - @SuppressWarnings( "checkstyle:ParameterAssignment" ) - @Override - public long skip( long n ) throws IOException { - final long savPos = raf.getFilePointer(); - final long amtLeft = raf.length() - savPos; - n = Math.min( n, amtLeft ); - final long newPos = savPos + n; - raf.seek( newPos ); - final long curPos = raf.getFilePointer(); - return curPos - savPos; - } - - @Override - public int available() { - return 0; - } - - @Override - public void close() throws IOException { - raf.close(); - } - - @SuppressWarnings( { "unchecked", "unused", "UnusedReturnValue" } ) - private R uncheckedExceptionThrow( Throwable t ) throws T { - throw ( T ) t; - } - - @Override - public synchronized void mark( int readlimit ) { - try { - markPos = raf.getFilePointer(); - } catch( IOException e ) { - uncheckedExceptionThrow( e ); - } - } - - @Override - public synchronized void reset() throws IOException { - raf.seek( markPos ); - } - - @Override - public boolean markSupported() { - return true; - } - }; - } - - private interface ByteBufReader { - int read( byte[] b, int off, int len ) throws IOException; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetBufferedWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetBufferedWriter.java deleted file mode 100644 index bcc0e3618..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetBufferedWriter.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.io.OutputFile; -import org.apache.parquet.io.PositionOutputStream; - -import javax.annotation.Nonnull; -import java.io.BufferedOutputStream; -import java.io.IOException; - -public class ParquetBufferedWriter implements OutputFile { - private final BufferedOutputStream out; - - public ParquetBufferedWriter( BufferedOutputStream out ) { - this.out = out; - } - - @Override - public PositionOutputStream create( long blockSizeHint ) { - return createPositionOutputstream(); - } - - private PositionOutputStream createPositionOutputstream() { - return new PositionOutputStream() { - - int pos = 0; - - @Override - public long getPos() { - return pos; - } - - @Override - public void flush() throws IOException { - out.flush(); - } - - @Override - public void close() throws IOException { - out.close(); - } - - @Override - public void write( int b ) throws IOException { - out.write( b ); - pos++; - } - - @Override - public void write( @Nonnull byte[] b, int off, int len ) throws IOException { - out.write( b, off, len ); - pos += len; - } - }; - } - - @Override - public PositionOutputStream createOrOverwrite( long blockSizeHint ) { - return createPositionOutputstream(); - } - - @Override - public boolean supportsBlockSize() { - return false; - } - - @Override - public long defaultBlockSize() { - return 0; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetGroupRecordConverter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetGroupRecordConverter.java deleted file mode 100644 index 7b6a28ba1..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetGroupRecordConverter.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.example.data.Group; -import org.apache.parquet.io.api.GroupConverter; -import org.apache.parquet.io.api.RecordMaterializer; -import org.apache.parquet.schema.MessageType; - -public class ParquetGroupRecordConverter extends RecordMaterializer { - private final ParquetSimpleGroupFactory simpleGroupFactory; - - private ParquetSimpleGroupConverter root; - - public ParquetGroupRecordConverter( MessageType schema ) { - this.simpleGroupFactory = new ParquetSimpleGroupFactory( schema ); - this.root = new ParquetSimpleGroupConverter( null, 0, schema ) { - @Override - public void start() { - this.current = simpleGroupFactory.newGroup(); - } - - @Override - public void end() { - } - }; - } - - @Override - public Group getCurrentRecord() { - return root.getCurrentRecord(); - } - - @Override - public GroupConverter getRootConverter() { - return root; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetInputFile.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetInputFile.java deleted file mode 100644 index 44e7eecfb..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetInputFile.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import oap.logstream.formats.MemoryInputStreamWrapper; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.parquet.hadoop.util.HadoopStreams; -import org.apache.parquet.io.InputFile; -import org.apache.parquet.io.SeekableInputStream; - -import java.io.IOException; -import java.io.InputStream; - -public class ParquetInputFile implements InputFile { - private final SeekableInputStream wrap; - private final MemoryInputStreamWrapper mw; - - public ParquetInputFile( InputStream is ) throws IOException { - mw = MemoryInputStreamWrapper.wrap( is ); - FSDataInputStream fsdis = new FSDataInputStream( mw ); - wrap = HadoopStreams.wrap( fsdis ); - } - - @Override - public long getLength() throws IOException { - return mw.length(); - } - - @Override - public SeekableInputStream newStream() throws IOException { - return wrap; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetNumberValue.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetNumberValue.java deleted file mode 100644 index 75cb3f96e..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetNumberValue.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.example.data.simple.Primitive; -import org.apache.parquet.io.api.RecordConsumer; - -public class ParquetNumberValue extends Primitive { - private final Number value; - - public ParquetNumberValue( int value ) { - this.value = value; - } - - public ParquetNumberValue( boolean value ) { - this.value = value ? 1 : 0; - } - - public ParquetNumberValue( Number value ) { - this.value = value; - } - - @Override - public String toString() { - return String.valueOf( value ); - } - - @Override - public int getInteger() { - return value.intValue(); - } - - @Override - public long getLong() { - return value.longValue(); - } - - @Override - public boolean getBoolean() { - return value.intValue() == 1; - } - - @Override - public float getFloat() { - return value.floatValue(); - } - - @Override - public double getDouble() { - return value.doubleValue(); - } - - @Override - public void writeValue( RecordConsumer recordConsumer ) { - if( value instanceof Integer integerValue ) recordConsumer.addInteger( integerValue ); - else if( value instanceof Long longValue ) recordConsumer.addLong( longValue ); - else if( value instanceof Float floatValue ) recordConsumer.addFloat( floatValue ); - else recordConsumer.addDouble( value.doubleValue() ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroup.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroup.java deleted file mode 100644 index b0632cb64..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroup.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import lombok.extern.slf4j.Slf4j; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.example.data.simple.BinaryValue; -import org.apache.parquet.example.data.simple.BooleanValue; -import org.apache.parquet.example.data.simple.Int96Value; -import org.apache.parquet.example.data.simple.NanoTime; -import org.apache.parquet.example.data.simple.Primitive; -import org.apache.parquet.io.api.Binary; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.Type; - -import java.util.ArrayList; -import java.util.List; - -@Slf4j -public class ParquetSimpleGroup extends Group { - - private final GroupType schema; - private final List[] data; - - @SuppressWarnings( "unchecked" ) - public ParquetSimpleGroup( GroupType schema ) { - this.schema = schema; - this.data = new List[schema.getFields().size()]; - for( int i = 0; i < schema.getFieldCount(); i++ ) { - this.data[i] = new ArrayList<>(); - } - } - - @Override - public String toString() { - return toString( "" ); - } - - private StringBuilder appendToString( StringBuilder builder, String indent ) { - int i = 0; - for( Type field : schema.getFields() ) { - String name = field.getName(); - List values = data[i]; - ++i; - if( values != null && !values.isEmpty() ) { - for( Object value : values ) { - builder.append( indent ).append( name ); - if( value == null ) { - builder.append( ": NULL\n" ); - } else if( value instanceof Group ) { - builder.append( '\n' ); - ( ( ParquetSimpleGroup ) value ).appendToString( builder, indent + " " ); - } else { - builder.append( ": " ).append( value ).append( '\n' ); - } - } - } - } - return builder; - } - - @SuppressWarnings( "checkstyle:OverloadMethodsDeclarationOrder" ) - public String toString( String indent ) { - StringBuilder builder = new StringBuilder(); - appendToString( builder, indent ); - return builder.toString(); - } - - @Override - public Group addGroup( int fieldIndex ) { - ParquetSimpleGroup g = new ParquetSimpleGroup( schema.getType( fieldIndex ).asGroupType() ); - add( fieldIndex, g ); - return g; - } - - @Override - public Group getGroup( int fieldIndex, int index ) { - return ( Group ) getValue( fieldIndex, index ); - } - - public Object getValue( int fieldIndex, int index ) { - List list; - try { - list = data[fieldIndex]; - } catch( IndexOutOfBoundsException e ) { - throw new RuntimeException( "not found " + fieldIndex + "(" + schema.getFieldName( fieldIndex ) + ") in group:\n" + this ); - } - try { - return list.get( index ); - } catch( IndexOutOfBoundsException e ) { - throw new RuntimeException( "not found " + fieldIndex + "(" + schema.getFieldName( fieldIndex ) + ") element number " + index + " in group:\n" + this ); - } - } - - private void add( int fieldIndex, Primitive value ) { - Type type = schema.getType( fieldIndex ); - List list = data[fieldIndex]; - if( !type.isRepetition( Type.Repetition.REPEATED ) - && !list.isEmpty() ) { - throw new IllegalStateException( "field " + fieldIndex + " (" + type.getName() + ") can not have more than one value: " + list ); - } - list.add( value ); - } - - @Override - public int getFieldRepetitionCount( int fieldIndex ) { - List list = data[fieldIndex]; - return list == null ? 0 : list.size(); - } - - @Override - public String getValueToString( int fieldIndex, int index ) { - return String.valueOf( getValue( fieldIndex, index ) ); - } - - @Override - public String getString( int fieldIndex, int index ) { - return ( ( BinaryValue ) getValue( fieldIndex, index ) ).getString(); - } - - @Override - public int getInteger( int fieldIndex, int index ) { - return ( ( ParquetNumberValue ) getValue( fieldIndex, index ) ).getInteger(); - } - - @Override - public long getLong( int fieldIndex, int index ) { - return ( ( ParquetNumberValue ) getValue( fieldIndex, index ) ).getLong(); - } - - @Override - public double getDouble( int fieldIndex, int index ) { - return ( ( ParquetNumberValue ) getValue( fieldIndex, index ) ).getDouble(); - } - - @Override - public float getFloat( int fieldIndex, int index ) { - return ( ( ParquetNumberValue ) getValue( fieldIndex, index ) ).getFloat(); - } - - @Override - public boolean getBoolean( int fieldIndex, int index ) { - Object value = getValue( fieldIndex, index ); - if( value instanceof BooleanValue booleanValue ) return booleanValue.getBoolean(); - return ( ( ParquetNumberValue ) value ).getBoolean(); - } - - @Override - public Binary getBinary( int fieldIndex, int index ) { - return ( ( BinaryValue ) getValue( fieldIndex, index ) ).getBinary(); - } - - public NanoTime getTimeNanos( int fieldIndex, int index ) { - return NanoTime.fromInt96( ( Int96Value ) getValue( fieldIndex, index ) ); - } - - @Override - public Binary getInt96( int fieldIndex, int index ) { - return ( ( Int96Value ) getValue( fieldIndex, index ) ).getInt96(); - } - - @SuppressWarnings( "checkstyle:OverloadMethodsDeclarationOrder" ) - @Override - public void add( int fieldIndex, int value ) { - add( fieldIndex, new ParquetNumberValue( value ) ); - } - - @Override - public void add( int fieldIndex, long value ) { - add( fieldIndex, new ParquetNumberValue( value ) ); - } - - @Override - public void add( int fieldIndex, String value ) { - add( fieldIndex, new BinaryValue( Binary.fromString( value ) ) ); - } - - @Override - public void add( int fieldIndex, NanoTime value ) { - add( fieldIndex, value.toInt96() ); - } - - @Override - public void add( int fieldIndex, boolean value ) { - add( fieldIndex, new BooleanValue( value ) ); - } - - @Override - public void add( int fieldIndex, Binary value ) { - switch( getType().getType( fieldIndex ).asPrimitiveType().getPrimitiveTypeName() ) { - case BINARY, FIXED_LEN_BYTE_ARRAY -> add( fieldIndex, new BinaryValue( value ) ); - case INT96 -> add( fieldIndex, new Int96Value( value ) ); - default -> throw new UnsupportedOperationException( - getType().asPrimitiveType().getName() + " not supported for Binary" ); - } - } - - @Override - public void add( int fieldIndex, float value ) { - add( fieldIndex, new ParquetNumberValue( value ) ); - } - - @Override - public void add( int fieldIndex, double value ) { - add( fieldIndex, new ParquetNumberValue( value ) ); - } - - @Override - public void add( int fieldIndex, Group value ) { - data[fieldIndex].add( value ); - } - - @Override - public GroupType getType() { - return schema; - } - - @Override - public void writeValue( int field, int index, RecordConsumer recordConsumer ) { - try { - ( ( Primitive ) getValue( field, index ) ).writeValue( recordConsumer ); - } catch( Exception e ) { - log.error( "field {} name {} index {}: {}", field, schema.getFieldName( index ), index, e.getMessage() ); - throw new RuntimeException( e ); - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupConverter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupConverter.java deleted file mode 100644 index ef6e3b888..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupConverter.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.example.data.Group; -import org.apache.parquet.io.api.Converter; -import org.apache.parquet.io.api.GroupConverter; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.Type; - -public class ParquetSimpleGroupConverter extends GroupConverter { - private final ParquetSimpleGroupConverter parent; - private final int index; - protected Group current; - private Converter[] converters; - - ParquetSimpleGroupConverter( ParquetSimpleGroupConverter parent, int index, GroupType schema ) { - this.parent = parent; - this.index = index; - - converters = new Converter[schema.getFieldCount()]; - - for( int i = 0; i < converters.length; i++ ) { - final Type type = schema.getType( i ); - if( type.isPrimitive() ) { - converters[i] = new ParquetSimplePrimitiveConverter( this, i ); - } else { - converters[i] = new ParquetSimpleGroupConverter( this, i, type.asGroupType() ); - } - - } - } - - @Override - public void start() { - current = parent.getCurrentRecord().addGroup( index ); - } - - @Override - public Converter getConverter( int fieldIndex ) { - return converters[fieldIndex]; - } - - @Override - public void end() { - } - - public Group getCurrentRecord() { - return current; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupFactory.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupFactory.java deleted file mode 100644 index 894f1514f..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimpleGroupFactory.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.example.data.Group; -import org.apache.parquet.example.data.GroupFactory; -import org.apache.parquet.schema.MessageType; - -public class ParquetSimpleGroupFactory extends GroupFactory { - - private final MessageType schema; - - public ParquetSimpleGroupFactory( MessageType schema ) { - this.schema = schema; - } - - @Override - public Group newGroup() { - return new ParquetSimpleGroup( schema ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimplePrimitiveConverter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimplePrimitiveConverter.java deleted file mode 100644 index 0a73ccba5..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetSimplePrimitiveConverter.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.parquet.io.api.Binary; -import org.apache.parquet.io.api.PrimitiveConverter; - -public class ParquetSimplePrimitiveConverter extends PrimitiveConverter { - - private final ParquetSimpleGroupConverter parent; - private final int index; - - ParquetSimplePrimitiveConverter( ParquetSimpleGroupConverter parent, int index ) { - this.parent = parent; - this.index = index; - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addBinary(Binary) - */ - @Override - public void addBinary( Binary value ) { - parent.getCurrentRecord().add( index, value ); - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addBoolean(boolean) - */ - @Override - public void addBoolean( boolean value ) { - parent.getCurrentRecord().add( index, value ); - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addDouble(double) - */ - @Override - public void addDouble( double value ) { - parent.getCurrentRecord().add( index, value ); - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addFloat(float) - */ - @Override - public void addFloat( float value ) { - parent.getCurrentRecord().add( index, value ); - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addInt(int) - */ - @Override - public void addInt( int value ) { - parent.getCurrentRecord().add( index, value ); - } - - /** - * {@inheritDoc} - * - * @see org.apache.parquet.io.api.PrimitiveConverter#addLong(long) - */ - @Override - public void addLong( long value ) { - parent.getCurrentRecord().add( index, value ); - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetUtils.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetUtils.java deleted file mode 100644 index b53229a7b..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetUtils.java +++ /dev/null @@ -1,341 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import com.google.common.base.Preconditions; -import lombok.ToString; -import oap.dictionary.Dictionary; -import oap.dictionary.DictionaryParser; -import oap.dictionary.DictionaryRoot; -import oap.io.IoStreams; -import oap.template.Types; -import oap.tsv.Tsv; -import oap.tsv.TsvArray; -import oap.tsv.TsvStream; -import oap.util.Dates; -import oap.util.Lists; -import oap.util.Stream; -import org.apache.commons.io.FilenameUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.hadoop.example.GroupWriteSupport; -import org.apache.parquet.hadoop.util.HadoopOutputFile; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.Type; -import org.apache.parquet.schema.Types.Builder; -import org.joda.time.DateTime; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.function.Function; - -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; - -/** - * TSV to Parquet conversion utility - */ -public class ParquetUtils { - private static final HashMap>, Builder>> types = new HashMap<>(); - - static { - types.put( Types.BOOLEAN, children -> org.apache.parquet.schema.Types.required( BOOLEAN ) ); - types.put( Types.BYTE, children -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 8, true ) ) ); - types.put( Types.SHORT, children -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 16, true ) ) ); - types.put( Types.INTEGER, children -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.intType( 32, true ) ) ); - types.put( Types.LONG, children -> org.apache.parquet.schema.Types.required( INT64 ).as( LogicalTypeAnnotation.intType( 64, true ) ) ); - types.put( Types.FLOAT, children -> org.apache.parquet.schema.Types.required( FLOAT ) ); - types.put( Types.DOUBLE, children -> org.apache.parquet.schema.Types.required( DOUBLE ) ); - types.put( Types.STRING, children -> org.apache.parquet.schema.Types.required( BINARY ).as( LogicalTypeAnnotation.stringType() ) ); - types.put( Types.DATE, children -> org.apache.parquet.schema.Types.required( INT32 ).as( LogicalTypeAnnotation.dateType() ) ); - types.put( Types.DATETIME, children -> org.apache.parquet.schema.Types.required( INT64 ) ); -// types.put( Types.DADATETIME64, children -> org.apache.parquet.schema.Types.required( INT64 ).as( LogicalTypeAnnotation.timestampType( true, MILLIS ) ) ); - types.put( Types.LIST, children -> org.apache.parquet.schema.Types.requiredList().element( ( Type ) children.get( 0 ).named( "element" ) ) ); -// types.put( Types.ENUM, children -> org.apache.parquet.schema.Types.required( BINARY ).as( LogicalTypeAnnotation.stringType() ) ); - } - - public final Builder schema; - protected final HashMap defaultValuesMap = new HashMap<>(); - protected final ArrayList defaultValuesList; - - public ParquetUtils( Dictionary dictionary ) { - schema = org.apache.parquet.schema.Types.buildMessage(); - - defaultValuesList = new ArrayList<>( dictionary.getValues().size() ); - var i = 0; - - var fields = new LinkedHashMap>(); - - for( var col : dictionary.getValues() ) { - Object typeObj = col.getProperty( "type" ).orElse( null ); - Preconditions.checkArgument( typeObj instanceof String || typeObj instanceof List, - "[" + col.getId() + "] type must be string or list" ); - List type = typeObj instanceof List ? ( List ) typeObj : List.of( typeObj.toString() ); - Preconditions.checkArgument( type.size() > 0 ); - - Builder fieldType = null; - for( var typeIdx = type.size() - 1; typeIdx >= 0; typeIdx-- ) { - var typeEnum = Types.valueOf( type.get( typeIdx ) ); - var func = types.get( typeEnum ); - fieldType = func.apply( fieldType != null ? List.of( fieldType ) : List.of() ); - } - - fields.put( col.getId(), fieldType ); - - Object defaultValue = col.getProperty( "default" ).orElseThrow( () -> new IllegalArgumentException( col.getId() + ": default is required" ) ); - - FieldInfo fieldInfo = new FieldInfo( defaultValue, fieldType, Lists.map( type, Types::valueOf ) ); - ParquetUtils.this.defaultValuesMap.put( col.getId(), fieldInfo ); - ParquetUtils.this.defaultValuesList.add( fieldInfo ); - } - - setFields( fields ); - } - - protected static Timestamp toTimestamp( Object value ) { - if( value instanceof Timestamp valueTimestamp ) return valueTimestamp; - else if( value instanceof DateTime valueDateTime ) - return new Timestamp( valueDateTime.getMillis() ); - else if( value instanceof Long longValue ) - return new Timestamp( longValue ); - else - return new Timestamp( Dates.FORMAT_SIMPLE.parseMillis( value.toString() ) ); - } - - public static void main( String[] args ) throws IOException { - String source = args[0]; - String datamodel = args[1]; - String type = args[2]; - String out = FilenameUtils.removeExtension( source ) + ".parquet"; - - DictionaryRoot dictionaryRoot = DictionaryParser.parse( Paths.get( datamodel ), new DictionaryParser.IncrementalIdStrategy() ); - ParquetUtils schema = new ParquetUtils( dictionaryRoot.getValue( type ) ); - - Configuration conf = new Configuration(); - - if( Files.exists( Paths.get( out ) ) ) - Files.delete( Paths.get( out ) ); - - TsvStream tsvStream = Tsv.tsv.fromStream( IoStreams.lines( Paths.get( source ) ) ).withHeaders(); - List headers = tsvStream.headers(); - - MessageType modelMessageType = ( MessageType ) schema.schema.named( "group" ); - org.apache.parquet.schema.Types.MessageTypeBuilder tsvMessageTypeBuilder = org.apache.parquet.schema.Types.buildMessage(); - - for( Type modelType : modelMessageType.getFields() ) { - if( headers.contains( modelType.getName() ) ) - tsvMessageTypeBuilder.addField( modelType ); - } - - MessageType tsvMessageType = tsvMessageTypeBuilder.named( "tsv" ); - - GroupWriteSupport.setSchema( tsvMessageType, conf ); - - List select = Lists.map( modelMessageType.getFields(), Type::getName ); - - try( ParquetWriter writer = new ParquetWriteBuilder( HadoopOutputFile.fromPath( new Path( out ), conf ) ) - .withConf( conf ) - .build() ) { - - try( Stream> stream = tsvStream.select( select ).stripHeaders().toStream() ) { - stream.forEach( cols -> { - try { - ParquetSimpleGroup simpleGroup = new ParquetSimpleGroup( tsvMessageType ); - - for( int i = 0; i < tsvMessageType.getFields().size(); i++ ) { - String header = tsvMessageType.getType( i ).getName(); - schema.setString( simpleGroup, header, cols.get( i ) ); - } - writer.write( simpleGroup ); - } catch( Exception e ) { - e.printStackTrace(); - throw new RuntimeException( e ); - } - } ); - } - } finally { - String name = FilenameUtils.getName( out ); - String parent = FilenameUtils.getFullPathNoEndSeparator( out ); - java.nio.file.Path crcPath = Paths.get( parent + "/." + name + ".crc" ); - if( Files.exists( crcPath ) ) - Files.delete( crcPath ); - } - } - - public static String toString( Type type, ParquetSimpleGroup group, int x, int y ) { - LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation(); - - if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation ) { - return Dates.FORMAT_DATE.print( group.getInteger( x, y ) * 24L * 60 * 60 * 1000 ); - } else if( logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation ) { - ArrayList list = new ArrayList<>( group.getFieldRepetitionCount( x ) ); - for( int listIndex = 0; listIndex < group.getFieldRepetitionCount( x ); listIndex++ ) { - Type listItemType = ( ( GroupType ) type ).getType( 0 ); - LogicalTypeAnnotation listItemLogicalTypeAnnotation = listItemType.getLogicalTypeAnnotation(); - - if( listItemLogicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation - || listItemLogicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation - || listItemLogicalTypeAnnotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation ) { - list.add( "'" + toString( listItemType, group, x, listIndex ) + "'" ); - } - } - return TsvArray.print( list, Dates.FORMAT_DATE ); - } - - return group.getValueToString( x, y ); - } - - protected void setFields( LinkedHashMap> fields ) { - fields.forEach( ( n, b ) -> ( ( org.apache.parquet.schema.Types.MessageTypeBuilder ) schema ).addField( ( Type ) b.named( n ) ) ); - } - - public void setString( ParquetSimpleGroup group, String index, String value ) { - int fieldIndex = group.getType().getFieldIndex( index ); - List types = defaultValuesList.get( fieldIndex ).type; - - setString( group, fieldIndex, value, types ); - } - - private void setString( Group group, int index, String value, List types ) { - if( value == null ) return; - - switch( types.get( 0 ) ) { - case BOOLEAN -> group.add( index, Byte.parseByte( value ) == 1 ); - case BYTE -> group.add( index, Byte.parseByte( value ) ); - case SHORT -> group.add( index, Short.parseShort( value ) ); - case INTEGER -> group.add( index, Integer.parseInt( value ) ); - case LONG -> group.add( index, Long.parseLong( value ) ); - case FLOAT -> group.add( index, Float.parseFloat( value ) ); - case DOUBLE -> group.add( index, Double.parseDouble( value ) ); - case STRING/*, ENUM*/ -> group.add( index, value ); - case DATE -> { - long ms = Dates.FORMAT_DATE.parseMillis( value ); - group.add( index, ( int ) ( ms / 24L / 60 / 60 / 1000 ) ); - } - case DATETIME -> { - long ms = Dates.PARSER_MULTIPLE_DATETIME.parseMillis( value ); - group.add( index, ms / 1000 ); - } -// case DATETIME64 -> group.add( index, Dates.PARSER_MULTIPLE_DATETIME.parseMillis( value ) ); - case LIST -> { - List listType = types.subList( 1, types.size() ); - Group listGroup = group.addGroup( index ); - for( String item : TsvArray.parse( value ) ) { - setString( listGroup.addGroup( "list" ), 0, item, listType ); - } - - } - } - } - - protected String enumToString( Object value ) { - if( value instanceof Enum valueEnum ) return valueEnum.name(); - - return toString( value ); - } - - @SuppressWarnings( "checkstyle:OverloadMethodsDeclarationOrder" ) - protected String toString( Object value ) { - return value.toString(); - } - - protected double toDouble( Object value ) { - return value instanceof Number ? ( ( Number ) value ).doubleValue() : Double.parseDouble( value.toString() ); - } - - @SuppressWarnings( "unchecked" ) - protected List toList( Object value, List types ) { - if( value instanceof List ) return ( List ) value; - - String arrayStr = value.toString().trim(); - String array = arrayStr.substring( 1, arrayStr.length() - 1 ); - - String[] data = StringUtils.splitPreserveAllTokens( array, ',' ); - - return List.of( data ); - } - - protected long toDate( Object value ) { - if( value instanceof DateTime ) - return ( ( DateTime ) value ).getMillis() / 24 / 60 / 60 / 1000; - else if( value instanceof Long ) - return ( long ) value; - else - return Dates.FORMAT_DATE.parseMillis( value.toString() ) / 24 / 60 / 60 / 1000; - } - - protected short toShort( Object value ) { - return value instanceof Number ? ( ( Number ) value ).shortValue() : Short.parseShort( value.toString() ); - } - - protected long toBoolean( Object value ) { - if( value instanceof Boolean booleanValue ) return booleanValue ? 1 : 0; - else return Boolean.parseBoolean( value.toString() ) ? 1 : 0; - } - - protected int toByte( Object value ) { - return value instanceof Number ? ( ( Number ) value ).byteValue() : Byte.parseByte( value.toString() ); - } - - protected int toInt( Object value ) { - return value instanceof Number ? ( ( Number ) value ).intValue() : Integer.parseInt( value.toString() ); - } - - protected long toLong( Object value ) { - return value instanceof Number ? ( ( Number ) value ).longValue() : Long.parseLong( value.toString() ); - } - - protected float toFloat( Object value ) { - return value instanceof Number ? ( ( Number ) value ).floatValue() : Float.parseFloat( value.toString() ); - } - - @ToString - protected static class FieldInfo { - public final Object defaultValue; - public final List type; - public final org.apache.parquet.schema.Types.Builder schema; - - public FieldInfo( Object defaultValue, org.apache.parquet.schema.Types.Builder schema, List type ) { - this.defaultValue = defaultValue; - this.schema = schema; - this.type = type; - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetWriteBuilder.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetWriteBuilder.java deleted file mode 100644 index 67c1069dc..000000000 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/parquet/ParquetWriteBuilder.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.logstream.formats.parquet; - -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.example.data.Group; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.hadoop.api.WriteSupport; -import org.apache.parquet.hadoop.example.GroupWriteSupport; -import org.apache.parquet.io.OutputFile; - -public class ParquetWriteBuilder extends ParquetWriter.Builder { - public ParquetWriteBuilder( OutputFile path ) { - super( path ); - } - - @Override - protected ParquetWriteBuilder self() { - return this; - } - - @Override - protected WriteSupport getWriteSupport( Configuration conf ) { - GroupWriteSupport groupWriteSupport = new GroupWriteSupport(); - groupWriteSupport.init( conf ); - return groupWriteSupport; - } -} diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/TemplateAccumulatorRowBinary.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/TemplateAccumulatorRowBinary.java index 1d94ad947..eb1546a75 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/TemplateAccumulatorRowBinary.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/TemplateAccumulatorRowBinary.java @@ -3,7 +3,6 @@ import lombok.SneakyThrows; import oap.dictionary.Dictionary; import oap.template.TemplateAccumulator; -import oap.template.TemplateAccumulatorBinary; import oap.template.TemplateAccumulatorString; import oap.template.TemplateException; import oap.util.FastByteArrayOutputStream; @@ -155,7 +154,6 @@ public void accept( Object obj ) { case DateTime dt -> accept( dt ); case Date d -> accept( d ); case Collection c -> accept( c ); - case TemplateAccumulatorBinary tab -> accept( tab ); case TemplateAccumulatorString tab -> accept( tab ); case null, default -> throw new IllegalArgumentException( "Unknown type " + obj.getClass() ); } diff --git a/oap-formats/oap-logstream/pom.xml b/oap-formats/oap-logstream/pom.xml index f9c514d72..e92e4a8f7 100644 --- a/oap-formats/oap-logstream/pom.xml +++ b/oap-formats/oap-logstream/pom.xml @@ -13,10 +13,6 @@ pom oap-logstream-parent - - 1.15.2 - - oap-logstream oap-logstream-data diff --git a/oap-formats/oap-template/src/main/java/oap/template/BinaryInputStream.java b/oap-formats/oap-template/src/main/java/oap/template/BinaryInputStream.java deleted file mode 100644 index 746b2febc..000000000 --- a/oap-formats/oap-template/src/main/java/oap/template/BinaryInputStream.java +++ /dev/null @@ -1,329 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.template; - -import org.joda.time.DateTime; - -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.io.UTFDataFormatException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; - -import static org.joda.time.DateTimeZone.UTC; - -public class BinaryInputStream extends InputStream { - public static final Object EOL = new Object() { - @Override - public String toString() { - return "BinaryInputStream#EOL"; - } - }; - - protected byte[] readBuffer = new byte[8]; - protected byte[] bytearr = new byte[80]; - protected char[] chararr = new char[80]; - - protected final InputStream in; - - public BinaryInputStream( InputStream in ) { - this.in = in; - } - - @Override - public int read() throws IOException { - return in.read(); - } - - public boolean readBoolean() throws IOException { - checkType( Types.BOOLEAN ); - - return _readBoolean(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected boolean _readBoolean() throws IOException { - int ch = in.read(); - if( ch < 0 ) - throw new EOFException(); - return ch != 0; - } - - public byte readByte() throws IOException { - checkType( Types.BYTE ); - - return _readByte(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected byte _readByte() throws IOException { - int ch = in.read(); - if( ch < 0 ) - throw new EOFException(); - return ( byte ) ch; - } - - public short readShort() throws IOException { - checkType( Types.SHORT ); - - return _readShort(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected short _readShort() throws IOException { - int ch1 = in.read(); - int ch2 = in.read(); - if( ( ch1 | ch2 ) < 0 ) - throw new EOFException(); - return ( short ) ( ( ch1 << 8 ) + ( ch2 << 0 ) ); - } - - public int readInt() throws IOException { - checkType( Types.INTEGER ); - - return _readInt(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected int _readInt() throws IOException { - readFully( readBuffer, 0, 4 ); - - return ( readBuffer[0] << 24 ) - + ( ( readBuffer[1] & 255 ) << 16 ) - + ( ( readBuffer[2] & 255 ) << 8 ) - + ( ( readBuffer[3] & 255 ) << 0 ); - } - - public long readLong() throws IOException { - checkType( Types.LONG ); - - return _readLong(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected long _readLong() throws IOException { - readFully( readBuffer, 0, 8 ); - - return ( ( long ) readBuffer[0] << 56 ) - + ( ( long ) ( readBuffer[1] & 255 ) << 48 ) - + ( ( long ) ( readBuffer[2] & 255 ) << 40 ) - + ( ( long ) ( readBuffer[3] & 255 ) << 32 ) - + ( ( long ) ( readBuffer[4] & 255 ) << 24 ) - + ( ( readBuffer[5] & 255 ) << 16 ) - + ( ( readBuffer[6] & 255 ) << 8 ) - + ( ( readBuffer[7] & 255 ) << 0 ); - } - - public float readFloat() throws IOException { - checkType( Types.FLOAT ); - - return _readFloat(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected float _readFloat() throws IOException { - return Float.intBitsToFloat( _readInt() ); - } - - public double readDouble() throws IOException { - checkType( Types.DOUBLE ); - - return _readDouble(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected double _readDouble() throws IOException { - return Double.longBitsToDouble( _readLong() ); - } - - public DateTime readDateTime() throws IOException { - checkType( Types.DATETIME ); - - return _readDateTime(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected DateTime _readDateTime() throws IOException { - return new DateTime( _readLong(), UTC ); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - public String readString() throws IOException { - checkType( Types.STRING ); - - return _readString(); - - } - - @SuppressWarnings( { "checkstyle:MethodName", "checkstyle:OperatorWrap", "checkstyle:UnnecessaryParentheses" } ) - private String _readString() throws IOException { - int utflen = readUnsignedShort(); - if( bytearr.length < utflen ) { - bytearr = new byte[utflen * 2]; - chararr = new char[utflen * 2]; - } - - int c, char2, char3; - int count = 0; - int chararrCount = 0; - - readFully( bytearr, 0, utflen ); - - while( count < utflen ) { - c = ( int ) bytearr[count] & 0xff; - if( c > 127 ) break; - count++; - chararr[chararrCount++] = ( char ) c; - } - - while( count < utflen ) { - c = ( int ) bytearr[count] & 0xff; - switch( c >> 4 ) { - case 0, 1, 2, 3, 4, 5, 6, 7 -> { - /* 0xxxxxxx*/ - count++; - chararr[chararrCount++] = ( char ) c; - } - case 12, 13 -> { - /* 110x xxxx 10xx xxxx*/ - count += 2; - if( count > utflen ) - throw new UTFDataFormatException( - "malformed input: partial character at end" ); - char2 = bytearr[count - 1]; - if( ( char2 & 0xC0 ) != 0x80 ) - throw new UTFDataFormatException( - "malformed input around byte " + count ); - chararr[chararrCount++] = ( char ) ( ( ( c & 0x1F ) << 6 ) | - ( char2 & 0x3F ) ); - } - case 14 -> { - /* 1110 xxxx 10xx xxxx 10xx xxxx */ - count += 3; - if( count > utflen ) - throw new UTFDataFormatException( - "malformed input: partial character at end" ); - char2 = bytearr[count - 2]; - char3 = bytearr[count - 1]; - if( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) ) - throw new UTFDataFormatException( - "malformed input around byte " + ( count - 1 ) ); - chararr[chararrCount++] = ( char ) ( ( ( c & 0x0F ) << 12 ) | - ( ( char2 & 0x3F ) << 6 ) | - ( ( char3 & 0x3F ) << 0 ) ); - } - default -> - /* 10xx xxxx, 1111 xxxx */ - throw new UTFDataFormatException( - "malformed input around byte " + count ); - } - } - // The number of chars produced may be less than utflen - return new String( chararr, 0, chararrCount ); - } - - public List readList() throws IOException { - checkType( Types.LIST ); - - return _readList(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected ArrayList _readList() throws IOException { - ArrayList ret = new ArrayList<>(); - - int size = _readInt(); - - for( int i = 0; i < size; i++ ) { - ret.add( readObject() ); - } - - return ret; - } - - public Object readObject() throws IOException { - int type = in.read(); - - if( type == Types.BOOLEAN.id ) return _readBoolean(); - else if( type == Types.BYTE.id ) return _readByte(); - else if( type == Types.SHORT.id ) return _readShort(); - else if( type == Types.INTEGER.id ) return _readInt(); - else if( type == Types.LONG.id ) return _readLong(); - else if( type == Types.FLOAT.id ) return _readFloat(); - else if( type == Types.DOUBLE.id ) return _readDouble(); - else if( type == Types.STRING.id ) return _readString(); - else if( type == Types.DATETIME.id ) return _readDateTime(); - else if( type == Types.LIST.id ) return _readList(); - else if( type == Types.RAW.id ) return _readRaw(); - else if( type == Types.EOL.id ) return EOL; - else if( type < 0 ) return null; - - throw new IllegalArgumentException( "Unknown type: " + type ); - } - - private byte[] readRaw() throws IOException { - checkType( Types.RAW ); - - return _readRaw(); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - private byte[] _readRaw() throws IOException { - int length = _readInt(); - - var bytes = new byte[length]; - readFully( bytes, 0, length ); - return bytes; - } - - protected void checkType( Types type ) throws IOException { - byte readType = ( byte ) in.read(); - - if( readType != type.id ) - throw new IOException( "required :" + type.name() + ":" + type.id + ", but found: " + readType ); - } - - @SuppressWarnings( "checkstyle:ArrayTypeStyle" ) - protected void readFully( byte b[], int off, int len ) throws IOException { - Objects.checkFromIndexSize( off, len, b.length ); - int n = 0; - while( n < len ) { - int count = in.read( b, off + n, len - n ); - if( count < 0 ) - throw new EOFException(); - n += count; - } - } - - protected int readUnsignedShort() throws IOException { - int ch1 = in.read(); - int ch2 = in.read(); - if( ( ch1 | ch2 ) < 0 ) - throw new EOFException(); - return ( ch1 << 8 ) + ( ch2 << 0 ); - } -} diff --git a/oap-formats/oap-template/src/main/java/oap/template/BinaryOutputStream.java b/oap-formats/oap-template/src/main/java/oap/template/BinaryOutputStream.java deleted file mode 100644 index 50d2877cc..000000000 --- a/oap-formats/oap-template/src/main/java/oap/template/BinaryOutputStream.java +++ /dev/null @@ -1,219 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.template; - -import oap.dictionary.Dictionary; -import oap.util.Strings; -import org.joda.time.DateTime; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.Collection; -import java.util.Date; - -import static org.joda.time.DateTimeZone.UTC; - -public class BinaryOutputStream extends OutputStream { - protected final byte[] writeBuffer = new byte[9]; - protected final OutputStream out; - /** - * bytearr is initialized on demand by writeUTF - */ - private byte[] bytearr = null; - - public BinaryOutputStream( OutputStream out ) { - this.out = out; - } - - @Override - public void write( int i ) throws IOException { - out.write( i ); - } - - public void writeByte( byte v ) throws IOException { - writeBuffer[0] = Types.BYTE.id; - writeBuffer[1] = v; - - out.write( writeBuffer, 0, 2 ); - } - - public void writeBoolean( boolean v ) throws IOException { - writeBuffer[0] = Types.BOOLEAN.id; - writeBuffer[1] = v ? ( byte ) 1 : ( byte ) 0; - - out.write( writeBuffer, 0, 2 ); - } - - public void writeShort( short v ) throws IOException { - writeBuffer[0] = Types.SHORT.id; - writeBuffer[1] = ( byte ) ( v >>> 8 ); - writeBuffer[2] = ( byte ) ( v >>> 0 ); - - out.write( writeBuffer, 0, 3 ); - } - - public void writeInt( int v ) throws IOException { - writeBuffer[0] = Types.INTEGER.id; - - _writeInt( v ); - } - - public void writeList( Collection v ) throws IOException { - writeBuffer[0] = Types.LIST.id; - if( v == null || v.isEmpty() ) _writeInt( 0 ); - else { - _writeInt( v.size() ); - - for( var item : v ) { - writeObject( item ); - } - } - } - - public void writeObject( Object v ) throws IOException { - if( v instanceof String s ) writeString( s ); - else if( v instanceof Boolean b ) writeBoolean( b ); - else if( v instanceof Byte b ) writeByte( b ); - else if( v instanceof Short s ) writeShort( s ); - else if( v instanceof Integer i ) writeInt( i ); - else if( v instanceof Long l ) writeLong( l ); - else if( v instanceof Float f ) writeFloat( f ); - else if( v instanceof Enum e ) writeEnum( e ); - else if( v instanceof Dictionary d ) writeDictionary( d ); - else if( v instanceof Double d ) writeDouble( d ); - else if( v instanceof DateTime dt ) writeDateTime( dt ); - else if( v instanceof Date d ) writeDateTime( new DateTime( d, UTC ) ); - else if( v instanceof byte[] b ) writeRaw( b ); - else if( v instanceof Collection c ) writeList( c ); - else - throw new IllegalArgumentException( "Unknown type " + v.getClass() ); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected void _writeInt( int v ) throws IOException { - writeBuffer[1] = ( byte ) ( v >>> 24 ); - writeBuffer[2] = ( byte ) ( v >>> 16 ); - writeBuffer[3] = ( byte ) ( v >>> 8 ); - writeBuffer[4] = ( byte ) ( v >>> 0 ); - - out.write( writeBuffer, 0, 5 ); - } - - public void writeLong( long v ) throws IOException { - writeBuffer[0] = Types.LONG.id; - _writeLong( v ); - } - - public void writeRaw( byte[] bytes ) throws IOException { - writeByte( Types.RAW.id ); - out.write( bytes ); - } - - @SuppressWarnings( "checkstyle:MethodName" ) - protected void _writeLong( long v ) throws IOException { - writeBuffer[1] = ( byte ) ( v >>> 56 ); - writeBuffer[2] = ( byte ) ( v >>> 48 ); - writeBuffer[3] = ( byte ) ( v >>> 40 ); - writeBuffer[4] = ( byte ) ( v >>> 32 ); - writeBuffer[5] = ( byte ) ( v >>> 24 ); - writeBuffer[6] = ( byte ) ( v >>> 16 ); - writeBuffer[7] = ( byte ) ( v >>> 8 ); - writeBuffer[8] = ( byte ) ( v >>> 0 ); - - out.write( writeBuffer, 0, 9 ); - } - - public void writeFloat( float v ) throws IOException { - writeBuffer[0] = Types.FLOAT.id; - - _writeInt( Float.floatToIntBits( v ) ); - } - - public void writeDouble( double v ) throws IOException { - writeBuffer[0] = Types.DOUBLE.id; - - _writeLong( Double.doubleToLongBits( v ) ); - } - - public void writeDateTime( DateTime jodaDateTime ) throws IOException { - writeBuffer[0] = Types.DATETIME.id; - - _writeLong( jodaDateTime.getMillis() ); - } - - public void writeEnum( Enum e ) throws IOException { - writeString( e.name() ); - } - - public void writeDictionary( Dictionary d ) throws IOException { - writeString( d.getId() ); - } - - public void writeString( String str ) throws IOException { - writeUTFWithType( Strings.UNKNOWN.equals( str ) ? "" : str, Types.STRING ); - } - - @SuppressWarnings( "checkstyle:UnnecessaryParentheses" ) - private void writeUTFWithType( String str, Types type ) throws IOException { - final int strlen = str.length(); - int utflen = strlen; // optimized for ASCII - - for( int i = 0; i < strlen; i++ ) { - int c = str.charAt( i ); - if( c >= 0x80 || c == 0 ) - utflen += ( c >= 0x800 ) ? 2 : 1; - } - - if( bytearr == null || ( bytearr.length < ( utflen + 2 + 1 ) ) ) - bytearr = new byte[( utflen * 2 ) + 2 + 1]; - - bytearr[0] = type.id; - int count = 1; - bytearr[count++] = ( byte ) ( ( utflen >>> 8 ) & 0xFF ); - bytearr[count++] = ( byte ) ( ( utflen >>> 0 ) & 0xFF ); - - int i; - for( i = 0; i < strlen; i++ ) { // optimized for initial run of ASCII - int c = str.charAt( i ); - if( c >= 0x80 || c == 0 ) break; - bytearr[count++] = ( byte ) c; - } - - for( ; i < strlen; i++ ) { - int c = str.charAt( i ); - if( c < 0x80 && c != 0 ) { - bytearr[count++] = ( byte ) c; - } else if( c >= 0x800 ) { - bytearr[count++] = ( byte ) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) ); - bytearr[count++] = ( byte ) ( 0x80 | ( ( c >> 6 ) & 0x3F ) ); - bytearr[count++] = ( byte ) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); - } else { - bytearr[count++] = ( byte ) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) ); - bytearr[count++] = ( byte ) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); - } - } - out.write( bytearr, 0, utflen + 3 ); - } -} diff --git a/oap-formats/oap-template/src/main/java/oap/template/BinaryUtils.java b/oap-formats/oap-template/src/main/java/oap/template/BinaryUtils.java deleted file mode 100644 index a4f78a7bc..000000000 --- a/oap-formats/oap-template/src/main/java/oap/template/BinaryUtils.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.template; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public class BinaryUtils { - public static byte[] line( Object... cols ) throws IOException { - return line( List.of( cols ) ); - } - - public static byte[] line( List cols ) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryOutputStream bos = new BinaryOutputStream( baos ); - - for( Object col : cols ) bos.writeObject( col ); - - baos.write( Types.EOL.id ); - - return baos.toByteArray(); - } - - public static byte[] lines( List> rows ) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - for( List row : rows ) { - baos.write( line( row ) ); - } - - return baos.toByteArray(); - } - - public static List> read( byte[] bytes ) throws IOException { - return read( bytes, 0, bytes.length ); - } - - public static List> read( byte[] bytes, int offset, int length ) throws IOException { - BinaryInputStream binaryInputStream = new BinaryInputStream( new ByteArrayInputStream( bytes, offset, length ) ); - Object obj = binaryInputStream.readObject(); - ArrayList line = new ArrayList<>(); - ArrayList> res = new ArrayList<>(); - while( obj != null ) { - if( obj != BinaryInputStream.EOL ) line.add( obj ); - else { - res.add( line ); - line = new ArrayList<>(); - } - obj = binaryInputStream.readObject(); - } - if( !line.isEmpty() ) res.add( line ); - - return res; - } -} diff --git a/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulatorBinary.java b/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulatorBinary.java deleted file mode 100644 index c11cf69b7..000000000 --- a/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulatorBinary.java +++ /dev/null @@ -1,252 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.template; - -import lombok.SneakyThrows; -import oap.dictionary.Dictionary; -import oap.util.FastByteArrayOutputStream; -import oap.util.Strings; -import org.apache.commons.lang3.EnumUtils; -import org.joda.time.DateTime; - -import java.util.Collection; -import java.util.Date; -import java.util.HashSet; -import java.util.List; - -import static org.joda.time.DateTimeZone.UTC; - -public class TemplateAccumulatorBinary implements TemplateAccumulator { - protected final FastByteArrayOutputStream baos; - private final BinaryOutputStream bos; - - public TemplateAccumulatorBinary( FastByteArrayOutputStream baos ) { - this.baos = baos; - this.bos = new BinaryOutputStream( baos ); - } - - public TemplateAccumulatorBinary() { - this( new FastByteArrayOutputStream() ); - } - - @SneakyThrows - @Override - public void acceptText( String text ) { - bos.writeString( text == null ? "" : text ); - } - - @SneakyThrows - @Override - public void accept( String text ) { - acceptText( text ); - } - - @SneakyThrows - @Override - public void accept( boolean b ) { - bos.writeBoolean( b ); - } - - @SneakyThrows - @Override - public void accept( byte b ) { - bos.writeByte( b ); - } - - @SneakyThrows - @Override - public void accept( short s ) { - bos.writeShort( s ); - } - - @SneakyThrows - @Override - public void accept( int i ) { - bos.writeInt( i ); - } - - @SneakyThrows - @Override - public void accept( long l ) { - bos.writeLong( l ); - } - - @SneakyThrows - @Override - public void accept( float f ) { - bos.writeFloat( f ); - } - - @SneakyThrows - @Override - public void accept( double d ) { - bos.writeDouble( d ); - } - - @SneakyThrows - @Override - public void accept( DateTime jodaDateTime ) { - bos.writeDateTime( jodaDateTime ); - } - - @SneakyThrows - @Override - public void accept( Date javaDate ) { - bos.writeDateTime( new DateTime( javaDate, UTC ) ); - } - - @SneakyThrows - @Override - public void accept( Enum e ) { - bos.writeString( e.name() ); - } - - @SneakyThrows - @Override - public void accept( Collection list ) { - bos.writeList( list ); - } - - @SneakyThrows - @Override - public void accept( TemplateAccumulatorBinary acc ) { - FastByteArrayOutputStream baos = acc.baos; - - bos.write( baos.array, 0, baos.length ); - } - - @SneakyThrows - public void accept( TemplateAccumulatorString acc ) { - bos.writeString( acc.get() ); - } - - @Override - public void accept( Object obj ) { - if( obj instanceof String s ) accept( s ); - else if( obj instanceof Byte b ) accept( b ); - else if( obj instanceof Short s ) accept( s ); - else if( obj instanceof Integer i ) accept( i ); - else if( obj instanceof Long l ) accept( l ); - else if( obj instanceof Float f ) accept( f ); - else if( obj instanceof Double d ) accept( d ); - else if( obj instanceof Enum e ) accept( e ); - else if( obj instanceof Dictionary d ) accept( d ); - else if( obj instanceof DateTime dt ) accept( dt ); - else if( obj instanceof Date d ) accept( d ); - else if( obj instanceof Collection c ) accept( c ); - else if( obj instanceof TemplateAccumulatorBinary tab ) accept( tab ); - else if( obj instanceof TemplateAccumulatorString tab ) accept( tab ); - else - throw new IllegalArgumentException( "Unknown type " + obj.getClass() ); - } - - @Override - public void acceptNull( Class type ) { - throw new IllegalArgumentException( "type " + type ); - } - - private static final HashSet> numberClass = new HashSet<>(); - - static { - numberClass.add( Byte.class ); - numberClass.add( byte.class ); - numberClass.add( Short.class ); - numberClass.add( short.class ); - numberClass.add( Integer.class ); - numberClass.add( int.class ); - numberClass.add( Long.class ); - numberClass.add( long.class ); - numberClass.add( Float.class ); - numberClass.add( float.class ); - numberClass.add( Double.class ); - numberClass.add( double.class ); - } - - - @Override - public String getDefault( Class type ) { - if( String.class.equals( type ) ) return ""; - else if( Boolean.class.equals( type ) || boolean.class.equals( type ) ) return "false"; - else if( numberClass.contains( type ) ) return "0"; - else if( Enum.class.isAssignableFrom( type ) ) { - try { - return Enum.valueOf( ( Class ) type, Strings.UNKNOWN ).name(); - } catch( IllegalArgumentException ignored ) { - List enumList = EnumUtils.getEnumList( ( Class ) type ); - return enumList.get( 0 ).name(); - } - } else if( Collection.class.isAssignableFrom( type ) ) return "[]"; - else - throw new TemplateException( new IllegalArgumentException( "class " + type + " unknown default value" ) ); - } - - @Override - public boolean isEmpty() { - return baos.length == 0; - } - - @Override - public TemplateAccumulatorBinary newInstance() { - return new TemplateAccumulatorBinary(); - } - - @Override - public TemplateAccumulatorBinary newInstance( FastByteArrayOutputStream mutable ) { - return new TemplateAccumulatorBinary( mutable ); - } - - @Override - public String getTypeName() { - return "byte[]"; - } - - @Override - public String delimiter() { - return ""; - } - - @SneakyThrows - @Override - public TemplateAccumulatorBinary addEol( boolean eol ) { - if( eol ) bos.write( Types.EOL.id ); - return this; - } - - @Override - public void reset() { - baos.reset(); - } - - @SneakyThrows - @Override - public byte[] get() { - return baos.toByteArray(); - } - - @Override - public byte[] getBytes() { - return get(); - } -} diff --git a/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulators.java b/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulators.java index 8561596fd..335a7682e 100644 --- a/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulators.java +++ b/oap-formats/oap-template/src/main/java/oap/template/TemplateAccumulators.java @@ -25,7 +25,6 @@ package oap.template; public final class TemplateAccumulators { - public static final TemplateAccumulatorBinary BINARY = new TemplateAccumulatorBinary(); public static final TemplateAccumulatorString STRING = new TemplateAccumulatorString(); public static final TemplateAccumulatorObject OBJECT = new TemplateAccumulatorObject(); } diff --git a/oap-formats/oap-template/src/main/java/oap/template/Types.java b/oap-formats/oap-template/src/main/java/oap/template/Types.java index 11b730444..cfacf4395 100644 --- a/oap-formats/oap-template/src/main/java/oap/template/Types.java +++ b/oap-formats/oap-template/src/main/java/oap/template/Types.java @@ -31,7 +31,7 @@ public enum Types { EOL( 0, null ), - RAW( 1, null ), +// RAW( 1, null ), DATETIME( 2, DateTime.class ), DATE( 3, Date.class ), BOOLEAN( 4, Boolean.class ), diff --git a/oap-formats/oap-template/src/test/java/oap/template/BinaryStreamTest.java b/oap-formats/oap-template/src/test/java/oap/template/BinaryStreamTest.java deleted file mode 100644 index 96eda3880..000000000 --- a/oap-formats/oap-template/src/test/java/oap/template/BinaryStreamTest.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.template; - -import oap.dictionary.DictionaryRoot; -import oap.util.Strings; -import oap.util.function.Try; -import org.joda.time.DateTime; -import org.testng.annotations.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.DateTimeZone.UTC; - -public class BinaryStreamTest { - @Test - public void testTypes() throws Exception { - check( true, BinaryOutputStream::writeBoolean, BinaryInputStream::readBoolean ); - check( false, BinaryOutputStream::writeBoolean, BinaryInputStream::readBoolean ); - - check( ( byte ) 1, BinaryOutputStream::writeByte, BinaryInputStream::readByte ); - check( ( short ) 2, BinaryOutputStream::writeShort, BinaryInputStream::readShort ); - check( ( int ) 3, BinaryOutputStream::writeInt, BinaryInputStream::readInt ); - check( ( long ) 4, BinaryOutputStream::writeLong, BinaryInputStream::readLong ); - check( 5.5f, BinaryOutputStream::writeFloat, BinaryInputStream::readFloat ); - check( 6.6d, BinaryOutputStream::writeDouble, BinaryInputStream::readDouble ); - check( "test", BinaryOutputStream::writeString, BinaryInputStream::readString ); - check( new DateTime( 2022, 12, 12, 14, 3, UTC ), BinaryOutputStream::writeDateTime, BinaryInputStream::readDateTime ); - - check( List.of( 1L, 2L, 3L ), BinaryOutputStream::writeList, BinaryInputStream::readList ); - check( List.of( "1", "2", "3" ), BinaryOutputStream::writeList, BinaryInputStream::readList ); - check( List.of( 1.1d, 2.2f, 3L, 4, List.of( "test", 4d ) ), BinaryOutputStream::writeList, BinaryInputStream::readList ); - - check( Strings.UNKNOWN, "", BinaryOutputStream::writeString, BinaryInputStream::readString ); - check( TestEnum.A, "A", BinaryOutputStream::writeEnum, BinaryInputStream::readString ); - check( new DictionaryRoot( "dict", List.of() ), "dict", BinaryOutputStream::writeDictionary, BinaryInputStream::readString ); - } - - private void check( T v, - Try.ThrowingBiConsumer write, - Try.ThrowingFunction read ) throws Exception { - check( v, v, write, read ); - } - - private void check( In in, Out out, - Try.ThrowingBiConsumer write, - Try.ThrowingFunction read ) throws Exception { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryOutputStream bos = new BinaryOutputStream( baos ); - write.accept( bos, in ); - - Out rv = read.apply( new BinaryInputStream( new ByteArrayInputStream( baos.toByteArray() ) ) ); - - assertThat( out ).isEqualTo( rv ); - - baos = new ByteArrayOutputStream(); - bos = new BinaryOutputStream( baos ); - bos.writeObject( in ); - - Object rvo = ( new BinaryInputStream( new ByteArrayInputStream( baos.toByteArray() ) ) ).readObject(); - - assertThat( out ).isEqualTo( rvo ); - } - - @Test - public void testLines() throws IOException { - var bytes = BinaryUtils.lines( List.of( - List.of( 1L, "1", new DictionaryRoot( "id", List.of() ) ), - List.of( 2L, "", new DictionaryRoot( "id2", List.of() ) ) ) ); - var bais = new ByteArrayInputStream( bytes ); - var bis = new BinaryInputStream( bais ); - assertThat( bis.readObject() ).isEqualTo( 1L ); - assertThat( bis.readObject() ).isEqualTo( "1" ); - assertThat( bis.readObject() ).isEqualTo( "id" ); - assertThat( bis.readObject() ).isEqualTo( BinaryInputStream.EOL ); - assertThat( bis.readObject() ).isEqualTo( 2L ); - assertThat( bis.readObject() ).isEqualTo( "" ); - assertThat( bis.readObject() ).isEqualTo( "id2" ); - assertThat( bis.readObject() ).isEqualTo( BinaryInputStream.EOL ); - assertThat( bis.readObject() ).isNull(); - assertThat( bis.readObject() ).isNull(); - } - - public enum TestEnum { - A, B - } -} diff --git a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineConcatenationTest.java b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineConcatenationTest.java index 5cfd6bdd8..49b1b58ca 100644 --- a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineConcatenationTest.java +++ b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineConcatenationTest.java @@ -31,12 +31,9 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.io.IOException; import java.lang.reflect.Method; -import java.util.List; import java.util.Optional; -import static oap.template.TemplateAccumulators.BINARY; import static oap.template.TemplateAccumulators.STRING; import static org.assertj.core.api.Assertions.assertThat; @@ -64,16 +61,6 @@ public void testConcatenation() { .isEqualTo( "f1xf2" ); } - @Test - public void testConcatenationBinary() throws IOException { - var c = new TestTemplateClass(); - c.field = "f1"; - c.field2 = "f2"; - - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef() {}, "${{field,\"x\",field2}}", BINARY, null ).render( c ).get() ) ) - .isEqualTo( List.of( List.of( "f1xf2" ) ) ); - } - @Test public void testConcatenationWithNumber() { var c = new TestTemplateClass(); diff --git a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineOrTest.java b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineOrTest.java index 2d0cb6dc3..9cee9fc2f 100644 --- a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineOrTest.java +++ b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineOrTest.java @@ -31,11 +31,9 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.io.IOException; import java.lang.reflect.Method; import java.util.List; -import static oap.template.TemplateAccumulators.BINARY; import static oap.template.TemplateAccumulators.STRING; import static org.assertj.core.api.Assertions.assertThat; @@ -70,23 +68,6 @@ public void testOrNull() { .isEqualTo( "" ); } - @Test - public void testOrDefaultBinary() throws IOException { - TestTemplateClass c = new TestTemplateClass(); - - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef() {}, "{{ intObjectField | default childNullable.intObjectField ?? 3 }}", BINARY, null ).render( c ).get() ) ) - .isEqualTo( List.of( List.of( 3 ) ) ); - } - - @Test - public void testOrEmptyStringWithBinaryAccumulator() throws IOException { - TestTemplateClass c = new TestTemplateClass(); - c.field2 = "f2"; - - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef() {}, "{{ field | default field2 }}", BINARY, null ).render( c ).get() ) ) - .isEqualTo( List.of( List.of( "f2" ) ) ); - } - @Test public void testOrCollections() { TestTemplateClass c = new TestTemplateClass(); diff --git a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineTest.java b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineTest.java index e01f70ce4..cc100ee61 100644 --- a/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineTest.java +++ b/oap-formats/oap-template/src/test/java/oap/template/TemplateEngineTest.java @@ -28,10 +28,8 @@ import oap.testng.Fixtures; import oap.testng.TestDirectoryFixture; import oap.util.Dates; -import oap.util.Strings; import org.apache.commons.io.FileUtils; import org.codehaus.plexus.util.StringUtils; -import org.joda.time.DateTime; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -46,12 +44,10 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static oap.template.ErrorStrategy.ERROR; import static oap.template.ErrorStrategy.IGNORE; -import static oap.template.TemplateAccumulators.BINARY; import static oap.template.TemplateAccumulators.OBJECT; import static oap.template.TemplateAccumulators.STRING; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.joda.time.DateTimeZone.UTC; public class TemplateEngineTest extends Fixtures { private final TestDirectoryFixture testDirectoryFixture; @@ -87,12 +83,6 @@ public void testWithoutDefaultValue() { .isEqualTo( 0L ); } - @Test - public void testRenderUNKNOWNStringTextAsBinary() throws IOException { - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef>() {}, Strings.UNKNOWN, BINARY, null ).render( null ).get() ) ) - .isEqualTo( List.of( List.of( "" ) ) ); - } - @Test public void testEscapeVariables() { TestTemplateClass c = new TestTemplateClass(); @@ -322,18 +312,6 @@ public void testDefaultDouble() { .isEqualTo( "0.0" ); } - @Test - public void testDefaultDoubleBinary() throws IOException { - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef>() {}, "{{ bbb ?? 0.0 }}", BINARY, null ).render( Map.of( "prop", 1.1 ) ).get() ) ) - .isEqualTo( List.of( List.of( 0.0d ) ) ); - } - - @Test - public void testDefaultDateTime() throws IOException { - assertThat( BinaryUtils.read( engine.getTemplate( testMethodName, new TypeRef() {}, "{{ dateTimeOptional ?? '2023-01-04 18:09:11' }}", BINARY, null ).render( new TestTemplateClass() ).get() ) ) - .isEqualTo( List.of( List.of( new DateTime( 2023, 1, 4, 18, 9, 11, UTC ) ) ) ); - } - @Test public void testMix() { assertThat( engine.getTemplate( testMethodName, new TypeRef>() {}, "-{{ prop }}-{{ b }}-", STRING, null ).render( Map.of( "prop", "val", "b", "b1" ) ).get() ) From 1254e7d7b0abaa9a4ede8a4835afaaab973c9c31 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 08:39:07 +0200 Subject: [PATCH 06/28] oap-logstream: row-binary --- .../main/java/oap/testng/TsvAssertion.java | 189 ------------------ .../java/oap/testng/TsvAssertionTest.java | 35 ---- 2 files changed, 224 deletions(-) delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertion.java delete mode 100644 oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertionTest.java diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertion.java deleted file mode 100644 index 1f937a4d2..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertion.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) Open Application Platform Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package oap.testng; - -import oap.util.Lists; -import org.apache.commons.collections4.IteratorUtils; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.io.IOUtils; -import org.assertj.core.api.AbstractCharSequenceAssert; -import org.assertj.core.data.MapEntry; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.io.StringReader; -import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - - -@Deprecated -public class TsvAssertion extends AbstractCharSequenceAssert { - protected TsvAssertion( CharSequence value ) { - super( value, TsvAssertion.class ); - } - - public static TsvAssertion assertTsv( CharSequence actual ) { - return new TsvAssertion( actual ); - } - - public static TsvAssertion assertTsv( Path path ) { - try { - return new TsvAssertion( Files.readString( path ) ); - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public static TsvAssertion assertTsv( File file ) { - try { - return new TsvAssertion( Files.readString( file.toPath() ) ); - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public static TsvAssertion assertTsv( InputStream inputStream ) { - try { - return new TsvAssertion( IOUtils.toString( inputStream, StandardCharsets.UTF_8 ) ); - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public static TsvAssertion assertTsv( Reader reader ) { - try { - return new TsvAssertion( IOUtils.toString( reader ) ); - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public final TsvAssertion containsHeader( String headerName ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - assertThat( headers ).containsKey( headerName ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public final TsvAssertion containsHeaders( String... headerNames ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - assertThat( headers ).containsKeys( headerNames ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public final TsvAssertion containsHeaders( Iterable headerNames ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - assertThat( headers ).containsKeys( IteratorUtils.toArray( headerNames.iterator(), String.class ) ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public final TsvAssertion containsOnlyHeaders( String... headerNames ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - assertThat( headers ).containsOnlyKeys( headerNames ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - public final TsvAssertion containsOnlyHeaders( Iterable headerNames ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - assertThat( headers ).containsOnlyKeys( headerNames ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - @SuppressWarnings( "unchecked" ) - @SafeVarargs - public final TsvAssertion containsRowCols( MapEntry... entries ) { - try( var parser = getParser() ) { - var headers = parser.getHeaderMap(); - - containsHeaders( Lists.map( List.of( entries ), e -> e.key ) ); - - var result = new ArrayList>(); - - for( var record : parser ) { - var arr = new ArrayList(); - - for( var entry : entries ) { - arr.add( record.get( entry.key ) ); - } - - result.add( arr ); - } - - assertThat( result ).contains( Lists.map( List.of( entries ), e -> e.value ) ); - - return this; - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } - - private CSVParser getParser() { - try { - return new CSVParser( new StringReader( actual.toString() ), CSVFormat.TDF - .withFirstRecordAsHeader() - .withIgnoreSurroundingSpaces( false ) ); - } catch( IOException e ) { - throw new UncheckedIOException( e ); - } - } -} diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertionTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertionTest.java deleted file mode 100644 index 5489f735b..000000000 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/testng/TsvAssertionTest.java +++ /dev/null @@ -1,35 +0,0 @@ -package oap.testng; - -import org.testng.annotations.Test; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Assertions.entry; - - -@Deprecated -public class TsvAssertionTest { - - @Test - public void testContainsHeader() { - TsvAssertion.assertTsv( "a\tb\tc\n1\t2\t3" ).containsHeader( "a" ); - - assertThatThrownBy( () -> - TsvAssertion.assertTsv( "a\tb\tc\n1\t2\t3" ).containsHeader( "unknown" ) ) - .isInstanceOf( AssertionError.class ); - } - - @Test - public void testContainsRowCols() { - TsvAssertion.assertTsv( """ - a\tb\tc - 11\t12\t13 - 21\t22\t23""" ).containsRowCols( entry( "a", "11" ), entry( "b", "12" ) ); - - assertThatThrownBy( () -> - TsvAssertion.assertTsv( """ - a\tb\tc - 11\t12\t13 - 21\t22\t23""" ).containsRowCols( entry( "a", "11" ), entry( "b", "22" ) ) - .isInstanceOf( AssertionError.class ) ); - } -} From 996bd9a000b0bee904923767345b8cc5339d0c2d Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 08:56:25 +0200 Subject: [PATCH 07/28] oap-logstream: row-binary --- .../logstream/formats/rowbinary/RowBinaryUtils.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java index d647d5dc5..bda073493 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryUtils.java @@ -5,6 +5,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.List; @@ -15,6 +16,10 @@ public static List> read( byte[] bytes ) throws IOException { return read( bytes, null, null ); } + public static List> read( InputStream inputStream ) throws IOException { + return read( inputStream, null, null )._1; + } + public static List> read( byte[] bytes, String[] headers, byte[][] types ) throws IOException { return read( bytes, 0, bytes.length, headers, types )._1; } @@ -23,8 +28,8 @@ public static Pair>, List> read( byte[] bytes, int off return read( bytes, offset, length, null, null ); } - public static Pair>, List> read( byte[] bytes, int offset, int length, String[] headers, byte[][] types ) throws IOException { - RowBinaryInputStream binaryInputStream = new RowBinaryInputStream( new ByteArrayInputStream( bytes, offset, length ), headers, types ); + public static Pair>, List> read( InputStream inputStream, String[] headers, byte[][] types ) throws IOException { + RowBinaryInputStream binaryInputStream = new RowBinaryInputStream( inputStream, headers, types ); ArrayList> res = new ArrayList<>(); @@ -38,6 +43,10 @@ public static Pair>, List> read( byte[] bytes, int off return __( res, List.of( binaryInputStream.headers ) ); } + public static Pair>, List> read( byte[] bytes, int offset, int length, String[] headers, byte[][] types ) throws IOException { + return read( new ByteArrayInputStream( bytes, offset, length ), headers, types ); + } + public static byte[] lines( List> rows ) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); for( List row : rows ) { From 7bfeaf005863ccf0177364f5974940a42e93b51f Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 10:19:12 +0200 Subject: [PATCH 08/28] oap-logstream: row-binary --- .../main/java/oap/logstream/formats/RowBinaryAssertion.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java index fa3772472..a0c60f17c 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -29,6 +29,10 @@ public static RowBinaryAssertion assertRowBinaryFile( Path file, IoStreams.Encod return new RowBinaryAssertion( new RowBinaryData( null, null, Files.read( file, encoding, ofBytes() ) ) ); } + public static RowBinaryAssertion assertRowBinary( byte[] bytes ) { + return new RowBinaryAssertion( new RowBinaryData( null, null, bytes ) ); + } + @SneakyThrows public ListAssert> content( String... header ) { List> ret = new ArrayList<>(); From 6ae2aa99629a67de9bdaf89afbbc1c4652235751 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 10:53:57 +0200 Subject: [PATCH 09/28] oap-logstream: row-binary --- .../logstream/formats/RowBinaryAssertion.java | 188 +++++++++++++++--- .../java/oap/logstream/LoggerJsonTest.java | 7 +- .../test/java/oap/logstream/LoggerTest.java | 21 +- .../logstream/disk/DiskLoggerBackendTest.java | 15 +- .../main/java/oap/tsv/test/TsvAssertion.java | 4 +- 5 files changed, 183 insertions(+), 52 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java index a0c60f17c..043da13dc 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -1,16 +1,20 @@ package oap.logstream.formats; +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import lombok.EqualsAndHashCode; import lombok.SneakyThrows; import lombok.ToString; import oap.io.Files; import oap.io.IoStreams; import oap.logstream.formats.rowbinary.RowBinaryInputStream; +import oap.util.Lists; import org.apache.commons.lang3.ArrayUtils; import org.assertj.core.api.AbstractAssert; import org.assertj.core.api.Assertions; -import org.assertj.core.api.ListAssert; import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; @@ -26,46 +30,184 @@ protected RowBinaryAssertion( RowBinaryData rowBinaryData ) { public static RowBinaryAssertion assertRowBinaryFile( Path file, IoStreams.Encoding encoding ) { Assertions.assertThatPath( file ).exists(); - return new RowBinaryAssertion( new RowBinaryData( null, null, Files.read( file, encoding, ofBytes() ) ) ); + return new RowBinaryAssertion( new RowBinaryData( null, null, new ByteArrayInputStream( Files.read( file, encoding, ofBytes() ) ) ) ); } public static RowBinaryAssertion assertRowBinary( byte[] bytes ) { - return new RowBinaryAssertion( new RowBinaryData( null, null, bytes ) ); + return new RowBinaryAssertion( new RowBinaryData( null, null, new ByteArrayInputStream( bytes ) ) ); } - @SneakyThrows - public ListAssert> content( String... header ) { - List> ret = new ArrayList<>(); + public static Row row( String... cols ) { + return new Row( cols ); + } - ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream( this.actual.data ); - RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( byteArrayInputStream, this.actual.headers, this.actual.types ); + public static Header header( String... cols ) { + return new Header( cols ); + } - List objects; - while( ( objects = rowBinaryInputStream.readRow() ) != null ) { - ArrayList filtered = new ArrayList<>(); - for( int i = 0; i < rowBinaryInputStream.headers.length; i++ ) { - if( header.length == 0 || ArrayUtils.contains( rowBinaryInputStream.headers, header[i] ) ) { - filtered.add( objects.get( i ) ); - } - } + public RowBinaryAssertion hasHeaders( String... headers ) { + assertThat( actual.headers ).contains( headers ); + return this; + } + + public RowBinaryAssertion hasHeaders( Iterable headers ) { + assertThat( actual.headers ).containsAll( headers ); + return this; + } + + public RowBinaryAssertion hasHeaders( Header header ) { + assertThat( actual.headers ).containsAll( header.cols ); + return this; + } + + public RowBinaryAssertion containOnlyHeaders( String... headers ) { + assertThat( actual.headers ).containsOnly( headers ); + return this; + } + + @SafeVarargs + public final RowBinaryAssertion containsExactlyInAnyOrderEntriesOf( List... entries ) { + assertThat( actual.data ).containsExactlyInAnyOrderElementsOf( List.of( entries ) ); + return this; + } + + public RowBinaryAssertion containsExactlyInAnyOrderEntriesOf( Header header, Row... rows ) { + hasHeaders( header ); + for( Row row : rows ) { + assertThat( row.cols ) + .withFailMessage( "entries length doesnt match headers" ) + .hasSize( header.size() ); + } + assertThat( actual.getCols( header ) ) + .containsExactlyInAnyOrderElementsOf( Lists.map( rows, r -> r.cols ) ); + + return this; + } + + public RowBinaryAssertion containsAnyEntriesOf( Header header, Row... rows ) { + hasHeaders( header.cols ); + for( Row row : rows ) { + assertThat( row.cols ) + .withFailMessage( "entries length doesnt match headers" ) + .hasSize( header.size() ); + } + + assertThat( actual.getCols( header ) ) + .containsAnyElementsOf( Lists.map( rows, r -> r.cols ) ); + return this; + } + + public RowBinaryAssertion containsOnlyOnceEntriesOf( Header header, Row... rows ) { + hasHeaders( header ); + for( Row row : rows ) { + assertThat( row.cols ) + .withFailMessage( "entries length doesnt match headers" ) + .hasSize( header.size() ); + } + assertThat( actual.getCols( header ) ).containsOnlyOnceElementsOf( Lists.map( rows, r -> r.cols ) ); + return this; + } + + public RowBinaryAssertion doesNotContainAnyEntriesOf( Header header, Row... rows ) { + hasHeaders( header ); + for( Row row : rows ) { + assertThat( row.cols ) + .withFailMessage( "entries length doesnt match headers" ) + .hasSize( header.size() ); + } + + assertThat( actual.getCols( header ) ).doesNotContainAnyElementsOf( Lists.map( rows, r -> r.cols ) ); + return this; + } + public RowBinaryAssertion doesNotContainAnyEntriesOf( Row... rows ) { + assertThat( actual.headers ) + .withFailMessage( "tsv must contain headers" ) + .isNotEmpty(); - ret.add( filtered ); + for( Row row : rows ) { + assertThat( row.cols ) + .withFailMessage( "entries length doesnt match headers" ) + .hasSameSizeAs( actual.headers ); } + assertThat( actual.data ).doesNotContainAnyElementsOf( Lists.map( rows, r -> r.cols ) ); + return this; + } - return assertThat( ret ); + public RowBinaryAssertion isNotEmpty() { + assertThat( actual.data ).isNotEmpty(); + return this; } @ToString public static class RowBinaryData { public final String[] headers; public final byte[][] types; - public final byte[] data; + public final ArrayList> data = new ArrayList<>(); + + @SneakyThrows + public RowBinaryData( String[] headers, byte[][] types, InputStream inputStream ) { + RowBinaryInputStream rowBinaryInputStream = new RowBinaryInputStream( inputStream, headers, types ); + + List objects; + while( ( objects = rowBinaryInputStream.readRow() ) != null ) { + ArrayList row = new ArrayList<>(); + for( int i = 0; i < rowBinaryInputStream.headers.length; i++ ) { + row.add( objects.get( i ) ); + } + + data.add( row ); + } + + this.headers = rowBinaryInputStream.headers; + this.types = rowBinaryInputStream.types; + } + + public List> getCols( Header headers ) { + + IntArrayList hIndexes = new IntArrayList(); + + for( String header : headers.cols ) { + int index = ArrayUtils.indexOf( this.headers, header ); + Preconditions.checkArgument( index >= 0, "header %s not found", header ); + hIndexes.add( index ); + } + + ArrayList> ret = new ArrayList<>(); + + for( List row : data ) { + ArrayList filtered = new ArrayList<>(); + for( int index : hIndexes ) { + filtered.add( row.get( index ) ); + } + ret.add( filtered ); + } + + return ret; + } + } + + @ToString + @EqualsAndHashCode + public static class Row { + private final List cols; + + public Row( Object... cols ) { + this.cols = List.of( cols ); + } + } + + @ToString + @EqualsAndHashCode + public static class Header { + public final List cols; + + public Header( String... cols ) { + this.cols = List.of( cols ); + } - public RowBinaryData( String[] headers, byte[][] types, byte[] data ) { - this.headers = headers; - this.types = types; - this.data = data; + public int size() { + return cols.size(); } } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java index b58a6408d..87144edd4 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java @@ -28,7 +28,6 @@ import oap.io.IoStreams.Encoding; import oap.json.Binder; import oap.logstream.disk.DiskLoggerBackend; -import oap.logstream.formats.RowBinaryAssertion; import oap.logstream.formats.rowbinary.RowBinaryUtils; import oap.template.Types; import oap.testng.Fixtures; @@ -43,6 +42,7 @@ import static oap.io.content.ContentReader.ofJson; import static oap.logstream.Timestamp.BPH_12; import static oap.logstream.disk.DiskLoggerBackend.DEFAULT_BUFFER; +import static oap.logstream.formats.RowBinaryAssertion.assertRowBinaryFile; import static oap.net.Inet.HOSTNAME; import static oap.testng.Asserts.contentOfTestResource; import static org.assertj.core.api.Assertions.assertThat; @@ -71,9 +71,8 @@ public void diskJSON() throws IOException { logger.log( "open_rtb_json", Map.of(), "request_response", headers, types, Compression.gzip( RowBinaryUtils.line( List.of( jsonContent ) ) ) ); } - RowBinaryAssertion.assertRowBinaryFile( testDirectoryFixture.testPath( "logs/open_rtb_json/2015-10/10/request_response_v3b5d9e1b-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), Encoding.GZIP ) - .content() - .contains( List.of( content ) ); + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/open_rtb_json/2015-10/10/request_response_v3b5d9e1b-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), Encoding.GZIP ) + .containsExactlyInAnyOrderEntriesOf( List.of( content ) ); } public static class SimpleJson { diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java index b54c5021d..17ba092ac 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java @@ -89,17 +89,13 @@ public void disk() throws IOException { } assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData1, lineData1 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData1 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData1 ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData2 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData2 ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData2 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData2 ); } @Test @@ -163,13 +159,10 @@ public void net() throws IOException { assertEventually( 10, 1000, () -> assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData1, lineData1 ) ) ); + .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ) ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData1 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData1 ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( lineData2 ) ); + .containsExactlyInAnyOrderEntriesOf( lineData2 ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index dcd2db468..bf721c4fb 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -87,17 +87,15 @@ public void testPatternByType() throws IOException { backend.refresh( true ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( + .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) - ) ); + ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( + .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) - ) ); + ); } } @@ -116,11 +114,10 @@ public void testWriteSync() throws IOException { logger.log( "lfn1", Map.of(), "log", headers, types, lines ); assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) - .content() - .isEqualTo( List.of( + .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) - ) ); + ); } } } diff --git a/oap-formats/oap-tsv/oap-tsv-test/src/main/java/oap/tsv/test/TsvAssertion.java b/oap-formats/oap-tsv/oap-tsv-test/src/main/java/oap/tsv/test/TsvAssertion.java index 8d325e133..23f95a3fb 100644 --- a/oap-formats/oap-tsv/oap-tsv-test/src/main/java/oap/tsv/test/TsvAssertion.java +++ b/oap-formats/oap-tsv/oap-tsv-test/src/main/java/oap/tsv/test/TsvAssertion.java @@ -128,7 +128,7 @@ public final TsvAssertion containsExactlyInAnyOrderEntriesOf( List... en public TsvAssertion containsExactlyInAnyOrderEntriesOf( Header header, Row... rows ) { hasHeaders( header ); - for( var row : rows ) { + for( Row row : rows ) { assertThat( row.cols ) .withFailMessage( "entries length doesnt match headers" ) .hasSize( header.size() ); @@ -198,7 +198,7 @@ public TsvAssertion doesNotContainAnyEntriesOf( Row... rows ) { for( var row : rows ) { assertThat( row.cols ) .withFailMessage( "entries length doesnt match headers" ) - .hasSize( actual.headers.size() ); + .hasSameSizeAs( actual.headers ); } assertThat( actual.data ).doesNotContainAnyElementsOf( Lists.map( rows, r -> r.cols ) ); return this; From deae423a9746af965203fc9b0fc848449ef9af3b Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 12:21:40 +0200 Subject: [PATCH 10/28] oap-logstream: row-binary --- .../src/main/java/oap/logstream/formats/RowBinaryAssertion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java index 043da13dc..39cb784ce 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -37,7 +37,7 @@ public static RowBinaryAssertion assertRowBinary( byte[] bytes ) { return new RowBinaryAssertion( new RowBinaryData( null, null, new ByteArrayInputStream( bytes ) ) ); } - public static Row row( String... cols ) { + public static Row row( Object... cols ) { return new Row( cols ); } From eac8a0dbee8a29c959d1c753854b012d0ba4323c Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 12:29:35 +0200 Subject: [PATCH 11/28] oap-logstream: row-binary --- .../main/java/oap/logstream/formats/RowBinaryAssertion.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java index 39cb784ce..1464db02f 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/main/java/oap/logstream/formats/RowBinaryAssertion.java @@ -34,7 +34,11 @@ public static RowBinaryAssertion assertRowBinaryFile( Path file, IoStreams.Encod } public static RowBinaryAssertion assertRowBinary( byte[] bytes ) { - return new RowBinaryAssertion( new RowBinaryData( null, null, new ByteArrayInputStream( bytes ) ) ); + return assertRowBinary( new ByteArrayInputStream( bytes ) ); + } + + public static RowBinaryAssertion assertRowBinary( InputStream inputStream ) { + return new RowBinaryAssertion( new RowBinaryData( null, null, inputStream ) ); } public static Row row( Object... cols ) { From 3a18f525c8fc867532af42341c832d69506c5fcb Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 13:12:24 +0200 Subject: [PATCH 12/28] oap-logstream: row-binary --- .../formats/rowbinary/RowBinaryInputStream.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 317c360b6..98dd2f6c2 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -2,6 +2,7 @@ import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.bytes.ByteArrayList; +import lombok.extern.slf4j.Slf4j; import oap.template.Types; import org.joda.time.DateTime; @@ -19,7 +20,10 @@ * https://clickhouse.com/docs/interfaces/formats/RowBinary */ @SuppressWarnings( "checkstyle:OverloadMethodsDeclarationOrder" ) +@Slf4j public class RowBinaryInputStream extends InputStream { + public static final String TYPE_NULLABLE = "Nullable("; + public static final String TYPE_ARRAY = "Array("; public final String[] headers; public final InputStream in; public final byte[][] types; @@ -41,9 +45,11 @@ public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) } private static void convertType( String rbType, ByteArrayList type ) { - if( rbType.startsWith( "Array(" ) ) { + if( rbType.startsWith( TYPE_NULLABLE ) ) { + convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type ); + } else if( rbType.startsWith( TYPE_ARRAY ) ) { type.add( Types.LIST.id ); - convertType( rbType.substring( "Array(".length(), rbType.length() - 1 ), type ); + convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type ); } else { type.add( switch( rbType ) { case "Bool" -> Types.BOOLEAN.id; @@ -79,6 +85,7 @@ private byte[][] readTypes() throws IOException { for( int i = 0; i < count; i++ ) { String rbType = readString(); + log.trace( "in type {}", rbType ); convertType( rbType, type ); From 5ce9378ff9e75c15aed5829ca9d1d2142143886e Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 14:32:17 +0200 Subject: [PATCH 13/28] oap-logstream: row-binary --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 98dd2f6c2..84d865776 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -60,8 +60,8 @@ private static void convertType( String rbType, ByteArrayList type ) { case "Float32" -> Types.FLOAT.id; case "Float64" -> Types.DOUBLE.id; case "String" -> Types.STRING.id; - case "Date" -> Types.DATE.id; - case "DateTime" -> Types.DATETIME.id; + case "Date", "Date32" -> Types.DATE.id; + case "DateTime", "DateTime32" -> Types.DATETIME.id; case null, default -> throw new IllegalArgumentException( "unknown type " + type ); } ); } From 2d30b9bb98f60806f4a7895bfedec1f2fba9d093 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Mon, 2 Mar 2026 14:46:27 +0200 Subject: [PATCH 14/28] oap-logstream: row-binary --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 84d865776..378d61f24 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -50,6 +50,8 @@ private static void convertType( String rbType, ByteArrayList type ) { } else if( rbType.startsWith( TYPE_ARRAY ) ) { type.add( Types.LIST.id ); convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type ); + } else if( rbType.startsWith( "FixedString(" ) ) { + type.add( Types.STRING.id ); } else { type.add( switch( rbType ) { case "Bool" -> Types.BOOLEAN.id; From b54f49fda62f86419184f9a2e1e53bce2ee62e41 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 09:07:35 +0200 Subject: [PATCH 15/28] fix: FixedLength --- .../rowbinary/RowBinaryInputStream.java | 50 +++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 378d61f24..bc323286e 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -10,8 +10,10 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; import static java.nio.charset.StandardCharsets.UTF_8; import static org.joda.time.DateTimeZone.UTC; @@ -24,9 +26,11 @@ public class RowBinaryInputStream extends InputStream { public static final String TYPE_NULLABLE = "Nullable("; public static final String TYPE_ARRAY = "Array("; + public static final String FIXED_STRING_PREFIX = "FixedString("; public final String[] headers; public final InputStream in; public final byte[][] types; + public final int[] fixedLength; protected byte[] readBuffer = new byte[8]; public RowBinaryInputStream( InputStream in ) throws IOException { @@ -41,17 +45,25 @@ public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) this.in = in; this.headers = headers == null ? readHeaders() : headers; + + this.fixedLength = new int[this.headers.length]; + Arrays.fill( this.fixedLength, 0 ); + this.types = types == null ? readTypes() : types; } - private static void convertType( String rbType, ByteArrayList type ) { + private static void convertType( String rbType, ByteArrayList type, AtomicInteger fixedLength ) { if( rbType.startsWith( TYPE_NULLABLE ) ) { - convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type ); + convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type, fixedLength ); } else if( rbType.startsWith( TYPE_ARRAY ) ) { type.add( Types.LIST.id ); - convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type ); - } else if( rbType.startsWith( "FixedString(" ) ) { + convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type, fixedLength ); + } else if( rbType.startsWith( FIXED_STRING_PREFIX ) ) { type.add( Types.STRING.id ); + + int endLength = rbType.indexOf( ')', FIXED_STRING_PREFIX.length() ); + + fixedLength.set( Integer.parseInt( rbType.substring( FIXED_STRING_PREFIX.length(), endLength ) ) ); } else { type.add( switch( rbType ) { case "Bool" -> Types.BOOLEAN.id; @@ -89,10 +101,13 @@ private byte[][] readTypes() throws IOException { String rbType = readString(); log.trace( "in type {}", rbType ); - convertType( rbType, type ); + AtomicInteger iFixedLength = new AtomicInteger(); + convertType( rbType, type, iFixedLength ); types[i] = type.toByteArray(); + this.fixedLength[i] = iFixedLength.get(); + type.clear(); } @@ -133,6 +148,19 @@ public String readString() throws IOException { } } + public String readString( int fixedLength ) throws IOException { + byte[] buf = new byte[fixedLength]; + readFully( buf, 0, fixedLength ); + + + int end = fixedLength - 1; + while( end >= 0 && buf[end] == 0 ) { + end--; + } + + return new String( buf, 0, end, UTF_8 ); + } + private String[] readHeaders() throws IOException { int count = readVarInt(); String[] headers = new String[count]; @@ -208,7 +236,7 @@ public List readList( Class clazz ) throws IOException { ArrayList list = new ArrayList<>( size ); for( int i = 0; i < size; i++ ) { - T v = readObject( clazz ); + T v = readObject( clazz, i ); list.add( v ); } @@ -216,9 +244,10 @@ public List readList( Class clazz ) throws IOException { } @SuppressWarnings( "unchecked" ) - private T readObject( Class clazz ) throws IOException { + private T readObject( Class clazz, int col ) throws IOException { if( clazz == String.class ) { - return ( T ) readString(); + int length = fixedLength[col]; + return ( T ) ( length == 0 ? readString() : readString( length ) ); } else if( clazz == byte.class ) { return ( T ) ( Byte ) readByte(); } else if( clazz == Byte.class ) { @@ -276,7 +305,10 @@ public List readRow() throws IOException { case LONG -> readLong(); case FLOAT -> readFloat(); case DOUBLE -> readDouble(); - case STRING -> readString(); + case STRING -> { + int length = fixedLength[i]; + yield length == 0 ? readString() : readString( length ); + } case BOOLEAN -> readBoolean(); case LIST -> { Types listItemType = Types.valueOf( bytes[1] ); From df09cb7735f276c3b77433b1bc93090f724273e3 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 09:13:01 +0200 Subject: [PATCH 16/28] nullable --- .../rowbinary/RowBinaryInputStream.java | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index bc323286e..116f948dc 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -13,6 +13,7 @@ import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import static java.nio.charset.StandardCharsets.UTF_8; @@ -31,6 +32,7 @@ public class RowBinaryInputStream extends InputStream { public final InputStream in; public final byte[][] types; public final int[] fixedLength; + public final boolean[] nullable; protected byte[] readBuffer = new byte[8]; public RowBinaryInputStream( InputStream in ) throws IOException { @@ -48,16 +50,19 @@ public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) this.fixedLength = new int[this.headers.length]; Arrays.fill( this.fixedLength, 0 ); + this.nullable = new boolean[this.headers.length]; + Arrays.fill( this.nullable, false ); this.types = types == null ? readTypes() : types; } - private static void convertType( String rbType, ByteArrayList type, AtomicInteger fixedLength ) { + private static void convertType( String rbType, ByteArrayList type, AtomicInteger fixedLength, AtomicBoolean nullable ) { if( rbType.startsWith( TYPE_NULLABLE ) ) { - convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type, fixedLength ); + nullable.set( true ); + convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type, fixedLength, nullable ); } else if( rbType.startsWith( TYPE_ARRAY ) ) { type.add( Types.LIST.id ); - convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type, fixedLength ); + convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type, fixedLength, nullable ); } else if( rbType.startsWith( FIXED_STRING_PREFIX ) ) { type.add( Types.STRING.id ); @@ -101,12 +106,14 @@ private byte[][] readTypes() throws IOException { String rbType = readString(); log.trace( "in type {}", rbType ); - AtomicInteger iFixedLength = new AtomicInteger(); - convertType( rbType, type, iFixedLength ); + AtomicInteger iFixedLength = new AtomicInteger( 0 ); + AtomicBoolean iNullable = new AtomicBoolean( false ); + convertType( rbType, type, iFixedLength, iNullable ); types[i] = type.toByteArray(); this.fixedLength[i] = iFixedLength.get(); + this.nullable[i] = iNullable.get(); type.clear(); } @@ -236,6 +243,16 @@ public List readList( Class clazz ) throws IOException { ArrayList list = new ArrayList<>( size ); for( int i = 0; i < size; i++ ) { + boolean isNullable = nullable[i]; + + if( isNullable ) { + byte isNull = readByte(); + if( isNull == 1 ) { + list.add( null ); + continue; + } + } + T v = readObject( clazz, i ); list.add( v ); } @@ -296,6 +313,16 @@ public List readRow() throws IOException { byte[] bytes = types[i]; Types types = Types.valueOf( bytes[0] ); + boolean isNullable = nullable[i]; + + if( isNullable ) { + byte isNull = readByte(); + if( isNull == 1 ) { + row.add( null ); + continue; + } + } + row.add( switch( types ) { case DATETIME -> readDateTime(); case DATE -> readDate(); From 4c6eddffaf1010b1bb5d1bcc656414cd0b0d234c Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 10:31:33 +0200 Subject: [PATCH 17/28] fix: date --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 116f948dc..2eb718a4e 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -79,8 +79,8 @@ private static void convertType( String rbType, ByteArrayList type, AtomicIntege case "Float32" -> Types.FLOAT.id; case "Float64" -> Types.DOUBLE.id; case "String" -> Types.STRING.id; - case "Date", "Date32" -> Types.DATE.id; - case "DateTime", "DateTime32" -> Types.DATETIME.id; + case "Date" -> Types.DATE.id; + case "Date32", "DateTime", "DateTime32" -> Types.DATETIME.id; case null, default -> throw new IllegalArgumentException( "unknown type " + type ); } ); } From 139e065c4c23f7b2d9bb28b29097df3f3bcf56b7 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 10:54:33 +0200 Subject: [PATCH 18/28] fix: date --- .../formats/rowbinary/RowBinaryInputStream.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 2eb718a4e..04787a160 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -303,11 +303,12 @@ protected void readFully( byte[] b, int off, int len ) throws IOException { } public List readRow() throws IOException { - try { - Preconditions.checkNotNull( types ); - Preconditions.checkNotNull( headers ); + Preconditions.checkNotNull( types ); + Preconditions.checkNotNull( headers ); + + ArrayList row = new ArrayList<>( headers.length ); - ArrayList row = new ArrayList<>( headers.length ); + try { for( int i = 0; i < headers.length; i++ ) { byte[] bytes = types[i]; @@ -347,6 +348,10 @@ public List readRow() throws IOException { return row; } catch( EOFException e ) { + if( !row.isEmpty() ) { + throw e; + } + return null; } } From 37603e08e46d28bcec04cfd96150b91f68997cba Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 11:00:25 +0200 Subject: [PATCH 19/28] fix: date --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 04787a160..55bfd62b3 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -165,7 +165,7 @@ public String readString( int fixedLength ) throws IOException { end--; } - return new String( buf, 0, end, UTF_8 ); + return new String( buf, 0, end + 1, UTF_8 ); } private String[] readHeaders() throws IOException { From 5ac962e2779537706de2e4bc4e8b668ae409c19a Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 13:48:27 +0200 Subject: [PATCH 20/28] fix: date --- .../rowbinary/RowBinaryInputStream.java | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 55bfd62b3..2045f0d28 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -33,6 +33,7 @@ public class RowBinaryInputStream extends InputStream { public final byte[][] types; public final int[] fixedLength; public final boolean[] nullable; + public final boolean[] datetime32; protected byte[] readBuffer = new byte[8]; public RowBinaryInputStream( InputStream in ) throws IOException { @@ -52,17 +53,19 @@ public RowBinaryInputStream( InputStream in, String[] headers, byte[][] types ) Arrays.fill( this.fixedLength, 0 ); this.nullable = new boolean[this.headers.length]; Arrays.fill( this.nullable, false ); + this.datetime32 = new boolean[this.headers.length]; + Arrays.fill( this.datetime32, false ); this.types = types == null ? readTypes() : types; } - private static void convertType( String rbType, ByteArrayList type, AtomicInteger fixedLength, AtomicBoolean nullable ) { + private static void convertType( String rbType, ByteArrayList type, AtomicInteger fixedLength, AtomicBoolean nullable, AtomicBoolean datetime32 ) { if( rbType.startsWith( TYPE_NULLABLE ) ) { nullable.set( true ); - convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type, fixedLength, nullable ); + convertType( rbType.substring( TYPE_NULLABLE.length(), rbType.length() - 1 ), type, fixedLength, nullable, datetime32 ); } else if( rbType.startsWith( TYPE_ARRAY ) ) { type.add( Types.LIST.id ); - convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type, fixedLength, nullable ); + convertType( rbType.substring( TYPE_ARRAY.length(), rbType.length() - 1 ), type, fixedLength, nullable, datetime32 ); } else if( rbType.startsWith( FIXED_STRING_PREFIX ) ) { type.add( Types.STRING.id ); @@ -80,7 +83,11 @@ private static void convertType( String rbType, ByteArrayList type, AtomicIntege case "Float64" -> Types.DOUBLE.id; case "String" -> Types.STRING.id; case "Date" -> Types.DATE.id; - case "Date32", "DateTime", "DateTime32" -> Types.DATETIME.id; + case "Date2" -> { + datetime32.set( true ); + yield Types.DATE.id; + } + case "DateTime", "DateTime32" -> Types.DATETIME.id; case null, default -> throw new IllegalArgumentException( "unknown type " + type ); } ); } @@ -108,12 +115,14 @@ private byte[][] readTypes() throws IOException { AtomicInteger iFixedLength = new AtomicInteger( 0 ); AtomicBoolean iNullable = new AtomicBoolean( false ); - convertType( rbType, type, iFixedLength, iNullable ); + AtomicBoolean iDatetime32 = new AtomicBoolean( false ); + convertType( rbType, type, iFixedLength, iNullable, iDatetime32 ); types[i] = type.toByteArray(); this.fixedLength[i] = iFixedLength.get(); this.nullable[i] = iNullable.get(); + this.datetime32[i] = iDatetime32.get(); type.clear(); } @@ -237,6 +246,10 @@ public Date readDate() throws IOException { return new Date( readShort() * 24L * 60L * 60L * 1000L ); } + public Date readDate32() throws IOException { + return new Date( readInt() * 24L * 60L * 60L * 1000L ); + } + public List readList( Class clazz ) throws IOException { int size = readVarInt(); @@ -292,7 +305,8 @@ private T readObject( Class clazz, int col ) throws IOException { } else if( clazz == DateTime.class ) { return ( T ) readDateTime(); } else if( clazz == Date.class ) { - return ( T ) readDate(); + boolean d32 = datetime32[col]; + return ( T ) ( d32 ? readDate32() : readDate() ); } else { throw new IllegalArgumentException( "unknown class " + clazz ); } @@ -326,7 +340,10 @@ public List readRow() throws IOException { row.add( switch( types ) { case DATETIME -> readDateTime(); - case DATE -> readDate(); + case DATE -> { + boolean d32 = datetime32[i]; + yield d32 ? readDate32() : readDate(); + } case BYTE -> readByte(); case SHORT -> readShort(); case INTEGER -> readInt(); From 057bb4160f08477b32fb95f53091bb13b1277eae Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 20 Mar 2026 14:49:11 +0200 Subject: [PATCH 21/28] fix: date --- .../oap/logstream/formats/rowbinary/RowBinaryInputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java index 2045f0d28..885da5eef 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/formats/rowbinary/RowBinaryInputStream.java @@ -83,7 +83,7 @@ private static void convertType( String rbType, ByteArrayList type, AtomicIntege case "Float64" -> Types.DOUBLE.id; case "String" -> Types.STRING.id; case "Date" -> Types.DATE.id; - case "Date2" -> { + case "Date32" -> { datetime32.set( true ); yield Types.DATE.id; } From 9650c0bd42c7b5378ada924334bf10fdfd1a3a93 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Thu, 2 Apr 2026 21:08:24 +0300 Subject: [PATCH 22/28] replicas --- .../logstream/disk/DiskLoggerBackendTest.java | 6 ++--- .../java/oap/logstream/LogIdTemplate.java | 3 ++- .../java/oap/kubernetes/ReplicaUtils.java | 24 +++++++++++++++++ oap-stdlib/src/main/java/oap/system/Env.java | 11 ++++---- .../java/oap/kubernetes/ReplicaUtilsTest.java | 26 +++++++++++++++++++ 5 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java create mode 100644 oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index bf721c4fb..d26aedb27 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -86,12 +86,12 @@ public void testPatternByType() throws IOException { backend.refresh( true ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e-1_03.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e0-1_03.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e-1_16.parquet.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e0-1_16.parquet.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) @@ -113,7 +113,7 @@ public void testWriteSync() throws IOException { //log a line to lfn1 logger.log( "lfn1", Map.of(), "log", headers, types, lines ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java index 412638bf6..ec3473079 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java @@ -25,6 +25,7 @@ package oap.logstream; import oap.io.Closeables; +import oap.kubernetes.ReplicaUtils; import oap.net.Inet; import org.apache.velocity.VelocityContext; import org.apache.velocity.app.VelocityEngine; @@ -106,7 +107,7 @@ public void init( VelocityContext context, DateTime time, Timestamp timestamp, i } public String getHashWithVersion( int version ) { - return "%x-%d".formatted( logId.getHash(), version ); + return "%x%d-%d".formatted( logId.getHash(), ReplicaUtils.getReplicaId(), version ); } private String print2Chars( int v ) { diff --git a/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java new file mode 100644 index 000000000..9f9ed805e --- /dev/null +++ b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java @@ -0,0 +1,24 @@ +package oap.kubernetes; + +import oap.system.Env; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class ReplicaUtils { + public static final Pattern POD_NAME_PATTERN = Pattern.compile( "^.+-(\\d+)$" ); + + public static int getReplicaId() { + String podName = Env.get( "POD_NAME" ).orElse( null ); + if( podName == null ) { + return 0; + } + + Matcher matcher = POD_NAME_PATTERN.matcher( podName ); + if( matcher.find() ) { + return Integer.parseInt( matcher.group( 1 ) ); + } + + return 0; + } +} diff --git a/oap-stdlib/src/main/java/oap/system/Env.java b/oap-stdlib/src/main/java/oap/system/Env.java index 867aca301..cfa946800 100644 --- a/oap-stdlib/src/main/java/oap/system/Env.java +++ b/oap-stdlib/src/main/java/oap/system/Env.java @@ -28,6 +28,7 @@ import org.apache.commons.lang3.SystemUtils; import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.Collections; import java.util.Map; import java.util.Optional; @@ -60,18 +61,18 @@ public static void set( String name, String value ) { Field theEnvironmentField = processEnvironmentClass.getDeclaredField( "theEnvironment" ); theEnvironmentField.setAccessible( true ); @SuppressWarnings( "unchecked" ) - var env = ( Map ) theEnvironmentField.get( null ); + Map env = ( Map ) theEnvironmentField.get( null ); if( SystemUtils.IS_OS_WINDOWS ) if( value == null ) env.remove( name ); else env.put( name, value ); else { - var variableClass = Class.forName( "java.lang.ProcessEnvironment$Variable" ); - var convertToVariable = variableClass.getMethod( "valueOf", String.class ); + Class variableClass = Class.forName( "java.lang.ProcessEnvironment$Variable" ); + Method convertToVariable = variableClass.getMethod( "valueOf", String.class ); convertToVariable.setAccessible( true ); - var valueClass = Class.forName( "java.lang.ProcessEnvironment$Value" ); - var convertToValue = valueClass.getMethod( "valueOf", String.class ); + Class valueClass = Class.forName( "java.lang.ProcessEnvironment$Value" ); + Method convertToValue = valueClass.getMethod( "valueOf", String.class ); convertToValue.setAccessible( true ); if( value == null ) env.remove( convertToVariable.invoke( null, name ) ); diff --git a/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java b/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java new file mode 100644 index 000000000..a0ccdbbdb --- /dev/null +++ b/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java @@ -0,0 +1,26 @@ +package oap.kubernetes; + +import oap.system.Env; +import org.testng.annotations.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class ReplicaUtilsTest { + @Test + public void testGetReplicaId() { + try { + Env.set( "POD_NAME", null ); + assertThat( ReplicaUtils.getReplicaId() ).isZero(); + + Env.set( "POD_NAME", "aasdasd-6-ff" ); + assertThat( ReplicaUtils.getReplicaId() ).isZero(); + + Env.set( "POD_NAME", "aasdasd-6-0" ); + assertThat( ReplicaUtils.getReplicaId() ).isZero(); + Env.set( "POD_NAME", "aasdasd-6-12" ); + assertThat( ReplicaUtils.getReplicaId() ).isEqualTo( 12 ); + } finally { + Env.set( "POD_NAME", null ); + } + } +} From cfa7ea70a9165b5c156b913f186de7d43d3c9d25 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Thu, 2 Apr 2026 21:09:36 +0300 Subject: [PATCH 23/28] replicas --- .../src/main/java/oap/logstream/disk/DiskLoggerBackend.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java index c771db5d1..e4efd558e 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java @@ -69,6 +69,10 @@ import static oap.logstream.AvailabilityReport.State.FAILED; import static oap.logstream.AvailabilityReport.State.OPERATIONAL; +/** + * replica ENV: + *
  • POD_NAME
  • + */ @Slf4j public class DiskLoggerBackend extends AbstractLoggerBackend implements Cloneable, AutoCloseable { public static final int DEFAULT_BUFFER = 1024 * 100; From e1652504c917b2e700ab2610ff988e0bad1b7aaf Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Thu, 2 Apr 2026 21:13:02 +0300 Subject: [PATCH 24/28] replicas --- .../main/java/oap/logstream/LogIdTemplate.java | 2 +- .../main/java/oap/kubernetes/ReplicaUtils.java | 9 ++------- .../java/oap/kubernetes/ReplicaUtilsTest.java | 17 +++-------------- 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java index ec3473079..d4efaa652 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java @@ -107,7 +107,7 @@ public void init( VelocityContext context, DateTime time, Timestamp timestamp, i } public String getHashWithVersion( int version ) { - return "%x%d-%d".formatted( logId.getHash(), ReplicaUtils.getReplicaId(), version ); + return "%x%d-%d".formatted( logId.getHash(), ReplicaUtils.getReplicaId( Inet.hostname() ), version ); } private String print2Chars( int v ) { diff --git a/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java index 9f9ed805e..708a63187 100644 --- a/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java +++ b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java @@ -8,13 +8,8 @@ public class ReplicaUtils { public static final Pattern POD_NAME_PATTERN = Pattern.compile( "^.+-(\\d+)$" ); - public static int getReplicaId() { - String podName = Env.get( "POD_NAME" ).orElse( null ); - if( podName == null ) { - return 0; - } - - Matcher matcher = POD_NAME_PATTERN.matcher( podName ); + public static int getReplicaId( String hostname ) { + Matcher matcher = POD_NAME_PATTERN.matcher( hostname ); if( matcher.find() ) { return Integer.parseInt( matcher.group( 1 ) ); } diff --git a/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java b/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java index a0ccdbbdb..fe68454e9 100644 --- a/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java +++ b/oap-stdlib/src/test/java/oap/kubernetes/ReplicaUtilsTest.java @@ -1,6 +1,5 @@ package oap.kubernetes; -import oap.system.Env; import org.testng.annotations.Test; import static org.assertj.core.api.Assertions.assertThat; @@ -8,19 +7,9 @@ public class ReplicaUtilsTest { @Test public void testGetReplicaId() { - try { - Env.set( "POD_NAME", null ); - assertThat( ReplicaUtils.getReplicaId() ).isZero(); + assertThat( ReplicaUtils.getReplicaId( "aasdasd-6-ff" ) ).isZero(); - Env.set( "POD_NAME", "aasdasd-6-ff" ); - assertThat( ReplicaUtils.getReplicaId() ).isZero(); - - Env.set( "POD_NAME", "aasdasd-6-0" ); - assertThat( ReplicaUtils.getReplicaId() ).isZero(); - Env.set( "POD_NAME", "aasdasd-6-12" ); - assertThat( ReplicaUtils.getReplicaId() ).isEqualTo( 12 ); - } finally { - Env.set( "POD_NAME", null ); - } + assertThat( ReplicaUtils.getReplicaId( "aasdasd-6-0" ) ).isZero(); + assertThat( ReplicaUtils.getReplicaId( "aasdasd-6-12" ) ).isEqualTo( 12 ); } } From ae08fcf0c9f02797ada8c6d16d52722af089f713 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 3 Apr 2026 07:37:36 +0300 Subject: [PATCH 25/28] replicas --- oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java index 708a63187..a6670585c 100644 --- a/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java +++ b/oap-stdlib/src/main/java/oap/kubernetes/ReplicaUtils.java @@ -1,7 +1,5 @@ package oap.kubernetes; -import oap.system.Env; - import java.util.regex.Matcher; import java.util.regex.Pattern; From b981d9f7918cece998fdef46ff1e4a2f0c0cc188 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 3 Apr 2026 08:08:17 +0300 Subject: [PATCH 26/28] replicas --- .../java/oap/logstream/LoggerJsonTest.java | 2 +- .../test/java/oap/logstream/LoggerTest.java | 18 +++++++-------- .../logstream/disk/AbstractWriterTest.java | 13 ++++++----- .../logstream/disk/DiskLoggerBackendTest.java | 6 ++--- .../logstream/disk/RowBinaryWriterTest.java | 2 +- .../java/oap/logstream/LogIdTemplate.java | 12 +++++----- .../oap/logstream/disk/AbstractWriter.java | 13 ++++++----- .../oap/logstream/disk/DiskLoggerBackend.java | 22 ++++++++++--------- .../oap/logstream/disk/RowBinaryWriter.java | 7 +++--- 9 files changed, 51 insertions(+), 44 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java index 87144edd4..5d68bba1e 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java @@ -61,7 +61,7 @@ public void diskJSON() throws IOException { String content = "{\"title\":\"response\",\"status\":false,\"values\":[1,2,3]}"; String[] headers = new String[] { "test" }; byte[][] types = new byte[][] { new byte[] { Types.STRING.id } }; - try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ) ) { + try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER, "localhost" ) ) { Logger logger = new Logger( backend ); SimpleJson o = contentOfTestResource( getClass(), "simple_json.json", ofJson( SimpleJson.class ) ); diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java index 17ba092ac..d606a52a7 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java @@ -78,7 +78,7 @@ public void disk() throws IOException { byte[] line2 = Compression.gzip( RowBinaryUtils.line( lineData2 ) ); String[] headers2 = new String[] { "TIMESTAMP", "REQUEST_ID2" }; byte[][] types2 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id } }; - try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ) ) { + try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER, "localhost" ) ) { Logger logger = new Logger( backend ); logger.log( "lfn1", Map.of(), "log", headers1, types1, line1 ); logger.log( "lfn2", Map.of(), "log", headers1, types1, line1 ); @@ -88,13 +88,13 @@ public void disk() throws IOException { logger.log( "lfn1", Map.of(), "log", headers2, types2, line2 ); } - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); } @@ -114,7 +114,7 @@ public void net() throws IOException { String[] headers2 = new String[] { "TIMESTAMP", "REQUEST_ID2" }; byte[][] types2 = new byte[][] { new byte[] { Types.DATETIME.id }, new byte[] { Types.STRING.id } }; - try( DiskLoggerBackend serverBackend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ); + try( DiskLoggerBackend serverBackend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER, "localhost" ); SocketLoggerServer server = new SocketLoggerServer( serverBackend ); NioHttpServer mServer = new NioHttpServer( new NioHttpServer.DefaultPort( port ) ); MessageHttpHandler messageHttpHandler = new MessageHttpHandler( mServer, "/messages", controlStatePath, List.of( server ), -1 ); @@ -158,11 +158,11 @@ public void net() throws IOException { } assertEventually( 10, 1000, () -> - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ) ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java index 372b5be1d..e11b42c17 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java @@ -26,6 +26,7 @@ import oap.logstream.LogId; import oap.logstream.Timestamp; +import oap.net.Inet; import oap.template.Types; import oap.util.Dates; import org.testng.annotations.Test; @@ -44,15 +45,15 @@ public void testFileName() { Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_FORMAT_TSV_GZ}-${INTERVAL} -${LOG_VERSION}-#{if}(${ORGANIZATION})${ORGANIZATION}#{else}UNKNOWN#{end}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_FORMAT_TSV_GZ}-${INTERVAL} -${LOG_VERSION}-#{if}(${ORGANIZATION})${ORGANIZATION}#{else}UNKNOWN#{end}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/tsv.gz-01-85594397-1-UNKNOWN.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_TIME_INTERVAL}.log.gz", lid1, Timestamp.BPH_6, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_TIME_INTERVAL}.log.gz", lid1, Timestamp.BPH_6, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/10.log.gz.rb.gz" ); } @@ -64,11 +65,11 @@ public void testFileNameConditional() { Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); LogId lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/01-85594397-1.tsv.gz.rb.gz" ); lid1 = new LogId( "ln", "lt", "chn", Map.of( "ORGANIZATION", "org1", "ACCOUNT", "acc1" ), h1Headers, strTypes ); - assertThat( AbstractWriter.currentPattern( LogFormat.PARQUET, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.PARQUET, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) .isEqualTo( "ln/org1/acc1/01-85594397-1.parquet.rb.gz" ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index d26aedb27..442d830df 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -55,7 +55,7 @@ public DiskLoggerBackendTest() { @Test public void spaceAvailable() { - try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), Timestamp.BPH_12, 4000 ) ) { + try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), Timestamp.BPH_12, 4000, "localhost" ) ) { backend.start(); assertTrue( backend.isLoggingAvailable() ); @@ -73,7 +73,7 @@ public void testPatternByType() throws IOException { byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; byte[] lines = Compression.gzip( RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ) ); - try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), Timestamp.BPH_12, 4000 ) ) { + try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), Timestamp.BPH_12, 4000, "localhost" ) ) { backend.filePattern = "${LOG_TYPE}_${LOG_VERSION}_${INTERVAL}.tsv.gz"; backend.filePatternByType.put( "LOG_TYPE_WITH_DIFFERENT_FILE_PATTERN", new DiskLoggerBackend.FilePatternConfiguration( "${LOG_TYPE}_${LOG_VERSION}_${MINUTE}.parquet" ) ); @@ -106,7 +106,7 @@ public void testWriteSync() throws IOException { byte[][] types = new byte[][] { new byte[] { Types.STRING.id }, new byte[] { Types.STRING.id } }; byte[] lines = Compression.gzip( RowBinaryUtils.lines( List.of( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ) ) ); //init new logger - try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER ) ) { + try( DiskLoggerBackend backend = new DiskLoggerBackend( testDirectoryFixture.testPath( "logs" ), BPH_12, DEFAULT_BUFFER, "localhost" ) ) { backend.start(); Logger logger = new Logger( backend ); diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java index 3700c212d..c8175d18e 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java @@ -78,7 +78,7 @@ public void testWrite() throws IOException { LogId logId = new LogId( "", "log", "log", Map.of( "p", "1" ), headers, types ); Path logs = testDirectoryFixture.testPath( "logs" ); - try( RowBinaryWriter writer = new RowBinaryWriter( logs, FILE_PATTERN, logId, 1024, BPH_12, 20 ) ) { + try( RowBinaryWriter writer = new RowBinaryWriter( logs, FILE_PATTERN, logId, 1024, BPH_12, 20, "localhost" ) ) { writer.write( CURRENT_PROTOCOL_VERSION, content1 ); writer.write( CURRENT_PROTOCOL_VERSION, content2 ); } diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java index d4efaa652..0eeb0e683 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/LogIdTemplate.java @@ -66,7 +66,7 @@ public LogIdTemplate addVariables( Map variables ) { return this; } - public String render( String template, DateTime time, Timestamp timestamp, int version ) { + public String render( String template, DateTime time, Timestamp timestamp, int version, String hostname ) { VelocityContext context = new VelocityContext(); EventCartridge eventCartridge = new EventCartridge(); context.attachEventCartridge( eventCartridge ); @@ -77,7 +77,7 @@ public Object referenceInsert( Context context, String s, Object o ) { } } ); - init( context, time, timestamp, version ); + init( context, time, timestamp, version, hostname ); variables.forEach( context::put ); @@ -88,9 +88,9 @@ public Object referenceInsert( Context context, String s, Object o ) { return writer.toString(); } - public void init( VelocityContext context, DateTime time, Timestamp timestamp, int version ) { + public void init( VelocityContext context, DateTime time, Timestamp timestamp, int version, String hostname ) { context.put( "LOG_TYPE", logId.logType ); - context.put( "LOG_VERSION", getHashWithVersion( version ) ); + context.put( "LOG_VERSION", getHashWithVersion( version, hostname ) ); context.put( "SERVER_HOST", Inet.HOSTNAME ); context.put( "CLIENT_HOST", logId.clientHostname ); context.put( "YEAR", String.valueOf( time.getYear() ) ); @@ -106,8 +106,8 @@ public void init( VelocityContext context, DateTime time, Timestamp timestamp, i logId.properties.forEach( context::put ); } - public String getHashWithVersion( int version ) { - return "%x%d-%d".formatted( logId.getHash(), ReplicaUtils.getReplicaId( Inet.hostname() ), version ); + public String getHashWithVersion( int version, String hostname ) { + return "%x%d-%d".formatted( logId.getHash(), ReplicaUtils.getReplicaId( hostname ), version ); } private String print2Chars( int v ) { diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java index 6e2348f8a..4db59f65a 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java @@ -34,6 +34,7 @@ import oap.logstream.LogStreamProtocol.ProtocolVersion; import oap.logstream.LoggerException; import oap.logstream.Timestamp; +import oap.net.Inet; import oap.util.Dates; import org.codehaus.plexus.util.StringUtils; import org.joda.time.DateTime; @@ -55,6 +56,7 @@ public abstract class AbstractWriter implements Closeable { protected final int bufferSize; protected final Stopwatch stopwatch = new Stopwatch(); protected final int maxVersions; + protected final String hostname; protected final ReentrantLock lock = new ReentrantLock(); protected T out; protected Path outFilename; @@ -63,11 +65,12 @@ public abstract class AbstractWriter implements Closeable { protected boolean closed = false; protected AbstractWriter( LogFormat logFormat, Path logDirectory, String filePattern, LogId logId, int bufferSize, Timestamp timestamp, - int maxVersions ) { + int maxVersions, String hostname ) { this.logFormat = logFormat; this.logDirectory = logDirectory; this.filePattern = filePattern; this.maxVersions = maxVersions; + this.hostname = hostname; log.trace( "filePattern {}", filePattern ); Preconditions.checkArgument( filePattern.contains( "${LOG_VERSION}" ) ); @@ -80,7 +83,7 @@ protected AbstractWriter( LogFormat logFormat, Path logDirectory, String filePat } @SneakyThrows - static String currentPattern( LogFormat logFormat, String filePattern, LogId logId, Timestamp timestamp, int version, DateTime time ) { + static String currentPattern( LogFormat logFormat, String filePattern, LogId logId, Timestamp timestamp, int version, DateTime time, String hostname ) { String suffix = filePattern; if( filePattern.startsWith( "/" ) && filePattern.endsWith( "/" ) ) suffix = suffix.substring( 1 ); else if( !filePattern.startsWith( "/" ) && !logId.filePrefixPattern.endsWith( "/" ) ) suffix = "/" + suffix; @@ -95,15 +98,15 @@ static String currentPattern( LogFormat logFormat, String filePattern, LogId log logIdTemplate .addVariable( "LOG_FORMAT", logFormat.extension ) .addVariable( "LOG_FORMAT_" + logFormat.name(), logFormat.extension ); - return logIdTemplate.render( StringUtils.replace( pattern, " ", "" ), time, timestamp, version ); + return logIdTemplate.render( StringUtils.replace( pattern, " ", "" ), time, timestamp, version, hostname ); } protected String currentPattern( int version ) { - return currentPattern( logFormat, filePattern, logId, timestamp, version, Dates.nowUtc() ); + return currentPattern( logFormat, filePattern, logId, timestamp, version, Dates.nowUtc(), hostname ); } protected String currentPattern() { - return currentPattern( logFormat, filePattern, logId, timestamp, fileVersion, Dates.nowUtc() ); + return currentPattern( logFormat, filePattern, logId, timestamp, fileVersion, Dates.nowUtc(), hostname ); } public void write( ProtocolVersion protocolVersion, byte[] buffer ) throws LoggerException { diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java index e4efd558e..c2dff1e06 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/DiskLoggerBackend.java @@ -90,19 +90,21 @@ public class DiskLoggerBackend extends AbstractLoggerBackend implements Cloneabl public long refreshInitDelay = Dates.s( 10 ); public long refreshPeriod = Dates.s( 10 ); public volatile boolean closed; + public final String hostname; - public DiskLoggerBackend( Path logDirectory, Timestamp timestamp, int bufferSize ) { - this( logDirectory, new WriterConfiguration(), timestamp, bufferSize ); + public DiskLoggerBackend( Path logDirectory, Timestamp timestamp, int bufferSize, String hostname ) { + this( logDirectory, new WriterConfiguration(), timestamp, bufferSize, hostname ); } @SuppressWarnings( "unchecked" ) - public DiskLoggerBackend( Path logDirectory, WriterConfiguration writerConfiguration, Timestamp timestamp, int bufferSize ) { - log.info( "logDirectory '{}' timestamp {} bufferSize {} writerConfiguration {} refreshInitDelay {} refreshPeriod {}", + public DiskLoggerBackend( Path logDirectory, WriterConfiguration writerConfiguration, Timestamp timestamp, int bufferSize, String hostname ) { + this.hostname = hostname; + log.info( "logDirectory '{}' timestamp {} bufferSize {} writerConfiguration {} refreshInitDelay {} refreshPeriod {} hostname {}", logDirectory, timestamp, FileUtils.byteCountToDisplaySize( bufferSize ), writerConfiguration, - Dates.durationToString( refreshInitDelay ), Dates.durationToString( refreshPeriod ) ); + Dates.durationToString( refreshInitDelay ), Dates.durationToString( refreshPeriod ), hostname ); - this.logDirectory = logDirectory; + this.logDirectory = logDirectory.resolve( hostname ); this.writerConfiguration = writerConfiguration; this.timestamp = timestamp; this.bufferSize = bufferSize; @@ -120,10 +122,10 @@ public AbstractWriter load( LogId id ) { log.trace( "new writer id '{}' filePattern '{}'", id, fp ); - return new RowBinaryWriter( logDirectory, fp.path, id, bufferSize, timestamp, maxVersions ); + return new RowBinaryWriter( DiskLoggerBackend.this.logDirectory, fp.path, id, bufferSize, timestamp, maxVersions, hostname ); } } ); - Metrics.gauge( "logstream_logging_disk_writers", List.of( Tag.of( "path", logDirectory.toString() ) ), + Metrics.gauge( "logstream_logging_disk_writers", List.of( Tag.of( "path", this.logDirectory.toString() ) ), writers, Cache::size ); pool = Executors.newScheduledThreadPool( 1, "disk-logger-backend" ); @@ -146,8 +148,8 @@ private void filePatternValidation( String type, String filePattern ) { LogId logId = new LogId( "", type, "", Map.of(), new String[] {}, new byte[][] {} ); DateTime time = Dates.nowUtc(); - String currentPattern = AbstractWriter.currentPattern( LogFormat.TSV_GZ, filePattern, logId, timestamp, 0, time ); - String previousPattern = AbstractWriter.currentPattern( LogFormat.TSV_GZ, filePattern, logId, timestamp, 0, time.minusMinutes( 60 / timestamp.bucketsPerHour ).minusSeconds( 1 ) ); + String currentPattern = AbstractWriter.currentPattern( LogFormat.TSV_GZ, filePattern, logId, timestamp, 0, time, hostname ); + String previousPattern = AbstractWriter.currentPattern( LogFormat.TSV_GZ, filePattern, logId, timestamp, 0, time.minusMinutes( 60 / timestamp.bucketsPerHour ).minusSeconds( 1 ), hostname ); if( currentPattern.equals( previousPattern ) ) { log.error( "cp {}", currentPattern ); diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java index a55883e39..52157d527 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/RowBinaryWriter.java @@ -8,6 +8,7 @@ import oap.logstream.LoggerException; import oap.logstream.Timestamp; import oap.logstream.formats.rowbinary.RowBinaryOutputStream; +import oap.net.Inet; import oap.util.FastByteArrayOutputStream; import java.io.IOException; @@ -20,8 +21,8 @@ @Slf4j public class RowBinaryWriter extends AbstractWriter { - protected RowBinaryWriter( Path logDirectory, String filePattern, LogId logId, int bufferSize, Timestamp timestamp, int maxVersions ) { - super( LogFormat.ROW_BINARY_GZ, logDirectory, filePattern, logId, bufferSize, timestamp, maxVersions ); + public RowBinaryWriter( Path logDirectory, String filePattern, LogId logId, int bufferSize, Timestamp timestamp, int maxVersions, String hostname ) { + super( LogFormat.ROW_BINARY_GZ, logDirectory, filePattern, logId, bufferSize, timestamp, maxVersions, hostname ); } @Override @@ -36,7 +37,7 @@ public String write( LogStreamProtocol.ProtocolVersion protocolVersion, byte[] b Files.ensureDirectory( filename.getParent() ); out = FileChannel.open( filename, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE, StandardOpenOption.APPEND ); LogIdTemplate logIdTemplate = new LogIdTemplate( logId ); - new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion ) ).writeFor( filename ); + new LogMetadata( logId ).withProperty( "VERSION", logIdTemplate.getHashWithVersion( fileVersion, Inet.hostname() ) ).writeFor( filename ); FastByteArrayOutputStream outputStream = new FastByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream( outputStream ); From 9cd4c4e784c3f73e686580ab7c6c4d324aefce0e Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 3 Apr 2026 08:18:47 +0300 Subject: [PATCH 27/28] replicas --- .../src/main/java/oap/logstream/disk/AbstractWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java index 4db59f65a..15804224e 100644 --- a/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java +++ b/oap-formats/oap-logstream/oap-logstream/src/main/java/oap/logstream/disk/AbstractWriter.java @@ -34,7 +34,6 @@ import oap.logstream.LogStreamProtocol.ProtocolVersion; import oap.logstream.LoggerException; import oap.logstream.Timestamp; -import oap.net.Inet; import oap.util.Dates; import org.codehaus.plexus.util.StringUtils; import org.joda.time.DateTime; From 8d374ab8a8626f4dc19fc833ab4501e7fa2569c6 Mon Sep 17 00:00:00 2001 From: "igor.petrenko" Date: Fri, 3 Apr 2026 08:30:06 +0300 Subject: [PATCH 28/28] replicas --- .../java/oap/logstream/LoggerJsonTest.java | 2 +- .../test/java/oap/logstream/LoggerTest.java | 16 ++++++------- .../logstream/disk/AbstractWriterTest.java | 23 +++++++++---------- .../logstream/disk/DiskLoggerBackendTest.java | 6 ++--- .../logstream/disk/RowBinaryWriterTest.java | 2 +- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java index 5d68bba1e..c26a943c8 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerJsonTest.java @@ -71,7 +71,7 @@ public void diskJSON() throws IOException { logger.log( "open_rtb_json", Map.of(), "request_response", headers, types, Compression.gzip( RowBinaryUtils.line( List.of( jsonContent ) ) ) ); } - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/open_rtb_json/2015-10/10/request_response_v3b5d9e1b-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), Encoding.GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/open_rtb_json/2015-10/10/request_response_v3b5d9e1b0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), Encoding.GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( content ) ); } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java index d606a52a7..85b69e599 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/LoggerTest.java @@ -88,13 +88,13 @@ public void disk() throws IOException { logger.log( "lfn1", Map.of(), "log", headers2, types2, line2 ); } - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); } @@ -134,7 +134,7 @@ public void net() throws IOException { client.syncMemory(); assertEventually( 50, 100, () -> assertFalse( logger.isLoggingAvailable() ) ); - assertFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ) ) + assertFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log_v1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ) ) .doesNotExist(); serverBackend.requiredFreeSpace = DEFAULT_FREE_SPACE_REQUIRED; @@ -158,11 +158,11 @@ public void net() throws IOException { } assertEventually( 10, 1000, () -> - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1, lineData1 ) ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn2/2015-10/10/log_v356dae4c0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData1 ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log2_v8a769cda0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( lineData2 ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java index e11b42c17..f5479a381 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/AbstractWriterTest.java @@ -26,7 +26,6 @@ import oap.logstream.LogId; import oap.logstream.Timestamp; -import oap.net.Inet; import oap.template.Types; import oap.util.Dates; import org.testng.annotations.Test; @@ -45,15 +44,15 @@ public void testFileName() { Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_FORMAT_TSV_GZ}-${INTERVAL} -${LOG_VERSION}-#{if}(${ORGANIZATION})${ORGANIZATION}#{else}UNKNOWN#{end}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) - .isEqualTo( "ln/tsv.gz-01-85594397-1-UNKNOWN.tsv.gz.rb.gz" ); + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_FORMAT_TSV_GZ}-${INTERVAL} -${LOG_VERSION}-#{if}(${ORGANIZATION})${ORGANIZATION}#{else}UNKNOWN#{end}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), "localhost" ) ) + .isEqualTo( "ln/tsv.gz-01-855943970-1-UNKNOWN.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) - .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) - .isEqualTo( "ln/01-85594397-1-.tsv.gz.rb.gz" ); + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), "localhost" ) ) + .isEqualTo( "ln/01-855943970-1-.tsv.gz.rb.gz" ); + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${INTERVAL}-${LOG_VERSION}-${ORGANIZATION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), "localhost" ) ) + .isEqualTo( "ln/01-855943970-1-.tsv.gz.rb.gz" ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_TIME_INTERVAL}.log.gz", lid1, Timestamp.BPH_6, 1, Dates.nowUtc(), Inet.hostname() ) ) + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "${LOG_TIME_INTERVAL}.log.gz", lid1, Timestamp.BPH_6, 1, Dates.nowUtc(), "localhost" ) ) .isEqualTo( "ln/10.log.gz.rb.gz" ); } @@ -65,11 +64,11 @@ public void testFileNameConditional() { Dates.setTimeFixed( 2023, 1, 23, 21, 6, 0 ); LogId lid1 = new LogId( "ln", "lt", "chn", Map.of(), h1Headers, strTypes ); - assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) - .isEqualTo( "ln/01-85594397-1.tsv.gz.rb.gz" ); + assertThat( AbstractWriter.currentPattern( LogFormat.TSV_GZ, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), "localhost" ) ) + .isEqualTo( "ln/01-855943970-1.tsv.gz.rb.gz" ); lid1 = new LogId( "ln", "lt", "chn", Map.of( "ORGANIZATION", "org1", "ACCOUNT", "acc1" ), h1Headers, strTypes ); - assertThat( AbstractWriter.currentPattern( LogFormat.PARQUET, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), Inet.hostname() ) ) - .isEqualTo( "ln/org1/acc1/01-85594397-1.parquet.rb.gz" ); + assertThat( AbstractWriter.currentPattern( LogFormat.PARQUET, "#{if}(${ORGANIZATION}&&${ACCOUNT})${ORGANIZATION}/${ACCOUNT}/#{end}${INTERVAL}-${LOG_VERSION}.${LOG_FORMAT}", lid1, Timestamp.BPH_12, 1, Dates.nowUtc(), "localhost" ) ) + .isEqualTo( "ln/org1/acc1/01-855943970-1.parquet.rb.gz" ); } } diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java index 442d830df..f64557bcc 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/DiskLoggerBackendTest.java @@ -86,12 +86,12 @@ public void testPatternByType() throws IOException { backend.refresh( true ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_default_file_pattern_59193f7e0-1_03.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/log_type_with_default_file_pattern_59193f7e0-1_03.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/log_type_with_different_file_pattern_59193f7e0-1_16.parquet.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/log_type_with_different_file_pattern_59193f7e0-1_16.parquet.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) @@ -113,7 +113,7 @@ public void testWriteSync() throws IOException { //log a line to lfn1 logger.log( "lfn1", Map.of(), "log", headers, types, lines ); - assertRowBinaryFile( testDirectoryFixture.testPath( "logs/lfn1/2015-10/10/log_v59193f7e0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) + assertRowBinaryFile( testDirectoryFixture.testPath( "logs/localhost/lfn1/2015-10/10/log_v59193f7e0-1_" + HOSTNAME + "-2015-10-10-01-00.tsv.gz.rb.gz" ), GZIP ) .containsExactlyInAnyOrderEntriesOf( List.of( "12345678", "rrrr5678" ), List.of( "1", "2" ) diff --git a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java index c8175d18e..a2c89452c 100644 --- a/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java +++ b/oap-formats/oap-logstream/oap-logstream-test/src/test/java/oap/logstream/disk/RowBinaryWriterTest.java @@ -83,7 +83,7 @@ public void testWrite() throws IOException { writer.write( CURRENT_PROTOCOL_VERSION, content2 ); } - Path path = logs.resolve( "1-file-02-4cd64dae-1.rb.gz.rb.gz" ); + Path path = logs.resolve( "1-file-02-4cd64dae0-1.rb.gz.rb.gz" ); byte[] rb = Compression.ungzip( Files.readAllBytes( path ) );