From 47ecf26a14c7cf83582c2505f74bea56b6c353ae Mon Sep 17 00:00:00 2001 From: teodordelibasic-db Date: Mon, 19 Jan 2026 08:59:03 +0000 Subject: [PATCH] Initial commit Signed-off-by: teodordelibasic-db --- .github/dependabot.yml | 2 +- .github/workflows/push.yml | 42 + .github/workflows/test.yml | 20 +- .gitignore | 9 +- CHANGELOG.md | 280 ++++ NEXT_CHANGELOG.md | 24 +- NOTICE | 28 +- README.md | 924 ++++++++--- cli/README.md | 210 +++ cli/pom.xml | 181 +++ .../databricks/zerobus/cli/GenerateProto.java | 632 ++++++++ .../java/com/databricks/zerobus/cli/Main.java | 77 + .../zerobus/cli/GenerateProtoTest.java | 822 ++++++++++ common/pom.xml | 68 + .../common/http/DefaultHttpClient.java | 100 ++ .../zerobus/common/http/HttpClient.java | 72 + .../common/http/RetryingHttpClient.java | 120 ++ .../databricks/zerobus/common/json/Json.java | 435 ++++++ .../common/http/DefaultHttpClientTest.java | 387 +++++ .../common/http/RetryingHttpClientTest.java | 277 ++++ .../zerobus/common/json/JsonTest.java | 388 +++++ examples/README.md | 129 -- .../examples/BlockingIngestionExample.java | 111 -- .../examples/NonBlockingIngestionExample.java | 157 -- pom.xml | 430 ++--- sdk/examples/README.md | 246 +++ sdk/examples/pom.xml | 76 + .../examples/json/BatchRecordExample.java | 145 ++ .../examples/json/SingleRecordExample.java | 132 ++ .../proto/compiled/BatchRecordExample.java | 132 ++ .../proto/compiled/SingleRecordExample.java | 110 ++ .../proto/dynamic/BatchRecordExample.java | 230 +++ .../proto/dynamic/SingleRecordExample.java | 217 +++ .../examples/src/main/proto}/record.proto | 5 + sdk/pom.xml | 341 ++++ .../zerobus/NonRetriableException.java | 18 +- .../zerobus/StreamConfigurationOptions.java | 217 ++- .../com/databricks/zerobus/StreamState.java | 20 +- .../databricks/zerobus/TableProperties.java | 54 + .../databricks/zerobus/ZerobusException.java | 51 + .../com/databricks/zerobus/ZerobusSdk.java | 383 +++++ .../databricks/zerobus/ZerobusSdkBuilder.java | 84 + .../zerobus/ZerobusSdkStubFactory.java | 228 +++ .../zerobus/ZerobusStreamBuilder.java | 898 +++++++++++ .../zerobus/auth/HeadersProvider.java | 78 + .../zerobus/auth/OAuthHeadersProvider.java | 115 ++ .../databricks/zerobus/auth/TokenFactory.java | 225 +++ .../com/databricks/zerobus/batch/Batch.java | 13 + .../zerobus/batch/PrimaryBatch.java | 18 + .../zerobus/batch/SecondaryBatch.java | 18 + .../zerobus/batch/json/MapBatch.java | 68 + .../zerobus/batch/json/StringBatch.java | 66 + .../zerobus/batch/proto/BytesBatch.java | 64 + .../zerobus/batch/proto/MessageBatch.java | 71 + .../zerobus/schema/BaseTableProperties.java | 35 + .../zerobus/schema/JsonTableProperties.java | 37 + .../zerobus/schema/ProtoTableProperties.java | 126 ++ .../zerobus/stream}/BackgroundTask.java | 2 +- .../zerobus/stream/BaseZerobusStream.java | 1129 ++++++++++++++ .../zerobus/stream/DualTypeStream.java | 95 ++ .../zerobus/stream/EncodedBatch.java | 126 ++ .../zerobus/stream/GrpcErrorHandling.java | 21 + .../zerobus/stream/InflightBatch.java | 64 + .../zerobus/stream/JsonZerobusStream.java | 368 +++++ .../zerobus/stream/LandingZone.java | 331 ++++ .../zerobus/stream/ProtoZerobusStream.java | 373 +++++ .../zerobus/stream/StreamFailure.java | 35 + .../zerobus/stream/ZerobusStream.java | 209 +++ .../zerobus/tls/SecureTlsConfig.java | 50 + .../com/databricks/zerobus/tls/TlsConfig.java | 66 + .../src}/main/proto/zerobus_service.proto | 64 +- .../zerobus/AcknowledgmentTest.java | 159 ++ .../databricks/zerobus/BaseZerobusTest.java | 96 ++ .../databricks/zerobus/ConfigurationTest.java 
| 70 + .../databricks/zerobus/ErrorHandlingTest.java | 66 + .../databricks/zerobus/JsonIngestionTest.java | 275 ++++ .../databricks/zerobus/MockedGrpcServer.java | 58 +- .../zerobus/ProtoIngestionTest.java | 265 ++++ .../zerobus/StreamCreationTest.java | 221 +++ .../zerobus/StreamLifecycleTest.java | 137 ++ .../zerobus/ZerobusSdkStubFactoryTest.java | 84 + .../zerobus/auth/TokenFactoryTest.java | 541 +++++++ .../zerobus/stream/LandingZoneTest.java | 730 +++++++++ {src => sdk/src}/test/proto/test_table.proto | 0 .../databricks/zerobus/TableProperties.java | 62 - .../com/databricks/zerobus/TokenFactory.java | 162 -- .../databricks/zerobus/ZerobusException.java | 27 - .../com/databricks/zerobus/ZerobusSdk.java | 289 ---- .../zerobus/ZerobusSdkStubUtils.java | 164 -- .../com/databricks/zerobus/ZerobusStream.java | 1386 ----------------- .../zerobus/tools/GenerateProto.java | 706 --------- .../databricks/zerobus/ZerobusSdkTest.java | 421 ----- src/test/resources/simplelogger.properties | 20 - tools/README.md | 262 ---- tools/generate_proto.sh | 27 - 95 files changed, 15145 insertions(+), 4462 deletions(-) create mode 100644 cli/README.md create mode 100644 cli/pom.xml create mode 100644 cli/src/main/java/com/databricks/zerobus/cli/GenerateProto.java create mode 100644 cli/src/main/java/com/databricks/zerobus/cli/Main.java create mode 100644 cli/src/test/java/com/databricks/zerobus/cli/GenerateProtoTest.java create mode 100644 common/pom.xml create mode 100644 common/src/main/java/com/databricks/zerobus/common/http/DefaultHttpClient.java create mode 100644 common/src/main/java/com/databricks/zerobus/common/http/HttpClient.java create mode 100644 common/src/main/java/com/databricks/zerobus/common/http/RetryingHttpClient.java create mode 100644 common/src/main/java/com/databricks/zerobus/common/json/Json.java create mode 100644 common/src/test/java/com/databricks/zerobus/common/http/DefaultHttpClientTest.java create mode 100644 common/src/test/java/com/databricks/zerobus/common/http/RetryingHttpClientTest.java create mode 100644 common/src/test/java/com/databricks/zerobus/common/json/JsonTest.java delete mode 100644 examples/README.md delete mode 100644 examples/src/main/java/com/databricks/zerobus/examples/BlockingIngestionExample.java delete mode 100644 examples/src/main/java/com/databricks/zerobus/examples/NonBlockingIngestionExample.java create mode 100644 sdk/examples/README.md create mode 100644 sdk/examples/pom.xml create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/json/BatchRecordExample.java create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/json/SingleRecordExample.java create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/BatchRecordExample.java create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/SingleRecordExample.java create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/BatchRecordExample.java create mode 100644 sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/SingleRecordExample.java rename {examples => sdk/examples/src/main/proto}/record.proto (50%) create mode 100644 sdk/pom.xml rename {src => sdk/src}/main/java/com/databricks/zerobus/NonRetriableException.java (53%) rename {src => sdk/src}/main/java/com/databricks/zerobus/StreamConfigurationOptions.java (55%) rename {src => sdk/src}/main/java/com/databricks/zerobus/StreamState.java (52%) create mode 100644 
sdk/src/main/java/com/databricks/zerobus/TableProperties.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/ZerobusException.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/ZerobusSdk.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/ZerobusSdkBuilder.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/ZerobusSdkStubFactory.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/ZerobusStreamBuilder.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/auth/HeadersProvider.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/auth/OAuthHeadersProvider.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/auth/TokenFactory.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/Batch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/PrimaryBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/SecondaryBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/json/MapBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/json/StringBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/proto/BytesBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/batch/proto/MessageBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/schema/BaseTableProperties.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/schema/JsonTableProperties.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/schema/ProtoTableProperties.java rename {src/main/java/com/databricks/zerobus => sdk/src/main/java/com/databricks/zerobus/stream}/BackgroundTask.java (98%) create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/BaseZerobusStream.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/DualTypeStream.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/EncodedBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/GrpcErrorHandling.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/InflightBatch.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/JsonZerobusStream.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/LandingZone.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/ProtoZerobusStream.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/StreamFailure.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/stream/ZerobusStream.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/tls/SecureTlsConfig.java create mode 100644 sdk/src/main/java/com/databricks/zerobus/tls/TlsConfig.java rename {src => sdk/src}/main/proto/zerobus_service.proto (75%) create mode 100644 sdk/src/test/java/com/databricks/zerobus/AcknowledgmentTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/BaseZerobusTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/ConfigurationTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/ErrorHandlingTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/JsonIngestionTest.java rename {src => sdk/src}/test/java/com/databricks/zerobus/MockedGrpcServer.java (87%) create mode 100644 sdk/src/test/java/com/databricks/zerobus/ProtoIngestionTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/StreamCreationTest.java create mode 100644 
sdk/src/test/java/com/databricks/zerobus/StreamLifecycleTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/ZerobusSdkStubFactoryTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/auth/TokenFactoryTest.java create mode 100644 sdk/src/test/java/com/databricks/zerobus/stream/LandingZoneTest.java rename {src => sdk/src}/test/proto/test_table.proto (100%) delete mode 100644 src/main/java/com/databricks/zerobus/TableProperties.java delete mode 100644 src/main/java/com/databricks/zerobus/TokenFactory.java delete mode 100644 src/main/java/com/databricks/zerobus/ZerobusException.java delete mode 100644 src/main/java/com/databricks/zerobus/ZerobusSdk.java delete mode 100644 src/main/java/com/databricks/zerobus/ZerobusSdkStubUtils.java delete mode 100644 src/main/java/com/databricks/zerobus/ZerobusStream.java delete mode 100644 src/main/java/com/databricks/zerobus/tools/GenerateProto.java delete mode 100644 src/test/java/com/databricks/zerobus/ZerobusSdkTest.java delete mode 100644 src/test/resources/simplelogger.properties delete mode 100644 tools/README.md delete mode 100755 tools/generate_proto.sh
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index daec318..8a24324 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -3,4 +3,4 @@ updates:
   - package-ecosystem: "maven"
     directory: "/"
     schedule:
-      interval: "daily"
+      interval: "monthly"
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 89ee4b0..ec83c91 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -1,11 +1,16 @@
 name: build

 on:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, synchronize]
   merge_group:
     types: [checks_requested]

+permissions:
+  contents: write
+
 jobs:
   tests-ubuntu:
     uses: ./.github/workflows/test.yml
@@ -51,3 +56,40 @@ jobs:
       run: |
         echo "Code formatting issues detected. Run 'mvn spotless:apply' to fix."
         exit 1
+
+  update-coverage-badge:
+    needs: [tests-ubuntu]
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    runs-on:
+      group: databricks-protected-runner-group
+      labels: linux-ubuntu-latest
+    steps:
+      - name: Checkout badges branch
+        uses: actions/checkout@v4
+        with:
+          ref: badges
+          fetch-depth: 0
+
+      - name: Create badges branch if it doesn't exist
+        run: |
+          if ! git rev-parse --verify badges >/dev/null 2>&1; then
+            git checkout --orphan badges
+            git rm -rf . || true
+            mkdir -p .github/badges
+            git commit --allow-empty -m "Initialize badges branch"
+            git push origin badges
+          fi
+
+      - name: Download badge artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: coverage-badge
+          path: .github/badges
+
+      - name: Commit and push badge
+        run: |
+          git config --local user.name "github-actions[bot]"
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git add .github/badges/jacoco.svg
+          git diff --staged --quiet || git commit -m "Update coverage badge"
+          git push origin badges
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4c1a263..86e8d41 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -34,8 +34,24 @@ jobs:
       - name: Build with Maven
         run: mvn clean compile

-      - name: Run tests
-        run: mvn test
+      - name: Run tests with coverage
+        run: mvn test jacoco:report
+
+      - name: Generate JaCoCo badge
+        id: jacoco
+        uses: cicirello/jacoco-badge-generator@v2
+        with:
+          jacoco-csv-file: target/site/jacoco/jacoco.csv
+          badges-directory: .github/badges
+          generate-summary: true
+
+      - name: Upload badge artifact
+        if: inputs.javaVersion == '17' && inputs.os == 'linux-ubuntu-latest'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-badge
+          path: .github/badges/jacoco.svg
+          retention-days: 1

       - name: Upload test results
         if: always()
diff --git a/.gitignore b/.gitignore
index 5b02ee9..7791355 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,20 +6,17 @@ pom.xml.versionsBackup
 pom.xml.next
 release.properties
 dependency-reduced-pom.xml
-buildNumber.properties
-.mvn/timing.properties
-.mvn/wrapper/maven-wrapper.jar

 # IDE
 .idea/
 *.iml
-*.iws
-*.ipr
 .vscode/
+.metals/
+.bloop/
+.bazelbsp/
 .settings/
 .project
 .classpath

 # OS
 .DS_Store
-Thumbs.db
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 450ad94..498f1a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,285 @@
 # Version changelog

+## Release v0.2.0
+
+### New Features and Improvements
+
+- **New ingestion API**: New stream classes have ingestion methods that return the offset directly
+  - All `ingest()` and `ingestBatch()` methods return a `long` offset ID
+  - Returns immediately after the SDK accepts the record
+  - Use `waitForOffset(long)` to wait for server acknowledgment if needed
+  - Legacy `ZerobusStream.ingestRecord()` still returns `CompletableFuture<Void>` for backwards compatibility
+  - `ZerobusStream.ingestRecord()` is deprecated and will be changed to also return a `long` offset ID in a future release
+- **Wait for Offset**: Added `waitForOffset(long offset)` method
+  - Blocks until the specified offset is acknowledged by the server
+  - Enables fine-grained control over durability guarantees
+  - Use with `ingest()` / `ingestBatch()` for explicit acknowledgment control
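+  For example (an illustrative sketch; `stream` and `record` stand for an open typed stream and a matching record, as in the README examples):
+  ```java
+  long offset = stream.ingest(record); // returns as soon as the SDK accepts the record
+  stream.waitForOffset(offset);        // blocks until the server acknowledges that offset
+  ```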
+- **Batch API**: Consistent `ingestBatch()` API using wrapper types
+  - Batch interface hierarchy: `Batch` base interface, `PrimaryBatch<P>` and `SecondaryBatch<S>` marker interfaces
+  - All batch classes in `com.databricks.zerobus.batch` package
+  - `ProtoZerobusStream.ingestBatch(PrimaryBatch<T>)` / `ingestBatch(MessageBatch)` for proto message batches
+  - `ProtoZerobusStream.ingestBatch(BytesBatch)` for pre-serialized bytes batches
+  - `JsonZerobusStream.ingestBatch(PrimaryBatch<Map<String, Object>>)` / `ingestBatch(MapBatch)` for Map batches
+  - `JsonZerobusStream.ingestBatch(StringBatch)` for JSON string batches
+  - Wrapper types (`MessageBatch`, `BytesBatch`, `StringBatch`, `MapBatch`) provide consistent API and avoid Java type erasure issues
+  - Batch is assigned a single offset ID and acknowledged atomically
+  - Returns the offset ID directly for explicit acknowledgment control
+- **JSON Record Support**: Added `JsonZerobusStream` for ingesting JSON records directly
+  - Use `sdk.streamBuilder(tableName).json()` to create a JSON stream
+  - Type-safe `ingest(String jsonRecord)` method accepts JSON strings
+  - JSON records are sent directly to the server without protobuf encoding
+  - Simplifies integration when records are already in JSON format
+- **Type Widening for Ingest Methods**: Extended `ingest()` and `ingestBatch()` to accept multiple input types
+  - `ProtoZerobusStream.ingest(byte[])` accepts pre-serialized protobuf bytes directly
+  - `JsonZerobusStream.ingest(Map<String, Object>)` accepts Maps that are serialized to JSON automatically
+  - Useful when receiving pre-serialized data from Kafka or other systems
+- **SDK Builder Pattern**: New builder for creating `ZerobusSdk` instances
+  - `ZerobusSdk.builder(serverEndpoint, unityCatalogEndpoint).executor(customExecutor).build()`
+  - Optional custom `ExecutorService` for thread management
+  - Original constructor still works
+- **Stream Builder Pattern**: New `streamBuilder()` API for creating streams with compile-time type safety
+  - `sdk.streamBuilder(tableName).clientCredentials(clientId, clientSecret).compiledProto(MyRecord.getDefaultInstance()).build()`
+  - Two authentication paths: `clientCredentials(id, secret)` for OAuth, `unauthenticated()` for custom auth
+  - Three schema selection methods: `compiledProto(T defaultInstance)`, `dynamicProto(Descriptor descriptor)`, `json()`
+  - Returns typed streams: `ProtoZerobusStream` for proto or `JsonZerobusStream` for JSON
+  - **Compile-time enforcement**: `clientCredentials()` returns `AuthenticatedStreamBuilder`, `unauthenticated()` returns `UnauthenticatedStreamBuilder`. Schema methods only exist on these builders, so attempting to call `compiledProto()` directly on `StreamBuilder` results in a compile error.
+  - All stream configuration options available on the builder (recovery, timeouts, headersProvider, callbacks, etc.)
+- **Extended Proto Type Support**: Added support for additional Unity Catalog types in proto generation
+  - `TINYINT` / `BYTE` -> `int32`
+  - `TIMESTAMP_NTZ` -> `int64`
+  - `VARIANT` -> `string` (unshredded JSON string)
+  - `STRUCT` -> nested protobuf message
+  - Field names are converted to PascalCase for message names (e.g., `user_location` -> `UserLocation`)
+  - Nested STRUCTs are fully supported (STRUCT within STRUCT)
+  - `ARRAY<STRUCT<...>>` generates repeated nested messages
+  - `MAP<STRING, STRUCT<...>>` generates map with nested message values
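+  As an illustration, a hypothetical column `user_location STRUCT<lat DOUBLE, lon DOUBLE>` would map to a nested message along these lines (a sketch, not actual tool output):
+  ```protobuf
+  message UserLocation {
+    optional double lat = 1;
+    optional double lon = 2;
+  }
+  // referenced from the enclosing table message as, e.g.:
+  // optional UserLocation user_location = 3;
+  ```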
+- **Unchecked Exceptions**: `ZerobusException` now extends `RuntimeException`
+  - Follows modern Java SDK conventions (AWS SDK v2, Google Cloud, etc.)
+  - No longer requires `throws` declarations or mandatory try/catch
+  - Simplifies client code while still allowing exception handling when needed
+  - `NonRetriableException` also becomes unchecked (as subclass of `ZerobusException`)
+- **Type-Safe Stream Classes**: New strongly-typed stream classes replace generic `ZerobusStream`
+  - `BaseZerobusStream<P>` abstract base class with primary record type
+  - `DualTypeStream<P, S>` extends base class with secondary record type support
+  - `ProtoZerobusStream<T>` extends `DualTypeStream<T, byte[]>` for compiled and dynamic protobuf schemas
+  - `JsonZerobusStream` extends `DualTypeStream<Map<String, Object>, String>` for JSON record ingestion
+  - Type-safe `ingest(P record)` methods prevent runtime type errors
+  - Secondary type methods (`ingest(S)`) for raw/pre-serialized data
+  - Generic `ZerobusStream` still available for backwards compatibility
+- **TableProperties Hierarchy**: Simplified table properties class hierarchy for different schema types
+  - `BaseTableProperties` - abstract base class with `tableName` field
+  - `ProtoTableProperties` - concrete class storing `descriptorProto` directly with factory methods:
+    - `ProtoTableProperties.fromCompiled(tableName, defaultInstance)` - for compiled .proto schemas
+    - `ProtoTableProperties.fromDynamic(tableName, descriptor)` - for runtime-created descriptors
+  - `JsonTableProperties` - for JSON streams without protobuf schema
+  - `TableProperties` is deprecated in favor of `ProtoTableProperties`
+- **Configuration Method Renames**: Renamed configuration methods for clarity
+  - `maxInflightRequests()` replaces `maxInflightRecords()` (deprecated)
+  - `setMaxInflightRequests()` replaces `setMaxInflightRecords()` (deprecated)
+  - `offsetCallback()` replaces `ackCallback()` (deprecated)
+  - `setOffsetCallback()` replaces `setAckCallback()` (deprecated)
+- **Graceful Close Signal Handling**: Improved handling of server-initiated stream close signals
+  - When the server sends a `CloseStreamSignal` with a duration, the SDK now waits for pending acknowledgments up to the specified duration before triggering stream recovery
+  - This reduces duplicate records when the server gracefully closes a stream, as records that are acknowledged during the grace period won't be resent
+  - If all inflight records are acknowledged before the timeout, recovery proceeds with no duplicates
+  - Logs clearly indicate whether all records were acked or if recovery is proceeding with pending records
+- **Headers Provider Support**: Added `HeadersProvider` interface for flexible authentication strategies (see the sketch below)
+  - New `HeadersProvider` interface allows custom authentication implementations
+  - New `OAuthHeadersProvider` class provides OAuth 2.0 authentication with Unity Catalog privileges
+  - Enhanced `createStream()` method with optional `headersProvider` parameter
+  - When `headersProvider` is null, automatically creates `OAuthHeadersProvider` (default OAuth behavior)
+  - Support for adding custom headers to all gRPC requests
+  - Simplified API: single `createStream()` method for both OAuth and custom authentication
+  - `recreateStream()` automatically uses the same authentication method as the original stream
+- **TLS Configuration Support**: Added `TlsConfig` abstract class for flexible TLS settings
+  - New `TlsConfig` abstract class defines TLS configuration strategies
+  - New `SecureTlsConfig` class provides secure TLS with system CA certificates (default)
+  - Enhanced `createStream()` method with optional `tlsConfig` parameter
+  - When `tlsConfig` is null, automatically uses `SecureTlsConfig` (TLS with system CAs)
+  - `recreateStream()` automatically uses the same TLS configuration as the original stream
+- **User-Agent Header**: Added user-agent header for SDK version tracking
+  - `OAuthHeadersProvider` includes `user-agent: zerobus-sdk-java/<version>` header
+  - Sent during stream creation for server-side SDK usage tracking
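+  Because `HeadersProvider` has a single method, it can be implemented with a lambda (a minimal sketch; `fetchToken()` is a hypothetical token source):
+  ```java
+  HeadersProvider provider = () -> {
+      Map<String, String> headers = new HashMap<>();
+      headers.put("authorization", "Bearer " + fetchToken()); // supply your own token here
+      return headers;
+  };
+  ```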
+- **Configurable Message Size Limit**: Added `maxMessageSizeBytes` option in `StreamConfigurationOptions`
+  - Default is 10MB (`DEFAULT_MAX_MESSAGE_SIZE_BYTES`) - matches server limit
+  - Messages exceeding 10MB will be rejected by the server
+- **gRPC Channel Reuse**: gRPC channel is now cached and reused across all streams
+  - Reduces connection overhead when creating multiple streams
+  - Single HTTP/2 connection multiplexed across streams
+  - Improved performance for high-throughput applications
+- **SDK-Level Thread Pool**: Changed from per-stream fixed thread pool to SDK-level cached thread pool
+  - Threads created on-demand and reused across streams
+  - Idle threads automatically terminated after 60 seconds
+  - Scales naturally with number of active streams
+- **AutoCloseable Support**: `ZerobusSdk` and all stream types implement `AutoCloseable`
+  - Can be used with try-with-resources for automatic cleanup
+  - `close()` shuts down gRPC channel and thread pool
+  - Optional - daemon threads ensure cleanup on JVM shutdown even without explicit close
+  - Streams (`ProtoZerobusStream`, `JsonZerobusStream`) also implement `AutoCloseable`
+- **Stream Recreation**: New `recreate(ZerobusSdk)` method on stream classes
+  - `ProtoZerobusStream.recreate(sdk)` returns `CompletableFuture<ProtoZerobusStream<T>>`
+  - `JsonZerobusStream.recreate(sdk)` returns `CompletableFuture<JsonZerobusStream>`
+  - Creates a new stream with the same configuration and re-ingests unacknowledged records
+  - Replaces the deprecated `ZerobusSdk.recreateStream()` method
+- **Dependency Shading**: Shaded JAR now relocates all dependencies to avoid classpath conflicts
+  - Protobuf relocated to `com.databricks.zerobus.shaded.protobuf`
+  - gRPC relocated to `com.databricks.zerobus.shaded.grpc`
+  - Guava relocated to `com.databricks.zerobus.shaded.guava`
+  - Perfmark relocated to `com.databricks.zerobus.shaded.perfmark`
+  - Error Prone annotations relocated to `com.databricks.zerobus.shaded.errorprone`
+  - Google API protos relocated to `com.databricks.zerobus.shaded.google.*`
+  - Prevents conflicts when host application uses different versions of these libraries
+- **Input Validation**: Comprehensive input validation in builders
+  - `StreamBuilder` validates all parameters (non-null, positive values, etc.)
+  - Clear error messages indicate which parameter is invalid
+  - Validation happens at configuration time, not at stream creation
+- **Graceful Executor Shutdown**: SDK now waits for in-flight tasks during shutdown
+  - `close()` waits up to 5 seconds for tasks to complete gracefully
+  - Falls back to forced shutdown if tasks don't complete in time
+  - Preserves the interrupted status of the calling thread
+- **SDK Version Accessor**: New methods to access SDK version programmatically
+  - `ZerobusSdk.VERSION` constant (e.g., `"0.2.0"`)
+  - `ZerobusSdk.getVersion()` static method
+  - Useful for logging, debugging, and compatibility checks
+- **Parameterized Logging**: All internal logging uses SLF4J parameterized style
+  - Uses `logger.debug("Message: {}", value)` instead of string concatenation
+  - Improves performance when logging is disabled (no string allocation)
+- **Nullability Annotations**: Public APIs annotated with `@Nonnull` and `@Nullable` (JSR-305)
+  - Helps IDE provide better warnings and suggestions
+  - Improves code documentation and static analysis
+  - Uses `com.google.code.findbugs:jsr305` for annotation definitions
+- Updated Protocol Buffers from 3.24.0 to 4.33.0 for improved performance and latest features
+- Updated gRPC dependencies from 1.58.0 to 1.76.0 for enhanced stability and security
+- Updated SLF4J logging framework from 1.7.36 to 2.0.17 for modern logging capabilities
+
+### Bug Fixes
+
+### Documentation
+
+- Reorganized examples into `proto/` and `json/` subdirectories for better organization
+- Added single record and batch record examples for both proto and JSON formats
+- Added documentation for custom authentication and TLS configuration in examples README
+- Updated README.md with new dependency versions
+- Updated protoc compiler version recommendations
+- Updated Logback version compatibility for SLF4J 2.0
+- Updated type mapping documentation with new supported types (TINYINT, BYTE, TIMESTAMP_NTZ, VARIANT, STRUCT)
+
+### Internal Changes
+
+- **Multi-Module Project Structure**: Reorganized into a multi-module Maven project
+  - Parent POM at root level with `common/`, `sdk/`, and `cli/` modules
+  - `common/` - Shared utilities (HTTP client and JSON parsing)
+  - `sdk/` - SDK source code
+  - `cli/` - Standalone command-line tools
+- **New CLI Module**: Separate command-line tool module (`zerobus-cli`)
+  - Version 0.1.0 - independent versioning from SDK
+  - Standalone shaded JAR with all dependencies bundled
+  - Entry point: `com.databricks.zerobus.cli.Main`
+  - `GenerateProto` tool moved from SDK to CLI module (`com.databricks.zerobus.cli.GenerateProto`)
+  - Currently supports `generate-proto` command for proto schema generation
+  - Run via: `java -jar zerobus-cli-0.1.0.jar <command> [options]`
+- **SDK JAR Cleanup**: Removed `Main-Class` from SDK shaded JAR manifest
+  - SDK JAR is now a pure library without executable entry point
+  - Use CLI JAR (`zerobus-cli-0.1.0.jar`) for command-line tools instead
+- **Simplified Table Properties**: Removed intermediate classes
+  - Deleted `CompiledProtoTableProperties` and `DynamicProtoTableProperties`
+  - Unified into single `ProtoTableProperties` class with factory methods
+- **Stream Config Simplification**: Removed internal config wrapper classes
+  - `ProtoStreamConfig` and `JsonStreamConfig` removed
+  - Stream classes now use `config` directly from base class
+- Updated maven-compiler-plugin from 3.11.0 to 3.14.1
+- All gRPC artifacts now consistently use version 1.76.0
+- Reorganized test suite into separate test classes by functionality
+- Added base test class for shared test infrastructure
+- Changed Dependabot schedule from daily to monthly
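+  As an illustration of the CLI invocation described above (the same flags appear in the README's generate-proto example):
+  ```bash
+  java -jar zerobus-cli-0.1.0.jar generate-proto \
+    --uc-endpoint "https://dbc-a1b2c3d4-e5f6.cloud.databricks.com" \
+    --client-id "your-service-principal-application-id" \
+    --client-secret "your-service-principal-secret" \
+    --table "main.default.air_quality" \
+    --output "src/main/proto/record.proto"
+  ```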
+
+### API Changes
+
+**New APIs**
+
+- **Type-Safe Stream Classes**: New `ProtoZerobusStream` and `JsonZerobusStream` with type-safe `ingest()` methods
+  - `ProtoZerobusStream.ingest(T)` returns `long` offset ID directly
+  - `JsonZerobusStream.ingest(String)` returns `long` offset ID directly
+  - Legacy `ZerobusStream.ingestRecord()` still available and returns `CompletableFuture<Void>`
+  - Migration (optional): Replace `stream.ingestRecord(record).join()` with `long offset = stream.ingest(record);`
+
+- **Batch Ingestion**: New `ingestBatch()` methods on typed stream classes using wrapper types
+  - `ProtoZerobusStream.ingestBatch(MessageBatch)` / `ingestBatch(BytesBatch)` returns `Long` offset ID (or `null` if empty)
+  - `JsonZerobusStream.ingestBatch(StringBatch)` / `ingestBatch(MapBatch)` returns `Long` offset ID (or `null` if empty)
+
+- **Type Widening**: Additional `ingest()` overloads for flexible input types
+  - `ProtoZerobusStream.ingest(byte[])` for pre-serialized protobuf bytes
+  - `JsonZerobusStream.ingest(Map<String, Object>)` for Map input (auto-serialized to JSON)
+
+- **Batch Wrapper Types**: Consistent `ingestBatch()` API using wrapper types
+  - `Batch` - base interface for all batch types
+  - `PrimaryBatch<P>` - marker interface for primary type batches
+  - `SecondaryBatch<S>` - marker interface for secondary type batches
+  - `MessageBatch.of(Iterable<T>)` / `MessageBatch.of(T...)` - wrapper for proto message batches (implements `PrimaryBatch<T>`)
+  - `BytesBatch.of(Iterable<byte[]>)` / `BytesBatch.of(byte[]...)` - wrapper for byte array batches (implements `SecondaryBatch<byte[]>`)
+  - `StringBatch.of(Iterable<String>)` / `StringBatch.of(String...)` - wrapper for JSON string batches (implements `SecondaryBatch<String>`)
+  - `MapBatch.of(Iterable<Map<String, Object>>)` / `MapBatch.of(Map<String, Object>...)` - wrapper for Map batches (implements `PrimaryBatch<Map<String, Object>>`)
+
+- **Stream Recreation Methods**: New `recreate()` method on stream classes
+  - `ProtoZerobusStream.recreate(ZerobusSdk)` - recreates proto stream with same config
+  - `JsonZerobusStream.recreate(ZerobusSdk)` - recreates JSON stream with same config
+  - Automatically re-ingests unacknowledged records from the original stream
+  - Returns `CompletableFuture` with the new stream instance
+
+- **Stream Class Hierarchy**: New abstract base classes for type-safe streams
+  - `BaseZerobusStream<P>` - base class with primary type parameter
+  - `DualTypeStream<P, S>` - extends base with secondary type parameter
+  - All batch types are now in `com.databricks.zerobus.batch` package
+
+**Deprecations**
+
+- **ZerobusSdk Constructor**: `new ZerobusSdk(serverEndpoint, unityCatalogEndpoint)` is deprecated
+  - Use `ZerobusSdk.builder(serverEndpoint, unityCatalogEndpoint).build()` instead
+
+- **ZerobusStream Class**: `ZerobusStream` is deprecated
+  - Use `ProtoZerobusStream` instead via `ZerobusSdk.streamBuilder(String)`
+  - All methods on `ZerobusStream` are also deprecated:
+    - `ingestRecord()` - use `ingest()` instead, which returns the offset ID directly
+    - `getConfig()` - use `streamBuilder()` to create new streams instead
+    - Constructor - use `ZerobusSdk.streamBuilder(String)` instead
+
+- **ZerobusSdk.createStream()**: All `createStream()` overloads are deprecated
+  - Use `ZerobusSdk.streamBuilder(String)` instead
+  - Deprecated overloads:
+    - `createStream(TableProperties, clientId, clientSecret, options, headersProvider, tlsConfig)`
+    - `createStream(TableProperties, clientId, clientSecret, options)`
+    - `createStream(TableProperties, clientId, clientSecret)`
+
+- **ZerobusSdk.recreateStream()**: `recreateStream(ZerobusStream)` is deprecated
+  - Use `streamBuilder()` to create a new stream and manually re-ingest unacknowledged records
+
+- **Callback Methods**: `setAckCallback()` / `ackCallback()` are deprecated
+  - Use `setOffsetCallback(LongConsumer)` / `offsetCallback()` instead
+  - Callbacks now receive the offset ID directly (`long`) instead of the full protobuf response
+
+- **Configuration Methods**:
+  - `maxInflightRecords()` is deprecated - use `maxInflightRequests()` instead
+  - `setMaxInflightRecords()` is deprecated - use `setMaxInflightRequests()` instead
+
+- **TableProperties**: `TableProperties` is deprecated
+  - Use `ProtoTableProperties.fromCompiled(tableName, defaultInstance)` for compiled .proto schemas
+  - Use `ProtoTableProperties.fromDynamic(tableName, descriptor)` for runtime-created protobuf descriptors
+  - Use `JsonTableProperties` for JSON streams without protobuf schema
+
+**Breaking Changes**
+
+- **Protocol Buffers 4.x Migration**: If you use the regular JAR (not the shaded JAR), you must upgrade to protobuf-java 4.33.0 and regenerate any custom `.proto` files using protoc 4.x
+  - Download protoc 4.33.0 from: https://github.com/protocolbuffers/protobuf/releases/tag/v33.0
+  - Regenerate proto files: `protoc --java_out=src/main/java src/main/proto/record.proto`
+  - Protobuf 4.x is binary-compatible over the wire with 3.x, but generated Java code may differ
+
+- **SLF4J 2.0 Migration**: If you use a logging implementation, you may need to update it:
+  - `slf4j-simple`: Use version 2.0.17 or later
+  - `logback-classic`: Use version 1.4.14 or later (for SLF4J 2.0 compatibility)
+  - `log4j-slf4j-impl`: Use version 2.20.0 or later
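+  For example, with Logback the Maven dependency would be (a minimal sketch using the version listed above):
+  ```xml
+  <dependency>
+    <groupId>ch.qos.logback</groupId>
+    <artifactId>logback-classic</artifactId>
+    <version>1.4.14</version>
+  </dependency>
+  ```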
+
+**Note**: If you use the shaded JAR (classifier `shaded`), all dependencies are bundled and relocated, so no action is required.
+
 ## Release v0.1.0

 Initial release of the Databricks Zerobus Ingest SDK for Java.
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
index 8403e31..be64a15 100644
--- a/NEXT_CHANGELOG.md
+++ b/NEXT_CHANGELOG.md
@@ -1,38 +1,16 @@
 # NEXT CHANGELOG

-## Release v0.2.0
+## Release v0.3.0

 ### New Features and Improvements

-- Updated Protocol Buffers from 3.24.0 to 4.33.0 for improved performance and latest features
-- Updated gRPC dependencies from 1.58.0 to 1.76.0 for enhanced stability and security
-- Updated SLF4J logging framework from 1.7.36 to 2.0.17 for modern logging capabilities
-
 ### Bug Fixes

 ### Documentation

-- Updated README.md with new dependency versions
-- Updated protoc compiler version recommendations
-- Updated Logback version compatibility for SLF4J 2.0
-
 ### Internal Changes

-- Updated maven-compiler-plugin from 3.11.0 to 3.14.1
-- All gRPC artifacts now consistently use version 1.76.0
-
 ### API Changes

 **Breaking Changes**

-- **Protocol Buffers 4.x Migration**: If you use the regular JAR (not the fat JAR), you must upgrade to protobuf-java 4.33.0 and regenerate any custom `.proto` files using protoc 4.x
-  - Download protoc 4.33.0 from: https://github.com/protocolbuffers/protobuf/releases/tag/v33.0
-  - Regenerate proto files: `protoc --java_out=src/main/java src/main/proto/record.proto`
-  - Protobuf 4.x is binary-compatible over the wire with 3.x, but generated Java code may differ
-
-- **SLF4J 2.0 Migration**: If you use a logging implementation, you may need to update it:
-  - `slf4j-simple`: Use version 2.0.17 or later
-  - `logback-classic`: Use version 1.4.14 or later (for SLF4J 2.0 compatibility)
-  - `log4j-slf4j-impl`: Use version 2.20.0 or later
-
-**Note**: If you use the fat JAR (`jar-with-dependencies`), all dependencies are bundled and no action is required.
diff --git a/NOTICE b/NOTICE
index 1ec04b5..72b946f 100644
--- a/NOTICE
+++ b/NOTICE
@@ -23,10 +23,34 @@ Copyright 2014 The Netty Project
 https://github.com/netty/netty
 License: https://github.com/netty/netty/blob/4.1/LICENSE.txt

+### Guava
+Copyright Google LLC
+https://github.com/google/guava
+License: https://github.com/google/guava/blob/master/LICENSE
+
+### Perfmark
+Copyright 2019 Google LLC
+https://github.com/perfmark/perfmark
+License: https://github.com/perfmark/perfmark/blob/master/LICENSE
+
+### Error Prone Annotations
+Copyright Google LLC
+https://github.com/google/error-prone
+License: https://github.com/google/error-prone/blob/master/LICENSE
+
+## MIT License
+
 ### SLF4J
-Copyright (c) 2004-2017 QOS.ch
+Copyright (c) 2004-2023 QOS.ch
 https://github.com/qos-ch/slf4j
-License: https://github.com/qos-ch/slf4j/blob/master/LICENSE.txt
+License: https://www.slf4j.org/license.html
+
+## BSD 3-Clause License
+
+### JSR-305 Annotations
+Copyright JSR-305 Expert Group
+https://github.com/findbugsproject/findbugs
+License: https://opensource.org/licenses/BSD-3-Clause

 ## CDDL + GPLv2 with classpath exception
diff --git a/README.md b/README.md
index 13d767b..3443337 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,12 @@
 # Databricks Zerobus Ingest SDK for Java

+[![Build](https://github.com/databricks/zerobus-sdk-java/actions/workflows/push.yml/badge.svg)](https://github.com/databricks/zerobus-sdk-java/actions/workflows/push.yml)
+[![Maven Central](https://img.shields.io/maven-central/v/com.databricks/zerobus-ingest-sdk)](https://search.maven.org/artifact/com.databricks/zerobus-ingest-sdk)
+[![Javadoc](https://javadoc.io/badge2/com.databricks/zerobus-ingest-sdk/javadoc.svg)](https://javadoc.io/doc/com.databricks/zerobus-ingest-sdk)
+![Coverage](https://raw.githubusercontent.com/databricks/zerobus-sdk-java/badges/.github/badges/jacoco.svg)
+![Java](https://img.shields.io/badge/Java-8%2B-blue)
+[![License](https://img.shields.io/badge/License-Databricks-blue.svg)](LICENSE)
+
 [Public Preview](https://docs.databricks.com/release-notes/release-types.html): This SDK is supported for production use cases and is available to all customers. Databricks is actively working on stabilizing the Zerobus Ingest SDK for Java. Minor version updates may include backwards-incompatible changes.

 We are keen to hear feedback from you on this SDK. Please [file issues](https://github.com/databricks/zerobus-sdk-java/issues), and we will address them.
@@ -20,6 +27,8 @@ The Databricks Zerobus Ingest SDK for Java provides a high-performance client fo
 - [Usage Examples](#usage-examples)
   - [Blocking Ingestion](#blocking-ingestion)
   - [Non-Blocking Ingestion](#non-blocking-ingestion)
+  - [JSON Record Ingestion](#json-record-ingestion)
+  - [Using Custom Headers Provider](#using-custom-headers-provider)
 - [Configuration](#configuration)
 - [Logging](#logging)
 - [Error Handling](#error-handling)
@@ -32,7 +41,9 @@ The Databricks Zerobus Ingest SDK for Java provides a high-performance client fo
 - **Automatic recovery**: Built-in retry and recovery mechanisms
 - **Flexible configuration**: Customizable stream behavior and timeouts
 - **Protocol Buffers**: Strongly-typed schema using protobuf
+- **JSON record support**: Direct JSON ingestion without protobuf encoding
 - **OAuth 2.0 authentication**: Secure authentication with client credentials
+- **Custom headers provider**: Flexible authentication strategies and custom headers support

 ## Requirements

@@ -43,8 +54,8 @@ The Databricks Zerobus Ingest SDK for Java provides a high-performance client fo

 ### Dependencies

-**When using the fat JAR** (recommended for most users):
-- No additional dependencies required - all dependencies are bundled
+**When using the shaded JAR** (recommended for most users):
+- No additional dependencies required - all dependencies are bundled and relocated

 **When using the regular JAR**:
 - [`protobuf-java` 4.33.0](https://mvnrepository.com/artifact/com.google.protobuf/protobuf-java/4.33.0)
@@ -134,7 +145,7 @@ Add the SDK as a dependency in your `pom.xml`:
   <dependency>
     <groupId>com.databricks</groupId>
     <artifactId>zerobus-ingest-sdk</artifactId>
-    <version>0.1.0</version>
+    <version>0.2.0</version>
   </dependency>
 ```

 Or with Gradle (`build.gradle`):

 ```groovy
 dependencies {
-    implementation 'com.databricks:zerobus-ingest-sdk:0.1.0'
+    implementation 'com.databricks:zerobus-ingest-sdk:0.2.0'
 }
 ```

@@ -156,7 +167,7 @@ dependencies {
   <dependency>
     <groupId>com.databricks</groupId>
     <artifactId>zerobus-ingest-sdk</artifactId>
-    <version>0.1.0</version>
+    <version>0.2.0</version>
@@ -198,17 +209,17 @@ dependencies {
 ```

-**Fat JAR (with all dependencies bundled):**
+**Shaded JAR (with all dependencies bundled and relocated):**

-If you prefer the self-contained fat JAR with all dependencies included:
+If you prefer the self-contained shaded JAR with all dependencies included:

 ```xml
   <dependency>
     <groupId>com.databricks</groupId>
     <artifactId>zerobus-ingest-sdk</artifactId>
-    <version>0.1.0</version>
-    <classifier>jar-with-dependencies</classifier>
+    <version>0.2.0</version>
+    <classifier>shaded</classifier>
   </dependency>
 ```

 Or with Gradle:

 ```groovy
 dependencies {
-    implementation 'com.databricks:zerobus-ingest-sdk:0.1.0:jar-with-dependencies'
+    implementation 'com.databricks:zerobus-ingest-sdk:0.2.0:shaded'
 }
 ```

-**Note:** The fat JAR is typically not needed for Maven/Gradle projects. Use the regular JAR (without classifier) unless you have a specific reason to bundle all dependencies.
+**Note:** The shaded JAR is typically not needed for Maven/Gradle projects. Use the regular JAR (without classifier) unless you have a specific reason to bundle all dependencies.
 #### Option 2: Build from Source

-Clone and build the SDK:
+Clone and build:

 ```bash
 git clone https://github.com/databricks/zerobus-sdk-java.git
 cd zerobus-sdk-java
 mvn clean package
 ```

-This generates two JAR files in the `target/` directory:
+This is a multi-module project:
+- `common/` - Shared utilities (HTTP client and JSON parsing)
+- `sdk/` - The Zerobus Ingest SDK library
+- `cli/` - Command-line tools

-- **Regular JAR**: `zerobus-ingest-sdk-0.1.0.jar` (155KB)
-  - Contains only the SDK classes
-  - Requires all dependencies on the classpath
+**SDK JARs** (in `sdk/target/`):
+- `zerobus-ingest-sdk-0.2.0.jar` (~240KB) - Library for your applications
+- `zerobus-ingest-sdk-0.2.0-shaded.jar` (~20MB) - Library with all dependencies bundled

-- **Fat JAR**: `zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar` (18MB)
-  - Contains SDK classes plus all dependencies bundled
-  - Self-contained, easier to deploy
+**CLI JAR** (in `cli/target/`):
+- `zerobus-cli-0.1.0.jar` (~20MB) - Standalone command-line tool

 **Which JAR to use?**
-- **Regular JAR**: When using Maven/Gradle (recommended)
-- **Fat JAR**: For standalone scripts or CLI tools without a build system
+- **SDK Regular JAR**: When using Maven/Gradle (recommended for applications)
+- **SDK Shaded JAR**: For standalone scripts without a build system
+- **CLI JAR**: For generating proto schemas from Unity Catalog tables

 ### Create Your Application Project

@@ -283,7 +297,7 @@ Create `pom.xml`:
   <dependency>
     <groupId>com.databricks</groupId>
     <artifactId>zerobus-ingest-sdk</artifactId>
-    <version>0.1.0</version>
+    <version>0.2.0</version>
   </dependency>
@@ -335,106 +349,26 @@ This generates `src/main/java/com/example/proto/Record.java`.

 ### Generate Protocol Buffer Schema from Unity Catalog (Alternative)

-Instead of manually writing and compiling your protobuf schema, you can automatically generate it from an existing Unity Catalog table schema using the included `GenerateProto` tool.
-
-#### Using the Proto Generation Tool
-
-The `GenerateProto` tool fetches your table schema from Unity Catalog and generates a corresponding proto2 definition file with the correct type mappings.
-
-**First, download the fat JAR:**
-
-The proto generation tool requires the fat JAR (all dependencies included):
-
-```bash
-# Download from Maven Central
-wget https://repo1.maven.org/maven2/com/databricks/zerobus-ingest-sdk/0.1.0/zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar
-
-# Or if you built from source, it's in target/
-# cp target/zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar .
-```
-
-**Run the tool:**
+Instead of manually writing your protobuf schema, you can generate it from an existing Unity Catalog table using the Zerobus CLI:

 ```bash
-java -jar zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \
+java -jar zerobus-cli-0.1.0.jar generate-proto \
   --uc-endpoint "https://dbc-a1b2c3d4-e5f6.cloud.databricks.com" \
   --client-id "your-service-principal-application-id" \
   --client-secret "your-service-principal-secret" \
   --table "main.default.air_quality" \
-  --output "src/main/proto/record.proto" \
-  --proto-msg "AirQuality"
+  --output "src/main/proto/record.proto"
 ```

-**Parameters:**
-- `--uc-endpoint`: Your workspace URL (e.g., `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com`)
-- `--client-id`: Service principal application ID
-- `--client-secret`: Service principal secret
-- `--table`: Fully qualified table name (catalog.schema.table)
-- `--output`: Output path for the generated proto file
-- `--proto-msg`: (Optional) Name for the protobuf message (defaults to table name)
-
-**Example:**
-
-For a table defined as:
-```sql
-CREATE TABLE main.default.air_quality (
-  device_name STRING,
-  temp INT,
-  humidity BIGINT
-)
-USING DELTA;
-```
-
-Running the generation tool will create `src/main/proto/record.proto`:
-```protobuf
-syntax = "proto2";
-
-package com.example;
-
-option java_package = "com.example.proto";
-option java_outer_classname = "Record";
-
-message AirQuality {
-  optional string device_name = 1;
-  optional int32 temp = 2;
-  optional int64 humidity = 3;
-}
-```
+After generating, compile the proto file:

-After generating the proto file, compile it as shown above:
 ```bash
 protoc --java_out=src/main/java src/main/proto/record.proto
 ```

-**Type Mappings:**
-
-The tool automatically maps Unity Catalog types to proto2 types:
-
-| Delta Type | Proto2 Type |
-|-----------|-------------|
-| INT, SMALLINT, SHORT | int32 |
-| BIGINT, LONG | int64 |
-| FLOAT | float |
-| DOUBLE | double |
-| STRING, VARCHAR | string |
-| BOOLEAN | bool |
-| BINARY | bytes |
-| DATE | int32 |
-| TIMESTAMP | int64 |
-| ARRAY\<type\> | repeated type |
-| MAP\<key, value\> | map\<key, value\> |
-| STRUCT\<fields\> | nested message |
+See the [CLI README](cli/README.md) for installation instructions, all parameters, type mappings, and complex type examples.

-**Benefits:**
-- No manual schema creation required
-- Ensures schema consistency between your table and protobuf definitions
-- Automatically handles complex types (arrays, maps, structs)
-- Reduces errors from manual type mapping
-- No need to clone the repository - runs directly from the SDK JAR
-
-For detailed documentation and examples, see [tools/README.md](tools/README.md).

-#### 4. Write Your Client Code
+### Write Your Client Code

 Create `src/main/java/com/example/ZerobusClient.java`:

@@ -453,39 +387,36 @@ public class ZerobusClient {
         String clientId = "your-service-principal-application-id";
         String clientSecret = "your-service-principal-secret";

-        // Initialize SDK
-        ZerobusSdk sdk = new ZerobusSdk(serverEndpoint, workspaceUrl);
-
-        // Configure table properties
-        TableProperties<AirQuality> tableProperties = new TableProperties<>(
-            tableName,
-            AirQuality.getDefaultInstance()
-        );
-
-        // Create stream
-        ZerobusStream<AirQuality> stream = sdk.createStream(
-            tableProperties,
-            clientId,
-            clientSecret
-        ).join();
-
-        try {
-            // Ingest records
-            for (int i = 0; i < 100; i++) {
-                AirQuality record = AirQuality.newBuilder()
-                    .setDeviceName("sensor-" + (i % 10))
-                    .setTemp(20 + (i % 15))
-                    .setHumidity(50 + (i % 40))
-                    .build();
-
-                stream.ingestRecord(record).join(); // Wait for durability
-
-                System.out.println("Ingested record " + (i + 1));
+        // Initialize SDK (implements AutoCloseable)
+        try (ZerobusSdk sdk = new ZerobusSdk(serverEndpoint, workspaceUrl)) {
+
+            // Create stream using the fluent builder API
+            // The builder returns a type-safe ProtoZerobusStream
+            ProtoZerobusStream<AirQuality> stream = sdk.streamBuilder(tableName)
+                .clientCredentials(clientId, clientSecret)
+                .compiledProto(AirQuality.getDefaultInstance())
+                .build()
+                .join();
+
+            // Both SDK and stream implement AutoCloseable
+            try {
+                // Ingest records - type-safe: only AirQuality records accepted
+                for (int i = 0; i < 100; i++) {
+                    AirQuality record = AirQuality.newBuilder()
+                        .setDeviceName("sensor-" + (i % 10))
+                        .setTemp(20 + (i % 15))
+                        .setHumidity(50 + (i % 40))
+                        .build();
+
+                    stream.ingest(record);
+
+                    System.out.println("Ingested record " + (i + 1));
+                }
+
+                System.out.println("Successfully ingested 100 records!");
+            } finally {
+                stream.close();
             }
-
-            System.out.println("Successfully ingested 100 records!");
-        } finally {
-            stream.close();
         }
     }
 }
@@ -540,21 +471,83 @@ Successfully ingested 100 records!

 ## Usage Examples

-See the `examples/` directory for complete working examples:
+See the `sdk/examples/` directory for complete working examples organized by schema type:
+
+**Proto Examples:**
+- `proto/compiled/SingleRecordExample.java` - Single record ingestion with compiled proto
+- `proto/compiled/BatchRecordExample.java` - Batch ingestion with compiled proto
+- `proto/dynamic/SingleRecordExample.java` - Single record ingestion with dynamic proto
+- `proto/dynamic/BatchRecordExample.java` - Batch ingestion with dynamic proto

-- **BlockingIngestionExample.java** - Synchronous ingestion with progress tracking
-- **NonBlockingIngestionExample.java** - High-throughput asynchronous ingestion
+**JSON Examples:**
+- `json/SingleRecordExample.java` - Single JSON record ingestion
+- `json/BatchRecordExample.java` - Batch JSON record ingestion

-### Blocking Ingestion
+### Fluent Stream Builder API

-Ingest records synchronously, waiting for each record to be acknowledged:
+The SDK provides a fluent builder API for creating streams with **compile-time type safety**. The step builder pattern enforces that you choose an authentication path before schema selection - the compiler prevents you from calling schema methods directly on `StreamBuilder`.
+
+**Builder Flow:**
+
+```
+sdk.streamBuilder(tableName)  → StreamBuilder
+  .clientCredentials(...)     → AuthenticatedStreamBuilder (OAuth path)
+  .unauthenticated()          → UnauthenticatedStreamBuilder (custom auth path)
+
+  .maxInflightRequests(...)   → same builder (optional config methods)
+  .headersProvider(...)       → same builder (optional, for custom headers)
+  .compiledProto(...)         → ProtoStreamBuilder (or .dynamicProto() or .json())
+  .build()                    → CompletableFuture<ProtoZerobusStream<T>>
+```
+
+**Example usage:**
+
+```java
+// OAuth authentication (most common)
+ProtoZerobusStream<AirQuality> protoStream = sdk.streamBuilder(tableName)
+    .clientCredentials(clientId, clientSecret)  // Returns AuthenticatedStreamBuilder
+    .maxInflightRequests(50000)                 // Config methods
+    .recovery(true)
+    .compiledProto(AirQuality.getDefaultInstance())
+    .build()
+    .join();
+
+// JSON stream with OAuth
+JsonZerobusStream jsonStream = sdk.streamBuilder(tableName)
+    .clientCredentials(clientId, clientSecret)
+    .json()
+    .build()
+    .join();
+
+// Custom authentication with headers provider
+JsonZerobusStream stream = sdk.streamBuilder(tableName)
+    .unauthenticated()                // Returns UnauthenticatedStreamBuilder
+    .headersProvider(customProvider)  // Set custom auth headers
+    .json()
+    .build()
+    .join();
+
+// This won't compile - schema selection requires auth path first:
+// sdk.streamBuilder(tableName)
+//     .compiledProto(...)  // ❌ Compile error: method not found on StreamBuilder
+```
+
+**The step builder pattern ensures:**
+- **Compile-time safety**: `clientCredentials()` and `unauthenticated()` return different builder types. Schema methods (`compiledProto`, `dynamicProto`, `json`) only exist on these builders, so the compiler enforces the correct order.
+- **Type safety**: Schema selection determines the return type (`ProtoZerobusStream` or `JsonZerobusStream`)
+- **Validation**: Invalid parameters are caught at configuration time with clear error messages
+- **Fluent API**: Configuration methods can be chained in any order after choosing auth path
+
+### Compiled Proto Stream
+
+For compiled protobuf schemas (generated from `.proto` files):
+
+```java
+ProtoZerobusStream<AirQuality> stream = sdk.streamBuilder(tableName)
+    .clientCredentials(clientId, clientSecret)
+    .compiledProto(AirQuality.getDefaultInstance())
+    .build()
+    .join();

 try {
     for (int i = 0; i < 1000; i++) {
         AirQuality record = AirQuality.newBuilder()
             .setDeviceName("sensor-" + i)
             .setTemp(20 + i % 15)
             .setHumidity(50 + i % 40)
             .build();

-        stream.ingestRecord(record).join(); // Wait for durability
+        stream.ingest(record); // Type-safe: only AirQuality accepted
     }
+    stream.flush();
 } finally {
     stream.close();
 }
 ```

-### Non-Blocking Ingestion
+### Dynamic Proto Stream

-Ingest records asynchronously for maximum throughput:
+For runtime-defined protobuf schemas:

 ```java
-StreamConfigurationOptions options = StreamConfigurationOptions.builder()
-    .setMaxInflightRecords(50000)
-    .setAckCallback(response ->
-        System.out.println("Acknowledged offset: " +
-            response.getDurabilityAckUpToOffset()))
+// Build descriptor programmatically
+DescriptorProto descriptorProto = DescriptorProto.newBuilder()
+    .setName("AirQuality")
+    .addField(FieldDescriptorProto.newBuilder()
+        .setName("device_name").setNumber(1)
+        .setType(FieldDescriptorProto.Type.TYPE_STRING)
+        .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL))
+    .addField(FieldDescriptorProto.newBuilder()
+        .setName("temp").setNumber(2)
+        .setType(FieldDescriptorProto.Type.TYPE_INT32)
+        .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL))
     .build();

-ZerobusStream<AirQuality> stream = sdk.createStream(
-    tableProperties,
-    clientId,
-    clientSecret,
-    options
-).join();
+// Create descriptor from proto
+FileDescriptorProto fileDescriptorProto = FileDescriptorProto.newBuilder()
+    .addMessageType(descriptorProto)
+    .build();
+FileDescriptor fileDescriptor = FileDescriptor.buildFrom(fileDescriptorProto, new FileDescriptor[]{});
+Descriptor descriptor = fileDescriptor.findMessageTypeByName("AirQuality");

-List<CompletableFuture<Void>> futures = new ArrayList<>();
+// Create dynamic proto stream
+ProtoZerobusStream<DynamicMessage> stream = sdk.streamBuilder(tableName)
+    .clientCredentials(clientId, clientSecret)
+    .dynamicProto(descriptor)
+    .build()
+    .join();

 try {
-    for (int i = 0; i < 100000; i++) {
-        AirQuality record = AirQuality.newBuilder()
-            .setDeviceName("sensor-" + (i % 10))
-            .setTemp(20 + i % 15)
-            .setHumidity(50 + i % 40)
+    for (int i = 0; i < 1000; i++) {
+        DynamicMessage record = DynamicMessage.newBuilder(descriptor)
+            .setField(descriptor.findFieldByName("device_name"), "sensor-" + i)
+            .setField(descriptor.findFieldByName("temp"), 20 + (i % 15))
             .build();

-        futures.add(stream.ingestRecord(record));
+        stream.ingest(record);
     }
+    stream.flush();
+} finally {
+    stream.close();
+}
+```
+
+### JSON Stream

-    // Flush and wait for all records
+For JSON record ingestion (no protobuf required):
+
+```java
+JsonZerobusStream stream = sdk.streamBuilder(tableName)
+    .clientCredentials(clientId, clientSecret)
+    .maxInflightRequests(10000)
+    .json()
+    .build()
+    .join();
+
+try {
+    for (int i = 0; i < 1000; i++) {
+        String jsonRecord = String.format(
+            "{\"device_name\": \"sensor-%d\", \"temp\": %d, \"humidity\": %d}",
+            i % 10, 20 + (i % 15), 50 + (i % 40)
+        );
+
+        stream.ingest(jsonRecord); // Type-safe: only String accepted
+    }
     stream.flush();
-    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
 } finally {
     stream.close();
 }
 ```

+**Key differences from proto ingestion:**
+- Use `.json()` builder method instead of `.compiledProto()` or `.dynamicProto()`
+- Returns `JsonZerobusStream` instead of `ProtoZerobusStream`
+- Pass JSON `String` to `ingest()` instead of protobuf `Message`
+- JSON must match the table schema defined in Unity Catalog
+- No protobuf schema or compilation required
+
+### Batch Ingestion
+
+For improved throughput, use `ingestBatch()` to send multiple records in a single request:
+
+```java
+// Batch ingestion with proto records
+List<AirQuality> batch = new ArrayList<>();
+for (int i = 0; i < 100; i++) {
+    batch.add(AirQuality.newBuilder()
+        .setDeviceName("sensor-" + i)
+        .setTemp(20 + (i % 15))
+        .setHumidity(50 + (i % 40))
+        .build());
+}
+
+Long offsetId = stream.ingestBatch(MessageBatch.of(batch));
+if (offsetId != null) {
+    stream.waitForOffset(offsetId); // Wait for batch acknowledgment
+}
+```
+
+```java
+// Batch ingestion with JSON records
+List<String> jsonBatch = new ArrayList<>();
+for (int i = 0; i < 100; i++) {
+    jsonBatch.add(String.format(
+        "{\"device_name\": \"sensor-%d\", \"temp\": %d}",
+        i, 20 + (i % 15)
+    ));
+}
+
+Long offsetId = jsonStream.ingestBatch(StringBatch.of(jsonBatch));
+if (offsetId != null) {
+    jsonStream.waitForOffset(offsetId);
+}
+```
+
+**Benefits of batch ingestion:**
+- Reduced network overhead - one request per batch instead of per record
+- Atomic acknowledgment - entire batch succeeds or fails together
+- Higher throughput for bulk data loads
+
+### Using Custom Headers Provider
+
+The SDK supports custom authentication strategies through the `HeadersProvider` interface. This is useful when you need custom authentication logic, want to add additional headers, or manage tokens externally.
+ +#### Implementing Custom HeadersProvider + +You can implement the `HeadersProvider` interface to create custom authentication strategies or add additional headers: + +```java +public class CustomHeadersProvider implements HeadersProvider { + private final OAuthHeadersProvider oauthProvider; + + public CustomHeadersProvider(String tableName, String workspaceId, + String workspaceUrl, String clientId, + String clientSecret) { + this.oauthProvider = new OAuthHeadersProvider( + tableName, workspaceId, workspaceUrl, clientId, clientSecret); + } + + @Override + public Map getHeaders() throws NonRetriableException { + // Get standard OAuth headers + Map headers = new HashMap<>(oauthProvider.getHeaders()); + + // Add custom headers + headers.put("x-custom-client-version", "1.0.0"); + headers.put("x-custom-environment", "production"); + headers.put("x-custom-request-id", UUID.randomUUID().toString()); + + return headers; + } +} + +// Use custom provider with the builder API +HeadersProvider customProvider = new CustomHeadersProvider( + "catalog.schema.table", "workspace-id", + "https://your-workspace.cloud.databricks.com", + "client-id", "client-secret" +); + +ProtoZerobusStream stream = sdk.streamBuilder(tableName) + .clientCredentials(clientId, clientSecret) + .headersProvider(customProvider) // Custom headers provider + .compiledProto(AirQuality.getDefaultInstance()) + .build() + .join(); +``` + +**Benefits of using HeadersProvider:** +- Flexible authentication strategies beyond OAuth +- Add custom headers to all gRPC requests +- Integrate with external token management systems +- Centralize authentication logic + +**Note:** The `getHeaders()` method is called synchronously when creating or recreating a stream. Make sure your implementation is thread-safe if using the same provider instance across multiple streams. + ## Configuration ### Stream Configuration Options | Option | Default | Description | |--------|---------|-------------| -| `maxInflightRecords` | 50000 | Maximum number of unacknowledged records | +| `maxInflightRequests` | 50000 | Maximum number of unacknowledged requests | | `recovery` | true | Enable automatic stream recovery | | `recoveryTimeoutMs` | 15000 | Timeout for recovery operations (ms) | | `recoveryBackoffMs` | 2000 | Delay between recovery attempts (ms) | | `recoveryRetries` | 3 | Maximum number of recovery attempts | | `flushTimeoutMs` | 300000 | Timeout for flush operations (ms) | | `serverLackOfAckTimeoutMs` | 60000 | Server acknowledgment timeout (ms) | -| `ackCallback` | None | Callback invoked on record acknowledgment | +| `maxMessageSizeBytes` | 10MB | Maximum message size (server enforces 10MB limit) | +| `recordType` | PROTO | Record type: `PROTO` or `JSON` | +| `offsetCallback` | None | Callback invoked on offset acknowledgment | ## Logging @@ -739,14 +874,20 @@ At the **ERROR** level, the SDK logs: ## Error Handling +The SDK uses **unchecked exceptions** (extending `RuntimeException`), following modern Java SDK conventions (AWS SDK v2, Google Cloud, etc.). This means you don't need to declare `throws` or wrap every call in try/catch, but you can still handle exceptions when needed. 
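+
+For instance, a short-lived pipeline can lean on try-with-resources and simply let failures propagate. A minimal sketch, reusing the `AirQuality` stream from the examples above:
+
+```java
+// No throws clauses and no mandatory try/catch: the exceptions are unchecked.
+try (ProtoZerobusStream stream = sdk.streamBuilder(tableName)
+        .clientCredentials(clientId, clientSecret)
+        .compiledProto(AirQuality.getDefaultInstance())
+        .build()
+        .join()) {
+    stream.ingest(record); // record built as in the examples above
+    stream.flush();
+}
+```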
+ The SDK throws two types of exceptions: - `ZerobusException`: Retriable errors (e.g., network issues, temporary server errors) -- `NonRetriableException`: Non-retriable errors (e.g., invalid credentials, missing table) +- `NonRetriableException`: Non-retriable errors (e.g., invalid credentials, missing table, schema mismatch) ```java +// Simple usage - let exceptions propagate +stream.ingest(record); // No throws declaration needed + +// Explicit error handling when needed try { - stream.ingestRecord(record); + stream.ingest(record); } catch (NonRetriableException e) { // Fatal error - do not retry logger.error("Non-retriable error: " + e.getMessage()); @@ -762,7 +903,7 @@ try { ### ZerobusSdk -Main entry point for the SDK. +Main entry point for the SDK. Implements `AutoCloseable` for optional resource cleanup. **Constructor:** ```java @@ -771,6 +912,14 @@ ZerobusSdk(String serverEndpoint, String unityCatalogEndpoint) - `serverEndpoint` - The Zerobus gRPC endpoint (e.g., `.zerobus.region.cloud.databricks.com`) - `unityCatalogEndpoint` - The Unity Catalog endpoint (your workspace URL) +**Builder:** +```java +ZerobusSdk.builder(String serverEndpoint, String unityCatalogEndpoint) + .executor(ExecutorService executor) // optional custom executor + .build() +``` +Creates an SDK instance with custom configuration. If no executor is provided, a cached thread pool is used. + **Methods:** ```java @@ -792,12 +941,393 @@ Creates a new ingestion stream with custom configuration. Returns a CompletableF ``` Creates a new ingestion stream with default configuration. Returns a CompletableFuture that completes when the stream is ready. +```java + CompletableFuture> createStream( + TableProperties tableProperties, + String clientId, + String clientSecret, + StreamConfigurationOptions options, + HeadersProvider headersProvider +) +``` +Creates a new ingestion stream. If `headersProvider` is null, automatically creates an `OAuthHeadersProvider` using the provided credentials. Returns a CompletableFuture that completes when the stream is ready. + ```java CompletableFuture> recreateStream( ZerobusStream stream ) ``` -Recreates a failed stream, resending unacknowledged records. Returns a CompletableFuture that completes when the stream is ready. +Recreates a failed stream, resending unacknowledged records. Uses the same authentication method (OAuth or custom headers provider) as the original stream. Returns a CompletableFuture that completes when the stream is ready. + +```java +void close() +``` +Closes the SDK and releases resources (gRPC channel and thread pool). Optional - daemon threads ensure cleanup on JVM shutdown even without explicit close. Can be used with try-with-resources. + +```java +StreamBuilder streamBuilder(String tableName) +``` +Creates a new builder for configuring and creating a stream. This is the preferred way to create streams with compile-time type safety. + +```java +static String getVersion() +``` +Returns the current SDK version string (e.g., `"0.2.0"`). + +**Constants:** + +```java +public static final String VERSION = "0.2.0" +``` +The current SDK version. + +--- + +### StreamBuilder + +Step builder for creating streams with compile-time type safety. You must call either `clientCredentials()` (for OAuth) or `unauthenticated()` (for custom auth) to get a configurable builder. + +**Methods:** + +```java +AuthenticatedStreamBuilder clientCredentials(String clientId, String clientSecret) +``` +Sets OAuth client credentials. 
Returns `AuthenticatedStreamBuilder` for configuration and schema selection. This is the most common authentication method. + +```java +UnauthenticatedStreamBuilder unauthenticated() +``` +Returns `UnauthenticatedStreamBuilder` for custom authentication. Use `headersProvider()` to set custom authentication headers. + +--- + +### AuthenticatedStreamBuilder / UnauthenticatedStreamBuilder + +Both builders provide the same configuration and schema selection methods. The difference is that `AuthenticatedStreamBuilder` has OAuth credentials configured, while `UnauthenticatedStreamBuilder` does not. + +**Configuration Methods:** + +```java +headersProvider(HeadersProvider headersProvider) +``` +Sets a custom headers provider for adding headers to gRPC requests. + +```java +tlsConfig(TlsConfig tlsConfig) +``` +Sets custom TLS configuration. Optional - defaults to secure TLS. + +```java +maxInflightRequests(int maxInflightRequests) +``` +Sets maximum in-flight requests. Must be positive. + +```java +recovery(boolean recovery) +``` +Enables/disables automatic recovery. + +```java +recoveryTimeoutMs(int recoveryTimeoutMs) +``` +Sets recovery timeout in milliseconds. Must be non-negative. + +```java +recoveryBackoffMs(int recoveryBackoffMs) +``` +Sets backoff between recovery attempts. Must be non-negative. + +```java +recoveryRetries(int recoveryRetries) +``` +Sets maximum recovery attempts. Must be non-negative. + +```java +flushTimeoutMs(int flushTimeoutMs) +``` +Sets flush timeout in milliseconds. Must be non-negative. + +```java +serverLackOfAckTimeoutMs(int serverLackOfAckTimeoutMs) +``` +Sets server acknowledgment timeout. Must be positive. + +```java +maxMessageSizeBytes(int maxMessageSizeBytes) +``` +Sets maximum message size in bytes. Must be positive. + +```java +offsetCallback(LongConsumer offsetCallback) +``` +Sets callback for offset acknowledgments. + +**Schema Selection Methods:** + +```java + ProtoStreamBuilder compiledProto(T defaultInstance) +``` +Configures for compiled protobuf records. Returns a builder that produces `ProtoZerobusStream`. + +```java +ProtoStreamBuilder dynamicProto(Descriptors.Descriptor descriptor) +``` +Configures for dynamic protobuf records. Returns a builder that produces `ProtoZerobusStream`. + +```java +JsonStreamBuilder json() +``` +Configures for JSON records. Returns a builder that produces `JsonZerobusStream`. + +--- + +### ProtoZerobusStream\ + +Type-safe stream for protobuf record ingestion. Implements `AutoCloseable`. + +**Methods:** + +```java +long ingest(T record) throws ZerobusException +``` +Ingests a single protobuf record. Type-safe - only accepts records of type `T`. + +```java +long ingest(byte[] bytes) throws ZerobusException +``` +Ingests pre-serialized protobuf bytes. Useful when receiving data from Kafka or other systems that already have serialized protobuf. + +```java +Long ingestBatch(MessageBatch messageBatch) throws ZerobusException +``` +Ingests a batch of protobuf records. Returns offset ID or null if empty. Use `MessageBatch.of(recordsList)` to create the batch wrapper. + +```java +Long ingestBatch(BytesBatch bytesBatch) throws ZerobusException +``` +Ingests a batch of pre-serialized protobuf bytes. Returns offset ID or null if empty. Use `BytesBatch.of(bytesList)` to create the batch wrapper. + +```java +void waitForOffset(long offset) throws ZerobusException +``` +Blocks until the offset is acknowledged. + +```java +void flush() throws ZerobusException +``` +Flushes all pending records. 
+ +```java +void close() throws ZerobusException +``` +Flushes and closes the stream. + +```java +String getStreamId() +``` +Returns the server-assigned stream ID. + +```java +StreamState getState() +``` +Returns current stream state. + +--- + +### JsonZerobusStream + +Type-safe stream for JSON record ingestion. Implements `AutoCloseable`. + +**Methods:** + +```java +long ingest(String jsonRecord) throws ZerobusException +``` +Ingests a single JSON record string. + +```java +long ingest(Map record) throws ZerobusException +``` +Ingests a Map that is automatically serialized to JSON. Supports nested Maps, Lists, Strings, Numbers, Booleans, and null values. + +```java +Long ingestBatch(StringBatch stringBatch) throws ZerobusException +``` +Ingests a batch of JSON records. Returns offset ID or null if empty. Use `StringBatch.of(jsonStringsList)` to create the batch wrapper. + +```java +Long ingestBatch(MapBatch mapBatch) throws ZerobusException +``` +Ingests a batch of Maps, each serialized to JSON. Returns offset ID or null if empty. Use `MapBatch.of(mapsList)` to create the batch wrapper. + +```java +void waitForOffset(long offset) throws ZerobusException +``` +Blocks until the offset is acknowledged. + +```java +void flush() throws ZerobusException +``` +Flushes all pending records. + +```java +void close() throws ZerobusException +``` +Flushes and closes the stream. + +```java +String getStreamId() +``` +Returns the server-assigned stream ID. + +```java +StreamState getState() +``` +Returns current stream state. + +--- + +### MessageBatch\ + +Wrapper for a batch of protobuf message records. Used with `ProtoZerobusStream.ingestBatch()` to provide a consistent API. + +**Factory Methods:** + +```java +static MessageBatch of(Iterable records) +``` +Creates a MessageBatch from an iterable of protobuf messages. + +```java +static MessageBatch of(T... records) +``` +Creates a MessageBatch from varargs protobuf messages. + +**Example:** +```java +List records = new ArrayList<>(); +records.add(MyRecord.newBuilder().setField("value1").build()); +records.add(MyRecord.newBuilder().setField("value2").build()); +Long offset = stream.ingestBatch(MessageBatch.of(records)); +``` + +--- + +### BytesBatch + +Wrapper for a batch of pre-serialized protobuf byte arrays. Used with `ProtoZerobusStream.ingestBatch()` to provide a consistent API. + +**Factory Methods:** + +```java +static BytesBatch of(Iterable bytes) +``` +Creates a BytesBatch from an iterable of byte arrays. + +```java +static BytesBatch of(byte[]... bytes) +``` +Creates a BytesBatch from varargs byte arrays. + +**Example:** +```java +List serializedRecords = getSerializedRecordsFromKafka(); +Long offset = stream.ingestBatch(BytesBatch.of(serializedRecords)); +``` + +--- + +### StringBatch + +Wrapper for a batch of string records. Used with `JsonZerobusStream.ingestBatch()` to provide a consistent API. + +**Factory Methods:** + +```java +static StringBatch of(Iterable records) +``` +Creates a StringBatch from an iterable of strings. + +```java +static StringBatch of(String... records) +``` +Creates a StringBatch from varargs strings. + +**Example:** +```java +List records = new ArrayList<>(); +records.add("{\"name\": \"Alice\", \"age\": 30}"); +records.add("{\"name\": \"Bob\", \"age\": 25}"); +Long offset = stream.ingestBatch(StringBatch.of(records)); +``` + +--- + +### MapBatch + +Wrapper for a batch of Map records to be serialized as JSON. Used with `JsonZerobusStream.ingestBatch()` to provide a consistent API. 
+ +**Factory Methods:** + +```java +static MapBatch of(Iterable> maps) +``` +Creates a MapBatch from an iterable of Maps. + +```java +static MapBatch of(Map... maps) +``` +Creates a MapBatch from varargs Maps. + +**Example:** +```java +List> records = new ArrayList<>(); +records.add(createRecord("sensor-1", 25)); +records.add(createRecord("sensor-2", 30)); +Long offset = stream.ingestBatch(MapBatch.of(records)); +``` + +--- + +### HeadersProvider + +Interface for providing custom headers for gRPC stream authentication and configuration. + +**Methods:** + +```java +Map getHeaders() throws NonRetriableException +``` +Returns headers to be attached to gRPC requests. Called when creating or recreating a stream. + +--- + +### OAuthHeadersProvider + +Default implementation of `HeadersProvider` that uses OAuth 2.0 Client Credentials flow with Unity Catalog privileges. + +**Constructor:** +```java +OAuthHeadersProvider( + String tableName, + String workspaceId, + String workspaceUrl, + String clientId, + String clientSecret +) +``` +- `tableName` - Fully qualified table name (catalog.schema.table) +- `workspaceId` - Databricks workspace ID +- `workspaceUrl` - Unity Catalog endpoint URL +- `clientId` - OAuth client ID +- `clientSecret` - OAuth client secret + +**Methods:** + +```java +Map getHeaders() throws NonRetriableException +``` +Fetches a fresh OAuth token and returns headers containing authorization and table name. --- @@ -808,9 +1338,19 @@ Represents an active ingestion stream. **Methods:** ```java -CompletableFuture ingestRecord(RecordType record) throws ZerobusException +long ingest(Object record) throws ZerobusException +``` +Ingests a single record into the stream. Accepts both protobuf `Message` objects and JSON `String` objects based on stream configuration. Returns the logical offset ID immediately. Use `waitForOffset(long)` to wait for server acknowledgment. + +```java +Long ingestBatch(Iterable records) throws ZerobusException +``` +Ingests a batch of records into the stream. All records must match the stream's configured record type. The batch is assigned a single offset ID and acknowledged atomically. Returns the offset ID for the batch, or `null` if empty. + +```java +void waitForOffset(long offset) throws ZerobusException ``` -Ingests a single record into the stream. Returns a future that completes when the record is durably written to storage. +Blocks until the specified offset is acknowledged by the server. Use with `ingest()`/`ingestBatch()` for explicit acknowledgment control. ```java void flush() throws ZerobusException @@ -894,7 +1434,7 @@ Builder for creating `StreamConfigurationOptions`. **Methods:** ```java -StreamConfigurationOptionsBuilder setMaxInflightRecords(int maxInflightRecords) +StreamConfigurationOptionsBuilder setMaxInflightRequests(int maxInflightRecords) ``` Sets the maximum number of unacknowledged records (default: 50000). @@ -929,9 +1469,19 @@ StreamConfigurationOptionsBuilder setServerLackOfAckTimeoutMs(int serverLackOfAc Sets the server acknowledgment timeout in milliseconds (default: 60000). ```java -StreamConfigurationOptionsBuilder setAckCallback(Consumer ackCallback) +StreamConfigurationOptionsBuilder setMaxMessageSizeBytes(int maxMessageSizeBytes) +``` +Sets the maximum message size in bytes. Default is 10MB (`DEFAULT_MAX_MESSAGE_SIZE_BYTES`). Server enforces 10MB limit - messages exceeding this will be rejected. 
+ +```java +StreamConfigurationOptionsBuilder setRecordType(RecordType recordType) +``` +Sets the record type for the stream: `RecordType.PROTO` (default) for protobuf records, or `RecordType.JSON` for JSON records. + +```java +StreamConfigurationOptionsBuilder setOffsetCallback(LongConsumer offsetCallback) ``` -Sets a callback to be invoked when records are acknowledged by the server. +Sets a callback to be invoked when records are acknowledged. The callback receives the durability offset ID. ```java StreamConfigurationOptions build() @@ -969,7 +1519,7 @@ Represents the lifecycle state of a stream. ### ZerobusException -Base exception for retriable errors. +Base exception for retriable errors. Extends `RuntimeException` (unchecked), so no `throws` declaration is required. **Constructors:** ```java @@ -981,7 +1531,7 @@ ZerobusException(String message, Throwable cause) ### NonRetriableException -Exception for non-retriable errors (extends `ZerobusException`). +Exception for non-retriable errors such as invalid credentials, missing table, or schema mismatch. Extends `ZerobusException` (and therefore also `RuntimeException`). **Constructors:** ```java @@ -997,4 +1547,4 @@ NonRetriableException(String message, Throwable cause) 4. **Error handling**: Implement proper retry logic for retriable errors 5. **Monitoring**: Use `ackCallback` to track ingestion progress 6. **Token refresh**: Tokens are automatically refreshed on stream creation and recovery -7. **Proto generation**: Use the built-in `GenerateProto` tool to automatically generate proto files from your table schemas +7. **Proto generation**: Use the Zerobus CLI tool (`zerobus-cli-0.1.0.jar`) to automatically generate proto files from your table schemas diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..1f25568 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,210 @@ +# Zerobus CLI + +Command-line tools for the Databricks Zerobus Ingest SDK. + +## Installation + +**Download from Maven Central:** + +```bash +wget https://repo1.maven.org/maven2/com/databricks/zerobus-cli/0.1.0/zerobus-cli-0.1.0.jar +``` + +**Or build from source:** + +```bash +git clone https://github.com/databricks/zerobus-sdk-java.git +cd zerobus-sdk-java +mvn clean package -pl cli -am +``` + +## Commands + +### generate-proto + +Generates a Protocol Buffer schema file from an existing Unity Catalog table schema. This eliminates the need to manually write proto files and ensures your schema matches the table definition. 
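+
+Once the generated file is compiled (the `protoc` step shown under the example below), the generated class plugs directly into the SDK's `compiledProto(...)` builder path. A minimal sketch, assuming the `AirQuality` example from this page and an `sdk` instance configured as in the main README:
+
+```java
+ProtoZerobusStream stream = sdk.streamBuilder("main.default.air_quality")
+    .clientCredentials(clientId, clientSecret)
+    .compiledProto(AirQuality.getDefaultInstance())
+    .build()
+    .join();
+```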
+
+**Usage:**
+
+```bash
+java -jar zerobus-cli-0.1.0.jar generate-proto \
+  --uc-endpoint <endpoint> \
+  --client-id <client-id> \
+  --client-secret <client-secret> \
+  --table <catalog.schema.table> \
+  --output <output.proto> \
+  [--proto-msg <message-name>]
+```
+
+**Parameters:**
+
+| Parameter | Required | Description |
+|-----------|----------|-------------|
+| `--uc-endpoint` | Yes | Your Databricks workspace URL (e.g., `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com`) |
+| `--client-id` | Yes | Service principal application ID |
+| `--client-secret` | Yes | Service principal secret |
+| `--table` | Yes | Fully qualified table name (`catalog.schema.table`) |
+| `--output` | Yes | Output path for the generated proto file |
+| `--proto-msg` | No | Name for the protobuf message (defaults to table name) |
+
+**Example:**
+
+```bash
+java -jar zerobus-cli-0.1.0.jar generate-proto \
+  --uc-endpoint "https://dbc-a1b2c3d4-e5f6.cloud.databricks.com" \
+  --client-id "your-service-principal-application-id" \
+  --client-secret "your-service-principal-secret" \
+  --table "main.default.air_quality" \
+  --output "src/main/proto/record.proto" \
+  --proto-msg "AirQuality"
+```
+
+For a table defined as:
+
+```sql
+CREATE TABLE main.default.air_quality (
+  device_name STRING,
+  temp INT,
+  humidity BIGINT
+)
+USING DELTA;
+```
+
+The tool generates:
+
+```protobuf
+syntax = "proto3";
+
+message AirQuality {
+  optional string device_name = 1;
+  optional int32 temp = 2;
+  optional int64 humidity = 3;
+}
+```
+
+After generating, compile the proto file:
+
+```bash
+protoc --java_out=src/main/java src/main/proto/record.proto
+```
+
+## Type Mappings
+
+The tool automatically maps Unity Catalog types to Protocol Buffer types:
+
+| Unity Catalog Type | Proto Type |
+|-------------------|------------|
+| TINYINT, BYTE | int32 |
+| SMALLINT, SHORT | int32 |
+| INT | int32 |
+| BIGINT, LONG | int64 |
+| FLOAT | float |
+| DOUBLE | double |
+| STRING, VARCHAR | string |
+| BOOLEAN | bool |
+| BINARY | bytes |
+| DATE | int32 |
+| TIMESTAMP | int64 |
+| TIMESTAMP_NTZ | int64 |
+| VARIANT | string (JSON) |
+| ARRAY\<type\> | repeated type |
+| MAP\<key_type, value_type\> | map\<key_type, value_type\> |
+| STRUCT\<fields\> | nested message |
+
+### Complex Type Examples
+
+**ARRAY:**
+
+```sql
+-- Table definition
+tags ARRAY<STRING>
+```
+
+```protobuf
+// Generated proto
+repeated string tags = 1;
+```
+
+**MAP:**
+
+```sql
+-- Table definition
+metadata MAP<STRING, STRING>
+```
+
+```protobuf
+// Generated proto
+map<string, string> metadata = 1;
+```
+
+**STRUCT:**
+
+```sql
+-- Table definition
+address STRUCT<city: STRING, zip: INT>
+```
+
+```protobuf
+// Generated proto
+message Address {
+  optional string city = 1;
+  optional int32 zip = 2;
+}
+optional Address address = 1;
+```
+
+**Nested STRUCT:**
+
+```sql
+-- Table definition
+person STRUCT<name: STRING, address: STRUCT<city: STRING, country: STRING>>
+```
+
+```protobuf
+// Generated proto
+message Person {
+  message Address {
+    optional string city = 1;
+    optional string country = 2;
+  }
+
+  optional string name = 1;
+  optional Address address = 2;
+}
+optional Person person = 1;
+```
+
+## Authentication
+
+The CLI uses OAuth 2.0 client credentials flow to authenticate with Unity Catalog. You need a service principal with the following permissions on the target table:
+
+- `USE_CATALOG` on the catalog
+- `USE_SCHEMA` on the schema
+- `SELECT` on the table (to read schema)
+
+Grant permissions using SQL:
+
+```sql
+GRANT USE CATALOG ON CATALOG <catalog> TO `<service-principal>`;
+GRANT USE SCHEMA ON SCHEMA <catalog>.<schema> TO `<service-principal>`;
+GRANT SELECT ON TABLE <catalog>.<schema>.<table>
TO ``; +``` + +## Error Handling + +The CLI provides clear error messages for common issues: + +| Error | Cause | Solution | +|-------|-------|----------| +| `OAuth request failed with status 401` | Invalid credentials | Verify client ID and secret | +| `OAuth request failed with status 403` | Insufficient permissions | Grant required permissions to service principal | +| `Failed to fetch table info with status 404` | Table not found | Check table name (catalog.schema.table) | +| `Unsupported column type` | Unsupported Delta type | Check type mappings above | + +## Limitations + +- Nested arrays (`ARRAY>`) are not supported +- Arrays of maps (`ARRAY>`) are not supported +- Maps with map values (`MAP<..., MAP<...>>`) are not supported +- Maps with array values (`MAP<..., ARRAY<...>>`) are not supported +- Maximum struct nesting depth is 100 levels diff --git a/cli/pom.xml b/cli/pom.xml new file mode 100644 index 0000000..41225a2 --- /dev/null +++ b/cli/pom.xml @@ -0,0 +1,181 @@ + + + 4.0.0 + + + com.databricks + zerobus-java-parent + 0.2.0 + + + zerobus-cli + 0.1.0 + jar + Zerobus CLI + Command-line tools for Databricks Zerobus - Generate proto schemas from Unity Catalog tables + https://github.com/databricks/zerobus-sdk-java + + + + + com.databricks + zerobus-common + ${project.parent.version} + + + + org.slf4j + slf4j-simple + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + + org.jacoco + jacoco-maven-plugin + 0.8.11 + + + prepare-agent + + prepare-agent + + + + report + + report + + test + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + com.databricks.zerobus.cli.Main + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + false + + + + + com.google.protobuf + com.databricks.zerobus.shaded.protobuf + + + + io.grpc + com.databricks.zerobus.shaded.grpc + + + + com.google.common + com.databricks.zerobus.shaded.guava + + + + io.perfmark + com.databricks.zerobus.shaded.perfmark + + + + com.google.errorprone + com.databricks.zerobus.shaded.errorprone + + + + com.google.api + com.databricks.zerobus.shaded.google.api + + + com.google.rpc + com.databricks.zerobus.shaded.google.rpc + + + com.google.type + com.databricks.zerobus.shaded.google.type + + + com.google.cloud + com.databricks.zerobus.shaded.google.cloud + + + com.google.logging + com.databricks.zerobus.shaded.google.logging + + + com.google.longrunning + com.databricks.zerobus.shaded.google.longrunning + + + + + + ${project.name} + ${project.version} + com.databricks.zerobus.cli.Main + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/MANIFEST.MF + META-INF/LICENSE.txt + META-INF/versions/9/module-info.class + + + + + + + + + + diff --git a/cli/src/main/java/com/databricks/zerobus/cli/GenerateProto.java b/cli/src/main/java/com/databricks/zerobus/cli/GenerateProto.java new file mode 100644 index 0000000..7f0c492 --- /dev/null +++ b/cli/src/main/java/com/databricks/zerobus/cli/GenerateProto.java @@ -0,0 +1,632 @@ +package com.databricks.zerobus.cli; + +import com.databricks.zerobus.common.http.DefaultHttpClient; +import com.databricks.zerobus.common.http.HttpClient; +import com.databricks.zerobus.common.http.RetryingHttpClient; +import com.databricks.zerobus.common.json.Json; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.net.URLEncoder; 
+import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Generate proto3 file from Unity Catalog table schema. + * + *
<p>This tool fetches table schema from Unity Catalog and generates a corresponding proto3 + * definition file. It supports the standard Delta data types (see the limitations below) and maps them to appropriate Protocol Buffer + * types. + + *
<p>Usage: java GenerateProto --uc-endpoint &lt;endpoint&gt; --client-id &lt;id&gt; + * --client-secret &lt;secret&gt; --table &lt;catalog.schema.table&gt; --output &lt;output.proto&gt; + * [--proto-msg &lt;message_name&gt;] + + *
<p>
Type mappings: INT -> int32 STRING -> string FLOAT -> float LONG/BIGINT -> int64 + * SHORT/SMALLINT -> int32 DOUBLE -> double BOOLEAN -> bool BINARY -> bytes DATE -> + * int32 TIMESTAMP -> int64 ARRAY<type> -> repeated type MAP<key_type, value_type> + * -> map<key_type, value_type> + */ +public class GenerateProto { + + // Proto field modifiers + private static final String OPTIONAL = "optional"; + private static final String REQUIRED = "required"; + private static final String REPEATED = "repeated"; + + // Proto types + private static final String PROTO_INT32 = "int32"; + private static final String PROTO_INT64 = "int64"; + private static final String PROTO_STRING = "string"; + private static final String PROTO_FLOAT = "float"; + private static final String PROTO_DOUBLE = "double"; + private static final String PROTO_BOOL = "bool"; + private static final String PROTO_BYTES = "bytes"; + + // Compiled regex patterns for type matching + private static final Pattern ARRAY_PATTERN = + Pattern.compile("^ARRAY<(.+)>$", Pattern.CASE_INSENSITIVE); + private static final Pattern MAP_PATTERN = + Pattern.compile("^MAP<(.+)>$", Pattern.CASE_INSENSITIVE); + private static final Pattern STRUCT_PATTERN = + Pattern.compile("^STRUCT<(.+)>$", Pattern.CASE_INSENSITIVE); + private static final Pattern VARCHAR_PATTERN = + Pattern.compile("^VARCHAR(\\(\\d+\\))?$", Pattern.CASE_INSENSITIVE); + + private static final String USAGE = + "Usage: java GenerateProto \n" + + " --uc-endpoint Unity Catalog endpoint URL\n" + + " --client-id OAuth client ID\n" + + " --client-secret OAuth client secret\n" + + " --table Full table name\n" + + " --output Output path for proto file\n" + + " [--proto-msg ] Name of protobuf message (defaults to table name)\n" + + "\n" + + "Examples:\n" + + " java GenerateProto \\\n" + + " --uc-endpoint \"https://your-workspace.cloud.databricks.com\" \\\n" + + " --client-id \"your-client-id\" \\\n" + + " --client-secret \"your-client-secret\" \\\n" + + " --table \"catalog.schema.table_name\" \\\n" + + " --proto-msg \"TableMessage\" \\\n" + + " --output \"output.proto\"\n" + + "\n" + + "Type mappings:\n" + + " Delta -> Proto2\n" + + " TINYINT/BYTE -> int32\n" + + " SMALLINT/SHORT -> int32\n" + + " INT -> int32\n" + + " BIGINT/LONG -> int64\n" + + " STRING -> string\n" + + " FLOAT -> float\n" + + " DOUBLE -> double\n" + + " BOOLEAN -> bool\n" + + " BINARY -> bytes\n" + + " DATE -> int32\n" + + " TIMESTAMP -> int64\n" + + " TIMESTAMP_NTZ -> int64\n" + + " VARIANT -> string (unshredded, JSON string)\n" + + " ARRAY -> repeated type\n" + + " MAP -> map\n" + + " STRUCT -> nested message\n"; + + public static void main(String[] args) { + try { + Args parsedArgs = parseArgs(args); + run(parsedArgs, new RetryingHttpClient(DefaultHttpClient.INSTANCE)); + System.out.println("Successfully generated proto file at: " + parsedArgs.output); + System.exit(0); + } catch (IllegalArgumentException e) { + System.err.println("Error: " + e.getMessage()); + System.err.println(); + System.err.println(USAGE); + System.exit(1); + } catch (Exception e) { + System.err.println("Error: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Runs the proto generation with the given arguments and HTTP client. 
+ * + * @param args The parsed command line arguments + * @param httpClient The HTTP client to use for API calls + * @throws Exception if generation fails + */ + static void run(Args args, HttpClient httpClient) throws Exception { + String token = getOAuthToken(args.ucEndpoint, args.clientId, args.clientSecret, httpClient); + + Map tableInfo = fetchTableInfo(args.ucEndpoint, token, args.table, httpClient); + + List> columns = extractColumns(tableInfo); + + String messageName = args.protoMsg != null ? args.protoMsg : args.table.split("\\.")[2]; + + try (FileWriter writer = new FileWriter(args.output)) { + generateProtoContent(messageName, columns, writer); + } + } + + /** + * Parses command line arguments. + * + * @param args The command line arguments + * @return Parsed arguments + * @throws IllegalArgumentException if required arguments are missing or invalid + */ + static Args parseArgs(String[] args) { + Args result = new Args(); + + for (int i = 0; i < args.length; i++) { + String arg = args[i]; + if (arg.equals("--help") || arg.equals("-h")) { + System.out.println(USAGE); + System.exit(0); + } + if (arg.startsWith("--")) { + String key = arg.substring(2); + if (i + 1 >= args.length) { + throw new IllegalArgumentException("Missing value for argument: " + arg); + } + String value = args[++i]; + + switch (key) { + case "uc-endpoint": + result.ucEndpoint = value; + break; + case "client-id": + result.clientId = value; + break; + case "client-secret": + result.clientSecret = value; + break; + case "table": + result.table = value; + break; + case "output": + result.output = value; + break; + case "proto-msg": + result.protoMsg = value; + break; + default: + throw new IllegalArgumentException("Unknown argument: " + arg); + } + } + } + + // Validate required arguments + if (result.ucEndpoint == null) { + throw new IllegalArgumentException("Missing required argument: --uc-endpoint"); + } + if (result.clientId == null) { + throw new IllegalArgumentException("Missing required argument: --client-id"); + } + if (result.clientSecret == null) { + throw new IllegalArgumentException("Missing required argument: --client-secret"); + } + if (result.table == null) { + throw new IllegalArgumentException("Missing required argument: --table"); + } + if (result.output == null) { + throw new IllegalArgumentException("Missing required argument: --output"); + } + + return result; + } + + /** + * Obtains an OAuth token using client credentials flow. + * + * @param ucEndpoint The Unity Catalog endpoint URL + * @param clientId The OAuth client ID + * @param clientSecret The OAuth client secret + * @param httpClient The HTTP client to use + * @return The OAuth access token (JWT) + * @throws Exception if the token request fails + */ + static String getOAuthToken( + String ucEndpoint, String clientId, String clientSecret, HttpClient httpClient) + throws Exception { + String url = ucEndpoint + "/oidc/v1/token"; + String formData = "grant_type=client_credentials&scope=all-apis"; + + String credentials = + Base64.getEncoder() + .encodeToString((clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8)); + + Map headers = new HashMap<>(); + headers.put("Authorization", "Basic " + credentials); + + HttpClient.HttpResponse response = httpClient.post(url, formData, headers); + + if (!response.isSuccess()) { + String errorBody = + response.getErrorBody() != null ? 
response.getErrorBody() : "No error details available"; + throw new IOException( + "OAuth request failed with status " + response.getStatusCode() + ": " + errorBody); + } + + Pattern accessTokenPattern = Pattern.compile("\"access_token\"\\s*:\\s*\"([^\"]+)\""); + Matcher matcher = accessTokenPattern.matcher(response.getBody()); + + if (matcher.find()) { + return matcher.group(1); + } else { + throw new IOException("No access token received from OAuth response"); + } + } + + /** + * Fetch table information from Unity Catalog. + * + * @param endpoint Base URL of the Unity Catalog endpoint + * @param token Authentication token + * @param table Table identifier (catalog.schema.table) + * @param httpClient The HTTP client to use + * @return The parsed table information as a Map + * @throws Exception If the HTTP request fails + */ + @SuppressWarnings("unchecked") + static Map fetchTableInfo( + String endpoint, String token, String table, HttpClient httpClient) throws Exception { + String encodedTable = URLEncoder.encode(table, "UTF-8"); + String url = endpoint + "/api/2.1/unity-catalog/tables/" + encodedTable; + + Map headers = new HashMap<>(); + headers.put("Authorization", "Bearer " + token); + headers.put("Content-Type", "application/json"); + + HttpClient.HttpResponse response = httpClient.get(url, headers); + + if (!response.isSuccess()) { + String errorBody = + response.getErrorBody() != null ? response.getErrorBody() : "No error details available"; + throw new IOException( + "Failed to fetch table info with status " + response.getStatusCode() + ": " + errorBody); + } + + return (Map) Json.parse(response.getBody()); + } + + /** + * Extract column information from the table schema. + * + * @param tableInfo Raw table information from Unity Catalog + * @return List of column information maps + * @throws IllegalArgumentException If the expected schema structure is not found + */ + @SuppressWarnings("unchecked") + static List> extractColumns(Map tableInfo) { + if (!tableInfo.containsKey("columns")) { + throw new IllegalArgumentException("No columns found in table info"); + } + return (List>) tableInfo.get("columns"); + } + + /** + * Get basic proto type mapping for simple types. + * + * @param type The Unity Catalog type + * @return The proto type or null if not a basic type + */ + static String getBasicProtoType(String type) { + String upperType = type.trim().toUpperCase(); + switch (upperType) { + case "TINYINT": + case "BYTE": + case "SMALLINT": + case "SHORT": + case "INT": + case "DATE": + return PROTO_INT32; + case "BIGINT": + case "LONG": + case "TIMESTAMP": + case "TIMESTAMP_NTZ": + return PROTO_INT64; + case "STRING": + case "VARIANT": + return PROTO_STRING; + case "FLOAT": + return PROTO_FLOAT; + case "DOUBLE": + return PROTO_DOUBLE; + case "BOOLEAN": + return PROTO_BOOL; + case "BINARY": + return PROTO_BYTES; + default: + if (VARCHAR_PATTERN.matcher(upperType).matches()) { + return PROTO_STRING; + } + return null; + } + } + + /** + * Convert a snake_case string to PascalCase. 
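+   * <p>Used when naming nested proto messages: a struct column named {@code location} yields a
+   * nested {@code message Location} in the generated schema.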
+ * + * @param s The string to convert (e.g., "field_name") + * @return The string in PascalCase (e.g., "FieldName") + */ + static String toPascalCase(String s) { + if (s == null || s.isEmpty()) { + return s; + } + StringBuilder result = new StringBuilder(); + for (String word : s.split("_")) { + if (!word.isEmpty()) { + result.append(Character.toUpperCase(word.charAt(0))); + if (word.length() > 1) { + result.append(word.substring(1)); + } + } + } + return result.toString(); + } + + /** + * Parse struct fields from the inner content of a STRUCT type. + * + * @param inner The inner content of STRUCT (e.g., "field1:STRING, field2:INT") + * @return List of field name/type pairs, or null if parsing fails + */ + static List parseStructFields(String inner) { + List fields = new ArrayList<>(); + int depth = 0; + StringBuilder current = new StringBuilder(); + + for (char c : inner.toCharArray()) { + if (c == '<') { + depth++; + current.append(c); + } else if (c == '>') { + depth--; + current.append(c); + } else if (c == ',' && depth == 0) { + fields.add(current.toString().trim()); + current = new StringBuilder(); + } else { + current.append(c); + } + } + + String lastField = current.toString().trim(); + if (!lastField.isEmpty()) { + fields.add(lastField); + } + + List result = new ArrayList<>(); + for (String field : fields) { + int colonIndex = field.indexOf(':'); + if (colonIndex == -1) { + return null; + } + String fieldName = field.substring(0, colonIndex).trim(); + String fieldType = field.substring(colonIndex + 1).trim(); + result.add(new String[] {fieldName, fieldType}); + } + + return result.isEmpty() ? null : result; + } + + /** Holds proto field information including any nested message definitions. */ + static class ProtoFieldInfo { + final String modifier; + final String protoType; + final String nestedDefinition; + + ProtoFieldInfo(String modifier, String protoType, String nestedDefinition) { + this.modifier = modifier; + this.protoType = protoType; + this.nestedDefinition = nestedDefinition; + } + } + + /** Maximum nesting depth for STRUCT types to prevent infinite recursion. */ + private static final int MAX_NESTING_DEPTH = 100; + + /** + * Map Unity Catalog column types to proto3 field information. + * + *
<p>
Supports all Unity Catalog types including nested STRUCTs, ARRAYs, and MAPs. + * + * @param fieldName The field name (used for naming struct messages) + * @param columnType The Unity Catalog column type + * @param nullable Whether the column is nullable + * @param structCounter Counter for generating unique struct names (mutable) + * @param level Current nesting level (for recursion depth tracking) + * @return ProtoFieldInfo with modifier, type, and optional nested definition + * @throws IllegalArgumentException If the column type is not supported or nesting is too deep + */ + static ProtoFieldInfo getProtoFieldInfo( + String fieldName, String columnType, boolean nullable, int[] structCounter, int level) { + + if (level > MAX_NESTING_DEPTH) { + throw new IllegalArgumentException( + "Nesting level exceeds maximum depth of " + MAX_NESTING_DEPTH); + } + + String trimmedType = columnType.trim(); + + String protoType = getBasicProtoType(trimmedType); + if (protoType != null) { + return new ProtoFieldInfo(nullable ? OPTIONAL : REQUIRED, protoType, null); + } + + Matcher arrayMatcher = ARRAY_PATTERN.matcher(trimmedType); + if (arrayMatcher.matches()) { + String elementType = arrayMatcher.group(1).trim(); + + if (ARRAY_PATTERN.matcher(elementType).matches()) { + throw new IllegalArgumentException("Nested arrays are not supported: ARRAY>"); + } + + if (MAP_PATTERN.matcher(elementType).matches()) { + throw new IllegalArgumentException("Arrays of maps are not supported: ARRAY>"); + } + + ProtoFieldInfo elementInfo = + getProtoFieldInfo(fieldName, elementType, false, structCounter, level + 1); + return new ProtoFieldInfo(REPEATED, elementInfo.protoType, elementInfo.nestedDefinition); + } + + Matcher mapMatcher = MAP_PATTERN.matcher(trimmedType); + if (mapMatcher.matches()) { + String inner = mapMatcher.group(1); + + int depth = 0; + int splitIndex = -1; + for (int i = 0; i < inner.length(); i++) { + char c = inner.charAt(i); + if (c == '<') depth++; + else if (c == '>') depth--; + else if (c == ',' && depth == 0) { + splitIndex = i; + break; + } + } + + if (splitIndex == -1) { + throw new IllegalArgumentException("Invalid map type: " + columnType); + } + + String keyType = inner.substring(0, splitIndex).trim(); + String valueType = inner.substring(splitIndex + 1).trim(); + + ProtoFieldInfo keyInfo = + getProtoFieldInfo(fieldName, keyType, false, structCounter, level + 1); + if (keyInfo.nestedDefinition != null) { + throw new IllegalArgumentException("Unsupported map key type: " + keyType); + } + + if (MAP_PATTERN.matcher(valueType).matches()) { + throw new IllegalArgumentException( + "Maps with map values are not supported: MAP<..., MAP<...>>"); + } + + if (ARRAY_PATTERN.matcher(valueType).matches()) { + throw new IllegalArgumentException( + "Maps with array values are not supported: MAP<..., ARRAY<...>>"); + } + + ProtoFieldInfo valueInfo = + getProtoFieldInfo(fieldName, valueType, false, structCounter, level + 1); + + String mapType = "map<" + keyInfo.protoType + ", " + valueInfo.protoType + ">"; + return new ProtoFieldInfo("", mapType, valueInfo.nestedDefinition); + } + + Matcher structMatcher = STRUCT_PATTERN.matcher(trimmedType); + if (structMatcher.matches()) { + List structFields = parseStructFields(structMatcher.group(1)); + structCounter[0]++; + String baseName = toPascalCase(fieldName); + String structName = + (baseName != null && !baseName.isEmpty()) ? 
baseName : "Struct" + structCounter[0]; + + String indent = repeat(" ", level); + String innerIndent = repeat(" ", level + 1); + + StringBuilder structDef = new StringBuilder(); + structDef.append(indent).append("message ").append(structName).append(" {\n"); + + int fieldNumber = 1; + for (String[] fieldPair : structFields) { + String fname = fieldPair[0]; + String ftype = fieldPair[1]; + + ProtoFieldInfo fieldInfo = getProtoFieldInfo(fname, ftype, true, structCounter, level + 1); + + if (fieldInfo.nestedDefinition != null) { + structDef.append(fieldInfo.nestedDefinition).append("\n\n"); + } + + if (fieldInfo.modifier.isEmpty()) { + structDef + .append(innerIndent) + .append(fieldInfo.protoType) + .append(" ") + .append(fname) + .append(" = ") + .append(fieldNumber) + .append(";\n"); + } else { + structDef + .append(innerIndent) + .append(fieldInfo.modifier) + .append(" ") + .append(fieldInfo.protoType) + .append(" ") + .append(fname) + .append(" = ") + .append(fieldNumber) + .append(";\n"); + } + fieldNumber++; + } + + structDef.append(indent).append("}"); + + return new ProtoFieldInfo(nullable ? OPTIONAL : REQUIRED, structName, structDef.toString()); + } + + throw new IllegalArgumentException("Unsupported column type: " + columnType); + } + + /** Repeat a string n times. */ + private static String repeat(String s, int n) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) { + sb.append(s); + } + return sb.toString(); + } + + /** + * Generate proto3 content from the column information. + * + * @param messageName Name of the protobuf message + * @param columns List of column information maps + * @param writer Writer to write the proto content to + * @throws IOException If writing fails + */ + static void generateProtoContent( + String messageName, List> columns, Writer writer) throws IOException { + writer.write("syntax = \"proto3\";\n"); + writer.write("\n"); + writer.write("message " + messageName + " {\n"); + + int[] structCounter = {0}; + int fieldNumber = 1; + for (Map col : columns) { + String fieldName = (String) col.get("name"); + String typeText = (String) col.get("type_text"); + boolean nullable = (Boolean) col.get("nullable"); + + ProtoFieldInfo fieldInfo = getProtoFieldInfo(fieldName, typeText, nullable, structCounter, 1); + + if (fieldInfo.nestedDefinition != null) { + writer.write(fieldInfo.nestedDefinition); + writer.write("\n\n"); + } + + if (fieldInfo.modifier.isEmpty()) { + writer.write(" " + fieldInfo.protoType + " " + fieldName + " = " + fieldNumber + ";\n"); + } else { + writer.write( + " " + + fieldInfo.modifier + + " " + + fieldInfo.protoType + + " " + + fieldName + + " = " + + fieldNumber + + ";\n"); + } + fieldNumber++; + } + + writer.write("}\n"); + } + + /** Command-line arguments holder. */ + static class Args { + String ucEndpoint; + String clientId; + String clientSecret; + String table; + String output; + String protoMsg; + } +} diff --git a/cli/src/main/java/com/databricks/zerobus/cli/Main.java b/cli/src/main/java/com/databricks/zerobus/cli/Main.java new file mode 100644 index 0000000..5b33969 --- /dev/null +++ b/cli/src/main/java/com/databricks/zerobus/cli/Main.java @@ -0,0 +1,77 @@ +package com.databricks.zerobus.cli; + +/** + * Main entry point for the Zerobus CLI. + * + *
<p>Usage: + + *
<pre>{@code
+ * # Generate proto schema from Unity Catalog table
+ * java -jar zerobus-cli-0.1.0.jar generate-proto \
+ *   --uc-endpoint "https://your-workspace.cloud.databricks.com" \
+ *   --client-id "your-client-id" \
+ *   --client-secret "your-client-secret" \
+ *   --table "catalog.schema.table" \
+ *   --output "output.proto"
+ * }</pre>
+ */ +public class Main { + + private static final String VERSION = "0.1.0"; + + private static final String USAGE = + "Zerobus CLI - Tools for Databricks Zerobus\n" + + "\n" + + "Usage: java -jar zerobus-cli.jar [options]\n" + + "\n" + + "Commands:\n" + + " generate-proto Generate proto2 schema from Unity Catalog table\n" + + " version Show version information\n" + + " help Show this help message\n" + + "\n" + + "Examples:\n" + + " java -jar zerobus-cli.jar generate-proto \\\n" + + " --uc-endpoint \"https://your-workspace.cloud.databricks.com\" \\\n" + + " --client-id \"your-client-id\" \\\n" + + " --client-secret \"your-client-secret\" \\\n" + + " --table \"catalog.schema.table\" \\\n" + + " --output \"output.proto\"\n" + + "\n" + + "For command-specific help:\n" + + " java -jar zerobus-cli.jar generate-proto --help\n"; + + public static void main(String[] args) { + if (args.length == 0) { + System.out.println(USAGE); + System.exit(0); + } + + String command = args[0]; + + switch (command) { + case "generate-proto": + String[] protoArgs = new String[args.length - 1]; + System.arraycopy(args, 1, protoArgs, 0, args.length - 1); + GenerateProto.main(protoArgs); + break; + + case "version": + case "--version": + case "-v": + System.out.println("zerobus-cli " + VERSION); + break; + + case "help": + case "--help": + case "-h": + System.out.println(USAGE); + break; + + default: + System.err.println("Unknown command: " + command); + System.err.println(); + System.err.println(USAGE); + System.exit(1); + } + } +} diff --git a/cli/src/test/java/com/databricks/zerobus/cli/GenerateProtoTest.java b/cli/src/test/java/com/databricks/zerobus/cli/GenerateProtoTest.java new file mode 100644 index 0000000..c51f93d --- /dev/null +++ b/cli/src/test/java/com/databricks/zerobus/cli/GenerateProtoTest.java @@ -0,0 +1,822 @@ +package com.databricks.zerobus.cli; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.zerobus.common.http.HttpClient; +import com.databricks.zerobus.common.json.Json; +import java.io.IOException; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** Tests for GenerateProto tool. 
*/ +class GenerateProtoTest { + + // ==================== parseArgs Tests ==================== + + @Test + void testParseArgsComplete() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-id", "my-client", + "--client-secret", "my-secret", + "--table", "cat.sch.tbl", + "--output", "output.proto", + "--proto-msg", "MyMessage" + }; + + GenerateProto.Args parsed = GenerateProto.parseArgs(args); + + assertEquals("https://endpoint.com", parsed.ucEndpoint); + assertEquals("my-client", parsed.clientId); + assertEquals("my-secret", parsed.clientSecret); + assertEquals("cat.sch.tbl", parsed.table); + assertEquals("output.proto", parsed.output); + assertEquals("MyMessage", parsed.protoMsg); + } + + @Test + void testParseArgsWithoutOptionalProtoMsg() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-id", "my-client", + "--client-secret", "my-secret", + "--table", "cat.sch.tbl", + "--output", "output.proto" + }; + + GenerateProto.Args parsed = GenerateProto.parseArgs(args); + + assertNull(parsed.protoMsg); + } + + @Test + void testParseArgsMissingEndpoint() { + String[] args = { + "--client-id", "my-client", + "--client-secret", "my-secret", + "--table", "cat.sch.tbl", + "--output", "output.proto" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("--uc-endpoint")); + } + + @Test + void testParseArgsMissingClientId() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-secret", "my-secret", + "--table", "cat.sch.tbl", + "--output", "output.proto" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("--client-id")); + } + + @Test + void testParseArgsMissingClientSecret() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-id", "my-client", + "--table", "cat.sch.tbl", + "--output", "output.proto" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("--client-secret")); + } + + @Test + void testParseArgsMissingTable() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-id", "my-client", + "--client-secret", "my-secret", + "--output", "output.proto" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("--table")); + } + + @Test + void testParseArgsMissingOutput() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--client-id", "my-client", + "--client-secret", "my-secret", + "--table", "cat.sch.tbl" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("--output")); + } + + @Test + void testParseArgsUnknownArgument() { + String[] args = { + "--uc-endpoint", "https://endpoint.com", + "--unknown-arg", "value" + }; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + assertTrue(ex.getMessage().contains("Unknown argument")); + } + + @Test + void testParseArgsMissingValue() { + String[] args = {"--uc-endpoint"}; + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.parseArgs(args)); + 
assertTrue(ex.getMessage().contains("Missing value")); + } + + // ==================== getBasicProtoType Tests ==================== + + @Test + void testGetBasicProtoTypeInt32Types() { + assertEquals("int32", GenerateProto.getBasicProtoType("INT")); + assertEquals("int32", GenerateProto.getBasicProtoType("int")); + assertEquals("int32", GenerateProto.getBasicProtoType("TINYINT")); + assertEquals("int32", GenerateProto.getBasicProtoType("BYTE")); + assertEquals("int32", GenerateProto.getBasicProtoType("SMALLINT")); + assertEquals("int32", GenerateProto.getBasicProtoType("SHORT")); + assertEquals("int32", GenerateProto.getBasicProtoType("DATE")); + } + + @Test + void testGetBasicProtoTypeInt64Types() { + assertEquals("int64", GenerateProto.getBasicProtoType("BIGINT")); + assertEquals("int64", GenerateProto.getBasicProtoType("LONG")); + assertEquals("int64", GenerateProto.getBasicProtoType("TIMESTAMP")); + assertEquals("int64", GenerateProto.getBasicProtoType("TIMESTAMP_NTZ")); + } + + @Test + void testGetBasicProtoTypeStringTypes() { + assertEquals("string", GenerateProto.getBasicProtoType("STRING")); + assertEquals("string", GenerateProto.getBasicProtoType("VARIANT")); + } + + @Test + void testGetBasicProtoTypeOtherTypes() { + assertEquals("float", GenerateProto.getBasicProtoType("FLOAT")); + assertEquals("double", GenerateProto.getBasicProtoType("DOUBLE")); + assertEquals("bool", GenerateProto.getBasicProtoType("BOOLEAN")); + assertEquals("bytes", GenerateProto.getBasicProtoType("BINARY")); + } + + @Test + void testGetBasicProtoTypeUnknown() { + assertNull(GenerateProto.getBasicProtoType("UNKNOWN")); + assertNull(GenerateProto.getBasicProtoType("STRUCT")); + assertNull(GenerateProto.getBasicProtoType("ARRAY")); + } + + // ==================== toPascalCase Tests ==================== + + @Test + void testToPascalCaseSimple() { + assertEquals("FieldName", GenerateProto.toPascalCase("field_name")); + } + + @Test + void testToPascalCaseSingleWord() { + assertEquals("Field", GenerateProto.toPascalCase("field")); + } + + @Test + void testToPascalCaseMultipleUnderscores() { + assertEquals("MyFieldName", GenerateProto.toPascalCase("my_field_name")); + } + + @Test + void testToPascalCaseEmpty() { + assertEquals("", GenerateProto.toPascalCase("")); + } + + @Test + void testToPascalCaseNull() { + assertNull(GenerateProto.toPascalCase(null)); + } + + // ==================== parseStructFields Tests ==================== + + @Test + void testParseStructFieldsSimple() { + List result = GenerateProto.parseStructFields("name:STRING, age:INT"); + assertNotNull(result); + assertEquals(2, result.size()); + assertEquals("name", result.get(0)[0]); + assertEquals("STRING", result.get(0)[1]); + assertEquals("age", result.get(1)[0]); + assertEquals("INT", result.get(1)[1]); + } + + @Test + void testParseStructFieldsNested() { + List result = GenerateProto.parseStructFields("addr:STRUCT"); + assertNotNull(result); + assertEquals(1, result.size()); + assertEquals("addr", result.get(0)[0]); + assertEquals("STRUCT", result.get(0)[1]); + } + + @Test + void testParseStructFieldsInvalidFormat() { + // Missing colon + assertNull(GenerateProto.parseStructFields("name STRING")); + } + + // ==================== getProtoFieldInfo Tests ==================== + + @Test + void testGetProtoFieldInfoBasicNullable() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo("field", "STRING", true, counter, 0); + assertEquals("optional", info.modifier); + assertEquals("string", info.protoType); + 
assertNull(info.nestedDefinition); + } + + @Test + void testGetProtoFieldInfoBasicRequired() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo("field", "INT", false, counter, 0); + assertEquals("required", info.modifier); + assertEquals("int32", info.protoType); + assertNull(info.nestedDefinition); + } + + @Test + void testGetProtoFieldInfoVarchar() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo("field", "VARCHAR(255)", true, counter, 0); + assertEquals("optional", info.modifier); + assertEquals("string", info.protoType); + assertNull(info.nestedDefinition); + } + + @Test + void testGetProtoFieldInfoArray() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo("tags", "ARRAY", true, counter, 0); + assertEquals("repeated", info.modifier); + assertEquals("string", info.protoType); + assertNull(info.nestedDefinition); + } + + @Test + void testGetProtoFieldInfoMap() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo("metadata", "MAP", true, counter, 0); + assertEquals("", info.modifier); + assertEquals("map", info.protoType); + assertNull(info.nestedDefinition); + } + + @Test + void testGetProtoFieldInfoSimpleStruct() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo( + "location", "STRUCT", true, counter, 0); + assertEquals("optional", info.modifier); + assertEquals("Location", info.protoType); + assertNotNull(info.nestedDefinition); + assertTrue(info.nestedDefinition.contains("message Location")); + assertTrue(info.nestedDefinition.contains("optional double lat")); + assertTrue(info.nestedDefinition.contains("optional double lng")); + } + + @Test + void testGetProtoFieldInfoNestedStruct() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo( + "address", + "STRUCT>", + true, + counter, + 0); + assertEquals("optional", info.modifier); + assertEquals("Address", info.protoType); + assertNotNull(info.nestedDefinition); + assertTrue(info.nestedDefinition.contains("message Address")); + assertTrue(info.nestedDefinition.contains("message Location")); + assertTrue(info.nestedDefinition.contains("optional string city")); + assertTrue(info.nestedDefinition.contains("optional double lat")); + } + + @Test + void testGetProtoFieldInfoArrayOfStruct() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo( + "addresses", "ARRAY>", true, counter, 0); + assertEquals("repeated", info.modifier); + assertEquals("Addresses", info.protoType); + assertNotNull(info.nestedDefinition); + assertTrue(info.nestedDefinition.contains("message Addresses")); + assertTrue(info.nestedDefinition.contains("optional string city")); + assertTrue(info.nestedDefinition.contains("optional int32 zip")); + } + + @Test + void testGetProtoFieldInfoMapWithStructValue() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo( + "users", "MAP>", true, counter, 0); + assertEquals("", info.modifier); + assertEquals("map", info.protoType); + assertNotNull(info.nestedDefinition); + assertTrue(info.nestedDefinition.contains("message Users")); + } + + @Test + void testGetProtoFieldInfoStructAsMapKey() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> + GenerateProto.getProtoFieldInfo( + "field", "MAP, STRING>", true, counter, 
0)); + assertTrue(ex.getMessage().contains("Unsupported map key type")); + } + + @Test + void testGetProtoFieldInfoNestedArrays() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> GenerateProto.getProtoFieldInfo("field", "ARRAY>", true, counter, 0)); + assertTrue(ex.getMessage().contains("Nested arrays are not supported")); + } + + @Test + void testGetProtoFieldInfoArrayOfMaps() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> + GenerateProto.getProtoFieldInfo( + "field", "ARRAY>", true, counter, 0)); + assertTrue(ex.getMessage().contains("Arrays of maps are not supported")); + } + + @Test + void testGetProtoFieldInfoMapOfMaps() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> + GenerateProto.getProtoFieldInfo( + "field", "MAP>", true, counter, 0)); + assertTrue(ex.getMessage().contains("Maps with map values are not supported")); + } + + @Test + void testGetProtoFieldInfoMapOfArrays() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> + GenerateProto.getProtoFieldInfo( + "field", "MAP>", true, counter, 0)); + assertTrue(ex.getMessage().contains("Maps with array values are not supported")); + } + + @Test + void testGetProtoFieldInfoUnsupportedType() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> GenerateProto.getProtoFieldInfo("field", "UNSUPPORTED_TYPE", true, counter, 0)); + assertTrue(ex.getMessage().contains("Unsupported column type")); + } + + @Test + void testGetProtoFieldInfoMaxNestingDepthExceeded() { + int[] counter = {0}; + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> GenerateProto.getProtoFieldInfo("field", "STRING", true, counter, 101)); + assertTrue(ex.getMessage().contains("Nesting level exceeds maximum depth")); + } + + @Test + void testGetProtoFieldInfoStructWithMapField() { + int[] counter = {0}; + GenerateProto.ProtoFieldInfo info = + GenerateProto.getProtoFieldInfo( + "config", "STRUCT>", true, counter, 0); + assertEquals("optional", info.modifier); + assertEquals("Config", info.protoType); + assertNotNull(info.nestedDefinition); + assertTrue(info.nestedDefinition.contains("message Config")); + assertTrue(info.nestedDefinition.contains("optional string name")); + assertTrue(info.nestedDefinition.contains("map metadata")); + } + + // ==================== generateProtoContent Tests ==================== + + @Test + void testGenerateProtoContentWithStruct() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("id", "INT", false)); + columns.add(createColumn("location", "STRUCT", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("MessageWithStruct", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("syntax = \"proto3\";")); + assertTrue(proto.contains("message MessageWithStruct {")); + assertTrue(proto.contains("message Location {")); + assertTrue(proto.contains("optional double lat = 1;")); + assertTrue(proto.contains("optional double lng = 2;")); + assertTrue(proto.contains("required int32 id = 1;")); + assertTrue(proto.contains("optional Location location = 2;")); + } + + @Test + void testGenerateProtoContentWithNestedStruct() throws IOException { + List> columns = new ArrayList<>(); + columns.add( + 
createColumn( + "address", "STRUCT>", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("NestedMessage", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("message NestedMessage {")); + assertTrue(proto.contains("message Address {")); + assertTrue(proto.contains("message Coords {")); + assertTrue(proto.contains("optional double lat")); + assertTrue(proto.contains("optional double lng")); + assertTrue(proto.contains("optional string city")); + assertTrue(proto.contains("optional Coords coords")); + assertTrue(proto.contains("optional Address address")); + } + + @Test + void testGenerateProtoContentWithArrayOfStruct() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("people", "ARRAY>", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("ArrayOfStructMessage", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("message ArrayOfStructMessage {")); + assertTrue(proto.contains("message People {")); + assertTrue(proto.contains("optional string name")); + assertTrue(proto.contains("optional int32 age")); + assertTrue(proto.contains("repeated People people")); + } + + // ==================== extractColumns Tests ==================== + + @Test + void testExtractColumnsValid() { + Map tableInfo = new HashMap<>(); + List> columns = new ArrayList<>(); + columns.add(createColumn("id", "INT", false)); + columns.add(createColumn("name", "STRING", true)); + tableInfo.put("columns", columns); + + List> result = GenerateProto.extractColumns(tableInfo); + + assertEquals(2, result.size()); + assertEquals("id", result.get(0).get("name")); + assertEquals("name", result.get(1).get("name")); + } + + @Test + void testExtractColumnsNoColumns() { + Map tableInfo = new HashMap<>(); + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> GenerateProto.extractColumns(tableInfo)); + assertTrue(ex.getMessage().contains("No columns found")); + } + + // ==================== generateProtoContent Tests ==================== + + @Test + void testGenerateProtoContentBasic() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("id", "INT", false)); + columns.add(createColumn("name", "STRING", true)); + columns.add(createColumn("value", "DOUBLE", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("TestMessage", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("syntax = \"proto3\";")); + assertTrue(proto.contains("message TestMessage {")); + assertTrue(proto.contains("required int32 id = 1;")); + assertTrue(proto.contains("optional string name = 2;")); + assertTrue(proto.contains("optional double value = 3;")); + assertTrue(proto.contains("}")); + } + + @Test + void testGenerateProtoContentWithArray() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("tags", "ARRAY", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("ArrayMessage", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("repeated string tags = 1;")); + } + + @Test + void testGenerateProtoContentWithMap() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("metadata", "MAP", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("MapMessage", columns, writer); + String proto = 
writer.toString(); + + assertTrue(proto.contains("map metadata = 1;")); + } + + @Test + void testGenerateProtoContentAllTypes() throws IOException { + List> columns = new ArrayList<>(); + columns.add(createColumn("tiny", "TINYINT", true)); + columns.add(createColumn("small", "SMALLINT", true)); + columns.add(createColumn("regular", "INT", true)); + columns.add(createColumn("big", "BIGINT", true)); + columns.add(createColumn("text", "STRING", true)); + columns.add(createColumn("decimal", "FLOAT", true)); + columns.add(createColumn("precise", "DOUBLE", true)); + columns.add(createColumn("flag", "BOOLEAN", true)); + columns.add(createColumn("data", "BINARY", true)); + columns.add(createColumn("day", "DATE", true)); + columns.add(createColumn("moment", "TIMESTAMP", true)); + columns.add(createColumn("local_moment", "TIMESTAMP_NTZ", true)); + columns.add(createColumn("json_data", "VARIANT", true)); + + StringWriter writer = new StringWriter(); + GenerateProto.generateProtoContent("AllTypesMessage", columns, writer); + String proto = writer.toString(); + + assertTrue(proto.contains("optional int32 tiny = 1;")); + assertTrue(proto.contains("optional int32 small = 2;")); + assertTrue(proto.contains("optional int32 regular = 3;")); + assertTrue(proto.contains("optional int64 big = 4;")); + assertTrue(proto.contains("optional string text = 5;")); + assertTrue(proto.contains("optional float decimal = 6;")); + assertTrue(proto.contains("optional double precise = 7;")); + assertTrue(proto.contains("optional bool flag = 8;")); + assertTrue(proto.contains("optional bytes data = 9;")); + assertTrue(proto.contains("optional int32 day = 10;")); + assertTrue(proto.contains("optional int64 moment = 11;")); + assertTrue(proto.contains("optional int64 local_moment = 12;")); + assertTrue(proto.contains("optional string json_data = 13;")); + } + + // ==================== parseJson Tests ==================== + + @Test + @SuppressWarnings("unchecked") + void testParseJsonObject() { + String json = "{\"name\": \"test\", \"count\": 42}"; + Map result = (Map) Json.parse(json); + + assertEquals("test", result.get("name")); + assertEquals(42, result.get("count")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonArray() { + String json = "[1, 2, 3]"; + List result = (List) Json.parse(json); + + assertEquals(3, result.size()); + assertEquals(1, result.get(0)); + assertEquals(2, result.get(1)); + assertEquals(3, result.get(2)); + } + + @Test + void testParseJsonString() { + String json = "\"hello\""; + String result = (String) Json.parse(json); + assertEquals("hello", result); + } + + @Test + void testParseJsonNumber() { + assertEquals(42, Json.parse("42")); + assertEquals(3.14, Json.parse("3.14")); + assertEquals(-100, Json.parse("-100")); + } + + @Test + void testParseJsonBoolean() { + assertEquals(true, Json.parse("true")); + assertEquals(false, Json.parse("false")); + } + + @Test + void testParseJsonNull() { + assertNull(Json.parse("null")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonNested() { + String json = "{\"user\": {\"name\": \"Alice\", \"age\": 30}, \"active\": true}"; + Map result = (Map) Json.parse(json); + + Map user = (Map) result.get("user"); + assertEquals("Alice", user.get("name")); + assertEquals(30, user.get("age")); + assertEquals(true, result.get("active")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonEscapedStrings() { + String json = "{\"text\": \"hello\\nworld\", \"path\": \"c:\\\\test\"}"; + Map result = (Map) Json.parse(json); + + 
assertEquals("hello\nworld", result.get("text")); + assertEquals("c:\\test", result.get("path")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonUnicode() { + String json = "{\"emoji\": \"\\u0048\\u0065\\u006C\\u006C\\u006F\"}"; + Map result = (Map) Json.parse(json); + assertEquals("Hello", result.get("emoji")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonEmptyObject() { + String json = "{}"; + Map result = (Map) Json.parse(json); + assertTrue(result.isEmpty()); + } + + @Test + @SuppressWarnings("unchecked") + void testParseJsonEmptyArray() { + String json = "[]"; + List result = (List) Json.parse(json); + assertTrue(result.isEmpty()); + } + + @Test + void testParseJsonInvalid() { + assertThrows(IllegalArgumentException.class, () -> Json.parse("{invalid}")); + assertThrows(IllegalArgumentException.class, () -> Json.parse("{\"key\":}")); + assertThrows(IllegalArgumentException.class, () -> Json.parse("")); + } + + // ==================== HTTP Integration Tests ==================== + + @Test + void testGetOAuthTokenSuccess() throws Exception { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse( + new HttpClient.HttpResponse(200, "{\"access_token\": \"my-token\"}", null)); + + String token = + GenerateProto.getOAuthToken("https://endpoint.com", "client", "secret", mockClient); + + assertEquals("my-token", token); + assertEquals("https://endpoint.com/oidc/v1/token", mockClient.lastUrl); + } + + @Test + void testGetOAuthTokenFailure() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse(new HttpClient.HttpResponse(401, null, "Unauthorized")); + + IOException ex = + assertThrows( + IOException.class, + () -> + GenerateProto.getOAuthToken( + "https://endpoint.com", "client", "secret", mockClient)); + + assertTrue(ex.getMessage().contains("401")); + } + + @Test + void testGetOAuthTokenMissingAccessToken() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse(new HttpClient.HttpResponse(200, "{\"error\": \"invalid\"}", null)); + + IOException ex = + assertThrows( + IOException.class, + () -> + GenerateProto.getOAuthToken( + "https://endpoint.com", "client", "secret", mockClient)); + + assertTrue(ex.getMessage().contains("No access token")); + } + + @Test + @SuppressWarnings("unchecked") + void testFetchTableInfoSuccess() throws Exception { + MockHttpClient mockClient = new MockHttpClient(); + String tableJson = + "{\"name\": \"test_table\", \"columns\": [{\"name\": \"id\", \"type_text\": \"INT\", \"nullable\": false}]}"; + mockClient.setResponse(new HttpClient.HttpResponse(200, tableJson, null)); + + Map result = + GenerateProto.fetchTableInfo("https://endpoint.com", "token", "cat.sch.tbl", mockClient); + + assertEquals("test_table", result.get("name")); + List> columns = (List>) result.get("columns"); + assertEquals(1, columns.size()); + assertEquals("id", columns.get(0).get("name")); + } + + @Test + void testFetchTableInfoFailure() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse(new HttpClient.HttpResponse(404, null, "Table not found")); + + IOException ex = + assertThrows( + IOException.class, + () -> + GenerateProto.fetchTableInfo( + "https://endpoint.com", "token", "cat.sch.tbl", mockClient)); + + assertTrue(ex.getMessage().contains("404")); + } + + // ==================== Helper Methods ==================== + + private Map createColumn(String name, String typeText, boolean nullable) { + Map col = new HashMap<>(); + col.put("name", name); + 
col.put("type_text", typeText); + col.put("nullable", nullable); + return col; + } + + /** Mock HttpClient for testing. */ + private static class MockHttpClient implements HttpClient { + private HttpClient.HttpResponse response; + private String lastUrl; + + void setResponse(HttpClient.HttpResponse response) { + this.response = response; + } + + @Override + public HttpClient.HttpResponse post(String url, String formData, Map headers) { + this.lastUrl = url; + return response; + } + + @Override + public HttpClient.HttpResponse get(String url, Map headers) { + this.lastUrl = url; + return response; + } + } +} diff --git a/common/pom.xml b/common/pom.xml new file mode 100644 index 0000000..d0bd751 --- /dev/null +++ b/common/pom.xml @@ -0,0 +1,68 @@ + + + 4.0.0 + + + com.databricks + zerobus-java-parent + 0.2.0 + + + zerobus-common + jar + Zerobus Common + Common utilities for Databricks Zerobus - HTTP client and JSON utilities + https://github.com/databricks/zerobus-sdk-java + + + + + com.google.code.findbugs + jsr305 + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.jacoco + jacoco-maven-plugin + 0.8.11 + + + + prepare-agent + + + + report + test + + report + + + + + + + diff --git a/common/src/main/java/com/databricks/zerobus/common/http/DefaultHttpClient.java b/common/src/main/java/com/databricks/zerobus/common/http/DefaultHttpClient.java new file mode 100644 index 0000000..90c7ac9 --- /dev/null +++ b/common/src/main/java/com/databricks/zerobus/common/http/DefaultHttpClient.java @@ -0,0 +1,100 @@ +package com.databricks.zerobus.common.http; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import javax.annotation.Nonnull; + +/** + * Default implementation of {@link HttpClient} using {@link HttpURLConnection}. + * + *
<p>This is the production implementation used by the SDK. + * + *
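<p>A minimal usage sketch (the endpoint URL is illustrative only, and IOException handling is omitted): + * + * <pre>{@code + * HttpClient.HttpResponse response = + *     DefaultHttpClient.INSTANCE.get("https://example.com/health", new HashMap<>()); + * if (response.isSuccess()) { + *   System.out.println(response.getBody()); + * } + * }</pre> + * + * <p>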
Use the singleton instance via {@link #INSTANCE}. + */ +public class DefaultHttpClient implements HttpClient { + + public static final DefaultHttpClient INSTANCE = new DefaultHttpClient(); + + private DefaultHttpClient() {} + + @Override + @Nonnull + public HttpResponse post( + @Nonnull String url, @Nonnull String formData, @Nonnull Map headers) + throws IOException { + HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); + try { + connection.setRequestMethod("POST"); + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setDoOutput(true); + + for (Map.Entry entry : headers.entrySet()) { + connection.setRequestProperty(entry.getKey(), entry.getValue()); + } + + try (OutputStreamWriter writer = + new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8)) { + writer.write(formData); + } + + return readResponse(connection); + } finally { + connection.disconnect(); + } + } + + @Override + @Nonnull + public HttpResponse get(@Nonnull String url, @Nonnull Map headers) + throws IOException { + HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); + try { + connection.setRequestMethod("GET"); + + for (Map.Entry entry : headers.entrySet()) { + connection.setRequestProperty(entry.getKey(), entry.getValue()); + } + + return readResponse(connection); + } finally { + connection.disconnect(); + } + } + + private HttpResponse readResponse(HttpURLConnection connection) throws IOException { + int statusCode = connection.getResponseCode(); + + String body = null; + String errorBody = null; + + if (statusCode >= 200 && statusCode < 300) { + body = readStream(connection.getInputStream()); + } else { + errorBody = readStream(connection.getErrorStream()); + } + + return new HttpResponse(statusCode, body, errorBody); + } + + private String readStream(InputStream stream) throws IOException { + if (stream == null) { + return null; + } + try (BufferedReader reader = + new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) { + StringBuilder builder = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + builder.append(line).append("\n"); + } + return builder.toString(); + } + } +} diff --git a/common/src/main/java/com/databricks/zerobus/common/http/HttpClient.java b/common/src/main/java/com/databricks/zerobus/common/http/HttpClient.java new file mode 100644 index 0000000..0e2651c --- /dev/null +++ b/common/src/main/java/com/databricks/zerobus/common/http/HttpClient.java @@ -0,0 +1,72 @@ +package com.databricks.zerobus.common.http; + +import java.io.IOException; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Interface for HTTP operations. This abstraction allows mocking HTTP calls in tests. + * + *

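<p>A stub implementation can return canned responses in tests (a sketch, not SDK code): + * + * <pre>{@code + * HttpClient stub = new HttpClient() { + *   @Override public HttpResponse post(String url, String formData, Map<String, String> headers) { + *     return new HttpResponse(200, "{}", null); + *   } + *   @Override public HttpResponse get(String url, Map<String, String> headers) { + *     return new HttpResponse(200, "{}", null); + *   } + * }; + * }</pre> + * + * <p>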
Implementations should handle connection management and proper resource cleanup. + */ +public interface HttpClient { + + /** + * Performs an HTTP POST request with form data. + * + * @param url The URL to post to + * @param formData The form data (application/x-www-form-urlencoded format) + * @param headers Additional headers to include + * @return The response from the server + * @throws IOException if the request fails + */ + @Nonnull + HttpResponse post( + @Nonnull String url, @Nonnull String formData, @Nonnull Map headers) + throws IOException; + + /** + * Performs an HTTP GET request. + * + * @param url The URL to get + * @param headers Additional headers to include + * @return The response from the server + * @throws IOException if the request fails + */ + @Nonnull + HttpResponse get(@Nonnull String url, @Nonnull Map headers) throws IOException; + + /** Response from an HTTP request. */ + class HttpResponse { + private final int statusCode; + private final String body; + private final String errorBody; + + public HttpResponse(int statusCode, @Nullable String body, @Nullable String errorBody) { + this.statusCode = statusCode; + this.body = body; + this.errorBody = errorBody; + } + + /** Returns the HTTP status code. */ + public int getStatusCode() { + return statusCode; + } + + /** Returns the response body, or null if the request failed. */ + @Nullable public String getBody() { + return body; + } + + /** Returns the error body, or null if the request succeeded. */ + @Nullable public String getErrorBody() { + return errorBody; + } + + /** Returns true if the status code indicates success (2xx). */ + public boolean isSuccess() { + return statusCode >= 200 && statusCode < 300; + } + } +} diff --git a/common/src/main/java/com/databricks/zerobus/common/http/RetryingHttpClient.java b/common/src/main/java/com/databricks/zerobus/common/http/RetryingHttpClient.java new file mode 100644 index 0000000..60a96ab --- /dev/null +++ b/common/src/main/java/com/databricks/zerobus/common/http/RetryingHttpClient.java @@ -0,0 +1,120 @@ +package com.databricks.zerobus.common.http; + +import java.io.IOException; +import java.util.Map; +import javax.annotation.Nonnull; + +/** + * HTTP client decorator that adds retry logic for transient failures. + * + *
<p>This client wraps another {@link HttpClient} and automatically retries requests that fail due + * to transient errors: + * + * <ul> + *   <li>5xx status codes (server errors) + *   <li>IOException (network issues, timeouts, connection refused) + * </ul> + * + * <p>Requests are NOT retried for: + * + * <ul> + *   <li>4xx status codes (client errors like 400, 401, 403, 404) + *   <li>Successful responses (2xx) + * </ul> + * + *
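<p>For example, wrapping the production client with the documented defaults (a usage sketch; the URL is illustrative and IOException handling is omitted): + * + * <pre>{@code + * HttpClient client = new RetryingHttpClient(DefaultHttpClient.INSTANCE, 3, 1000); + * HttpClient.HttpResponse response = client.get("https://example.com/api", new HashMap<>()); + * }</pre> + * + * <p>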
Uses exponential backoff between retries (e.g., 1s, 2s, 4s). + */ +public class RetryingHttpClient implements HttpClient { + + private static final int DEFAULT_MAX_RETRIES = 3; + private static final long DEFAULT_INITIAL_BACKOFF_MS = 1000; + + private final HttpClient delegate; + private final int maxRetries; + private final long initialBackoffMs; + + /** + * Creates a retrying HTTP client with default settings. + * + * @param delegate The underlying HTTP client to use + */ + public RetryingHttpClient(@Nonnull HttpClient delegate) { + this(delegate, DEFAULT_MAX_RETRIES, DEFAULT_INITIAL_BACKOFF_MS); + } + + /** + * Creates a retrying HTTP client with custom settings. + * + * @param delegate The underlying HTTP client to use + * @param maxRetries Maximum number of retry attempts (not including the initial attempt) + * @param initialBackoffMs Initial backoff duration in milliseconds (doubles each retry) + */ + public RetryingHttpClient(@Nonnull HttpClient delegate, int maxRetries, long initialBackoffMs) { + this.delegate = delegate; + this.maxRetries = maxRetries; + this.initialBackoffMs = initialBackoffMs; + } + + @Override + @Nonnull + public HttpResponse post( + @Nonnull String url, @Nonnull String formData, @Nonnull Map headers) + throws IOException { + return executeWithRetry(() -> delegate.post(url, formData, headers)); + } + + @Override + @Nonnull + public HttpResponse get(@Nonnull String url, @Nonnull Map headers) + throws IOException { + return executeWithRetry(() -> delegate.get(url, headers)); + } + + private HttpResponse executeWithRetry(HttpOperation operation) throws IOException { + IOException lastException = null; + long backoffMs = initialBackoffMs; + + for (int attempt = 0; attempt <= maxRetries; attempt++) { + try { + HttpResponse response = operation.execute(); + + if (response.isSuccess()) { + return response; + } + + if (response.getStatusCode() >= 500) { + if (attempt < maxRetries) { + sleep(backoffMs); + backoffMs *= 2; + continue; + } + return response; + } + + return response; + + } catch (IOException e) { + lastException = e; + if (attempt < maxRetries) { + sleep(backoffMs); + backoffMs *= 2; + } + } + } + + throw lastException; + } + + private void sleep(long ms) { + try { + Thread.sleep(ms); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + @FunctionalInterface + private interface HttpOperation { + HttpResponse execute() throws IOException; + } +} diff --git a/common/src/main/java/com/databricks/zerobus/common/json/Json.java b/common/src/main/java/com/databricks/zerobus/common/json/Json.java new file mode 100644 index 0000000..4115466 --- /dev/null +++ b/common/src/main/java/com/databricks/zerobus/common/json/Json.java @@ -0,0 +1,435 @@ +package com.databricks.zerobus.common.json; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Minimal JSON utility for parsing and serializing JSON without external dependencies. + * + *
<p>This class provides basic JSON operations sufficient for SDK needs without adding a dependency + * on a JSON library. + * + * <p>Supported types: + * + * <ul> + *   <li>Objects → {@code Map<String, Object>} + *   <li>Arrays → {@code List<Object>} + *   <li>Strings → {@code String} + *   <li>Numbers → {@code Integer}, {@code Long}, or {@code Double} + *   <li>Booleans → {@code Boolean} + *   <li>Null → {@code null} + * </ul> + * + * <p>Example usage: + * + * <pre>{@code + * // Parsing + * Map<String, Object> obj = (Map<String, Object>) Json.parse("{\"name\": \"Alice\"}"); + * + * // Serializing + * String json = Json.stringify(Map.of("name", "Alice", "age", 30)); + * }</pre>
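+ * + * <p>Numbers parse to the narrowest type that fits, per the parser below (illustrative sketch): + * + * <pre>{@code + * Json.parse("42");         // Integer + * Json.parse("9999999999"); // Long (outside int range) + * Json.parse("3.14");       // Double + * }</pre>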
    + */ +public final class Json { + + private Json() {} + + /** + * Parses a JSON string into a Java object. + * + * @param json The JSON string to parse + * @return The parsed object (Map, List, String, Number, Boolean, or null) + * @throws IllegalArgumentException if the JSON is malformed + */ + @Nullable public static Object parse(@Nonnull String json) { + return new Parser(json).parse(); + } + + /** + * Serializes a Java object to a JSON string. + * + *
<p>Supported types: + * + * <ul> + *   <li>{@code Map} → JSON object + *   <li>{@code Iterable} → JSON array + *   <li>{@code String} → JSON string + *   <li>{@code Number} → JSON number + *   <li>{@code Boolean} → JSON boolean + *   <li>{@code null} → JSON null + * </ul>
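+ * + * <p>For instance (a sketch; object key order follows map iteration order): + * + * <pre>{@code + * Json.stringify(Arrays.asList(1, 2, 3)); // [1,2,3] + * Json.stringify(Map.of("ok", true));     // {"ok":true} + * }</pre>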
    + * + * @param obj The object to serialize + * @return The JSON string representation + * @throws IllegalArgumentException if the object type is not supported + */ + @Nonnull + public static String stringify(@Nullable Object obj) { + StringBuilder sb = new StringBuilder(); + new Writer(sb).write(obj); + return sb.toString(); + } + + // ==================== Parser ==================== + + /** Minimal JSON parser that handles objects, arrays, strings, numbers, booleans, and nulls. */ + private static class Parser { + private final String json; + private int pos = 0; + + Parser(String json) { + this.json = json.trim(); + } + + Object parse() { + skipWhitespace(); + return parseValue(); + } + + private Object parseValue() { + skipWhitespace(); + char c = peek(); + + if (c == '{') { + return parseObject(); + } else if (c == '[') { + return parseArray(); + } else if (c == '"') { + return parseString(); + } else if (c == 't' || c == 'f') { + return parseBoolean(); + } else if (c == 'n') { + return parseNull(); + } else if (c == '-' || Character.isDigit(c)) { + return parseNumber(); + } else { + throw new IllegalArgumentException("Unexpected character at position " + pos + ": " + c); + } + } + + private Map parseObject() { + Map map = new HashMap<>(); + consume('{'); + skipWhitespace(); + + if (peek() == '}') { + consume('}'); + return map; + } + + while (true) { + skipWhitespace(); + String key = parseString(); + skipWhitespace(); + consume(':'); + skipWhitespace(); + Object value = parseValue(); + map.put(key, value); + + skipWhitespace(); + char c = peek(); + if (c == '}') { + consume('}'); + break; + } else if (c == ',') { + consume(','); + } else { + throw new IllegalArgumentException("Expected ',' or '}' at position " + pos); + } + } + + return map; + } + + private List parseArray() { + List list = new ArrayList<>(); + consume('['); + skipWhitespace(); + + if (peek() == ']') { + consume(']'); + return list; + } + + while (true) { + skipWhitespace(); + list.add(parseValue()); + skipWhitespace(); + + char c = peek(); + if (c == ']') { + consume(']'); + break; + } else if (c == ',') { + consume(','); + } else { + throw new IllegalArgumentException("Expected ',' or ']' at position " + pos); + } + } + + return list; + } + + private String parseString() { + consume('"'); + StringBuilder sb = new StringBuilder(); + + while (pos < json.length()) { + char c = json.charAt(pos); + if (c == '"') { + pos++; + return sb.toString(); + } else if (c == '\\') { + pos++; + if (pos >= json.length()) { + throw new IllegalArgumentException("Unterminated string escape"); + } + char escaped = json.charAt(pos); + switch (escaped) { + case '"': + case '\\': + case '/': + sb.append(escaped); + break; + case 'b': + sb.append('\b'); + break; + case 'f': + sb.append('\f'); + break; + case 'n': + sb.append('\n'); + break; + case 'r': + sb.append('\r'); + break; + case 't': + sb.append('\t'); + break; + case 'u': + if (pos + 4 >= json.length()) { + throw new IllegalArgumentException("Invalid unicode escape"); + } + String hex = json.substring(pos + 1, pos + 5); + sb.append((char) Integer.parseInt(hex, 16)); + pos += 4; + break; + default: + throw new IllegalArgumentException("Invalid escape character: " + escaped); + } + pos++; + } else { + sb.append(c); + pos++; + } + } + + throw new IllegalArgumentException("Unterminated string"); + } + + private Object parseNumber() { + int start = pos; + if (peek() == '-') { + pos++; + } + + while (pos < json.length() + && (Character.isDigit(json.charAt(pos)) + || json.charAt(pos) 
== '.' + || json.charAt(pos) == 'e' + || json.charAt(pos) == 'E' + || json.charAt(pos) == '+' + || json.charAt(pos) == '-')) { + pos++; + } + + String numStr = json.substring(start, pos); + if (numStr.contains(".") || numStr.contains("e") || numStr.contains("E")) { + return Double.parseDouble(numStr); + } else { + try { + return Integer.parseInt(numStr); + } catch (NumberFormatException e) { + return Long.parseLong(numStr); + } + } + } + + private Boolean parseBoolean() { + if (json.startsWith("true", pos)) { + pos += 4; + return Boolean.TRUE; + } else if (json.startsWith("false", pos)) { + pos += 5; + return Boolean.FALSE; + } else { + throw new IllegalArgumentException("Invalid boolean at position " + pos); + } + } + + private Object parseNull() { + if (json.startsWith("null", pos)) { + pos += 4; + return null; + } else { + throw new IllegalArgumentException("Invalid null at position " + pos); + } + } + + private char peek() { + if (pos >= json.length()) { + throw new IllegalArgumentException("Unexpected end of JSON"); + } + return json.charAt(pos); + } + + private void consume(char expected) { + char c = peek(); + if (c != expected) { + throw new IllegalArgumentException( + "Expected '" + expected + "' but got '" + c + "' at position " + pos); + } + pos++; + } + + private void skipWhitespace() { + while (pos < json.length() && Character.isWhitespace(json.charAt(pos))) { + pos++; + } + } + } + + // ==================== Writer ==================== + + /** Minimal JSON writer that serializes Java objects to JSON strings. */ + private static class Writer { + private final StringBuilder sb; + + Writer(StringBuilder sb) { + this.sb = sb; + } + + void write(Object obj) { + if (obj == null) { + sb.append("null"); + } else if (obj instanceof Map) { + writeObject((Map) obj); + } else if (obj instanceof Iterable) { + writeArray((Iterable) obj); + } else if (obj instanceof String) { + writeString((String) obj); + } else if (obj instanceof Number) { + writeNumber((Number) obj); + } else if (obj instanceof Boolean) { + sb.append(obj.toString()); + } else if (obj.getClass().isArray()) { + writeArrayPrimitive(obj); + } else { + throw new IllegalArgumentException( + "Unsupported type for JSON serialization: " + obj.getClass().getName()); + } + } + + private void writeObject(Map map) { + sb.append('{'); + boolean first = true; + for (Map.Entry entry : map.entrySet()) { + if (!first) { + sb.append(','); + } + first = false; + + Object key = entry.getKey(); + if (!(key instanceof String)) { + throw new IllegalArgumentException( + "JSON object keys must be strings, got: " + key.getClass().getName()); + } + writeString((String) key); + sb.append(':'); + write(entry.getValue()); + } + sb.append('}'); + } + + private void writeArray(Iterable iterable) { + sb.append('['); + boolean first = true; + for (Object item : iterable) { + if (!first) { + sb.append(','); + } + first = false; + write(item); + } + sb.append(']'); + } + + private void writeArrayPrimitive(Object array) { + sb.append('['); + int length = java.lang.reflect.Array.getLength(array); + for (int i = 0; i < length; i++) { + if (i > 0) { + sb.append(','); + } + write(java.lang.reflect.Array.get(array, i)); + } + sb.append(']'); + } + + private void writeString(String str) { + sb.append('"'); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + switch (c) { + case '"': + sb.append("\\\""); + break; + case '\\': + sb.append("\\\\"); + break; + case '\b': + sb.append("\\b"); + break; + case '\f': + sb.append("\\f"); + break; + case 
'\n': + sb.append("\\n"); + break; + case '\r': + sb.append("\\r"); + break; + case '\t': + sb.append("\\t"); + break; + default: + if (c < 0x20) { + sb.append(String.format("\\u%04x", (int) c)); + } else { + sb.append(c); + } + } + } + sb.append('"'); + } + + private void writeNumber(Number num) { + if (num instanceof Double) { + double d = num.doubleValue(); + if (Double.isInfinite(d) || Double.isNaN(d)) { + throw new IllegalArgumentException("JSON does not support Infinity or NaN"); + } + } else if (num instanceof Float) { + float f = num.floatValue(); + if (Float.isInfinite(f) || Float.isNaN(f)) { + throw new IllegalArgumentException("JSON does not support Infinity or NaN"); + } + } + sb.append(num.toString()); + } + } +} diff --git a/common/src/test/java/com/databricks/zerobus/common/http/DefaultHttpClientTest.java b/common/src/test/java/com/databricks/zerobus/common/http/DefaultHttpClientTest.java new file mode 100644 index 0000000..d0172f9 --- /dev/null +++ b/common/src/test/java/com/databricks/zerobus/common/http/DefaultHttpClientTest.java @@ -0,0 +1,387 @@ +package com.databricks.zerobus.common.http; + +import static org.junit.jupiter.api.Assertions.*; + +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpServer; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for DefaultHttpClient using an embedded HTTP server. */ +class DefaultHttpClientTest { + + private HttpServer server; + private int port; + private DefaultHttpClient client; + + @BeforeEach + void setUp() throws IOException { + server = HttpServer.create(new InetSocketAddress(0), 0); + port = server.getAddress().getPort(); + server.setExecutor(null); + server.start(); + client = DefaultHttpClient.INSTANCE; + } + + @AfterEach + void tearDown() { + if (server != null) { + server.stop(0); + } + } + + private String getBaseUrl() { + return "http://localhost:" + port; + } + + // ==================== POST Tests ==================== + + @Test + void testPostSuccess() throws IOException { + server.createContext( + "/post", + exchange -> { + assertEquals("POST", exchange.getRequestMethod()); + + String requestBody = readRequestBody(exchange); + assertEquals("key=value&foo=bar", requestBody); + + String response = "{\"status\": \"ok\"}"; + sendResponse(exchange, 200, response); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post", "key=value&foo=bar", headers); + + assertEquals(200, response.getStatusCode()); + assertTrue(response.isSuccess()); + assertNotNull(response.getBody()); + assertTrue(response.getBody().contains("\"status\": \"ok\"")); + assertNull(response.getErrorBody()); + } + + @Test + void testPostWithHeaders() throws IOException { + server.createContext( + "/post-headers", + exchange -> { + assertEquals("custom-value", exchange.getRequestHeaders().getFirst("X-Custom-Header")); + assertEquals("Bearer token123", exchange.getRequestHeaders().getFirst("Authorization")); + + sendResponse(exchange, 200, "ok"); + }); + + Map headers = new HashMap<>(); + headers.put("X-Custom-Header", "custom-value"); + headers.put("Authorization", "Bearer token123"); + + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post-headers", 
"data=test", headers); + + assertEquals(200, response.getStatusCode()); + assertTrue(response.isSuccess()); + } + + @Test + void testPostClientError() throws IOException { + server.createContext( + "/post-error", + exchange -> { + String errorBody = "{\"error\": \"Bad Request\"}"; + sendResponse(exchange, 400, errorBody); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post-error", "invalid=data", headers); + + assertEquals(400, response.getStatusCode()); + assertFalse(response.isSuccess()); + assertNull(response.getBody()); + assertNotNull(response.getErrorBody()); + assertTrue(response.getErrorBody().contains("Bad Request")); + } + + @Test + void testPostServerError() throws IOException { + server.createContext( + "/post-server-error", + exchange -> { + String errorBody = "{\"error\": \"Internal Server Error\"}"; + sendResponse(exchange, 500, errorBody); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post-server-error", "data=test", headers); + + assertEquals(500, response.getStatusCode()); + assertFalse(response.isSuccess()); + assertNull(response.getBody()); + assertNotNull(response.getErrorBody()); + } + + @Test + void testPostEmptyResponse() throws IOException { + server.createContext( + "/post-empty", + exchange -> { + exchange.sendResponseHeaders(204, -1); + exchange.close(); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post-empty", "data=test", headers); + + assertEquals(204, response.getStatusCode()); + assertTrue(response.isSuccess()); + } + + // ==================== GET Tests ==================== + + @Test + void testGetSuccess() throws IOException { + server.createContext( + "/get", + exchange -> { + assertEquals("GET", exchange.getRequestMethod()); + + String response = "{\"data\": \"hello\"}"; + sendResponse(exchange, 200, response); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/get", headers); + + assertEquals(200, response.getStatusCode()); + assertTrue(response.isSuccess()); + assertNotNull(response.getBody()); + assertTrue(response.getBody().contains("\"data\": \"hello\"")); + assertNull(response.getErrorBody()); + } + + @Test + void testGetWithHeaders() throws IOException { + server.createContext( + "/get-headers", + exchange -> { + assertEquals("application/json", exchange.getRequestHeaders().getFirst("Accept")); + assertEquals("Bearer mytoken", exchange.getRequestHeaders().getFirst("Authorization")); + + sendResponse(exchange, 200, "ok"); + }); + + Map headers = new HashMap<>(); + headers.put("Accept", "application/json"); + headers.put("Authorization", "Bearer mytoken"); + + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/get-headers", headers); + + assertEquals(200, response.getStatusCode()); + assertTrue(response.isSuccess()); + } + + @Test + void testGetNotFound() throws IOException { + server.createContext( + "/get-not-found", + exchange -> { + String errorBody = "{\"error\": \"Not Found\"}"; + sendResponse(exchange, 404, errorBody); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/get-not-found", headers); + + assertEquals(404, response.getStatusCode()); + assertFalse(response.isSuccess()); + assertNull(response.getBody()); + assertNotNull(response.getErrorBody()); + assertTrue(response.getErrorBody().contains("Not Found")); + } + + @Test + void 
testGetUnauthorized() throws IOException { + server.createContext( + "/get-unauthorized", + exchange -> { + String errorBody = "{\"error\": \"Unauthorized\"}"; + sendResponse(exchange, 401, errorBody); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/get-unauthorized", headers); + + assertEquals(401, response.getStatusCode()); + assertFalse(response.isSuccess()); + } + + // ==================== HttpResponse Tests ==================== + + @Test + void testHttpResponseIsSuccess() { + // 2xx are success + assertTrue(new HttpClient.HttpResponse(200, "body", null).isSuccess()); + assertTrue(new HttpClient.HttpResponse(201, "body", null).isSuccess()); + assertTrue(new HttpClient.HttpResponse(204, null, null).isSuccess()); + assertTrue(new HttpClient.HttpResponse(299, "body", null).isSuccess()); + + // < 200 are not success + assertFalse(new HttpClient.HttpResponse(199, null, "error").isSuccess()); + + // >= 300 are not success + assertFalse(new HttpClient.HttpResponse(300, null, "error").isSuccess()); + assertFalse(new HttpClient.HttpResponse(400, null, "error").isSuccess()); + assertFalse(new HttpClient.HttpResponse(500, null, "error").isSuccess()); + } + + @Test + void testHttpResponseGetters() { + HttpClient.HttpResponse response = new HttpClient.HttpResponse(200, "body content", null); + assertEquals(200, response.getStatusCode()); + assertEquals("body content", response.getBody()); + assertNull(response.getErrorBody()); + + HttpClient.HttpResponse errorResponse = new HttpClient.HttpResponse(500, null, "error content"); + assertEquals(500, errorResponse.getStatusCode()); + assertNull(errorResponse.getBody()); + assertEquals("error content", errorResponse.getErrorBody()); + } + + // ==================== Edge Cases ==================== + + @Test + void testPostConnectionError() { + // Try to connect to a port that's not listening + Map headers = new HashMap<>(); + assertThrows(IOException.class, () -> client.post("http://localhost:1", "data=test", headers)); + } + + @Test + void testGetConnectionError() { + Map headers = new HashMap<>(); + assertThrows(IOException.class, () -> client.get("http://localhost:1", headers)); + } + + @Test + void testPostEmptyHeaders() throws IOException { + server.createContext( + "/post-no-headers", + exchange -> { + sendResponse(exchange, 200, "ok"); + }); + + HttpClient.HttpResponse response = + client.post(getBaseUrl() + "/post-no-headers", "data=test", new HashMap<>()); + + assertEquals(200, response.getStatusCode()); + } + + @Test + void testGetEmptyHeaders() throws IOException { + server.createContext( + "/get-no-headers", + exchange -> { + sendResponse(exchange, 200, "ok"); + }); + + HttpClient.HttpResponse response = + client.get(getBaseUrl() + "/get-no-headers", new HashMap<>()); + + assertEquals(200, response.getStatusCode()); + } + + @Test + void testErrorWithNoBody() throws IOException { + server.createContext( + "/error-no-body", + exchange -> { + // Send error with no body (empty error stream) + exchange.sendResponseHeaders(500, -1); + exchange.close(); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/error-no-body", headers); + + assertEquals(500, response.getStatusCode()); + assertFalse(response.isSuccess()); + assertNull(response.getBody()); + // Error body should be null when no error stream + assertNull(response.getErrorBody()); + } + + @Test + void testMultilineResponse() throws IOException { + server.createContext( + "/multiline", 
+ exchange -> { + String response = "line1\nline2\nline3"; + sendResponse(exchange, 200, response); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/multiline", headers); + + assertEquals(200, response.getStatusCode()); + assertTrue(response.getBody().contains("line1")); + assertTrue(response.getBody().contains("line2")); + assertTrue(response.getBody().contains("line3")); + } + + @Test + void testInformationalResponse() throws IOException { + // Test status code < 200 (informational responses) + // This covers the missing branch in readResponse where statusCode < 200 + server.createContext( + "/informational", + exchange -> { + // Status 199 exercises the branch where statusCode < 200 + // Note: HttpServer forces contentLen=-1 for status < 200 + exchange.sendResponseHeaders(199, -1); + exchange.close(); + }); + + Map headers = new HashMap<>(); + HttpClient.HttpResponse response = client.get(getBaseUrl() + "/informational", headers); + + assertEquals(199, response.getStatusCode()); + assertFalse(response.isSuccess()); + assertNull(response.getBody()); + // Error body is null because HttpServer doesn't send body for status < 200 + assertNull(response.getErrorBody()); + } + + // ==================== Helper Methods ==================== + + private String readRequestBody(HttpExchange exchange) throws IOException { + try (InputStream is = exchange.getRequestBody()) { + StringBuilder sb = new StringBuilder(); + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = is.read(buffer)) != -1) { + sb.append(new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); + } + return sb.toString(); + } + } + + private void sendResponse(HttpExchange exchange, int statusCode, String response) + throws IOException { + byte[] responseBytes = response.getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(statusCode, responseBytes.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(responseBytes); + } + } +} diff --git a/common/src/test/java/com/databricks/zerobus/common/http/RetryingHttpClientTest.java b/common/src/test/java/com/databricks/zerobus/common/http/RetryingHttpClientTest.java new file mode 100644 index 0000000..118cc86 --- /dev/null +++ b/common/src/test/java/com/databricks/zerobus/common/http/RetryingHttpClientTest.java @@ -0,0 +1,277 @@ +package com.databricks.zerobus.common.http; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.zerobus.common.http.HttpClient.HttpResponse; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.jupiter.api.Test; + +/** Tests for RetryingHttpClient. 
*/ +class RetryingHttpClientTest { + + private static final Map EMPTY_HEADERS = new HashMap<>(); + + @Test + void testSuccessOnFirstAttempt() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(200, "success", null)); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(200, response.getStatusCode()); + assertEquals("success", response.getBody()); + assertEquals(1, mock.getCallCount()); + } + + @Test + void testClientErrorNotRetried() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(400, null, "Bad Request")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(400, response.getStatusCode()); + assertEquals(1, mock.getCallCount()); // Should NOT retry + } + + @Test + void testUnauthorizedNotRetried() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(401, null, "Unauthorized")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(401, response.getStatusCode()); + assertEquals(1, mock.getCallCount()); // Should NOT retry + } + + @Test + void testForbiddenNotRetried() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(403, null, "Forbidden")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(403, response.getStatusCode()); + assertEquals(1, mock.getCallCount()); // Should NOT retry + } + + @Test + void testServerErrorRetried() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(500, null, "Internal Server Error")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(500, response.getStatusCode()); + assertEquals(4, mock.getCallCount()); // 1 initial + 3 retries + } + + @Test + void testServerErrorSucceedsAfterRetry() throws IOException { + MockHttpClient mock = new MockHttpClient(); + // Fail twice with 500, then succeed + mock.setResponses( + new HttpResponse(500, null, "Error"), + new HttpResponse(500, null, "Error"), + new HttpResponse(200, "success", null)); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(200, response.getStatusCode()); + assertEquals("success", response.getBody()); + assertEquals(3, mock.getCallCount()); + } + + @Test + void testServiceUnavailableRetried() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(503, null, "Service Unavailable")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 2, 1); + HttpResponse response = client.get("http://test", EMPTY_HEADERS); + + assertEquals(503, response.getStatusCode()); + assertEquals(3, mock.getCallCount()); // 1 initial + 2 retries + } + + @Test + void testIOExceptionRetried() { + MockHttpClient mock = new MockHttpClient(); + mock.setException(new IOException("Connection refused")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + + 
IOException thrown = + assertThrows(IOException.class, () -> client.post("http://test", "data", EMPTY_HEADERS)); + + assertEquals("Connection refused", thrown.getMessage()); + assertEquals(4, mock.getCallCount()); // 1 initial + 3 retries + } + + @Test + void testIOExceptionSucceedsAfterRetry() throws IOException { + MockHttpClient mock = new MockHttpClient(); + // Fail twice with IOException, then succeed + mock.setResponsesWithExceptions( + null, // IOException + null, // IOException + new HttpResponse(200, "success", null)); + mock.setException(new IOException("Network error")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(200, response.getStatusCode()); + assertEquals(3, mock.getCallCount()); + } + + @Test + void testGetMethodRetries() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponses( + new HttpResponse(502, null, "Bad Gateway"), new HttpResponse(200, "success", null)); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 1); + HttpResponse response = client.get("http://test", EMPTY_HEADERS); + + assertEquals(200, response.getStatusCode()); + assertEquals(2, mock.getCallCount()); + } + + @Test + void testZeroRetriesMeansOneAttempt() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(500, null, "Error")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 0, 1); + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + assertEquals(500, response.getStatusCode()); + assertEquals(1, mock.getCallCount()); // Only 1 attempt, no retries + } + + @Test + void testDefaultConstructorUsesDefaults() throws IOException { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(200, "success", null)); + + // Uses default constructor (3 retries, 1000ms backoff) + RetryingHttpClient client = new RetryingHttpClient(mock); + HttpResponse response = client.get("http://test", EMPTY_HEADERS); + + assertEquals(200, response.getStatusCode()); + assertEquals(1, mock.getCallCount()); + } + + @Test + void testThreadInterruptionDuringSleep() throws Exception { + MockHttpClient mock = new MockHttpClient(); + mock.setResponse(new HttpResponse(500, null, "Error")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 3, 100); + + Thread testThread = Thread.currentThread(); + + // Start a thread that will interrupt us during the retry sleep + Thread interrupter = + new Thread( + () -> { + try { + Thread.sleep(50); // Wait for retry to start sleeping + testThread.interrupt(); + } catch (InterruptedException e) { + // ignore + } + }); + interrupter.start(); + + // This should complete despite interruption (interrupt is handled gracefully) + HttpResponse response = client.post("http://test", "data", EMPTY_HEADERS); + + interrupter.join(1000); + + assertEquals(500, response.getStatusCode()); + // Clear interrupted status + Thread.interrupted(); + } + + @Test + void testIOExceptionOnGetMethod() { + MockHttpClient mock = new MockHttpClient(); + mock.setException(new IOException("Network timeout")); + + RetryingHttpClient client = new RetryingHttpClient(mock, 2, 1); + + IOException thrown = + assertThrows(IOException.class, () -> client.get("http://test", EMPTY_HEADERS)); + + assertEquals("Network timeout", thrown.getMessage()); + assertEquals(3, mock.getCallCount()); // 1 initial + 2 retries + } + + /** Mock HttpClient for testing retry 
behavior. */ + private static class MockHttpClient implements HttpClient { + private final AtomicInteger callCount = new AtomicInteger(0); + private HttpResponse[] responses; + private int responseIndex = 0; + private IOException exception; + + void setResponse(HttpResponse response) { + this.responses = new HttpResponse[] {response}; + } + + void setResponses(HttpResponse... responses) { + this.responses = responses; + } + + void setResponsesWithExceptions(HttpResponse... responses) { + this.responses = responses; + } + + void setException(IOException exception) { + this.exception = exception; + } + + int getCallCount() { + return callCount.get(); + } + + @Override + public HttpResponse post(String url, String formData, Map headers) + throws IOException { + return execute(); + } + + @Override + public HttpResponse get(String url, Map headers) throws IOException { + return execute(); + } + + private HttpResponse execute() throws IOException { + callCount.incrementAndGet(); + + if (responses != null && responseIndex < responses.length) { + HttpResponse response = responses[responseIndex++]; + if (response != null) { + return response; + } + } + + if (exception != null) { + throw exception; + } + + return responses[responses.length - 1]; + } + } +} diff --git a/common/src/test/java/com/databricks/zerobus/common/json/JsonTest.java b/common/src/test/java/com/databricks/zerobus/common/json/JsonTest.java new file mode 100644 index 0000000..fb89c40 --- /dev/null +++ b/common/src/test/java/com/databricks/zerobus/common/json/JsonTest.java @@ -0,0 +1,388 @@ +package com.databricks.zerobus.common.json; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** Tests for the Json utility class. 
*/ +class JsonTest { + + // ==================== Parse Tests ==================== + + @Test + @SuppressWarnings("unchecked") + void testParseObject() { + Map result = + (Map) Json.parse("{\"name\": \"test\", \"count\": 42}"); + assertEquals("test", result.get("name")); + assertEquals(42, result.get("count")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseArray() { + List result = (List) Json.parse("[1, 2, 3]"); + assertEquals(3, result.size()); + assertEquals(1, result.get(0)); + assertEquals(2, result.get(1)); + assertEquals(3, result.get(2)); + } + + @Test + void testParseString() { + assertEquals("hello", Json.parse("\"hello\"")); + } + + @Test + void testParseNumbers() { + assertEquals(42, Json.parse("42")); + assertEquals(3.14, Json.parse("3.14")); + assertEquals(-100, Json.parse("-100")); + assertEquals(1e10, Json.parse("1e10")); + assertEquals(1E10, Json.parse("1E10")); + assertEquals(1.5e-3, Json.parse("1.5e-3")); + assertEquals(1.5E+3, Json.parse("1.5E+3")); + } + + @Test + void testParseLongNumbers() { + // Numbers exceeding Integer range should parse as Long + assertEquals(9999999999L, Json.parse("9999999999")); + assertEquals(-9999999999L, Json.parse("-9999999999")); + assertEquals(Long.MAX_VALUE, Json.parse(String.valueOf(Long.MAX_VALUE))); + assertEquals(Long.MIN_VALUE, Json.parse(String.valueOf(Long.MIN_VALUE))); + } + + @Test + void testParseBooleans() { + assertEquals(true, Json.parse("true")); + assertEquals(false, Json.parse("false")); + } + + @Test + void testParseNull() { + assertNull(Json.parse("null")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseNested() { + String json = "{\"user\": {\"name\": \"Alice\", \"age\": 30}, \"active\": true}"; + Map result = (Map) Json.parse(json); + + Map user = (Map) result.get("user"); + assertEquals("Alice", user.get("name")); + assertEquals(30, user.get("age")); + assertEquals(true, result.get("active")); + } + + @Test + @SuppressWarnings("unchecked") + void testParseEscapedStrings() { + Map result = + (Map) Json.parse("{\"text\": \"hello\\nworld\", \"path\": \"c:\\\\test\"}"); + assertEquals("hello\nworld", result.get("text")); + assertEquals("c:\\test", result.get("path")); + } + + @Test + void testParseAllEscapeCharacters() { + // Test all JSON escape sequences + assertEquals("\"", Json.parse("\"\\\"\"")); // \" + assertEquals("\\", Json.parse("\"\\\\\"")); // \\ + assertEquals("/", Json.parse("\"\\/\"")); // \/ + assertEquals("\b", Json.parse("\"\\b\"")); // \b + assertEquals("\f", Json.parse("\"\\f\"")); // \f + assertEquals("\n", Json.parse("\"\\n\"")); // \n + assertEquals("\r", Json.parse("\"\\r\"")); // \r + assertEquals("\t", Json.parse("\"\\t\"")); // \t + } + + @Test + void testParseUnicodeEscapes() { + assertEquals("A", Json.parse("\"\\u0041\"")); // Basic ASCII + assertEquals("\u00E9", Json.parse("\"\\u00e9\"")); // é (lowercase hex) + assertEquals("\u00E9", Json.parse("\"\\u00E9\"")); // é (uppercase hex) + assertEquals("\u4E2D", Json.parse("\"\\u4E2D\"")); // 中 (Chinese character) + assertEquals("Hello\u0020World", Json.parse("\"Hello\\u0020World\"")); // Space as unicode + } + + @Test + void testParseEmptyStructures() { + // Empty object + @SuppressWarnings("unchecked") + Map emptyObj = (Map) Json.parse("{}"); + assertTrue(emptyObj.isEmpty()); + + // Empty array + @SuppressWarnings("unchecked") + List emptyArr = (List) Json.parse("[]"); + assertTrue(emptyArr.isEmpty()); + } + + @Test + void testParseWithWhitespace() { + // Various whitespace around elements + assertEquals(42, 
Json.parse(" 42 ")); + assertEquals("test", Json.parse(" \"test\" ")); + + @SuppressWarnings("unchecked") + Map<String, Object> obj = (Map<String, Object>) Json.parse(" { \"key\" : \"value\" } "); + assertEquals("value", obj.get("key")); + + @SuppressWarnings("unchecked") + List<Object> arr = (List<Object>) Json.parse(" [ 1 , 2 , 3 ] "); + assertEquals(3, arr.size()); + } + + @Test + void testParseInvalid() { + assertThrows(IllegalArgumentException.class, () -> Json.parse("{invalid}")); + assertThrows(IllegalArgumentException.class, () -> Json.parse("{\"key\":}")); + assertThrows(IllegalArgumentException.class, () -> Json.parse("")); + } + + @Test + void testParseInvalidEscapes() { + // Invalid escape character + assertThrows(IllegalArgumentException.class, () -> Json.parse("\"\\x\"")); + // Unterminated string escape + assertThrows(IllegalArgumentException.class, () -> Json.parse("\"test\\")); + // Invalid unicode escape (too short) + assertThrows(IllegalArgumentException.class, () -> Json.parse("\"\\u00\"")); + // Unterminated string + assertThrows(IllegalArgumentException.class, () -> Json.parse("\"unterminated")); + } + + @Test + void testParseInvalidStructures() { + // Missing closing brace + assertThrows(IllegalArgumentException.class, () -> Json.parse("{\"key\": \"value\"")); + // Missing closing bracket + assertThrows(IllegalArgumentException.class, () -> Json.parse("[1, 2, 3")); + // Invalid boolean + assertThrows(IllegalArgumentException.class, () -> Json.parse("tru")); + // Invalid null + assertThrows(IllegalArgumentException.class, () -> Json.parse("nul")); + // Unexpected character + assertThrows(IllegalArgumentException.class, () -> Json.parse("@invalid")); + // Invalid character after object value (not ',' or '}') + assertThrows(IllegalArgumentException.class, () -> Json.parse("{\"key\": \"value\" @}")); + // Invalid character after array value (not ',' or ']') + assertThrows(IllegalArgumentException.class, () -> Json.parse("[1 2 3]")); + } + + // ==================== Stringify Tests ==================== + + @Test + void testStringifyNull() { + assertEquals("null", Json.stringify(null)); + } + + @Test + void testStringifyString() { + assertEquals("\"hello\"", Json.stringify("hello")); + } + + @Test + void testStringifyStringWithEscapes() { + assertEquals("\"hello\\nworld\"", Json.stringify("hello\nworld")); + assertEquals("\"hello\\tworld\"", Json.stringify("hello\tworld")); + assertEquals("\"hello\\\"world\"", Json.stringify("hello\"world")); + assertEquals("\"hello\\\\world\"", Json.stringify("hello\\world")); + } + + @Test + void testStringifyAllEscapeCharacters() { + // All characters that need escaping + assertEquals("\"\\b\"", Json.stringify("\b")); // backspace + assertEquals("\"\\f\"", Json.stringify("\f")); // form feed + assertEquals("\"\\n\"", Json.stringify("\n")); // newline + assertEquals("\"\\r\"", Json.stringify("\r")); // carriage return + assertEquals("\"\\t\"", Json.stringify("\t")); // tab + assertEquals("\"\\\"\"", Json.stringify("\"")); // quote + assertEquals("\"\\\\\"", Json.stringify("\\")); // backslash + } + + @Test + void testStringifyControlCharacters() { + // Control characters (< 0x20) should be unicode-escaped + assertEquals("\"\\u0000\"", Json.stringify("\u0000")); // null + assertEquals("\"\\u0001\"", Json.stringify("\u0001")); // SOH + assertEquals("\"\\u001f\"", Json.stringify("\u001F")); // unit separator + // But 0x20 (space) and above should not be escaped + assertEquals("\" \"", Json.stringify(" ")); // space (0x20) + } + + @Test + void testStringifyNumbers() { + assertEquals("42",
Json.stringify(42)); + assertEquals("3.14", Json.stringify(3.14)); + assertEquals("-100", Json.stringify(-100)); + assertEquals("9999999999", Json.stringify(9999999999L)); + } + + @Test + void testStringifyAllNumberTypes() { + // Byte + assertEquals("127", Json.stringify((byte) 127)); + assertEquals("-128", Json.stringify((byte) -128)); + + // Short + assertEquals("32767", Json.stringify((short) 32767)); + assertEquals("-32768", Json.stringify((short) -32768)); + + // Integer + assertEquals("2147483647", Json.stringify(Integer.MAX_VALUE)); + assertEquals("-2147483648", Json.stringify(Integer.MIN_VALUE)); + + // Long + assertEquals("9223372036854775807", Json.stringify(Long.MAX_VALUE)); + assertEquals("-9223372036854775808", Json.stringify(Long.MIN_VALUE)); + + // Float + assertEquals("3.14", Json.stringify(3.14f)); + assertEquals("-1.5", Json.stringify(-1.5f)); + + // Double + assertEquals("3.141592653589793", Json.stringify(3.141592653589793)); + } + + @Test + void testStringifyBooleans() { + assertEquals("true", Json.stringify(true)); + assertEquals("false", Json.stringify(false)); + } + + @Test + void testStringifyList() { + assertEquals("[1,2,3]", Json.stringify(Arrays.asList(1, 2, 3))); + assertEquals("[\"a\",\"b\"]", Json.stringify(Arrays.asList("a", "b"))); + assertEquals("[]", Json.stringify(Arrays.asList())); + } + + @Test + void testStringifyArray() { + assertEquals("[1,2,3]", Json.stringify(new int[] {1, 2, 3})); + assertEquals("[\"a\",\"b\"]", Json.stringify(new String[] {"a", "b"})); + } + + @Test + void testStringifyMap() { + // Use LinkedHashMap to ensure consistent ordering for test + Map<String, Object> map = new LinkedHashMap<>(); + map.put("name", "Alice"); + map.put("age", 30); + assertEquals("{\"name\":\"Alice\",\"age\":30}", Json.stringify(map)); + } + + @Test + void testStringifyEmptyMap() { + assertEquals("{}", Json.stringify(new HashMap<>())); + } + + @Test + void testStringifyNestedMap() { + Map<String, Object> inner = new LinkedHashMap<>(); + inner.put("city", "NYC"); + + Map<String, Object> outer = new LinkedHashMap<>(); + outer.put("name", "Bob"); + outer.put("address", inner); + + assertEquals("{\"name\":\"Bob\",\"address\":{\"city\":\"NYC\"}}", Json.stringify(outer)); + } + + @Test + void testStringifyMapWithList() { + Map<String, Object> map = new LinkedHashMap<>(); + map.put("tags", Arrays.asList("a", "b", "c")); + assertEquals("{\"tags\":[\"a\",\"b\",\"c\"]}", Json.stringify(map)); + } + + @Test + void testStringifyMapWithNull() { + Map<String, Object> map = new LinkedHashMap<>(); + map.put("value", null); + assertEquals("{\"value\":null}", Json.stringify(map)); + } + + @Test + void testStringifyInvalidType() { + // Custom object without toString representation + Object custom = new Object() {}; + assertThrows(IllegalArgumentException.class, () -> Json.stringify(custom)); + } + + @Test + void testStringifyInvalidDoubleInfinity() { + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Double.POSITIVE_INFINITY)); + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Double.NEGATIVE_INFINITY)); + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Double.NaN)); + } + + @Test + void testStringifyInvalidFloatInfinity() { + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Float.POSITIVE_INFINITY)); + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Float.NEGATIVE_INFINITY)); + assertThrows(IllegalArgumentException.class, () -> Json.stringify(Float.NaN)); + } + + @Test + void testStringifyNonStringKey() { + Map<Object, Object> map = new HashMap<>(); + map.put(1, "one"); +
assertThrows(IllegalArgumentException.class, () -> Json.stringify(map)); + } + + // ==================== Round-trip Tests ==================== + + @Test + @SuppressWarnings("unchecked") + void testRoundTripSimple() { + Map<String, Object> original = new LinkedHashMap<>(); + original.put("name", "Test"); + original.put("count", 42); + original.put("active", true); + + String json = Json.stringify(original); + Map<String, Object> parsed = (Map<String, Object>) Json.parse(json); + + assertEquals("Test", parsed.get("name")); + assertEquals(42, parsed.get("count")); + assertEquals(true, parsed.get("active")); + } + + @Test + @SuppressWarnings("unchecked") + void testRoundTripComplex() { + Map<String, Object> alice = new LinkedHashMap<>(); + alice.put("name", "Alice"); + alice.put("age", 30); + + Map<String, Object> bob = new LinkedHashMap<>(); + bob.put("name", "Bob"); + bob.put("age", 25); + + Map<String, Object> metadata = new LinkedHashMap<>(); + metadata.put("version", 1); + + Map<String, Object> original = new LinkedHashMap<>(); + original.put("users", Arrays.asList(alice, bob)); + original.put("metadata", metadata); + + String json = Json.stringify(original); + Map<String, Object> parsed = (Map<String, Object>) Json.parse(json); + + List<Object> users = (List<Object>) parsed.get("users"); + assertEquals(2, users.size()); + + Map<String, Object> parsedAlice = (Map<String, Object>) users.get(0); + assertEquals("Alice", parsedAlice.get("name")); + } +} diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 183bf69..0000000 --- a/examples/README.md +++ /dev/null @@ -1,129 +0,0 @@ -# Zerobus SDK Examples - -This directory contains example applications demonstrating different usage patterns of the Zerobus Ingest SDK for Java. - -## Examples - -### 1. Blocking Ingestion (`BlockingIngestionExample.java`) - -Demonstrates synchronous record ingestion where each record is waited for before proceeding to the next. - -**Best for:** -- Low-volume ingestion (< 1000 records/sec) -- Use cases requiring immediate confirmation per record -- Critical data where you need to handle errors immediately - -**Key features:** -- Waits for each record to be durably written -- Simple error handling -- Predictable behavior -- Lower throughput - -**Run:** -```bash -javac -cp "../target/databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar" \ - src/main/java/com/databricks/zerobus/examples/BlockingIngestionExample.java - -java -cp "../target/databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar:src/main/java" \ - com.databricks.zerobus.examples.BlockingIngestionExample -``` - -### 2. Non-Blocking Ingestion (`NonBlockingIngestionExample.java`) - -Demonstrates asynchronous record ingestion for maximum throughput.
- -**Best for:** -- High-volume ingestion (> 10,000 records/sec) -- Batch processing scenarios -- Stream processing applications -- Maximum throughput requirements - -**Key features:** -- Asynchronous ingestion with CompletableFutures -- Automatic buffering and flow control -- Ack callback for progress tracking -- Batch flush at the end -- Higher throughput - -**Run:** -```bash -javac -cp "../target/databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar" \ - src/main/java/com/databricks/zerobus/examples/NonBlockingIngestionExample.java - -java -cp "../target/databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar:src/main/java" \ - com.databricks.zerobus.examples.NonBlockingIngestionExample -``` - -## Configuration - -Before running the examples, update the following constants in each example file: - -```java -private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; -private static final String UNITY_CATALOG_ENDPOINT = "https://your-workspace.cloud.databricks.com"; -private static final String TABLE_NAME = "catalog.schema.table"; -private static final String CLIENT_ID = "your-oauth-client-id"; -private static final String CLIENT_SECRET = "your-oauth-client-secret"; -``` - -## Protobuf Schema - -The examples use an `AirQuality` message defined as: - -```proto -syntax = "proto2"; - -message AirQuality { - optional string device_name = 1; - optional int32 temp = 2; - optional int64 humidity = 3; -} -``` - -To use your own schema: -1. Define your `.proto` file -2. Generate Java classes: `protoc --java_out=. your_schema.proto` -3. Update the examples to use your message type instead of `Record.AirQuality` - -## Performance Comparison - -Typical performance characteristics (results may vary): - -| Metric | Blocking | Non-Blocking | -|--------|----------|--------------| -| Throughput | ~100-500 records/sec | ~10,000-50,000 records/sec | -| Latency (avg) | Low per record | Higher per record, lower overall | -| Memory usage | Low | Medium (buffering) | -| Complexity | Simple | Moderate | -| Error handling | Immediate | Deferred to flush | - -## Best Practices - -1. **Choose the right pattern**: Use blocking for low-volume/critical data, non-blocking for high-volume -2. **Monitor progress**: Use `ackCallback` in non-blocking mode to track progress -3. **Handle errors**: Always wrap ingestion in try-catch blocks -4. **Close streams**: Always close streams in a `finally` block or use try-with-resources -5. **Tune buffer size**: Adjust `maxInflightRecords` based on your throughput needs - -## Common Issues - -### Out of Memory -Increase JVM heap size: -```bash -java -Xmx4g -cp ... 
com.databricks.zerobus.examples.NonBlockingIngestionExample -``` - -### Authentication Failures -- Verify your CLIENT_ID and CLIENT_SECRET are correct -- Check that your OAuth client has permissions for the target table - -### Slow Performance -- Use non-blocking mode for better throughput -- Increase `maxInflightRecords` in stream configuration -- Check network connectivity to the Zerobus endpoint - -## Additional Resources - -- [SDK Documentation](../README.md) -- [Protocol Buffers Guide](https://developers.google.com/protocol-buffers) -- [Databricks Documentation](https://docs.databricks.com) diff --git a/examples/src/main/java/com/databricks/zerobus/examples/BlockingIngestionExample.java b/examples/src/main/java/com/databricks/zerobus/examples/BlockingIngestionExample.java deleted file mode 100644 index 7762dbb..0000000 --- a/examples/src/main/java/com/databricks/zerobus/examples/BlockingIngestionExample.java +++ /dev/null @@ -1,111 +0,0 @@ -package com.databricks.zerobus.examples; - -import com.databricks.zerobus.*; - -/** - * Example demonstrating blocking (synchronous) record ingestion. - * - *

    This example shows how to ingest records synchronously, waiting for each - * record to be durably written before proceeding to the next one. This approach - * provides the strongest durability guarantees but has lower throughput compared - * to non-blocking ingestion. - * - *

    Use Case: Best for low-volume ingestion where durability is critical - * and you need immediate confirmation of each write. - */ -public class BlockingIngestionExample { - - // Configuration - update these with your values - private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; - private static final String UNITY_CATALOG_ENDPOINT = "https://your-workspace.cloud.databricks.com"; - private static final String TABLE_NAME = "catalog.schema.table"; - private static final String CLIENT_ID = "your-oauth-client-id"; - private static final String CLIENT_SECRET = "your-oauth-client-secret"; - - // Number of records to ingest - private static final int NUM_RECORDS = 1000; - - public static void main(String[] args) { - System.out.println("Starting blocking ingestion example..."); - System.out.println("==========================================="); - - try { - // Step 1: Initialize the SDK - ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); - System.out.println("✓ SDK initialized"); - - // Step 2: Define table properties with your protobuf message type - // Note: Replace Record.AirQuality with your own protobuf message class - TableProperties tableProperties = new TableProperties<>( - TABLE_NAME, - Record.AirQuality.getDefaultInstance() - ); - System.out.println("✓ Table properties configured"); - - // Step 3: Create a stream with default configuration - ZerobusStream stream = sdk.createStream( - tableProperties, - CLIENT_ID, - CLIENT_SECRET - ).join(); - System.out.println("✓ Stream created: " + stream.getStreamId()); - - // Step 4: Ingest records synchronously - System.out.println("\nIngesting " + NUM_RECORDS + " records (blocking mode)..."); - long startTime = System.currentTimeMillis(); - int successCount = 0; - - try { - for (int i = 0; i < NUM_RECORDS; i++) { - // Create a record - Record.AirQuality record = Record.AirQuality.newBuilder() - .setDeviceName("sensor-" + (i % 10)) - .setTemp(20 + (i % 15)) - .setHumidity(50 + (i % 40)) - .build(); - - // Ingest and wait for durability - stream.ingestRecord(record).join(); - - successCount++; - - // Progress indicator - if ((i + 1) % 100 == 0) { - System.out.println(" Ingested " + (i + 1) + " records"); - } - } - - long endTime = System.currentTimeMillis(); - double durationSeconds = (endTime - startTime) / 1000.0; - double recordsPerSecond = NUM_RECORDS / durationSeconds; - - // Step 5: Close the stream - stream.close(); - System.out.println("\n✓ Stream closed"); - - // Print summary - System.out.println("\n==========================================="); - System.out.println("Ingestion Summary:"); - System.out.println(" Total records: " + NUM_RECORDS); - System.out.println(" Successful: " + successCount); - System.out.println(" Failed: " + (NUM_RECORDS - successCount)); - System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); - System.out.println(" Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); - System.out.println("==========================================="); - - } catch (Exception e) { - System.err.println("\n✗ Error during ingestion: " + e.getMessage()); - e.printStackTrace(); - stream.close(); - System.exit(1); - } - - } catch (ZerobusException e) { - System.err.println("\n✗ Failed to initialize stream: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - - System.out.println("\nBlocking ingestion example completed successfully!"); - } -} diff --git 
a/examples/src/main/java/com/databricks/zerobus/examples/NonBlockingIngestionExample.java b/examples/src/main/java/com/databricks/zerobus/examples/NonBlockingIngestionExample.java deleted file mode 100644 index 3d0c305..0000000 --- a/examples/src/main/java/com/databricks/zerobus/examples/NonBlockingIngestionExample.java +++ /dev/null @@ -1,157 +0,0 @@ -package com.databricks.zerobus.examples; - -import com.databricks.zerobus.*; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.function.Consumer; - -/** - * Example demonstrating non-blocking (asynchronous) record ingestion. - * - *

    This example shows how to ingest records asynchronously, allowing maximum - * throughput by not waiting for each record to complete before submitting the next. - * The SDK manages buffering and flow control automatically. - * - *

    Use Case: Best for high-volume ingestion where maximum throughput is - * important. Records are still durably written, but acknowledgments are handled - * asynchronously. - */ -public class NonBlockingIngestionExample { - - // Configuration - update these with your values - private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; - private static final String UNITY_CATALOG_ENDPOINT = "https://your-workspace.cloud.databricks.com"; - private static final String TABLE_NAME = "catalog.schema.table"; - private static final String CLIENT_ID = "your-oauth-client-id"; - private static final String CLIENT_SECRET = "your-oauth-client-secret"; - - // Number of records to ingest - private static final int NUM_RECORDS = 100_000; - - public static void main(String[] args) { - System.out.println("Starting non-blocking ingestion example..."); - System.out.println("==========================================="); - - try { - // Step 1: Initialize the SDK - ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); - System.out.println("✓ SDK initialized"); - - // Step 2: Configure stream options with ack callback - StreamConfigurationOptions options = StreamConfigurationOptions.builder() - .setMaxInflightRecords(50_000) // Allow 50k records in flight - .setRecovery(true) // Enable automatic recovery - .setAckCallback(createAckCallback()) // Track acknowledgments - .build(); - System.out.println("✓ Stream configuration created"); - - // Step 3: Define table properties with your protobuf message type - // Note: Replace Record.AirQuality with your own protobuf message class - TableProperties tableProperties = new TableProperties<>( - TABLE_NAME, - Record.AirQuality.getDefaultInstance() - ); - System.out.println("✓ Table properties configured"); - - // Step 4: Create a stream - ZerobusStream stream = sdk.createStream( - tableProperties, - CLIENT_ID, - CLIENT_SECRET, - options - ).join(); - System.out.println("✓ Stream created: " + stream.getStreamId()); - - // Step 5: Ingest records asynchronously - System.out.println("\nIngesting " + NUM_RECORDS + " records (non-blocking mode)..."); - List> futures = new ArrayList<>(); - long startTime = System.currentTimeMillis(); - - try { - for (int i = 0; i < NUM_RECORDS; i++) { - // Create a record with varying data - Record.AirQuality record = Record.AirQuality.newBuilder() - .setDeviceName("sensor-" + (i % 10)) - .setTemp(20 + (i % 15)) - .setHumidity(50 + (i % 40)) - .build(); - - // Ingest record and collect future for durability later - futures.add(stream.ingestRecord(record)); - - // Progress indicator - if ((i + 1) % 10000 == 0) { - System.out.println(" Submitted " + (i + 1) + " records"); - } - } - - long submitEndTime = System.currentTimeMillis(); - double submitDuration = (submitEndTime - startTime) / 1000.0; - - System.out.println("\n✓ All records submitted in " + - String.format("%.2f", submitDuration) + " seconds"); - - // Step 6: Flush and wait for all records to be durably written - System.out.println("\nFlushing stream and waiting for durability..."); - stream.flush(); - - // Wait for all futures to complete - CompletableFuture allFutures = CompletableFuture.allOf( - futures.toArray(new CompletableFuture[0]) - ); - allFutures.join(); - - long endTime = System.currentTimeMillis(); - double totalDuration = (endTime - startTime) / 1000.0; - double recordsPerSecond = NUM_RECORDS / totalDuration; - - System.out.println("✓ All records durably written"); - - // Step 7: Close the stream - stream.close(); 
- System.out.println("✓ Stream closed"); - - // Print summary - System.out.println("\n==========================================="); - System.out.println("Ingestion Summary:"); - System.out.println(" Total records: " + NUM_RECORDS); - System.out.println(" Submit time: " + String.format("%.2f", submitDuration) + " seconds"); - System.out.println(" Total time: " + String.format("%.2f", totalDuration) + " seconds"); - System.out.println(" Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); - System.out.println(" Average latency: " + - String.format("%.2f", (totalDuration * 1000.0) / NUM_RECORDS) + " ms/record"); - System.out.println("==========================================="); - - } catch (Exception e) { - System.err.println("\n✗ Error during ingestion: " + e.getMessage()); - e.printStackTrace(); - stream.close(); - System.exit(1); - } - - } catch (ZerobusException e) { - System.err.println("\n✗ Failed to initialize stream: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - - System.out.println("\nNon-blocking ingestion example completed successfully!"); - } - - /** - * Creates an acknowledgment callback that logs progress. - * - * @return Consumer that handles acknowledgment responses - */ - private static Consumer createAckCallback() { - return response -> { - long offset = response.getDurabilityAckUpToOffset(); - // Log every 10000 records - if (offset % 10000 == 0) { - System.out.println(" Acknowledged up to offset: " + offset); - } - }; - } -} diff --git a/pom.xml b/pom.xml index 0f543ff..beb4683 100644 --- a/pom.xml +++ b/pom.xml @@ -3,11 +3,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 com.databricks - zerobus-ingest-sdk - 0.1.0 - jar - Zerobus Ingest SDK for Java - Databricks Zerobus Ingest SDK for Java - Direct ingestion to Delta tables + zerobus-java-parent + 0.2.0 + pom + Zerobus Java Parent + Parent POM for Databricks Zerobus Java projects https://github.com/databricks/zerobus-sdk-java @@ -27,272 +27,166 @@ scm:git:ssh://github.com:databricks/zerobus-sdk-java.git https://github.com/databricks/zerobus-sdk-java/tree/main + + + common + sdk + sdk/examples + cli + + - 1.8 - 1.8 UTF-8 + 4.33.0 + 1.76.0 + 2.0.17 + 5.10.0 + 5.5.0 - - - - com.google.protobuf - protobuf-java - 4.33.0 - - - - io.grpc - grpc-netty-shaded - 1.76.0 - - - io.grpc - grpc-protobuf - 1.76.0 - - - io.grpc - grpc-stub - 1.76.0 - - - - javax.annotation - javax.annotation-api - 1.3.2 - - - - org.slf4j - slf4j-api - 2.0.17 - - - - org.slf4j - slf4j-simple - 2.0.17 - test - - - - org.junit.jupiter - junit-jupiter-api - 5.10.0 - test - - - org.junit.jupiter - junit-jupiter-engine - 5.10.0 - test - - - org.mockito - mockito-core - 5.5.0 - test - - - org.mockito - mockito-junit-jupiter - 5.5.0 - test - - - io.grpc - grpc-testing - 1.76.0 - test - - + + + + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + + io.grpc + grpc-netty-shaded + ${grpc.version} + + + io.grpc + grpc-protobuf + ${grpc.version} + + + io.grpc + grpc-stub + ${grpc.version} + + + + javax.annotation + javax.annotation-api + 1.3.2 + + + + com.google.code.findbugs + jsr305 + 3.0.2 + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.slf4j + slf4j-simple + ${slf4j.version} + + + + org.slf4j + slf4j-nop + ${slf4j.version} + + + + org.junit.jupiter + junit-jupiter-api + ${junit.version} + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + + + org.mockito + mockito-core + ${mockito.version} + + + org.mockito + 
mockito-junit-jupiter + ${mockito.version} + + + io.grpc + grpc-testing + ${grpc.version} + + + + - - - - org.xolstice.maven.plugins - protobuf-maven-plugin - 0.6.1 - - com.google.protobuf:protoc:4.33.0:exe:${os.detected.classifier} - grpc-java - io.grpc:protoc-gen-grpc-java:1.76.0:exe:${os.detected.classifier} - - - - compile-protobuf - - compile - compile-custom - - - - test-compile-protobuf - - test-compile - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.14.1 - - 1.8 - 1.8 - - - - com.diffplug.spotless - spotless-maven-plugin - - 2.30.0 - - - - - - - - - - - pom.xml - - - false - false - - - true - true - true - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 3.2.1 - - - org.apache.maven.plugins - maven-jar-plugin - 3.3.0 - - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.1 - - - - shade - - package - - true - jar-with-dependencies - - - - ${project.name} - ${project.version} - com.databricks.zerobus.tools.GenerateProto - - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - org.apache.maven.plugins - maven-source-plugin - 3.3.0 - - - attach-sources - - jar-no-fork - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.6.2 - - none - true - - - - attach-javadocs - - jar - - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 3.1.0 - - - sign-artifacts - - sign - - verify - - - --pinentry-mode - loopback - - - - - - - - org.sonatype.central - central-publishing-maven-plugin - 0.5.0 - true - - central - false - - - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.14.1 + + 8 + + + + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 + + + + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.1 + + + org.apache.maven.plugins + maven-jar-plugin + 3.3.0 + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + org.apache.maven.plugins + maven-source-plugin + 3.3.0 + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.6.2 + + + org.apache.maven.plugins + maven-gpg-plugin + 3.1.0 + + + org.sonatype.central + central-publishing-maven-plugin + 0.5.0 + + + diff --git a/sdk/examples/README.md b/sdk/examples/README.md new file mode 100644 index 0000000..0844fe4 --- /dev/null +++ b/sdk/examples/README.md @@ -0,0 +1,246 @@ +# Zerobus SDK Examples + +This directory contains example applications demonstrating different usage patterns of the Zerobus Ingest SDK for Java. + +## Examples + +The examples are organized by record type and schema approach: + +### Protobuf Records (`proto/`) + +#### Compiled Schema (`proto/compiled/`) + +Use compiled protobuf classes generated from `.proto` files. Best for type-safe development with IDE autocompletion. + +| Example | Description | +|---------|-------------| +| `SingleRecordExample.java` | Ingest protobuf records one at a time using generated classes | +| `BatchRecordExample.java` | Ingest protobuf records in batches using generated classes | + +#### Dynamic Schema (`proto/dynamic/`) + +Use runtime-loaded protobuf descriptors with `DynamicMessage`. Best for multi-tenant systems, schema registries, or generic data pipelines. 
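+The dynamic examples then populate records with `DynamicMessage` instead of generated builders. A minimal sketch (assuming `descriptor` is the runtime-built `AirQuality` descriptor from the Dynamic Schema Examples section below): + +```java +// Sketch: populate one record against a runtime-built descriptor. +DynamicMessage record = +    DynamicMessage.newBuilder(descriptor) +        .setField(descriptor.findFieldByName("device_name"), "sensor-1") +        .setField(descriptor.findFieldByName("temp"), 21) +        .build(); +``` +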
+ +| Example | Description | +|---------|-------------| +| `SingleRecordExample.java` | Ingest records one at a time with runtime schema | +| `BatchRecordExample.java` | Ingest records in batches with runtime schema | + +### JSON Records (`json/`) + +| Example | Description | +|---------|-------------| +| `SingleRecordExample.java` | Ingest JSON records one at a time | +| `BatchRecordExample.java` | Ingest JSON records in batches | + +## Configuration + +Before running the examples, update the following constants in each example file: + +```java +private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; +private static final String UNITY_CATALOG_ENDPOINT = "https://your-workspace.cloud.databricks.com"; +private static final String TABLE_NAME = "catalog.schema.table"; +private static final String CLIENT_ID = "your-oauth-client-id"; +private static final String CLIENT_SECRET = "your-oauth-client-secret"; +``` + +## Protobuf Schema + +### Compiled Schema Examples + +The static schema examples use an `AirQuality` message defined in `record.proto`: + +```proto +syntax = "proto2"; + +message AirQuality { + optional string device_name = 1; + optional int32 temp = 2; + optional int64 humidity = 3; +} +``` + +To use your own schema: +1. Define your `.proto` file +2. Generate Java classes: `protoc --java_out=. your_schema.proto` +3. Update the examples to use your message type instead of `Record.AirQuality` + +### Dynamic Schema Examples + +The dynamic schema examples build protobuf descriptors programmatically at runtime using the Protocol Buffers `DescriptorProtos` API: + +```java +// Build descriptor programmatically +DescriptorProtos.DescriptorProto descriptorProto = DescriptorProtos.DescriptorProto.newBuilder() + .setName("AirQuality") + .addField(DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("device_name").setNumber(1) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_STRING) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL)) + .addField(DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("temp").setNumber(2) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL)) + .build(); + +// Create runtime descriptor +FileDescriptorProto fileDescriptorProto = FileDescriptorProto.newBuilder() + .addMessageType(descriptorProto) + .build(); +FileDescriptor fileDescriptor = FileDescriptor.buildFrom(fileDescriptorProto, new FileDescriptor[]{}); +Descriptors.Descriptor descriptor = fileDescriptor.findMessageTypeByName("AirQuality"); +``` + +This approach is ideal for: +- Multi-tenant systems where schemas vary per tenant +- Schema registries that provide schema definitions at runtime +- Generic data pipelines that need runtime flexibility + +## Running the Examples + +The examples are part of a Maven module and can be run using Maven from the repository root. 
+ +### Build First + +```bash +# From the repository root, build all modules +mvn clean compile +``` + +### Proto Compiled Schema Examples +```bash +# Single record +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.proto.compiled.SingleRecordExample + +# Batch +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.proto.compiled.BatchRecordExample +``` + +### Proto Dynamic Schema Examples +```bash +# Single record +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.proto.dynamic.SingleRecordExample + +# Batch +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.proto.dynamic.BatchRecordExample +``` + +### JSON Examples +```bash +# Single record +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.json.SingleRecordExample + +# Batch +mvn exec:java -pl sdk/examples \ + -Dexec.mainClass=com.databricks.zerobus.examples.json.BatchRecordExample +``` + +## Advanced Configuration + +### Custom Authentication + +For custom authentication strategies, implement the `HeadersProvider` interface: + +```java +HeadersProvider customProvider = new HeadersProvider() { + @Override + public Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("authorization", "Bearer " + getMyToken()); + headers.put("x-databricks-zerobus-table-name", TABLE_NAME); + return headers; + } +}; + +// Using the fluent builder API with custom headers provider +ProtoZerobusStream stream = sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .headersProvider(customProvider) // Custom authentication + .compiledProto(Record.AirQuality.getDefaultInstance()) + .build() + .join(); +``` + +### Custom TLS Configuration + +For custom TLS settings, extend the `TlsConfig` class: + +```java +TlsConfig customTls = new SecureTlsConfig(); // Uses system CA certificates (default) + +// Using the fluent builder API with custom TLS config +ProtoZerobusStream stream = sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .tlsConfig(customTls) // Custom TLS configuration + .compiledProto(Record.AirQuality.getDefaultInstance()) + .build() + .join(); +``` + +### Custom Executor + +For custom thread management, use the SDK builder: + +```java +ExecutorService myExecutor = Executors.newFixedThreadPool(10); +ZerobusSdk sdk = ZerobusSdk.builder(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT) + .executor(myExecutor) + .build(); +``` + +### Stream Configuration Options + +All configuration options can be set on the stream builder: + +```java +ProtoZerobusStream stream = sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .maxInflightRequests(50000) // Max unacked requests + .recovery(true) // Enable auto-recovery + .recoveryRetries(3) // Max recovery attempts + .recoveryTimeoutMs(15000) // Recovery timeout + .recoveryBackoffMs(2000) // Backoff between retries + .flushTimeoutMs(300000) // Flush timeout + .serverLackOfAckTimeoutMs(60000) // Server ack timeout + .maxMessageSizeBytes(10485760) // 10MB max message size + .offsetCallback(offset -> + System.out.println("Acknowledged: " + offset)) + .compiledProto(Record.AirQuality.getDefaultInstance()) + .build() + .join(); +``` + +## Best Practices + +1. **Choose the right schema approach**: + - Compiled schema: Type safety, IDE support, compile-time validation + - Dynamic schema: Runtime flexibility, multi-tenant support, schema registry integration +2. 
**Choose the right record type**: Use protobuf for efficiency, JSON for flexibility +3. **Use batching for high throughput**: `ingestBatch()` reduces per-record overhead +4. **Handle errors**: Always wrap ingestion in try-catch blocks +5. **Close streams**: Always close streams in a `finally` block or use try-with-resources +6. **Tune buffer size**: Adjust `maxInflightRequests` based on your throughput needs +7. **SDK lifecycle**: The SDK implements `AutoCloseable` - use try-with-resources or call `close()` for explicit cleanup (optional, daemon threads clean up on JVM shutdown) + +## Common Issues + +### Authentication Failures +- Verify your CLIENT_ID and CLIENT_SECRET are correct +- Check that your OAuth client has permissions for the target table + +### Slow Performance +- Use batch ingestion for better throughput +- Increase `maxInflightRequests` in stream configuration +- Check network connectivity to the Zerobus endpoint + +## Additional Resources + +- [SDK Documentation](../../README.md) +- [Protocol Buffers Guide](https://developers.google.com/protocol-buffers) +- [Databricks Documentation](https://docs.databricks.com) diff --git a/sdk/examples/pom.xml b/sdk/examples/pom.xml new file mode 100644 index 0000000..e118275 --- /dev/null +++ b/sdk/examples/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + com.databricks + zerobus-java-parent + 0.2.0 + ../../pom.xml + + zerobus-examples + jar + Zerobus SDK Examples + Example code for Databricks Zerobus Ingest SDK + + + + + com.databricks + zerobus-ingest-sdk + ${project.parent.version} + + + + com.google.protobuf + protobuf-java + + + + org.slf4j + slf4j-simple + + + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.6.1 + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + + + + compile-protobuf + + compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + compile + + + + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + + + diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/BatchRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/BatchRecordExample.java new file mode 100644 index 0000000..7857fd7 --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/BatchRecordExample.java @@ -0,0 +1,145 @@ +package com.databricks.zerobus.examples.json; + +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.batch.json.StringBatch; +import com.databricks.zerobus.stream.JsonZerobusStream; +import java.util.ArrayList; +import java.util.List; + +/** + * Example demonstrating batch JSON record ingestion. + * + *

    This example shows how to ingest multiple JSON records in a single batch using {@code + * ingestBatch()}. Batching improves throughput by reducing per-record overhead. + * + *

    Use Case: Best for high-volume JSON ingestion where you can accumulate records before + * sending. The entire batch is assigned a single offset ID and acknowledged atomically. + */ +public class BatchRecordExample { + + // Configuration - update these with your values + private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; + private static final String UNITY_CATALOG_ENDPOINT = + "https://your-workspace.cloud.databricks.com"; + private static final String TABLE_NAME = "catalog.schema.table"; + private static final String CLIENT_ID = "your-oauth-client-id"; + private static final String CLIENT_SECRET = "your-oauth-client-secret"; + + // Batch configuration + private static final int TOTAL_RECORDS = 10000; + private static final int BATCH_SIZE = 100; + + public static void main(String[] args) { + System.out.println("Starting JSON batch record ingestion example..."); + System.out.println("================================================"); + + try { + // Step 1: Initialize the SDK + ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); + System.out.println("SDK initialized"); + + // Step 2: Create a JSON stream using the fluent builder API + JsonZerobusStream stream = + sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .recovery(true) + .maxInflightRequests(50000) // Higher limit for batch ingestion + .json() + .build() + .join(); + + // --- Optional: Custom authentication --- + // HeadersProvider customAuth = new HeadersProvider() { + // @Override + // public java.util.Map getHeaders() { + // java.util.Map headers = new java.util.HashMap<>(); + // headers.put("authorization", "Bearer " + getMyCustomToken()); + // headers.put("x-databricks-zerobus-table-name", TABLE_NAME); + // return headers; + // } + // }; + // JsonZerobusStream stream = sdk.streamBuilder(TABLE_NAME) + // .clientCredentials(CLIENT_ID, CLIENT_SECRET) + // .headersProvider(customAuth) + // .json() + // .build() + // .join(); + + System.out.println("Stream created: " + stream.getStreamId()); + + // Step 3: Ingest JSON records in batches + System.out.println( + "\nIngesting " + TOTAL_RECORDS + " JSON records in batches of " + BATCH_SIZE + "..."); + long startTime = System.currentTimeMillis(); + int batchCount = 0; + + try { + for (int i = 0; i < TOTAL_RECORDS; i += BATCH_SIZE) { + // Build a batch of JSON records + List batch = new ArrayList<>(); + int batchEnd = Math.min(i + BATCH_SIZE, TOTAL_RECORDS); + + for (int j = i; j < batchEnd; j++) { + String jsonRecord = + String.format( + "{\"device_name\": \"sensor-%d\", \"temp\": %d, \"humidity\": %d}", + j % 10, 20 + (j % 15), 50 + (j % 40)); + batch.add(jsonRecord); + } + + // Ingest the entire batch at once + Long offset = stream.ingestBatch(StringBatch.of(batch)); + batchCount++; + + // Progress indicator + if (batchCount % 10 == 0) { + System.out.println( + " Ingested " + + batchEnd + + " records (" + + batchCount + + " batches, offset: " + + offset + + ")"); + } + } + + // Wait for all batches to be durably written + stream.flush(); + + long endTime = System.currentTimeMillis(); + double durationSeconds = (endTime - startTime) / 1000.0; + double recordsPerSecond = TOTAL_RECORDS / durationSeconds; + + // Step 4: Close the stream + stream.close(); + System.out.println("\nStream closed"); + + // Print summary + System.out.println("\n================================================"); + System.out.println("Ingestion Summary:"); + System.out.println(" Total records: " + 
TOTAL_RECORDS); + System.out.println(" Batch size: " + BATCH_SIZE); + System.out.println(" Total batches: " + batchCount); + System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); + System.out.println( + " Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("================================================"); + + } catch (Exception e) { + System.err.println("\nError during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (ZerobusException e) { + System.err.println("\nFailed to initialize stream: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nJSON batch record example completed successfully!"); + } +} diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/SingleRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/SingleRecordExample.java new file mode 100644 index 0000000..8ff1a52 --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/json/SingleRecordExample.java @@ -0,0 +1,132 @@ +package com.databricks.zerobus.examples.json; + +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.stream.JsonZerobusStream; + +/** + * Example demonstrating single JSON record ingestion. + * + *

+ * <p>This example shows how to ingest JSON records one at a time. JSON ingestion is useful when: + * + * <ul> + *   <li>Your data is already in JSON format + *   <li>You don't want to define protobuf schemas + *   <li>You need schema flexibility at the cost of type safety + * </ul> + *
+ * <p>Note: JSON streams require the table schema to be defined in the catalog, and JSON + * records must match the expected schema. + */ +public class SingleRecordExample { + + // Configuration - update these with your values + private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; + private static final String UNITY_CATALOG_ENDPOINT = + "https://your-workspace.cloud.databricks.com"; + private static final String TABLE_NAME = "catalog.schema.table"; + private static final String CLIENT_ID = "your-oauth-client-id"; + private static final String CLIENT_SECRET = "your-oauth-client-secret"; + + // Number of records to ingest + private static final int NUM_RECORDS = 1000; + + public static void main(String[] args) { + System.out.println("Starting JSON single record ingestion example..."); + System.out.println("================================================="); + + try { + // Step 1: Initialize the SDK + ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); + System.out.println("SDK initialized"); + + // Step 2: Create a JSON stream using the fluent builder API + JsonZerobusStream stream = + sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .recovery(true) + .maxInflightRequests(10000) + .json() + .build() + .join(); + + // --- Optional: Custom authentication --- + // HeadersProvider customAuth = new HeadersProvider() { + // @Override + // public java.util.Map<String, String> getHeaders() { + // java.util.Map<String, String> headers = new java.util.HashMap<>(); + // headers.put("authorization", "Bearer " + getMyCustomToken()); + // headers.put("x-databricks-zerobus-table-name", TABLE_NAME); + // return headers; + // } + // }; + // JsonZerobusStream stream = sdk.streamBuilder(TABLE_NAME) + // .clientCredentials(CLIENT_ID, CLIENT_SECRET) + // .headersProvider(customAuth) + // .json() + // .build() + // .join(); + + System.out.println("Stream created: " + stream.getStreamId()); + + // Step 3: Ingest JSON records one at a time + System.out.println("\nIngesting " + NUM_RECORDS + " JSON records..."); + long startTime = System.currentTimeMillis(); + + try { + for (int i = 0; i < NUM_RECORDS; i++) { + // Create a JSON record + // Note: The JSON structure must match your table schema + String jsonRecord = + String.format( + "{\"device_name\": \"sensor-%d\", \"temp\": %d, \"humidity\": %d}", + i % 10, // device_name + 20 + (i % 15), // temp + 50 + (i % 40) // humidity + ); + + // Ingest the JSON record and get its offset ID + long offset = stream.ingest(jsonRecord); + + // Progress indicator + if ((i + 1) % 100 == 0) { + System.out.println(" Ingested " + (i + 1) + " records (last offset: " + offset + ")"); + } + } + + // Wait for all records to be durably written + stream.flush(); + + long endTime = System.currentTimeMillis(); + double durationSeconds = (endTime - startTime) / 1000.0; + double recordsPerSecond = NUM_RECORDS / durationSeconds; + + // Step 4: Close the stream + stream.close(); + System.out.println("\nStream closed"); + + // Print summary + System.out.println("\n================================================="); + System.out.println("Ingestion Summary:"); + System.out.println(" Total records: " + NUM_RECORDS); + System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); + System.out.println( + " Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("================================================="); + + } catch (Exception e) { + System.err.println("\nError
during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (ZerobusException e) { + System.err.println("\nFailed to initialize stream: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nJSON single record example completed successfully!"); + } +} diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/BatchRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/BatchRecordExample.java new file mode 100644 index 0000000..f7a04e7 --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/BatchRecordExample.java @@ -0,0 +1,132 @@ +package com.databricks.zerobus.examples.proto.compiled; + +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.batch.proto.MessageBatch; +import com.databricks.zerobus.examples.Record; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import java.util.ArrayList; +import java.util.List; + +/** + * Example demonstrating batch protobuf record ingestion with compiled proto schemas. + * + *

    This example uses the fluent builder API with {@code .compiledProto()} for compiled protobuf + * message types. The schema is known at compile time from generated Java classes. + * + *
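+ * <p>As in the JSON batch example, each {@code ingestBatch()} call assigns the whole batch a + * single offset ID that is acknowledged atomically.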

    Use Case: Best for high-volume ingestion with type-safe record creation when you have + * .proto files compiled into Java classes. + * + * @see com.databricks.zerobus.examples.proto.dynamic.BatchRecordExample for runtime schema + */ +public class BatchRecordExample { + + // Configuration - update these with your values + private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; + private static final String UNITY_CATALOG_ENDPOINT = + "https://your-workspace.cloud.databricks.com"; + private static final String TABLE_NAME = "catalog.schema.table"; + private static final String CLIENT_ID = "your-oauth-client-id"; + private static final String CLIENT_SECRET = "your-oauth-client-secret"; + + // Batch configuration + private static final int TOTAL_RECORDS = 10000; + private static final int BATCH_SIZE = 100; + + public static void main(String[] args) { + System.out.println("Starting compiled proto batch record ingestion example..."); + System.out.println("==========================================================="); + + try { + // Step 1: Initialize the SDK + ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); + System.out.println("SDK initialized"); + + // Step 2: Create a proto stream using the fluent builder API + ProtoZerobusStream stream = + sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .recovery(true) + .maxInflightRequests(50000) // Higher limit for batch ingestion + .compiledProto(Record.AirQuality.getDefaultInstance()) + .build() + .join(); + System.out.println("Stream created: " + stream.getStreamId()); + + // Step 3: Ingest records in batches + System.out.println( + "\nIngesting " + TOTAL_RECORDS + " proto records in batches of " + BATCH_SIZE + "..."); + long startTime = System.currentTimeMillis(); + int batchCount = 0; + + try { + for (int i = 0; i < TOTAL_RECORDS; i += BATCH_SIZE) { + // Build a batch of records using generated builders (type-safe) + List batch = new ArrayList<>(); + int batchEnd = Math.min(i + BATCH_SIZE, TOTAL_RECORDS); + + for (int j = i; j < batchEnd; j++) { + Record.AirQuality record = + Record.AirQuality.newBuilder() + .setDeviceName("sensor-" + (j % 10)) + .setTemp(20 + (j % 15)) + .setHumidity(50 + (j % 40)) + .build(); + batch.add(record); + } + + // Ingest the entire batch at once + Long offset = stream.ingestBatch(MessageBatch.of(batch)); + batchCount++; + + // Progress indicator + if (batchCount % 10 == 0) { + System.out.println( + " Ingested " + + batchEnd + + " records (" + + batchCount + + " batches, offset: " + + offset + + ")"); + } + } + + // Wait for all batches to be durably written + stream.flush(); + + long endTime = System.currentTimeMillis(); + double durationSeconds = (endTime - startTime) / 1000.0; + double recordsPerSecond = TOTAL_RECORDS / durationSeconds; + + // Step 4: Close the stream + stream.close(); + System.out.println("\nStream closed"); + + // Print summary + System.out.println("\n==========================================================="); + System.out.println("Ingestion Summary:"); + System.out.println(" Total records: " + TOTAL_RECORDS); + System.out.println(" Batch size: " + BATCH_SIZE); + System.out.println(" Total batches: " + batchCount); + System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); + System.out.println( + " Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("==========================================================="); + + } catch 
(Exception e) { + System.err.println("\nError during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (ZerobusException e) { + System.err.println("\nFailed to initialize stream: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nCompiled proto batch record example completed successfully!"); + } +} diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/SingleRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/SingleRecordExample.java new file mode 100644 index 0000000..23a3f00 --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/compiled/SingleRecordExample.java @@ -0,0 +1,110 @@ +package com.databricks.zerobus.examples.proto.compiled; + +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.examples.Record; +import com.databricks.zerobus.stream.ProtoZerobusStream; + +/** + * Example demonstrating single protobuf record ingestion with compiled proto schemas. + * + *

    This example uses the fluent builder API with {@code .compiledProto()} for compiled protobuf + * message types. The schema is known at compile time from generated Java classes. + * + *

Use Case: Best when you have .proto files compiled into Java classes and want type-safe + * record creation with IDE autocompletion. + * + * @see com.databricks.zerobus.examples.proto.dynamic.SingleRecordExample for runtime schema + */ +public class SingleRecordExample { + + // Configuration - update these with your values + private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; + private static final String UNITY_CATALOG_ENDPOINT = + "https://your-workspace.cloud.databricks.com"; + private static final String TABLE_NAME = "catalog.schema.table"; + private static final String CLIENT_ID = "your-oauth-client-id"; + private static final String CLIENT_SECRET = "your-oauth-client-secret"; + + // Number of records to ingest + private static final int NUM_RECORDS = 1000; + + public static void main(String[] args) { + System.out.println("Starting compiled proto single record ingestion example..."); + System.out.println("============================================================"); + + try { + // Step 1: Initialize the SDK + ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); + System.out.println("SDK initialized"); + + // Step 2: Create a proto stream using the fluent builder API + ProtoZerobusStream stream = + sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .recovery(true) // Enable automatic recovery on transient failures + .maxInflightRequests(10000) // Allow up to 10k requests in flight + .compiledProto(Record.AirQuality.getDefaultInstance()) + .build() + .join(); + System.out.println("Stream created: " + stream.getStreamId()); + + // Step 3: Ingest records one at a time + System.out.println("\nIngesting " + NUM_RECORDS + " proto records..."); + long startTime = System.currentTimeMillis(); + + try { + for (int i = 0; i < NUM_RECORDS; i++) { + // Create a protobuf record using generated builder (type-safe) + Record.AirQuality record = + Record.AirQuality.newBuilder() + .setDeviceName("sensor-" + (i % 10)) + .setTemp(20 + (i % 15)) + .setHumidity(50 + (i % 40)) + .build(); + + // Ingest the record and get its offset ID + long offset = stream.ingest(record); + + // Progress indicator + if ((i + 1) % 100 == 0) { + System.out.println(" Ingested " + (i + 1) + " records (last offset: " + offset + ")"); + } + } + + // Wait for all records to be durably written + stream.flush(); + + long endTime = System.currentTimeMillis(); + double durationSeconds = (endTime - startTime) / 1000.0; + double recordsPerSecond = NUM_RECORDS / durationSeconds; + + // Step 4: Close the stream + stream.close(); + System.out.println("\nStream closed"); + + // Print summary + System.out.println("\n============================================================"); + System.out.println("Ingestion Summary:"); + System.out.println(" Total records: " + NUM_RECORDS); + System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); + System.out.println( + " Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("============================================================"); + + } catch (Exception e) { + System.err.println("\nError during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (ZerobusException e) { + System.err.println("\nFailed to initialize stream: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nCompiled proto single record example completed
successfully!"); + } +} diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/BatchRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/BatchRecordExample.java new file mode 100644 index 0000000..63a7eb6 --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/BatchRecordExample.java @@ -0,0 +1,230 @@ +package com.databricks.zerobus.examples.proto.dynamic; + +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.batch.proto.MessageBatch; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.DynamicMessage; +import java.util.ArrayList; +import java.util.List; + +/** + * Example demonstrating batch record ingestion with runtime-built proto schemas. + * + *

    This example uses the fluent builder API with {@code .dynamicProto()} for runtime-created + * protobuf descriptors. The schema is built programmatically at runtime rather than from compiled + * .proto files. + * + *

Use Case: Best for high-volume ingestion when schemas are determined dynamically.
+ *
+ * @see com.databricks.zerobus.examples.proto.compiled.BatchRecordExample for compile-time schema
+ */
+public class BatchRecordExample {
+
+  // Configuration - update these with your values
+  private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com";
+  private static final String UNITY_CATALOG_ENDPOINT =
+      "https://your-workspace.cloud.databricks.com";
+  private static final String TABLE_NAME = "catalog.schema.table";
+  private static final String CLIENT_ID = "your-oauth-client-id";
+  private static final String CLIENT_SECRET = "your-oauth-client-secret";
+
+  // Batch configuration
+  private static final int TOTAL_RECORDS = 10000;
+  private static final int BATCH_SIZE = 100;
+
+  public static void main(String[] args) {
+    System.out.println("Starting dynamic proto batch record ingestion example...");
+    System.out.println("==========================================================");
+
+    try {
+      // Step 1: Build the protobuf descriptor programmatically at runtime
+      Descriptors.Descriptor messageDescriptor = buildAirQualityDescriptor();
+      System.out.println("Built descriptor for: " + messageDescriptor.getFullName());
+      System.out.println("Fields: " + messageDescriptor.getFields().size());
+
+      // Step 2: Initialize the SDK
+      ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT);
+      System.out.println("SDK initialized");
+
+      // Step 3: Create a dynamic proto stream using the fluent builder API
+      ProtoZerobusStream stream =
+          sdk.streamBuilder(TABLE_NAME)
+              .clientCredentials(CLIENT_ID, CLIENT_SECRET)
+              .recovery(true)
+              .maxInflightRequests(50000) // Higher limit for batch ingestion
+              .dynamicProto(messageDescriptor)
+              .build()
+              .join();
+      System.out.println("Stream created: " + stream.getStreamId());
+
+      // Step 4: Ingest records in batches
+      System.out.println(
+          "\nIngesting "
+              + TOTAL_RECORDS
+              + " dynamic proto records in batches of "
+              + BATCH_SIZE
+              + "...");
+      long startTime = System.currentTimeMillis();
+      int batchCount = 0;
+
+      try {
+        for (int i = 0; i < TOTAL_RECORDS; i += BATCH_SIZE) {
+          // Build a batch of DynamicMessage records
+          List<DynamicMessage> batch = new ArrayList<>();
+          int batchEnd = Math.min(i + BATCH_SIZE, TOTAL_RECORDS);
+
+          for (int j = i; j < batchEnd; j++) {
+            DynamicMessage record = buildDynamicRecord(messageDescriptor, j);
+            batch.add(record);
+          }
+
+          // Ingest the entire batch at once
+          Long offset = stream.ingestBatch(MessageBatch.of(batch));
+          batchCount++;
+
+          // Progress indicator
+          if (batchCount % 10 == 0) {
+            System.out.println(
+                " Ingested "
+                    + batchEnd
+                    + " records ("
+                    + batchCount
+                    + " batches, offset: "
+                    + offset
+                    + ")");
+          }
+        }
+
+        // Wait for all batches to be durably written
+        stream.flush();
+
+        long endTime = System.currentTimeMillis();
+        double durationSeconds = (endTime - startTime) / 1000.0;
+        double recordsPerSecond = TOTAL_RECORDS / durationSeconds;
+
+        // Step 5: Close the stream
+        stream.close();
+        System.out.println("\nStream closed");
+
+        // Print summary
+        System.out.println("\n==========================================================");
+        System.out.println("Ingestion Summary:");
+        System.out.println(" Total records: " + TOTAL_RECORDS);
+        System.out.println(" Batch size: " + BATCH_SIZE);
+        System.out.println(" Total batches: " + batchCount);
+        System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds");
+        System.out.println(
+            " Throughput: " +
String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("=========================================================="); + + } catch (Exception e) { + System.err.println("\nError during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (Exception e) { + System.err.println("\nFailed to initialize: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nDynamic proto batch record example completed successfully!"); + } + + /** + * Builds a protobuf descriptor programmatically at runtime. + * + *

    This creates a schema equivalent to: + * + *

    +   * message AirQuality {
    +   *   optional string device_name = 1;
    +   *   optional int32 temp = 2;
    +   *   optional int32 humidity = 3;
    +   * }
    +   * 
    + * + * @return The message descriptor + */ + private static Descriptors.Descriptor buildAirQualityDescriptor() throws Exception { + // Define fields + DescriptorProtos.FieldDescriptorProto deviceNameField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("device_name") + .setNumber(1) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_STRING) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.FieldDescriptorProto tempField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("temp") + .setNumber(2) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.FieldDescriptorProto humidityField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("humidity") + .setNumber(3) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + // Define message type + DescriptorProtos.DescriptorProto messageType = + DescriptorProtos.DescriptorProto.newBuilder() + .setName("AirQuality") + .addField(deviceNameField) + .addField(tempField) + .addField(humidityField) + .build(); + + // Create file descriptor + DescriptorProtos.FileDescriptorProto fileDescriptorProto = + DescriptorProtos.FileDescriptorProto.newBuilder() + .setName("air_quality.proto") + .addMessageType(messageType) + .build(); + + Descriptors.FileDescriptor fileDescriptor = + Descriptors.FileDescriptor.buildFrom( + fileDescriptorProto, new Descriptors.FileDescriptor[] {}); + + return fileDescriptor.findMessageTypeByName("AirQuality"); + } + + /** + * Builds a DynamicMessage record using the runtime-built descriptor. + * + * @param descriptor The message descriptor + * @param index Record index for generating sample data + * @return A DynamicMessage record + */ + private static DynamicMessage buildDynamicRecord(Descriptors.Descriptor descriptor, int index) { + DynamicMessage.Builder builder = DynamicMessage.newBuilder(descriptor); + + // Set fields by field descriptor + for (FieldDescriptor field : descriptor.getFields()) { + switch (field.getName()) { + case "device_name": + builder.setField(field, "sensor-" + (index % 10)); + break; + case "temp": + builder.setField(field, 20 + (index % 15)); + break; + case "humidity": + builder.setField(field, 50 + (index % 40)); + break; + } + } + + return builder.build(); + } +} diff --git a/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/SingleRecordExample.java b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/SingleRecordExample.java new file mode 100644 index 0000000..2e0a33d --- /dev/null +++ b/sdk/examples/src/main/java/com/databricks/zerobus/examples/proto/dynamic/SingleRecordExample.java @@ -0,0 +1,217 @@ +package com.databricks.zerobus.examples.proto.dynamic; + +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.DynamicMessage; + +/** + * Example demonstrating single record ingestion with runtime-built proto schemas. + * + *

    This example uses the fluent builder API with {@code .dynamicProto()} for runtime-created + * protobuf descriptors. The schema is built programmatically at runtime rather than from compiled + * .proto files. + * + *

    Use Case: Best when schemas are determined dynamically, such as: + * + *

      + *
    • Multi-tenant systems where each tenant has different schemas + *
    • Schema registry integration where schemas are fetched at runtime + *
    • Generic data pipelines that handle multiple message types + *
    + * + * @see com.databricks.zerobus.examples.proto.compiled.SingleRecordExample for compile-time schema + */ +public class SingleRecordExample { + + // Configuration - update these with your values + private static final String SERVER_ENDPOINT = "your-shard-id.zerobus.region.cloud.databricks.com"; + private static final String UNITY_CATALOG_ENDPOINT = + "https://your-workspace.cloud.databricks.com"; + private static final String TABLE_NAME = "catalog.schema.table"; + private static final String CLIENT_ID = "your-oauth-client-id"; + private static final String CLIENT_SECRET = "your-oauth-client-secret"; + + // Number of records to ingest + private static final int NUM_RECORDS = 1000; + + public static void main(String[] args) { + System.out.println("Starting dynamic proto single record ingestion example..."); + System.out.println("==========================================================="); + + try { + // Step 1: Build the protobuf descriptor programmatically at runtime + // This creates a schema equivalent to: + // message AirQuality { + // optional string device_name = 1; + // optional int32 temp = 2; + // optional int32 humidity = 3; + // } + Descriptors.Descriptor messageDescriptor = buildAirQualityDescriptor(); + System.out.println("Built descriptor for: " + messageDescriptor.getFullName()); + System.out.println("Fields: " + messageDescriptor.getFields().size()); + + // Step 2: Initialize the SDK + ZerobusSdk sdk = new ZerobusSdk(SERVER_ENDPOINT, UNITY_CATALOG_ENDPOINT); + System.out.println("SDK initialized"); + + // Step 3: Create a dynamic proto stream using the fluent builder API + ProtoZerobusStream stream = + sdk.streamBuilder(TABLE_NAME) + .clientCredentials(CLIENT_ID, CLIENT_SECRET) + .recovery(true) + .maxInflightRequests(10000) + .dynamicProto(messageDescriptor) + .build() + .join(); + System.out.println("Stream created: " + stream.getStreamId()); + + // Step 4: Ingest records one at a time + System.out.println("\nIngesting " + NUM_RECORDS + " dynamic proto records..."); + long startTime = System.currentTimeMillis(); + + try { + for (int i = 0; i < NUM_RECORDS; i++) { + // Create a record using DynamicMessage.Builder + // Fields are set by field descriptor (no compile-time type safety) + DynamicMessage record = buildDynamicRecord(messageDescriptor, i); + + // Ingest the record + long offset = stream.ingest(record); + + // Progress indicator + if ((i + 1) % 100 == 0) { + System.out.println(" Ingested " + (i + 1) + " records (last offset: " + offset + ")"); + } + } + + // Wait for all records to be durably written + stream.flush(); + + long endTime = System.currentTimeMillis(); + double durationSeconds = (endTime - startTime) / 1000.0; + double recordsPerSecond = NUM_RECORDS / durationSeconds; + + // Step 5: Close the stream + stream.close(); + System.out.println("\nStream closed"); + + // Print summary + System.out.println("\n==========================================================="); + System.out.println("Ingestion Summary:"); + System.out.println(" Total records: " + NUM_RECORDS); + System.out.println(" Duration: " + String.format("%.2f", durationSeconds) + " seconds"); + System.out.println( + " Throughput: " + String.format("%.2f", recordsPerSecond) + " records/sec"); + System.out.println("==========================================================="); + + } catch (Exception e) { + System.err.println("\nError during ingestion: " + e.getMessage()); + e.printStackTrace(); + stream.close(); + System.exit(1); + } + + } catch (Exception e) { + 
System.err.println("\nFailed to initialize: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("\nDynamic proto single record example completed successfully!"); + } + + /** + * Builds a protobuf descriptor programmatically at runtime. + * + *

    This creates a schema equivalent to: + * + *

    +   * message AirQuality {
    +   *   optional string device_name = 1;
    +   *   optional int32 temp = 2;
    +   *   optional int32 humidity = 3;
    +   * }
    +   * 
    + * + * @return The message descriptor + */ + private static Descriptors.Descriptor buildAirQualityDescriptor() throws Exception { + // Define fields + DescriptorProtos.FieldDescriptorProto deviceNameField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("device_name") + .setNumber(1) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_STRING) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.FieldDescriptorProto tempField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("temp") + .setNumber(2) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.FieldDescriptorProto humidityField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("humidity") + .setNumber(3) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + // Define message type + DescriptorProtos.DescriptorProto messageType = + DescriptorProtos.DescriptorProto.newBuilder() + .setName("AirQuality") + .addField(deviceNameField) + .addField(tempField) + .addField(humidityField) + .build(); + + // Create file descriptor + DescriptorProtos.FileDescriptorProto fileDescriptorProto = + DescriptorProtos.FileDescriptorProto.newBuilder() + .setName("air_quality.proto") + .addMessageType(messageType) + .build(); + + Descriptors.FileDescriptor fileDescriptor = + Descriptors.FileDescriptor.buildFrom( + fileDescriptorProto, new Descriptors.FileDescriptor[] {}); + + return fileDescriptor.findMessageTypeByName("AirQuality"); + } + + /** + * Builds a DynamicMessage record using the runtime-built descriptor. 
+ * + * @param descriptor The message descriptor + * @param index Record index for generating sample data + * @return A DynamicMessage record + */ + private static DynamicMessage buildDynamicRecord(Descriptors.Descriptor descriptor, int index) { + DynamicMessage.Builder builder = DynamicMessage.newBuilder(descriptor); + + // Set fields by field descriptor + for (FieldDescriptor field : descriptor.getFields()) { + switch (field.getName()) { + case "device_name": + builder.setField(field, "sensor-" + (index % 10)); + break; + case "temp": + builder.setField(field, 20 + (index % 15)); + break; + case "humidity": + builder.setField(field, 50 + (index % 40)); + break; + } + } + + return builder.build(); + } +} diff --git a/examples/record.proto b/sdk/examples/src/main/proto/record.proto similarity index 50% rename from examples/record.proto rename to sdk/examples/src/main/proto/record.proto index d063f8b..01370a8 100644 --- a/examples/record.proto +++ b/sdk/examples/src/main/proto/record.proto @@ -1,5 +1,10 @@ syntax = "proto2"; +package com.databricks.zerobus.examples; + +option java_package = "com.databricks.zerobus.examples"; +option java_outer_classname = "Record"; + message AirQuality { optional string device_name = 1; optional int32 temp = 2; diff --git a/sdk/pom.xml b/sdk/pom.xml new file mode 100644 index 0000000..0e32feb --- /dev/null +++ b/sdk/pom.xml @@ -0,0 +1,341 @@ + + + 4.0.0 + + com.databricks + zerobus-java-parent + 0.2.0 + + zerobus-ingest-sdk + jar + Zerobus Ingest SDK for Java + Databricks Zerobus Ingest SDK for Java - Direct ingestion to Delta tables + https://github.com/databricks/zerobus-sdk-java + + + + com.databricks + zerobus-common + ${project.parent.version} + + + + com.google.protobuf + protobuf-java + + + + io.grpc + grpc-netty-shaded + + + io.grpc + grpc-protobuf + + + io.grpc + grpc-stub + + + + javax.annotation + javax.annotation-api + + + + com.google.code.findbugs + jsr305 + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-nop + test + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.mockito + mockito-core + test + + + org.mockito + mockito-junit-jupiter + test + + + io.grpc + grpc-testing + test + + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.6.1 + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + grpc-java + io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier} + + + + compile-protobuf + + compile + compile-custom + + + + test-compile-protobuf + + test-compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + com.diffplug.spotless + spotless-maven-plugin + + + + + + + + + + pom.xml + + + false + false + + + true + true + true + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + org.jacoco + jacoco-maven-plugin + 0.8.11 + + + prepare-agent + + prepare-agent + + + + report + + report + + test + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + true + shaded + + + + + com.google.protobuf + com.databricks.zerobus.shaded.protobuf + + + + io.grpc + com.databricks.zerobus.shaded.grpc + + + + com.google.common + com.databricks.zerobus.shaded.guava + + + + io.perfmark + com.databricks.zerobus.shaded.perfmark + + + + com.google.errorprone + com.databricks.zerobus.shaded.errorprone + + + + com.google.api + com.databricks.zerobus.shaded.google.api + + + com.google.rpc + com.databricks.zerobus.shaded.google.rpc 
+ + + com.google.type + com.databricks.zerobus.shaded.google.type + + + com.google.cloud + com.databricks.zerobus.shaded.google.cloud + + + com.google.logging + com.databricks.zerobus.shaded.google.logging + + + com.google.longrunning + com.databricks.zerobus.shaded.google.longrunning + + + + + + ${project.name} + ${project.version} + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/MANIFEST.MF + META-INF/LICENSE.txt + META-INF/versions/9/module-info.class + + + + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + none + true + + ${project.basedir}/src/main/java:${project.build.directory}/generated-sources/protobuf/java:${project.build.directory}/generated-sources/protobuf/grpc-java + + + + attach-javadocs + + jar + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + + + sign-artifacts + + sign + + verify + + + --pinentry-mode + loopback + + + + + + + + org.sonatype.central + central-publishing-maven-plugin + true + + central + false + + + + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + + + diff --git a/src/main/java/com/databricks/zerobus/NonRetriableException.java b/sdk/src/main/java/com/databricks/zerobus/NonRetriableException.java similarity index 53% rename from src/main/java/com/databricks/zerobus/NonRetriableException.java rename to sdk/src/main/java/com/databricks/zerobus/NonRetriableException.java index bb032a3..d90c38e 100644 --- a/src/main/java/com/databricks/zerobus/NonRetriableException.java +++ b/sdk/src/main/java/com/databricks/zerobus/NonRetriableException.java @@ -1,8 +1,22 @@ package com.databricks.zerobus; /** - * An exception that indicates a non-retriable error has occurred. This is used to signal that - * stream creation or recovery should not be retried. + * An exception that indicates a non-retriable error has occurred. + * + *

    This exception is thrown when the error is permanent and cannot be resolved by retrying. + * Common causes include: + * + *

      + *
    • Invalid credentials (wrong client ID or secret) + *
    • Missing table or insufficient permissions + *
    • Schema mismatch between data and table + *
    • Invalid configuration parameters + *
    + * + *

When this exception is thrown, the operation should not be retried without first fixing the
+ * underlying issue. Contrast with {@link ZerobusException} which indicates a retriable error.
+ *
+ * @see ZerobusException
  */
 public class NonRetriableException extends ZerobusException {
diff --git a/src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java b/sdk/src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java
similarity index 55%
rename from src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java
rename to sdk/src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java
index 050b685..caa901f 100644
--- a/src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java
+++ b/sdk/src/main/java/com/databricks/zerobus/StreamConfigurationOptions.java
@@ -2,6 +2,8 @@
 
 import java.util.Optional;
 import java.util.function.Consumer;
+import java.util.function.LongConsumer;
+import javax.annotation.Nonnull;
 
 /**
  * Configuration options for Zerobus streams.
@@ -17,46 +19,67 @@
  */
 public class StreamConfigurationOptions {
 
-  private int maxInflightRecords = 50000;
+  /** Default max message size: 10MB (matches server limit). */
+  public static final int DEFAULT_MAX_MESSAGE_SIZE_BYTES = 10 * 1024 * 1024;
+
+  private int maxInflightRequests = 50000;
   private boolean recovery = true;
   private int recoveryTimeoutMs = 15000;
   private int recoveryBackoffMs = 2000;
   private int recoveryRetries = 3;
   private int flushTimeoutMs = 300000;
   private int serverLackOfAckTimeoutMs = 60000;
-  private Optional<Consumer<IngestRecordResponse>> ackCallback = Optional.empty();
+  private int maxMessageSizeBytes = DEFAULT_MAX_MESSAGE_SIZE_BYTES;
+  private Optional<Long> streamPausedMaxWaitTimeMs = Optional.empty();
+  private Optional<LongConsumer> offsetCallback = Optional.empty();
 
   private StreamConfigurationOptions() {}
 
   private StreamConfigurationOptions(
-      int maxInflightRecords,
+      int maxInflightRequests,
       boolean recovery,
       int recoveryTimeoutMs,
       int recoveryBackoffMs,
       int recoveryRetries,
       int flushTimeoutMs,
       int serverLackOfAckTimeoutMs,
-      Optional<Consumer<IngestRecordResponse>> ackCallback) {
-    this.maxInflightRecords = maxInflightRecords;
+      int maxMessageSizeBytes,
+      Optional<Long> streamPausedMaxWaitTimeMs,
+      Optional<LongConsumer> offsetCallback) {
+    this.maxInflightRequests = maxInflightRequests;
    this.recovery = recovery;
    this.recoveryTimeoutMs = recoveryTimeoutMs;
    this.recoveryBackoffMs = recoveryBackoffMs;
    this.recoveryRetries = recoveryRetries;
    this.flushTimeoutMs = flushTimeoutMs;
    this.serverLackOfAckTimeoutMs = serverLackOfAckTimeoutMs;
-    this.ackCallback = ackCallback;
+    this.maxMessageSizeBytes = maxMessageSizeBytes;
+    this.streamPausedMaxWaitTimeMs = streamPausedMaxWaitTimeMs;
+    this.offsetCallback = offsetCallback;
  }

  /**
-   * Returns the maximum number of records that can be in flight.
+   * Returns the maximum number of requests that can be in flight.
   *
   *

    This controls how many records the SDK can accept and send to the server before waiting for * acknowledgments. Higher values improve throughput but use more memory. * + * @return the maximum number of in-flight requests + */ + public int maxInflightRequests() { + return this.maxInflightRequests; + } + + /** + * Returns the maximum number of records that can be in flight. + * * @return the maximum number of in-flight records + * @deprecated Since 0.2.0. Use {@link #maxInflightRequests()} instead. This method will be + * removed in a future release. */ + @Deprecated public int maxInflightRecords() { - return this.maxInflightRecords; + return this.maxInflightRequests; } /** @@ -132,15 +155,45 @@ public int serverLackOfAckTimeoutMs() { } /** - * Returns the acknowledgment callback function. + * Returns the maximum message size in bytes for gRPC messages. + * + *
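+   * For illustration only, a caller could pre-check encoded record sizes against this limit (a
+   * sketch; {@code record} stands for any protobuf message and {@code options} for this instance):
+   *
{@code
+   * if (record.toByteArray().length > options.maxMessageSizeBytes()) {
+   *     // Too large for a single gRPC message: split or skip before ingesting.
+   * }
+   * }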

    This limits the size of individual gRPC messages (records). The server enforces a 10MB + * limit; messages exceeding this will be rejected by the server. * - *

    This callback is invoked whenever the server acknowledges records. If no callback is set, - * this returns an empty Optional. + *

Default is {@link #DEFAULT_MAX_MESSAGE_SIZE_BYTES} (10MB).
   *
-   * @return the acknowledgment callback, or an empty Optional if none is set
+   * @return the maximum message size in bytes
   */
-  public Optional<Consumer<IngestRecordResponse>> ackCallback() {
-    return this.ackCallback;
+  public int maxMessageSizeBytes() {
+    return this.maxMessageSizeBytes;
+  }
+
+  /**
+   * Returns the maximum time to wait in PAUSED state during graceful close.
+   *
+   *

    When the server sends a close signal, the stream enters PAUSED state and waits for pending + * acknowledgments. This setting controls the maximum time to wait before triggering recovery. If + * all acknowledgments are received before this timeout, recovery is triggered immediately. + * + *

If not set, the stream will use the default behavior of waiting for the recovery timeout.
+   *
+   * @return the maximum wait time in milliseconds, or empty if not configured
+   */
+  public Optional<Long> streamPausedMaxWaitTimeMs() {
+    return this.streamPausedMaxWaitTimeMs;
+  }
+
+  /**
+   * Returns the offset acknowledgment callback function.
+   *
+   *

This callback is invoked whenever the server acknowledges records. The callback receives the
+   * durability acknowledgment offset ID (the offset up to which records have been durably written).
+   * If no callback is set, this returns an empty Optional.
+   *
+   * @return the offset callback, or an empty Optional if none is set
+   */
+  public Optional<LongConsumer> offsetCallback() {
+    return this.offsetCallback;
+  }

  /**
@@ -148,7 +201,8 @@ public Optional<Consumer<IngestRecordResponse>> ackCallback() {
   *

    Default values: - maxInflightRecords: 50000 - recovery: true - recoveryTimeoutMs: 15000 - * recoveryBackoffMs: 2000 - recoveryRetries: 3 - flushTimeoutMs: 300000 - - * serverLackOfAckTimeoutMs: 60000 - ackCallback: empty + * serverLackOfAckTimeoutMs: 60000 - maxMessageSizeBytes: 10MB (matches server limit) - + * ackCallback: empty * * @return the default stream configuration options */ @@ -165,6 +219,29 @@ public static StreamConfigurationOptionsBuilder builder() { return new StreamConfigurationOptionsBuilder(); } + /** + * Returns a builder initialized with this instance's values. + * + *
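+   * For example, to copy an existing configuration and adjust a single setting (a minimal
+   * sketch; the value shown is illustrative):
+   *
{@code
+   * StreamConfigurationOptions tuned = options.toBuilder()
+   *     .setMaxInflightRequests(100000)
+   *     .build();
+   * }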

Useful for creating a modified copy of an existing configuration.
+   *
+   * @return a new builder pre-populated with this instance's values
+   */
+  public StreamConfigurationOptionsBuilder toBuilder() {
+    StreamConfigurationOptionsBuilder builder =
+        new StreamConfigurationOptionsBuilder()
+            .setMaxInflightRequests(this.maxInflightRequests)
+            .setRecovery(this.recovery)
+            .setRecoveryTimeoutMs(this.recoveryTimeoutMs)
+            .setRecoveryBackoffMs(this.recoveryBackoffMs)
+            .setRecoveryRetries(this.recoveryRetries)
+            .setFlushTimeoutMs(this.flushTimeoutMs)
+            .setServerLackOfAckTimeoutMs(this.serverLackOfAckTimeoutMs)
+            .setMaxMessageSizeBytes(this.maxMessageSizeBytes);
+    this.streamPausedMaxWaitTimeMs.ifPresent(builder::setStreamPausedMaxWaitTimeMs);
+    this.offsetCallback.ifPresent(builder::setOffsetCallback);
+    return builder;
+  }
+
  /**
   * Builder for creating StreamConfigurationOptions instances.
   *
@@ -183,29 +260,44 @@ public static StreamConfigurationOptionsBuilder builder() {
  public static class StreamConfigurationOptionsBuilder {
    private StreamConfigurationOptions defaultOptions = StreamConfigurationOptions.getDefault();

-    private int maxInflightRecords = defaultOptions.maxInflightRecords;
-    private boolean recovery = defaultOptions.recovery;
-    private int recoveryTimeoutMs = defaultOptions.recoveryTimeoutMs;
-    private int recoveryBackoffMs = defaultOptions.recoveryBackoffMs;
-    private int recoveryRetries = defaultOptions.recoveryRetries;
-    private int flushTimeoutMs = defaultOptions.flushTimeoutMs;
-    private int serverLackOfAckTimeoutMs = defaultOptions.serverLackOfAckTimeoutMs;
-    private Optional<Consumer<IngestRecordResponse>> ackCallback = defaultOptions.ackCallback;
+    private int maxInflightRequests = defaultOptions.maxInflightRequests();
+    private boolean recovery = defaultOptions.recovery();
+    private int recoveryTimeoutMs = defaultOptions.recoveryTimeoutMs();
+    private int recoveryBackoffMs = defaultOptions.recoveryBackoffMs();
+    private int recoveryRetries = defaultOptions.recoveryRetries();
+    private int flushTimeoutMs = defaultOptions.flushTimeoutMs();
+    private int serverLackOfAckTimeoutMs = defaultOptions.serverLackOfAckTimeoutMs();
+    private int maxMessageSizeBytes = defaultOptions.maxMessageSizeBytes();
+    private Optional<Long> streamPausedMaxWaitTimeMs = defaultOptions.streamPausedMaxWaitTimeMs();
+    private Optional<LongConsumer> offsetCallback = defaultOptions.offsetCallback();

    private StreamConfigurationOptionsBuilder() {}

    /**
-     * Sets the maximum number of records that can be in flight.
+     * Sets the maximum number of requests that can be in flight.
     *
     *
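+     * For example (the value shown is illustrative, not a recommendation):
+     *
{@code
+     * StreamConfigurationOptions options = StreamConfigurationOptions.builder()
+     *     .setMaxInflightRequests(50000)
+     *     .build();
+     * }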

    This controls how many records the SDK can accept and send to the server before waiting * for acknowledgments. Higher values improve throughput but use more memory. * + * @param maxInflightRequests the maximum number of in-flight requests + * @return this builder for method chaining + */ + public StreamConfigurationOptionsBuilder setMaxInflightRequests(int maxInflightRequests) { + this.maxInflightRequests = maxInflightRequests; + return this; + } + + /** + * Sets the maximum number of records that can be in flight. + * * @param maxInflightRecords the maximum number of in-flight records * @return this builder for method chaining + * @deprecated Since 0.2.0. Use {@link #setMaxInflightRequests(int)} instead. This method will + * be removed in a future release. */ + @Deprecated public StreamConfigurationOptionsBuilder setMaxInflightRecords(int maxInflightRecords) { - this.maxInflightRecords = maxInflightRecords; - return this; + return setMaxInflightRequests(maxInflightRecords); } /** @@ -295,17 +387,74 @@ public StreamConfigurationOptionsBuilder setServerLackOfAckTimeoutMs( } /** - * Sets the acknowledgment callback function. + * Sets the maximum message size in bytes for gRPC messages. + * + *

    This limits the size of individual gRPC messages (records). The server enforces a 10MB + * limit; messages exceeding this will be rejected by the server. + * + *
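+     * For example, to cap outbound messages well below the server limit (a sketch; the 1MB
+     * figure is illustrative):
+     *
{@code
+     * StreamConfigurationOptions options = StreamConfigurationOptions.builder()
+     *     .setMaxMessageSizeBytes(1024 * 1024) // 1MB
+     *     .build();
+     * }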

    Default is 10MB ({@link StreamConfigurationOptions#DEFAULT_MAX_MESSAGE_SIZE_BYTES}). + * + * @param maxMessageSizeBytes the maximum message size in bytes + * @return this builder for method chaining + */ + public StreamConfigurationOptionsBuilder setMaxMessageSizeBytes(int maxMessageSizeBytes) { + this.maxMessageSizeBytes = maxMessageSizeBytes; + return this; + } + + /** + * Sets the maximum time to wait in PAUSED state during graceful close. + * + *
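+     * A minimal sketch (the 5-second bound is illustrative):
+     *
{@code
+     * StreamConfigurationOptions options = StreamConfigurationOptions.builder()
+     *     .setStreamPausedMaxWaitTimeMs(5000)
+     *     .build();
+     * }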

    When the server sends a close signal, the stream enters PAUSED state and waits for pending + * acknowledgments. This setting controls the maximum time to wait before triggering recovery. + * If all acknowledgments are received before this timeout, recovery is triggered immediately. + * + * @param streamPausedMaxWaitTimeMs the maximum wait time in milliseconds + * @return this builder for method chaining + */ + public StreamConfigurationOptionsBuilder setStreamPausedMaxWaitTimeMs( + long streamPausedMaxWaitTimeMs) { + this.streamPausedMaxWaitTimeMs = Optional.of(streamPausedMaxWaitTimeMs); + return this; + } + + /** + * Sets the offset acknowledgment callback function. * *
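+     * For example, to track durable progress (a sketch; the logger is assumed to exist in the
+     * calling code):
+     *
{@code
+     * StreamConfigurationOptions options = StreamConfigurationOptions.builder()
+     *     .setOffsetCallback(offset -> logger.info("Durably acknowledged up to {}", offset))
+     *     .build();
+     * }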

    This callback is invoked whenever the server acknowledges records. The callback receives - * an IngestRecordResponse containing information about the acknowledged records. + * the durability acknowledgment offset ID (the offset up to which records have been durably + * written). + * + * @param offsetCallback the offset callback function that receives the offset ID + * @return this builder for method chaining + */ + public StreamConfigurationOptionsBuilder setOffsetCallback( + @Nonnull LongConsumer offsetCallback) { + this.offsetCallback = Optional.of(offsetCallback); + return this; + } + + /** + * Sets the acknowledgment callback function. + * + *

This callback receives the full {@link IngestRecordResponse} object when records are
+     * acknowledged. Internally, this wraps the callback to work with the offset-based system.
-     *
-     * @param ackCallback the acknowledgment callback function
+     * @param ackCallback the acknowledgment callback function that receives the full response
     * @return this builder for method chaining
+     * @deprecated Since 0.2.0. Use {@link #setOffsetCallback(LongConsumer)} instead which provides
+     *     just the offset ID. This method will be removed in a future release.
     */
+    @Deprecated
    public StreamConfigurationOptionsBuilder setAckCallback(
-        Consumer<IngestRecordResponse> ackCallback) {
-      this.ackCallback = Optional.ofNullable(ackCallback);
+        @Nonnull Consumer<IngestRecordResponse> ackCallback) {
+      this.offsetCallback =
+          Optional.of(
+              offset ->
+                  ackCallback.accept(
+                      IngestRecordResponse.newBuilder()
+                          .setDurabilityAckUpToOffset(offset)
+                          .build()));
      return this;
    }

@@ -316,14 +465,16 @@ public StreamConfigurationOptionsBuilder setAckCallback(
     */
    public StreamConfigurationOptions build() {
      return new StreamConfigurationOptions(
-          this.maxInflightRecords,
+          this.maxInflightRequests,
          this.recovery,
          this.recoveryTimeoutMs,
          this.recoveryBackoffMs,
          this.recoveryRetries,
          this.flushTimeoutMs,
          this.serverLackOfAckTimeoutMs,
-          this.ackCallback);
+          this.maxMessageSizeBytes,
+          this.streamPausedMaxWaitTimeMs,
+          this.offsetCallback);
    }
  }
}
diff --git a/src/main/java/com/databricks/zerobus/StreamState.java b/sdk/src/main/java/com/databricks/zerobus/StreamState.java
similarity index 52%
rename from src/main/java/com/databricks/zerobus/StreamState.java
rename to sdk/src/main/java/com/databricks/zerobus/StreamState.java
index 8cf5bd6..12bdd00 100644
--- a/src/main/java/com/databricks/zerobus/StreamState.java
+++ b/sdk/src/main/java/com/databricks/zerobus/StreamState.java
@@ -8,7 +8,9 @@
 *

      * UNINITIALIZED → OPENED → FLUSHING → CLOSED
      *                    ↓
    - *                RECOVERING (on failure, if recovery enabled)
    + *                 PAUSED (on server close signal, waiting for acks)
    + *                    ↓
    + *                RECOVERING (on failure or after pause timeout)
      *                    ↓
      *                OPENED or FAILED
      * 
    @@ -23,6 +25,22 @@ public enum StreamState { /** Stream is flushing pending records before closing */ FLUSHING, + /** + * Stream is paused due to server close signal. + * + *

    During this state: + * + *

      + *
    • The sender task is paused (stops sending batches to server) + *
    • Ingestion can continue - records queue in the landing zone + *
    • The receiver continues processing acks for in-flight records + *
    • Recovery triggers after watermark batches are acked or timeout + *
    + * + *

Records ingested during PAUSED state will be sent after recovery completes.
+   */
+  PAUSED,
+
  /** Stream is recovering from a failure (automatic retry in progress) */
  RECOVERING,
diff --git a/sdk/src/main/java/com/databricks/zerobus/TableProperties.java b/sdk/src/main/java/com/databricks/zerobus/TableProperties.java
new file mode 100644
index 0000000..2d16abd
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/TableProperties.java
@@ -0,0 +1,54 @@
+package com.databricks.zerobus;
+
+import com.databricks.zerobus.schema.ProtoTableProperties;
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.Message;
+
+/**
+ * Table properties for the stream; describes the table to ingest records into.
+ *
+ * @param <RecordType> The type of records to be ingested (must extend Message).
+ * @deprecated Since 0.2.0. Use {@link ProtoTableProperties} or the new builder API via {@link
+ *     ZerobusSdk#streamBuilder(String)} instead. This class will be removed in a future release.
+ */
+@Deprecated
+public class TableProperties<RecordType extends Message> extends ProtoTableProperties {
+
+  private final RecordType defaultInstance;
+
+  /**
+   * Creates a new TableProperties instance.
+   *
+   * @param tableName The name of the table to ingest records into.
+   * @param defaultInstance The default instance of the record type (used to get the descriptor).
+   * @deprecated Since 0.2.0. Use {@link ProtoTableProperties#fromCompiled(String, Message)} or the
+   *     new builder API instead.
+   */
+  @Deprecated
+  public TableProperties(String tableName, RecordType defaultInstance) {
+    super(tableName, defaultInstance.getDescriptorForType().toProto());
+    this.defaultInstance = defaultInstance;
+  }
+
+  /**
+   * Gets the default instance of the record type.
+   *
+   * @return the default instance
+   * @deprecated Since 0.2.0. This method is no longer needed with the new API.
+   */
+  @Deprecated
+  public RecordType getDefaultInstance() {
+    return defaultInstance;
+  }
+
+  /**
+   * Gets the descriptor for the record type.
+   *
+   * @return the descriptor
+   * @deprecated Since 0.2.0. Use {@link ProtoTableProperties#getDescriptorProto()} instead.
+   */
+  @Deprecated
+  Descriptors.Descriptor getDescriptor() {
+    return defaultInstance.getDescriptorForType();
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/ZerobusException.java b/sdk/src/main/java/com/databricks/zerobus/ZerobusException.java
new file mode 100644
index 0000000..08b7f1a
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/ZerobusException.java
@@ -0,0 +1,51 @@
+package com.databricks.zerobus;
+
+/**
+ * Base exception class for all Zerobus SDK errors.
+ *
+ *

    This is an unchecked exception (extends {@link RuntimeException}). Callers can catch this + * exception or let it propagate up the call stack. + * + *

    The SDK throws two types of exceptions: + * + *

      + *
    • {@link ZerobusException} - Retriable errors (network issues, temporary server errors) + *
    • {@link NonRetriableException} - Non-retriable errors (invalid credentials, missing table) + *
    + * + *

    Example usage: + * + *

    {@code
    + * try {
    + *     stream.ingest(record);
    + * } catch (NonRetriableException e) {
    + *     // Fatal error - do not retry
    + *     logger.error("Non-retriable error", e);
    + *     throw e;
    + * } catch (ZerobusException e) {
    + *     // Retriable error - can retry with backoff
    + *     logger.warn("Retriable error, will retry", e);
    + * }
    + * }
    + */ +public class ZerobusException extends RuntimeException { + + /** + * Constructs a new ZerobusException with the specified detail message. + * + * @param message the detail message + */ + public ZerobusException(String message) { + super(message); + } + + /** + * Constructs a new ZerobusException with the specified detail message and cause. + * + * @param message the detail message + * @param cause the cause of the exception + */ + public ZerobusException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/ZerobusSdk.java b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdk.java new file mode 100644 index 0000000..ab35bdd --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdk.java @@ -0,0 +1,383 @@ +package com.databricks.zerobus; + +import com.databricks.zerobus.stream.BaseZerobusStream; +import com.databricks.zerobus.stream.GrpcErrorHandling; +import com.databricks.zerobus.stream.ZerobusStream; +import com.google.protobuf.Message; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import java.util.Iterator; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nonnull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The main entry point for the Zerobus SDK. + * + *

    This class provides methods to create and recreate streams for ingesting records into + * Databricks tables. + * + *

    Example usage with builder: + * + *

    {@code
    + * ZerobusSdk sdk = ZerobusSdk.builder("server-endpoint.databricks.com",
    + *                                     "https://workspace.databricks.com")
    + *     .executor(customExecutor)
    + *     .build();
    + *
    + * ZerobusStream stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options
    + * ).join();
    + *
    + * // When done with the SDK
    + * sdk.close();
    + * }
    + * + *

    Or using the constructor for simple cases: + * + *

    {@code
    + * ZerobusSdk sdk = new ZerobusSdk(
    + *     "server-endpoint.databricks.com",
    + *     "https://workspace.databricks.com"
    + * );
    + * }
    + */ +public class ZerobusSdk implements AutoCloseable { + private static final Logger logger = LoggerFactory.getLogger(ZerobusSdk.class); + private static final StreamConfigurationOptions DEFAULT_OPTIONS = + StreamConfigurationOptions.getDefault(); + + /** The current version of the Zerobus SDK. */ + public static final String VERSION = "0.2.0"; + + private final String serverEndpoint; + private final String unityCatalogEndpoint; + private final String workspaceId; + private final ExecutorService executor; + private ZerobusSdkStubFactory stubFactory; + + /** + * Creates a new ZerobusSdk instance with default settings. + * + *

    Uses a cached thread pool that creates threads as needed and reuses idle threads. + * + * @param serverEndpoint The gRPC endpoint URL for the Zerobus service + * @param unityCatalogEndpoint The Unity Catalog endpoint URL + */ + public ZerobusSdk(@Nonnull String serverEndpoint, @Nonnull String unityCatalogEndpoint) { + this( + Objects.requireNonNull(serverEndpoint, "serverEndpoint cannot be null"), + Objects.requireNonNull(unityCatalogEndpoint, "unityCatalogEndpoint cannot be null"), + createDefaultExecutor(), + new ZerobusSdkStubFactory()); + } + + /** + * Creates a new ZerobusSdk instance with custom configuration. + * + *

This constructor is package-private and intended for use by {@link ZerobusSdkBuilder}.
+   *
+   * @param serverEndpoint The gRPC endpoint URL for the Zerobus service
+   * @param unityCatalogEndpoint The Unity Catalog endpoint URL
+   * @param executor Custom executor service
+   * @param stubFactory Custom stub factory
+   */
+  ZerobusSdk(
+      @Nonnull String serverEndpoint,
+      @Nonnull String unityCatalogEndpoint,
+      @Nonnull ExecutorService executor,
+      @Nonnull ZerobusSdkStubFactory stubFactory) {
+    this.serverEndpoint = serverEndpoint;
+    this.unityCatalogEndpoint = unityCatalogEndpoint;
+    this.executor = executor;
+    this.stubFactory = stubFactory;
+    this.workspaceId = extractWorkspaceId(serverEndpoint);
+  }
+
+  /**
+   * Creates a new builder for configuring a ZerobusSdk instance.
+   *
+   * @param serverEndpoint The gRPC endpoint URL for the Zerobus service
+   * @param unityCatalogEndpoint The Unity Catalog endpoint URL
+   * @return A new ZerobusSdkBuilder instance
+   * @see ZerobusSdkBuilder
+   */
+  @Nonnull
+  public static ZerobusSdkBuilder builder(
+      @Nonnull String serverEndpoint, @Nonnull String unityCatalogEndpoint) {
+    return new ZerobusSdkBuilder(serverEndpoint, unityCatalogEndpoint);
+  }
+
+  private static String extractWorkspaceId(String endpoint) {
+    String clean = endpoint;
+    if (clean.startsWith("https://")) clean = clean.substring(8);
+    else if (clean.startsWith("http://")) clean = clean.substring(7);
+    int dot = clean.indexOf('.');
+    return dot > 0 ? clean.substring(0, dot) : clean;
+  }
+
+  /** Creates the default executor service. Package-private for use by {@link ZerobusSdkBuilder}. */
+  static ExecutorService createDefaultExecutor() {
+    ThreadFactory factory =
+        new ThreadFactory() {
+          private final AtomicInteger counter = new AtomicInteger(0);
+
+          @Override
+          public Thread newThread(Runnable r) {
+            Thread t = new Thread(r);
+            t.setDaemon(true);
+            t.setName("ZerobusSdk-worker-" + counter.getAndIncrement());
+            return t;
+          }
+        };
+    return Executors.newCachedThreadPool(factory);
+  }
+
+  /**
+   * Creates a stream with the specified options.
+   *
+   * @param tableProperties Configuration for the target table
+   * @param clientId OAuth client ID
+   * @param clientSecret OAuth client secret
+   * @param options Stream configuration options
+   * @return CompletableFuture that completes with the stream
+   * @deprecated Since 0.2.0. Use {@link #streamBuilder(String)} instead. This method will be
+   *     removed in a future release.
+   */
+  @Deprecated
+  @Nonnull
+  public <RecordType extends Message> CompletableFuture<ZerobusStream<RecordType>> createStream(
+      @Nonnull TableProperties<RecordType> tableProperties,
+      @Nonnull String clientId,
+      @Nonnull String clientSecret,
+      @Nonnull StreamConfigurationOptions options) {
+    Objects.requireNonNull(tableProperties, "tableProperties cannot be null");
+    Objects.requireNonNull(clientId, "clientId cannot be null");
+    Objects.requireNonNull(clientSecret, "clientSecret cannot be null");
+    Objects.requireNonNull(options, "options cannot be null");
+
+    logger.debug("Creating stream for table: {}", tableProperties.getTableName());
+
+    return streamBuilder(tableProperties.getTableName())
+        .clientCredentials(clientId, clientSecret)
+        .options(options)
+        .legacyProto(tableProperties)
+        .build()
+        .exceptionally(
+            e -> {
+              throw new RuntimeException(wrapException(e));
+            });
+  }
+
+  /**
+   * Creates a stream with default options.
+   *
+   * @param tableProperties Configuration for the target table
+   * @param clientId OAuth client ID
+   * @param clientSecret OAuth client secret
+   * @return CompletableFuture that completes with the stream
+   * @deprecated Since 0.2.0. Use {@link #streamBuilder(String)} instead. This method will be
+   *     removed in a future release.
+   */
+  @Deprecated
+  @Nonnull
+  public <RecordType extends Message> CompletableFuture<ZerobusStream<RecordType>> createStream(
+      @Nonnull TableProperties<RecordType> tableProperties,
+      @Nonnull String clientId,
+      @Nonnull String clientSecret) {
+    Objects.requireNonNull(tableProperties, "tableProperties cannot be null");
+    Objects.requireNonNull(clientId, "clientId cannot be null");
+    Objects.requireNonNull(clientSecret, "clientSecret cannot be null");
+    return createStream(
+        tableProperties, clientId, clientSecret, StreamConfigurationOptions.getDefault());
+  }
+
+  // ==================== Builder API ====================
+
+  /**
+   * Creates a builder for configuring and creating a stream.
+   *
+   *

    This is the preferred way to create streams. Use the builder to specify the schema type and + * configure stream options. + * + *

    Example usage: + * + *

    {@code
    +   * // Compiled proto stream.
    +   * ProtoZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
    +   *     .clientCredentials(clientId, clientSecret)
    +   *     .compiledProto(MyRecord.getDefaultInstance())
    +   *     .maxInflightRequests(10000)
    +   *     .build()
    +   *     .join();
    +   *
    +   * // JSON stream.
    +   * JsonZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
    +   *     .clientCredentials(clientId, clientSecret)
    +   *     .json()
    +   *     .build()
    +   *     .join();
    +   * }
    + * + * @param tableName The fully qualified table name (catalog.schema.table) + * @return A new ZerobusStreamBuilder instance + */ + @Nonnull + public ZerobusStreamBuilder streamBuilder(@Nonnull String tableName) { + Objects.requireNonNull(tableName, "tableName cannot be null"); + return new ZerobusStreamBuilder( + stubFactory, executor, workspaceId, serverEndpoint, unityCatalogEndpoint, tableName); + } + + /** + * Recreates a stream from a failed stream. + * + *

    Creates a new stream with the same configuration and re-ingests any unacknowledged records + * from the failed stream. This method works with all stream types ({@link + * com.databricks.zerobus.stream.ProtoZerobusStream}, {@link + * com.databricks.zerobus.stream.JsonZerobusStream}, etc.). + * + *

    Example usage: + * + *

    {@code
    +   * try {
    +   *     stream.ingest(record);
    +   * } catch (ZerobusException e) {
    +   *     // Stream failed, recreate it
    +   *     ProtoZerobusStream newStream = sdk.recreateStream(stream).join();
    +   *     // Continue using newStream
    +   * }
    +   * }
+   *
+   * @param <S> The stream type
+   * @param failedStream The stream to recreate
+   * @return CompletableFuture that completes with a new stream of the same type
+   */
+  @Nonnull
+  @SuppressWarnings("unchecked")
+  public <S extends BaseZerobusStream<?, ?>> CompletableFuture<S> recreateStream(
+      @Nonnull S failedStream) {
+    Objects.requireNonNull(failedStream, "failedStream cannot be null");
+    return (CompletableFuture<S>) failedStream.recreate(this);
+  }
+
+  /**
+   * Recreates a legacy stream from a failed stream.
+   *
+   *

Uses the same configuration and re-ingests unacknowledged records.
+   *
+   * @param failedStream The stream to recreate
+   * @return CompletableFuture that completes with the new stream
+   * @deprecated Since 0.2.0. Use {@link #recreateStream(BaseZerobusStream)} instead. This method
+   *     will be removed in a future release.
+   */
+  @Deprecated
+  @Nonnull
+  public <RecordType extends Message> CompletableFuture<ZerobusStream<RecordType>> recreateStream(
+      @Nonnull ZerobusStream<RecordType> failedStream) {
+    Objects.requireNonNull(failedStream, "failedStream cannot be null");
+
+    CompletableFuture<ZerobusStream<RecordType>> result = new CompletableFuture<>();
+
+    TableProperties<RecordType> tableProperties = failedStream.getLegacyTableProperties();
+
+    streamBuilder(tableProperties.getTableName())
+        .clientCredentials(failedStream.getClientId(), failedStream.getClientSecret())
+        .options(failedStream.getOptions())
+        .headersProvider(failedStream.getHeadersProvider())
+        .tlsConfig(failedStream.getTlsConfig())
+        .legacyProto(tableProperties)
+        .build()
+        .whenComplete(
+            (stream, error) -> {
+              if (error != null) {
+                result.completeExceptionally(error);
+                return;
+              }
+              try {
+                Iterator<RecordType> unacked = failedStream.getUnackedRecords();
+                while (unacked.hasNext()) {
+                  stream.ingestRecord(unacked.next());
+                }
+                result.complete(stream);
+              } catch (ZerobusException e) {
+                result.completeExceptionally(e);
+              }
+            });
+    return result;
+  }
+
+  /**
+   * Closes the SDK and releases resources.
+   *
+   *

    This method performs a graceful shutdown: + * + *

      + *
    1. Shuts down the gRPC channel + *
    2. Stops accepting new tasks on the executor + *
    3. Waits up to 5 seconds for in-flight tasks to complete + *
    4. Forces shutdown if tasks don't complete in time + *
    + * + *

    After calling this method, the SDK instance cannot be reused. Create a new instance if + * needed. + * + *
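+   * Because the SDK implements {@link AutoCloseable}, try-with-resources is one way to make this
+   * call explicit (a minimal sketch):
+   *
{@code
+   * try (ZerobusSdk sdk = new ZerobusSdk(serverEndpoint, unityCatalogEndpoint)) {
+   *     // create streams and ingest records here
+   * }
+   * }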

    Note: If using daemon threads (the default), resources will also be cleaned up automatically + * on JVM shutdown, so calling close() is optional but recommended for explicit resource + * management. + */ + @Override + public void close() { + logger.debug("Closing ZerobusSdk"); + stubFactory.shutdown(); + executor.shutdown(); + try { + if (!executor.awaitTermination(5, java.util.concurrent.TimeUnit.SECONDS)) { + logger.warn("Executor did not terminate gracefully, forcing shutdown"); + executor.shutdownNow(); + if (!executor.awaitTermination(2, java.util.concurrent.TimeUnit.SECONDS)) { + logger.error("Executor did not terminate after forced shutdown"); + } + } + } catch (InterruptedException e) { + logger.warn("Interrupted while waiting for executor shutdown"); + executor.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + + /** + * Returns the current version of the Zerobus SDK. + * + *

    This can be used for logging, debugging, or verifying SDK compatibility: + * + *

    {@code
    +   * System.out.println("Using Zerobus SDK version: " + ZerobusSdk.getVersion());
    +   * }
    + * + * @return The SDK version string (e.g., "0.2.0") + */ + public static String getVersion() { + return VERSION; + } + + private Throwable wrapException(Throwable e) { + if (e instanceof ZerobusException) return e; + if (e instanceof StatusRuntimeException) { + Status.Code code = ((StatusRuntimeException) e).getStatus().getCode(); + if (GrpcErrorHandling.isNonRetriable(code)) { + return new NonRetriableException("Non-retriable gRPC error: " + e.getMessage(), e); + } + } + return new ZerobusException("Failed to create stream: " + e.getMessage(), e); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkBuilder.java b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkBuilder.java new file mode 100644 index 0000000..66ac650 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkBuilder.java @@ -0,0 +1,84 @@ +package com.databricks.zerobus; + +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import javax.annotation.Nonnull; + +/** + * Builder for creating {@link ZerobusSdk} instances with custom configuration. + * + *

    Example usage: + * + *

    {@code
    + * ZerobusSdk sdk = ZerobusSdk.builder(endpoint, ucEndpoint)
    + *     .executor(myExecutor)
    + *     .build();
    + * }
+ *
+ * @see ZerobusSdk#builder(String, String)
+ */
+public final class ZerobusSdkBuilder {
+  private final String serverEndpoint;
+  private final String unityCatalogEndpoint;
+  private Optional<ExecutorService> executor = Optional.empty();
+  private Optional<ZerobusSdkStubFactory> stubFactory = Optional.empty();
+
+  /**
+   * Creates a new ZerobusSdkBuilder.
+   *
+   *

    Use {@link ZerobusSdk#builder(String, String)} instead of calling this constructor directly. + * + * @param serverEndpoint The gRPC endpoint URL for the Zerobus service + * @param unityCatalogEndpoint The Unity Catalog endpoint URL + */ + ZerobusSdkBuilder(@Nonnull String serverEndpoint, @Nonnull String unityCatalogEndpoint) { + this.serverEndpoint = Objects.requireNonNull(serverEndpoint, "serverEndpoint cannot be null"); + this.unityCatalogEndpoint = + Objects.requireNonNull(unityCatalogEndpoint, "unityCatalogEndpoint cannot be null"); + } + + /** + * Sets a custom executor service for the SDK. + * + *

    If not set, the SDK will create a cached thread pool that automatically scales based on + * demand. When providing a custom executor, the caller is responsible for shutting it down. + * + * @param executor The executor service to use + * @return This builder for method chaining + */ + @Nonnull + public ZerobusSdkBuilder executor(@Nonnull ExecutorService executor) { + this.executor = Optional.of(Objects.requireNonNull(executor, "executor cannot be null")); + return this; + } + + /** + * Sets a custom stub factory for the SDK. + * + *

    This is primarily used for testing. + * + * @param stubFactory The stub factory to use + * @return This builder for method chaining + */ + @Nonnull + ZerobusSdkBuilder stubFactory(@Nonnull ZerobusSdkStubFactory stubFactory) { + this.stubFactory = + Optional.of(Objects.requireNonNull(stubFactory, "stubFactory cannot be null")); + return this; + } + + /** + * Builds the ZerobusSdk instance. + * + * @return A new ZerobusSdk instance + */ + @Nonnull + public ZerobusSdk build() { + return new ZerobusSdk( + serverEndpoint, + unityCatalogEndpoint, + executor.orElseGet(ZerobusSdk::createDefaultExecutor), + stubFactory.orElseGet(ZerobusSdkStubFactory::new)); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkStubFactory.java b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkStubFactory.java new file mode 100644 index 0000000..6cccdcb --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/ZerobusSdkStubFactory.java @@ -0,0 +1,228 @@ +package com.databricks.zerobus; + +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.tls.TlsConfig; +import io.grpc.CallOptions; +import io.grpc.Channel; +import io.grpc.ChannelCredentials; +import io.grpc.ClientCall; +import io.grpc.ClientInterceptor; +import io.grpc.ClientInterceptors; +import io.grpc.ForwardingClientCall.SimpleForwardingClientCall; +import io.grpc.Grpc; +import io.grpc.ManagedChannel; +import io.grpc.Metadata; +import io.grpc.MethodDescriptor; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Factory for creating Zerobus gRPC stubs with proper configuration. + * + *

This factory handles the creation of gRPC channels and stubs with appropriate settings for
+ * long-lived streaming connections. The channel is cached and reused across all stubs created by
+ * this factory instance, avoiding the overhead of creating new connections for each stream.
+ */
+class ZerobusSdkStubFactory {
+
+  // gRPC channel configuration constants
+  private static final int DEFAULT_TLS_PORT = 443;
+  private static final long KEEP_ALIVE_TIME_SECONDS = 30;
+  private static final long KEEP_ALIVE_TIMEOUT_SECONDS = 10;
+
+  // Protocol prefixes
+  private static final String HTTPS_PREFIX = "https://";
+  private static final String HTTP_PREFIX = "http://";
+
+  // Cached channel - initialized on first use and reused for all subsequent stubs.
+  // Multiple stubs can share the same channel, avoiding the overhead of creating
+  // a new channel (with its own connection pool and threads) per stream.
+  private final AtomicReference<ManagedChannel> cachedChannel = new AtomicReference<>(null);
+
+  /**
+   * Gets or creates the cached gRPC channel.
+   *
+   *

    The channel is configured for long-lived streaming with appropriate keep-alive settings and + * unlimited message size limits. Uses double-checked locking for thread-safe lazy initialization. + * + * @param endpoint The endpoint URL (may include https:// prefix) + * @param tlsConfig The TLS configuration for secure connection + * @return A configured ManagedChannel (cached) + */ + ManagedChannel getOrCreateChannel(String endpoint, TlsConfig tlsConfig) { + ManagedChannel channel = cachedChannel.get(); + if (channel != null) { + return channel; + } + + synchronized (this) { + channel = cachedChannel.get(); + if (channel != null) { + return channel; + } + + channel = createChannel(endpoint, tlsConfig); + cachedChannel.set(channel); + return channel; + } + } + + /** + * Creates a new Zerobus gRPC stub. + * + *

    The stub is configured with an interceptor that obtains headers for each request using the + * provided headers provider. The underlying channel is cached and reused across all stubs. + * + * @param endpoint The endpoint URL + * @param headersProvider Provider that supplies headers for each request + * @param tlsConfig The TLS configuration for secure connection + * @param maxMessageSizeBytes Maximum outbound message size in bytes + * @param tableName The expected table name for header validation + * @return A configured ZerobusStub + */ + ZerobusGrpc.ZerobusStub createStub( + String endpoint, + HeadersProvider headersProvider, + TlsConfig tlsConfig, + int maxMessageSizeBytes, + String tableName) { + ManagedChannel channel = getOrCreateChannel(endpoint, tlsConfig); + ClientInterceptor authInterceptor = new HeadersProviderInterceptor(headersProvider, tableName); + Channel interceptedChannel = ClientInterceptors.intercept(channel, authInterceptor); + return ZerobusGrpc.newStub(interceptedChannel) + .withMaxInboundMessageSize(Integer.MAX_VALUE) + .withMaxOutboundMessageSize(maxMessageSizeBytes); + } + + /** + * Shuts down the cached channel if it exists. + * + *

    This should be called when the SDK is being closed to clean up resources. + */ + void shutdown() { + ManagedChannel channel = cachedChannel.getAndSet(null); + if (channel != null) { + channel.shutdown(); + } + } + + /** + * Creates a new gRPC channel (internal, not cached). + * + * @param endpoint The endpoint URL + * @param tlsConfig The TLS configuration + * @return A new ManagedChannel + */ + private ManagedChannel createChannel(String endpoint, TlsConfig tlsConfig) { + EndpointInfo endpointInfo = parseEndpoint(endpoint); + ChannelCredentials credentials = tlsConfig.toChannelCredentials(); + + return Grpc.newChannelBuilder(endpointInfo.host + ":" + endpointInfo.port, credentials) + .keepAliveTime(KEEP_ALIVE_TIME_SECONDS, TimeUnit.SECONDS) + .keepAliveTimeout(KEEP_ALIVE_TIMEOUT_SECONDS, TimeUnit.SECONDS) + .keepAliveWithoutCalls(true) + .maxInboundMessageSize(Integer.MAX_VALUE) + .build(); + } + + /** Container for parsed endpoint information. */ + private static class EndpointInfo { + final String host; + final int port; + + EndpointInfo(String host, int port) { + this.host = host; + this.port = port; + } + } + + /** + * Parses an endpoint string to extract host and port information. + * + * @param endpoint The endpoint string (may include https:// or http:// prefix) + * @return Parsed endpoint information + */ + private EndpointInfo parseEndpoint(String endpoint) { + String cleanEndpoint = endpoint; + if (cleanEndpoint.startsWith(HTTPS_PREFIX)) { + cleanEndpoint = cleanEndpoint.substring(HTTPS_PREFIX.length()); + } else if (cleanEndpoint.startsWith(HTTP_PREFIX)) { + cleanEndpoint = cleanEndpoint.substring(HTTP_PREFIX.length()); + } + + String[] parts = cleanEndpoint.split(":", 2); + String host = parts[0]; + int port = parts.length > 1 ? Integer.parseInt(parts[1]) : DEFAULT_TLS_PORT; + + return new EndpointInfo(host, port); + } + + static final String TABLE_NAME_HEADER = "x-databricks-zerobus-table-name"; + + /** + * Validates that the headers contain the expected table name. + * + * @param headers The headers from the provider + * @param expectedTableName The expected table name + * @throws NonRetriableException if validation fails + */ + static void validateTableNameHeader(Map headers, String expectedTableName) + throws NonRetriableException { + String headerTableName = headers.get(TABLE_NAME_HEADER); + if (headerTableName == null) { + throw new NonRetriableException( + "Headers provider must include '" + TABLE_NAME_HEADER + "' header"); + } + if (!headerTableName.equals(expectedTableName)) { + throw new NonRetriableException( + "Table name mismatch: headers provider returned '" + + headerTableName + + "' but stream is configured for '" + + expectedTableName + + "'"); + } + } + + /** + * gRPC client interceptor that adds headers from a HeadersProvider to requests. + * + *

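The table-name validation just described means any custom provider must echo the stream's table name. Since HeadersProvider is a single-method interface, a minimal compliant provider is a lambda (sketch; the token value is a placeholder and imports are elided):

```java
HeadersProvider provider = () -> {
    Map<String, String> h = new HashMap<>();
    h.put("authorization", "Bearer " + token); // token obtained elsewhere
    // Must match the table the stream is configured for, or the
    // interceptor fails validation with NonRetriableException:
    h.put("x-databricks-zerobus-table-name", "catalog.schema.events");
    return h;
};
```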
This interceptor obtains headers from the provided HeadersProvider and attaches them to all + * outgoing requests. It also validates that the table name header matches the expected value. + */ + private static class HeadersProviderInterceptor implements ClientInterceptor { + + private final HeadersProvider headersProvider; + private final String expectedTableName; + + HeadersProviderInterceptor(HeadersProvider headersProvider, String expectedTableName) { + this.headersProvider = headersProvider; + this.expectedTableName = expectedTableName; + } + + @Override + public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall( + MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) { + return new SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) { + @Override + public void start(Listener<RespT> responseListener, Metadata headers) { + try { + Map<String, String> providerHeaders = headersProvider.getHeaders(); + + validateTableNameHeader(providerHeaders, expectedTableName); + + for (Map.Entry<String, String> entry : providerHeaders.entrySet()) { + Metadata.Key<String> key = + Metadata.Key.of(entry.getKey(), Metadata.ASCII_STRING_MARSHALLER); + headers.put(key, entry.getValue()); + } + + super.start(responseListener, headers); + } catch (NonRetriableException e) { + throw new RuntimeException("Failed to get headers from provider", e); + } + } + }; + } + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/ZerobusStreamBuilder.java b/sdk/src/main/java/com/databricks/zerobus/ZerobusStreamBuilder.java new file mode 100644 index 0000000..0f7fb13 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/ZerobusStreamBuilder.java @@ -0,0 +1,898 @@ +package com.databricks.zerobus; + +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.auth.OAuthHeadersProvider; +import com.databricks.zerobus.schema.JsonTableProperties; +import com.databricks.zerobus.schema.ProtoTableProperties; +import com.databricks.zerobus.stream.JsonZerobusStream; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import com.databricks.zerobus.stream.ZerobusStream; +import com.databricks.zerobus.tls.SecureTlsConfig; +import com.databricks.zerobus.tls.TlsConfig; +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; +import com.google.protobuf.Message; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.function.LongConsumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Builder for creating Zerobus streams with a fluent API. + * + *

    This builder uses a step builder pattern that enforces authentication configuration at compile + * time. You must call either {@link #clientCredentials} (for OAuth) or {@link #unauthenticated} + * (for custom auth) before you can access schema selection methods. + * + *

    The builder flow: + * + *

      + *
    1. {@code sdk.streamBuilder(tableName)} - returns {@code ZerobusStreamBuilder} + *
    2. {@code .clientCredentials(...)} or {@code .unauthenticated()} - returns a configurable + * builder + *
    3. Optional: configure stream options ({@code .maxInflightRequests()}, {@code + * .headersProvider()}, etc.) + *
    4. {@code .compiledProto(...)} or {@code .json()} - returns typed stream builder + *
    5. {@code .build()} - creates the stream + *
    + * + *

    Example usage: + * + *

    {@code
    + * // OAuth authentication (most common)
+ * ProtoZerobusStream<MyRecord> stream = sdk.streamBuilder("catalog.schema.table")
    + *     .clientCredentials(clientId, clientSecret)
    + *     .maxInflightRequests(10000)
    + *     .compiledProto(MyRecord.getDefaultInstance())
    + *     .build()
    + *     .join();
    + *
    + * // Custom authentication with headers provider
    + * JsonZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
    + *     .unauthenticated()
    + *     .headersProvider(customProvider)
    + *     .json()
    + *     .build()
    + *     .join();
    + * }
    + * + * @see ZerobusSdk#streamBuilder(String) + */ +public class ZerobusStreamBuilder { + + private static final Logger logger = LoggerFactory.getLogger(ZerobusStreamBuilder.class); + + private final ZerobusSdkStubFactory stubFactory; + private final ExecutorService executor; + private final String workspaceId; + private final String serverEndpoint; + private final String unityCatalogEndpoint; + private final String tableName; + + /** + * Creates a new ZerobusStreamBuilder. + * + *

    Use {@link ZerobusSdk#streamBuilder(String)} instead of calling this constructor directly. + */ + ZerobusStreamBuilder( + @Nonnull ZerobusSdkStubFactory stubFactory, + @Nonnull ExecutorService executor, + @Nonnull String workspaceId, + @Nonnull String serverEndpoint, + @Nonnull String unityCatalogEndpoint, + @Nonnull String tableName) { + this.stubFactory = Objects.requireNonNull(stubFactory, "stubFactory cannot be null"); + this.executor = Objects.requireNonNull(executor, "executor cannot be null"); + this.workspaceId = Objects.requireNonNull(workspaceId, "workspaceId cannot be null"); + this.serverEndpoint = Objects.requireNonNull(serverEndpoint, "serverEndpoint cannot be null"); + this.unityCatalogEndpoint = + Objects.requireNonNull(unityCatalogEndpoint, "unityCatalogEndpoint cannot be null"); + Objects.requireNonNull(tableName, "tableName cannot be null"); + if (tableName.trim().isEmpty()) { + throw new IllegalArgumentException("tableName cannot be empty"); + } + this.tableName = tableName; + } + + /** + * Sets the OAuth client credentials and returns an authenticated builder. + * + *

    This is the most common authentication method. The SDK will use OAuth 2.0 with Unity Catalog + * to authenticate requests. + * + * @param clientId The OAuth client ID (must not be null or empty) + * @param clientSecret The OAuth client secret (must not be null or empty) + * @return an authenticated builder for configuring and building the stream + * @throws IllegalArgumentException if clientId or clientSecret is null or empty + */ + @Nonnull + public AuthenticatedZerobusStreamBuilder clientCredentials( + @Nonnull String clientId, @Nonnull String clientSecret) { + Objects.requireNonNull(clientId, "clientId cannot be null"); + Objects.requireNonNull(clientSecret, "clientSecret cannot be null"); + if (clientId.trim().isEmpty()) { + throw new IllegalArgumentException("clientId cannot be empty"); + } + if (clientSecret.trim().isEmpty()) { + throw new IllegalArgumentException("clientSecret cannot be empty"); + } + return new AuthenticatedZerobusStreamBuilder( + stubFactory, + executor, + workspaceId, + serverEndpoint, + unityCatalogEndpoint, + tableName, + clientId, + clientSecret); + } + + /** + * Returns an unauthenticated builder for custom authentication. + * + *

    Use this when you want to provide your own authentication mechanism via {@link + * UnauthenticatedZerobusStreamBuilder#headersProvider}. If no headers provider is set, streams + * will be created without authentication headers. + * + * @return an unauthenticated builder for configuring and building the stream + */ + @Nonnull + public UnauthenticatedZerobusStreamBuilder unauthenticated() { + return new UnauthenticatedZerobusStreamBuilder( + stubFactory, executor, workspaceId, serverEndpoint, unityCatalogEndpoint, tableName); + } + + // ==================== Base Configurable Builder ==================== + + /** + * Abstract base class for stream builders that provides common configuration methods. + * + * @param The concrete builder type for method chaining + */ + abstract static class BaseConfigurableZerobusStreamBuilder< + SELF extends BaseConfigurableZerobusStreamBuilder> { + protected final ZerobusSdkStubFactory stubFactory; + protected final ExecutorService executor; + protected final String workspaceId; + protected final String serverEndpoint; + protected final String unityCatalogEndpoint; + protected final String tableName; + protected Optional headersProvider = Optional.empty(); + protected Optional tlsConfig = Optional.empty(); + protected StreamConfigurationOptions.StreamConfigurationOptionsBuilder optionsBuilder = + StreamConfigurationOptions.builder(); + + BaseConfigurableZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + String tableName) { + this.stubFactory = stubFactory; + this.executor = executor; + this.workspaceId = workspaceId; + this.serverEndpoint = serverEndpoint; + this.unityCatalogEndpoint = unityCatalogEndpoint; + this.tableName = tableName; + } + + @SuppressWarnings("unchecked") + protected SELF self() { + return (SELF) this; + } + + /** Returns the client ID for OAuth, or null if not using OAuth. */ + @Nullable protected abstract String getClientId(); + + /** Returns the client secret for OAuth, or null if not using OAuth. */ + @Nullable protected abstract String getClientSecret(); + + /** + * Sets a custom headers provider. + * + *

    Headers providers can add custom headers to all gRPC requests. This can be used for custom + * authentication, tracing, or other purposes. + * + *

    For OAuth authentication, use {@link ZerobusStreamBuilder#clientCredentials} instead - it + * automatically sets up an OAuth headers provider. + * + * @param headersProvider The custom headers provider + * @return this builder for method chaining + */ + @Nonnull + public SELF headersProvider(@Nonnull HeadersProvider headersProvider) { + this.headersProvider = + Optional.of(Objects.requireNonNull(headersProvider, "headersProvider cannot be null")); + return self(); + } + + /** + * Sets a custom TLS configuration. + * + *

    If not set, the default secure TLS configuration will be used. + * + * @param tlsConfig The TLS configuration + * @return this builder for method chaining + */ + @Nonnull + public SELF tlsConfig(@Nonnull TlsConfig tlsConfig) { + this.tlsConfig = Optional.of(Objects.requireNonNull(tlsConfig, "tlsConfig cannot be null")); + return self(); + } + + /** + * Sets the maximum number of requests that can be in flight. + * + * @param maxInflightRequests the maximum number of in-flight requests (must be positive) + * @return this builder for method chaining + * @throws IllegalArgumentException if maxInflightRequests is not positive + */ + @Nonnull + public SELF maxInflightRequests(int maxInflightRequests) { + if (maxInflightRequests <= 0) { + throw new IllegalArgumentException( + "maxInflightRequests must be positive, got: " + maxInflightRequests); + } + this.optionsBuilder.setMaxInflightRequests(maxInflightRequests); + return self(); + } + + /** + * Sets whether automatic recovery is enabled. + * + * @param recovery true to enable automatic recovery, false to disable + * @return this builder for method chaining + */ + @Nonnull + public SELF recovery(boolean recovery) { + this.optionsBuilder.setRecovery(recovery); + return self(); + } + + /** + * Sets the timeout for recovery operations in milliseconds. + * + * @param recoveryTimeoutMs the recovery timeout (must be non-negative) + * @return this builder for method chaining + * @throws IllegalArgumentException if recoveryTimeoutMs is negative + */ + @Nonnull + public SELF recoveryTimeoutMs(int recoveryTimeoutMs) { + if (recoveryTimeoutMs < 0) { + throw new IllegalArgumentException( + "recoveryTimeoutMs must be non-negative, got: " + recoveryTimeoutMs); + } + this.optionsBuilder.setRecoveryTimeoutMs(recoveryTimeoutMs); + return self(); + } + + /** + * Sets the backoff delay between recovery attempts in milliseconds. + * + * @param recoveryBackoffMs the backoff delay (must be non-negative) + * @return this builder for method chaining + * @throws IllegalArgumentException if recoveryBackoffMs is negative + */ + @Nonnull + public SELF recoveryBackoffMs(int recoveryBackoffMs) { + if (recoveryBackoffMs < 0) { + throw new IllegalArgumentException( + "recoveryBackoffMs must be non-negative, got: " + recoveryBackoffMs); + } + this.optionsBuilder.setRecoveryBackoffMs(recoveryBackoffMs); + return self(); + } + + /** + * Sets the maximum number of recovery attempts. + * + * @param recoveryRetries the maximum retries (must be non-negative) + * @return this builder for method chaining + * @throws IllegalArgumentException if recoveryRetries is negative + */ + @Nonnull + public SELF recoveryRetries(int recoveryRetries) { + if (recoveryRetries < 0) { + throw new IllegalArgumentException( + "recoveryRetries must be non-negative, got: " + recoveryRetries); + } + this.optionsBuilder.setRecoveryRetries(recoveryRetries); + return self(); + } + + /** + * Sets the timeout for flush operations in milliseconds. + * + * @param flushTimeoutMs the flush timeout (must be non-negative) + * @return this builder for method chaining + * @throws IllegalArgumentException if flushTimeoutMs is negative + */ + @Nonnull + public SELF flushTimeoutMs(int flushTimeoutMs) { + if (flushTimeoutMs < 0) { + throw new IllegalArgumentException( + "flushTimeoutMs must be non-negative, got: " + flushTimeoutMs); + } + this.optionsBuilder.setFlushTimeoutMs(flushTimeoutMs); + return self(); + } + + /** + * Sets the timeout for server acknowledgment in milliseconds. 
+ * + * @param serverLackOfAckTimeoutMs the acknowledgment timeout (must be positive) + * @return this builder for method chaining + * @throws IllegalArgumentException if serverLackOfAckTimeoutMs is not positive + */ + @Nonnull + public SELF serverLackOfAckTimeoutMs(int serverLackOfAckTimeoutMs) { + if (serverLackOfAckTimeoutMs <= 0) { + throw new IllegalArgumentException( + "serverLackOfAckTimeoutMs must be positive, got: " + serverLackOfAckTimeoutMs); + } + this.optionsBuilder.setServerLackOfAckTimeoutMs(serverLackOfAckTimeoutMs); + return self(); + } + + /** + * Sets the maximum message size in bytes for gRPC messages. + * + * @param maxMessageSizeBytes the maximum message size (must be positive) + * @return this builder for method chaining + * @throws IllegalArgumentException if maxMessageSizeBytes is not positive + */ + @Nonnull + public SELF maxMessageSizeBytes(int maxMessageSizeBytes) { + if (maxMessageSizeBytes <= 0) { + throw new IllegalArgumentException( + "maxMessageSizeBytes must be positive, got: " + maxMessageSizeBytes); + } + this.optionsBuilder.setMaxMessageSizeBytes(maxMessageSizeBytes); + return self(); + } + + /** + * Sets the offset acknowledgment callback function. + * + * @param offsetCallback the callback that receives acknowledged offset IDs + * @return this builder for method chaining + */ + @Nonnull + public SELF offsetCallback(@Nullable LongConsumer offsetCallback) { + this.optionsBuilder.setOffsetCallback(offsetCallback); + return self(); + } + + /** + * Sets all options from an existing StreamConfigurationOptions object. + * + *

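Putting the setters above together, a hedged configuration sketch that builds a StreamConfigurationOptions object directly (values are illustrative; the builder and setter names are those used elsewhere in this patch):

```java
StreamConfigurationOptions opts = StreamConfigurationOptions.builder()
    .setMaxInflightRequests(10_000)        // backpressure bound
    .setRecovery(true)                     // recover broken streams automatically
    .setRecoveryRetries(3)
    .setRecoveryBackoffMs(2_000)
    .setServerLackOfAckTimeoutMs(60_000)
    .setOffsetCallback(offset ->           // receives acknowledged offset IDs
        System.out.println("acked up to " + offset))
    .build();
```

The resulting object can then be applied to any stream builder via {@code .options(opts)}, which is the scenario described next.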
    This is useful when recreating streams with the same configuration. + * + * @param options The options to copy + * @return this builder for method chaining + */ + @Nonnull + public SELF options(@Nonnull StreamConfigurationOptions options) { + Objects.requireNonNull(options, "options cannot be null"); + this.optionsBuilder = options.toBuilder(); + return self(); + } + + /** + * Configures the stream for compiled protobuf records. + * + * @param defaultInstance The default instance of the protobuf message type + * @param The protobuf message type + * @return a typed builder for completing the stream creation + */ + @Nonnull + public ProtoZerobusStreamBuilder compiledProto( + @Nonnull T defaultInstance) { + Objects.requireNonNull(defaultInstance, "defaultInstance cannot be null"); + ProtoTableProperties tableProperties = + ProtoTableProperties.fromCompiled(tableName, defaultInstance); + return new ProtoZerobusStreamBuilder<>( + stubFactory, + executor, + workspaceId, + serverEndpoint, + unityCatalogEndpoint, + tableProperties, + getClientId(), + getClientSecret(), + optionsBuilder.build(), + headersProvider, + tlsConfig); + } + + /** + * Configures the stream for dynamic protobuf records. + * + * @param descriptor The protobuf descriptor created or loaded at runtime + * @return a typed builder for completing the stream creation + */ + @Nonnull + public ProtoZerobusStreamBuilder dynamicProto( + @Nonnull Descriptors.Descriptor descriptor) { + Objects.requireNonNull(descriptor, "descriptor cannot be null"); + ProtoTableProperties tableProperties = + ProtoTableProperties.fromDynamic(tableName, descriptor); + return new ProtoZerobusStreamBuilder<>( + stubFactory, + executor, + workspaceId, + serverEndpoint, + unityCatalogEndpoint, + tableProperties, + getClientId(), + getClientSecret(), + optionsBuilder.build(), + headersProvider, + tlsConfig); + } + + /** + * Configures the stream for legacy proto records (package-private, for internal SDK use). + * + *

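For consumers migrating off the legacy path described next, a hedged sketch of the equivalent builder call (MyRecord and the credentials are placeholders; the deprecated createStream signature is abbreviated):

```java
// 0.1.x style (deprecated):
// ZerobusStream<MyRecord> old =
//     sdk.createStream(tableProperties, clientId, clientSecret, options).join();

// 0.2.0 builder style:
ProtoZerobusStream<MyRecord> stream = sdk.streamBuilder("catalog.schema.events")
    .clientCredentials(clientId, clientSecret)
    .options(options)                                 // reuse existing options
    .compiledProto(MyRecord.getDefaultInstance())
    .build()
    .join();
```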
    This method is used by {@link ZerobusSdk#createStream} to maintain backwards + * compatibility. + * + * @param tableProperties The legacy table properties + * @param The protobuf message type + * @return a typed builder for completing the stream creation + * @deprecated Since 0.2.0. Use {@link #compiledProto(Message)} instead. This method will be + * removed in a future release. + */ + @Deprecated + @Nonnull + LegacyProtoZerobusStreamBuilder legacyProto( + @Nonnull TableProperties tableProperties) { + Objects.requireNonNull(tableProperties, "tableProperties cannot be null"); + return new LegacyProtoZerobusStreamBuilder<>( + stubFactory, + executor, + workspaceId, + serverEndpoint, + unityCatalogEndpoint, + tableProperties, + getClientId(), + getClientSecret(), + optionsBuilder.build(), + headersProvider, + tlsConfig); + } + + /** + * Configures the stream for JSON records. + * + * @return a typed builder for completing the stream creation + */ + @Nonnull + public JsonZerobusStreamBuilder json() { + JsonTableProperties tableProperties = new JsonTableProperties(tableName); + return new JsonZerobusStreamBuilder( + stubFactory, + executor, + workspaceId, + serverEndpoint, + unityCatalogEndpoint, + tableProperties, + getClientId(), + getClientSecret(), + optionsBuilder.build(), + headersProvider, + tlsConfig); + } + } + + // ==================== Authenticated Stream Builder ==================== + + /** + * Builder for streams using OAuth authentication. + * + *

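As the description below notes, a custom provider can still be layered on top of the OAuth builder; a brief sketch (tracingProvider is hypothetical):

```java
sdk.streamBuilder("catalog.schema.events")
    .clientCredentials(clientId, clientSecret)
    .headersProvider(tracingProvider) // an explicit provider takes precedence
    .json()                           // over the default OAuth provider
    .build();
```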
    This builder is returned by {@link ZerobusStreamBuilder#clientCredentials} and has OAuth + * credentials configured. You can still override the headers provider if needed. + */ + public static class AuthenticatedZerobusStreamBuilder + extends BaseConfigurableZerobusStreamBuilder { + + private final String clientId; + private final String clientSecret; + + AuthenticatedZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + String tableName, + String clientId, + String clientSecret) { + super(stubFactory, executor, workspaceId, serverEndpoint, unityCatalogEndpoint, tableName); + this.clientId = clientId; + this.clientSecret = clientSecret; + } + + @Override + protected String getClientId() { + return clientId; + } + + @Override + protected String getClientSecret() { + return clientSecret; + } + } + + // ==================== Unauthenticated Stream Builder ==================== + + /** + * Builder for streams without OAuth authentication. + * + *

    This builder is returned by {@link ZerobusStreamBuilder#unauthenticated}. Use {@link + * #headersProvider} to set custom authentication headers, or leave it unset for unauthenticated + * requests. + */ + public static class UnauthenticatedZerobusStreamBuilder + extends BaseConfigurableZerobusStreamBuilder { + + UnauthenticatedZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + String tableName) { + super(stubFactory, executor, workspaceId, serverEndpoint, unityCatalogEndpoint, tableName); + } + + @Override + protected String getClientId() { + return null; + } + + @Override + protected String getClientSecret() { + return null; + } + } + + // ==================== Typed Stream Builders ==================== + + /** + * Builder for proto streams (final step after schema selection). + * + * @param The protobuf message type + */ + public static class ProtoZerobusStreamBuilder { + private static final Logger logger = LoggerFactory.getLogger(ProtoZerobusStreamBuilder.class); + + private final ZerobusSdkStubFactory stubFactory; + private final ExecutorService executor; + private final String workspaceId; + private final String serverEndpoint; + private final String unityCatalogEndpoint; + private final ProtoTableProperties tableProperties; + private final String clientId; + private final String clientSecret; + private final StreamConfigurationOptions options; + private final Optional headersProvider; + private final Optional tlsConfig; + + ProtoZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + ProtoTableProperties tableProperties, + String clientId, + String clientSecret, + StreamConfigurationOptions options, + Optional headersProvider, + Optional tlsConfig) { + this.stubFactory = stubFactory; + this.executor = executor; + this.workspaceId = workspaceId; + this.serverEndpoint = serverEndpoint; + this.unityCatalogEndpoint = unityCatalogEndpoint; + this.tableProperties = tableProperties; + this.clientId = clientId; + this.clientSecret = clientSecret; + this.options = options; + this.headersProvider = headersProvider; + this.tlsConfig = tlsConfig; + } + + /** + * Builds and initializes the proto stream. 
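Since initialization is asynchronous, failures surface through the returned future rather than as thrown exceptions; a hedged sketch of non-blocking handling (MyRecord is a placeholder for a generated protobuf class):

```java
sdk.streamBuilder("catalog.schema.events")
    .clientCredentials(clientId, clientSecret)
    .compiledProto(MyRecord.getDefaultInstance())
    .build()
    .whenComplete((stream, err) -> {
        if (err != null) {
            // Initialization failed; the cause is typically a ZerobusException.
            err.printStackTrace();
        } else {
            // Stream is ready for ingestion.
        }
    });
```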
+ * + * @return CompletableFuture that completes with the stream + */ + @Nonnull + public CompletableFuture> build() { + CompletableFuture> result = new CompletableFuture<>(); + + try { + logger.debug("Creating proto stream for table: {}", tableProperties.getTableName()); + + HeadersProvider effectiveHeaders = + headersProvider.orElseGet( + () -> + new OAuthHeadersProvider( + tableProperties.getTableName(), + workspaceId, + unityCatalogEndpoint, + clientId, + clientSecret)); + TlsConfig effectiveTls = tlsConfig.orElseGet(SecureTlsConfig::new); + + String tableName = tableProperties.getTableName(); + Supplier stubSupplier = + () -> + stubFactory.createStub( + serverEndpoint, + effectiveHeaders, + effectiveTls, + options.maxMessageSizeBytes(), + tableName); + + ProtoZerobusStream stream = + new ProtoZerobusStream<>( + stubSupplier, + tableProperties, + clientId, + clientSecret, + effectiveHeaders, + effectiveTls, + options, + executor); + + stream + .initialize() + .whenComplete( + (r, e) -> { + if (e == null) { + result.complete(stream); + } else { + result.completeExceptionally(e); + } + }); + } catch (Throwable e) { + logger.error("Failed to create proto stream", e); + result.completeExceptionally( + e instanceof ZerobusException ? e : new ZerobusException(e.getMessage(), e)); + } + return result; + } + } + + /** Builder for JSON streams (final step after schema selection). */ + public static class JsonZerobusStreamBuilder { + private static final Logger logger = LoggerFactory.getLogger(JsonZerobusStreamBuilder.class); + + private final ZerobusSdkStubFactory stubFactory; + private final ExecutorService executor; + private final String workspaceId; + private final String serverEndpoint; + private final String unityCatalogEndpoint; + private final JsonTableProperties tableProperties; + private final String clientId; + private final String clientSecret; + private final StreamConfigurationOptions options; + private final Optional headersProvider; + private final Optional tlsConfig; + + JsonZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + JsonTableProperties tableProperties, + String clientId, + String clientSecret, + StreamConfigurationOptions options, + Optional headersProvider, + Optional tlsConfig) { + this.stubFactory = stubFactory; + this.executor = executor; + this.workspaceId = workspaceId; + this.serverEndpoint = serverEndpoint; + this.unityCatalogEndpoint = unityCatalogEndpoint; + this.tableProperties = tableProperties; + this.clientId = clientId; + this.clientSecret = clientSecret; + this.options = options; + this.headersProvider = headersProvider; + this.tlsConfig = tlsConfig; + } + + /** + * Builds and initializes the JSON stream. 
+ * + * @return CompletableFuture that completes with the stream + */ + @Nonnull + public CompletableFuture build() { + CompletableFuture result = new CompletableFuture<>(); + + try { + logger.debug("Creating JSON stream for table: {}", tableProperties.getTableName()); + + HeadersProvider effectiveHeaders = + headersProvider.orElseGet( + () -> + new OAuthHeadersProvider( + tableProperties.getTableName(), + workspaceId, + unityCatalogEndpoint, + clientId, + clientSecret)); + TlsConfig effectiveTls = tlsConfig.orElseGet(SecureTlsConfig::new); + + String tableName = tableProperties.getTableName(); + Supplier stubSupplier = + () -> + stubFactory.createStub( + serverEndpoint, + effectiveHeaders, + effectiveTls, + options.maxMessageSizeBytes(), + tableName); + + JsonZerobusStream stream = + new JsonZerobusStream( + stubSupplier, + tableProperties, + clientId, + clientSecret, + effectiveHeaders, + effectiveTls, + options, + executor); + + stream + .initialize() + .whenComplete( + (r, e) -> { + if (e == null) { + result.complete(stream); + } else { + result.completeExceptionally(e); + } + }); + } catch (Throwable e) { + logger.error("Failed to create JSON stream", e); + result.completeExceptionally( + e instanceof ZerobusException ? e : new ZerobusException(e.getMessage(), e)); + } + return result; + } + } + + /** + * Builder for legacy proto streams (package-private, for internal SDK use). + * + *
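Before the legacy builder, a compact end-to-end sketch of the JSON path just shown (table name and credentials are placeholders; MapBatch and ingestBatch follow the batch types introduced later in this patch):

```java
JsonZerobusStream stream = sdk.streamBuilder("catalog.schema.events")
    .clientCredentials(clientId, clientSecret)
    .json()
    .build()
    .join(); // blocks until the stream is initialized

Long offset = stream.ingestBatch(
    MapBatch.of(Map.of("name", "Alice", "age", 30)));
```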

    This builder is used by {@link ZerobusSdk#createStream} to maintain backwards compatibility + * with the deprecated {@link ZerobusStream} type. + * + * @param The protobuf message type + * @deprecated Since 0.2.0. Use {@link ProtoZerobusStreamBuilder} instead. This class will be + * removed in a future release. + */ + @Deprecated + static class LegacyProtoZerobusStreamBuilder { + private static final Logger logger = + LoggerFactory.getLogger(LegacyProtoZerobusStreamBuilder.class); + + private final ZerobusSdkStubFactory stubFactory; + private final ExecutorService executor; + private final String workspaceId; + private final String serverEndpoint; + private final String unityCatalogEndpoint; + private final TableProperties tableProperties; + private final String clientId; + private final String clientSecret; + private final StreamConfigurationOptions options; + private final Optional headersProvider; + private final Optional tlsConfig; + + LegacyProtoZerobusStreamBuilder( + ZerobusSdkStubFactory stubFactory, + ExecutorService executor, + String workspaceId, + String serverEndpoint, + String unityCatalogEndpoint, + TableProperties tableProperties, + String clientId, + String clientSecret, + StreamConfigurationOptions options, + Optional headersProvider, + Optional tlsConfig) { + this.stubFactory = stubFactory; + this.executor = executor; + this.workspaceId = workspaceId; + this.serverEndpoint = serverEndpoint; + this.unityCatalogEndpoint = unityCatalogEndpoint; + this.tableProperties = tableProperties; + this.clientId = clientId; + this.clientSecret = clientSecret; + this.options = options; + this.headersProvider = headersProvider; + this.tlsConfig = tlsConfig; + } + + /** + * Builds and initializes the legacy proto stream. + * + * @return CompletableFuture that completes with the stream + * @deprecated Since 0.2.0. Use {@link ProtoZerobusStreamBuilder#build()} instead. This method + * will be removed in a future release. + */ + @Deprecated + @Nonnull + CompletableFuture> build() { + CompletableFuture> result = new CompletableFuture<>(); + + try { + logger.debug("Creating legacy stream for table: {}", tableProperties.getTableName()); + + HeadersProvider effectiveHeaders = + headersProvider.orElseGet( + () -> + new OAuthHeadersProvider( + tableProperties.getTableName(), + workspaceId, + unityCatalogEndpoint, + clientId, + clientSecret)); + TlsConfig effectiveTls = tlsConfig.orElseGet(SecureTlsConfig::new); + + String tableName = tableProperties.getTableName(); + Supplier stubSupplier = + () -> + stubFactory.createStub( + serverEndpoint, + effectiveHeaders, + effectiveTls, + options.maxMessageSizeBytes(), + tableName); + + ZerobusStream stream = + new ZerobusStream<>( + stubSupplier, + tableProperties, + clientId, + clientSecret, + effectiveHeaders, + effectiveTls, + options, + executor); + + stream + .initialize() + .whenComplete( + (r, e) -> { + if (e == null) { + result.complete(stream); + } else { + result.completeExceptionally(e); + } + }); + } catch (Throwable e) { + logger.error("Failed to create legacy stream", e); + result.completeExceptionally( + e instanceof ZerobusException ? 
e : new ZerobusException(e.getMessage(), e)); + } + return result; + } + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/auth/HeadersProvider.java b/sdk/src/main/java/com/databricks/zerobus/auth/HeadersProvider.java new file mode 100644 index 0000000..9f374bc --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/auth/HeadersProvider.java @@ -0,0 +1,78 @@ +package com.databricks.zerobus.auth; + +import com.databricks.zerobus.NonRetriableException; +import java.util.Map; + +/** + * Interface for providing custom headers for gRPC stream authentication and configuration. + * + *

    This interface allows users to implement custom authentication strategies or add additional + * headers to gRPC requests. The headers returned by {@link #getHeaders()} are attached to all + * outgoing gRPC requests for the stream. + * + *

    The {@code getHeaders()} method is called: + * + *

      + *
    • Once when creating a stream + *
    • When recovering from failure (if recovery is enabled) + *
    • When recreating a stream + *
    + * + *

    Example usage with custom headers: + * + *

    {@code
    + * public class CustomHeadersProvider implements HeadersProvider {
    + *     private final String token;
    + *     private final String tableName;
    + *
    + *     public CustomHeadersProvider(String token, String tableName) {
    + *         this.token = token;
    + *         this.tableName = tableName;
    + *     }
    + *
    + *     @Override
+ *     public Map<String, String> getHeaders() throws NonRetriableException {
+ *         Map<String, String> headers = new HashMap<>();
    + *         headers.put("authorization", "Bearer " + token);
    + *         headers.put("x-databricks-zerobus-table-name", tableName);
    + *         headers.put("x-custom-header", "custom-value");
    + *         return headers;
    + *     }
    + * }
    + *
    + * HeadersProvider provider = new CustomHeadersProvider(token, tableName);
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options,
    + *     provider
    + * ).join();
    + * }
    + * + * @see OAuthHeadersProvider + * @see com.databricks.zerobus.ZerobusSdk#createStream + */ +public interface HeadersProvider { + + /** + * Returns headers to be attached to gRPC requests. + * + *

    This method is called synchronously when creating or recreating a stream. It should return a + * map of header names to header values. Common headers include: + * + *

      + *
• {@code authorization} - Authentication token (e.g., "Bearer <token>") 
    • {@code x-databricks-zerobus-table-name} - Target table name + *
    + * + *

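The sketch below illustrates the thread-safety guidance that follows: when one provider backs several streams, guard any shared state. Here a token is cached behind a lock (fetchToken is a placeholder for your identity source; expiry handling is omitted):

```java
public class CachingHeadersProvider implements HeadersProvider {
    private final String tableName;
    private String cachedToken; // guarded by the synchronized method

    public CachingHeadersProvider(String tableName) {
        this.tableName = tableName;
    }

    @Override
    public synchronized Map<String, String> getHeaders() throws NonRetriableException {
        if (cachedToken == null) {
            cachedToken = fetchToken(); // refresh/expiry handling omitted
        }
        Map<String, String> headers = new HashMap<>();
        headers.put("authorization", "Bearer " + cachedToken);
        headers.put("x-databricks-zerobus-table-name", tableName);
        return headers;
    }

    private String fetchToken() throws NonRetriableException {
        return "..."; // placeholder: obtain a token from your provider
    }
}
```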
Important: This method should be thread-safe if the same provider instance is used + * across multiple streams. + * + * @return A map of header names to header values + * @throws NonRetriableException if headers cannot be obtained due to a non-retriable error (e.g., + * invalid credentials, configuration error). This will cause stream creation to fail without + * retry. + */ + Map<String, String> getHeaders() throws NonRetriableException; +} diff --git a/sdk/src/main/java/com/databricks/zerobus/auth/OAuthHeadersProvider.java b/sdk/src/main/java/com/databricks/zerobus/auth/OAuthHeadersProvider.java new file mode 100644 index 0000000..11c6dfb --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/auth/OAuthHeadersProvider.java @@ -0,0 +1,115 @@ +package com.databricks.zerobus.auth; + +import com.databricks.zerobus.NonRetriableException; +import java.util.HashMap; +import java.util.Map; + +/** + * Default OAuth 2.0 headers provider for Databricks Zerobus authentication. + * + *

    This provider implements the OAuth 2.0 Client Credentials flow with Unity Catalog privileges. + * It obtains access tokens from the Databricks OIDC endpoint and includes them in the authorization + * header along with the target table name. + * + *

    The provider validates that the table name follows the three-part format + * (catalog.schema.table) and requests appropriate Unity Catalog privileges: + * + *

      + *
    • USE CATALOG on the table's catalog + *
    • USE SCHEMA on the table's schema + *
    • SELECT and MODIFY on the target table + *
    + * + *

    Example usage: + * + *

    {@code
    + * HeadersProvider provider = new OAuthHeadersProvider(
    + *     "catalog.schema.table",
    + *     "workspace-id",
    + *     "https://workspace.databricks.com",
    + *     "client-id",
    + *     "client-secret"
    + * );
    + *
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options,
    + *     provider
    + * ).join();
    + * }
    + * + *

    Note: Tokens are fetched lazily when {@link #getHeaders()} is called, not during + * provider construction. This allows the provider to be created early in the application lifecycle + * without triggering authentication immediately. + * + * @see HeadersProvider + * @see TokenFactory + */ +public class OAuthHeadersProvider implements HeadersProvider { + + private static final String SDK_VERSION = "0.2.0"; + private static final String USER_AGENT = "zerobus-sdk-java/" + SDK_VERSION; + + private static final String AUTHORIZATION_HEADER = "authorization"; + private static final String TABLE_NAME_HEADER = "x-databricks-zerobus-table-name"; + private static final String USER_AGENT_HEADER = "user-agent"; + private static final String BEARER_PREFIX = "Bearer "; + + private final String tableName; + private final String workspaceId; + private final String workspaceUrl; + private final String clientId; + private final String clientSecret; + + /** + * Creates a new OAuth headers provider. + * + * @param tableName The fully qualified table name (catalog.schema.table) + * @param workspaceId The Databricks workspace ID + * @param workspaceUrl The Unity Catalog endpoint URL + * @param clientId The OAuth client ID + * @param clientSecret The OAuth client secret + */ + public OAuthHeadersProvider( + String tableName, + String workspaceId, + String workspaceUrl, + String clientId, + String clientSecret) { + this.tableName = tableName; + this.workspaceId = workspaceId; + this.workspaceUrl = workspaceUrl; + this.clientId = clientId; + this.clientSecret = clientSecret; + } + + /** + * Returns headers for OAuth 2.0 authentication. + * + *

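The lazy-fetch behavior described above can be seen in miniature (all constructor arguments are placeholders):

```java
// No network I/O happens here:
OAuthHeadersProvider provider = new OAuthHeadersProvider(
    "main.sales.orders", "1234567890",
    "https://workspace.example.cloud.databricks.com",
    clientId, clientSecret);

// The OIDC token request is made on first use:
Map<String, String> headers = provider.getHeaders();
```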
    This method fetches a fresh OAuth token from the Databricks OIDC endpoint and returns + * headers containing: + * + *

      + *
    • {@code authorization}: Bearer token for authentication + *
    • {@code x-databricks-zerobus-table-name}: Target table name + *
• {@code user-agent}: SDK version identifier (zerobus-sdk-java/<version>) 
+ * + * @return A map containing the authorization, table name, and user-agent headers + * @throws NonRetriableException if the table name is invalid or token request fails + */ + @Override + public Map<String, String> getHeaders() throws NonRetriableException { + String token = + TokenFactory.getZerobusToken(tableName, workspaceId, workspaceUrl, clientId, clientSecret); + + Map<String, String> headers = new HashMap<>(); + headers.put(AUTHORIZATION_HEADER, BEARER_PREFIX + token); + headers.put(TABLE_NAME_HEADER, tableName); + headers.put(USER_AGENT_HEADER, USER_AGENT); + + return headers; + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/auth/TokenFactory.java b/sdk/src/main/java/com/databricks/zerobus/auth/TokenFactory.java new file mode 100644 index 0000000..14dd29b --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/auth/TokenFactory.java @@ -0,0 +1,225 @@ +package com.databricks.zerobus.auth; + +import com.databricks.zerobus.NonRetriableException; +import com.databricks.zerobus.common.http.DefaultHttpClient; +import com.databricks.zerobus.common.http.HttpClient; +import com.databricks.zerobus.common.http.RetryingHttpClient; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Factory for obtaining OAuth 2.0 access tokens with Unity Catalog privileges. + * + *

    This class uses the OAuth 2.0 client credentials flow with authorization details to request + * tokens scoped to specific Unity Catalog resources. The generated tokens include privileges for + * catalog, schema, and table access required for ingestion. + */ +public class TokenFactory { + + private static final Pattern ACCESS_TOKEN_PATTERN = + Pattern.compile("\"access_token\"\\s*:\\s*\"([^\"]+)\""); + + /** + * Obtains an OAuth token with Unity Catalog privileges for the specified table. + * + *

    The token request includes authorization details that grant: + * + *

      + *
    • USE CATALOG on the table's catalog + *
    • USE SCHEMA on the table's schema + *
    • SELECT and MODIFY on the target table + *
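Spelled out for a concrete table, the request that getZerobusToken assembles looks like the following (values are illustrative; the shape mirrors the form fields and buildAuthorizationDetails in the code below):

```java
// POST {workspaceUrl}/oidc/v1/token
// Authorization: Basic base64(clientId:clientSecret)
//
// grant_type=client_credentials
// &scope=all-apis
// &resource=api://databricks/workspaces/{workspaceId}/zerobusDirectWriteApi
// &authorization_details=<url-encoded JSON>
//
// where, for table "main.sales.orders", the JSON grants:
//   CATALOG  main              -> ["USE CATALOG"]
//   SCHEMA   main.sales        -> ["USE SCHEMA"]
//   TABLE    main.sales.orders -> ["SELECT", "MODIFY"]
```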
+ * + * @param tableName The fully qualified table name (catalog.schema.table) + * @param workspaceId The Databricks workspace ID + * @param workspaceUrl The Unity Catalog endpoint URL + * @param clientId The OAuth client ID + * @param clientSecret The OAuth client secret + * @return The OAuth access token (JWT) + * @throws NonRetriableException if the token request fails or table name is invalid + */ + public static String getZerobusToken( + String tableName, + String workspaceId, + String workspaceUrl, + String clientId, + String clientSecret) + throws NonRetriableException { + return getZerobusToken( + tableName, workspaceId, workspaceUrl, clientId, clientSecret, DEFAULT_HTTP_CLIENT); + } + + private static final HttpClient DEFAULT_HTTP_CLIENT = + new RetryingHttpClient(DefaultHttpClient.INSTANCE); + + /** + * Obtains an OAuth token with Unity Catalog privileges for the specified table. + * + *

    This overload accepts an {@link HttpClient} for testing purposes. + * + * @param tableName The fully qualified table name (catalog.schema.table) + * @param workspaceId The Databricks workspace ID + * @param workspaceUrl The Unity Catalog endpoint URL + * @param clientId The OAuth client ID + * @param clientSecret The OAuth client secret + * @param httpClient The HTTP client to use for requests + * @return The OAuth access token (JWT) + * @throws NonRetriableException if the token request fails or table name is invalid + */ + public static String getZerobusToken( + String tableName, + String workspaceId, + String workspaceUrl, + String clientId, + String clientSecret, + HttpClient httpClient) + throws NonRetriableException { + + validateNotBlank(tableName, "tableName"); + validateNotBlank(workspaceId, "workspaceId"); + validateNotBlank(workspaceUrl, "workspaceUrl"); + validateNotBlank(clientId, "clientId"); + validateNotBlank(clientSecret, "clientSecret"); + + String[] threePartTableName = parseTableName(tableName); + String catalogName = threePartTableName[0]; + String schemaName = threePartTableName[1]; + String tableNameOnly = threePartTableName[2]; + + String authorizationDetails = buildAuthorizationDetails(catalogName, schemaName, tableNameOnly); + + String url = workspaceUrl + "/oidc/v1/token"; + + try { + String formData = + "grant_type=client_credentials" + + "&scope=all-apis" + + "&resource=api://databricks/workspaces/" + + workspaceId + + "/zerobusDirectWriteApi" + + "&authorization_details=" + + URLEncoder.encode(authorizationDetails, "UTF-8"); + + String credentials = + Base64.getEncoder() + .encodeToString((clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8)); + + Map headers = new HashMap<>(); + headers.put("Authorization", "Basic " + credentials); + + HttpClient.HttpResponse response = httpClient.post(url, formData, headers); + + if (!response.isSuccess()) { + String errorBody = + response.getErrorBody() != null + ? response.getErrorBody() + : "No error details available"; + throw new NonRetriableException( + "OAuth request failed with status " + response.getStatusCode() + ": " + errorBody); + } + + return extractAccessToken(response.getBody()); + } catch (NonRetriableException e) { + throw e; + } catch (Exception e) { + throw new NonRetriableException("Unexpected error getting OAuth token: " + e.getMessage(), e); + } + } + + /** + * Validates that a parameter is not null or blank. + * + * @param value The value to validate + * @param paramName The parameter name for error messages + * @throws NonRetriableException if the value is null or blank + */ + static void validateNotBlank(String value, String paramName) throws NonRetriableException { + if (value == null) { + throw new NonRetriableException(paramName + " cannot be null"); + } + if (value.trim().isEmpty()) { + throw new NonRetriableException(paramName + " cannot be blank"); + } + } + + /** + * Parses and validates a three-part table name. 
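A few illustrative cases for the parser documented here (sketch; NonRetriableException is checked, so real callers handle or propagate it):

```java
parseTableName("main.sales.orders"); // -> ["main", "sales", "orders"]
parseTableName("sales.orders");      // throws NonRetriableException (two parts)
parseTableName("main..orders");      // throws NonRetriableException (empty part)
parseTableName("a.b.c.d");           // throws NonRetriableException (four parts)
```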
+ * + * @param tableName The table name in format catalog.schema.table + * @return Array of [catalog, schema, table] + * @throws NonRetriableException if the table name format is invalid + */ + static String[] parseTableName(String tableName) throws NonRetriableException { + String[] parts = tableName.split("\\.", -1); + if (parts.length != 3) { + throw new NonRetriableException( + "Table name '" + tableName + "' must be in the format of catalog.schema.table"); + } + + for (int i = 0; i < 3; i++) { + if (parts[i].trim().isEmpty()) { + throw new NonRetriableException( + "Table name '" + tableName + "' contains empty parts; must be catalog.schema.table"); + } + } + + return parts; + } + + /** + * Builds the authorization details JSON for Unity Catalog privileges. + * + * @param catalogName The catalog name + * @param schemaName The schema name + * @param tableName The table name (without catalog/schema prefix) + * @return The authorization details JSON string + */ + static String buildAuthorizationDetails(String catalogName, String schemaName, String tableName) { + return String.format( + "[\n" + + " {\n" + + " \"type\": \"unity_catalog_privileges\",\n" + + " \"privileges\": [\"USE CATALOG\"],\n" + + " \"object_type\": \"CATALOG\",\n" + + " \"object_full_path\": \"%s\"\n" + + " },\n" + + " {\n" + + " \"type\": \"unity_catalog_privileges\",\n" + + " \"privileges\": [\"USE SCHEMA\"],\n" + + " \"object_type\": \"SCHEMA\",\n" + + " \"object_full_path\": \"%s.%s\"\n" + + " },\n" + + " {\n" + + " \"type\": \"unity_catalog_privileges\",\n" + + " \"privileges\": [\"SELECT\", \"MODIFY\"],\n" + + " \"object_type\": \"TABLE\",\n" + + " \"object_full_path\": \"%s.%s.%s\"\n" + + " }\n" + + "]", + catalogName, catalogName, schemaName, catalogName, schemaName, tableName); + } + + /** + * Extracts the access token from an OAuth response body. + * + * @param responseBody The OAuth response body + * @return The access token + * @throws NonRetriableException if no access token is found + */ + static String extractAccessToken(String responseBody) throws NonRetriableException { + if (responseBody == null) { + throw new NonRetriableException("No response body received from OAuth request"); + } + Matcher matcher = ACCESS_TOKEN_PATTERN.matcher(responseBody); + if (matcher.find()) { + return matcher.group(1); + } else { + throw new NonRetriableException("No access token received from OAuth response"); + } + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/Batch.java b/sdk/src/main/java/com/databricks/zerobus/batch/Batch.java new file mode 100644 index 0000000..54c42a7 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/Batch.java @@ -0,0 +1,13 @@ +package com.databricks.zerobus.batch; + +/** + * Base interface for all batch types used in stream ingestion. + * + *

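A one-line illustration of the batch contract detailed next (MessageBatch is defined later in this patch; r1 through r3 are hypothetical records):

```java
// Three records, one offset: the batch is acknowledged atomically.
Long offset = stream.ingestBatch(MessageBatch.of(r1, r2, r3));
```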
Batches wrap collections of records for atomic ingestion. All records in a batch are assigned + * a single offset ID and acknowledged together. + * + * @param <T> The record type contained in the batch + * @see PrimaryBatch + * @see SecondaryBatch + */ +public interface Batch<T> extends Iterable<T> {} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/PrimaryBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/PrimaryBatch.java new file mode 100644 index 0000000..ba29206 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/PrimaryBatch.java @@ -0,0 +1,18 @@ +package com.databricks.zerobus.batch; + +/** + * Marker interface for primary batch types. + * + *

    Primary batches contain the "structured" or "native" record type for a stream: + * + *

      + *
    • {@link com.databricks.zerobus.batch.proto.MessageBatch} - protobuf messages for {@link + * com.databricks.zerobus.stream.ProtoZerobusStream} + *
    • {@link com.databricks.zerobus.batch.json.MapBatch} - Map records for {@link + * com.databricks.zerobus.stream.JsonZerobusStream} + *
+ * + * @param <T> The primary record type + * @see SecondaryBatch + */ +public interface PrimaryBatch<T> extends Batch<T>
    {} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/SecondaryBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/SecondaryBatch.java new file mode 100644 index 0000000..d588578 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/SecondaryBatch.java @@ -0,0 +1,18 @@ +package com.databricks.zerobus.batch; + +/** + * Marker interface for secondary batch types. + * + *

    Secondary batches contain the "raw" or "serialized" record type for a stream: + * + *

      + *
    • {@link com.databricks.zerobus.batch.proto.BytesBatch} - pre-serialized bytes for {@link + * com.databricks.zerobus.stream.ProtoZerobusStream} + *
    • {@link com.databricks.zerobus.batch.json.StringBatch} - JSON strings for {@link + * com.databricks.zerobus.stream.JsonZerobusStream} + *
+ * + * @param <T> The secondary record type + * @see PrimaryBatch + */ +public interface SecondaryBatch<T> extends Batch<T> {} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/json/MapBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/json/MapBatch.java new file mode 100644 index 0000000..60cff3f --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/json/MapBatch.java @@ -0,0 +1,68 @@ +package com.databricks.zerobus.batch.json; + +import com.databricks.zerobus.batch.PrimaryBatch; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; +import javax.annotation.Nonnull; + +/** + * Wrapper for a batch of Map records to be serialized as JSON. + * + *

    This is the primary batch type for {@link com.databricks.zerobus.stream.JsonZerobusStream}, + * containing structured Map records that will be serialized to JSON. + * + *

    Example usage: + * + *

    {@code
+ * List<Map<String, Object>> records = new ArrayList<>();
    + * records.add(Map.of("name", "Alice", "age", 30));
    + * records.add(Map.of("name", "Bob", "age", 25));
    + * Long offset = stream.ingestBatch(MapBatch.of(records));
    + * }
+ * + * @see com.databricks.zerobus.stream.JsonZerobusStream#ingestBatch(PrimaryBatch) + */ +public final class MapBatch implements PrimaryBatch<Map<String, Object>> { + + private final Iterable<Map<String, Object>> maps; + + private MapBatch(Iterable<Map<String, Object>> maps) { + this.maps = maps; + } + + /** + * Creates a MapBatch from an iterable of Maps. + * + * @param maps The Maps to wrap + * @return A new MapBatch instance + */ + @Nonnull + public static MapBatch of(@Nonnull Iterable<Map<String, Object>> maps) { + if (maps == null) { + throw new IllegalArgumentException("maps cannot be null"); + } + return new MapBatch(maps); + } + + /** + * Creates a MapBatch from varargs Maps. + * + * @param maps The Maps to wrap + * @return A new MapBatch instance + */ + @SafeVarargs + @Nonnull + public static MapBatch of(@Nonnull Map<String, Object>... maps) { + if (maps == null) { + throw new IllegalArgumentException("maps cannot be null"); + } + return new MapBatch(Arrays.asList(maps)); + } + + @Override + @Nonnull + public Iterator<Map<String, Object>> iterator() { + return maps.iterator(); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/json/StringBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/json/StringBatch.java new file mode 100644 index 0000000..a668c0b --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/json/StringBatch.java @@ -0,0 +1,66 @@ +package com.databricks.zerobus.batch.json; + +import com.databricks.zerobus.batch.SecondaryBatch; +import java.util.Arrays; +import java.util.Iterator; +import javax.annotation.Nonnull; + +/** + * Wrapper for a batch of JSON string records. + * + *

    This is the secondary batch type for {@link com.databricks.zerobus.stream.JsonZerobusStream}, + * containing raw JSON strings. + * + *

    Example usage: + * + *

    {@code
+ * List<String> records = new ArrayList<>();
    + * records.add("{\"name\": \"Alice\", \"age\": 30}");
    + * records.add("{\"name\": \"Bob\", \"age\": 25}");
    + * Long offset = stream.ingestBatch(StringBatch.of(records));
    + * }
+ * + * @see com.databricks.zerobus.stream.JsonZerobusStream#ingestBatch(StringBatch) + */ +public final class StringBatch implements SecondaryBatch<String> { + + private final Iterable<String> records; + + private StringBatch(Iterable<String> records) { + this.records = records; + } + + /** + * Creates a StringBatch from an iterable of strings. + * + * @param records The strings to wrap + * @return A new StringBatch instance + */ + @Nonnull + public static StringBatch of(@Nonnull Iterable<String> records) { + if (records == null) { + throw new IllegalArgumentException("records cannot be null"); + } + return new StringBatch(records); + } + + /** + * Creates a StringBatch from varargs strings. + * + * @param records The strings to wrap + * @return A new StringBatch instance + */ + @Nonnull + public static StringBatch of(@Nonnull String... records) { + if (records == null) { + throw new IllegalArgumentException("records cannot be null"); + } + return new StringBatch(Arrays.asList(records)); + } + + @Override + @Nonnull + public Iterator<String> iterator() { + return records.iterator(); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/proto/BytesBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/proto/BytesBatch.java new file mode 100644 index 0000000..2418246 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/proto/BytesBatch.java @@ -0,0 +1,64 @@ +package com.databricks.zerobus.batch.proto; + +import com.databricks.zerobus.batch.SecondaryBatch; +import java.util.Arrays; +import java.util.Iterator; +import javax.annotation.Nonnull; + +/** + * Wrapper for a batch of pre-serialized protobuf byte arrays. + * + *

    This is the secondary batch type for {@link com.databricks.zerobus.stream.ProtoZerobusStream}, + * containing raw serialized bytes. + * + *

    Example usage: + * + *

    {@code
+ * List<byte[]> serializedRecords = getSerializedRecordsFromKafka();
    + * Long offset = stream.ingestBatch(BytesBatch.of(serializedRecords));
    + * }
+ * + * @see com.databricks.zerobus.stream.ProtoZerobusStream#ingestBatch(BytesBatch) + */ +public final class BytesBatch implements SecondaryBatch<byte[]> { + + private final Iterable<byte[]> bytes; + + private BytesBatch(Iterable<byte[]> bytes) { + this.bytes = bytes; + } + + /** + * Creates a BytesBatch from an iterable of byte arrays. + * + * @param bytes The byte arrays to wrap + * @return A new BytesBatch instance + */ + @Nonnull + public static BytesBatch of(@Nonnull Iterable<byte[]> bytes) { + if (bytes == null) { + throw new IllegalArgumentException("bytes cannot be null"); + } + return new BytesBatch(bytes); + } + + /** + * Creates a BytesBatch from varargs byte arrays. + * + * @param bytes The byte arrays to wrap + * @return A new BytesBatch instance + */ + @Nonnull + public static BytesBatch of(@Nonnull byte[]... bytes) { + if (bytes == null) { + throw new IllegalArgumentException("bytes cannot be null"); + } + return new BytesBatch(Arrays.asList(bytes)); + } + + @Override + @Nonnull + public Iterator<byte[]> iterator() { + return bytes.iterator(); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/batch/proto/MessageBatch.java b/sdk/src/main/java/com/databricks/zerobus/batch/proto/MessageBatch.java new file mode 100644 index 0000000..0d7233b --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/batch/proto/MessageBatch.java @@ -0,0 +1,71 @@ +package com.databricks.zerobus.batch.proto; + +import com.databricks.zerobus.batch.PrimaryBatch; +import com.google.protobuf.Message; +import java.util.Arrays; +import java.util.Iterator; +import javax.annotation.Nonnull; + +/** + * Wrapper for a batch of protobuf message records. + * + *
+ * <p>This is the primary batch type for {@link com.databricks.zerobus.stream.ProtoZerobusStream},
+ * containing structured protobuf messages.
+ *
+ * <p>Example usage:
+ *
+ * <pre>{@code
+ * List<MyRecord> records = new ArrayList<>();
+ * records.add(MyRecord.newBuilder().setField("value1").build());
+ * records.add(MyRecord.newBuilder().setField("value2").build());
+ * Long offset = stream.ingestBatch(MessageBatch.of(records));
+ * }</pre>
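+ *
+ * <p>The varargs overload suits small, inline batches; a sketch with the same hypothetical
+ * {@code MyRecord} type:
+ *
+ * <pre>{@code
+ * Long offset =
+ *     stream.ingestBatch(
+ *         MessageBatch.of(
+ *             MyRecord.newBuilder().setField("value1").build(),
+ *             MyRecord.newBuilder().setField("value2").build()));
+ * }</pre>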
+ *
+ * @param <T> The protobuf message type
+ * @see com.databricks.zerobus.stream.ProtoZerobusStream#ingestBatch(PrimaryBatch)
+ */
+public final class MessageBatch<T extends Message> implements PrimaryBatch<T> {
+
+  private final Iterable<T> records;
+
+  private MessageBatch(Iterable<T> records) {
+    this.records = records;
+  }
+
+  /**
+   * Creates a MessageBatch from an iterable of protobuf messages.
+   *
+   * @param records The protobuf messages to wrap
+   * @param <T> The protobuf message type
+   * @return A new MessageBatch instance
+   */
+  @Nonnull
+  public static <T extends Message> MessageBatch<T> of(@Nonnull Iterable<T> records) {
+    if (records == null) {
+      throw new IllegalArgumentException("records cannot be null");
+    }
+    return new MessageBatch<>(records);
+  }
+
+  /**
+   * Creates a MessageBatch from varargs protobuf messages.
+   *
+   * @param records The protobuf messages to wrap
+   * @param <T> The protobuf message type
+   * @return A new MessageBatch instance
+   */
+  @SafeVarargs
+  @Nonnull
+  public static <T extends Message> MessageBatch<T> of(@Nonnull T... records) {
+    if (records == null) {
+      throw new IllegalArgumentException("records cannot be null");
+    }
+    return new MessageBatch<>(Arrays.asList(records));
+  }
+
+  @Override
+  @Nonnull
+  public Iterator<T> iterator() {
+    return records.iterator();
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/schema/BaseTableProperties.java b/sdk/src/main/java/com/databricks/zerobus/schema/BaseTableProperties.java
new file mode 100644
index 0000000..458e86d
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/schema/BaseTableProperties.java
@@ -0,0 +1,35 @@
+package com.databricks.zerobus.schema;
+
+/**
+ * Base class for table properties.
+ *
+ * <p>This abstract class defines the common properties for all table property implementations,
+ * including proto-based and JSON table properties.
+ *
+ * @see ProtoTableProperties
+ * @see JsonTableProperties
+ */
+public abstract class BaseTableProperties {
+
+  private final String tableName;
+
+  /**
+   * Creates a new table properties instance.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   */
+  protected BaseTableProperties(String tableName) {
+    this.tableName = tableName;
+  }
+
+  /**
+   * Returns the fully qualified table name.
+   *
+   * <p>The table name should be in the format: catalog.schema.table
+   *
+   * @return the fully qualified table name
+   */
+  public String getTableName() {
+    return tableName;
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/schema/JsonTableProperties.java b/sdk/src/main/java/com/databricks/zerobus/schema/JsonTableProperties.java
new file mode 100644
index 0000000..777ae5a
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/schema/JsonTableProperties.java
@@ -0,0 +1,37 @@
+package com.databricks.zerobus.schema;
+
+/**
+ * Table properties for JSON record ingestion.
+ *
+ * <p>Use this class when ingesting JSON records without a protobuf schema. JSON records are
+ * provided as strings or Maps.
+ *
+ * <p>Example usage:
+ *
+ * <pre>{@code
+ * // Create a JSON stream using the builder API.
+ * JsonZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
+ *     .clientCredentials(clientId, clientSecret)
+ *     .json()
+ *     .build()
+ *     .join();
+ *
+ * // Ingest JSON string.
+ * stream.ingest("{\"city\": \"Tokyo\", \"population\": 14000000}");
+ * stream.close();
+ * }</pre>
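+ *
+ * <p>Structured records work as well: {@code ingest(Map)} on the JSON stream serializes the Map
+ * before sending. A minimal sketch against the same stream as above:
+ *
+ * <pre>{@code
+ * Map<String, Object> row = new HashMap<>();
+ * row.put("city", "Tokyo");
+ * row.put("population", 14000000);
+ * stream.ingest(row);
+ * }</pre>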
+ *
+ * @see ProtoTableProperties
+ * @see com.databricks.zerobus.stream.JsonZerobusStream
+ */
+public class JsonTableProperties extends BaseTableProperties {
+
+  /**
+   * Creates table properties for JSON ingestion.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   */
+  public JsonTableProperties(String tableName) {
+    super(tableName);
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/schema/ProtoTableProperties.java b/sdk/src/main/java/com/databricks/zerobus/schema/ProtoTableProperties.java
new file mode 100644
index 0000000..a1b3637
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/schema/ProtoTableProperties.java
@@ -0,0 +1,126 @@
+package com.databricks.zerobus.schema;
+
+import com.google.protobuf.DescriptorProtos.DescriptorProto;
+import com.google.protobuf.Descriptors.Descriptor;
+import com.google.protobuf.Message;
+
+/**
+ * Table properties for protocol buffer record ingestion.
+ *
+ * <p>This class stores the table name and the protobuf descriptor proto which defines the schema
+ * for records being ingested. Use the factory methods to create instances:
+ *
+ * <ul>
+ *   <li>{@link #fromCompiled(String, Message)} - for compiled .proto schemas
+ *   <li>{@link #fromDynamic(String, Descriptor)} - for runtime-created schemas
+ * </ul>
+ *
+ * <p>Example usage with compiled schema:
+ *
+ * <pre>{@code
+ * ProtoZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
+ *     .clientCredentials(clientId, clientSecret)
+ *     .compiledProto(MyRecord.getDefaultInstance())
+ *     .build()
+ *     .join();
+ * }</pre>
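+ *
+ * <p>The compiled-schema case maps onto {@link #fromCompiled(String, Message)} directly; a sketch
+ * ({@code MyRecord} again stands in for a generated class):
+ *
+ * <pre>{@code
+ * ProtoTableProperties props =
+ *     ProtoTableProperties.fromCompiled("catalog.schema.table", MyRecord.getDefaultInstance());
+ * }</pre>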
+ *
+ * <p>Example usage with dynamic schema:
+ *
+ * <pre>{@code
+ * ProtoZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
+ *     .clientCredentials(clientId, clientSecret)
+ *     .dynamicProto(descriptor)
+ *     .build()
+ *     .join();
+ * }</pre>
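+ *
+ * <p>One way to obtain such a {@code descriptor} at runtime is the standard protobuf-java
+ * descriptor API; a sketch (the message and field names are illustrative only):
+ *
+ * <pre>{@code
+ * DescriptorProto message =
+ *     DescriptorProto.newBuilder()
+ *         .setName("AirQuality")
+ *         .addField(
+ *             FieldDescriptorProto.newBuilder()
+ *                 .setName("device_name")
+ *                 .setNumber(1)
+ *                 .setType(FieldDescriptorProto.Type.TYPE_STRING))
+ *         .build();
+ * FileDescriptorProto file =
+ *     FileDescriptorProto.newBuilder().setName("record.proto").addMessageType(message).build();
+ * // buildFrom throws DescriptorValidationException for inconsistent definitions
+ * Descriptor descriptor =
+ *     FileDescriptor.buildFrom(file, new FileDescriptor[0]).findMessageTypeByName("AirQuality");
+ * }</pre>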
+ *
+ * @see JsonTableProperties
+ * @see com.databricks.zerobus.stream.ProtoZerobusStream
+ */
+public class ProtoTableProperties extends BaseTableProperties {
+
+  private final DescriptorProto descriptorProto;
+  private final Message defaultInstance;
+
+  /**
+   * Creates table properties with a pre-computed descriptor proto.
+   *
+   * <p>Prefer using the factory methods {@link #fromCompiled(String, Message)} or {@link
+   * #fromDynamic(String, Descriptor)} instead of this constructor.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   * @param descriptorProto the protobuf descriptor proto for the schema
+   */
+  public ProtoTableProperties(String tableName, DescriptorProto descriptorProto) {
+    this(tableName, descriptorProto, null);
+  }
+
+  /**
+   * Creates table properties with a descriptor proto and optional default instance.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   * @param descriptorProto the protobuf descriptor proto for the schema
+   * @param defaultInstance the default instance (for compiled schemas, null for dynamic)
+   */
+  private ProtoTableProperties(
+      String tableName, DescriptorProto descriptorProto, Message defaultInstance) {
+    super(tableName);
+    this.descriptorProto = descriptorProto;
+    this.defaultInstance = defaultInstance;
+  }
+
+  /**
+   * Creates table properties from a compiled protobuf message type.
+   *
+   * <p>Use this factory method when you have a compiled .proto file and generated Java classes. The
+   * schema is derived from the default instance of your protobuf message type.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   * @param defaultInstance the default instance of the protobuf message type
+   * @param <T> the protobuf message type
+   * @return table properties for the compiled schema
+   */
+  public static <T extends Message> ProtoTableProperties fromCompiled(
+      String tableName, T defaultInstance) {
+    return new ProtoTableProperties(
+        tableName, defaultInstance.getDescriptorForType().toProto(), defaultInstance);
+  }
+
+  /**
+   * Creates table properties from a dynamic protobuf descriptor.
+   *
+   * <p>Use this factory method when you need to define a protobuf schema at runtime without a
+   * compiled .proto file.
+   *
+   * @param tableName the fully qualified table name (catalog.schema.table)
+   * @param descriptor the protobuf descriptor created or loaded at runtime
+   * @return table properties for the dynamic schema
+   */
+  public static ProtoTableProperties fromDynamic(String tableName, Descriptor descriptor) {
+    return new ProtoTableProperties(tableName, descriptor.toProto());
+  }
+
+  /**
+   * Returns the protobuf descriptor proto for the record schema.
+   *
+   * <p>This descriptor is used for schema validation when creating streams.
+   *
+   * @return the protobuf descriptor proto
+   */
+  public DescriptorProto getDescriptorProto() {
+    return descriptorProto;
+  }
+
+  /**
+   * Returns the default instance used to create this table properties.
+   *

    This is only available for compiled protobuf schemas (created via {@link + * #fromCompiled(String, Message)}). Returns null for dynamic schemas. + * + * @return the default instance, or null if not available (dynamic schema) + */ + public Message getDefaultInstance() { + return defaultInstance; + } +} diff --git a/src/main/java/com/databricks/zerobus/BackgroundTask.java b/sdk/src/main/java/com/databricks/zerobus/stream/BackgroundTask.java similarity index 98% rename from src/main/java/com/databricks/zerobus/BackgroundTask.java rename to sdk/src/main/java/com/databricks/zerobus/stream/BackgroundTask.java index 2080d99..14c3c7f 100644 --- a/src/main/java/com/databricks/zerobus/BackgroundTask.java +++ b/sdk/src/main/java/com/databricks/zerobus/stream/BackgroundTask.java @@ -1,4 +1,4 @@ -package com.databricks.zerobus; +package com.databricks.zerobus.stream; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/BaseZerobusStream.java b/sdk/src/main/java/com/databricks/zerobus/stream/BaseZerobusStream.java new file mode 100644 index 0000000..ea3e615 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/BaseZerobusStream.java @@ -0,0 +1,1129 @@ +package com.databricks.zerobus.stream; + +import com.databricks.zerobus.CloseStreamSignal; +import com.databricks.zerobus.CreateIngestStreamRequest; +import com.databricks.zerobus.EphemeralStreamRequest; +import com.databricks.zerobus.EphemeralStreamResponse; +import com.databricks.zerobus.NonRetriableException; +import com.databricks.zerobus.RecordType; +import com.databricks.zerobus.StreamConfigurationOptions; +import com.databricks.zerobus.StreamState; +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusGrpc.ZerobusStub; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.batch.PrimaryBatch; +import com.databricks.zerobus.schema.BaseTableProperties; +import com.databricks.zerobus.tls.TlsConfig; +import com.google.protobuf.ByteString; +import com.google.protobuf.DescriptorProtos; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.grpc.stub.ClientCallStreamObserver; +import io.grpc.stub.ClientResponseObserver; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract base class for Zerobus streams with a primary record type. + * + *
+ * <p>This class contains the common infrastructure for all streams, including gRPC connection
+ * management, recovery logic, and acknowledgment handling.
+ *
+ * <p>Implements {@link AutoCloseable} to support try-with-resources:
+ *
+ * <pre>{@code
+ * try (ProtoZerobusStream stream = sdk.streamBuilder("table")
+ *     .clientCredentials(clientId, clientSecret)
+ *     .compiledProto(MyRecord.getDefaultInstance())
+ *     .build()
+ *     .join()) {
+ *
+ *   stream.ingest(record);
+ * }
+ * // Stream is automatically closed
+ * }</pre>
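+ *
+ * <p>Durability of an individual record can be confirmed with {@link #waitForOffset(long)}; a
+ * sketch, assuming the same stream and record as above:
+ *
+ * <pre>{@code
+ * long offset = stream.ingest(record);
+ * stream.waitForOffset(offset); // blocks until the server acks this offset, fails, or times out
+ * }</pre>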
+ *
+ * <p>For streams with two record types (primary and secondary), see {@link DualTypeStream}.
+ *
+ * @param <P> The primary record type
+ * @see DualTypeStream
+ * @see ProtoZerobusStream
+ * @see JsonZerobusStream
+ */
+public abstract class BaseZerobusStream<P> implements AutoCloseable {
+  private static final Logger logger = LoggerFactory.getLogger(BaseZerobusStream.class);
+  private static final int CREATE_STREAM_TIMEOUT_MS = 15000;
+
+  // Configuration (immutable after construction)
+  protected final BaseTableProperties tableProperties;
+  protected final String clientId;
+  protected final String clientSecret;
+  protected final StreamConfigurationOptions options;
+  protected final HeadersProvider headersProvider;
+  protected final TlsConfig tlsConfig;
+  protected final Supplier<ZerobusStub> stubSupplier;
+  protected final ExecutorService executor;
+  protected final DescriptorProtos.DescriptorProto descriptorProto;
+
+  // Stream state (mutable)
+  private ZerobusStub stub;
+  private StreamState state = StreamState.UNINITIALIZED;
+  private Optional<String> streamId = Optional.empty();
+  private Optional<ClientCallStreamObserver<EphemeralStreamRequest>> grpcStream = Optional.empty();
+  private Optional<CompletableFuture<String>> streamCreatedEvent = Optional.empty();
+
+  // Batch lifecycle management
+  protected final LandingZone<P>
landingZone;
+  private List<InflightBatch<P>> failedBatches;
+  protected long latestAckedOffsetId = -1;
+  protected long lastSentOffsetId = -1;
+
+  // Background tasks and failure tracking
+  private final StreamFailureInfo failureInfo = new StreamFailureInfo();
+  private BackgroundTask senderTask;
+  private BackgroundTask unresponsivenessTask;
+  private ClientResponseObserver<EphemeralStreamRequest, EphemeralStreamResponse> ackReceiver;
+  private CompletableFuture<Void> gracefulCloseTask;
+  private volatile long gracefulCloseWatermark = -1;
+
+  // ==================== Constructor ====================
+
+  protected BaseZerobusStream(
+      Supplier<ZerobusStub> stubSupplier,
+      BaseTableProperties tableProperties,
+      String clientId,
+      String clientSecret,
+      HeadersProvider headersProvider,
+      TlsConfig tlsConfig,
+      StreamConfigurationOptions options,
+      ExecutorService executor,
+      DescriptorProtos.DescriptorProto descriptorProto) {
+    this.stubSupplier = stubSupplier;
+    this.tableProperties = tableProperties;
+    this.clientId = clientId;
+    this.clientSecret = clientSecret;
+    this.options = options;
+    this.headersProvider = headersProvider;
+    this.tlsConfig = tlsConfig;
+    this.executor = executor;
+    this.descriptorProto = descriptorProto;
+    this.landingZone = new LandingZone<>(options.maxInflightRequests());
+
+    initBackgroundTasks();
+    initAckReceiver();
+  }
+
+  // ==================== Public API ====================
+
+  /** Returns the stream ID assigned by the server. */
+  public synchronized String getStreamId() {
+    return streamId.orElse("");
+  }
+
+  /** Returns the current state of the stream. */
+  public synchronized StreamState getState() {
+    return state;
+  }
+
+  /**
+   * Waits for a specific offset to be acknowledged by the server.
+   *
+   * @param offset The offset ID to wait for
+   * @throws ZerobusException if the stream fails or times out
+   */
+  public void waitForOffset(long offset) throws ZerobusException {
+    synchronized (this) {
+      long startTime = System.currentTimeMillis();
+      while (latestAckedOffsetId < offset) {
+        if (state == StreamState.FAILED) {
+          throw new ZerobusException("Stream failed while waiting for offset " + offset);
+        }
+        if (state == StreamState.CLOSED && latestAckedOffsetId < offset) {
+          throw new ZerobusException("Stream closed before offset " + offset + " was acknowledged");
+        }
+        long remaining = options.flushTimeoutMs() - (System.currentTimeMillis() - startTime);
+        if (remaining <= 0) {
+          throw new ZerobusException("Timeout waiting for offset " + offset);
+        }
+        try {
+          this.wait(remaining);
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new ZerobusException("Interrupted while waiting for offset " + offset, e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Flushes the stream, waiting for all queued records to be acknowledged.
+   *
+   * @throws ZerobusException if the stream is not opened or flush fails
+   */
+  public void flush() throws ZerobusException {
+    synchronized (this) {
+      logger.debug("Flushing stream...");
+      try {
+        if (state == StreamState.UNINITIALIZED) {
+          throw new ZerobusException("Cannot flush stream when it is not opened");
+        }
+        waitForRecovery();
+        if (state == StreamState.OPENED) {
+          setState(StreamState.FLUSHING);
+        }
+        waitForInflightRecords();
+        logger.info("All records have been flushed");
+      } finally {
+        if (state == StreamState.FLUSHING) {
+          setState(StreamState.OPENED);
+        }
+      }
+    }
+  }
+
+  /**
+   * Closes the stream after flushing all queued records.
+   *
+   * @throws ZerobusException if the stream is not opened or close fails
+   */
+  public void close() throws ZerobusException {
+    boolean readyToClose = false;
+    synchronized (this) {
+      while (!readyToClose) {
+        switch (state) {
+          case UNINITIALIZED:
+            throw new ZerobusException("Cannot close stream when it is not opened");
+          case FAILED:
+            throw new ZerobusException("Stream failed and cannot be gracefully closed");
+          case CLOSED:
+            return; // Already closed
+          case FLUSHING:
+          case RECOVERING:
+          case PAUSED:
+            try {
+              this.wait();
+            } catch (InterruptedException e) {
+              Thread.currentThread().interrupt();
+            }
+            break;
+          case OPENED:
+            setState(StreamState.CLOSED);
+            readyToClose = true;
+            break;
+        }
+      }
+    }
+
+    Optional<ZerobusException> exception = Optional.empty();
+    try {
+      flush();
+    } catch (ZerobusException ex) {
+      exception = Optional.of(ex);
+      throw ex;
+    } catch (Exception ex) {
+      ZerobusException wrapped = new ZerobusException("Flush failed during close", ex);
+      exception = Optional.of(wrapped);
+      throw wrapped;
+    } finally {
+      doCloseStream(true, exception);
+    }
+    logger.info("Stream gracefully closed");
+  }
+
+  // ==================== Abstract Methods (Primary Type) ====================
+
+  /**
+   * Ingests a primary record into the stream.
+   *
+   * @param record The record to ingest
+   * @return The logical offset ID assigned to this record
+   * @throws ZerobusException if ingestion fails
+   */
+  public abstract long ingest(@Nonnull P record) throws ZerobusException;
+
+  /**
+   * Ingests a batch of primary records into the stream.
+   *
+   * <p>The batch is assigned a single offset ID and acknowledged atomically.
+   *
+   * @param batch The batch of records to ingest
+   * @return The offset ID for the batch, or null if empty
+   * @throws ZerobusException if ingestion fails
+   */
+  @Nullable public abstract Long ingestBatch(@Nonnull PrimaryBatch<P> batch) throws ZerobusException;
+
+  /**
+   * Returns unacknowledged records after stream failure.
+   *
+   * <p>These records can be re-ingested when the stream is recreated. Note that records ingested as
+   * raw bytes (e.g., {@code ingest(byte[])}) or raw JSON strings (e.g., {@code ingest(String)})
+   * will not be included, as they don't store the original primary record.
+   *
+   * @return Iterator of unacknowledged primary records
+   */
+  @Nonnull
+  public Iterator<P> getUnackedRecords() {
+    if (failedBatches == null) {
+      return java.util.Collections.emptyIterator();
+    }
+    java.util.List<P> records = new java.util.ArrayList<>();
+    for (InflightBatch<P> batch : failedBatches) {
+      if (batch.records != null) {
+        records.addAll(batch.records);
+      }
+    }
+    return records.iterator();
+  }
+
+  /**
+   * Recreates this stream with the same configuration.
+   *
+   * <p>Creates a new stream and re-ingests any unacknowledged records from this stream.
+   *
+   * @param sdk The SDK instance to use for recreation
+   * @return A future that completes with the new stream
+   */
+  @Nonnull
+  public abstract CompletableFuture<? extends BaseZerobusStream<P>> recreate(ZerobusSdk sdk);
+
+  /**
+   * Returns the record type for this stream.
+   *
+   * <p>This determines how records are serialized when sent to the server.
+   *
+   * @return The record type (PROTO or JSON)
+   */
+  @Nonnull
+  protected abstract RecordType getRecordType();
+
+  // ==================== Package-private for SDK ====================
+
+  public CompletableFuture<Void> initialize() {
+    CompletableFuture<Void> result = new CompletableFuture<>();
+    synchronized (this) {
+      if (state != StreamState.UNINITIALIZED) {
+        result.completeExceptionally(new ZerobusException("Stream already initialized"));
+        return result;
+      }
+    }
+
+    int retries = options.recovery() ? options.recoveryRetries() : 1;
+    runWithRetries(retries, "CreateStream", this::doCreateStream)
+        .whenComplete(
+            (r, e) -> {
+              if (e == null) {
+                setState(StreamState.OPENED);
+                unresponsivenessTask.start();
+                logger.info("Stream created with id {}", streamId.orElse("unknown"));
+                result.complete(null);
+              } else {
+                setState(StreamState.FAILED);
+                logger.error("Failed to create stream", e);
+                result.completeExceptionally(
+                    e instanceof ZerobusException
+                        ? e
+                        : new ZerobusException("Stream creation failed", e));
+              }
+            });
+    return result;
+  }
+
+  // ==================== Protected Methods for Subclasses ====================
+
+  protected synchronized StreamState getStateInternal() {
+    return state;
+  }
+
+  /**
+   * Checks that the stream is in a valid state for ingestion.
+   *
+   * <p>Waits if the stream is in a transient state (RECOVERING, FLUSHING), throws if in a terminal
+   * state (FAILED, CLOSED, UNINITIALIZED).
+   *
+   * <p>Note: Backpressure is now handled by LandingZone's semaphore, so this method no longer waits
+   * for queue space.
+   */
+  protected void checkIngestState() throws ZerobusException, InterruptedException {
+    synchronized (this) {
+      while (true) {
+        switch (state) {
+          case RECOVERING:
+          case FLUSHING:
+            this.wait();
+            break;
+          case PAUSED:
+            // Allow ingestion during PAUSED - records queue in landing zone
+            // and will be sent after recovery
+            return;
+          case FAILED:
+          case CLOSED:
+          case UNINITIALIZED:
+            throw new ZerobusException("Cannot ingest: stream is " + state);
+          case OPENED:
+            return;
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds an inflight batch to the landing zone.
+   *
+   * <p>This method blocks if the maximum inflight capacity is reached (backpressure).
+   *
+   * @param batch The batch to add
+   * @throws InterruptedException if interrupted while waiting for capacity
+   */
+  protected void addBatch(InflightBatch<P> batch) throws InterruptedException {
+    landingZone.add(batch);
+    synchronized (this) {
+      this.notifyAll();
+    }
+  }
+
+  protected long getNextOffsetId() {
+    return ++lastSentOffsetId;
+  }
+
+  /**
+   * Reassigns offset IDs to all batches in the landing zone.
+   *
+   * <p>This is called during recovery after a new stream is created. All batches (both pending and
+   * those that were inflight but moved back to pending) need new offset IDs for the new stream.
+   */
+  private void reassignBatchOffsets() {
+    synchronized (this) {
+      List<InflightBatch<P>> batches = landingZone.peekAll();
+      for (InflightBatch<P>
    batch : batches) { + batch.offsetId = getNextOffsetId(); + } + logger.debug("Reassigned offsets to {} batches for recovery", batches.size()); + } + } + + // ==================== Internal Implementation ==================== + + private synchronized void setState(StreamState newState) { + state = newState; + this.notifyAll(); + logger.debug("Stream state -> {}", newState); + } + + private void waitForRecovery() throws ZerobusException { + while (state == StreamState.RECOVERING) { + try { + this.wait(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while waiting for recovery", e); + } + } + } + + private void waitForInflightRecords() throws ZerobusException { + long startTime = System.currentTimeMillis(); + while (landingZone.totalCount() > 0) { + if (state == StreamState.FAILED) { + throw new ZerobusException("Stream failed during flush"); + } + long remaining = options.flushTimeoutMs() - (System.currentTimeMillis() - startTime); + if (remaining <= 0) { + throw new ZerobusException("Flush timed out"); + } + try { + this.wait(remaining); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted during flush", e); + } + } + } + + // ==================== Stream Lifecycle ==================== + + private CompletableFuture doCreateStream() { + CompletableFuture result = new CompletableFuture<>(); + int timeoutMs = options.recovery() ? options.recoveryTimeoutMs() : CREATE_STREAM_TIMEOUT_MS; + + latestAckedOffsetId = -1; + lastSentOffsetId = -1; + streamId = Optional.empty(); + grpcStream = Optional.empty(); + streamCreatedEvent = Optional.empty(); + + runWithTimeout( + timeoutMs, + () -> { + CompletableFuture attempt = new CompletableFuture<>(); + stub = stubSupplier.get(); + streamCreatedEvent = Optional.of(new CompletableFuture<>()); + grpcStream = + Optional.of( + (ClientCallStreamObserver) + stub.ephemeralStream(ackReceiver)); + + EphemeralStreamRequest createReq = + EphemeralStreamRequest.newBuilder() + .setCreateStream( + CreateIngestStreamRequest.newBuilder() + .setTableName(tableProperties.getTableName()) + .setDescriptorProto( + ByteString.copyFrom(descriptorProto.toByteArray())) + .setRecordType(getRecordType()) + .build()) + .build(); + + try { + grpcStream.get().onNext(createReq); + } catch (Exception e) { + failStream(e); + attempt.completeExceptionally(e); + return attempt; + } + + streamCreatedEvent + .get() + .whenComplete( + (id, e) -> { + if (e == null) { + streamId = Optional.of(id); + senderTask.start(); + attempt.complete(null); + } else { + failStream(e); + streamId = Optional.empty(); + streamCreatedEvent = Optional.empty(); + grpcStream = Optional.empty(); + attempt.completeExceptionally( + e instanceof ZerobusException + ? 
e + : new ZerobusException(e.getMessage(), e)); + } + }); + return attempt; + }) + .whenComplete( + (r, e) -> { + if (e == null) { + result.complete(null); + } else { + failStream(e); + Throwable ex = wrapException(e, "Stream creation failed"); + result.completeExceptionally(ex); + } + }); + return result; + } + + private void failStream(Throwable error) { + synchronized (this) { + if (grpcStream.isPresent()) { + try { + grpcStream.get().onError(error); + } catch (Exception ignored) { + } + grpcStream = Optional.empty(); + streamId = Optional.empty(); + } + } + } + + private void doCloseStream(boolean hardFailure, Optional exception) { + synchronized (this) { + logger.debug("Closing stream, hardFailure={}", hardFailure); + if (hardFailure && exception.isPresent()) { + setState(StreamState.FAILED); + } + + senderTask.cancel(); + + try { + if (grpcStream.isPresent()) { + grpcStream.get().onCompleted(); + if (hardFailure) { + grpcStream.get().cancel("Stream closed", null); + } + } + } catch (Exception ignored) { + } + + if (hardFailure) { + // Hard failure: close landing zone and fail all batches + landingZone.close(); + unresponsivenessTask.cancel(); + // Store failed batches for getUnackedRecords() + failedBatches = landingZone.removeAll(); + for (InflightBatch
<P>
    batch : failedBatches) { + batch.ackPromise.completeExceptionally( + exception.orElse(new ZerobusException("Stream failed"))); + } + this.notifyAll(); + } else { + // Soft close (recovery): move inflight batches back to pending for resending + landingZone.resetObserve(); + } + + grpcStream = Optional.empty(); + streamCreatedEvent = Optional.empty(); + streamId = Optional.empty(); + stub = null; + this.notifyAll(); + } + + senderTask.waitUntilStopped(); + if (hardFailure) { + unresponsivenessTask.waitUntilStopped(); + } + } + + private CompletableFuture doCloseStreamAsync( + boolean hardFailure, Optional exception) { + return CompletableFuture.runAsync(() -> doCloseStream(hardFailure, exception), executor); + } + + // ==================== Recovery ==================== + + private void handleStreamFailed(StreamFailureType type, Optional error) { + Optional exception = + error.map( + e -> + e instanceof ZerobusException + ? (ZerobusException) e + : new ZerobusException("Stream failed: " + e.getMessage(), e)); + if (!exception.isPresent()) { + exception = Optional.of(new ZerobusException("Stream failed")); + } + + synchronized (this) { + if (state == StreamState.FAILED + || state == StreamState.UNINITIALIZED + || state == StreamState.RECOVERING) { + if (state == StreamState.UNINITIALIZED && streamCreatedEvent.isPresent()) { + streamCreatedEvent.get().completeExceptionally(exception.get()); + } + return; + } + if (state == StreamState.CLOSED && !error.isPresent()) { + return; + } + error.ifPresent(e -> logger.error("Stream failed: {}", e.getMessage(), e)); + + if (error.isPresent() && error.get() instanceof NonRetriableException) { + doCloseStreamAsync(true, exception); + return; + } + + failureInfo.logFailure(type); + setState(StreamState.RECOVERING); + + final Optional finalException = exception; + recoverStream() + .whenComplete( + (r, e) -> { + if (e == null) { + setState(StreamState.OPENED); + logger.info("Stream recovered with id {}", streamId.orElse("unknown")); + } else { + logger.error("Stream recovery failed", e); + doCloseStream(true, finalException); + } + }); + } + } + + private CompletableFuture handleStreamFailedAsync( + StreamFailureType type, Optional error) { + return CompletableFuture.runAsync(() -> handleStreamFailed(type, error), executor); + } + + private CompletableFuture recoverStream() { + CompletableFuture result = new CompletableFuture<>(); + CompletableFuture.runAsync( + () -> { + if (!options.recovery()) { + result.completeExceptionally(new ZerobusException("Recovery is disabled")); + return; + } + logger.warn("Attempting stream recovery for {}", streamId.orElse("unknown")); + doCloseStream(false, Optional.empty()); + + synchronized (this) { + int retries = options.recoveryRetries(); + int leftRetries = Math.max(0, retries - failureInfo.getFailureCounts() + 1); + if (leftRetries == 0) { + result.completeExceptionally(new ZerobusException("Recovery retries exhausted")); + return; + } + + runWithRetries( + leftRetries, + "RecoverStream", + () -> { + CompletableFuture attempt = new CompletableFuture<>(); + doCreateStream() + .whenComplete( + (r, e) -> { + if (e != null) { + attempt.completeExceptionally(e); + } else { + // Reassign offset IDs to all pending batches for resending + reassignBatchOffsets(); + // Reset graceful close watermark + gracefulCloseWatermark = -1; + attempt.complete(null); + } + }); + return attempt; + }) + .whenComplete( + (r, e) -> { + if (e == null) { + logger.info( + "Recovery completed, new stream id: {}", streamId.orElse("unknown")); + 
result.complete(null); + } else { + logger.error("Recovery failed", e); + result.completeExceptionally(e); + } + }); + } + }, + executor); + return result; + } + + // ==================== Background Tasks ==================== + + private void initBackgroundTasks() { + senderTask = + new BackgroundTask( + token -> { + // Check if sender should be paused (graceful close in progress) + synchronized (this) { + if (state == StreamState.PAUSED) { + try { + this.wait(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return; + } + } + + // Try to observe the next batch from the landing zone + InflightBatch
<P>
    batch = null; + try { + batch = landingZone.tryObserve(100, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + + if (batch == null) { + // No batch available, check if we should continue waiting + synchronized (this) { + if (state != StreamState.OPENED + && state != StreamState.FLUSHING + && state != StreamState.CLOSED) { + try { + this.wait(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + return; + } + + // Build the request from the batch + EphemeralStreamRequest request = batch.encodedBatch.toRequest(batch.offsetId); + + if (grpcStream.isPresent()) { + ClientCallStreamObserver stream = grpcStream.get(); + synchronized (this) { + while (!stream.isReady() && !token.isDone()) { + try { + this.wait(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + } + if (!token.isDone()) { + try { + stream.onNext(request); + failureInfo.resetFailure(StreamFailureType.SENDING_MESSAGE); + } catch (Exception e) { + logger.error("Error sending batch", e); + handleStreamFailedAsync(StreamFailureType.SENDING_MESSAGE, Optional.of(e)); + synchronized (this) { + while ((state == StreamState.OPENED || state == StreamState.FLUSHING) + && !token.isDone()) { + try { + this.wait(); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + break; + } + } + } + } + } + } + }, + error -> { + logger.error("Sender task failed", error); + doCloseStreamAsync( + true, Optional.of(new ZerobusException("Sender task failed", error))); + }, + executor); + + unresponsivenessTask = + new BackgroundTask( + token -> { + long startTime = System.currentTimeMillis(); + synchronized (this) { + if (state == StreamState.UNINITIALIZED + || state == StreamState.CLOSED + || state == StreamState.FAILED) { + return; + } + if (state == StreamState.RECOVERING) { + try { + this.wait(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return; + } + if (landingZone.inflightCount() == 0) { + try { + this.wait(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return; + } + + long prevOffset = latestAckedOffsetId; + while (prevOffset == latestAckedOffsetId) { + long remaining = + options.serverLackOfAckTimeoutMs() - (System.currentTimeMillis() - startTime); + if (remaining <= 0) { + handleStreamFailedAsync( + StreamFailureType.SERVER_UNRESPONSIVE, + Optional.of(new ZerobusException("Server unresponsive"))); + return; + } + try { + this.wait(remaining); + if (token.isDone()) return; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + } + } + }, + error -> { + logger.error("Unresponsiveness task failed", error); + doCloseStreamAsync( + true, Optional.of(new ZerobusException("Unresponsiveness task failed", error))); + }, + executor); + } + + private void initAckReceiver() { + ackReceiver = + new ClientResponseObserver() { + private Optional receiverStreamId = Optional.empty(); + + @Override + public void beforeStart(ClientCallStreamObserver stream) { + stream.setOnReadyHandler( + () -> { + synchronized (BaseZerobusStream.this) { + BaseZerobusStream.this.notifyAll(); + } + }); + } + + @Override + public void onNext(EphemeralStreamResponse response) { + switch (response.getPayloadCase()) { + case CREATE_STREAM_RESPONSE: + String id = response.getCreateStreamResponse().getStreamId(); + if (id == null || id.isEmpty()) { + throw new RuntimeException(new ZerobusException("Missing 
stream id in response")); + } + receiverStreamId = Optional.of(id); + logger.debug("Stream created with id {}", id); + streamCreatedEvent.get().complete(id); + break; + + case INGEST_RECORD_RESPONSE: + String expectedId = + receiverStreamId.orElseThrow( + () -> + new RuntimeException(new ZerobusException("Got ack before stream id"))); + long ackedOffset = response.getIngestRecordResponse().getDurabilityAckUpToOffset(); + logger.debug("Acked offset {}", ackedOffset); + + synchronized (BaseZerobusStream.this) { + if (!streamId.isPresent() || !expectedId.equals(streamId.get())) { + return; // Stale ack from old stream + } + failureInfo.resetFailure(StreamFailureType.SERVER_CLOSED_STREAM); + failureInfo.resetFailure(StreamFailureType.SERVER_UNRESPONSIVE); + latestAckedOffsetId = Math.max(latestAckedOffsetId, ackedOffset); + + // Remove all acknowledged batches from the inflight queue + // removeObserved() removes the oldest inflight batch and returns it + while (landingZone.inflightCount() > 0) { + // Peek at the oldest inflight batch to check its offset + List> all = landingZone.peekAll(); + if (all.isEmpty()) break; + // First item in peekAll is the oldest inflight batch + InflightBatch
<P> oldest = all.get(0);
+                      if (oldest.offsetId > ackedOffset) break;
+
+                      InflightBatch<P>
    removed = landingZone.removeObserved(); + if (removed != null) { + removed.ackPromise.complete(removed.offsetId); + } + } + BaseZerobusStream.this.notifyAll(); + } + + // Invoke offset callback if set + if (options.offsetCallback().isPresent()) { + CompletableFuture.runAsync( + () -> options.offsetCallback().get().accept(ackedOffset), executor) + .exceptionally( + e -> { + logger.error( + "Exception in offset callback for offset {}", ackedOffset, e); + return null; + }); + } + break; + + case CLOSE_STREAM_SIGNAL: + if (options.recovery()) { + CloseStreamSignal closeSignal = response.getCloseStreamSignal(); + + // Determine wait duration: use user-configured value if set, otherwise server's + long serverWaitMs = 0; + if (closeSignal.hasDuration()) { + com.google.protobuf.Duration duration = closeSignal.getDuration(); + serverWaitMs = duration.getSeconds() * 1000 + duration.getNanos() / 1_000_000; + } + final long waitMs = options.streamPausedMaxWaitTimeMs().orElse(serverWaitMs); + + failureInfo.resetFailure(StreamFailureType.SERVER_CLOSED_STREAM); + + // Capture watermark and transition to PAUSED state + // Ingestion can continue - records queue in landing zone for after recovery + synchronized (BaseZerobusStream.this) { + if (state == StreamState.OPENED || state == StreamState.FLUSHING) { + gracefulCloseWatermark = lastSentOffsetId; + setState(StreamState.PAUSED); + logger.info( + "Server signaled stream close, entering PAUSED state for {}ms, watermark={}", + waitMs, + gracefulCloseWatermark); + } else { + // Already in a terminal or transitional state, skip graceful close + break; + } + } + + // Edge case: no batches ever sent + if (gracefulCloseWatermark < 0) { + logger.info( + "No pending batches during graceful close, triggering immediate recovery"); + handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); + break; + } + + if (waitMs > 0) { + gracefulCloseTask = + CompletableFuture.runAsync( + () -> { + long startTime = System.currentTimeMillis(); + synchronized (BaseZerobusStream.this) { + // Wait for watermark batches to be acked (not all batches) + while (state == StreamState.PAUSED + && latestAckedOffsetId < gracefulCloseWatermark) { + long elapsed = System.currentTimeMillis() - startTime; + long remaining = waitMs - elapsed; + if (remaining <= 0) { + break; + } + try { + BaseZerobusStream.this.wait(remaining); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + if (latestAckedOffsetId >= gracefulCloseWatermark) { + logger.info( + "All watermark batches (up to offset {}) acked during graceful close, triggering recovery", + gracefulCloseWatermark); + } else { + logger.info( + "Graceful close timeout, watermark {} not fully acked (latest acked: {}), triggering recovery", + gracefulCloseWatermark, + latestAckedOffsetId); + } + } + handleStreamFailed( + StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); + }, + executor) + .exceptionally( + e -> { + logger.error("Error during graceful close wait", e); + handleStreamFailed( + StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); + return null; + }); + } else { + handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); + } + } + break; + + default: + throw new RuntimeException(new ZerobusException("Unknown response type")); + } + } + + @Override + public void onError(Throwable t) { + synchronized (BaseZerobusStream.this) { + if (state == StreamState.CLOSED && !grpcStream.isPresent()) { + logger.debug("Ignoring error on closed stream: 
{}", t.getMessage()); + return; + } + } + Optional error = Optional.of(t); + if (t instanceof StatusRuntimeException) { + Status.Code code = ((StatusRuntimeException) t).getStatus().getCode(); + if (GrpcErrorHandling.isNonRetriable(code)) { + error = + Optional.of( + new NonRetriableException( + "Non-retriable gRPC error: " + t.getMessage(), t)); + } + } + handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, error); + } + + @Override + public void onCompleted() { + logger.debug("Server closed the stream"); + handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); + } + }; + } + + // ==================== Utility Methods ==================== + + private CompletableFuture runWithTimeout( + long timeoutMs, Supplier> operation) { + AtomicBoolean done = new AtomicBoolean(false); + CompletableFuture future = operation.get(); + future.whenComplete( + (r, e) -> { + synchronized (done) { + done.set(true); + done.notifyAll(); + } + }); + + CompletableFuture timeout = + CompletableFuture.runAsync( + () -> { + synchronized (done) { + try { + done.wait(timeoutMs); + if (!done.get()) { + throw new RuntimeException(new TimeoutException("Operation timed out")); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + }, + executor); + + return CompletableFuture.anyOf(future, timeout).thenApply(r -> null); + } + + private CompletableFuture runWithRetries( + int maxRetries, String context, Supplier> operation) { + CompletableFuture result = new CompletableFuture<>(); + int backoffMs = options.recovery() ? options.recoveryBackoffMs() : 0; + + class RetryHelper { + void attempt(int n) { + logger.debug("[{}] Attempt {}", context, n + 1); + operation + .get() + .whenComplete( + (r, e) -> { + if (e == null) { + result.complete(r); + } else if (e instanceof NonRetriableException + || e.getCause() instanceof NonRetriableException) { + result.completeExceptionally(e); + } else if (n < maxRetries - 1) { + CompletableFuture.runAsync( + () -> { + try { + Thread.sleep(backoffMs); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + } + attempt(n + 1); + }, + executor); + } else { + result.completeExceptionally(e); + } + }); + } + } + new RetryHelper().attempt(0); + return result; + } + + private Throwable wrapException(Throwable e, String message) { + if (e instanceof StatusRuntimeException) { + Status.Code code = ((StatusRuntimeException) e).getStatus().getCode(); + if (GrpcErrorHandling.isNonRetriable(code)) { + return new NonRetriableException(message + ": " + e.getMessage(), e); + } + } + if (e instanceof NonRetriableException) { + return new NonRetriableException(message + ": " + e.getMessage(), e); + } + return new ZerobusException(message + ": " + e.getMessage(), e); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/DualTypeStream.java b/sdk/src/main/java/com/databricks/zerobus/stream/DualTypeStream.java new file mode 100644 index 0000000..fe60ed0 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/DualTypeStream.java @@ -0,0 +1,95 @@ +package com.databricks.zerobus.stream; + +import com.databricks.zerobus.StreamConfigurationOptions; +import com.databricks.zerobus.ZerobusGrpc.ZerobusStub; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.schema.BaseTableProperties; +import com.databricks.zerobus.tls.TlsConfig; +import com.google.protobuf.DescriptorProtos; +import 
java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.function.Supplier;
+import javax.annotation.Nonnull;
+
+/**
+ * Abstract base class for Zerobus streams with both primary and secondary record types.
+ *
+ * <p>This class extends {@link BaseZerobusStream} to add support for a secondary record type. The
+ * primary type is typically the "structured" or "native" type, while the secondary type is the
+ * "raw" or "serialized" type.
+ *
+ * <p>For protobuf streams:
+ *
+ * <ul>
+ *   <li>Primary: {@code T extends Message} - structured protobuf messages
+ *   <li>Secondary: {@code byte[]} - pre-serialized bytes
+ * </ul>
+ *
+ * <p>For JSON streams:
+ *
+ * <ul>
+ *   <li>Primary: {@code Map<String, Object>} - structured Map records
+ *   <li>Secondary: {@code String} - raw JSON strings
+ * </ul>
+ *
+ * @param <P> The primary record type (structured)
+ * @param <S> The secondary record type (raw/serialized)
+ * @see BaseZerobusStream
+ * @see ProtoZerobusStream
+ * @see JsonZerobusStream
+ */
+public abstract class DualTypeStream<P, S> extends BaseZerobusStream<P> {
+
+  // ==================== Constructor ====================
+
+  protected DualTypeStream(
+      Supplier<ZerobusStub> stubSupplier,
+      BaseTableProperties tableProperties,
+      String clientId,
+      String clientSecret,
+      HeadersProvider headersProvider,
+      TlsConfig tlsConfig,
+      StreamConfigurationOptions options,
+      ExecutorService executor,
+      DescriptorProtos.DescriptorProto descriptorProto) {
+    super(
+        stubSupplier,
+        tableProperties,
+        clientId,
+        clientSecret,
+        headersProvider,
+        tlsConfig,
+        options,
+        executor,
+        descriptorProto);
+  }
+
+  // ==================== Secondary Type Methods ====================
+  //
+  // Note: Secondary type methods (ingest(S) and ingestBatch(SecondaryBatch)) are NOT declared
+  // as abstract here due to Java type erasure. Abstract methods ingest(P) and ingest(S) would
+  // have the same erasure (ingest(Object)) and conflict.
+  //
+  // Concrete subclasses MUST provide:
+  //   - public long ingest(S record) throws ZerobusException
+  //   - public Long ingestBatch(SecondaryBatch batch) throws ZerobusException
+  //
+  // This works in concrete classes because their type parameters have different erasures:
+  //   - ProtoZerobusStream: ingest(T) erases to ingest(Message), ingest(byte[]) is concrete
+  //   - JsonZerobusStream: ingest(Map) and ingest(String) are both concrete
+
+  // ==================== Override recreate to return DualTypeStream ====================
+
+  /**
+   * Recreates this stream with the same configuration.
+   *
+   * <p>Creates a new stream and re-ingests any unacknowledged records from this stream.
+   *
+   * @param sdk The SDK instance to use for recreation
+   * @return A future that completes with the new stream
+   */
+  @Override
+  @Nonnull
+  public abstract CompletableFuture<? extends DualTypeStream<P, S>> recreate(ZerobusSdk sdk);
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/EncodedBatch.java b/sdk/src/main/java/com/databricks/zerobus/stream/EncodedBatch.java
new file mode 100644
index 0000000..0050b21
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/stream/EncodedBatch.java
@@ -0,0 +1,126 @@
+package com.databricks.zerobus.stream;
+
+import com.databricks.zerobus.EphemeralStreamRequest;
+import com.databricks.zerobus.IngestRecordBatchRequest;
+import com.databricks.zerobus.IngestRecordRequest;
+import com.databricks.zerobus.JsonRecordBatch;
+import com.databricks.zerobus.ProtoEncodedRecordBatch;
+import com.google.protobuf.ByteString;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Represents a batch of encoded records ready for transmission.
+ *
+ * <p>Single records are represented as a batch of size 1. This provides a unified internal API
+ * where all ingestion goes through the same batch-based code path.
+ *
+ * <p>Subclasses handle the specific encoding format (protobuf or JSON).
+ */
+abstract class EncodedBatch {
+
+  /** Returns the number of records in this batch. */
+  abstract int size();
+
+  /** Converts this batch to a gRPC request with the given offset ID. */
+  abstract EphemeralStreamRequest toRequest(long offsetId);
+
+  /** Creates a proto batch containing a single encoded record. */
+  static ProtoEncodedBatch protoSingle(ByteString encoded) {
+    return new ProtoEncodedBatch(Collections.singletonList(encoded));
+  }
+
+  /** Creates a proto batch containing multiple encoded records. */
+  static ProtoEncodedBatch protoBatch(List<ByteString> encoded) {
+    return new ProtoEncodedBatch(encoded);
+  }
+
+  /** Creates a JSON batch containing a single encoded record. */
+  static JsonEncodedBatch jsonSingle(String json) {
+    return new JsonEncodedBatch(Collections.singletonList(json));
+  }
+
+  /** Creates a JSON batch containing multiple encoded records. */
+  static JsonEncodedBatch jsonBatch(List<String> json) {
+    return new JsonEncodedBatch(json);
+  }
+}
+
+/** Batch of protobuf-encoded records. */
+final class ProtoEncodedBatch extends EncodedBatch {
+  final List<ByteString> encodedRecords;
+
+  ProtoEncodedBatch(List<ByteString> encodedRecords) {
+    this.encodedRecords = encodedRecords;
+  }
+
+  @Override
+  int size() {
+    return encodedRecords.size();
+  }
+
+  @Override
+  EphemeralStreamRequest toRequest(long offsetId) {
+    if (encodedRecords.size() == 1) {
+      // Single record optimization - use IngestRecordRequest
+      return EphemeralStreamRequest.newBuilder()
+          .setIngestRecord(
+              IngestRecordRequest.newBuilder()
+                  .setOffsetId(offsetId)
+                  .setProtoEncodedRecord(encodedRecords.get(0))
+                  .build())
+          .build();
+    } else {
+      // Batch - use IngestRecordBatchRequest
+      ProtoEncodedRecordBatch.Builder batchBuilder = ProtoEncodedRecordBatch.newBuilder();
+      for (ByteString record : encodedRecords) {
+        batchBuilder.addRecords(record);
+      }
+      return EphemeralStreamRequest.newBuilder()
+          .setIngestRecordBatch(
+              IngestRecordBatchRequest.newBuilder()
+                  .setOffsetId(offsetId)
+                  .setProtoEncodedBatch(batchBuilder.build())
+                  .build())
+          .build();
+    }
+  }
+}
+
+/** Batch of JSON-encoded records. */
+final class JsonEncodedBatch extends EncodedBatch {
+  final List<String> jsonRecords;
+
+  JsonEncodedBatch(List<String> jsonRecords) {
+    this.jsonRecords = jsonRecords;
+  }
+
+  @Override
+  int size() {
+    return jsonRecords.size();
+  }
+
+  @Override
+  EphemeralStreamRequest toRequest(long offsetId) {
+    if (jsonRecords.size() == 1) {
+      // Single record optimization - use IngestRecordRequest
+      return EphemeralStreamRequest.newBuilder()
+          .setIngestRecord(
+              IngestRecordRequest.newBuilder()
+                  .setOffsetId(offsetId)
+                  .setJsonRecord(jsonRecords.get(0))
+                  .build())
+          .build();
+    } else {
+      // Batch - use IngestRecordBatchRequest; set the offset ID here too, mirroring the proto path
+      JsonRecordBatch.Builder batchBuilder = JsonRecordBatch.newBuilder();
+      for (String record : jsonRecords) {
+        batchBuilder.addRecords(record);
+      }
+      return EphemeralStreamRequest.newBuilder()
+          .setIngestRecordBatch(
+              IngestRecordBatchRequest.newBuilder()
+                  .setOffsetId(offsetId)
+                  .setJsonBatch(batchBuilder.build())
+                  .build())
+          .build();
+    }
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/GrpcErrorHandling.java b/sdk/src/main/java/com/databricks/zerobus/stream/GrpcErrorHandling.java
new file mode 100644
index 0000000..e0e6cc3
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/stream/GrpcErrorHandling.java
@@ -0,0 +1,21 @@
+package com.databricks.zerobus.stream;
+
+import io.grpc.Status;
+import java.util.HashSet;
+import java.util.Set;
+
+/** Utility for classifying gRPC errors as retriable or non-retriable. */
+public class GrpcErrorHandling {
+  private static final Set<Status.Code> NON_RETRIABLE_CODES = new HashSet<>();
+
+  static {
+    NON_RETRIABLE_CODES.add(Status.Code.INVALID_ARGUMENT);
+    NON_RETRIABLE_CODES.add(Status.Code.NOT_FOUND);
+    NON_RETRIABLE_CODES.add(Status.Code.UNAUTHENTICATED);
+    NON_RETRIABLE_CODES.add(Status.Code.OUT_OF_RANGE);
+  }
+
+  public static boolean isNonRetriable(Status.Code code) {
+    return NON_RETRIABLE_CODES.contains(code);
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/InflightBatch.java b/sdk/src/main/java/com/databricks/zerobus/stream/InflightBatch.java
new file mode 100644
index 0000000..17265cc
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/stream/InflightBatch.java
@@ -0,0 +1,64 @@
+package com.databricks.zerobus.stream;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import javax.annotation.Nullable;
+
+/**
+ * Tracks a batch of records that have been queued for sending but not yet acknowledged.
+ *
+ * <p>This class stores both the original records (for {@code getUnackedRecords()}) and the encoded
+ * batch (for resending during recovery).
+ *
+ * <p>For single record ingestion, this holds a batch of size 1.
+ *
+ * @param <P> The primary record type (Message for proto, Map for JSON)
+ */
+final class InflightBatch<P> {
+
+  /**
+   * Original records in this batch, for returning via {@code getUnackedRecords()}.
+   *
+   * <p>May be null if records were ingested in serialized form (e.g., {@code ingest(byte[])} or
+   * {@code ingest(String)}).
+   */
+  @Nullable final List<P> records;
+
+  /** Encoded batch payload for sending/resending over the wire. */
+  final EncodedBatch encodedBatch;
+
+  /**
+   * Logical offset ID for this batch.
+   *
+   * <p>This is mutable because during stream recovery, batches need to be reassigned new offset
+   * IDs when they are re-sent to the server.
+   */
+  long offsetId;
+
+  /** Promise that completes when the server acknowledges this batch. */
+  final CompletableFuture<Long> ackPromise;
+
+  /**
+   * Creates a new inflight batch.
+   *
+   * @param records Original records (null if ingested as serialized data)
+   * @param encodedBatch Encoded payload for transmission
+   * @param offsetId Logical offset ID
+   * @param ackPromise Promise to complete on acknowledgment
+   */
+  InflightBatch(
+      @Nullable List<P> records,
+      EncodedBatch encodedBatch,
+      long offsetId,
+      CompletableFuture<Long> ackPromise) {
+    this.records = records;
+    this.encodedBatch = encodedBatch;
+    this.offsetId = offsetId;
+    this.ackPromise = ackPromise;
+  }
+
+  /** Returns the number of records in this batch. */
+  int size() {
+    return encodedBatch.size();
+  }
+}
diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/JsonZerobusStream.java b/sdk/src/main/java/com/databricks/zerobus/stream/JsonZerobusStream.java
new file mode 100644
index 0000000..47444a0
--- /dev/null
+++ b/sdk/src/main/java/com/databricks/zerobus/stream/JsonZerobusStream.java
@@ -0,0 +1,368 @@
+package com.databricks.zerobus.stream;
+
+import com.databricks.zerobus.RecordType;
+import com.databricks.zerobus.StreamConfigurationOptions;
+import com.databricks.zerobus.ZerobusException;
+import com.databricks.zerobus.ZerobusGrpc.ZerobusStub;
+import com.databricks.zerobus.ZerobusSdk;
+import com.databricks.zerobus.ZerobusStreamBuilder;
+import com.databricks.zerobus.auth.HeadersProvider;
+import com.databricks.zerobus.batch.PrimaryBatch;
+import com.databricks.zerobus.batch.json.MapBatch;
+import com.databricks.zerobus.batch.json.StringBatch;
+import com.databricks.zerobus.common.json.Json;
+import com.databricks.zerobus.schema.JsonTableProperties;
+import com.databricks.zerobus.tls.TlsConfig;
+import com.google.protobuf.DescriptorProtos;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.function.Supplier;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+/**
+ * Zerobus stream for ingesting JSON records into a table.
+ *
+ * <p>This stream type is used when your data is already in JSON format and you don't want to
+ * define a protobuf schema. JSON records are sent directly to the server as strings.
+ *
+ * <p>Example usage:
+ *
+ * <pre>{@code
+ * JsonZerobusStream stream = sdk.streamBuilder("catalog.schema.table")
+ *     .clientCredentials(clientId, clientSecret)
+ *     .json()
+ *     .build()
+ *     .join();
+ *
+ * String jsonRecord = "{\"field\": \"value\", \"count\": 42}";
+ * stream.ingest(jsonRecord);
+ * stream.close();
+ * }</pre>
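+ *
+ * <p>Batches of JSON strings share a single offset and are acked atomically; a sketch against the
+ * same stream:
+ *
+ * <pre>{@code
+ * Long offset = stream.ingestBatch(StringBatch.of(
+ *     "{\"city\": \"Paris\", \"population\": 2100000}",
+ *     "{\"city\": \"Lima\", \"population\": 10000000}"));
+ * stream.flush(); // wait for everything queued so far to be acknowledged
+ * }</pre>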
+ *
+ * @see ProtoZerobusStream
+ * @see DualTypeStream
+ * @see ZerobusSdk#streamBuilder(String)
+ */
+public class JsonZerobusStream extends DualTypeStream<Map<String, Object>, String> {
+
+  // Empty descriptor proto for JSON streams (required by server but not used for schema)
+  private static final DescriptorProtos.DescriptorProto EMPTY_DESCRIPTOR =
+      DescriptorProtos.DescriptorProto.newBuilder().setName("JsonRecord").build();
+
+  // ==================== Constructor ====================
+
+  /**
+   * Creates a new JsonZerobusStream.
+   *
+   * <p>Use {@link ZerobusSdk#streamBuilder(String)} instead of calling this constructor directly.
+   */
+  public JsonZerobusStream(
+      Supplier<ZerobusStub> stubSupplier,
+      JsonTableProperties tableProperties,
+      String clientId,
+      String clientSecret,
+      HeadersProvider headersProvider,
+      TlsConfig tlsConfig,
+      StreamConfigurationOptions options,
+      ExecutorService executor) {
+    super(
+        stubSupplier,
+        tableProperties,
+        clientId,
+        clientSecret,
+        headersProvider,
+        tlsConfig,
+        options,
+        executor,
+        EMPTY_DESCRIPTOR);
+  }
+
+  @Override
+  @Nonnull
+  protected RecordType getRecordType() {
+    return RecordType.JSON;
+  }
+
+  // ==================== Public API ====================
+
+  /**
+   * Ingests a JSON record into the stream.
+   *
+   * @param jsonRecord The JSON string to ingest
+   * @return The logical offset ID assigned to this record
+   * @throws ZerobusException if ingestion fails or the record is not a valid JSON string
+   */
+  public long ingest(@Nonnull String jsonRecord) throws ZerobusException {
+    if (jsonRecord == null) {
+      throw new ZerobusException("JSON record cannot be null");
+    }
+    synchronized (this) {
+      try {
+        return doIngestRecord(jsonRecord);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        throw new ZerobusException("Interrupted while enqueuing record", e);
+      }
+    }
+  }
+
+  /**
+   * Ingests a record as a Map into the stream (primary type).
+   *

<p>The Map is serialized to JSON before being sent to the server. The original Map is stored + * internally for recovery via {@link #getUnackedRecords()}. + * + * <p>Supported value types: + * + * <ul> + *   <li>{@code Map} - nested objects + *   <li>{@code Iterable} - arrays + *   <li>{@code String}, {@code Number}, {@code Boolean}, {@code null} + * </ul> + * + * @param record The Map to ingest (will be serialized to JSON) + * @return The logical offset ID assigned to this record + * @throws ZerobusException if ingestion fails or serialization fails + */ + @Override + public long ingest(@Nonnull Map<String, Object> record) throws ZerobusException { + if (record == null) { + throw new ZerobusException("Record cannot be null"); + } + String jsonRecord; + try { + jsonRecord = Json.stringify(record); + } catch (IllegalArgumentException e) { + throw new ZerobusException("Failed to serialize Map to JSON: " + e.getMessage(), e); + } + synchronized (this) { + try { + return doIngestMapRecord(record, jsonRecord); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing record", e); + } + } + }
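As a usage sketch for the Map overload above (not part of the patch; it assumes an already-opened JsonZerobusStream named stream, and uses waitForOffset as exercised by the tests later in this patch):

    Map<String, Object> location = new HashMap<>();
    location.put("lat", 40.7128);
    location.put("lng", -74.0060);

    Map<String, Object> record = new HashMap<>();
    record.put("city_name", "New York");
    record.put("location", location);                      // nested object
    record.put("tags", Arrays.asList("urban", "coastal")); // array value

    long offset = stream.ingest(record); // Map is serialized to JSON internally
    stream.waitForOffset(offset);        // blocks until the server acknowledges it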

+ + /** + * {@inheritDoc} + * + * <p>For JSON streams, use {@link MapBatch#of(Iterable)} to create the batch. The original Maps + * are stored internally for recovery via {@link #getUnackedRecords()}. + */ + @Override + @Nullable public Long ingestBatch(@Nonnull PrimaryBatch<Map<String, Object>> batch) throws ZerobusException { + if (batch == null) { + throw new ZerobusException("Batch cannot be null"); + } + if (!batch.iterator().hasNext()) { + return null; + } + // Convert Maps to JSON strings while keeping original Maps for recovery + List<Map<String, Object>> records = new ArrayList<>(); + List<String> jsonStrings = new ArrayList<>(); + try { + for (Map<String, Object> record : batch) { + records.add(record); + jsonStrings.add(Json.stringify(record)); + } + } catch (IllegalArgumentException e) { + throw new ZerobusException("Failed to serialize Map to JSON: " + e.getMessage(), e); + } + synchronized (this) { + try { + return doIngestMapBatch(records, jsonStrings); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing batch", e); + } + } + } + + /** + * Ingests a batch of JSON records into the stream (secondary type). + * + *

<p>The batch is assigned a single offset ID and acknowledged atomically. + * + * <p>Example: + * + * <pre>{@code
+   * List<String> records = new ArrayList<>();
    +   * records.add("{\"name\": \"Alice\", \"age\": 30}");
    +   * records.add("{\"name\": \"Bob\", \"age\": 25}");
    +   * Long offset = stream.ingestBatch(StringBatch.of(records));
+   * }</pre> + * + * @param stringBatch The batch of JSON strings to ingest + * @return The offset ID for the batch, or null if empty + * @throws ZerobusException if ingestion fails + */ + @Nullable public Long ingestBatch(@Nonnull StringBatch stringBatch) throws ZerobusException { + if (stringBatch == null) { + throw new ZerobusException("String batch cannot be null"); + } + if (!stringBatch.iterator().hasNext()) { + return null; + } + synchronized (this) { + try { + return doIngestBatch(stringBatch); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing batch", e); + } + } + }
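A brief sketch of the batch path under the same assumptions; the null return for an empty batch is easy to miss:

    List<String> rows = Arrays.asList(
        "{\"name\": \"Alice\", \"age\": 30}",
        "{\"name\": \"Bob\", \"age\": 25}");
    Long offset = stream.ingestBatch(StringBatch.of(rows));
    if (offset != null) {           // null means the batch was empty
      stream.waitForOffset(offset); // one offset covers the whole batch
    }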

/** + * Ingests a batch of records as Maps into the stream (primary type). + * + * <p>This is a convenience method that accepts {@link MapBatch} directly. + * + * <p>Example: + * + * <pre>{@code
+   * List<Map<String, Object>> records = new ArrayList<>();
    +   * records.add(createRecord("sensor-1", 25));
    +   * records.add(createRecord("sensor-2", 30));
    +   * Long offset = stream.ingestBatch(MapBatch.of(records));
+   * }</pre> + * + * @param mapBatch The batch of Maps to ingest (each will be serialized to JSON) + * @return The offset ID for the batch, or null if empty + * @throws ZerobusException if ingestion fails or serialization fails + */ + @Nullable public Long ingestBatch(@Nonnull MapBatch mapBatch) throws ZerobusException { + return ingestBatch((PrimaryBatch<Map<String, Object>>) mapBatch); + } + + // getUnackedRecords() is now implemented in BaseZerobusStream using LandingZone + // When Maps are ingested, they are stored in InflightBatch.records and can be retrieved + + /** + * Returns the table properties for this stream. + * + * @return The JSON table properties + */ + @Nonnull + public JsonTableProperties getTableProperties() { + return (JsonTableProperties) tableProperties; + } + + /** + * {@inheritDoc} + * + * <p>Recreates this JSON stream with the same configuration. Any unacknowledged records from this + * stream will be re-ingested into the new stream. + */ + @Override + @Nonnull + public CompletableFuture<JsonZerobusStream> recreate(ZerobusSdk sdk) { + Iterator<Map<String, Object>> unackedRecords = getUnackedRecords(); + JsonTableProperties tableProperties = getTableProperties(); + + ZerobusStreamBuilder.AuthenticatedZerobusStreamBuilder builder = + sdk.streamBuilder(tableProperties.getTableName()) + .clientCredentials(clientId, clientSecret) + .options(options) + .headersProvider(headersProvider) + .tlsConfig(tlsConfig); + + return builder + .json() + .build() + .thenApply( + newStream -> { + // Re-ingest unacked records + while (unackedRecords.hasNext()) { + try { + newStream.ingest(unackedRecords.next()); + } catch (ZerobusException e) { + throw new RuntimeException( + "Failed to re-ingest record during stream recreation", e); + } + } + return newStream; + }); + } + + // ==================== Internal Implementation ==================== + + private long doIngestRecord(String jsonRecord) throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + + // Create batch with single JSON record (no original Map stored for String ingestion) + EncodedBatch encodedBatch = EncodedBatch.jsonSingle(jsonRecord); + InflightBatch<Map<String, Object>> batch = + new InflightBatch<>(null, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + /** Internal method to ingest a Map record with the original Map stored for recovery. */ + private long doIngestMapRecord(Map<String, Object> record, String jsonRecord) + throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + + // Create batch with single JSON record and store original Map for recovery + EncodedBatch encodedBatch = EncodedBatch.jsonSingle(jsonRecord); + InflightBatch<Map<String, Object>> batch = + new InflightBatch<>( + java.util.Collections.singletonList(record), + encodedBatch, + offsetId, + new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + private Long doIngestBatch(Iterable<String> jsonRecords) + throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + List<String> jsonList = new ArrayList<>(); + + for (String record : jsonRecords) { + jsonList.add(record); + } + + // Create batch with multiple JSON records (no original Maps stored) + EncodedBatch encodedBatch = EncodedBatch.jsonBatch(jsonList); + InflightBatch<Map<String, Object>> batch = + new InflightBatch<>(null, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + /** Internal method to ingest a batch of Map records with original Maps stored for recovery. */ + private Long doIngestMapBatch(List<Map<String, Object>> records, List<String> jsonRecords) + throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + + // Create batch with multiple JSON records and store original Maps for recovery + EncodedBatch encodedBatch = EncodedBatch.jsonBatch(jsonRecords); + InflightBatch<Map<String, Object>> batch = + new InflightBatch<>(records, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + // enqueueRecordsForResending() is no longer needed - recovery is handled by LandingZone +} diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/LandingZone.java b/sdk/src/main/java/com/databricks/zerobus/stream/LandingZone.java new file mode 100644 index 0000000..d022033 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/LandingZone.java @@ -0,0 +1,331 @@ +package com.databricks.zerobus.stream; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import javax.annotation.Nullable; + +/** + * Manages batches through their lifecycle: pending → inflight → acknowledged. + * + *

<p>This class provides thread-safe coordination between: + * + * <ul> + *   <li>Producer threads calling {@link #add} to queue batches + *   <li>Sender thread calling {@link #observe} to get batches for sending + *   <li>Receiver thread calling {@link #removeObserved} when batches are acknowledged + * </ul> + * + * <p>Backpressure is enforced via a semaphore that limits the maximum number of batches that can + * be pending or inflight. + * + * @param <R> The primary record type (Message for proto, Map for JSON) + */ +final class LandingZone<R> { + + private final LinkedList<InflightBatch<R>> pendingQueue = new LinkedList<>(); + private final LinkedList<InflightBatch<R>> inflightQueue = new LinkedList<>(); + + private final ReentrantLock lock = new ReentrantLock(); + private final Condition notEmpty = lock.newCondition(); + private final Semaphore semaphore; + + private volatile boolean closed = false; + + /** + * Creates a new landing zone with the specified capacity. + * + * @param maxInflightBatches Maximum number of batches that can be pending or inflight + */ + LandingZone(int maxInflightBatches) { + this.semaphore = new Semaphore(maxInflightBatches); + } + + /** + * Adds a batch to the pending queue. + * + * <p>This method blocks if the maximum inflight capacity is reached, providing backpressure. + * + * @param batch The batch to add + * @throws InterruptedException if interrupted while waiting for capacity + * @throws IllegalStateException if the landing zone is closed + */ + void add(InflightBatch<R> batch) throws InterruptedException { + if (closed) { + throw new IllegalStateException("LandingZone is closed"); + } + + // Acquire permit (blocks if at capacity) + semaphore.acquire(); + + lock.lock(); + try { + if (closed) { + semaphore.release(); + throw new IllegalStateException("LandingZone is closed"); + } + pendingQueue.addLast(batch); + notEmpty.signal(); + } finally { + lock.unlock(); + } + }
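To make the backpressure mechanics concrete, here is a minimal self-contained sketch (illustrative only, not part of the SDK) of the same semaphore-plus-lock idiom: acquire() blocks producers once the capacity is exhausted, and each acknowledgment releases one permit:

    import java.util.ArrayDeque;
    import java.util.Queue;
    import java.util.concurrent.Semaphore;

    final class BoundedZone<T> {
      private final Queue<T> pending = new ArrayDeque<>();
      private final Semaphore permits;

      BoundedZone(int capacity) {
        this.permits = new Semaphore(capacity);
      }

      void add(T item) throws InterruptedException {
        permits.acquire();        // blocks while 'capacity' items are unacknowledged
        synchronized (pending) {
          pending.add(item);
        }
      }

      void acknowledge() {
        permits.release();        // frees a slot, unblocking one waiting add()
      }
    }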

+ + /** + * Tries to add a batch to the pending queue with a timeout. + * + * @param batch The batch to add + * @param timeout Maximum time to wait + * @param unit Time unit for timeout + * @return true if added successfully, false if timeout elapsed + * @throws InterruptedException if interrupted while waiting + * @throws IllegalStateException if the landing zone is closed + */ + boolean tryAdd(InflightBatch<R> batch, long timeout, TimeUnit unit) throws InterruptedException { + if (closed) { + throw new IllegalStateException("LandingZone is closed"); + } + + // Try to acquire permit with timeout + if (!semaphore.tryAcquire(timeout, unit)) { + return false; + } + + lock.lock(); + try { + if (closed) { + semaphore.release(); + throw new IllegalStateException("LandingZone is closed"); + } + pendingQueue.addLast(batch); + notEmpty.signal(); + return true; + } finally { + lock.unlock(); + } + } + + /** + * Gets the next pending batch and moves it to the inflight queue. + * + * <p>This method blocks until a batch is available. + * + * @return The next batch to send + * @throws InterruptedException if interrupted while waiting + */ + InflightBatch<R> observe() throws InterruptedException { + lock.lock(); + try { + while (pendingQueue.isEmpty() && !closed) { + notEmpty.await(); + } + + if (closed && pendingQueue.isEmpty()) { + throw new InterruptedException("LandingZone closed"); + } + + InflightBatch<R> batch = pendingQueue.removeFirst(); + inflightQueue.addLast(batch); + return batch; + } finally { + lock.unlock(); + } + } + + /** + * Tries to get the next pending batch with a timeout. + * + * @param timeout Maximum time to wait + * @param unit Time unit for timeout + * @return The next batch, or null if timeout elapsed or closed + * @throws InterruptedException if interrupted while waiting + */ + @Nullable InflightBatch<R> tryObserve(long timeout, TimeUnit unit) throws InterruptedException { + long nanos = unit.toNanos(timeout); + lock.lock(); + try { + while (pendingQueue.isEmpty() && !closed) { + if (nanos <= 0) { + return null; + } + nanos = notEmpty.awaitNanos(nanos); + } + + if (closed && pendingQueue.isEmpty()) { + return null; + } + + InflightBatch<R> batch = pendingQueue.removeFirst(); + inflightQueue.addLast(batch); + return batch; + } finally { + lock.unlock(); + } + }
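A sketch of the sender thread these methods are designed for (illustrative; zone is assumed to be a LandingZone<MyRecord>, and sendToServer is a placeholder for the gRPC send path):

    while (!zone.isClosed()) {
      InflightBatch<MyRecord> batch = zone.tryObserve(100, TimeUnit.MILLISECONDS);
      if (batch == null) {
        continue;           // timed out or closing; re-check the closed flag
      }
      sendToServer(batch);  // the batch is now tracked in the inflight queue
    }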

+ + /** + * Removes the oldest batch from the inflight queue (after acknowledgment). + * + * <p>This releases a semaphore permit, allowing another batch to be added. + * + * @return The acknowledged batch, or null if the inflight queue is empty + */ + @Nullable InflightBatch<R> removeObserved() { + lock.lock(); + try { + InflightBatch<R> batch = inflightQueue.pollFirst(); + if (batch != null) { + semaphore.release(); + } + return batch; + } finally { + lock.unlock(); + } + }
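And the matching receiver side, assuming server acks arrive in send order so the oldest inflight batch is always the one being acknowledged (awaitNextAck is a placeholder for the gRPC response handler, and completing the promise with null follows the CompletableFuture usage elsewhere in this patch):

    while (!zone.isClosed()) {
      awaitNextAck();                                         // placeholder: next server ack
      InflightBatch<MyRecord> acked = zone.removeObserved();  // releases one permit
      if (acked != null) {
        acked.ackPromise.complete(null);                      // wakes waiters on this offset
      }
    }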

+ + /** + * Resets for recovery: moves all inflight batches back to the pending queue. + * + * <p>This is called when a stream fails and needs to retry sending. The batches are moved in + * reverse order so they maintain their original order when re-observed. + */ + void resetObserve() { + lock.lock(); + try { + // Move inflight back to front of pending (in reverse order to maintain order) + while (!inflightQueue.isEmpty()) { + InflightBatch<R> batch = inflightQueue.removeLast(); + pendingQueue.addFirst(batch); + } + notEmpty.signalAll(); + } finally { + lock.unlock(); + } + }
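The reverse-order requeue is subtle; this tiny standalone demo (plain JDK, not SDK code) shows why removeLast/addFirst restores the original send order:

    import java.util.Arrays;
    import java.util.LinkedList;

    public class RequeueDemo {
      public static void main(String[] args) {
        LinkedList<Integer> inflight = new LinkedList<>(Arrays.asList(1, 2, 3));
        LinkedList<Integer> pending = new LinkedList<>(Arrays.asList(4, 5));
        while (!inflight.isEmpty()) {
          pending.addFirst(inflight.removeLast()); // take newest inflight, push to front
        }
        System.out.println(pending); // [1, 2, 3, 4, 5] -- original order preserved
      }
    }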

+ + /** + * Removes and returns all batches (both pending and inflight). + * + * <p>This is called when the stream fails permanently. All semaphore permits are released. + * + * @return List of all batches that were in the landing zone + */ + List<InflightBatch<R>> removeAll() { + lock.lock(); + try { + List<InflightBatch<R>> all = new ArrayList<>(pendingQueue.size() + inflightQueue.size()); + all.addAll(inflightQueue); + all.addAll(pendingQueue); + + int count = inflightQueue.size() + pendingQueue.size(); + inflightQueue.clear(); + pendingQueue.clear(); + + // Release all permits + semaphore.release(count); + + return all; + } finally { + lock.unlock(); + } + } + + /** + * Returns all records from all batches (flattened). + * + * <p>Records from batches where the original records were not stored (e.g., byte[] ingestion) are + * skipped. + * + * @return List of all original records + */ + List<R> getAllRecords() { + lock.lock(); + try { + List<R> records = new ArrayList<>(); + for (InflightBatch<R> batch : inflightQueue) { + if (batch.records != null) { + records.addAll(batch.records); + } + } + for (InflightBatch<R> batch : pendingQueue) { + if (batch.records != null) { + records.addAll(batch.records); + } + } + return records; + } finally { + lock.unlock(); + } + } + + /** + * Returns all batches without removing them. + * + * @return List of all batches (inflight first, then pending) + */ + List<InflightBatch<R>> peekAll() { + lock.lock(); + try { + List<InflightBatch<R>> all = new ArrayList<>(pendingQueue.size() + inflightQueue.size()); + all.addAll(inflightQueue); + all.addAll(pendingQueue); + return all; + } finally { + lock.unlock(); + } + } + + /** Closes this landing zone, waking up any blocked threads. */ + void close() { + lock.lock(); + try { + closed = true; + notEmpty.signalAll(); + } finally { + lock.unlock(); + } + } + + /** Returns whether this landing zone is closed. */ + boolean isClosed() { + return closed; + } + + /** Returns the number of pending batches. */ + int pendingCount() { + lock.lock(); + try { + return pendingQueue.size(); + } finally { + lock.unlock(); + } + } + + /** Returns the number of inflight batches. */ + int inflightCount() { + lock.lock(); + try { + return inflightQueue.size(); + } finally { + lock.unlock(); + } + } + + /** Returns the total number of batches (pending + inflight). */ + int totalCount() { + lock.lock(); + try { + return pendingQueue.size() + inflightQueue.size(); + } finally { + lock.unlock(); + } + } + + /** Returns the number of available permits. */ + int availablePermits() { + return semaphore.availablePermits(); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/ProtoZerobusStream.java b/sdk/src/main/java/com/databricks/zerobus/stream/ProtoZerobusStream.java new file mode 100644 index 0000000..e2993d2 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/ProtoZerobusStream.java @@ -0,0 +1,373 @@ +package com.databricks.zerobus.stream; + +import com.databricks.zerobus.RecordType; +import com.databricks.zerobus.StreamConfigurationOptions; +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusGrpc.ZerobusStub; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.ZerobusStreamBuilder; +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.batch.PrimaryBatch; +import com.databricks.zerobus.batch.proto.BytesBatch; +import com.databricks.zerobus.batch.proto.MessageBatch; +import com.databricks.zerobus.schema.ProtoTableProperties; +import com.databricks.zerobus.tls.TlsConfig; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Zerobus stream for ingesting protobuf records into a table. + * + *
+ * ProtoZerobusStream<MyRecord> stream = sdk.streamBuilder("catalog.schema.table")
    + *     .clientCredentials(clientId, clientSecret)
    + *     .compiledProto(MyRecord.getDefaultInstance())
    + *     .build()
    + *     .join();
    + *
    + * MyRecord record = MyRecord.newBuilder()
    + *     .setField("value")
    + *     .build();
    + * stream.ingest(record);
    + * stream.close();
+ * }</pre> + * + * <p>Example usage with dynamic schema: + * + * <pre>{@code
+ * ProtoZerobusStream<DynamicMessage> stream = sdk.streamBuilder("catalog.schema.table")
    + *     .clientCredentials(clientId, clientSecret)
    + *     .dynamicProto(descriptor)
    + *     .build()
    + *     .join();
    + *
    + * DynamicMessage record = DynamicMessage.newBuilder(descriptor)
    + *     .setField(field, "value")
    + *     .build();
    + * stream.ingest(record);
    + * stream.close();
+ * }</pre> + * + * @param <T> The protobuf message type (primary type) + * @see JsonZerobusStream + * @see DualTypeStream + * @see ZerobusSdk#streamBuilder(String) + */ +public class ProtoZerobusStream<T extends Message> extends DualTypeStream<T, byte[]> { + + // ==================== Constructor ==================== + + /** + * Creates a new ProtoZerobusStream. + * + *

<p>Use {@link ZerobusSdk#streamBuilder(String)} instead of calling this constructor directly. + */ + public ProtoZerobusStream( + Supplier<ZerobusStub> stubSupplier, + ProtoTableProperties tableProperties, + String clientId, + String clientSecret, + HeadersProvider headersProvider, + TlsConfig tlsConfig, + StreamConfigurationOptions options, + ExecutorService executor) { + super( + stubSupplier, + tableProperties, + clientId, + clientSecret, + headersProvider, + tlsConfig, + options, + executor, + tableProperties.getDescriptorProto()); + } + + @Override + @Nonnull + protected RecordType getRecordType() { + return RecordType.PROTO; + } + + // ==================== Public API ==================== + + /** + * Ingests a protobuf record into the stream (primary type). + * + * @param record The protobuf record to ingest + * @return The logical offset ID assigned to this record + * @throws ZerobusException if ingestion fails + */ + @Override + public long ingest(@Nonnull T record) throws ZerobusException { + if (record == null) { + throw new ZerobusException("Record cannot be null"); + } + synchronized (this) { + try { + return doIngestRecord(record); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing record", e); + } + } + }
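A minimal end-to-end sketch for this primary-type path (assumes a generated message class MyRecord and an open stream, as in the class Javadoc; waitForOffset is exercised by the tests later in this patch):

    MyRecord record = MyRecord.newBuilder()
        .setField("value")
        .build();
    long offset = stream.ingest(record); // enqueues; may block on backpressure
    stream.waitForOffset(offset);        // returns once the record is durable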

+ + /** + * Ingests a pre-serialized protobuf record into the stream (secondary type). + * + * <p>Use this method when you have pre-serialized protobuf bytes (e.g., from Kafka, another + * system, or manual serialization). The bytes are sent directly without additional processing. + * + * @param bytes The serialized protobuf bytes to ingest + * @return The logical offset ID assigned to this record + * @throws ZerobusException if ingestion fails + */ + // Note: This is the secondary type ingest method (S = byte[]) + public long ingest(@Nonnull byte[] bytes) throws ZerobusException { + if (bytes == null) { + throw new ZerobusException("Bytes cannot be null"); + } + synchronized (this) { + try { + return doIngestBytes(bytes); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing record", e); + } + } + }
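Sketch of the secondary-type path under the same assumptions. Note the trade-off stated above: since no original record is stored for byte[] ingestion, these records will not show up in getUnackedRecords():

    byte[] payload = record.toByteArray(); // or bytes received from an upstream system
    long offset = stream.ingest(payload);  // sent as-is, no re-encoding
    stream.waitForOffset(offset);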

+ + /** + * {@inheritDoc} + * + * <p>For protobuf streams, use {@link MessageBatch#of(Iterable)} to create the batch. + */ + @Override + @Nullable public Long ingestBatch(@Nonnull PrimaryBatch<T> batch) throws ZerobusException { + if (batch == null) { + throw new ZerobusException("Batch cannot be null"); + } + if (!batch.iterator().hasNext()) { + return null; + } + synchronized (this) { + try { + return doIngestBatch(batch); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing batch", e); + } + } + }
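Usage sketch: a batch shares a single offset and is acknowledged atomically, so one waitForOffset call covers every record in it (names as in the earlier examples; buildRecords is a placeholder for application code):

    List<MyRecord> records = buildRecords();
    Long offset = stream.ingestBatch(MessageBatch.of(records));
    if (offset != null) {           // null signals an empty batch
      stream.waitForOffset(offset); // all records in the batch are now durable
    }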

+ + /** + * Ingests a batch of protobuf records into the stream (primary type). + * + * <p>This is a convenience method that accepts {@link MessageBatch} directly. + * + * <p>Example: + * + * <pre>{@code
+   * List<MyRecord> records = new ArrayList<>();
    +   * records.add(MyRecord.newBuilder().setField("value1").build());
    +   * records.add(MyRecord.newBuilder().setField("value2").build());
    +   * Long offset = stream.ingestBatch(MessageBatch.of(records));
+   * }</pre> + * + * @param messageBatch The batch of protobuf records to ingest + * @return The offset ID for the batch, or null if empty + * @throws ZerobusException if ingestion fails + */ + @Nullable public Long ingestBatch(@Nonnull MessageBatch<T> messageBatch) throws ZerobusException { + if (messageBatch == null) { + throw new ZerobusException("Message batch cannot be null"); + } + if (!messageBatch.iterator().hasNext()) { + return null; + } + synchronized (this) { + try { + return doIngestBatch(messageBatch); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing batch", e); + } + } + } + + /** + * Ingests a batch of pre-serialized protobuf records into the stream. + * + *

<p>The batch is assigned a single offset ID and acknowledged atomically. + * + * <p>Example: + * + * <pre>{@code
+   * List<byte[]> serializedRecords = getSerializedRecordsFromKafka();
    +   * Long offset = stream.ingestBatch(BytesBatch.of(serializedRecords));
+   * }</pre> + * + * @param bytesBatch The batch of serialized protobuf byte arrays to ingest + * @return The offset ID for the batch, or null if empty + * @throws ZerobusException if ingestion fails + */ + @Nullable public Long ingestBatch(@Nonnull BytesBatch bytesBatch) throws ZerobusException { + if (bytesBatch == null) { + throw new ZerobusException("Bytes batch cannot be null"); + } + if (!bytesBatch.iterator().hasNext()) { + return null; + } + synchronized (this) { + try { + return doIngestBytesBatch(bytesBatch); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ZerobusException("Interrupted while enqueuing batch", e); + } + } + } + + // getUnackedRecords() is now implemented in BaseZerobusStream using LandingZone + + /** + * Returns the table properties for this stream. + * + * @return The proto table properties + */ + @Nonnull + public ProtoTableProperties getTableProperties() { + return (ProtoTableProperties) tableProperties; + } + + /** + * {@inheritDoc} + * + * <p>Recreates this proto stream with the same configuration. Any unacknowledged records from + * this stream will be re-ingested into the new stream. + */ + @Override + @Nonnull + @SuppressWarnings("unchecked") + public CompletableFuture<ProtoZerobusStream<T>> recreate(ZerobusSdk sdk) { + Iterator<T> unackedRecords = getUnackedRecords(); + ProtoTableProperties tableProperties = getTableProperties(); + + ZerobusStreamBuilder.AuthenticatedZerobusStreamBuilder builder = + sdk.streamBuilder(tableProperties.getTableName()) + .clientCredentials(clientId, clientSecret) + .options(options) + .headersProvider(headersProvider) + .tlsConfig(tlsConfig); + + return builder + .compiledProto((T) tableProperties.getDefaultInstance()) + .build() + .thenApply( + newStream -> { + // Re-ingest unacked records + while (unackedRecords.hasNext()) { + try { + newStream.ingest(unackedRecords.next()); + } catch (ZerobusException e) { + throw new RuntimeException( + "Failed to re-ingest record during stream recreation", e); + } + } + return newStream; + }); + } + + // ==================== Internal Implementation ==================== + + private long doIngestRecord(T record) throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + ByteString encoded = ByteString.copyFrom(record.toByteArray()); + + // Create batch with single record + EncodedBatch encodedBatch = EncodedBatch.protoSingle(encoded); + InflightBatch<T> batch = + new InflightBatch<>( + java.util.Collections.singletonList(record), + encodedBatch, + offsetId, + new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + private Long doIngestBatch(Iterable<T> records) throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + List<ByteString> encoded = new ArrayList<>(); + List<T> recordList = new ArrayList<>(); + + for (T record : records) { + encoded.add(ByteString.copyFrom(record.toByteArray())); + recordList.add(record); + } + + // Create batch with multiple records + EncodedBatch encodedBatch = EncodedBatch.protoBatch(encoded); + InflightBatch<T> batch = + new InflightBatch<>(recordList, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + private long doIngestBytes(byte[] bytes) throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + ByteString encoded = ByteString.copyFrom(bytes); + + // Create batch with single encoded record (no original record stored) + EncodedBatch encodedBatch = EncodedBatch.protoSingle(encoded); + InflightBatch<T> batch = + new InflightBatch<>(null, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + private Long doIngestBytesBatch(Iterable<byte[]> bytesList) + throws ZerobusException, InterruptedException { + checkIngestState(); + + long offsetId = getNextOffsetId(); + List<ByteString> encoded = new ArrayList<>(); + + for (byte[] bytes : bytesList) { + encoded.add(ByteString.copyFrom(bytes)); + } + + // Create batch with multiple encoded records (no original records stored) + EncodedBatch encodedBatch = EncodedBatch.protoBatch(encoded); + InflightBatch<T> batch = + new InflightBatch<>(null, encodedBatch, offsetId, new CompletableFuture<>()); + + addBatch(batch); + return offsetId; + } + + // enqueueRecordsForResending() is no longer needed - recovery is handled by LandingZone +} diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/StreamFailure.java b/sdk/src/main/java/com/databricks/zerobus/stream/StreamFailure.java new file
mode 100644 index 0000000..82c8bed --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/StreamFailure.java @@ -0,0 +1,35 @@ +package com.databricks.zerobus.stream; + +/** Types of stream failures that can occur during ingestion. */ +enum StreamFailureType { + UNKNOWN, + SERVER_CLOSED_STREAM, + SENDING_MESSAGE, + SERVER_UNRESPONSIVE +} + +/** Tracks stream failure counts and types for recovery decisions. */ +class StreamFailureInfo { + private StreamFailureType failureType = StreamFailureType.UNKNOWN; + private int failureCounts = 0; + + synchronized void logFailure(StreamFailureType type) { + if (type == failureType) { + failureCounts++; + } else { + failureType = type; + failureCounts = 1; + } + } + + synchronized void resetFailure(StreamFailureType type) { + if (failureType == type) { + failureCounts = 0; + failureType = StreamFailureType.UNKNOWN; + } + } + + synchronized int getFailureCounts() { + return failureCounts; + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/stream/ZerobusStream.java b/sdk/src/main/java/com/databricks/zerobus/stream/ZerobusStream.java new file mode 100644 index 0000000..c56a118 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/stream/ZerobusStream.java @@ -0,0 +1,209 @@ +package com.databricks.zerobus.stream; + +import com.databricks.zerobus.StreamConfigurationOptions; +import com.databricks.zerobus.TableProperties; +import com.databricks.zerobus.ZerobusException; +import com.databricks.zerobus.ZerobusGrpc.ZerobusStub; +import com.databricks.zerobus.ZerobusSdk; +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.tls.TlsConfig; +import com.google.protobuf.Message; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; + +/** + * Zerobus stream for ingesting protobuf records into a table. + * + *

<p>This is the original stream class maintained for backwards compatibility. It extends {@link + * ProtoZerobusStream} and provides additional backwards-compatible methods. + * + *

<p>For new code, prefer using the fluent API: + * + * <pre>{@code
+ * ProtoZerobusStream<MyRecord> stream = sdk.streamBuilder("catalog.schema.table")
    + *     .clientCredentials(clientId, clientSecret)
    + *     .protoSchema(MyRecord.getDefaultInstance())
    + *     .build()
    + *     .join();
+ * }</pre> + * + * @param <RecordType> The type of the protobuf message being ingested + * @see ProtoZerobusStream + * @see ZerobusSdk#streamBuilder(String) + * @deprecated Since 0.2.0. Use {@link ProtoZerobusStream} instead via {@link + * ZerobusSdk#streamBuilder(String)}. This class will be removed in a future release. + */ +@Deprecated +public class ZerobusStream<RecordType extends Message> extends ProtoZerobusStream<RecordType> { + + private final TableProperties<RecordType> legacyTableProperties; + + // ==================== Constructor ==================== + + /** + * Creates a new ZerobusStream. + * + *

<p>Use {@link ZerobusSdk#streamBuilder(String)} instead of calling this constructor directly. + * + * @param stubSupplier Supplier for gRPC stubs + * @param tableProperties Table configuration + * @param clientId OAuth client ID + * @param clientSecret OAuth client secret + * @param headersProvider Custom headers provider (may be null) + * @param tlsConfig TLS configuration + * @param options Stream configuration options + * @param executor Executor for async operations + * @deprecated Since 0.2.0. Use {@link ZerobusSdk#streamBuilder(String)} instead. This constructor + * will be removed in a future release. + */ + @Deprecated + public ZerobusStream( + Supplier<ZerobusStub> stubSupplier, + TableProperties<RecordType> tableProperties, + String clientId, + String clientSecret, + HeadersProvider headersProvider, + TlsConfig tlsConfig, + StreamConfigurationOptions options, + ExecutorService executor) { + super( + stubSupplier, + tableProperties, + clientId, + clientSecret, + headersProvider, + tlsConfig, + options, + executor); + this.legacyTableProperties = tableProperties; + } + + // ==================== Backwards-Compatible API ==================== + + /** + * Returns the table properties for this stream. + * + *

<p>Overrides the parent method to return the original {@link TableProperties} type with generic + * parameter, preserving backwards compatibility. + * + * @return The table properties + * @deprecated Since 0.2.0. Use {@link ZerobusSdk#streamBuilder(String)} to create streams + * instead. This method will be removed in a future release. + */ + @Override + @Deprecated + public TableProperties<RecordType> getTableProperties() { + return legacyTableProperties; + } + + /** + * Returns the legacy table properties for this stream. + * + *

<p>Alias for {@link #getTableProperties()}, used internally for stream recreation. + * + * @return The legacy table properties + * @deprecated Since 0.2.0. Use {@link ZerobusSdk#streamBuilder(String)} to create streams + * instead. This method will be removed in a future release. + */ + @Deprecated + public TableProperties<RecordType> getLegacyTableProperties() { + return legacyTableProperties; + } + + /** + * @deprecated Used internally for stream recreation. + */ + @Deprecated + public String getClientId() { + return clientId; + } + + /** + * @deprecated Used internally for stream recreation. + */ + @Deprecated + public String getClientSecret() { + return clientSecret; + } + + /** + * @deprecated Used internally for stream recreation. + */ + @Deprecated + public StreamConfigurationOptions getOptions() { + return options; + } + + /** + * @deprecated Used internally for stream recreation. + */ + @Deprecated + public HeadersProvider getHeadersProvider() { + return headersProvider; + } + + /** + * @deprecated Used internally for stream recreation. + */ + @Deprecated + public TlsConfig getTlsConfig() { + return tlsConfig; + } + + /** + * Ingests a record into the stream asynchronously. + * + *

<p>The returned future completes when the record has been durably acknowledged by the server. + * + *

<p>This method is provided for backwards compatibility. For new code, consider using {@link + * #ingest(Message)} which returns the offset ID directly. + * + * @param record The protobuf record to ingest + * @return A future that completes when the record is acknowledged + * @throws ZerobusException if the stream is not in a valid state for ingestion + * @deprecated Since 0.2.0. Use {@link #ingest(Message)} instead, which returns the offset ID + * directly. This method will be removed in a future release. + */ + @Deprecated + public CompletableFuture<Void> ingestRecord(RecordType record) throws ZerobusException { + long offsetId = ingest(record); + return getAckFutureForOffset(offsetId); + } + + // ==================== Internal ==================== + + /** + * Gets a future that completes when the given offset is acknowledged. + * + * @param offsetId The offset to wait for + * @return A future that completes when the offset is acknowledged + */ + private CompletableFuture<Void> getAckFutureForOffset(long offsetId) { + CompletableFuture<Void> result = new CompletableFuture<>(); + // Find the batch with this offset and attach to its promise + synchronized (this) { + for (InflightBatch<RecordType> batch : landingZone.peekAll()) { + if (batch.offsetId == offsetId) { + batch.ackPromise.whenComplete( + (ack, err) -> { + if (err != null) { + result.completeExceptionally(err); + } else { + result.complete(null); + } + }); + return result; + } + } + } + // If not found in landing zone, it may already be acknowledged + if (offsetId <= latestAckedOffsetId) { + return CompletableFuture.completedFuture(null); + } + // Not tracked and not yet acknowledged: fail fast with an error + result.completeExceptionally( + new ZerobusException("Offset " + offsetId + " not found in landing zone")); + return result; + } +}
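For migration, the deprecated future-based call and the new offset-based calls relate roughly as follows (a sketch, assuming an open deprecated stream):

    // Old style: the future completes on durable acknowledgment.
    CompletableFuture<Void> done = stream.ingestRecord(record);
    done.join();

    // New style: equivalent effect via offsets.
    long offset = stream.ingest(record);
    stream.waitForOffset(offset);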

diff --git a/sdk/src/main/java/com/databricks/zerobus/tls/SecureTlsConfig.java b/sdk/src/main/java/com/databricks/zerobus/tls/SecureTlsConfig.java new file mode 100644 index 0000000..1ef42c6 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/tls/SecureTlsConfig.java @@ -0,0 +1,50 @@ +package com.databricks.zerobus.tls; + +import io.grpc.ChannelCredentials; +import io.grpc.TlsChannelCredentials; + +/** + * Secure TLS configuration using system CA certificates. + * + * <p>This is the default configuration, enabling TLS encryption using the operating system's + * trusted CA certificates. It provides secure communication without requiring additional + * configuration. + * + * <p>Example usage: + * + * <pre>{@code
    + * // Explicit usage (functionally identical to default)
    + * TlsConfig tls = new SecureTlsConfig();
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options,
    + *     null,  // headersProvider
    + *     tls    // tlsConfig
    + * ).join();
    + *
    + * // Default usage (SecureTlsConfig is used automatically)
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options
    + * ).join();
+ * }</pre> + * + * @see TlsConfig + * @see com.databricks.zerobus.ZerobusSdk#createStream + */ +public class SecureTlsConfig extends TlsConfig { + + /** + * Returns secure TLS credentials using system CA certificates. + * + * @return SSL channel credentials with system CAs + */ + @Override + public ChannelCredentials toChannelCredentials() { + return TlsChannelCredentials.create(); + } +} diff --git a/sdk/src/main/java/com/databricks/zerobus/tls/TlsConfig.java b/sdk/src/main/java/com/databricks/zerobus/tls/TlsConfig.java new file mode 100644 index 0000000..56bb716 --- /dev/null +++ b/sdk/src/main/java/com/databricks/zerobus/tls/TlsConfig.java @@ -0,0 +1,66 @@ +package com.databricks.zerobus.tls; + +import io.grpc.ChannelCredentials; + +/** + * Abstract base class for TLS configuration strategies. + * + *

<p>Implementations define how to configure the gRPC channel's TLS settings. By default, the SDK + * uses secure TLS with system CA certificates. Custom implementations can provide alternative TLS + * configurations such as custom certificate authorities or mutual TLS. + * + * <p>Example usage with custom TLS configuration: + * + * <pre>{@code
    + * public class CustomTlsConfig extends TlsConfig {
    + *     private final File caCertFile;
    + *
    + *     public CustomTlsConfig(File caCertFile) {
    + *         this.caCertFile = caCertFile;
    + *     }
    + *
    + *     @Override
    + *     public ChannelCredentials toChannelCredentials() {
    + *         try {
    + *             return TlsChannelCredentials.newBuilder()
    + *                 .trustManager(caCertFile)
    + *                 .build();
    + *         } catch (IOException e) {
    + *             throw new RuntimeException("Failed to load CA certificate", e);
    + *         }
    + *     }
    + * }
    + *
    + * TlsConfig customTls = new CustomTlsConfig(new File("/path/to/ca-cert.pem"));
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options,
    + *     customTls
    + * ).join();
+ * }</pre> + * + * <p>For most use cases, the default TLS configuration is sufficient and no custom implementation + * is needed. Simply call {@code createStream} without TLS parameters: + * + * <pre>{@code
+ * ZerobusStream<MyRecord> stream = sdk.createStream(
    + *     tableProperties,
    + *     clientId,
    + *     clientSecret,
    + *     options
    + * ).join();
+ * }</pre>
    + * + * @see com.databricks.zerobus.ZerobusSdk#createStream + */ +public abstract class TlsConfig { + + /** + * Converts TLS configuration to gRPC ChannelCredentials. + * + * @return Channel credentials for secure connection + */ + public abstract ChannelCredentials toChannelCredentials(); +} diff --git a/src/main/proto/zerobus_service.proto b/sdk/src/main/proto/zerobus_service.proto similarity index 75% rename from src/main/proto/zerobus_service.proto rename to sdk/src/main/proto/zerobus_service.proto index 729dcd8..a89c1b9 100644 --- a/src/main/proto/zerobus_service.proto +++ b/sdk/src/main/proto/zerobus_service.proto @@ -41,7 +41,7 @@ service Zerobus { /* * Record type that will be accepted in the stream. - * + * * Defaults to RECORD_TYPE_UNSPECIFIED, which returns an error on stream creation. */ enum RecordType { @@ -50,6 +50,29 @@ enum RecordType { JSON = 2; } +/* + * Batch of JSON-encoded records. + * + * This message contains multiple JSON records that will be ingested together. + * Each string in the array represents a complete JSON object. + */ +message JsonRecordBatch { + // Array of JSON-encoded records. + repeated string records = 1; +} + +/* + * Batch of protobuf-encoded records. + * + * This message contains multiple protobuf-encoded records that will be ingested together. + * Each record must be serialized according to the protobuf descriptor provided in the + * CreateIngestStreamRequest. + */ +message ProtoEncodedRecordBatch { + // Array of protobuf-encoded records. + repeated bytes records = 1; +} + /* * Request to create a new ephemeral ingestion stream. * @@ -98,14 +121,14 @@ message CreateIngestStreamResponse { /* * Request to ingest a single record into the stream. - * + * * This message is sent by the client after the initial CreateIngestStreamRequest * to stream individual records for ingestion. */ message IngestRecordRequest { // Unique identifier for this record within the stream. optional int64 offset_id = 1; - + // Serialized record data. oneof record { // The proto encoded record must be serialized according to the protobuf descriptor @@ -115,23 +138,50 @@ message IngestRecordRequest { } } +/* + * Request to ingest a batch of records into the stream. + * + * This message is sent by the client after the initial CreateIngestStreamRequest + * to stream batches of records for ingestion. + */ +message IngestRecordBatchRequest { + // Unique identifier for this batch within the stream. + optional int64 offset_id = 1; + + // Batch of serialized records. + // The batch can contain multiple records encoded as either protobuf or JSON. + oneof batch { + // Batch of protobuf-encoded records. Each record must be serialized according to + // the protobuf descriptor provided in the CreateIngestStreamRequest. + ProtoEncodedRecordBatch proto_encoded_batch = 2; + + // Batch of JSON-encoded records. + JsonRecordBatch json_batch = 3; + } +} + /* * A message in the EphemeralStream bidirectional stream. - * + * * This message type allows the client to send either stream creation requests - * or record ingestion requests through the same stream. + * or record ingestion requests (individual or batched) through the same stream. */ message EphemeralStreamRequest { oneof payload { // Initial request to create an ephemeral stream. // Must be the first message in the stream. - // All subsequent messages should be ingest_record. + // All subsequent messages should be ingest_record or ingest_record_batch. CreateIngestStreamRequest create_stream = 1; - + // Request to ingest a record. 
// Can only be sent after a successful create_stream request. + // Multiple ingest_record messages can be sent in sequence. IngestRecordRequest ingest_record = 2; + + // Request to ingest a batch of records. + // Can only be sent after a successful create_stream request. + // Multiple ingest_record_batch messages can be sent in sequence. + IngestRecordBatchRequest ingest_record_batch = 3; } } diff --git a/sdk/src/test/java/com/databricks/zerobus/AcknowledgmentTest.java b/sdk/src/test/java/com/databricks/zerobus/AcknowledgmentTest.java new file mode 100644 index 0000000..60632da --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/AcknowledgmentTest.java @@ -0,0 +1,159 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.LongConsumer; +import org.junit.jupiter.api.Test; + +/** Tests for acknowledgment callbacks using the fluent API. */ +public class AcknowledgmentTest extends BaseZerobusTest { + + @Test + public void testAckCallback() throws Exception { + List<Long> ackedOffsets = Collections.synchronizedList(new ArrayList<>()); + LongConsumer ackCallback = ackedOffsets::add; + + int numRecords = 10; + for (int i = 0; i < numRecords; i++) { + mockedGrpcServer.injectAckRecord(i); + } + + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .offsetCallback(ackCallback) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + for (int i = 0; i < numRecords; i++) { + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("test-city-" + i) + .setPopulation(i) + .build()); + } + + stream.flush(); + + // Wait for callbacks to complete + long deadline = System.currentTimeMillis() + 2000; + boolean foundFinalOffset = false; + while (System.currentTimeMillis() < deadline) { + synchronized (ackedOffsets) { + if (!ackedOffsets.isEmpty() && ackedOffsets.contains((long) (numRecords - 1))) { + foundFinalOffset = true; + break; + } + } + Thread.sleep(10); + } + + assertTrue(foundFinalOffset, "Expected to receive ack for final offset " + (numRecords - 1)); + assertTrue(ackedOffsets.size() > 0, "Expected callback to be called at least once"); + + Iterator<CityPopulationTableRow> unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + + stream.close(); + } + + @Test + public void testCallbackExceptionHandling() throws Exception { + List<Long> callbackInvocations = Collections.synchronizedList(new ArrayList<>()); + List<String> thrownExceptions = Collections.synchronizedList(new ArrayList<>()); + + LongConsumer ackCallback = + offsetId -> { + callbackInvocations.add(offsetId); + if (offsetId == 1) { + RuntimeException exception = + new RuntimeException("Test exception in callback for offset " + offsetId); + thrownExceptions.add(exception.getMessage()); + throw exception; + } + }; + + int numRecords = 3; + for (int i = 0; i < numRecords; i++) { + mockedGrpcServer.injectAckRecord(i); + } + + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .offsetCallback(ackCallback) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + for (int i = 0; i < numRecords; i++) {
stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("error-callback-device-" + i) + .setPopulation(30 + i) + .build()); + } + + stream.flush(); + + // Wait for callbacks to complete + long deadline = System.currentTimeMillis() + 1000; + while (callbackInvocations.size() < numRecords && System.currentTimeMillis() < deadline) { + Thread.sleep(10); + } + + assertEquals(numRecords, callbackInvocations.size()); + assertTrue(callbackInvocations.contains(0L)); + assertTrue(callbackInvocations.contains(1L)); + assertTrue(callbackInvocations.contains(2L)); + + assertEquals(1, thrownExceptions.size()); + assertTrue(thrownExceptions.get(0).contains("Test exception in callback for offset 1")); + + // Stream should remain functional + Iterator<CityPopulationTableRow> unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + assertEquals(StreamState.OPENED, stream.getState()); + + stream.close(); + } + + @Test + public void testIngestReturnsOffsetAndWaitForOffset() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + // ingest returns offset ID + long offset = + stream.ingest( + CityPopulationTableRow.newBuilder().setCityName("city").setPopulation(100).build()); + + // waitForOffset blocks until acknowledged + stream.waitForOffset(offset); + + // After waitForOffset completes, record should be acknowledged + Iterator<CityPopulationTableRow> unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + + stream.close(); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/BaseZerobusTest.java b/sdk/src/test/java/com/databricks/zerobus/BaseZerobusTest.java new file mode 100644 index 0000000..518fee9 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/BaseZerobusTest.java @@ -0,0 +1,96 @@ +package com.databricks.zerobus; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; + +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.auth.TokenFactory; +import com.databricks.zerobus.tls.TlsConfig; +import io.grpc.stub.StreamObserver; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; + +/** + * Base test class for ZerobusSdk tests. + * + *

<p>Provides common setup and teardown for mocked gRPC server and SDK infrastructure. + */ +@ExtendWith(MockitoExtension.class) +public abstract class BaseZerobusTest { + + protected MockedGrpcServer mockedGrpcServer; + protected ZerobusGrpc.ZerobusStub zerobusStub; + protected ZerobusSdk zerobusSdk; + protected ZerobusSdkStubFactory zerobusSdkStubFactory; + protected org.mockito.MockedStatic<TokenFactory> tokenFactoryMock; + protected io.grpc.stub.ClientCallStreamObserver<EphemeralStreamRequest> spiedStream; + + @BeforeEach + public void setUp() { + // Create mocked gRPC server + mockedGrpcServer = new MockedGrpcServer(); + + // Create mocked stub + zerobusStub = mock(ZerobusGrpc.ZerobusStub.class); + + // Create spy on stub factory + zerobusSdkStubFactory = spy(new ZerobusSdkStubFactory()); + + // Mock TokenFactory to return a fake token + tokenFactoryMock = mockStatic(TokenFactory.class); + tokenFactoryMock + .when( + () -> + TokenFactory.getZerobusToken( + anyString(), anyString(), anyString(), anyString(), anyString())) + .thenReturn("fake-token-for-testing"); + + // Create ZerobusSdk with mocked stub factory + zerobusSdk = + ZerobusSdk.builder("localhost:50051", "https://test.cloud.databricks.com") + .stubFactory(zerobusSdkStubFactory) + .build(); + + // Configure stub factory to return our mocked stub with headers provider + lenient() + .doReturn(zerobusStub) + .when(zerobusSdkStubFactory) + .createStub( + anyString(), any(HeadersProvider.class), any(TlsConfig.class), anyInt(), anyString()); + + // Setup mocked stub's ephemeralStream behavior + lenient() + .doAnswer( + invocation -> { + @SuppressWarnings("unchecked") + StreamObserver ackSender = + (StreamObserver) invocation.getArgument(0); + + mockedGrpcServer.initialize(ackSender); + + // Spy on the message receiver to verify cancel() is called + spiedStream = spy(mockedGrpcServer.getMessageReceiver()); + return spiedStream; + }) + .when(zerobusStub) + .ephemeralStream(any()); + } + + @AfterEach + public void tearDown() { + if (tokenFactoryMock != null) { + tokenFactoryMock.close(); + } + if (mockedGrpcServer != null) { + mockedGrpcServer.destroy(); + } + mockedGrpcServer = null; + zerobusStub = null; + zerobusSdk = null; + zerobusSdkStubFactory = null; + tokenFactoryMock = null; + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/ConfigurationTest.java b/sdk/src/test/java/com/databricks/zerobus/ConfigurationTest.java new file mode 100644 index 0000000..4359efe --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/ConfigurationTest.java @@ -0,0 +1,70 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.stream.ZerobusStream; +import org.junit.jupiter.api.Test; + +/** Tests for stream configuration and options.
*/ +public class ConfigurationTest extends BaseZerobusTest { + + @Test + public void testStreamConfigPreserved() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + TableProperties<CityPopulationTableRow> tableProperties = + new TableProperties<>("test.schema.table", CityPopulationTableRow.getDefaultInstance()); + StreamConfigurationOptions options = + StreamConfigurationOptions.builder().setRecovery(true).setMaxInflightRequests(500).build(); + + ZerobusStream<CityPopulationTableRow> stream = + zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); + + assertEquals("test.schema.table", stream.getLegacyTableProperties().getTableName()); + assertEquals("client-id", stream.getClientId()); + assertEquals("client-secret", stream.getClientSecret()); + assertTrue(stream.getOptions().recovery()); + assertEquals(500, stream.getOptions().maxInflightRequests()); + assertNotNull(stream.getHeadersProvider()); + + stream.close(); + } + + @Test + public void testDefaultMaxInflightRecords() { + StreamConfigurationOptions options = StreamConfigurationOptions.builder().build(); + + // Default should be a reasonable value (e.g., 10000) + assertTrue(options.maxInflightRequests() > 0); + } + + @Test + public void testCustomMaxInflightRecords() { + StreamConfigurationOptions options = + StreamConfigurationOptions.builder().setMaxInflightRequests(5000).build(); + + assertEquals(5000, options.maxInflightRequests()); + } + + @Test + public void testRecoveryOption() { + StreamConfigurationOptions withRecovery = + StreamConfigurationOptions.builder().setRecovery(true).build(); + + StreamConfigurationOptions withoutRecovery = + StreamConfigurationOptions.builder().setRecovery(false).build(); + + assertTrue(withRecovery.recovery()); + assertFalse(withoutRecovery.recovery()); + } + + @Test + public void testTablePropertiesGetters() { + TableProperties<CityPopulationTableRow> tableProperties = + new TableProperties<>("catalog.schema.table", CityPopulationTableRow.getDefaultInstance()); + + assertEquals("catalog.schema.table", tableProperties.getTableName()); + assertNotNull(tableProperties.getDefaultInstance()); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/ErrorHandlingTest.java b/sdk/src/test/java/com/databricks/zerobus/ErrorHandlingTest.java new file mode 100644 index 0000000..ea18ccc --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/ErrorHandlingTest.java @@ -0,0 +1,66 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.batch.proto.MessageBatch; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import org.junit.jupiter.api.Test; + +/** Tests for error handling: null inputs and state violations using fluent API.
*/ +public class ErrorHandlingTest extends BaseZerobusTest { + + @Test + public void testIngestNullRecordThrows() throws Exception { + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertThrows(ZerobusException.class, () -> stream.ingest((CityPopulationTableRow) null)); + stream.close(); + } + + @Test + public void testIngestNullBatchThrows() throws Exception { + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertThrows( + ZerobusException.class, + () -> stream.ingestBatch((MessageBatch<CityPopulationTableRow>) null)); + stream.close(); + } + + @Test + public void testIngestAfterCloseThrows() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream<CityPopulationTableRow> stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + + CityPopulationTableRow record = + CityPopulationTableRow.newBuilder().setCityName("test-city").setPopulation(1000).build(); + + assertThrows(ZerobusException.class, () -> stream.ingest(record)); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/JsonIngestionTest.java b/sdk/src/test/java/com/databricks/zerobus/JsonIngestionTest.java new file mode 100644 index 0000000..6092f29 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/JsonIngestionTest.java @@ -0,0 +1,275 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.zerobus.batch.json.MapBatch; +import com.databricks.zerobus.batch.json.StringBatch; +import com.databricks.zerobus.stream.JsonZerobusStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** Tests for JSON record ingestion using the fluent API.
*/
+public class JsonIngestionTest extends BaseZerobusTest {
+
+  @Test
+  public void testSingleJsonRecordIngestion() throws Exception {
+    mockedGrpcServer.injectAckRecord(0);
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    assertEquals(StreamState.OPENED, stream.getState());
+
+    long offset = stream.ingest("{\"city_name\": \"test-city\", \"population\": 1000}");
+    stream.waitForOffset(offset);
+
+    Iterator<Map<String, Object>> unackedRecords = stream.getUnackedRecords();
+    assertFalse(unackedRecords.hasNext());
+
+    stream.close();
+    assertEquals(StreamState.CLOSED, stream.getState());
+  }
+
+  @Test
+  public void testMultipleJsonRecordsIngestion() throws Exception {
+    int numRecords = 10;
+    for (int i = 0; i < numRecords; i++) {
+      mockedGrpcServer.injectAckRecord(i);
+    }
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    for (int i = 0; i < numRecords; i++) {
+      stream.ingest("{\"city_name\": \"city-" + i + "\", \"population\": " + (1000 + i) + "}");
+    }
+
+    stream.flush();
+
+    Iterator<Map<String, Object>> unackedRecords = stream.getUnackedRecords();
+    assertFalse(unackedRecords.hasNext());
+
+    stream.close();
+  }
+
+  @Test
+  public void testJsonBatchIngestion() throws Exception {
+    mockedGrpcServer.injectAckRecord(0);
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    List<String> batch = new ArrayList<>();
+    for (int i = 0; i < 10; i++) {
+      batch.add("{\"city_name\": \"city-" + i + "\", \"population\": " + (1000 + i) + "}");
+    }
+
+    Long offset = stream.ingestBatch(StringBatch.of(batch));
+    assertNotNull(offset);
+    stream.waitForOffset(offset);
+
+    stream.close();
+  }
+
+  @Test
+  public void testJsonEmptyBatchReturnsNull() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    List<String> emptyBatch = new ArrayList<>();
+    Long offset = stream.ingestBatch(StringBatch.of(emptyBatch));
+    assertNull(offset);
+
+    stream.close();
+  }
+
+  @Test
+  public void testJsonNullRecordThrows() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    assertThrows(ZerobusException.class, () -> stream.ingest((String) null));
+    stream.close();
+  }
+
+  @Test
+  public void testJsonNullBatchThrows() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    assertThrows(ZerobusException.class, () -> stream.ingestBatch((StringBatch) null));
+    stream.close();
+  }
+
+  // ==================== Map Overload Tests ====================
+
+  @Test
+  public void testSingleMapIngestion() throws Exception {
+    mockedGrpcServer.injectAckRecord(0);
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    Map<String, Object> record = new HashMap<>();
+    record.put("city_name", "test-city");
+    record.put("population", 1000);
+
+    long offset = stream.ingest(record);
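+
+    // Same acknowledgment flow as the String overload above: waitForOffset blocks until the
+    // server acks this offset.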
+    stream.waitForOffset(offset);
+
+    Iterator<Map<String, Object>> unackedRecords = stream.getUnackedRecords();
+    assertFalse(unackedRecords.hasNext());
+
+    stream.close();
+  }
+
+  @Test
+  public void testMapBatchIngestion() throws Exception {
+    mockedGrpcServer.injectAckRecord(0);
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    List<Map<String, Object>> batch = new ArrayList<>();
+    for (int i = 0; i < 10; i++) {
+      Map<String, Object> record = new HashMap<>();
+      record.put("city_name", "city-" + i);
+      record.put("population", 1000 + i);
+      batch.add(record);
+    }
+
+    Long offset = stream.ingestBatch(MapBatch.of(batch));
+    assertNotNull(offset);
+    stream.waitForOffset(offset);
+
+    stream.close();
+  }
+
+  @Test
+  public void testMapEmptyBatchReturnsNull() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    List<Map<String, Object>> emptyBatch = new ArrayList<>();
+    Long offset = stream.ingestBatch(MapBatch.of(emptyBatch));
+    assertNull(offset);
+
+    stream.close();
+  }
+
+  @Test
+  public void testMapNullRecordThrows() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    assertThrows(ZerobusException.class, () -> stream.ingest((Map<String, Object>) null));
+    stream.close();
+  }
+
+  @Test
+  public void testMapNullBatchThrows() throws Exception {
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    assertThrows(ZerobusException.class, () -> stream.ingestBatch((MapBatch) null));
+    stream.close();
+  }
+
+  @Test
+  public void testMapWithNestedValues() throws Exception {
+    mockedGrpcServer.injectAckRecord(0);
+
+    JsonZerobusStream stream =
+        zerobusSdk
+            .streamBuilder("test-table")
+            .clientCredentials("client-id", "client-secret")
+            .recovery(false)
+            .json()
+            .build()
+            .get();
+
+    Map<String, Object> nested = new HashMap<>();
+    nested.put("lat", 40.7128);
+    nested.put("lng", -74.0060);
+
+    Map<String, Object> record = new HashMap<>();
+    record.put("city_name", "New York");
+    record.put("location", nested);
+    record.put("tags", java.util.Arrays.asList("urban", "coastal"));
+
+    long offset = stream.ingest(record);
+    stream.waitForOffset(offset);
+
+    stream.close();
+  }
+}
diff --git a/src/test/java/com/databricks/zerobus/MockedGrpcServer.java b/sdk/src/test/java/com/databricks/zerobus/MockedGrpcServer.java
similarity index 87%
rename from src/test/java/com/databricks/zerobus/MockedGrpcServer.java
rename to sdk/src/test/java/com/databricks/zerobus/MockedGrpcServer.java
index 5620b98..1fe8872 100644
--- a/src/test/java/com/databricks/zerobus/MockedGrpcServer.java
+++ b/sdk/src/test/java/com/databricks/zerobus/MockedGrpcServer.java
@@ -58,16 +58,16 @@ private static class CreateStreamResponse {
 }
 
   private final ExecutorService executorService;
-  private final List<EphemeralStreamRequest> capturedMessages;
-  private final List<AckRecord> injectedAckRecords;
-  private final List<CreateStreamResponse> injectedCreateStreamResponses;
-  private final BlockingQueue<EphemeralStreamRequest> messagesToProcess;
+  final List<EphemeralStreamRequest> capturedMessages;
+  final List<AckRecord> injectedAckRecords;
+  final List<CreateStreamResponse> injectedCreateStreamResponses;
+  final BlockingQueue<EphemeralStreamRequest> messagesToProcess;
 
   private StreamObserver<EphemeralStreamResponse> ackSender;
   private long lastReceivedOffsetId = -1;
   private volatile boolean serverRunning = false;
-
private volatile boolean streamReady = true; - private Runnable streamReadyHandler; + volatile boolean streamReady = true; + Runnable streamReadyHandler; private final ClientCallStreamObserver messageReceiver = new ClientCallStreamObserver() { @@ -294,6 +294,8 @@ private void processMessage(EphemeralStreamRequest request) throws InterruptedEx handleCreateStream(); } else if (request.hasIngestRecord()) { handleIngestRecord(request.getIngestRecord().getOffsetId()); + } else if (request.hasIngestRecordBatch()) { + handleIngestRecordBatch(request.getIngestRecordBatch().getOffsetId()); } } @@ -379,6 +381,50 @@ private void handleIngestRecord(long offset) throws InterruptedException { } } + private void handleIngestRecordBatch(long offset) throws InterruptedException { + // For batches, we use the batch's offset ID directly (not sequential like single records) + lastReceivedOffsetId = offset; + + synchronized (injectedAckRecords) { + if (injectedAckRecords.isEmpty()) { + // Default behavior: auto-ack batch + sendAck(offset); + return; + } + + // Check if there's a specific ack record for this offset + AckRecord matchingRecord = null; + for (int i = 0; i < injectedAckRecords.size(); i++) { + if (injectedAckRecords.get(i).offsetId == offset) { + matchingRecord = injectedAckRecords.remove(i); + break; + } + } + + if (matchingRecord != null) { + if (matchingRecord.delayMs > 0) { + Thread.sleep(matchingRecord.delayMs); + } + + if (matchingRecord.writeFailure) { + throw new RuntimeException("IngestRecordBatch write failure"); + } + + if (matchingRecord.closeStreamSignal) { + sendCloseStreamSignal(); + } else if (matchingRecord.success) { + sendAck(offset); + } else { + Throwable error = + matchingRecord.error != null + ? matchingRecord.error + : new RuntimeException("Batch ingest failed"); + sendError(error); + } + } + } + } + private void sendCreateStreamSuccess() { if (ackSender != null) { EphemeralStreamResponse response = diff --git a/sdk/src/test/java/com/databricks/zerobus/ProtoIngestionTest.java b/sdk/src/test/java/com/databricks/zerobus/ProtoIngestionTest.java new file mode 100644 index 0000000..b971431 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/ProtoIngestionTest.java @@ -0,0 +1,265 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.batch.proto.BytesBatch; +import com.databricks.zerobus.batch.proto.MessageBatch; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import org.junit.jupiter.api.Test; + +/** Tests for protobuf record ingestion (single and batch) using the fluent API. 
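+ *
+ * <p>The contract exercised below, in brief (illustrative sketch; {@code row} and {@code rows}
+ * stand for {@code CityPopulationTableRow} messages built as in these tests):
+ *
+ * <pre>{@code
+ * long offset = stream.ingest(row);            // single record; returns its offset
+ * stream.waitForOffset(offset);                // blocks until that offset is acked
+ * Long batchOffset = stream.ingestBatch(MessageBatch.of(rows)); // null for an empty batch
+ * stream.flush();                              // blocks until all outstanding records are acked
+ * }</pre>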
*/ +public class ProtoIngestionTest extends BaseZerobusTest { + + @Test + public void testSingleRecordIngestAndAcknowledgment() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + + // ingest returns offset, waitForOffset blocks until ack + long offset = + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("test-city") + .setPopulation(1000) + .build()); + stream.waitForOffset(offset); + + Iterator unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } + + @Test + public void testMultipleRecordsIngestion() throws Exception { + int batchSize = 100; + + for (int i = 0; i < batchSize; i++) { + mockedGrpcServer.injectAckRecord(i); + } + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + for (int i = 0; i < batchSize; i++) { + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("city-" + i) + .setPopulation(1000 + i) + .build()); + } + + // Flush waits for all records to be acknowledged + stream.flush(); + + Iterator unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + + stream.close(); + } + + @Test + public void testBatchIngestRecords() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + List batch = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + batch.add( + CityPopulationTableRow.newBuilder() + .setCityName("city-" + i) + .setPopulation(1000 + i) + .build()); + } + + // ingestBatch returns offset, waitForOffset blocks until ack + Long offset = stream.ingestBatch(MessageBatch.of(batch)); + assertNotNull(offset); + stream.waitForOffset(offset); + + stream.close(); + } + + @Test + public void testEmptyBatchReturnsImmediately() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + List emptyBatch = new ArrayList<>(); + // Empty batch returns null immediately + Long offset = stream.ingestBatch(MessageBatch.of(emptyBatch)); + assertNull(offset); + + stream.close(); + } + + @Test + public void testMultipleRecordsAckedTogether() throws Exception { + // Server acks up to offset, not individual offsets + mockedGrpcServer.injectAckRecord(4); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + for (int i = 0; i < 5; i++) { + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("city-" + i) + .setPopulation(1000 + i) + .build()); + } + + stream.flush(); + + assertFalse(stream.getUnackedRecords().hasNext()); + stream.close(); + } + + // 
==================== Byte Array Overload Tests ==================== + + @Test + public void testSingleBytesIngestAndAcknowledgment() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + byte[] recordBytes = + CityPopulationTableRow.newBuilder() + .setCityName("test-city") + .setPopulation(1000) + .build() + .toByteArray(); + + long offset = stream.ingest(recordBytes); + stream.waitForOffset(offset); + + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } + + @Test + public void testBytesBatchIngest() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + List batch = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + batch.add( + CityPopulationTableRow.newBuilder() + .setCityName("city-" + i) + .setPopulation(1000 + i) + .build() + .toByteArray()); + } + + Long offset = stream.ingestBatch(BytesBatch.of(batch)); + assertNotNull(offset); + stream.waitForOffset(offset); + + stream.close(); + } + + @Test + public void testBytesEmptyBatchReturnsNull() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + List emptyBatch = new ArrayList<>(); + Long offset = stream.ingestBatch(BytesBatch.of(emptyBatch)); + assertNull(offset); + + stream.close(); + } + + @Test + public void testBytesNullRecordThrows() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertThrows(ZerobusException.class, () -> stream.ingest((byte[]) null)); + stream.close(); + } + + @Test + public void testBytesNullBatchThrows() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertThrows(ZerobusException.class, () -> stream.ingestBatch((BytesBatch) null)); + stream.close(); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/StreamCreationTest.java b/sdk/src/test/java/com/databricks/zerobus/StreamCreationTest.java new file mode 100644 index 0000000..a5dd79c --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/StreamCreationTest.java @@ -0,0 +1,221 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.auth.HeadersProvider; +import com.databricks.zerobus.auth.OAuthHeadersProvider; +import com.databricks.zerobus.stream.JsonZerobusStream; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import com.databricks.zerobus.tls.TlsConfig; +import 
com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; +import org.junit.jupiter.api.Test; + +/** Tests for stream creation with various configurations using fluent API. */ +public class StreamCreationTest extends BaseZerobusTest { + + @Test + public void testBasicStreamCreation() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } + + @Test + public void testStreamIdAssigned() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + String streamId = stream.getStreamId(); + assertNotNull(streamId); + assertFalse(streamId.isEmpty()); + + stream.close(); + } + + @Test + public void testCreateStreamWithHeadersProvider() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + HeadersProvider headersProvider = + new OAuthHeadersProvider( + "test.schema.table", + "workspace-id", + "https://test.cloud.databricks.com", + "client-id", + "client-secret"); + + // Use unauthenticated() path with custom headers provider + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test.schema.table") + .unauthenticated() + .headersProvider(headersProvider) + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + + long offset = + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("test-city") + .setPopulation(1000) + .build()); + stream.waitForOffset(offset); + + stream.close(); + + verify(zerobusSdkStubFactory, times(1)) + .createStub( + anyString(), any(HeadersProvider.class), any(TlsConfig.class), anyInt(), anyString()); + } + + @Test + public void testCustomHeadersProvider() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + HeadersProvider customProvider = + new HeadersProvider() { + @Override + public java.util.Map getHeaders() { + java.util.Map headers = new java.util.HashMap<>(); + headers.put("authorization", "Bearer custom-token"); + headers.put("x-databricks-zerobus-table-name", "test.schema.table"); + headers.put("x-custom-header", "custom-value"); + return headers; + } + }; + + // Use unauthenticated() path with custom headers provider + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test.schema.table") + .unauthenticated() + .headersProvider(customProvider) + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + + java.util.Map headers = customProvider.getHeaders(); + assertEquals(3, headers.size()); + assertEquals("Bearer custom-token", headers.get("authorization")); + assertEquals("test.schema.table", headers.get("x-databricks-zerobus-table-name")); + assertEquals("custom-value", headers.get("x-custom-header")); + + stream.close(); + } + + @Test + public void testOAuthHeadersProviderGetHeaders() throws NonRetriableException { + OAuthHeadersProvider provider = + new OAuthHeadersProvider( + "catalog.schema.table", + "workspace-id", + 
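// Argument order, as used throughout this file: table name, workspace ID, workspace URL,
+            // client ID, client secret.
+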
"https://test.cloud.databricks.com", + "client-id", + "client-secret"); + + java.util.Map headers = provider.getHeaders(); + + assertTrue(headers.containsKey("authorization")); + assertTrue(headers.get("authorization").startsWith("Bearer ")); + assertEquals("fake-token-for-testing", headers.get("authorization").substring(7)); + + assertTrue(headers.containsKey("x-databricks-zerobus-table-name")); + assertEquals("catalog.schema.table", headers.get("x-databricks-zerobus-table-name")); + } + + @Test + public void testJsonStreamCreation() throws Exception { + JsonZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .json() + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } + + @Test + public void testDynamicProtoStreamCreation() throws Exception { + // Build descriptor programmatically + DescriptorProtos.FieldDescriptorProto cityNameField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("city_name") + .setNumber(1) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_STRING) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.FieldDescriptorProto populationField = + DescriptorProtos.FieldDescriptorProto.newBuilder() + .setName("population") + .setNumber(2) + .setType(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT64) + .setLabel(DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL) + .build(); + + DescriptorProtos.DescriptorProto messageType = + DescriptorProtos.DescriptorProto.newBuilder() + .setName("DynamicCityPopulation") + .addField(cityNameField) + .addField(populationField) + .build(); + + DescriptorProtos.FileDescriptorProto fileDescriptorProto = + DescriptorProtos.FileDescriptorProto.newBuilder() + .setName("dynamic_city.proto") + .addMessageType(messageType) + .build(); + + Descriptors.FileDescriptor fileDescriptor = + Descriptors.FileDescriptor.buildFrom( + fileDescriptorProto, new Descriptors.FileDescriptor[] {}); + Descriptors.Descriptor descriptor = + fileDescriptor.findMessageTypeByName("DynamicCityPopulation"); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .dynamicProto(descriptor) + .build() + .get(); + + assertEquals(StreamState.OPENED, stream.getState()); + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/StreamLifecycleTest.java b/sdk/src/test/java/com/databricks/zerobus/StreamLifecycleTest.java new file mode 100644 index 0000000..4730717 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/StreamLifecycleTest.java @@ -0,0 +1,137 @@ +package com.databricks.zerobus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; + +import com.databricks.test.table.TestTableRow.CityPopulationTableRow; +import com.databricks.zerobus.stream.ProtoZerobusStream; +import java.util.Iterator; +import org.junit.jupiter.api.Test; + +/** Tests for stream lifecycle operations: flush, close, state transitions. 
*/ +public class StreamLifecycleTest extends BaseZerobusTest { + + @Test + public void testFlushWaitsForAllAcknowledgments() throws Exception { + int numRecords = 10; + mockedGrpcServer.injectAckRecord(numRecords - 1); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + for (int i = 0; i < numRecords; i++) { + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("device-" + i) + .setPopulation(20 + i) + .build()); + } + + stream.flush(); + + Iterator unackedRecords = stream.getUnackedRecords(); + assertFalse(unackedRecords.hasNext()); + + stream.close(); + } + + @Test + public void testEmptyFlushReturnsImmediately() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + long startTime = System.currentTimeMillis(); + stream.flush(); + long flushDuration = System.currentTimeMillis() - startTime; + + assertTrue( + flushDuration < 100, + "Expected flush to return immediately, but took " + flushDuration + "ms"); + + assertEquals(StreamState.OPENED, stream.getState()); + stream.close(); + } + + @Test + public void testIdempotentClose() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + // Close multiple times - should not throw + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + } + + @Test + public void testFlushAfterCloseReturnsImmediately() throws Exception { + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + stream.close(); + assertEquals(StreamState.CLOSED, stream.getState()); + + long startTime = System.currentTimeMillis(); + stream.flush(); + long duration = System.currentTimeMillis() - startTime; + + assertTrue(duration < 100, "Expected flush to return immediately, took " + duration + "ms"); + } + + @Test + public void testGrpcStreamIsCancelledOnClose() throws Exception { + mockedGrpcServer.injectAckRecord(0); + + ProtoZerobusStream stream = + zerobusSdk + .streamBuilder("test-table") + .clientCredentials("client-id", "client-secret") + .recovery(false) + .compiledProto(CityPopulationTableRow.getDefaultInstance()) + .build() + .get(); + + long offset = + stream.ingest( + CityPopulationTableRow.newBuilder() + .setCityName("test-city") + .setPopulation(1000) + .build()); + stream.waitForOffset(offset); + + stream.close(); + + verify(spiedStream, times(1)).cancel(anyString(), any()); + verify(spiedStream, times(1)).onCompleted(); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/ZerobusSdkStubFactoryTest.java b/sdk/src/test/java/com/databricks/zerobus/ZerobusSdkStubFactoryTest.java new file mode 100644 index 0000000..e4bf569 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/ZerobusSdkStubFactoryTest.java @@ -0,0 +1,84 @@ +package com.databricks.zerobus; + +import static 
org.junit.jupiter.api.Assertions.*; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** Tests for ZerobusSdkStubFactory validation logic. */ +class ZerobusSdkStubFactoryTest { + + @Test + void testValidateTableNameHeader_Success() throws NonRetriableException { + Map headers = new HashMap<>(); + headers.put("x-databricks-zerobus-table-name", "catalog.schema.table"); + headers.put("authorization", "Bearer token"); + + // Should not throw + ZerobusSdkStubFactory.validateTableNameHeader(headers, "catalog.schema.table"); + } + + @Test + void testValidateTableNameHeader_MissingHeader() { + Map headers = new HashMap<>(); + headers.put("authorization", "Bearer token"); + // Missing x-databricks-zerobus-table-name header + + NonRetriableException exception = + assertThrows( + NonRetriableException.class, + () -> ZerobusSdkStubFactory.validateTableNameHeader(headers, "catalog.schema.table")); + + assertTrue(exception.getMessage().contains("must include")); + assertTrue(exception.getMessage().contains("x-databricks-zerobus-table-name")); + } + + @Test + void testValidateTableNameHeader_Mismatch() { + Map headers = new HashMap<>(); + headers.put("x-databricks-zerobus-table-name", "wrong.table.name"); + headers.put("authorization", "Bearer token"); + + NonRetriableException exception = + assertThrows( + NonRetriableException.class, + () -> ZerobusSdkStubFactory.validateTableNameHeader(headers, "catalog.schema.table")); + + assertTrue(exception.getMessage().contains("Table name mismatch")); + assertTrue(exception.getMessage().contains("wrong.table.name")); + assertTrue(exception.getMessage().contains("catalog.schema.table")); + } + + @Test + void testValidateTableNameHeader_EmptyTableName() { + Map headers = new HashMap<>(); + headers.put("x-databricks-zerobus-table-name", ""); + headers.put("authorization", "Bearer token"); + + NonRetriableException exception = + assertThrows( + NonRetriableException.class, + () -> ZerobusSdkStubFactory.validateTableNameHeader(headers, "catalog.schema.table")); + + assertTrue(exception.getMessage().contains("Table name mismatch")); + } + + @Test + void testValidateTableNameHeader_CaseSensitive() { + Map headers = new HashMap<>(); + headers.put("x-databricks-zerobus-table-name", "Catalog.Schema.Table"); + + NonRetriableException exception = + assertThrows( + NonRetriableException.class, + () -> ZerobusSdkStubFactory.validateTableNameHeader(headers, "catalog.schema.table")); + + assertTrue(exception.getMessage().contains("Table name mismatch")); + } + + @Test + void testTableNameHeaderConstant() { + assertEquals("x-databricks-zerobus-table-name", ZerobusSdkStubFactory.TABLE_NAME_HEADER); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/auth/TokenFactoryTest.java b/sdk/src/test/java/com/databricks/zerobus/auth/TokenFactoryTest.java new file mode 100644 index 0000000..8bdc682 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/auth/TokenFactoryTest.java @@ -0,0 +1,541 @@ +package com.databricks.zerobus.auth; + +import static org.junit.jupiter.api.Assertions.*; + +import com.databricks.zerobus.NonRetriableException; +import com.databricks.zerobus.common.http.HttpClient; +import java.io.IOException; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** + * Tests for TokenFactory validation logic. + * + *
<p>
    These tests verify parameter validation without making actual HTTP calls. + */ +class TokenFactoryTest { + + private static final String VALID_TABLE_NAME = "catalog.schema.table"; + private static final String VALID_WORKSPACE_ID = "workspace-123"; + private static final String VALID_WORKSPACE_URL = "https://workspace.databricks.com"; + private static final String VALID_CLIENT_ID = "client-id"; + private static final String VALID_CLIENT_SECRET = "client-secret"; + + // ==================== Mock HttpClient ==================== + + /** Mock HttpClient that returns a configurable response. */ + private static class MockHttpClient implements HttpClient { + private HttpResponse response; + private IOException exception; + private String lastUrl; + private String lastFormData; + private Map lastHeaders; + + void setResponse(HttpResponse response) { + this.response = response; + this.exception = null; + } + + void setException(IOException exception) { + this.exception = exception; + this.response = null; + } + + @Override + public HttpResponse post(String url, String formData, Map headers) + throws IOException { + this.lastUrl = url; + this.lastFormData = formData; + this.lastHeaders = headers; + if (exception != null) { + throw exception; + } + return response; + } + + @Override + public HttpResponse get(String url, Map headers) throws IOException { + this.lastUrl = url; + this.lastHeaders = headers; + if (exception != null) { + throw exception; + } + return response; + } + } + + // ==================== Table Name Validation ==================== + + @Test + void testNullTableName() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + null, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("tableName cannot be null")); + } + + @Test + void testEmptyTableName() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("tableName cannot be blank")); + } + + @Test + void testBlankTableName() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + " ", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("tableName cannot be blank")); + } + + @Test + void testTableNameSinglePart() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "table", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("must be in the format of catalog.schema.table")); + } + + @Test + void testTableNameTwoParts() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "schema.table", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("must be in the format of catalog.schema.table")); + } + + @Test + void testTableNameFourParts() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "a.b.c.d", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + 
assertTrue(ex.getMessage().contains("must be in the format of catalog.schema.table")); + } + + @Test + void testTableNameEmptyCatalog() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + ".schema.table", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("empty parts")); + } + + @Test + void testTableNameEmptySchema() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "catalog..table", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("empty parts")); + } + + @Test + void testTableNameEmptyTable() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + "catalog.schema.", + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("empty parts")); + } + + // ==================== Workspace ID Validation ==================== + + @Test + void testNullWorkspaceId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + null, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceId cannot be null")); + } + + @Test + void testEmptyWorkspaceId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + "", + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceId cannot be blank")); + } + + @Test + void testBlankWorkspaceId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + " ", + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceId cannot be blank")); + } + + // ==================== Workspace URL Validation ==================== + + @Test + void testNullWorkspaceUrl() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + null, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceUrl cannot be null")); + } + + @Test + void testEmptyWorkspaceUrl() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + "", + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceUrl cannot be blank")); + } + + @Test + void testBlankWorkspaceUrl() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + " ", + VALID_CLIENT_ID, + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("workspaceUrl cannot be blank")); + } + + // ==================== Client ID Validation ==================== + + @Test + void testNullClientId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + null, + VALID_CLIENT_SECRET)); + 
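// Validation fails fast: per the class javadoc, these checks happen before any HTTP call.
+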
assertTrue(ex.getMessage().contains("clientId cannot be null")); + } + + @Test + void testEmptyClientId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + "", + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("clientId cannot be blank")); + } + + @Test + void testBlankClientId() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + " ", + VALID_CLIENT_SECRET)); + assertTrue(ex.getMessage().contains("clientId cannot be blank")); + } + + // ==================== Client Secret Validation ==================== + + @Test + void testNullClientSecret() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + null)); + assertTrue(ex.getMessage().contains("clientSecret cannot be null")); + } + + @Test + void testEmptyClientSecret() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + "")); + assertTrue(ex.getMessage().contains("clientSecret cannot be blank")); + } + + @Test + void testBlankClientSecret() { + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + " ")); + assertTrue(ex.getMessage().contains("clientSecret cannot be blank")); + } + + // ==================== parseTableName Tests ==================== + + @Test + void testParseTableNameValid() throws NonRetriableException { + String[] parts = TokenFactory.parseTableName("catalog.schema.table"); + assertEquals(3, parts.length); + assertEquals("catalog", parts[0]); + assertEquals("schema", parts[1]); + assertEquals("table", parts[2]); + } + + @Test + void testParseTableNameWithSpecialChars() throws NonRetriableException { + String[] parts = TokenFactory.parseTableName("my_catalog.my_schema.my_table"); + assertEquals("my_catalog", parts[0]); + assertEquals("my_schema", parts[1]); + assertEquals("my_table", parts[2]); + } + + // ==================== buildAuthorizationDetails Tests ==================== + + @Test + void testBuildAuthorizationDetails() { + String details = TokenFactory.buildAuthorizationDetails("cat", "sch", "tbl"); + + assertTrue(details.contains("\"object_full_path\": \"cat\"")); + assertTrue(details.contains("\"object_full_path\": \"cat.sch\"")); + assertTrue(details.contains("\"object_full_path\": \"cat.sch.tbl\"")); + assertTrue(details.contains("\"privileges\": [\"USE CATALOG\"]")); + assertTrue(details.contains("\"privileges\": [\"USE SCHEMA\"]")); + assertTrue(details.contains("\"privileges\": [\"SELECT\", \"MODIFY\"]")); + } + + // ==================== extractAccessToken Tests ==================== + + @Test + void testExtractAccessTokenValid() throws NonRetriableException { + String response = "{\"access_token\": \"my-jwt-token\", \"token_type\": \"Bearer\"}"; + String token = TokenFactory.extractAccessToken(response); + assertEquals("my-jwt-token", token); + } + + @Test + void testExtractAccessTokenWithWhitespace() throws NonRetriableException { + String response = "{\"access_token\" : \"token-with-spaces\" }"; 
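+    // extractAccessToken should tolerate extra whitespace around the JSON separators; that is
+    // exactly what this input exercises.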
+ String token = TokenFactory.extractAccessToken(response); + assertEquals("token-with-spaces", token); + } + + @Test + void testExtractAccessTokenMissing() { + String response = "{\"token_type\": \"Bearer\"}"; + NonRetriableException ex = + assertThrows(NonRetriableException.class, () -> TokenFactory.extractAccessToken(response)); + assertTrue(ex.getMessage().contains("No access token received")); + } + + @Test + void testExtractAccessTokenNullResponse() { + NonRetriableException ex = + assertThrows(NonRetriableException.class, () -> TokenFactory.extractAccessToken(null)); + assertTrue(ex.getMessage().contains("No response body received")); + } + + // ==================== HTTP Client Tests ==================== + + @Test + void testGetZerobusTokenSuccess() throws NonRetriableException { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse( + new HttpClient.HttpResponse(200, "{\"access_token\": \"test-token\"}", null)); + + String token = + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET, + mockClient); + + assertEquals("test-token", token); + assertEquals(VALID_WORKSPACE_URL + "/oidc/v1/token", mockClient.lastUrl); + assertTrue(mockClient.lastFormData.contains("grant_type=client_credentials")); + assertTrue(mockClient.lastFormData.contains("scope=all-apis")); + assertTrue(mockClient.lastHeaders.containsKey("Authorization")); + assertTrue(mockClient.lastHeaders.get("Authorization").startsWith("Basic ")); + } + + @Test + void testGetZerobusTokenHttpError() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse(new HttpClient.HttpResponse(401, null, "Unauthorized")); + + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET, + mockClient)); + + assertTrue(ex.getMessage().contains("OAuth request failed with status 401")); + assertTrue(ex.getMessage().contains("Unauthorized")); + } + + @Test + void testGetZerobusTokenHttpException() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setException(new IOException("Connection refused")); + + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET, + mockClient)); + + assertTrue(ex.getMessage().contains("Unexpected error getting OAuth token")); + } + + @Test + void testGetZerobusTokenNoTokenInResponse() { + MockHttpClient mockClient = new MockHttpClient(); + mockClient.setResponse(new HttpClient.HttpResponse(200, "{\"error\": \"no token\"}", null)); + + NonRetriableException ex = + assertThrows( + NonRetriableException.class, + () -> + TokenFactory.getZerobusToken( + VALID_TABLE_NAME, + VALID_WORKSPACE_ID, + VALID_WORKSPACE_URL, + VALID_CLIENT_ID, + VALID_CLIENT_SECRET, + mockClient)); + + assertTrue(ex.getMessage().contains("No access token received")); + } +} diff --git a/sdk/src/test/java/com/databricks/zerobus/stream/LandingZoneTest.java b/sdk/src/test/java/com/databricks/zerobus/stream/LandingZoneTest.java new file mode 100644 index 0000000..5f36046 --- /dev/null +++ b/sdk/src/test/java/com/databricks/zerobus/stream/LandingZoneTest.java @@ -0,0 +1,730 @@ +package com.databricks.zerobus.stream; + +import static org.junit.jupiter.api.Assertions.*; + +import 
java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** Comprehensive tests for LandingZone thread safety and functionality. */ +class LandingZoneTest { + + // Helper to create a test batch + private InflightBatch createBatch(long offsetId, String... records) { + List recordList = records.length > 0 ? Arrays.asList(records) : null; + EncodedBatch encoded = EncodedBatch.jsonSingle("test"); + return new InflightBatch<>(recordList, encoded, offsetId, new CompletableFuture<>()); + } + + // ==================== Basic Operations ==================== + + @Test + void testAddAndObserve() throws InterruptedException { + LandingZone zone = new LandingZone<>(10); + + InflightBatch batch = createBatch(1, "record1"); + zone.add(batch); + + assertEquals(1, zone.pendingCount()); + assertEquals(0, zone.inflightCount()); + assertEquals(1, zone.totalCount()); + + InflightBatch observed = zone.observe(); + assertSame(batch, observed); + + assertEquals(0, zone.pendingCount()); + assertEquals(1, zone.inflightCount()); + assertEquals(1, zone.totalCount()); + } + + @Test + void testRemoveObserved() throws InterruptedException { + LandingZone zone = new LandingZone<>(10); + + zone.add(createBatch(1, "record1")); + zone.observe(); + + assertEquals(9, zone.availablePermits()); + + InflightBatch removed = zone.removeObserved(); + assertNotNull(removed); + assertEquals(1, removed.offsetId); + + assertEquals(10, zone.availablePermits()); + assertEquals(0, zone.inflightCount()); + } + + @Test + void testRemoveObservedEmpty() { + LandingZone zone = new LandingZone<>(10); + assertNull(zone.removeObserved()); + } + + @Test + void testMultipleBatches() throws InterruptedException { + LandingZone zone = new LandingZone<>(10); + + zone.add(createBatch(1, "a")); + zone.add(createBatch(2, "b")); + zone.add(createBatch(3, "c")); + + assertEquals(3, zone.pendingCount()); + + // Observe in order + assertEquals(1, zone.observe().offsetId); + assertEquals(2, zone.observe().offsetId); + assertEquals(3, zone.observe().offsetId); + + assertEquals(0, zone.pendingCount()); + assertEquals(3, zone.inflightCount()); + + // Remove in order + assertEquals(1, zone.removeObserved().offsetId); + assertEquals(2, zone.removeObserved().offsetId); + assertEquals(3, zone.removeObserved().offsetId); + + assertEquals(0, zone.totalCount()); + } + + // ==================== Backpressure ==================== + + @Test + @Timeout(5) + void testBackpressureBlocks() throws InterruptedException { + LandingZone zone = new LandingZone<>(2); + + zone.add(createBatch(1)); + zone.add(createBatch(2)); + + assertEquals(0, zone.availablePermits()); + + // Third add should block + AtomicBoolean added = new AtomicBoolean(false); + Thread adder = + new Thread( + () -> { + try { + zone.add(createBatch(3)); + added.set(true); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + }); + adder.start(); + + Thread.sleep(100); + assertFalse(added.get(), "Add should be blocked"); + + // Observe and remove to release permit + zone.observe(); + zone.removeObserved(); + + adder.join(1000); + assertTrue(added.get(), "Add should complete after permit released"); + } + + @Test + void testTryAddTimeout() throws InterruptedException { + LandingZone 
zone = new LandingZone<>(1);
+
+    zone.add(createBatch(1));
+
+    // Should time out since capacity is full
+    long start = System.currentTimeMillis();
+    boolean result = zone.tryAdd(createBatch(2), 100, TimeUnit.MILLISECONDS);
+    long elapsed = System.currentTimeMillis() - start;
+
+    assertFalse(result);
+    assertTrue(elapsed >= 90, "Should wait approximately 100ms");
+  }
+
+  @Test
+  void testTryAddSuccess() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(2);
+
+    assertTrue(zone.tryAdd(createBatch(1), 100, TimeUnit.MILLISECONDS));
+    assertEquals(1, zone.pendingCount());
+  }
+
+  // ==================== Observe Blocking ====================
+
+  @Test
+  @Timeout(5)
+  void testObserveBlocks() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    AtomicReference<InflightBatch<String>> observed = new AtomicReference<>();
+    Thread observer =
+        new Thread(
+            () -> {
+              try {
+                observed.set(zone.observe());
+              } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+              }
+            });
+    observer.start();
+
+    Thread.sleep(100);
+    assertNull(observed.get(), "Observe should be blocked");
+
+    // Add a batch
+    zone.add(createBatch(42));
+
+    observer.join(1000);
+    assertNotNull(observed.get());
+    assertEquals(42, observed.get().offsetId);
+  }
+
+  @Test
+  void testTryObserveTimeout() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    long start = System.currentTimeMillis();
+    InflightBatch<String> result = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    long elapsed = System.currentTimeMillis() - start;
+
+    assertNull(result);
+    assertTrue(elapsed >= 90, "Should wait approximately 100ms");
+  }
+
+  @Test
+  void testTryObserveSuccess() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    zone.add(createBatch(1));
+
+    InflightBatch<String> result = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    assertNotNull(result);
+    assertEquals(1, result.offsetId);
+  }
+
+  // ==================== Recovery ====================
+
+  @Test
+  void testResetObserve() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    zone.add(createBatch(1, "a"));
+    zone.add(createBatch(2, "b"));
+    zone.add(createBatch(3, "c"));
+
+    // Observe first two
+    zone.observe();
+    zone.observe();
+
+    assertEquals(1, zone.pendingCount());
+    assertEquals(2, zone.inflightCount());
+
+    // Reset moves inflight back to pending
+    zone.resetObserve();
+
+    assertEquals(3, zone.pendingCount());
+    assertEquals(0, zone.inflightCount());
+
+    // Order should be preserved: 1, 2, 3
+    assertEquals(1, zone.observe().offsetId);
+    assertEquals(2, zone.observe().offsetId);
+    assertEquals(3, zone.observe().offsetId);
+  }
+
+  @Test
+  void testResetObserveEmpty() {
+    LandingZone<String> zone = new LandingZone<>(10);
+    zone.resetObserve(); // Should not throw
+    assertEquals(0, zone.totalCount());
+  }
+
+  // ==================== Failure Handling ====================
+
+  @Test
+  void testRemoveAll() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    zone.add(createBatch(1, "a"));
+    zone.add(createBatch(2, "b"));
+    zone.observe(); // Move 1 to inflight
+    zone.add(createBatch(3, "c"));
+
+    assertEquals(2, zone.pendingCount()); // 2, 3
+    assertEquals(1, zone.inflightCount()); // 1
+
+    List<InflightBatch<String>> all = zone.removeAll();
+
+    assertEquals(3, all.size());
+    // Inflight first, then pending
+    assertEquals(1, all.get(0).offsetId);
+    assertEquals(2, all.get(1).offsetId);
+    assertEquals(3, all.get(2).offsetId);
+
+    assertEquals(0, zone.totalCount());
+    assertEquals(10, zone.availablePermits());
+  }
+
+  @Test
+  void testRemoveAllEmpty() {
+    LandingZone<String> zone = new LandingZone<>(10);
+    List<InflightBatch<String>> all = zone.removeAll();
+    assertTrue(all.isEmpty());
+  }
+
+  // ==================== Record Access ====================
+
+  @Test
+  void testGetAllRecords() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    zone.add(createBatch(1, "a", "b"));
+    zone.add(createBatch(2, "c"));
+    zone.observe(); // Move 1 to inflight
+    zone.add(createBatch(3, "d", "e", "f"));
+
+    List<String> records = zone.getAllRecords();
+
+    // Should include all records from both queues
+    assertEquals(6, records.size());
+    assertTrue(records.containsAll(Arrays.asList("a", "b", "c", "d", "e", "f")));
+  }
+
+  @Test
+  void testGetAllRecordsWithNullsInPending() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    // Batch with no records (null)
+    zone.add(createBatch(1));
+    // Batch with records
+    zone.add(createBatch(2, "x", "y"));
+
+    List<String> records = zone.getAllRecords();
+
+    assertEquals(2, records.size());
+    assertTrue(records.contains("x"));
+    assertTrue(records.contains("y"));
+  }
+
+  @Test
+  void testGetAllRecordsWithNullsInInflight() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    // Add batch with no records (null) and move to inflight
+    zone.add(createBatch(1));
+    zone.observe(); // Moves to inflight
+
+    // Add batch with records to pending
+    zone.add(createBatch(2, "a", "b"));
+
+    List<String> records = zone.getAllRecords();
+
+    // Should only contain records from batch 2 (pending queue)
+    // Batch 1 in inflight has null records
+    assertEquals(2, records.size());
+    assertTrue(records.contains("a"));
+    assertTrue(records.contains("b"));
+  }
+
+  @Test
+  void testPeekAll() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    zone.add(createBatch(1));
+    zone.add(createBatch(2));
+    zone.observe(); // Move 1 to inflight
+
+    List<InflightBatch<String>> all = zone.peekAll();
+
+    assertEquals(2, all.size());
+    assertEquals(1, all.get(0).offsetId); // Inflight first
+    assertEquals(2, all.get(1).offsetId); // Then pending
+
+    // Should not remove anything
+    assertEquals(2, zone.totalCount());
+  }
+
+  // ==================== Close Behavior ====================
+
+  @Test
+  void testClose() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    assertFalse(zone.isClosed());
+    zone.close();
+    assertTrue(zone.isClosed());
+  }
+
+  @Test
+  void testAddAfterClose() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+    zone.close();
+
+    assertThrows(IllegalStateException.class, () -> zone.add(createBatch(1)));
+  }
+
+  @Test
+  void testTryAddAfterClose() {
+    LandingZone<String> zone = new LandingZone<>(10);
+    zone.close();
+
+    assertThrows(
+        IllegalStateException.class, () -> zone.tryAdd(createBatch(1), 100, TimeUnit.MILLISECONDS));
+  }
+
+  @Test
+  @Timeout(5)
+  void testTryAddClosedWhileWaitingForSemaphore() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(1);
+    zone.add(createBatch(1)); // Fill capacity
+
+    AtomicReference<Exception> error = new AtomicReference<>();
+    Thread adder =
+        new Thread(
+            () -> {
+              try {
+                zone.tryAdd(createBatch(2), 5, TimeUnit.SECONDS);
+              } catch (Exception e) {
+                error.set(e);
+              }
+            });
+    adder.start();
+
+    Thread.sleep(100);
+    zone.close();
+
+    // Release the semaphore so the thread can proceed to the closed check inside lock
+    zone.observe();
+    zone.removeObserved();
+
+    adder.join(1000);
+    assertNotNull(error.get());
+    assertTrue(error.get() instanceof IllegalStateException);
+  }
+
+  @Test
+  @Timeout(5)
+  void testObserveUnblocksOnClose() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    AtomicBoolean interrupted = new AtomicBoolean(false);
+    Thread observer =
+        new Thread(
+            () -> {
+              try {
+                zone.observe();
+              } catch (InterruptedException e) {
+                interrupted.set(true);
+              }
+            });
+    observer.start();
+
+    Thread.sleep(100);
+    zone.close();
+
+    observer.join(1000);
+    assertTrue(interrupted.get(), "Observe should throw InterruptedException on close");
+  }
+
+  @Test
+  void testTryObserveReturnsNullOnClose() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+    zone.close();
+
+    InflightBatch<String> result = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    assertNull(result);
+  }
+
+  @Test
+  @Timeout(5)
+  void testTryObserveUnblocksOnClose() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    AtomicReference<InflightBatch<String>> result = new AtomicReference<>();
+    AtomicBoolean completed = new AtomicBoolean(false);
+    Thread observer =
+        new Thread(
+            () -> {
+              try {
+                result.set(zone.tryObserve(5, TimeUnit.SECONDS));
+                completed.set(true);
+              } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+              }
+            });
+    observer.start();
+
+    Thread.sleep(100);
+    assertFalse(completed.get(), "tryObserve should still be waiting");
+
+    zone.close();
+
+    observer.join(1000);
+    assertTrue(completed.get(), "tryObserve should complete after close");
+    assertNull(result.get(), "Result should be null when closed with empty queue");
+  }
+
+  @Test
+  void testTryObserveClosedWithPendingBatches() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(10);
+
+    // Add batches before closing
+    zone.add(createBatch(1, "a"));
+    zone.add(createBatch(2, "b"));
+
+    // Close zone - but batches should still be retrievable
+    zone.close();
+
+    // tryObserve should still return batches even when closed
+    InflightBatch<String> batch1 = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    assertNotNull(batch1, "Should get batch even when closed");
+    assertEquals(1, batch1.offsetId);
+
+    InflightBatch<String> batch2 = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    assertNotNull(batch2, "Should get second batch even when closed");
+    assertEquals(2, batch2.offsetId);
+
+    // Now queue is empty and closed
+    InflightBatch<String> batch3 = zone.tryObserve(100, TimeUnit.MILLISECONDS);
+    assertNull(batch3, "Should return null when closed and empty");
+  }
+
+  @Test
+  @Timeout(5)
+  void testCloseWhileAddBlocked() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(1);
+    zone.add(createBatch(1));
+
+    AtomicReference<Exception> error = new AtomicReference<>();
+    Thread adder =
+        new Thread(
+            () -> {
+              try {
+                zone.add(createBatch(2));
+              } catch (Exception e) {
+                error.set(e);
+              }
+            });
+    adder.start();
+
+    Thread.sleep(100);
+    zone.close();
+
+    // Release the semaphore so the thread can proceed to the lock
+    zone.observe();
+    zone.removeObserved();
+
+    adder.join(1000);
+    assertNotNull(error.get());
+    assertTrue(error.get() instanceof IllegalStateException);
+  }
+
+  // ==================== Concurrent Access ====================
+
+  @Test
+  @Timeout(10)
+  void testConcurrentAddAndObserve() throws InterruptedException {
+    LandingZone<String> zone = new LandingZone<>(100);
+    int numBatches = 1000;
+    AtomicInteger addedCount = new AtomicInteger(0);
+    AtomicInteger observedCount = new AtomicInteger(0);
+    CountDownLatch done = new CountDownLatch(2);
+
+    // Producer thread
+    Thread producer =
+        new Thread(
+            () -> {
+              try {
+                for (int i = 0; i <
numBatches; i++) { + zone.add(createBatch(i, "record" + i)); + addedCount.incrementAndGet(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } finally { + done.countDown(); + } + }); + + // Consumer thread + Thread consumer = + new Thread( + () -> { + try { + for (int i = 0; i < numBatches; i++) { + zone.observe(); + zone.removeObserved(); + observedCount.incrementAndGet(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } finally { + done.countDown(); + } + }); + + producer.start(); + consumer.start(); + + done.await(); + + assertEquals(numBatches, addedCount.get()); + assertEquals(numBatches, observedCount.get()); + assertEquals(0, zone.totalCount()); + } + + @Test + @Timeout(10) + void testMultipleProducers() throws InterruptedException { + int numProducers = 4; + int batchesPerProducer = 25; + int totalBatches = numProducers * batchesPerProducer; + LandingZone zone = new LandingZone<>(totalBatches); // Enough capacity + CountDownLatch startLatch = new CountDownLatch(1); + CountDownLatch doneLatch = new CountDownLatch(numProducers); + AtomicInteger totalAdded = new AtomicInteger(0); + + List producers = new ArrayList<>(); + for (int p = 0; p < numProducers; p++) { + final int producerId = p; + Thread producer = + new Thread( + () -> { + try { + startLatch.await(); + for (int i = 0; i < batchesPerProducer; i++) { + zone.add(createBatch(producerId * 1000 + i)); + totalAdded.incrementAndGet(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } finally { + doneLatch.countDown(); + } + }); + producers.add(producer); + producer.start(); + } + + startLatch.countDown(); + doneLatch.await(); + + assertEquals(totalBatches, totalAdded.get()); + assertEquals(totalBatches, zone.pendingCount()); + } + + @Test + @Timeout(10) + void testConcurrentResetObserve() throws InterruptedException { + LandingZone zone = new LandingZone<>(100); + + // Add batches + for (int i = 0; i < 50; i++) { + zone.add(createBatch(i)); + } + + // Observer thread + AtomicInteger observeCount = new AtomicInteger(0); + AtomicBoolean running = new AtomicBoolean(true); + + Thread observer = + new Thread( + () -> { + while (running.get()) { + try { + InflightBatch batch = zone.tryObserve(10, TimeUnit.MILLISECONDS); + if (batch != null) { + observeCount.incrementAndGet(); + } + } catch (InterruptedException e) { + break; + } + } + }); + + // Resetter thread + Thread resetter = + new Thread( + () -> { + for (int i = 0; i < 10; i++) { + zone.resetObserve(); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + break; + } + } + }); + + observer.start(); + resetter.start(); + + resetter.join(); + running.set(false); + observer.join(); + + // No exceptions should have occurred, and counts should be consistent + assertEquals(zone.pendingCount() + zone.inflightCount(), zone.totalCount()); + } + + // ==================== Edge Cases ==================== + + @Test + void testSingleCapacity() throws InterruptedException { + LandingZone zone = new LandingZone<>(1); + + zone.add(createBatch(1)); + assertEquals(0, zone.availablePermits()); + + zone.observe(); + zone.removeObserved(); + assertEquals(1, zone.availablePermits()); + + zone.add(createBatch(2)); + assertEquals(0, zone.availablePermits()); + } + + @Test + void testBatchSize() throws InterruptedException { + LandingZone zone = new LandingZone<>(10); + + // Create batch with multiple records + List records = Arrays.asList("a", "b", "c"); + EncodedBatch encoded = 
EncodedBatch.jsonBatch(Arrays.asList("a", "b", "c")); + InflightBatch batch = + new InflightBatch<>(records, encoded, 1, new CompletableFuture<>()); + + zone.add(batch); + InflightBatch observed = zone.observe(); + + assertEquals(3, observed.size()); + assertEquals(3, observed.records.size()); + } + + @Test + void testInflightBatchFields() { + List records = Collections.singletonList("test"); + EncodedBatch encoded = EncodedBatch.jsonSingle("test"); + CompletableFuture promise = new CompletableFuture<>(); + + InflightBatch batch = new InflightBatch<>(records, encoded, 42L, promise); + + assertEquals(records, batch.records); + assertSame(encoded, batch.encodedBatch); + assertEquals(42L, batch.offsetId); + assertSame(promise, batch.ackPromise); + assertEquals(1, batch.size()); + } + + @Test + void testInflightBatchNullRecords() { + EncodedBatch encoded = EncodedBatch.jsonSingle("test"); + InflightBatch batch = new InflightBatch<>(null, encoded, 1L, new CompletableFuture<>()); + + assertNull(batch.records); + assertEquals(1, batch.size()); + } +} diff --git a/src/test/proto/test_table.proto b/sdk/src/test/proto/test_table.proto similarity index 100% rename from src/test/proto/test_table.proto rename to sdk/src/test/proto/test_table.proto diff --git a/src/main/java/com/databricks/zerobus/TableProperties.java b/src/main/java/com/databricks/zerobus/TableProperties.java deleted file mode 100644 index 41d4bae..0000000 --- a/src/main/java/com/databricks/zerobus/TableProperties.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.databricks.zerobus; - -import com.google.protobuf.Descriptors; -import com.google.protobuf.Message; - -/** - * Table properties for the stream, describes the table to ingest records into. - * - * @param The type of records to be ingested (must extend Message). - */ -public class TableProperties { - private final String tableName; - private final Message defaultInstance; - - /** - * Creates a new TableProperties instance. - * - * @param tableName The name of the table to ingest records into. - * @param defaultInstance The default instance of the record type (used to get the descriptor). - */ - public TableProperties(String tableName, RecordType defaultInstance) { - this.tableName = tableName; - this.defaultInstance = defaultInstance; - } - - /** - * Returns the table name. - * - * @return the table name - */ - public String getTableName() { - return tableName; - } - - /** - * Returns the default instance. - * - * @return the default instance - */ - public Message getDefaultInstance() { - return defaultInstance; - } - - /** - * Gets the descriptor proto for the record type. - * - * @return the descriptor proto - */ - Descriptors.Descriptor getDescriptor() { - return defaultInstance.getDescriptorForType(); - } - - /** - * Gets the DescriptorProto for the record type. This is used to send the schema to the server. 
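For context on the schema-transfer step described above, this is how a DescriptorProto is derived from a generated protobuf class; a minimal sketch in which MyRecord is a hypothetical generated message type, using the same standard protobuf calls the accessors here delegate to:

    // MyRecord is a hypothetical protobuf-generated class (illustrative only).
    com.google.protobuf.DescriptorProtos.DescriptorProto schema =
        MyRecord.getDefaultInstance().getDescriptorForType().toProto();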
- * - * @return the DescriptorProto - */ - com.google.protobuf.DescriptorProtos.DescriptorProto getDescriptorProto() { - Descriptors.Descriptor descriptor = getDescriptor(); - return descriptor.toProto(); - } -} diff --git a/src/main/java/com/databricks/zerobus/TokenFactory.java b/src/main/java/com/databricks/zerobus/TokenFactory.java deleted file mode 100644 index e6a9c57..0000000 --- a/src/main/java/com/databricks/zerobus/TokenFactory.java +++ /dev/null @@ -1,162 +0,0 @@ -package com.databricks.zerobus; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.net.HttpURLConnection; -import java.net.URL; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Factory for obtaining OAuth 2.0 access tokens with Unity Catalog privileges. - * - *

<p>This class uses the OAuth 2.0 client credentials flow with authorization details to request
- * tokens scoped to specific Unity Catalog resources. The generated tokens include privileges for
- * catalog, schema, and table access required for ingestion.
- */
-public class TokenFactory {
-
-  /**
-   * Obtains an OAuth token with Unity Catalog privileges for the specified table.
-   *
-   * <p>The token request includes authorization details that grant:
-   *
-   * <ul>
-   *   <li>USE CATALOG on the table's catalog
-   *   <li>USE SCHEMA on the table's schema
-   *   <li>SELECT and MODIFY on the target table
-   * </ul>
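A minimal usage sketch of this factory under the assumption of a reachable workspace; the table name, workspace ID, and URL are illustrative:

    // Illustrative values only; getZerobusToken declares the checked
    // NonRetriableException for invalid table names or failed requests.
    String token =
        TokenFactory.getZerobusToken(
            "main.events.clicks",                        // catalog.schema.table
            "1234567890123456",                          // workspace ID
            "https://my-workspace.cloud.databricks.com", // workspace URL
            clientId,
            clientSecret);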
    - * - * @param tableName The fully qualified table name (catalog.schema.table) - * @param workspaceId The Databricks workspace ID - * @param workspaceUrl The Unity Catalog endpoint URL - * @param clientId The OAuth client ID - * @param clientSecret The OAuth client secret - * @return The OAuth access token (JWT) - * @throws NonRetriableException if the token request fails or table name is invalid - */ - public static String getZerobusToken( - String tableName, - String workspaceId, - String workspaceUrl, - String clientId, - String clientSecret) - throws NonRetriableException { - - // Parse and validate the three-part table name - String[] threePartTableName = tableName.split("\\."); - if (threePartTableName.length != 3) { - throw new NonRetriableException( - "Table name '" + tableName + "' must be in the format of catalog.schema.table"); - } - - String catalogName = threePartTableName[0]; - String schemaName = threePartTableName[1]; - String tableNameOnly = threePartTableName[2]; - - // Build authorization details using the RAR (RFC 9396) format. - // Newlines are required for proper JWT claim formatting. - String authorizationDetails = - String.format( - "[\n" - + " {\n" - + " \"type\": \"unity_catalog_privileges\",\n" - + " \"privileges\": [\"USE CATALOG\"],\n" - + " \"object_type\": \"CATALOG\",\n" - + " \"object_full_path\": \"%s\"\n" - + " },\n" - + " {\n" - + " \"type\": \"unity_catalog_privileges\",\n" - + " \"privileges\": [\"USE SCHEMA\"],\n" - + " \"object_type\": \"SCHEMA\",\n" - + " \"object_full_path\": \"%s.%s\"\n" - + " },\n" - + " {\n" - + " \"type\": \"unity_catalog_privileges\",\n" - + " \"privileges\": [\"SELECT\", \"MODIFY\"],\n" - + " \"object_type\": \"TABLE\",\n" - + " \"object_full_path\": \"%s.%s.%s\"\n" - + " }\n" - + "]", - catalogName, catalogName, schemaName, catalogName, schemaName, tableNameOnly); - - String urlString = workspaceUrl + "/oidc/v1/token"; - - try { - // Build OAuth 2.0 client credentials request with Unity Catalog authorization details - String formData = - "grant_type=client_credentials" - + "&scope=all-apis" - + "&resource=api://databricks/workspaces/" - + workspaceId - + "/zerobusDirectWriteApi" - + "&authorization_details=" - + URLEncoder.encode(authorizationDetails, "UTF-8"); - - // Encode credentials for HTTP Basic authentication - String credentials = - Base64.getEncoder() - .encodeToString((clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8)); - - HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection(); - connection.setRequestMethod("POST"); - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - connection.setRequestProperty("Authorization", "Basic " + credentials); - connection.setDoOutput(true); - - OutputStreamWriter writer = - new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8); - writer.write(formData); - writer.close(); - - int responseCode = connection.getResponseCode(); - - if (responseCode != 200) { - String errorBody = "No error details available"; - if (connection.getErrorStream() != null) { - BufferedReader errorReader = - new BufferedReader( - new InputStreamReader(connection.getErrorStream(), StandardCharsets.UTF_8)); - StringBuilder errorBuilder = new StringBuilder(); - String line; - while ((line = errorReader.readLine()) != null) { - errorBuilder.append(line).append("\n"); - } - errorReader.close(); - errorBody = errorBuilder.toString(); - } - throw new NonRetriableException( - "OAuth request failed with status " + 
responseCode + ": " + errorBody); - } - - BufferedReader reader = - new BufferedReader( - new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8)); - StringBuilder responseBody = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) { - responseBody.append(line).append("\n"); - } - reader.close(); - - // Extract access token using regex to avoid dependency on a JSON library. - // Pattern matches: "access_token": "value" with flexible whitespace. - Pattern accessTokenPattern = Pattern.compile("\"access_token\"\\s*:\\s*\"([^\"]+)\""); - Matcher matcher = accessTokenPattern.matcher(responseBody.toString()); - - if (matcher.find()) { - return matcher.group(1); - } else { - throw new NonRetriableException("No access token received from OAuth response"); - } - } catch (NonRetriableException e) { - throw e; - } catch (Exception e) { - throw new NonRetriableException("Unexpected error getting OAuth token: " + e.getMessage(), e); - } - } -} diff --git a/src/main/java/com/databricks/zerobus/ZerobusException.java b/src/main/java/com/databricks/zerobus/ZerobusException.java deleted file mode 100644 index 0aede36..0000000 --- a/src/main/java/com/databricks/zerobus/ZerobusException.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.databricks.zerobus; - -/** - * Base exception class for all Ingest API related errors. This allows clients to catch all Ingest - * API specific exceptions with a single catch block. - */ -public class ZerobusException extends Exception { - - /** - * Constructs a new ZerobusException with the specified detail message. - * - * @param message the detail message - */ - public ZerobusException(String message) { - super(message); - } - - /** - * Constructs a new ZerobusException with the specified detail message and cause. - * - * @param message the detail message - * @param cause the cause of the exception - */ - public ZerobusException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/src/main/java/com/databricks/zerobus/ZerobusSdk.java b/src/main/java/com/databricks/zerobus/ZerobusSdk.java deleted file mode 100644 index a8b5429..0000000 --- a/src/main/java/com/databricks/zerobus/ZerobusSdk.java +++ /dev/null @@ -1,289 +0,0 @@ -package com.databricks.zerobus; - -import com.google.protobuf.Message; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import java.util.Iterator; -import java.util.Random; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Supplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The main entry point for the Zerobus SDK. - * - *

<p>This class provides methods to create and recreate streams for ingesting records into
- * Databricks tables. It handles authentication, connection management, and stream lifecycle
- * operations.
- *
- * <p>Example usage:
- *
- * <pre>{@code
- * ZerobusSdk sdk = new ZerobusSdk(
- *     "server-endpoint.databricks.com",
- *     "https://workspace.databricks.com"
- * );
- *
- * ZerobusStream stream = sdk.createStream(
- *     tableProperties,
- *     clientId,
- *     clientSecret,
- *     options
- * ).join();
- * }</pre>
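Continuing the sketch above: once the stream is open, each ingested record yields a future that completes on the server's durability ack. MyRecord and setId are hypothetical stand-ins for a generated protobuf class; ingestRecord declares the checked ZerobusException, whose handling is omitted here for brevity:

    MyRecord record = MyRecord.newBuilder().setId(1).build();
    stream.ingestRecord(record).join(); // completes once the server acks durability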
    - * - * @see ZerobusStream - * @see StreamConfigurationOptions - */ -public class ZerobusSdk { - private static final Logger logger = LoggerFactory.getLogger(ZerobusSdk.class); - - // Constants - private static final StreamConfigurationOptions DEFAULT_OPTIONS = - StreamConfigurationOptions.getDefault(); - private static final int STREAM_EXECUTOR_THREAD_POOL_SIZE = 4; - private static final String HTTPS_PREFIX = "https://"; - private static final String HTTP_PREFIX = "http://"; - private static final String THREAD_NAME_PREFIX = "ZerobusStream-executor-"; - - private static final Random RANDOM = new Random(); - - private final String serverEndpoint; - private final String unityCatalogEndpoint; - private final String workspaceId; - - private ZerobusSdkStubFactory stubFactory = ZerobusSdkStubFactory.create(); - - /** - * Creates a new ZerobusSdk instance. - * - * @param serverEndpoint The gRPC endpoint URL for the Zerobus service. - * @param unityCatalogEndpoint The Unity Catalog endpoint URL. - */ - public ZerobusSdk(String serverEndpoint, String unityCatalogEndpoint) { - this.serverEndpoint = serverEndpoint; - this.unityCatalogEndpoint = unityCatalogEndpoint; - this.workspaceId = extractWorkspaceId(serverEndpoint); - } - - /** - * Sets the stub factory (used for testing). - * - * @param stubFactory The stub factory to use - */ - void setStubFactory(ZerobusSdkStubFactory stubFactory) { - this.stubFactory = stubFactory; - } - - /** - * Extracts workspace ID from server endpoint. - * - *

    The workspace ID is the first component of the endpoint hostname. - * - *

    Example: {@code 1234567890123456.zerobus.us-west-2.cloud.databricks.com} returns {@code - * 1234567890123456} - * - * @param endpoint The server endpoint (may include protocol prefix) - * @return The extracted workspace ID - */ - private static String extractWorkspaceId(String endpoint) { - String cleanEndpoint = endpoint; - - // Remove protocol prefix if present - if (cleanEndpoint.startsWith(HTTPS_PREFIX)) { - cleanEndpoint = cleanEndpoint.substring(HTTPS_PREFIX.length()); - } else if (cleanEndpoint.startsWith(HTTP_PREFIX)) { - cleanEndpoint = cleanEndpoint.substring(HTTP_PREFIX.length()); - } - - // Extract workspace ID (first part before first dot) - int dotIndex = cleanEndpoint.indexOf('.'); - return dotIndex > 0 ? cleanEndpoint.substring(0, dotIndex) : cleanEndpoint; - } - - /** - * Creates an executor service for stream operations. - * - *

    The executor uses daemon threads to avoid preventing JVM shutdown. Each thread is named with - * a unique instance ID for debugging purposes. - * - * @return A new ExecutorService configured for stream operations - */ - private static ExecutorService createStreamExecutor() { - long instanceId = 1000000000L + Math.abs(RANDOM.nextLong() % 9000000000L); - - ThreadFactory daemonThreadFactory = - new ThreadFactory() { - private final AtomicInteger counter = new AtomicInteger(0); - - @Override - public Thread newThread(Runnable runnable) { - Thread thread = new Thread(runnable); - thread.setDaemon(true); - thread.setName(THREAD_NAME_PREFIX + instanceId + "-" + counter.getAndIncrement()); - return thread; - } - }; - - return Executors.newFixedThreadPool(STREAM_EXECUTOR_THREAD_POOL_SIZE, daemonThreadFactory); - } - - /** - * Creates a new gRPC stream for ingesting records into a table. - * - *

    Opens a stream which can be used for blocking and/or non-blocking gRPC calls to ingest data - * to the given table. At this stage, the table and message descriptor will be validated. - * - * @param tableProperties Configuration for the target table including table name and record type - * information. - * @param clientId The OAuth client ID for authentication. - * @param clientSecret The OAuth client secret for authentication. - * @param options Configuration options for the stream including timeouts, retry settings, and - * callback functions. - * @param The type of records to be ingested (must extend Message). - * @return A CompletableFuture that completes with the ZerobusStream when the stream is ready. - */ - public CompletableFuture> createStream( - TableProperties tableProperties, - String clientId, - String clientSecret, - StreamConfigurationOptions options) { - - ExecutorService streamExecutor = createStreamExecutor(); - CompletableFuture> resultFuture = new CompletableFuture<>(); - - try { - logger.debug("Creating stream for table: " + tableProperties.getTableName()); - - // Create a token supplier that generates a fresh token for each gRPC request - Supplier tokenSupplier = - () -> { - try { - return TokenFactory.getZerobusToken( - tableProperties.getTableName(), - workspaceId, - unityCatalogEndpoint, - clientId, - clientSecret); - } catch (NonRetriableException e) { - throw new RuntimeException("Failed to get Zerobus token", e); - } - }; - - // Create a stub supplier that generates a fresh stub with token supplier each time - Supplier stubSupplier = - () -> - stubFactory.createStubWithTokenSupplier( - serverEndpoint, tableProperties.getTableName(), tokenSupplier); - - ZerobusStream stream = - new ZerobusStream<>( - stubSupplier, - tableProperties, - stubFactory, - serverEndpoint, - workspaceId, - unityCatalogEndpoint, - clientId, - clientSecret, - options, - streamExecutor, - streamExecutor); - - stream - .initialize() - .whenComplete( - (result, error) -> { - if (error == null) { - resultFuture.complete(stream); - } else { - resultFuture.completeExceptionally(error); - } - }); - } catch (Throwable e) { - logger.error("Failed to create stream with: " + e.getMessage(), e); - - Throwable ex; - if (e instanceof ZerobusException) { - ex = e; - } else if (e instanceof StatusRuntimeException) { - StatusRuntimeException sre = (StatusRuntimeException) e; - Status.Code code = sre.getStatus().getCode(); - if (GrpcErrorHandling.isNonRetriable(code)) { - ex = - new NonRetriableException( - "Non-retriable gRPC error during stream creation: " + sre.getMessage(), sre); - } else { - ex = new ZerobusException("Failed to create stream: " + sre.getMessage(), sre); - } - } else { - ex = new ZerobusException("Failed to create stream: " + e.getMessage(), e); - } - resultFuture.completeExceptionally(ex); - } - - return resultFuture; - } - - /** - * Creates a new gRPC stream for ingesting records into a table with default options. - * - * @param tableProperties Configuration for the target table including table name and record type - * information. - * @param clientId The OAuth client ID for authentication. - * @param clientSecret The OAuth client secret for authentication. - * @param The type of records to be ingested (must extend Message). - * @return A CompletableFuture that completes with the ZerobusStream when the stream is ready. 
- */
-  public <RecordType extends Message> CompletableFuture<ZerobusStream<RecordType>> createStream(
-      TableProperties<RecordType> tableProperties, String clientId, String clientSecret) {
-    return this.createStream(tableProperties, clientId, clientSecret, DEFAULT_OPTIONS);
-  }
-
-  /**
-   * Recreate stream from a failed stream.
-   *
-   *

    Uses the same table properties and stream options as the failed stream. It will also ingest - * all unacknowledged records from the failed stream. - * - * @param failedStream The stream to be recreated. - * @param The type of records to be ingested (must extend Message). - * @return A CompletableFuture that completes with the new ZerobusStream when the stream is ready. - */ - public CompletableFuture> recreateStream( - ZerobusStream failedStream) { - - CompletableFuture> resultFuture = new CompletableFuture<>(); - - createStream( - failedStream.getTableProperties(), - failedStream.getClientId(), - failedStream.getClientSecret(), - failedStream.getOptions()) - .whenComplete( - (stream, error) -> { - if (error == null) { - // ingest unacked records - Iterator unackedRecords = failedStream.getUnackedRecords(); - - try { - while (unackedRecords.hasNext()) { - stream.ingestRecord(unackedRecords.next()); - } - resultFuture.complete(stream); - } catch (ZerobusException e) { - resultFuture.completeExceptionally(e); - } - } else { - resultFuture.completeExceptionally(error); - } - }); - - return resultFuture; - } -} diff --git a/src/main/java/com/databricks/zerobus/ZerobusSdkStubUtils.java b/src/main/java/com/databricks/zerobus/ZerobusSdkStubUtils.java deleted file mode 100644 index 6893cd5..0000000 --- a/src/main/java/com/databricks/zerobus/ZerobusSdkStubUtils.java +++ /dev/null @@ -1,164 +0,0 @@ -package com.databricks.zerobus; - -import io.grpc.CallOptions; -import io.grpc.Channel; -import io.grpc.ClientCall; -import io.grpc.ClientInterceptor; -import io.grpc.ManagedChannel; -import io.grpc.Metadata; -import io.grpc.MethodDescriptor; -import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder; -import java.util.concurrent.TimeUnit; - -/** - * Factory for creating Zerobus gRPC stubs with proper configuration. - * - *

    This factory handles the creation of gRPC channels and stubs with appropriate settings for - * long-lived streaming connections. - */ -class ZerobusSdkStubFactory { - - // gRPC channel configuration constants - private static final int DEFAULT_TLS_PORT = 443; - private static final long KEEP_ALIVE_TIME_SECONDS = 30; - private static final long KEEP_ALIVE_TIMEOUT_SECONDS = 10; - - // Protocol prefix - private static final String HTTPS_PREFIX = "https://"; - - /** - * Creates a new managed gRPC channel with TLS. - * - *

    The channel is configured for long-lived streaming with appropriate keep-alive settings and - * unlimited message size limits. - * - * @param endpoint The endpoint URL (may include https:// prefix) - * @return A configured ManagedChannel - */ - ManagedChannel createGrpcChannel(String endpoint) { - EndpointInfo endpointInfo = parseEndpoint(endpoint); - - NettyChannelBuilder builder = - NettyChannelBuilder.forAddress(endpointInfo.host, endpointInfo.port).useTransportSecurity(); - - // Configure for long-lived streaming connections with unlimited message size - return builder - .keepAliveTime(KEEP_ALIVE_TIME_SECONDS, TimeUnit.SECONDS) - .keepAliveTimeout(KEEP_ALIVE_TIMEOUT_SECONDS, TimeUnit.SECONDS) - .keepAliveWithoutCalls(true) - .maxInboundMessageSize(Integer.MAX_VALUE) - .build(); - } - - /** - * Creates a new Zerobus gRPC stub with dynamic token supplier. - * - *

    The stub is configured with an interceptor that obtains a fresh token for each request using - * the provided token supplier. This allows token rotation without recreating the stub. - * - *

    Note: Currently creates a new channel for each stub. Consider reusing channels across - * multiple streams for better resource utilization. - * - * @param endpoint The endpoint URL - * @param tableName The target table name - * @param tokenSupplier Supplier that provides a fresh authentication token for each request - * @return A configured ZerobusStub with unlimited message sizes - */ - ZerobusGrpc.ZerobusStub createStubWithTokenSupplier( - String endpoint, String tableName, java.util.function.Supplier tokenSupplier) { - ManagedChannel channel = createGrpcChannel(endpoint); - ClientInterceptor authInterceptor = new AuthenticationInterceptor(tokenSupplier, tableName); - Channel interceptedChannel = io.grpc.ClientInterceptors.intercept(channel, authInterceptor); - return ZerobusGrpc.newStub(interceptedChannel) - .withMaxInboundMessageSize(Integer.MAX_VALUE) - .withMaxOutboundMessageSize(Integer.MAX_VALUE); - } - - /** - * Creates a new stub factory instance. - * - * @return A new ZerobusSdkStubFactory - */ - static ZerobusSdkStubFactory create() { - return new ZerobusSdkStubFactory(); - } - - /** - * Parses an endpoint string to extract host and port information. - * - * @param endpoint The endpoint string (may include https:// prefix) - * @return Parsed endpoint information - */ - private EndpointInfo parseEndpoint(String endpoint) { - // Remove protocol prefix if present - String cleanEndpoint = endpoint; - if (cleanEndpoint.startsWith(HTTPS_PREFIX)) { - cleanEndpoint = cleanEndpoint.substring(HTTPS_PREFIX.length()); - } - - // Parse host:port format - String[] parts = cleanEndpoint.split(":", 2); - String host = parts[0]; - int port = parts.length > 1 ? Integer.parseInt(parts[1]) : DEFAULT_TLS_PORT; - - return new EndpointInfo(host, port); - } - - /** Container for parsed endpoint information. */ - private static class EndpointInfo { - final String host; - final int port; - - EndpointInfo(String host, int port) { - this.host = host; - this.port = port; - } - } -} - -/** - * gRPC client interceptor that adds authentication headers to requests. - * - *

<p>This interceptor attaches the following headers to all outgoing requests:
- *
- * <ul>
- *   <li>Authorization: Bearer token
- *   <li>x-databricks-zerobus-table-name: table name
- * </ul>
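At runtime the interceptor's effect is equivalent to attaching this metadata to each call (illustrative values; the keys match the constants defined below):

    Metadata headers = new Metadata();
    headers.put(
        Metadata.Key.of("authorization", Metadata.ASCII_STRING_MARSHALLER),
        "Bearer " + token);
    headers.put(
        Metadata.Key.of("x-databricks-zerobus-table-name", Metadata.ASCII_STRING_MARSHALLER),
        "catalog.schema.table");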
    - */ -class AuthenticationInterceptor implements ClientInterceptor { - - private static final Metadata.Key AUTHORIZATION_HEADER = - Metadata.Key.of("authorization", Metadata.ASCII_STRING_MARSHALLER); - private static final Metadata.Key TABLE_NAME_HEADER = - Metadata.Key.of("x-databricks-zerobus-table-name", Metadata.ASCII_STRING_MARSHALLER); - private static final String BEARER_PREFIX = "Bearer "; - - private final java.util.function.Supplier tokenSupplier; - private final String tableName; - - /** - * Creates a new authentication interceptor with a dynamic token supplier. - * - * @param tokenSupplier Supplier that provides a fresh authentication token for each request - * @param tableName The target table name - */ - AuthenticationInterceptor(java.util.function.Supplier tokenSupplier, String tableName) { - this.tokenSupplier = tokenSupplier; - this.tableName = tableName; - } - - @Override - public ClientCall interceptCall( - MethodDescriptor method, CallOptions callOptions, Channel next) { - return new io.grpc.ForwardingClientCall.SimpleForwardingClientCall( - next.newCall(method, callOptions)) { - @Override - public void start(Listener responseListener, Metadata headers) { - headers.put(AUTHORIZATION_HEADER, BEARER_PREFIX + tokenSupplier.get()); - headers.put(TABLE_NAME_HEADER, tableName); - super.start(responseListener, headers); - } - }; - } -} diff --git a/src/main/java/com/databricks/zerobus/ZerobusStream.java b/src/main/java/com/databricks/zerobus/ZerobusStream.java deleted file mode 100644 index 1ba47f5..0000000 --- a/src/main/java/com/databricks/zerobus/ZerobusStream.java +++ /dev/null @@ -1,1386 +0,0 @@ -package com.databricks.zerobus; - -import com.databricks.zerobus.ZerobusGrpc.ZerobusStub; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import io.grpc.stub.ClientCallStreamObserver; -import io.grpc.stub.ClientResponseObserver; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Supplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Types of stream failures that can occur during ingestion. */ -enum StreamFailureType { - /** Unknown failure type */ - UNKNOWN, - /** Server closed the stream */ - SERVER_CLOSED_STREAM, - /** Failed while sending a message to the server */ - SENDING_MESSAGE, - /** Server stopped responding to requests */ - SERVER_UNRESPONSIVE -} - -/** Tracks stream failure counts and types for recovery decisions. 
*/ -class StreamFailureInfo { - private StreamFailureType _failureType = StreamFailureType.UNKNOWN; - private int _failureCounts = 0; - - synchronized void logFailure(StreamFailureType streamFailureType) { - if (streamFailureType == _failureType) { - _failureCounts += 1; - } else { - _failureType = streamFailureType; - _failureCounts = 1; - } - } - - synchronized void resetFailure(StreamFailureType streamFailureType) { - if (_failureType == streamFailureType) { - _failureCounts = 0; - _failureType = StreamFailureType.UNKNOWN; - } - } - - synchronized int getFailureCounts() { - return _failureCounts; - } - - synchronized StreamFailureType getFailureType() { - return _failureType; - } -} - -/** - * Utility for classifying gRPC errors as retriable or non-retriable. Non-retriable errors indicate - * issues that cannot be resolved by retrying (e.g., invalid credentials, missing resources). - */ -class GrpcErrorHandling { - private static final Set NON_RETRIABLE_CODES = new HashSet<>(); - - static { - NON_RETRIABLE_CODES.add(Status.Code.INVALID_ARGUMENT); - NON_RETRIABLE_CODES.add(Status.Code.NOT_FOUND); - NON_RETRIABLE_CODES.add(Status.Code.UNAUTHENTICATED); - NON_RETRIABLE_CODES.add(Status.Code.OUT_OF_RANGE); - } - - /** - * Determines if a gRPC status code represents a non-retriable error. - * - * @param code The gRPC status code to check - * @return true if the error should not be retried - */ - static boolean isNonRetriable(Status.Code code) { - return NON_RETRIABLE_CODES.contains(code); - } -} - -/** - * Internal record wrapper that tracks ingestion state. - * - * @param The type of the protobuf message being ingested - */ -class Record { - long offsetId; - final T record; - final ByteString protoEncodedRecord; - final CompletableFuture ackPromise; - - Record( - long offsetId, T record, ByteString protoEncodedRecord, CompletableFuture ackPromise) { - this.offsetId = offsetId; - this.record = record; - this.protoEncodedRecord = protoEncodedRecord; - this.ackPromise = ackPromise; - } -} - -/** - * Zerobus stream for ingesting records into a table. Should be created using - * ZerobusSdk.createStream. 
- */ -public class ZerobusStream { - private static final Logger logger = LoggerFactory.getLogger(ZerobusStream.class); - - // implicit ec: ExecutionContext - this is the ExecutionContext that client provides to run async - // operations (e.g.create stream async result processing) - // zerobusStreamExecutor: ExecutionContext - This is used only for futures like timeout counter / - // stream recovery / stream unresponsiveness detection, so we don't block threads from customer's - // ExecutionContext - // We have to use a separate executor (bounded) to make sure stream progress is - // not blocked - - private static final int CREATE_STREAM_TIMEOUT_MS = 15000; - - private ZerobusStub stub; - private final Supplier stubSupplier; - final TableProperties tableProperties; - private final ZerobusSdkStubFactory stubFactory; - private final String serverEndpoint; - final StreamConfigurationOptions options; - private final ExecutorService zerobusStreamExecutor; - private final ExecutorService ec; - private final String workspaceId; - private final String unityCatalogEndpoint; - private final String clientId; - private final String clientSecret; - - private StreamState state = StreamState.UNINITIALIZED; - private Optional streamId = Optional.empty(); - private Optional> stream = Optional.empty(); - private Optional> streamCreatedEvent = Optional.empty(); - - // Sending records is asynchronus task which consumes records from recordsQueuedForSending and - // sends them to the server - private final ArrayBlockingQueue recordsQueuedForSending; - - // Here we store records which are not yet acknowledged by the server - final ArrayBlockingQueue> inflightRecords; - - // Populated just in case of hard failure, otherwise it's empty - private final List> unackedRecordsAfterStreamFailure = new ArrayList<>(); - - private long latestRespondedOffsetId = -1; - private long lastSentOffsetId = -1; - private final StreamFailureInfo streamFailureInfo = new StreamFailureInfo(); - - private final com.google.protobuf.DescriptorProtos.DescriptorProto descriptorProto; - - /** - * Returns the ID of the stream. - * - * @return The ID of the stream. - */ - public synchronized String getStreamId() { - return streamId.orElse(""); - } - - /** - * Returns the state of the stream. - * - * @return The state of the stream. - */ - public synchronized StreamState getState() { - return state; - } - - /** - * Returns the unacknowledged records after stream failure. - * - * @return The unacknowledged records after stream failure. - */ - public Iterator getUnackedRecords() { - List records = new ArrayList<>(); - for (Record record : unackedRecordsAfterStreamFailure) { - records.add(record.record); - } - return records.iterator(); - } - - /** - * Returns the table properties for this stream. - * - * @return The table properties. - */ - public TableProperties getTableProperties() { - return tableProperties; - } - - /** - * Returns the stream configuration options. - * - * @return The stream configuration options. - */ - public StreamConfigurationOptions getOptions() { - return options; - } - - /** - * Returns the OAuth client ID. - * - * @return The OAuth client ID. - */ - public String getClientId() { - return clientId; - } - - /** - * Returns the OAuth client secret. - * - * @return The OAuth client secret. 
- */ - public String getClientSecret() { - return clientSecret; - } - - private synchronized void setState(StreamState newState) { - state = newState; - this.notifyAll(); - logger.debug("Stream state changed to " + newState); - } - - private CompletableFuture runWithTimeout( - long timeoutMs, java.util.function.Supplier> getFuture) { - AtomicBoolean done = new AtomicBoolean(false); - CompletableFuture future = getFuture.get(); - - future.whenComplete( - (result, error) -> { - synchronized (done) { - done.set(true); - done.notifyAll(); - } - }); - - CompletableFuture timeoutFuture = - CompletableFuture.runAsync( - () -> { - synchronized (done) { - try { - done.wait(timeoutMs); - if (!done.get()) { - throw new RuntimeException(new TimeoutException("Operation timed out!")); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - } - }, - zerobusStreamExecutor); - - return CompletableFuture.anyOf(future, timeoutFuture).thenApply(result -> null); - } - - /** - * Retries an operation with exponential backoff until success or max retries reached. - * - *

    This method uses recursion through the RetryHelper inner class to avoid blocking the caller - * thread. Each retry is scheduled asynchronously on the stream executor. - * - * @param maxRetries Maximum number of retry attempts - * @param context Context string for logging - * @param f Supplier that provides the operation to retry - * @return CompletableFuture that completes with the operation result or error - */ - private CompletableFuture runWithRetries( - long maxRetries, String context, java.util.function.Supplier> f) { - CompletableFuture resultPromise = new CompletableFuture<>(); - - int backoffMs = options.recovery() ? options.recoveryBackoffMs() : 0; - - class RetryHelper { - void tryNext(int attempt) { - logger.debug("[" + context + "] Running attempt ... "); - - f.get() - .whenComplete( - (response, error) -> { - if (error == null) { - resultPromise.complete(response); - } else if (error instanceof NonRetriableException - || error.getCause() instanceof NonRetriableException) { - // Non-retriable errors should fail immediately without retrying - resultPromise.completeExceptionally(error); - } else { - if (attempt < maxRetries - 1) { - // Schedule next retry after backoff period - CompletableFuture.runAsync( - () -> { - logger.debug("[" + context + "] Retrying in " + backoffMs + " ms ... "); - try { - Thread.sleep(backoffMs); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - tryNext(attempt + 1); - }, - zerobusStreamExecutor); - } else { - // Exhausted all retries - resultPromise.completeExceptionally(error); - } - } - }); - } - } - - new RetryHelper().tryNext(0); - return resultPromise; - } - - private void failStream(Throwable error) { - synchronized (this) { - if (stream.isPresent()) { - try { - stream.get().onError(error); - } catch (Exception e) { - // Ignore - } - - stream = Optional.empty(); - streamId = Optional.empty(); - } - } - } - - private CompletableFuture createStream() { - CompletableFuture createStreamDone = new CompletableFuture<>(); - - int timeoutMs = options.recovery() ? 
options.recoveryTimeoutMs() : CREATE_STREAM_TIMEOUT_MS; - - latestRespondedOffsetId = -1; - lastSentOffsetId = -1; - streamId = Optional.empty(); - stream = Optional.empty(); - streamCreatedEvent = Optional.empty(); - - runWithTimeout( - timeoutMs, - () -> { - CompletableFuture createStreamTry = new CompletableFuture<>(); - - // Get a fresh stub from the supplier - stub = stubSupplier.get(); - - // Create the gRPC stream with the fresh stub - streamCreatedEvent = Optional.of(new CompletableFuture<>()); - stream = - Optional.of( - (ClientCallStreamObserver) - stub.ephemeralStream(ackReceiver)); - - logger.debug("Creating ephemeral stream for table " + tableProperties.getTableName()); - - // Create the initial request - EphemeralStreamRequest createStreamRequest = - EphemeralStreamRequest.newBuilder() - .setCreateStream( - CreateIngestStreamRequest.newBuilder() - .setTableName(tableProperties.getTableName()) - .setDescriptorProto( - ByteString.copyFrom(descriptorProto.toByteArray())) - .setRecordType(com.databricks.zerobus.RecordType.PROTO) - .build()) - .build(); - - // Send the CreateStreamRequest - try { - sendMessage(createStreamRequest); - } catch (Exception exception) { - failStream(exception); - createStreamTry.completeExceptionally(exception); - return createStreamTry; - } - - streamCreatedEvent - .get() - .whenComplete( - (id, e) -> { - if (e == null) { - streamId = Optional.of(id); - recordsSenderTask.start(); - createStreamTry.complete(null); - } else if (e instanceof ZerobusException) { - failStream(e); - streamId = Optional.empty(); - streamCreatedEvent = Optional.empty(); - stream = Optional.empty(); - createStreamTry.completeExceptionally(e); - } else { - failStream(e); - streamId = Optional.empty(); - streamCreatedEvent = Optional.empty(); - stream = Optional.empty(); - createStreamTry.completeExceptionally( - new ZerobusException(e.getMessage(), e)); - } - }); - - return createStreamTry; - }) - .whenComplete( - (result, e) -> { - if (e == null) { - createStreamDone.complete(null); - } else { - failStream(e); - Throwable ex; - if (e instanceof StatusRuntimeException) { - Status.Code code = ((StatusRuntimeException) e).getStatus().getCode(); - if (GrpcErrorHandling.isNonRetriable(code)) { - ex = - new NonRetriableException( - "Non-retriable gRPC error during stream creation: " + e.getMessage(), - e); - } else { - ex = new ZerobusException("Stream creation failed: " + e.getMessage(), e); - } - } else if (e instanceof NonRetriableException) { - ex = new NonRetriableException("Stream creation failed: " + e.getMessage(), e); - } else { - ex = new ZerobusException("Stream creation failed: " + e.getMessage(), e); - } - createStreamDone.completeExceptionally(ex); - } - }); - - return createStreamDone; - } - - CompletableFuture initialize() { - CompletableFuture initializeDone = new CompletableFuture<>(); - - synchronized (this) { - if (state != StreamState.UNINITIALIZED) { - logger.error("Stream cannot be initialized/opened more than once"); - initializeDone.completeExceptionally( - new ZerobusException("Stream cannot be initialized/opened more than once")); - return initializeDone; - } - } - - int retries = options.recovery() ? 
options.recoveryRetries() : 1; - - runWithRetries(retries, "CreateStream", () -> createStream()) - .whenComplete( - (result, e) -> { - if (e == null) { - setState(StreamState.OPENED); - serverUnresponsivenessDetectionTask.start(); - logger.info("Stream created successfully with id " + streamId.get()); - initializeDone.complete(null); - } else { - setState(StreamState.FAILED); - logger.error("Failed to create stream: ", e); - if (e instanceof ZerobusException) { - initializeDone.completeExceptionally(e); - } else { - initializeDone.completeExceptionally( - new ZerobusException("Stream creation failed: " + e.getMessage(), e)); - } - } - }); - - return initializeDone; - } - - /** - * Closes the stream and cleans up resources. - * - * @param hardFailure If true, marks stream as FAILED and saves unacked records for potential - * retry - * @param exception The exception that caused the failure (if any) - */ - private void closeStream(boolean hardFailure, Optional exception) { - synchronized (this) { - logger.debug("Closing stream, hardFailure: " + hardFailure); - - if (hardFailure && exception.isPresent()) { - // CRITICAL: Atomically mark stream as FAILED before processing unacked records. - // This prevents race conditions where clients see errors but unackedRecords is empty. - setState(StreamState.FAILED); - } - - recordsQueuedForSending.clear(); - recordsSenderTask.cancel(); - - try { - if (stream.isPresent()) { - stream.get().onCompleted(); - if (hardFailure) { - stream.get().cancel("Stream closed", null); - } - } - } catch (Exception e) { - // Ignore errors during stream cleanup - stream may already be closed - logger.debug("Error while closing stream: " + e.getMessage()); - } - - // For hard failures, preserve unacked records so they can be retried via recreateStream() - if (hardFailure) { - serverUnresponsivenessDetectionTask.cancel(); - logger.debug("Stream closing: Failing all unacked records"); - - while (!inflightRecords.isEmpty()) { - try { - Record record = inflightRecords.take(); - unackedRecordsAfterStreamFailure.add(record); - record.ackPromise.completeExceptionally( - exception.orElse(new ZerobusException("Stream failed"))); - this.notifyAll(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } - - stream = Optional.empty(); - streamCreatedEvent = Optional.empty(); - streamId = Optional.empty(); - stub = null; - - this.notifyAll(); - } - - // Wait for background tasks to fully stop before returning. - // This ensures clean shutdown and prevents resource leaks. 
- recordsSenderTask.waitUntilStopped(); - if (hardFailure) { - serverUnresponsivenessDetectionTask.waitUntilStopped(); - } - } - - private CompletableFuture closeStreamAsync( - boolean hardFailure, Optional exception) { - return CompletableFuture.runAsync( - () -> closeStream(hardFailure, exception), zerobusStreamExecutor); - } - - private void enqueueRecordsForResending() { - synchronized (this) { - if (state != StreamState.RECOVERING) { - return; - } - - Iterator> recordsIterator = inflightRecords.iterator(); - - while (recordsIterator.hasNext()) { - Record record = recordsIterator.next(); - - lastSentOffsetId += 1; - long offsetId = lastSentOffsetId; - - record.offsetId = offsetId; - - EphemeralStreamRequest recordRequest = - EphemeralStreamRequest.newBuilder() - .setIngestRecord( - IngestRecordRequest.newBuilder() - .setOffsetId(offsetId) - .setProtoEncodedRecord(record.protoEncodedRecord) - .build()) - .build(); - - try { - recordsQueuedForSending.put(recordRequest); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } - } - - /** - * Attempts to recover a failed stream by recreating it and resending unacked records. - * - *

<p>This method:
- *
- * <ol>
- *   <li>Closes the current stream without marking it as hard failure
- *   <li>Creates a new stream with the same configuration
- *   <li>Re-enqueues all unacknowledged records for sending
- * </ol>
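If recovery exhausts its retries and the stream hard-fails, the caller can fall back to ZerobusSdk.recreateStream, which replays unacknowledged records on a fresh stream. A sketch; sdk, failedStream, and logger are illustrative names:

    sdk.recreateStream(failedStream)
        .thenAccept(s -> logger.info("Recovered on stream " + s.getStreamId()))
        .join();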
    - * - * @return CompletableFuture that completes when recovery succeeds or fails - */ - private CompletableFuture recoverStream() { - CompletableFuture recoverStreamDone = new CompletableFuture<>(); - - CompletableFuture.runAsync( - () -> { - if (!options.recovery()) { - logger.debug("Stream recovery is disabled"); - recoverStreamDone.completeExceptionally( - new ZerobusException("Stream recovery is disabled")); - } else { - logger.warn( - "Stream broken! Running stream recovery for stream id '" - + streamId.orElse("unknown") - + "' ... "); - - // Close the broken stream but don't mark as hard failure since we're attempting - // recovery - closeStream(false, Optional.empty()); - - synchronized (this) { - int retries = options.recoveryRetries(); - // Reduce remaining retries based on consecutive failures of the same type - int leftRetries = Math.max(0, retries - streamFailureInfo.getFailureCounts() + 1); - - if (leftRetries == 0) { - logger.debug("Stream recovery failed: Run out of retries"); - recoverStreamDone.completeExceptionally( - new ZerobusException("Stream recovery failed")); - return; - } - - logger.debug( - "Stream recovery: Running with " - + leftRetries - + " / " - + retries - + " retries left"); - - runWithRetries( - leftRetries, - "RecoverStream", - () -> { - CompletableFuture recoverStreamTry = new CompletableFuture<>(); - - createStream() - .whenComplete( - (result, e) -> { - if (e != null) { - logger.debug( - "Stream recovery: Failed to create stream: " - + e.getMessage()); - recoverStreamTry.completeExceptionally(e); - } else { - enqueueRecordsForResending(); - recoverStreamTry.complete(null); - } - }); - - return recoverStreamTry; - }) - .whenComplete( - (result, e) -> { - if (e == null) { - logger.info( - "Stream recovery completed successfully. 
New stream id: " - + streamId.get()); - recoverStreamDone.complete(null); - } else { - logger.error("Stream recovery failed: " + e.getMessage(), e); - recoverStreamDone.completeExceptionally(e); - } - }); - } - } - }, - zerobusStreamExecutor); - - return recoverStreamDone; - } - - private void handleStreamFailed(StreamFailureType streamFailureType, Optional error) { - - Optional exception; - if (error.isPresent()) { - Throwable e = error.get(); - if (e instanceof ZerobusException) { - exception = Optional.of((ZerobusException) e); - } else { - exception = Optional.of(new ZerobusException("Stream failed: " + e.getMessage(), e)); - } - } else { - exception = Optional.of(new ZerobusException("Stream failed")); - } - - synchronized (this) { - if (state == StreamState.FAILED - || state == StreamState.UNINITIALIZED - || state == StreamState.RECOVERING) { - // UNINITIALIZED -> Stream failed during creation - // FAILED -> Stream already failed (don't handle it twice) - // RECOVERING -> Stream is recovering from a failure, no action needed - - if (state == StreamState.UNINITIALIZED && streamCreatedEvent.isPresent()) { - streamCreatedEvent.get().completeExceptionally(exception.get()); - } - - return; - } - - if (state == StreamState.CLOSED && !error.isPresent()) { - // Stream failed after closed, but without exception - that's expected (stream closed - // gracefully) - return; - } - - if (error.isPresent()) { - logger.error("Stream failed: " + error.get().getMessage(), error.get()); - } - - // Check if this is a non-retriable error - if so, don't attempt recovery - if (error.isPresent() && error.get() instanceof NonRetriableException) { - closeStreamAsync(true, exception); - return; - } - - streamFailureInfo.logFailure(streamFailureType); - - // Stream is open or flushing, try to recover it - setState(StreamState.RECOVERING); - - recoverStream() - .whenComplete( - (result, e) -> { - if (e == null) { - setState(StreamState.OPENED); - logger.info("Stream recovered successfully with id " + streamId.get()); - } else { - logger.error("Stream recovery failed", e); - closeStream(true, exception); - } - }); - } - } - - private CompletableFuture handleStreamFailedAsync( - StreamFailureType streamFailureType, Optional error) { - return CompletableFuture.runAsync( - () -> handleStreamFailed(streamFailureType, error), zerobusStreamExecutor); - } - - // Task that checks if server is responsive (time it takes for server to ack a record) - // Task is created once during initialize() and it's shutdown when stream is closed finally - // (e.g. 
close() is called or stream can't be recovered) - private BackgroundTask serverUnresponsivenessDetectionTask; - - private void initServerUnresponsivenessDetectionTask() { - serverUnresponsivenessDetectionTask = - new BackgroundTask( - cancellationToken -> { - long taskIterationStartTime = System.currentTimeMillis(); - synchronized (ZerobusStream.this) { - switch (state) { - case UNINITIALIZED: - case CLOSED: - case FAILED: - break; - - case RECOVERING: - logger.debug( - "Server unresponsiveness detection task: Waiting for stream to finish recovering"); - try { - ZerobusStream.this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - break; - - case OPENED: - case FLUSHING: - if (inflightRecords.isEmpty()) { - logger.debug( - "Server unresponsiveness detection task: Waiting for some records to be ingested"); - try { - ZerobusStream.this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } else { - // STREAM IS OPENED OR FLUSHING AND THERE ARE RECORDS IN THE QUEUE - CHECK IF - // SERVER IS RESPONSIVE - long latestRespondedOffsetIdBefore = latestRespondedOffsetId; - boolean serverResponsive = false; - boolean serverResponsiveTimeout = false; - - while (!serverResponsive && !serverResponsiveTimeout) { - if (latestRespondedOffsetIdBefore != latestRespondedOffsetId) { - serverResponsive = true; - } else { - long remainingTime = - options.serverLackOfAckTimeoutMs() - - (System.currentTimeMillis() - taskIterationStartTime); - - if (remainingTime <= 0) { - // We don't want to block here, since this potentially can close the - // stream, which will wait for this task to finish (deadlock) - handleStreamFailedAsync( - StreamFailureType.SERVER_UNRESPONSIVE, - Optional.of(new ZerobusException("Server is unresponsive"))); - serverResponsiveTimeout = true; - } else { - try { - ZerobusStream.this.wait(remainingTime); - if (cancellationToken.isDone()) { - // In case of a stream close, break the loop so that it doesn't hang - // waiting for the timeout. 
- serverResponsive = true; - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - } - } - } - break; - } - } - }, - error -> { - // This should never happen (task won't throw any errors), but if it does, we need to - // handle it - // and it probably won't be recoverable - logger.error( - "Server unresponsiveness detection task failed: " + error.getMessage(), error); - - closeStreamAsync( - true, - Optional.of( - new ZerobusException( - "Server unresponsiveness detection task failed: " + error.getMessage(), - error))); - }, - zerobusStreamExecutor); - } - - // Task that consumes records from recordsQueuedForSending and sends them to the server - // This task is restarted each time stream is recovered/restarted - private BackgroundTask recordsSenderTask; - - private void initRecordsSenderTask() { - recordsSenderTask = - new BackgroundTask( - cancellationToken -> { - // Check if there are records to send - Optional recordRequest; - synchronized (ZerobusStream.this) { - switch (state) { - case OPENED: - case FLUSHING: - if (recordsQueuedForSending.isEmpty()) { - try { - ZerobusStream.this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - recordRequest = Optional.empty(); - } else { - try { - recordRequest = Optional.of(recordsQueuedForSending.take()); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - recordRequest = Optional.empty(); - } - } - break; - case CLOSED: - if (recordsQueuedForSending.isEmpty()) { - recordRequest = Optional.empty(); - } else { - try { - recordRequest = Optional.of(recordsQueuedForSending.take()); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - recordRequest = Optional.empty(); - } - } - break; - default: - recordRequest = Optional.empty(); - break; - } - } - - // If we have a record, wait for stream to be ready and send it - if (recordRequest.isPresent()) { - if (stream.isPresent()) { - ClientCallStreamObserver strm = stream.get(); - // Wait for stream to be ready - synchronized (ZerobusStream.this) { - while (!strm.isReady() && !cancellationToken.isDone()) { - try { - ZerobusStream.this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } - if (!cancellationToken.isDone()) { - // Send the record - try { - sendMessage(recordRequest.get()); - streamFailureInfo.resetFailure(StreamFailureType.SENDING_MESSAGE); - } catch (Exception ex) { - logger.error("Error while sending record: " + ex.getMessage(), ex); - - // Use async to avoid deadlock: handleStreamFailed() may call closeStream() - // which waits for this task to stop. - handleStreamFailedAsync(StreamFailureType.SENDING_MESSAGE, Optional.of(ex)); - - // Wait for state change before continuing. This prevents repeatedly - // attempting - // to send the next record which would likely fail with the same error. - // The task will be restarted after recovery (or shut down if recovery fails). 
- synchronized (ZerobusStream.this) { - while ((state == StreamState.OPENED || state == StreamState.FLUSHING) - && !cancellationToken.isDone()) { - try { - ZerobusStream.this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } - } - } - } - } - // No record available, continue to next iteration - }, - error -> { - // This should never happen (task won't throw any errors), but if it does, we need to - // handle it - // and it probably won't be recoverable - logger.error("Records sender task failed: " + error.getMessage(), error); - - closeStreamAsync( - true, - Optional.of( - new ZerobusException( - "Records sender task failed: " + error.getMessage(), error))); - }, - zerobusStreamExecutor); - } - - private ClientResponseObserver ackReceiver; - - private void initAckReceiver() { - ackReceiver = - new ClientResponseObserver() { - // Track state for the receiver - private Optional ackReceiverStreamId = Optional.empty(); - - @Override - public void beforeStart(ClientCallStreamObserver requestStream) { - requestStream.setOnReadyHandler( - () -> { - synchronized (ZerobusStream.this) { - ZerobusStream.this.notifyAll(); - } - }); - } - - @Override - public void onNext(EphemeralStreamResponse response) { - switch (response.getPayloadCase()) { - // *** Create stream response *** - case CREATE_STREAM_RESPONSE: - ackReceiverStreamId = - Optional.of( - response.getCreateStreamResponse().getStreamId().isEmpty() - ? null - : response.getCreateStreamResponse().getStreamId()); - if (!ackReceiverStreamId.isPresent() || ackReceiverStreamId.get() == null) { - throw new RuntimeException( - new ZerobusException("Invalid response from server: stream id is missing")); - } - logger.debug("Stream created with id " + ackReceiverStreamId.get()); - streamCreatedEvent.get().complete(ackReceiverStreamId.get()); - break; - - // *** Ingest record response (durability ack) *** - case INGEST_RECORD_RESPONSE: - String streamIdForReceiver = - ackReceiverStreamId.orElseThrow( - () -> - new RuntimeException( - new ZerobusException( - "Invalid response from server: expected stream id but got record ack"))); - long ackedOffsetId = - response.getIngestRecordResponse().getDurabilityAckUpToOffset(); - logger.debug("Acked offset " + ackedOffsetId); - - synchronized (ZerobusStream.this) { - - // Edge case: Stream was recovered/recreated while ack was in flight. - // Ignore stale acks from old stream to avoid incorrectly completing promises. - if (!streamId.isPresent() || !streamIdForReceiver.equals(streamId.get())) { - return; - } - - // Receiving an ack proves the server is responsive and connection is healthy - streamFailureInfo.resetFailure(StreamFailureType.SERVER_CLOSED_STREAM); - streamFailureInfo.resetFailure(StreamFailureType.SERVER_UNRESPONSIVE); - - latestRespondedOffsetId = Math.max(latestRespondedOffsetId, ackedOffsetId); - - // Complete promises for all records up to and including the acked offset. - // Server guarantees durability for all records <= ackedOffsetId. 
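// Worked example of the cumulative ack handled below: with inflight offsets
// [5, 6, 7] and durabilityAckUpToOffset == 6, the loop completes the promises
// for offsets 5 and 6 and leaves 7 in inflightRecords awaiting a later ack.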
- boolean processingDone = false; - while (!processingDone) { - if (inflightRecords.isEmpty()) { - processingDone = true; - } else { - Record record = inflightRecords.peek(); - - if (record.offsetId > ackedOffsetId) { - // This record hasn't been acked yet - processingDone = true; - } else { - record.ackPromise.complete(null); - try { - inflightRecords.take(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } - } - - ZerobusStream.this.notifyAll(); - } - - // Invoke user callback asynchronously to avoid blocking the gRPC receiver thread. - // Exceptions in user code should not affect stream operation. - if (options.ackCallback().isPresent()) { - CompletableFuture.runAsync( - () -> { - options.ackCallback().get().accept(response.getIngestRecordResponse()); - }, - ec) - .exceptionally( - e -> { - logger.error( - "Exception in async ack_callback for offset " - + response - .getIngestRecordResponse() - .getDurabilityAckUpToOffset(), - e); - return null; - }); - } - break; - - // *** Close stream signal *** - case CLOSE_STREAM_SIGNAL: - if (options.recovery()) { - double durationMs = 0.0; - if (response.getCloseStreamSignal().hasDuration()) { - durationMs = - response.getCloseStreamSignal().getDuration().getSeconds() * 1000.0 - + response.getCloseStreamSignal().getDuration().getNanos() / 1000000.0; - } - logger.info( - String.format( - "Server will close the stream in %.3fms. Triggering stream recovery.", - durationMs)); - streamFailureInfo.resetFailure(StreamFailureType.SERVER_CLOSED_STREAM); - handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); - } - break; - - // *** Unknown response *** - default: - throw new RuntimeException(new ZerobusException("Invalid response from server")); - } - } - - @Override - public void onError(Throwable t) { - synchronized (ZerobusStream.this) { - if (state == StreamState.CLOSED && !stream.isPresent()) { - logger.debug("Ignoring error on already closed stream: " + t.getMessage()); - return; - } - } - - Optional error = Optional.of(t); - - if (t instanceof StatusRuntimeException) { - Status.Code code = ((StatusRuntimeException) t).getStatus().getCode(); - if (GrpcErrorHandling.isNonRetriable(code)) { - error = - Optional.of( - new NonRetriableException( - "Non-retriable gRPC error: " + ((StatusRuntimeException) t).getStatus(), - t)); - } - } - - handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, error); - } - - @Override - public void onCompleted() { - logger.debug("Server called close on the stream"); - handleStreamFailed(StreamFailureType.SERVER_CLOSED_STREAM, Optional.empty()); - } - }; - } - - private void sendMessage(EphemeralStreamRequest message) throws Exception { - stream.get().onNext(message); - } - - /** - * Ingests a record into the stream. - * - * @param record The record to ingest. - * @return A CompletableFuture that completes when the server acknowledges the record has been - * durably stored. If the future raises an exception, the record most probably was not - * acknowledged, but it is also possible that the server acknowledged the record but the - * response was lost. In this case client should decide whether to retry the record or not. 
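That contract amounts to at-least-once semantics: an exceptionally completed future means the outcome is unknown, not that the write definitely failed. A hypothetical caller-side policy (none of these names are SDK API) parks such records for re-ingestion when duplicates are tolerable downstream:

```java
import java.util.Queue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedQueue;

// Hypothetical caller-side policy: an exceptionally completed ack means
// "unknown outcome", so the record is parked for re-ingestion. Retrying can
// produce duplicates; skip it only if duplicates are unacceptable downstream.
class RetryOnUnknownOutcome<R> {
  private final Queue<R> retryQueue = new ConcurrentLinkedQueue<>();

  void track(R record, CompletableFuture<Void> ack) {
    ack.whenComplete((ignored, err) -> {
      if (err != null) {
        retryQueue.add(record);
      }
    });
  }

  Queue<R> pending() { return retryQueue; }
}
```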
- * @throws ZerobusException if the stream is not in a valid state for ingestion - */ - public CompletableFuture ingestRecord(RecordType record) throws ZerobusException { - CompletableFuture durabilityPromise = new CompletableFuture<>(); - - synchronized (this) { - // Wait until there is space in the queue - boolean recordQueueFull = true; - while (recordQueueFull) { - switch (state) { - case RECOVERING: - case FLUSHING: - logger.debug( - "Ingest record: Waiting for stream " - + streamId.orElse("") - + " to finish recovering/flushing"); - try { - this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - durabilityPromise.completeExceptionally( - new ZerobusException("Interrupted while waiting for stream", e)); - return durabilityPromise; - } - break; - case FAILED: - case CLOSED: - case UNINITIALIZED: - logger.error( - "Cannot ingest record when stream is closed or not opened for stream ID " - + streamId.orElse("unknown")); - throw new ZerobusException( - "Cannot ingest record when stream is closed or not opened for stream ID " - + streamId.orElse("unknown")); - case OPENED: - if (inflightRecords.remainingCapacity() > 0) { - recordQueueFull = false; - } else { - logger.debug("Ingest record: Waiting for space in the queue"); - try { - this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - durabilityPromise.completeExceptionally( - new ZerobusException("Interrupted while waiting for space in queue", e)); - return durabilityPromise; - } - } - break; - } - } - - ByteString protoEncodedRecord = ByteString.copyFrom(record.toByteArray()); - lastSentOffsetId += 1; - long offsetId = lastSentOffsetId; - - try { - inflightRecords.put(new Record<>(offsetId, record, protoEncodedRecord, durabilityPromise)); - - recordsQueuedForSending.put( - EphemeralStreamRequest.newBuilder() - .setIngestRecord( - IngestRecordRequest.newBuilder() - .setOffsetId(offsetId) - .setProtoEncodedRecord(protoEncodedRecord) - .build()) - .build()); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - durabilityPromise.completeExceptionally( - new ZerobusException("Interrupted while enqueuing record", e)); - return durabilityPromise; - } - - this.notifyAll(); - } - - return durabilityPromise; - } - - /** - * Flushes the stream, waiting for all queued records to be acknowledged by the server. The stream - * doesn't close after flushing. - * - * @throws ZerobusException If the stream is not opened. 
- */ - public void flush() throws ZerobusException { - synchronized (this) { - logger.debug("Flushing stream ..."); - - try { - if (state == StreamState.UNINITIALIZED) { - logger.error("Cannot flush stream when it is not opened"); - throw new ZerobusException("Cannot flush stream when it is not opened"); - } - - while (state == StreamState.RECOVERING) { - logger.debug("Flushing stream: Waiting for stream to finish recovering"); - try { - this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new ZerobusException("Error while flushing stream", e); - } - } - - if (state == StreamState.OPENED) { - setState(StreamState.FLUSHING); - } - - long startTime = System.currentTimeMillis(); - - boolean recordsFlushed = false; - while (!recordsFlushed) { - if (state == StreamState.FAILED) { - logger.error("Stream failed, cannot flush"); - throw new ZerobusException("Stream failed, cannot flush"); - } else { - if (inflightRecords.isEmpty()) { - recordsFlushed = true; - } else { - long remainingTime = - options.flushTimeoutMs() - (System.currentTimeMillis() - startTime); - - if (remainingTime <= 0) { - logger.error("Flushing stream timed out"); - throw new ZerobusException("Flushing stream timed out"); - } - - try { - logger.debug("Waiting for " + remainingTime + "ms to flush stream ..."); - this.wait(remainingTime); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - logger.error("Error while flushing stream: " + e.getMessage(), e); - throw new ZerobusException("Error while flushing stream", e); - } - } - } - } - - if (!inflightRecords.isEmpty()) { - logger.error("Flushing stream timed out"); - throw new ZerobusException("Flushing stream timed out"); - } - - logger.info("All records have been flushed"); - } finally { - if (state == StreamState.FLUSHING) { - setState(StreamState.OPENED); - } - } - } - } - - /** - * Closes the stream, while first flushing all queued records. Once a stream is closed, it cannot - * be reopened. - * - * @throws ZerobusException If the stream is not opened. - */ - public void close() throws ZerobusException { - boolean readyToClose = false; - synchronized (this) { - while (!readyToClose) { - switch (state) { - case UNINITIALIZED: - logger.error("Cannot close stream when it is not opened"); - throw new ZerobusException("Cannot close stream when it is not opened"); - case FAILED: - logger.error("Stream failed and cannot be gracefully closed"); - throw new ZerobusException("Stream failed and cannot be gracefully closed"); - case CLOSED: - // Idempotent operation - logger.debug("Close stream: Stream is already closed"); - return; - case FLUSHING: - case RECOVERING: - // Wait until the stream is flushed or recovering - logger.debug("Close stream: Waiting for stream to finish flushing/recovering"); - try { - this.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - break; - case OPENED: - setState(StreamState.CLOSED); - readyToClose = true; - break; - } - } - } - - Optional receivedException = Optional.empty(); - try { - flush(); - } catch (ZerobusException ex) { - // Case 1: The exception is already the type we want. - receivedException = Optional.of(ex); - throw ex; // Re-throw the original exception. - } catch (Exception otherEx) { - // Case 2: Any other non-fatal exception. - // Wrap the unexpected exception in a new ZerobusException. 
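The catch/finally structure being built here follows a wrap-and-rethrow idiom: rethrow a typed failure unchanged, wrap anything unexpected in the domain exception, and always run the cleanup with whichever cause (if any) was captured. A minimal sketch under hypothetical names:

```java
import java.util.function.Consumer;

// Minimal sketch of the wrap-and-rethrow idiom used by close(): preserve a
// typed failure, wrap the rest, and guarantee cleanup runs with the cause.
final class WrapAndRethrowSketch {
  static void closeGracefully(Runnable flush, Consumer<Exception> cleanup) throws Exception {
    Exception failure = null;
    try {
      flush.run();
    } catch (Exception e) {
      failure = e; // a real implementation would wrap unexpected types here
      throw e;
    } finally {
      cleanup.accept(failure); // runs on success and on failure alike
    }
  }
}
```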
- ZerobusException wrappedEx = new ZerobusException("Underlying failure during flush", otherEx); - receivedException = Optional.of(wrappedEx); - throw wrappedEx; - } finally { - closeStream(true, receivedException); - } - - logger.info("Stream gracefully closed"); - } - - public ZerobusStream( - Supplier stubSupplier, - TableProperties tableProperties, - ZerobusSdkStubFactory stubFactory, - String serverEndpoint, - String workspaceId, - String unityCatalogEndpoint, - String clientId, - String clientSecret, - StreamConfigurationOptions options, - ExecutorService zerobusStreamExecutor, - ExecutorService ec) { - this.stub = null; - this.stubSupplier = stubSupplier; - this.tableProperties = tableProperties; - this.stubFactory = stubFactory; - this.serverEndpoint = serverEndpoint; - this.workspaceId = workspaceId; - this.unityCatalogEndpoint = unityCatalogEndpoint; - this.clientId = clientId; - this.clientSecret = clientSecret; - this.options = options; - this.zerobusStreamExecutor = zerobusStreamExecutor; - this.ec = ec; - - this.recordsQueuedForSending = new ArrayBlockingQueue<>(options.maxInflightRecords()); - this.inflightRecords = new ArrayBlockingQueue<>(options.maxInflightRecords()); - this.descriptorProto = tableProperties.getDescriptorProto(); - - // Initialize background tasks and observers - initServerUnresponsivenessDetectionTask(); - initRecordsSenderTask(); - initAckReceiver(); - } -} diff --git a/src/main/java/com/databricks/zerobus/tools/GenerateProto.java b/src/main/java/com/databricks/zerobus/tools/GenerateProto.java deleted file mode 100644 index e83c14c..0000000 --- a/src/main/java/com/databricks/zerobus/tools/GenerateProto.java +++ /dev/null @@ -1,706 +0,0 @@ -package com.databricks.zerobus.tools; - -import java.io.BufferedReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.net.HttpURLConnection; -import java.net.URL; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Generate proto2 file from Unity Catalog table schema. - * - *
<p>
    This tool fetches table schema from Unity Catalog and generates a corresponding proto2 - * definition file. It supports all Delta data types and maps them to appropriate Protocol Buffer - * types. - * - *
<p>
    Usage: java GenerateProto --uc-endpoint <endpoint> --client-id <id> - * --client-secret <secret> --table <catalog.schema.table> --output <output.proto> - * [--proto-msg <message_name>] - * - *
<p>
    Type mappings: INT -> int32 STRING -> string FLOAT -> float LONG/BIGINT -> int64 - * SHORT/SMALLINT -> int32 DOUBLE -> double BOOLEAN -> bool BINARY -> bytes DATE -> - * int32 TIMESTAMP -> int64 ARRAY<type> -> repeated type MAP<key_type, value_type> - * -> map<key_type, value_type> - */ -public class GenerateProto { - - private static final String USAGE = - "Usage: java GenerateProto \n" - + " --uc-endpoint Unity Catalog endpoint URL\n" - + " --client-id OAuth client ID\n" - + " --client-secret OAuth client secret\n" - + " --table Full table name\n" - + " --output Output path for proto file\n" - + " [--proto-msg ] Name of protobuf message (defaults to table name)\n" - + "\n" - + "Examples:\n" - + " java GenerateProto \\\n" - + " --uc-endpoint \"https://your-workspace.cloud.databricks.com\" \\\n" - + " --client-id \"your-client-id\" \\\n" - + " --client-secret \"your-client-secret\" \\\n" - + " --table \"catalog.schema.table_name\" \\\n" - + " --proto-msg \"TableMessage\" \\\n" - + " --output \"output.proto\"\n" - + "\n" - + "Type mappings:\n" - + " Delta -> Proto2\n" - + " INT -> int32\n" - + " STRING -> string\n" - + " FLOAT -> float\n" - + " LONG -> int64\n" - + " SHORT -> int32\n" - + " DOUBLE -> double\n" - + " BOOLEAN -> bool\n" - + " BINARY -> bytes\n" - + " DATE -> int32\n" - + " TIMESTAMP -> int64\n" - + " ARRAY -> repeated type\n" - + " MAP -> map\n"; - - public static void main(String[] args) { - try { - Args parsedArgs = parseArgs(args); - run(parsedArgs); - System.out.println("Successfully generated proto file at: " + parsedArgs.output); - System.exit(0); - } catch (IllegalArgumentException e) { - System.err.println("Error: " + e.getMessage()); - System.err.println(); - System.err.println(USAGE); - System.exit(1); - } catch (Exception e) { - System.err.println("Error: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - - private static void run(Args args) throws Exception { - // Get OAuth token - String token = getOAuthToken(args.ucEndpoint, args.clientId, args.clientSecret); - - // Fetch table information from Unity Catalog - Map tableInfo = fetchTableInfo(args.ucEndpoint, token, args.table); - - // Extract column information - List> columns = extractColumns(tableInfo); - - // Determine message name - String messageName = args.protoMsg != null ? 
args.protoMsg : args.table.split("\\.")[2]; - - // Generate proto file - generateProtoFile(messageName, columns, args.output); - } - - private static Args parseArgs(String[] args) { - Args result = new Args(); - - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - if (arg.startsWith("--")) { - String key = arg.substring(2); - if (i + 1 >= args.length) { - throw new IllegalArgumentException("Missing value for argument: " + arg); - } - String value = args[++i]; - - switch (key) { - case "uc-endpoint": - result.ucEndpoint = value; - break; - case "client-id": - result.clientId = value; - break; - case "client-secret": - result.clientSecret = value; - break; - case "table": - result.table = value; - break; - case "output": - result.output = value; - break; - case "proto-msg": - result.protoMsg = value; - break; - default: - throw new IllegalArgumentException("Unknown argument: " + arg); - } - } - } - - // Validate required arguments - if (result.ucEndpoint == null) { - throw new IllegalArgumentException("Missing required argument: --uc-endpoint"); - } - if (result.clientId == null) { - throw new IllegalArgumentException("Missing required argument: --client-id"); - } - if (result.clientSecret == null) { - throw new IllegalArgumentException("Missing required argument: --client-secret"); - } - if (result.table == null) { - throw new IllegalArgumentException("Missing required argument: --table"); - } - if (result.output == null) { - throw new IllegalArgumentException("Missing required argument: --output"); - } - - return result; - } - - /** - * Obtains an OAuth token using client credentials flow. - * - *
<p>
    This method uses basic OAuth 2.0 client credentials flow without resource or authorization - * details. - * - * @param ucEndpoint The Unity Catalog endpoint URL - * @param clientId The OAuth client ID - * @param clientSecret The OAuth client secret - * @return The OAuth access token (JWT) - * @throws Exception if the token request fails - */ - private static String getOAuthToken(String ucEndpoint, String clientId, String clientSecret) - throws Exception { - String urlString = ucEndpoint + "/oidc/v1/token"; - - // Build OAuth 2.0 client credentials request with minimal scope - String formData = "grant_type=client_credentials&scope=all-apis"; - - // Encode credentials for HTTP Basic authentication - String credentials = - Base64.getEncoder() - .encodeToString((clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8)); - - HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection(); - connection.setRequestMethod("POST"); - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - connection.setRequestProperty("Authorization", "Basic " + credentials); - connection.setDoOutput(true); - - OutputStreamWriter writer = - new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8); - writer.write(formData); - writer.close(); - - int responseCode = connection.getResponseCode(); - - if (responseCode != 200) { - String errorBody = readStream(connection.getErrorStream()); - throw new IOException("OAuth request failed with status " + responseCode + ": " + errorBody); - } - - String responseBody = readStream(connection.getInputStream()); - - // Extract access token using regex to avoid dependency on a JSON library - Pattern accessTokenPattern = Pattern.compile("\"access_token\"\\s*:\\s*\"([^\"]+)\""); - Matcher matcher = accessTokenPattern.matcher(responseBody); - - if (matcher.find()) { - return matcher.group(1); - } else { - throw new IOException("No access token received from OAuth response"); - } - } - - /** - * Fetch table information from Unity Catalog. - * - * @param endpoint Base URL of the Unity Catalog endpoint - * @param token Authentication token - * @param table Table identifier (catalog.schema.table) - * @return The parsed table information as a Map - * @throws Exception If the HTTP request fails - */ - @SuppressWarnings("unchecked") - private static Map fetchTableInfo(String endpoint, String token, String table) - throws Exception { - String encodedTable = URLEncoder.encode(table, "UTF-8"); - String urlString = endpoint + "/api/2.1/unity-catalog/tables/" + encodedTable; - - HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection(); - connection.setRequestMethod("GET"); - connection.setRequestProperty("Authorization", "Bearer " + token); - connection.setRequestProperty("Content-Type", "application/json"); - - int responseCode = connection.getResponseCode(); - - if (responseCode != 200) { - String errorBody = readStream(connection.getErrorStream()); - throw new IOException( - "Failed to fetch table info with status " + responseCode + ": " + errorBody); - } - - String responseBody = readStream(connection.getInputStream()); - return (Map) parseJson(responseBody); - } - - /** - * Extract column information from the table schema. 
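Since getOAuthToken above deliberately sticks to HttpURLConnection (preserving the tool's Java 8 floor and zero-dependency goal), a modern rendering of the same exchange may help orient readers. A comparison-only sketch with java.net.http (Java 11+), reusing the endpoint path and form body from the code above:

```java
import java.net.URI;
import java.net.http.HttpRequest;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

// The same client-credentials exchange as getOAuthToken, sketched with
// java.net.http (Java 11+); for comparison only, not part of the tool.
class TokenRequestSketch {
  static HttpRequest build(String ucEndpoint, String clientId, String clientSecret) {
    String basic = Base64.getEncoder()
        .encodeToString((clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8));
    return HttpRequest.newBuilder(URI.create(ucEndpoint + "/oidc/v1/token"))
        .header("Content-Type", "application/x-www-form-urlencoded")
        .header("Authorization", "Basic " + basic)
        .POST(HttpRequest.BodyPublishers.ofString("grant_type=client_credentials&scope=all-apis"))
        .build();
  }
}
```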
- * - * @param tableInfo Raw table information from Unity Catalog - * @return List of column information maps - * @throws IllegalArgumentException If the expected schema structure is not found - */ - @SuppressWarnings("unchecked") - private static List> extractColumns(Map tableInfo) { - if (!tableInfo.containsKey("columns")) { - throw new IllegalArgumentException("No columns found in table info"); - } - return (List>) tableInfo.get("columns"); - } - - /** - * Map Unity Catalog column types to proto2 field information. - * - * @param columnType The Unity Catalog column type - * @param nullable Whether the column is nullable - * @return Array containing [field_modifier, proto_type] - * @throws IllegalArgumentException If the column type is not supported - */ - private static String[] getProtoFieldInfo(String columnType, boolean nullable) { - String upperType = columnType.toUpperCase(); - - // Basic type mapping - String protoType = null; - switch (upperType) { - case "SMALLINT": - case "INT": - case "SHORT": - case "DATE": - protoType = "int32"; - break; - case "BIGINT": - case "LONG": - case "TIMESTAMP": - protoType = "int64"; - break; - case "STRING": - protoType = "string"; - break; - case "FLOAT": - protoType = "float"; - break; - case "DOUBLE": - protoType = "double"; - break; - case "BOOLEAN": - protoType = "bool"; - break; - case "BINARY": - protoType = "bytes"; - break; - } - - if (protoType != null) { - return new String[] {nullable ? "optional" : "required", protoType}; - } - - // VARCHAR types - if (upperType.startsWith("VARCHAR")) { - return new String[] {nullable ? "optional" : "required", "string"}; - } - - // Array types - Pattern arrayPattern = Pattern.compile("^ARRAY<(.+)>$"); - Matcher arrayMatcher = arrayPattern.matcher(upperType); - if (arrayMatcher.matches()) { - String elementType = arrayMatcher.group(1).trim(); - String elementProtoType = getBasicProtoType(elementType); - if (elementProtoType == null) { - throw new IllegalArgumentException("Unsupported array element type: " + elementType); - } - return new String[] {"repeated", elementProtoType}; - } - - // Map types - Pattern mapPattern = Pattern.compile("^MAP<(.+),(.+)>$"); - Matcher mapMatcher = mapPattern.matcher(upperType); - if (mapMatcher.matches()) { - String keyType = mapMatcher.group(1).trim(); - String valueType = mapMatcher.group(2).trim(); - - String keyProtoType = getBasicProtoType(keyType); - if (keyProtoType == null) { - throw new IllegalArgumentException("Unsupported map key type: " + keyType); - } - - String valueProtoType = getBasicProtoType(valueType); - if (valueProtoType == null) { - throw new IllegalArgumentException("Unsupported map value type: " + valueType); - } - - return new String[] {"", "map<" + keyProtoType + ", " + valueProtoType + ">"}; - } - - throw new IllegalArgumentException("Unsupported column type: " + columnType); - } - - /** - * Get basic proto type mapping for simple types. 
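For a concrete feel of the mapping getProtoFieldInfo implements above, a few illustrative input/output pairs (descriptive only; the method is private, so this is not a runnable test against the tool):

```java
// Illustrative expectations for getProtoFieldInfo(columnType, nullable),
// read off the switch and regex handling above:
//   getProtoFieldInfo("STRING", true)           -> {"optional", "string"}
//   getProtoFieldInfo("BIGINT", false)          -> {"required", "int64"}
//   getProtoFieldInfo("ARRAY<INT>", false)      -> {"repeated", "int32"}
//   getProtoFieldInfo("MAP<STRING,LONG>", true) -> {"", "map<string, int64>"}
// Note: repeated and map fields carry no required/optional modifier.
```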
- * - * @param type The Unity Catalog type - * @return The proto type or null if not a basic type - */ - private static String getBasicProtoType(String type) { - String upperType = type.toUpperCase(); - switch (upperType) { - case "SMALLINT": - case "INT": - case "SHORT": - case "DATE": - return "int32"; - case "BIGINT": - case "LONG": - case "TIMESTAMP": - return "int64"; - case "STRING": - return "string"; - case "FLOAT": - return "float"; - case "DOUBLE": - return "double"; - case "BOOLEAN": - return "bool"; - case "BINARY": - return "bytes"; - default: - return null; - } - } - - /** - * Generate a proto2 file from the column information. - * - * @param messageName Name of the protobuf message - * @param columns List of column information maps - * @param outputPath Path where to write the proto file - * @throws IOException If the file cannot be written - */ - @SuppressWarnings("unchecked") - private static void generateProtoFile( - String messageName, List> columns, String outputPath) throws IOException { - StringBuilder protoContent = new StringBuilder(); - protoContent.append("syntax = \"proto2\";\n"); - protoContent.append("\n"); - protoContent.append("message ").append(messageName).append(" {\n"); - - // Add fields - int fieldNumber = 1; - for (Map col : columns) { - String fieldName = (String) col.get("name"); - String typeText = (String) col.get("type_text"); - boolean nullable = (Boolean) col.get("nullable"); - - String[] fieldInfo = getProtoFieldInfo(typeText, nullable); - String fieldModifier = fieldInfo[0]; - String protoType = fieldInfo[1]; - - if (fieldModifier.isEmpty()) { - // Map type (no modifier) - protoContent - .append(" ") - .append(protoType) - .append(" ") - .append(fieldName) - .append(" = ") - .append(fieldNumber) - .append(";\n"); - } else { - // Regular field or repeated field - protoContent - .append(" ") - .append(fieldModifier) - .append(" ") - .append(protoType) - .append(" ") - .append(fieldName) - .append(" = ") - .append(fieldNumber) - .append(";\n"); - } - fieldNumber++; - } - - protoContent.append("}\n"); - - // Write to file - try (FileWriter writer = new FileWriter(outputPath)) { - writer.write(protoContent.toString()); - } - } - - /** Helper method to read an input stream to a string. */ - private static String readStream(java.io.InputStream stream) throws IOException { - if (stream == null) { - return "No error details available"; - } - BufferedReader reader = - new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); - StringBuilder builder = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) { - builder.append(line).append("\n"); - } - reader.close(); - return builder.toString(); - } - - /** - * Simple JSON parser for basic objects and arrays. This avoids adding a JSON library dependency. - */ - private static Object parseJson(String json) { - return new SimpleJsonParser(json).parse(); - } - - /** Simple command-line arguments holder. */ - private static class Args { - String ucEndpoint; - String clientId; - String clientSecret; - String table; - String output; - String protoMsg; - } - - /** - * A minimal JSON parser that can handle basic objects, arrays, strings, numbers, booleans, and - * nulls. This is sufficient for parsing Unity Catalog API responses without adding a dependency. 
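The parser hands back plain java.util collections, which is what lets fetchTableInfo cast its result straight to a Map. A hypothetical sketch of walking a parsed table-info response (the field names match those consumed by generateProtoFile above):

```java
import java.util.List;
import java.util.Map;

// Hypothetical walk over a parsed Unity Catalog response: JSON objects come
// back as Map, arrays as List, and scalars as String/Boolean/Integer/Long/Double.
class ParsedShapeSketch {
  @SuppressWarnings("unchecked")
  static void describe(Object parsed) {
    Map<String, Object> table = (Map<String, Object>) parsed;
    List<Map<String, Object>> columns = (List<Map<String, Object>>) table.get("columns");
    for (Map<String, Object> col : columns) {
      System.out.println(
          col.get("name") + " : " + col.get("type_text") + " nullable=" + col.get("nullable"));
    }
  }
}
```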
- */ - private static class SimpleJsonParser { - private final String json; - private int pos = 0; - - SimpleJsonParser(String json) { - this.json = json.trim(); - } - - Object parse() { - skipWhitespace(); - return parseValue(); - } - - private Object parseValue() { - skipWhitespace(); - char c = peek(); - - if (c == '{') { - return parseObject(); - } else if (c == '[') { - return parseArray(); - } else if (c == '"') { - return parseString(); - } else if (c == 't' || c == 'f') { - return parseBoolean(); - } else if (c == 'n') { - return parseNull(); - } else if (c == '-' || Character.isDigit(c)) { - return parseNumber(); - } else { - throw new IllegalArgumentException("Unexpected character at position " + pos + ": " + c); - } - } - - private Map parseObject() { - Map map = new java.util.HashMap<>(); - consume('{'); - skipWhitespace(); - - if (peek() == '}') { - consume('}'); - return map; - } - - while (true) { - skipWhitespace(); - String key = parseString(); - skipWhitespace(); - consume(':'); - skipWhitespace(); - Object value = parseValue(); - map.put(key, value); - - skipWhitespace(); - char c = peek(); - if (c == '}') { - consume('}'); - break; - } else if (c == ',') { - consume(','); - } else { - throw new IllegalArgumentException("Expected ',' or '}' at position " + pos); - } - } - - return map; - } - - private List parseArray() { - List list = new java.util.ArrayList<>(); - consume('['); - skipWhitespace(); - - if (peek() == ']') { - consume(']'); - return list; - } - - while (true) { - skipWhitespace(); - list.add(parseValue()); - skipWhitespace(); - - char c = peek(); - if (c == ']') { - consume(']'); - break; - } else if (c == ',') { - consume(','); - } else { - throw new IllegalArgumentException("Expected ',' or ']' at position " + pos); - } - } - - return list; - } - - private String parseString() { - consume('"'); - StringBuilder sb = new StringBuilder(); - - while (pos < json.length()) { - char c = json.charAt(pos); - if (c == '"') { - pos++; - return sb.toString(); - } else if (c == '\\') { - pos++; - if (pos >= json.length()) { - throw new IllegalArgumentException("Unterminated string escape"); - } - char escaped = json.charAt(pos); - switch (escaped) { - case '"': - case '\\': - case '/': - sb.append(escaped); - break; - case 'b': - sb.append('\b'); - break; - case 'f': - sb.append('\f'); - break; - case 'n': - sb.append('\n'); - break; - case 'r': - sb.append('\r'); - break; - case 't': - sb.append('\t'); - break; - case 'u': - // Unicode escape - if (pos + 4 >= json.length()) { - throw new IllegalArgumentException("Invalid unicode escape"); - } - String hex = json.substring(pos + 1, pos + 5); - sb.append((char) Integer.parseInt(hex, 16)); - pos += 4; - break; - default: - throw new IllegalArgumentException("Invalid escape character: " + escaped); - } - pos++; - } else { - sb.append(c); - pos++; - } - } - - throw new IllegalArgumentException("Unterminated string"); - } - - private Object parseNumber() { - int start = pos; - if (peek() == '-') { - pos++; - } - - while (pos < json.length() - && (Character.isDigit(json.charAt(pos)) - || json.charAt(pos) == '.' 
- || json.charAt(pos) == 'e' - || json.charAt(pos) == 'E' - || json.charAt(pos) == '+' - || json.charAt(pos) == '-')) { - pos++; - } - - String numStr = json.substring(start, pos); - if (numStr.contains(".") || numStr.contains("e") || numStr.contains("E")) { - return Double.parseDouble(numStr); - } else { - try { - return Integer.parseInt(numStr); - } catch (NumberFormatException e) { - return Long.parseLong(numStr); - } - } - } - - private Boolean parseBoolean() { - if (json.startsWith("true", pos)) { - pos += 4; - return Boolean.TRUE; - } else if (json.startsWith("false", pos)) { - pos += 5; - return Boolean.FALSE; - } else { - throw new IllegalArgumentException("Invalid boolean at position " + pos); - } - } - - private Object parseNull() { - if (json.startsWith("null", pos)) { - pos += 4; - return null; - } else { - throw new IllegalArgumentException("Invalid null at position " + pos); - } - } - - private char peek() { - if (pos >= json.length()) { - throw new IllegalArgumentException("Unexpected end of JSON"); - } - return json.charAt(pos); - } - - private void consume(char expected) { - char c = peek(); - if (c != expected) { - throw new IllegalArgumentException( - "Expected '" + expected + "' but got '" + c + "' at position " + pos); - } - pos++; - } - - private void skipWhitespace() { - while (pos < json.length() && Character.isWhitespace(json.charAt(pos))) { - pos++; - } - } - } -} diff --git a/src/test/java/com/databricks/zerobus/ZerobusSdkTest.java b/src/test/java/com/databricks/zerobus/ZerobusSdkTest.java deleted file mode 100644 index 6a0ec76..0000000 --- a/src/test/java/com/databricks/zerobus/ZerobusSdkTest.java +++ /dev/null @@ -1,421 +0,0 @@ -package com.databricks.zerobus; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.*; - -import com.databricks.test.table.TestTableRow.CityPopulationTableRow; -import io.grpc.stub.StreamObserver; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.junit.jupiter.MockitoExtension; - -/** - * Test suite for ZerobusSdk with mocked gRPC server. - * - *
<p>
    These tests verify the SDK's core functionality including stream creation, record ingestion, - * acknowledgments, and flush operations without requiring a real Zerobus backend server. - * - *
<p>
    Best practices followed: - Fast execution (no long sleeps or timeouts) - Clear test names - * describing what is being tested - Proper mock setup and teardown - Testing both success and - * failure paths - Using CompletableFutures for async operations - */ -@ExtendWith(MockitoExtension.class) -public class ZerobusSdkTest { - - private MockedGrpcServer mockedGrpcServer; - private ZerobusGrpc.ZerobusStub zerobusStub; - private ZerobusSdk zerobusSdk; - private ZerobusSdkStubFactory zerobusSdkStubFactory; - private org.mockito.MockedStatic tokenFactoryMock; - private io.grpc.stub.ClientCallStreamObserver spiedStream; - - @BeforeEach - public void setUp() { - // Create mocked gRPC server - mockedGrpcServer = new MockedGrpcServer(); - - // Create mocked stub - zerobusStub = mock(ZerobusGrpc.ZerobusStub.class); - - // Create spy on stub factory - zerobusSdkStubFactory = spy(ZerobusSdkStubFactory.create()); - - // Mock TokenFactory to return a fake token - tokenFactoryMock = mockStatic(TokenFactory.class); - tokenFactoryMock - .when( - () -> - TokenFactory.getZerobusToken( - anyString(), anyString(), anyString(), anyString(), anyString())) - .thenReturn("fake-token-for-testing"); - - // Create ZerobusSdk and set mocked stub factory - zerobusSdk = new ZerobusSdk("localhost:50051", "https://test.cloud.databricks.com"); - zerobusSdk.setStubFactory(zerobusSdkStubFactory); - - // Configure stub factory to return our mocked stub with token supplier - doReturn(zerobusStub) - .when(zerobusSdkStubFactory) - .createStubWithTokenSupplier(anyString(), anyString(), any()); - - // Setup mocked stub's ephemeralStream behavior - doAnswer( - invocation -> { - @SuppressWarnings("unchecked") - StreamObserver ackSender = - (StreamObserver) invocation.getArgument(0); - - mockedGrpcServer.initialize(ackSender); - - // Spy on the message receiver to verify cancel() is called - spiedStream = spy(mockedGrpcServer.getMessageReceiver()); - return spiedStream; - }) - .when(zerobusStub) - .ephemeralStream(any()); - } - - @AfterEach - public void tearDown() { - if (tokenFactoryMock != null) { - tokenFactoryMock.close(); - } - if (mockedGrpcServer != null) { - mockedGrpcServer.destroy(); - } - mockedGrpcServer = null; - zerobusStub = null; - zerobusSdk = null; - zerobusSdkStubFactory = null; - tokenFactoryMock = null; - } - - @Test - public void testSingleRecordIngestAndAcknowledgment() throws Exception { - // Test basic ingestion: send one record and verify it's acknowledged - mockedGrpcServer.injectAckRecord(0); - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - - assertEquals(StreamState.OPENED, stream.getState()); - - CompletableFuture writeCompleted = - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("test-city") - .setPopulation(1000) - .build()); - - // Wait for acknowledgment - writeCompleted.get(5, TimeUnit.SECONDS); - - // Verify no unacked records - Iterator unackedRecords = stream.getUnackedRecords(); - assertFalse(unackedRecords.hasNext()); - - stream.close(); - assertEquals(StreamState.CLOSED, stream.getState()); - } - - @Test - public void testBatchIngestion() throws Exception { - // Test ingesting multiple records in a batch - int batchSize = 100; - - for (int i = 0; i < batchSize; i++) { - 
mockedGrpcServer.injectAckRecord(i); - } - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - assertEquals(StreamState.OPENED, stream.getState()); - - // Send records - List> futures = new ArrayList<>(); - for (int i = 0; i < batchSize; i++) { - futures.add( - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("city-" + i) - .setPopulation(1000 + i) - .build())); - } - - // Wait for all acknowledgments - for (CompletableFuture future : futures) { - future.get(5, TimeUnit.SECONDS); - } - - // Verify all records acknowledged - Iterator unackedRecords = stream.getUnackedRecords(); - assertFalse(unackedRecords.hasNext()); - - stream.close(); - assertEquals(StreamState.CLOSED, stream.getState()); - } - - @Test - public void testFlushWaitsForAllAcknowledgments() throws Exception { - // Test that flush() blocks until all inflight records are acknowledged - int numRecords = 10; - mockedGrpcServer.injectAckRecord(numRecords - 1); - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - assertEquals(StreamState.OPENED, stream.getState()); - - // Ingest records - for (int i = 0; i < numRecords; i++) { - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("device-" + i) - .setPopulation(20 + i) - .build()); - } - - // Flush should wait for all acks - stream.flush(); - - // Verify no unacked records after flush - Iterator unackedRecords = stream.getUnackedRecords(); - assertFalse(unackedRecords.hasNext()); - - stream.close(); - } - - @Test - public void testEmptyFlushReturnsImmediately() throws Exception { - // Test that flush() on an empty stream returns immediately - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - - assertEquals(StreamState.OPENED, stream.getState()); - - // Measure flush execution time - long startTime = System.currentTimeMillis(); - stream.flush(); - long endTime = System.currentTimeMillis(); - long flushDuration = endTime - startTime; - - assertTrue( - flushDuration < 100, - "Expected flush to return immediately, but took " + flushDuration + "ms"); - - assertEquals(StreamState.OPENED, stream.getState()); - stream.close(); - } - - @Test - public void testAckCallback() throws Exception { - // Test that ack callbacks are invoked for each acknowledgment - List ackedOffsets = Collections.synchronizedList(new ArrayList<>()); - Consumer ackCallback = - response -> ackedOffsets.add(response.getDurabilityAckUpToOffset()); - - int numRecords = 10; - for (int i = 0; i < numRecords; i++) { - mockedGrpcServer.injectAckRecord(i); - } - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = 
- StreamConfigurationOptions.builder().setRecovery(false).setAckCallback(ackCallback).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - assertEquals(StreamState.OPENED, stream.getState()); - - // Ingest records - List> futures = new ArrayList<>(); - for (int i = 0; i < numRecords; i++) { - futures.add( - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("test-city-" + i) - .setPopulation(i) - .build())); - } - - // Wait for all records to be acknowledged - for (CompletableFuture future : futures) { - future.get(5, TimeUnit.SECONDS); - } - - stream.flush(); - assertEquals(StreamState.OPENED, stream.getState()); - - // Wait for callbacks to complete - wait until we see the final offset (numRecords - 1) - long deadline = System.currentTimeMillis() + 2000; - boolean foundFinalOffset = false; - while (System.currentTimeMillis() < deadline) { - synchronized (ackedOffsets) { - if (!ackedOffsets.isEmpty() && ackedOffsets.contains((long) (numRecords - 1))) { - foundFinalOffset = true; - break; - } - } - Thread.sleep(10); - } - - // Verify callback was called and final offset was received - assertTrue(foundFinalOffset, "Expected to receive ack for final offset " + (numRecords - 1)); - assertTrue(ackedOffsets.size() > 0, "Expected callback to be called at least once"); - - // Verify the final offset was acknowledged - assertTrue( - ackedOffsets.contains((long) (numRecords - 1)), - "Expected callbacks to include offset " + (numRecords - 1)); - - // Verify unacked records are empty - Iterator unackedRecords = stream.getUnackedRecords(); - assertFalse(unackedRecords.hasNext()); - - stream.close(); - assertEquals(StreamState.CLOSED, stream.getState()); - } - - @Test - public void testCallbackExceptionHandling() throws Exception { - // Test that exceptions in callbacks don't crash the stream - List callbackInvocations = new ArrayList<>(); - List thrownExceptions = new ArrayList<>(); - - Consumer ackCallback = - response -> { - long offsetId = response.getDurabilityAckUpToOffset(); - callbackInvocations.add(offsetId); - - // Throw exception for offset 1 to test error handling - if (offsetId == 1) { - RuntimeException exception = - new RuntimeException("Test exception in callback for offset " + offsetId); - thrownExceptions.add(exception.getMessage()); - throw exception; - } - }; - - int numRecords = 3; - for (int i = 0; i < numRecords; i++) { - mockedGrpcServer.injectAckRecord(i); - } - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).setAckCallback(ackCallback).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - - assertEquals(StreamState.OPENED, stream.getState()); - - List> ingestResults = new ArrayList<>(); - for (int i = 0; i < numRecords; i++) { - CompletableFuture writeCompleted = - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("error-callback-device-" + i) - .setPopulation(30 + i) - .build()); - ingestResults.add(writeCompleted); - } - - // Wait for all records to be acknowledged (should succeed despite callback exception) - for (CompletableFuture future : ingestResults) { - future.get(5, TimeUnit.SECONDS); - } - - // Wait for callbacks to complete - long deadline = System.currentTimeMillis() + 1000; - while 
(callbackInvocations.size() < numRecords && System.currentTimeMillis() < deadline) { - Thread.yield(); - } - - // Verify callback was invoked for all acknowledgments (including the one that threw) - assertEquals(numRecords, callbackInvocations.size()); - assertTrue(callbackInvocations.contains(0L)); - assertTrue(callbackInvocations.contains(1L)); - assertTrue(callbackInvocations.contains(2L)); - - // Verify the exception was thrown for offset 1 - assertEquals(1, thrownExceptions.size()); - assertTrue(thrownExceptions.get(0).contains("Test exception in callback for offset 1")); - - // Verify stream remains functional - Iterator unackedRecords = stream.getUnackedRecords(); - assertFalse(unackedRecords.hasNext()); - assertEquals(StreamState.OPENED, stream.getState()); - - stream.close(); - assertEquals(StreamState.CLOSED, stream.getState()); - } - - @Test - public void testGrpcStreamIsCancelledOnClose() throws Exception { - // Test that the underlying gRPC stream is properly cancelled when stream.close() is called - mockedGrpcServer.injectAckRecord(0); - - TableProperties tableProperties = - new TableProperties<>("test-table", CityPopulationTableRow.getDefaultInstance()); - StreamConfigurationOptions options = - StreamConfigurationOptions.builder().setRecovery(false).build(); - - ZerobusStream stream = - zerobusSdk.createStream(tableProperties, "client-id", "client-secret", options).get(); - - assertEquals(StreamState.OPENED, stream.getState()); - - // Ingest one record - CompletableFuture writeCompleted = - stream.ingestRecord( - CityPopulationTableRow.newBuilder() - .setCityName("test-city") - .setPopulation(1000) - .build()); - - writeCompleted.get(5, TimeUnit.SECONDS); - - // Close the stream - stream.close(); - assertEquals(StreamState.CLOSED, stream.getState()); - - // Verify that cancel() was called on the gRPC stream - verify(spiedStream, times(1)).cancel(anyString(), any()); - - // Also verify onCompleted() was called - verify(spiedStream, times(1)).onCompleted(); - } -} diff --git a/src/test/resources/simplelogger.properties b/src/test/resources/simplelogger.properties deleted file mode 100644 index 3ac050e..0000000 --- a/src/test/resources/simplelogger.properties +++ /dev/null @@ -1,20 +0,0 @@ -# SLF4J Simple Logger configuration for tests -# Reduce noise during test execution while keeping error logs visible - -# Default log level for all loggers -org.slf4j.simpleLogger.defaultLogLevel=error - -# Show date/time in logs -org.slf4j.simpleLogger.showDateTime=true -org.slf4j.simpleLogger.dateTimeFormat=HH:mm:ss.SSS - -# Show thread name -org.slf4j.simpleLogger.showThreadName=false - -# Show logger name -org.slf4j.simpleLogger.showLogName=false - -# Only show errors from ZerobusStream during tests -# This suppresses INFO logs like "Stream created successfully" but keeps ERROR logs -org.slf4j.simpleLogger.log.com.databricks.zerobus.ZerobusStream=error -org.slf4j.simpleLogger.log.com.databricks.zerobus.ZerobusSdk=error diff --git a/tools/README.md b/tools/README.md deleted file mode 100644 index 3829d4f..0000000 --- a/tools/README.md +++ /dev/null @@ -1,262 +0,0 @@ -# Generate Proto Tool - -A standalone tool for generating Protocol Buffer (proto2) definition files from Unity Catalog table schemas. - -## Overview - -The `GenerateProto` tool fetches table schema information from Unity Catalog and automatically generates a corresponding `.proto` file with proper type mappings. 
This is useful when you need to create Protocol Buffer message definitions that match your Delta table schemas for use with the Zerobus SDK. - -The tool is **packaged within the Zerobus SDK JAR**, so users can run it directly after downloading the SDK without needing to clone the repository. - -## Features - -- Fetches table schema directly from Unity Catalog -- Supports all standard Delta data types -- Generates proto2 format files -- Handles complex types (arrays and maps) -- Uses OAuth 2.0 client credentials authentication -- No external dependencies beyond Java standard library -- Packaged in SDK JAR for easy distribution - -## Requirements - -- Java 8 or higher -- Zerobus SDK JAR (built with `mvn package`) -- OAuth client ID and client secret with access to Unity Catalog -- Access to a Unity Catalog endpoint - -## Usage - -### Method 1: Using the Helper Script (Recommended for Development) - -If you have the SDK source repository: - -```bash -# First, build the SDK JAR -mvn package - -# Then run the tool -./tools/generate_proto.sh \ - --uc-endpoint "https://your-workspace.cloud.databricks.com" \ - --client-id "your-client-id" \ - --client-secret "your-client-secret" \ - --table "catalog.schema.table_name" \ - --output "output.proto" \ - --proto-msg "TableMessage" -``` - -### Method 2: Running Directly from the SDK JAR (Recommended for Users) - -If you have downloaded the SDK JAR without the source code: - -```bash -# Using the shaded JAR (includes all dependencies) -java -cp databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - com.databricks.zerobus.tools.GenerateProto \ - --uc-endpoint "https://your-workspace.cloud.databricks.com" \ - --client-id "your-client-id" \ - --client-secret "your-client-secret" \ - --table "catalog.schema.table_name" \ - --output "output.proto" \ - --proto-msg "TableMessage" -``` - -Or, if the JAR has a Main-Class manifest entry (which it does): - -```bash -# Even simpler - just use -jar flag -java -jar databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - --uc-endpoint "https://your-workspace.cloud.databricks.com" \ - --client-id "your-client-id" \ - --client-secret "your-client-secret" \ - --table "catalog.schema.table_name" \ - --output "output.proto" \ - --proto-msg "TableMessage" -``` - -## Arguments - -| Argument | Required | Description | -|----------|----------|-------------| -| `--uc-endpoint` | Yes | Unity Catalog endpoint URL (e.g., `https://your-workspace.cloud.databricks.com`) | -| `--client-id` | Yes | OAuth client ID for authentication | -| `--client-secret` | Yes | OAuth client secret for authentication | -| `--table` | Yes | Full table name in format `catalog.schema.table_name` | -| `--output` | Yes | Output path for the generated proto file (e.g., `output.proto`) | -| `--proto-msg` | No | Name of the protobuf message (defaults to the table name) | - -## Type Mappings - -The tool automatically maps Delta/Unity Catalog types to Protocol Buffer types: - -| Delta Type | Proto2 Type | -|------------|-------------| -| `INT`, `SHORT`, `SMALLINT` | `int32` | -| `LONG`, `BIGINT` | `int64` | -| `STRING`, `VARCHAR(n)` | `string` | -| `FLOAT` | `float` | -| `DOUBLE` | `double` | -| `BOOLEAN` | `bool` | -| `BINARY` | `bytes` | -| `DATE` | `int32` | -| `TIMESTAMP` | `int64` | -| `ARRAY` | `repeated type` | -| `MAP` | `map` | - -## Examples - -### Basic Usage - -Generate a proto file for a simple table: - -**From the SDK JAR:** -```bash -java -jar databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - --uc-endpoint 
"https://myworkspace.cloud.databricks.com" \ - --client-id "abc123" \ - --client-secret "secret123" \ - --table "my_catalog.my_schema.users" \ - --output "users.proto" -``` - -**Or, if you have the source repository:** -```bash -./tools/generate_proto.sh \ - --uc-endpoint "https://myworkspace.cloud.databricks.com" \ - --client-id "abc123" \ - --client-secret "secret123" \ - --table "my_catalog.my_schema.users" \ - --output "users.proto" -``` - -This might generate: - -```protobuf -syntax = "proto2"; - -message users { - required int32 user_id = 1; - required string username = 2; - optional string email = 3; - required int64 created_at = 4; -} -``` - -### Custom Message Name - -Specify a custom message name: - -```bash -java -jar databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - --uc-endpoint "https://myworkspace.cloud.databricks.com" \ - --client-id "abc123" \ - --client-secret "secret123" \ - --table "my_catalog.my_schema.events" \ - --output "events.proto" \ - --proto-msg "EventRecord" -``` - -### Complex Types - -The tool handles complex types like arrays and maps: - -```bash -java -jar databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - --uc-endpoint "https://myworkspace.cloud.databricks.com" \ - --client-id "abc123" \ - --client-secret "secret123" \ - --table "my_catalog.my_schema.products" \ - --output "products.proto" -``` - -If the table has columns like: -- `tags ARRAY` -- `attributes MAP` - -The generated proto will include: -```protobuf -syntax = "proto2"; - -message products { - required int32 product_id = 1; - required string name = 2; - repeated string tags = 3; - map attributes = 4; -} -``` - -## Authentication - -The tool uses OAuth 2.0 client credentials flow to authenticate with Unity Catalog. Unlike the SDK's token generation (which includes resource and authorization details for specific table privileges), this tool uses basic authentication with minimal scope to fetch table metadata. - -The authentication flow: -1. Exchanges client ID and secret for an OAuth token -2. Uses the token to fetch table schema from Unity Catalog API -3. Token is used only for metadata retrieval (read-only operation) - -## Integration with Zerobus SDK - -After generating the `.proto` file: - -1. Place it in your project's proto directory (e.g., `src/main/proto/`) -2. Compile it using the protobuf compiler: - ```bash - protoc --java_out=src/main/java your_proto_file.proto - ``` -3. Use the generated Java classes with the Zerobus SDK: - ```java - TableProperties tableProperties = - new TableProperties<>("catalog.schema.table", YourMessage.getDefaultInstance()); - - ZerobusStream stream = sdk.createStream( - tableProperties, clientId, clientSecret).join(); - ``` - -## Troubleshooting - -### Authentication Errors - -If you receive authentication errors: -- Verify your client ID and secret are correct -- Ensure your OAuth client has access to Unity Catalog -- Check that the endpoint URL is correct - -### Table Not Found - -If the table cannot be found: -- Verify the table name format is `catalog.schema.table` -- Ensure the table exists in Unity Catalog -- Check that your OAuth client has permission to read the table metadata - -### Unsupported Type Errors - -If you encounter unsupported type errors: -- Check if your table uses custom or complex types not listed in the type mappings -- Consider simplifying the column type or manually editing the generated proto file - -## Distribution - -The tool is distributed as part of the Zerobus SDK JAR. 
When you download or build the SDK, the `GenerateProto` tool is automatically included in the shaded JAR file (`databricks-zerobus-ingest-sdk-*-jar-with-dependencies.jar`). - -Users can run the tool directly from the JAR without needing access to the source code: - -```bash -# Download the SDK JAR (or build it with mvn package) -# Then simply run: -java -jar databricks-zerobus-ingest-sdk-0.1.0-jar-with-dependencies.jar \ - --uc-endpoint "..." \ - --client-id "..." \ - --client-secret "..." \ - --table "..." \ - --output "output.proto" -``` - -## Files - -- `src/main/java/com/databricks/zerobus/tools/GenerateProto.java` - Main tool implementation (packaged in SDK JAR) -- `tools/generate_proto.sh` - Helper script for running from source repository -- `tools/README.md` - This documentation file - -## License - -This tool is part of the Databricks Zerobus SDK for Java. diff --git a/tools/generate_proto.sh b/tools/generate_proto.sh deleted file mode 100755 index 9995232..0000000 --- a/tools/generate_proto.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# Generate Proto Tool - Helper script for running GenerateProto from the SDK JAR -# -# This script runs the GenerateProto tool to generate proto2 files -# from Unity Catalog table schemas. -# -# The tool is packaged within the Zerobus SDK JAR and can be executed -# directly without needing to clone the repository. - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="${SCRIPT_DIR}/.." -TARGET_DIR="${PROJECT_ROOT}/target" - -# Find the shaded JAR (with dependencies) -SHADED_JAR=$(find "${TARGET_DIR}" -name "databricks-zerobus-ingest-sdk-*-jar-with-dependencies.jar" 2>/dev/null | head -n 1) - -if [ -z "${SHADED_JAR}" ] || [ ! -f "${SHADED_JAR}" ]; then - echo "Error: Zerobus SDK JAR not found in ${TARGET_DIR}" - echo "Please run 'mvn package' first to build the SDK JAR" - exit 1 -fi - -# Run the tool from the JAR -java -cp "${SHADED_JAR}" com.databricks.zerobus.tools.GenerateProto "$@"
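For build pipelines that regenerate protos automatically, the packaged tool can also be driven from Java. A hypothetical sketch (the JAR path and arguments are placeholders); a child process is used deliberately, since GenerateProto.main calls System.exit:

```java
import java.io.IOException;

// Hypothetical CI hook: run the packaged GenerateProto tool in a child JVM.
// A separate process is intentional because GenerateProto.main calls System.exit.
class RegenerateProtos {
  static int run(String jarPath, String... toolArgs) throws IOException, InterruptedException {
    String[] cmd = new String[3 + toolArgs.length];
    cmd[0] = "java";
    cmd[1] = "-jar";
    cmd[2] = jarPath;
    System.arraycopy(toolArgs, 0, cmd, 3, toolArgs.length);
    Process p = new ProcessBuilder(cmd).inheritIO().start();
    return p.waitFor(); // non-zero exit indicates a generation failure
  }
}
```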