From f0fe9d956dc142689ef528d068e407dd9cbb16f2 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Thu, 29 Jan 2026 23:55:04 -0800 Subject: [PATCH] DRC POC Signed-off-by: Yi Li --- api/delta-rest.yaml | 1767 +++++++++++++++++ build.sbt | 111 +- .../spark/auth/CredPropsUtil.java | 115 +- .../unitycatalog/spark/UCSingleCatalog.scala | 424 ++-- .../server/UnityCatalogServer.java | 65 + .../auth/annotation/AuthorizeResourceKey.java | 6 +- .../server/decorator/RpcLoggingDecorator.java | 137 ++ .../server/persist/DeltaCommitRepository.java | 70 +- .../server/persist/TableRepository.java | 521 ++++- .../persist/utils/ExternalLocationUtils.java | 4 +- .../deltarest/DeltaRestCatalogService.java | 1017 ++++++++++ .../deltarest/DeltaRestExceptionHandler.java | 82 + .../deltarest/TableUpdateDeserializer.java | 52 + .../deltacommits/SdkDeltaCommitsCRUDTest.java | 69 + 14 files changed, 4171 insertions(+), 269 deletions(-) create mode 100644 api/delta-rest.yaml create mode 100644 server/src/main/java/io/unitycatalog/server/decorator/RpcLoggingDecorator.java create mode 100644 server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestCatalogService.java create mode 100644 server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestExceptionHandler.java create mode 100644 server/src/main/java/io/unitycatalog/server/service/deltarest/TableUpdateDeserializer.java diff --git a/api/delta-rest.yaml b/api/delta-rest.yaml new file mode 100644 index 0000000000..fc0128f34b --- /dev/null +++ b/api/delta-rest.yaml @@ -0,0 +1,1767 @@ +openapi: 3.1.1 +servers: + - url: "{scheme}://{host}:{port}/api/2.1/unity-catalog/delta-rest/v1" + description: Configurable Delta REST Catalog server + variables: + scheme: + description: The URI scheme (http or https) + default: https + enum: + - http + - https + host: + description: The server host address + default: localhost + port: + description: The server port number + default: "8080" + - url: 
http://localhost:8080/api/2.1/unity-catalog/delta-rest/v1 + description: Localhost reference server for Delta REST Catalog API +info: + title: Delta REST Catalog API + summary: REST API for Delta Lake table catalog operations + version: '0.1' + description: | + Delta REST Catalog (DRC) API follows the Apache Iceberg REST Catalog (IRC) API style. + Unlike the actual IRC APIs implemented for UniForm and Managed Iceberg tables, this API + is Delta-centric and does not provide a translation layer to make Delta tables work like + Iceberg tables. Clients directly communicate Delta metadata to servers without any translation. + +tags: + - name: Configuration + description: | + Configuration endpoints for getting catalog configuration and supported endpoints. + - name: Tables + description: | + Table operations for Delta tables including CRUD operations, staging tables, and credentials. + - name: Namespaces + description: | + Namespace operations corresponding to Unity Catalog schemas. + - name: Metrics + description: | + Metrics reporting endpoints for telemetry. + +paths: + /config: + get: + tags: + - Configuration + operationId: getConfig + summary: Get catalog configuration + description: | + Get catalog configuration and supported endpoints. The server returns configuration + overrides and a list of supported endpoints that the client can use. 
+ parameters: + - name: catalog + in: query + description: Catalog name + required: true + schema: + type: string + responses: + '200': + description: Configuration retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CatalogConfig' + example: + overrides: + prefix: "catalogs/my_catalog" + endpoints: + - "POST /catalogs/{catalog}/namespaces/{namespace}/tables" + - "GET /catalogs/{catalog}/namespaces/{namespace}/tables/{table}" + - "POST /catalogs/{catalog}/namespaces/{namespace}/tables/{table}" + - "DELETE /catalogs/{catalog}/namespaces/{namespace}/tables/{table}" + managed-tables-required-features: + - "appendOnly" + - "catalogManaged" + - "deletionVectors" + managed-tables-suggested-features: + - "rowTracking" + - "domainMetadata" + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '500': + $ref: '#/components/responses/InternalServerErrorResponse' + '503': + $ref: '#/components/responses/ServiceUnavailableResponse' + + /catalogs/{catalog}/namespaces/{namespace}/staging-tables: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + post: + tags: + - Tables + operationId: createStagingTable + summary: Create a staging table + description: | + Create a staging table that will become a catalog managed table. The server allocates + a table UUID and a storage location for the table. This is a Delta-specific endpoint. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateStagingTableRequest' + responses: + '200': + description: Staging table created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/StagingTableResponse' + example: + table-id: "123e4567-e89b-12d3-a456-426614174000" + table-type: "MANAGED" + location: "s3://bucket/warehouse/catalog/schema/table" + storage-credentials: + - prefix: "s3://bucket/warehouse/catalog/schema/table/" + config: + s3.access-key-id: "AK...example" + s3.secret-access-key: "ExampleKey" + s3.session-token: "token" + expiration-time-ms: 1234567890000 + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '409': + $ref: '#/components/responses/ConflictResponse' + '500': + $ref: '#/components/responses/InternalServerErrorResponse' + + /catalogs/{catalog}/namespaces/{namespace}/staging-tables/{table_id}/credentials: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + - name: table_id + in: path + description: Table UUID (not name) - required because name is not unique for staging tables + required: true + schema: + type: string + format: uuid + get: + tags: + - Tables + operationId: getStagingTableCredentials + summary: Get staging table credentials + description: | + Get temporary credentials for writing staging table data. This uses the UUID of the + staging table, not the name, because multiple pending staging tables can share the same name. 
+ responses: + '200': + description: Credentials retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CredentialsResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + + /catalogs/{catalog}/namespaces/{namespace}/tables: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + post: + tags: + - Tables + operationId: createTable + summary: Create a table + description: | + Create a new Delta table in the specified namespace. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateTableRequest' + responses: + '200': + description: Table created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/LoadTableResponse' + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '409': + $ref: '#/components/responses/ConflictResponse' + get: + tags: + - Tables + operationId: listTables + summary: List tables + description: | + List all tables in a namespace. 
+ parameters: + - name: pageToken + in: query + description: Pagination token + required: false + schema: + type: string + responses: + '200': + description: Tables listed successfully + content: + application/json: + schema: + $ref: '#/components/schemas/ListTablesResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + + /catalogs/{catalog}/namespaces/{namespace}/tables/{table}: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + get: + tags: + - Tables + operationId: loadTable + summary: Load table metadata + description: | + Load table metadata including schema, protocol, properties, and optionally credentials. + parameters: + - name: with_credentials + in: query + description: If true, include temporary storage credentials in the response + required: false + schema: + type: boolean + default: false + responses: + '200': + description: Table loaded successfully + content: + application/json: + schema: + $ref: '#/components/schemas/LoadTableResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + post: + tags: + - Tables + operationId: updateTable + summary: Update table + description: | + Update table properties, schema, protocol, or commit Delta changes. + This endpoint corresponds to the CCv2 commit specification. + Unlike IRC, this endpoint does not support table creation. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateTableRequest' + responses: + '200': + description: Table updated successfully + content: + application/json: + schema: + $ref: '#/components/schemas/LoadTableResponse' + '400': + description: Bad request or requirement not met + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + '409': + $ref: '#/components/responses/ConflictResponse' + delete: + tags: + - Tables + operationId: deleteTable + summary: Delete a table + description: | + Delete a table from the catalog. + responses: + '204': + description: Table deleted successfully + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + head: + tags: + - Tables + operationId: tableExists + summary: Check if table exists + description: | + Check if the specified table exists. + responses: + '204': + description: Table exists + '404': + description: Table does not exist + + /catalogs/{catalog}/namespaces/{namespace}/tables/{table}/credentials: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + get: + tags: + - Tables + operationId: getTableCredentials + summary: Get table credentials + description: | + Get temporary credentials for accessing table data (vended credentials). 
+ responses: + '200': + description: Credentials retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CredentialsResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + + /catalogs/{catalog}/tables/rename: + parameters: + - $ref: '#/components/parameters/catalog' + post: + tags: + - Tables + operationId: renameTable + summary: Rename a table + description: | + Rename a table. Can also move between namespaces within the same catalog. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RenameTableRequest' + responses: + '204': + description: Table renamed successfully + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + description: Source table not found + '409': + $ref: '#/components/responses/ConflictResponse' + + /temporary-path-credentials: + get: + tags: + - Tables + operationId: getTemporaryPathCredentials + summary: Get temporary path credentials + description: | + Get temporary credentials for a storage path that will be used to create a new external table. + This path will later be registered in UC as a real external table. + This is a Delta-specific endpoint. Note: the URL has no {prefix} because the endpoint is not scoped to a catalog. 
+ parameters: + - name: location + in: query + description: Storage path for the external table + required: true + schema: + type: string + - name: operation + in: query + description: Operation type + required: true + schema: + type: string + enum: + - UNKNOWN_PATH_OPERATION + - PATH_READ + - PATH_READ_WRITE + - PATH_CREATE_TABLE + responses: + '200': + description: Credentials retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CredentialsResponse' + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + + /catalogs/{catalog}/namespaces: + parameters: + - $ref: '#/components/parameters/catalog' + get: + tags: + - Namespaces + operationId: listNamespaces + summary: List namespaces + description: | + List all namespaces (schemas) in a catalog. + parameters: + - name: pageToken + in: query + description: Pagination token + required: false + schema: + type: string + responses: + '200': + description: Namespaces listed successfully + content: + application/json: + schema: + $ref: '#/components/schemas/ListNamespacesResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + post: + tags: + - Namespaces + operationId: createNamespace + summary: Create a namespace + description: | + Create a new namespace (schema) in the catalog. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateNamespaceRequest' + responses: + '200': + description: Namespace created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/NamespaceResponse' + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '409': + $ref: '#/components/responses/ConflictResponse' + + /catalogs/{catalog}/namespaces/{namespace}: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + get: + tags: + - Namespaces + operationId: loadNamespace + summary: Load namespace metadata + description: | + Load namespace (schema) metadata. + responses: + '200': + description: Namespace loaded successfully + content: + application/json: + schema: + $ref: '#/components/schemas/NamespaceResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + delete: + tags: + - Namespaces + operationId: deleteNamespace + summary: Delete a namespace + description: | + Delete a namespace (schema) from the catalog. + responses: + '204': + description: Namespace deleted successfully + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + head: + tags: + - Namespaces + operationId: namespaceExists + summary: Check if namespace exists + description: | + Check if the specified namespace exists. 
+ responses: + '204': + description: Namespace exists + '404': + description: Namespace does not exist + + /catalogs/{catalog}/namespaces/{namespace}/properties: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + post: + tags: + - Namespaces + operationId: updateNamespaceProperties + summary: Update namespace properties + description: | + Update namespace (schema) properties. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateNamespacePropertiesRequest' + responses: + '200': + description: Properties updated successfully + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateNamespacePropertiesResponse' + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + + /catalogs/{catalog}/namespaces/{namespace}/tables/{table}/metrics: + parameters: + - $ref: '#/components/parameters/catalog' + - $ref: '#/components/parameters/namespace' + - $ref: '#/components/parameters/table' + post: + tags: + - Metrics + operationId: reportMetrics + summary: Report query metrics + description: | + Report query metrics (telemetry) for a table. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ReportMetricsRequest' + responses: + '204': + description: Metrics received successfully + '400': + $ref: '#/components/responses/BadRequestErrorResponse' + '401': + $ref: '#/components/responses/UnauthorizedResponse' + '403': + $ref: '#/components/responses/ForbiddenResponse' + '404': + $ref: '#/components/responses/NotFoundResponse' + +components: + parameters: + catalog: + name: catalog + in: path + description: Catalog name + required: true + schema: + type: string + namespace: + name: namespace + in: path + description: Schema/namespace name + required: true + schema: + type: string + table: + name: table + in: path + description: Table name + required: true + schema: + type: string + + schemas: + # Configuration schemas + CatalogConfig: + type: object + properties: + overrides: + type: object + description: Configuration overrides including the prefix to use for subsequent requests + properties: + prefix: + type: string + description: The prefix to use in subsequent API calls (e.g., "catalogs/my_catalog") + additionalProperties: + type: string + endpoints: + type: array + description: List of supported endpoints + items: + type: string + managed-tables-required-features: + type: array + description: | + The server requires that at least these Delta table features be enabled + for managed tables + items: + type: string + managed-tables-suggested-features: + type: array + description: | + The server suggests that these Delta table features be enabled for managed + tables if supported by the client + items: + type: string + required: + - overrides + - endpoints + + # Table schemas + CreateStagingTableRequest: + type: object + properties: + name: + type: string + description: The table name + required: + - name + example: + name: "sales" + + StagingTableResponse: + type: object + properties: + table-id: + type: string + format: uuid + description: Table UUID 
allocated by UC + table-type: + type: string + enum: + - MANAGED + description: Table type (always MANAGED for staging tables) + location: + type: string + description: UC allocated storage location for this table + storage-credentials: + type: array + description: Temporary credentials for initial commit + items: + $ref: '#/components/schemas/StorageCredential' + required: + - table-id + - table-type + - location + + CreateTableRequest: + type: object + properties: + name: + type: string + description: The table name + location: + type: string + description: Storage location + table-type: + type: string + description: Table type (MANAGED or EXTERNAL) + enum: + - MANAGED + - EXTERNAL + data-source-format: + $ref: '#/components/schemas/DataSourceFormat' + description: Data source format (DELTA or ICEBERG) + comment: + type: string + description: Table comment + schema: + type: array + description: Table columns as ColumnInfo + items: + $ref: '#/components/schemas/DeltaColumn' + protocol: + $ref: '#/components/schemas/DeltaProtocol' + description: Delta protocol version and feature requirements + properties: + type: object + description: Delta table properties + additionalProperties: + type: string + required: + - name + - location + - table-type + - data-source-format + - schema + - protocol + - properties + example: + name: "sales" + location: "s3://bucket/warehouse/catalog/schema/sales" + table-type: "MANAGED" + data-source-format: "DELTA" + schema: + - type-json: + name: "id" + type: "long" + nullable: false + metadata: {} + - type-json: + name: "amount" + type: + type: "decimal" + precision: 10 + scale: 2 + nullable: true + metadata: {} + protocol: + min_reader_version: 3 + min_writer_version: 7 + reader_features: + - "deletionVectors" + writer_features: + - "deletionVectors" + - "invariants" + properties: + "delta.enableDeletionVectors": "true" + + LoadTableResponse: + type: object + properties: + metadata: + $ref: '#/components/schemas/TableMetadata' + 
description: Complete table metadata including schema, protocol, and properties + commits: + type: array + description: All unbackfilled CCv2 commits + items: + $ref: '#/components/schemas/DeltaCommit' + latest-table-version: + type: integer + format: int64 + description: Latest table version + config: + type: object + description: Additional configuration + additionalProperties: + type: string + storage-credentials: + type: array + description: Temporary credentials (only if with_credentials=true) + items: + $ref: '#/components/schemas/StorageCredential' + required: + - metadata + + TableMetadata: + type: object + properties: + etag: + type: string + description: Entity tag for optimistic concurrency control + data-source-format: + $ref: '#/components/schemas/DataSourceFormat' + description: Data source format (DELTA or ICEBERG) + table-type: + type: string + description: Table type (MANAGED or EXTERNAL) + enum: + - MANAGED + - EXTERNAL + table-uuid: + type: string + format: uuid + description: Unique identifier for the table + location: + type: string + description: Storage location of the table + owner: + type: string + description: Owner of the table + comment: + type: string + description: Table comment + create-time: + type: integer + format: int64 + description: Creation time in epoch milliseconds + created-by: + type: string + description: Creator username + update-time: + type: integer + format: int64 + description: Last update time in epoch milliseconds + updated-by: + type: string + description: Last updater username + securable_type: + type: string + description: Type of securable (TABLE) + schema: + type: array + description: Table schema as ColumnInfo array + items: + $ref: '#/components/schemas/DeltaColumn' + protocol: + $ref: '#/components/schemas/DeltaProtocol' + description: Delta protocol version and feature requirements + properties: + type: object + description: Table properties + additionalProperties: + type: string + required: + - etag + - 
data-source-format + - table-type + - table-uuid + - location + - schema + - protocol + - properties + + DeltaColumn: + type: object + description: | + Column information for Delta tables. Contains the type-json field with the full + type specification as a JSON-serialized Delta schema field (StructField). + All other fields (name, type-text, type-name, position, nullable, comment, partition-index) + can be derived on-demand from type-json. + properties: + type-json: + $ref: '#/components/schemas/StructField' + description: Complete field specification including name, type, nullability, and metadata + required: + - type-json + + ColumnMask: + type: object + description: Column mask configuration for data masking + properties: + function_name: + type: string + description: Fully qualified name of the masking function + using_column_names: + type: array + description: Column names used by the mask function + items: + type: string + + DeltaProtocol: + type: object + description: Delta table protocol specification + properties: + min_reader_version: + type: integer + format: int32 + description: Minimum reader version + min_writer_version: + type: integer + format: int32 + description: Minimum writer version + reader_features: + type: array + description: Enabled reader features + items: + type: string + writer_features: + type: array + description: Enabled writer features + items: + type: string + required: + - min_reader_version + - min_writer_version + + DeltaCommit: + type: object + description: Delta commit information for CCv2 + properties: + version: + type: integer + format: int64 + description: Commit version + timestamp: + type: integer + format: int64 + description: In-commit timestamp + file-name: + type: string + description: UUID-based commit file name + file-size: + type: integer + format: int64 + description: Commit file size in bytes + file-modification-timestamp: + type: integer + format: int64 + description: File modification timestamp + required: + 
- version + - timestamp + - file-name + - file-size + - file-modification-timestamp + + UpdateTableRequest: + type: object + description: Request to update a table with requirements and updates + properties: + requirements: + type: array + description: Pre-conditions that must be met for the update + items: + $ref: '#/components/schemas/TableRequirement' + updates: + type: array + description: Updates to apply to the table + items: + $ref: '#/components/schemas/TableUpdate' + required: + - requirements + - updates + + TableRequirement: + type: object + description: A requirement that must be met for the update to proceed + discriminator: + propertyName: type + mapping: + assert-table-uuid: '#/components/schemas/AssertTableUUID' + assert-etag: '#/components/schemas/AssertEtag' + unevaluatedProperties: false + properties: + type: + type: string + description: Requirement type + required: + - type + oneOf: + - $ref: '#/components/schemas/AssertTableUUID' + - $ref: '#/components/schemas/AssertEtag' + + AssertTableUUID: + type: object + description: Assert that the table UUID matches the expected value + properties: + type: + type: string + const: "assert-table-uuid" + uuid: + type: string + format: uuid + description: Expected table UUID + required: + - type + - uuid + + AssertEtag: + type: object + description: Assert that the table etag matches the expected value for optimistic concurrency + properties: + type: + type: string + const: "assert-etag" + etag: + type: string + description: Expected etag value + required: + - type + - etag + + TableUpdate: + type: object + description: An update action to apply to the table + discriminator: + propertyName: action + mapping: + set-properties: '#/components/schemas/SetPropertiesUpdate' + remove-properties: '#/components/schemas/RemovePropertiesUpdate' + delta-set-schema-and-column-masks: '#/components/schemas/SetSchemaUpdate' + delta-set-table-comment: '#/components/schemas/SetTableCommentUpdate' + delta-update-protocol: 
'#/components/schemas/UpdateProtocolUpdate' + delta-add-commit: '#/components/schemas/AddCommitUpdate' + delta-set-latest-backfilled-version: '#/components/schemas/SetLatestBackfilledVersionUpdate' + unevaluatedProperties: false + properties: + action: + type: string + description: Update action type + required: + - action + oneOf: + - $ref: '#/components/schemas/SetPropertiesUpdate' + - $ref: '#/components/schemas/RemovePropertiesUpdate' + - $ref: '#/components/schemas/SetSchemaUpdate' + - $ref: '#/components/schemas/SetTableCommentUpdate' + - $ref: '#/components/schemas/UpdateProtocolUpdate' + - $ref: '#/components/schemas/AddCommitUpdate' + - $ref: '#/components/schemas/SetLatestBackfilledVersionUpdate' + + SetPropertiesUpdate: + type: object + description: Set table properties + properties: + action: + type: string + const: "set-properties" + updates: + type: object + description: Properties to set + additionalProperties: + type: string + required: + - action + - updates + + RemovePropertiesUpdate: + type: object + description: Remove table properties + properties: + action: + type: string + const: "remove-properties" + removals: + type: array + description: Property keys to remove + items: + type: string + required: + - action + - removals + + SetSchemaUpdate: + type: object + description: Set table schema and column masks + properties: + action: + type: string + const: "delta-set-schema-and-column-masks" + schema: + type: array + description: New schema columns + items: + $ref: '#/components/schemas/DeltaColumn' + column-mask-update-mode: + type: string + description: How to update column masks + enum: + - RETAIN_MERGE + - OVERWRITE + default: RETAIN_MERGE + required: + - action + - schema + + SetTableCommentUpdate: + type: object + description: Set table comment + properties: + action: + type: string + const: "delta-set-table-comment" + comment: + type: string + description: New table comment + required: + - action + - comment + + UpdateProtocolUpdate: + 
type: object + description: Update table protocol version and features + properties: + action: + type: string + const: "delta-update-protocol" + protocol: + $ref: '#/components/schemas/DeltaProtocol' + description: New protocol version (optional, can use add/remove features instead) + add-reader-features: + type: array + description: Reader features to add + items: + type: string + add-writer-features: + type: array + description: Writer features to add + items: + type: string + remove-reader-features: + type: array + description: Reader features to remove + items: + type: string + remove-writer-features: + type: array + description: Writer features to remove + items: + type: string + required: + - action + + AddCommitUpdate: + type: object + description: Add a CCv2 commit to the table + properties: + action: + type: string + const: "delta-add-commit" + commit: + $ref: '#/components/schemas/DeltaCommit' + description: Commit metadata including version, timestamp, and file information + required: + - action + - commit + + SetLatestBackfilledVersionUpdate: + type: object + description: Set the latest backfilled version for the table + properties: + action: + type: string + const: "delta-set-latest-backfilled-version" + latest-published-version: + type: integer + format: int64 + description: Latest backfilled/published version + required: + - action + - latest-published-version + + ListTablesResponse: + type: object + properties: + identifiers: + type: array + description: List of table identifiers + items: + $ref: '#/components/schemas/TableIdentifierWithDataSourceFormat' + next-page-token: + type: string + description: Token for next page + + TableIdentifier: + type: object + properties: + namespace: + type: array + description: Namespace path + items: + type: string + name: + type: string + description: Table name + required: + - namespace + - name + + TableIdentifierWithDataSourceFormat: + type: object + description: Table identifier with data source format, used 
in list operations + allOf: + - $ref: '#/components/schemas/TableIdentifier' + - type: object + properties: + data-source-format: + $ref: '#/components/schemas/DataSourceFormat' + description: Data source format (DELTA or ICEBERG) + required: + - data-source-format + + RenameTableRequest: + type: object + properties: + source: + $ref: '#/components/schemas/TableIdentifier' + destination: + $ref: '#/components/schemas/TableIdentifier' + required: + - source + - destination + + CredentialsResponse: + type: object + properties: + storage-credentials: + type: array + items: + $ref: '#/components/schemas/StorageCredential' + + StorageCredential: + type: object + description: | + Temporary storage credential with prefix and config. + Indicates a storage location prefix where the credential is relevant. + Clients should choose the most specific prefix (by selecting the longest prefix) + if several credentials of the same type are available. + required: + - prefix + - config + properties: + prefix: + type: string + description: Storage path prefix this credential applies to + config: + type: object + description: | + Credential configuration with cloud provider-specific fields. + Contains temporary credentials for accessing storage as string key-value pairs. + + Common configuration keys: + + **AWS S3 Credentials:** + - `s3.access-key-id`: AWS access key ID + - `s3.secret-access-key`: AWS secret access key + - `s3.session-token`: AWS session token for temporary credentials + + **Azure Blob Storage Credentials:** + - `azure.sas-token`: Azure SAS (Shared Access Signature) token + + **Google Cloud Storage Credentials:** + - `gcs.oauth-token`: GCP OAuth token + + Additional provider-specific or implementation-specific keys may be included. + additionalProperties: + type: string + expiration-time-ms: + type: integer + format: int64 + description: | + Credential expiration time in epoch milliseconds. 
+ This standardized field avoids the need for provider-specific expiration keys + (e.g., s3.session-token-expires-at-ms, adls.sas-token-expires-at-ms, etc.) + + DataSourceFormat: + type: string + description: Data source format + enum: + - DELTA + - ICEBERG + - PARQUET + - CSV + - JSON + - ORC + - TEXT + - AVRO + + # Namespace schemas + ListNamespacesResponse: + type: object + properties: + namespaces: + type: array + description: List of namespace identifiers + items: + type: array + items: + type: string + next-page-token: + type: string + description: Token for next page + + CreateNamespaceRequest: + type: object + properties: + namespace: + type: array + description: Namespace path + items: + type: string + properties: + type: object + description: Namespace properties + additionalProperties: + type: string + required: + - namespace + + NamespaceResponse: + type: object + properties: + namespace: + type: array + description: Namespace path + items: + type: string + properties: + type: object + description: Namespace properties + additionalProperties: + type: string + required: + - namespace + + UpdateNamespacePropertiesRequest: + type: object + properties: + updates: + type: object + description: Properties to add or update + additionalProperties: + type: string + removals: + type: array + description: Property keys to remove + items: + type: string + + UpdateNamespacePropertiesResponse: + type: object + properties: + updated: + type: array + description: Keys that were updated + items: + type: string + removed: + type: array + description: Keys that were removed + items: + type: string + missing: + type: array + description: Keys that were requested for removal but not found + items: + type: string + + # Metrics schemas + ReportMetricsRequest: + type: object + properties: + snapshot-id: + type: integer + format: int64 + description: Snapshot/version identifier + scan-metrics: + $ref: '#/components/schemas/ScanMetrics' + commit-metrics: + $ref: 
'#/components/schemas/CommitMetrics' + + ScanMetrics: + type: object + description: Metrics related to table scanning + properties: + total-planning-duration: + $ref: '#/components/schemas/MetricValue' + result-data-files: + $ref: '#/components/schemas/MetricValue' + scanned-data-manifests: + $ref: '#/components/schemas/MetricValue' + additionalProperties: + $ref: '#/components/schemas/MetricValue' + + CommitMetrics: + type: object + description: Metrics related to commits + properties: + total-duration: + $ref: '#/components/schemas/MetricValue' + added-data-files: + $ref: '#/components/schemas/MetricValue' + additionalProperties: + $ref: '#/components/schemas/MetricValue' + + MetricValue: + type: object + properties: + unit: + type: string + description: Unit of measurement (e.g., "millisecond", "count") + value: + type: number + description: Metric value + required: + - unit + - value + + # ========== Spark DataType Schemas ========== + # These schemas define the JSON representation of Spark SQL data types + # as produced by StructField.toJson and DataType.json + + DataType: + description: | + Spark SQL DataType representation. Can be either: + - A primitive type string (e.g., "string", "integer", "long", "double") + - A complex type object (decimal, array, map, struct) + oneOf: + - type: string + description: | + Primitive type name. Valid values include: string, integer, long, short, byte, + float, double, decimal, boolean, binary, date, timestamp, timestamp_ntz, null, + char, varchar, geometry, geography, time, and interval types. 
+ example: "string" + - $ref: '#/components/schemas/DecimalType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/MapType' + - $ref: '#/components/schemas/StructType' + + DecimalType: + type: object + description: Decimal type with precision and scale + properties: + type: + type: string + enum: ["decimal"] + description: Type identifier for decimal + precision: + type: integer + description: Total number of digits + minimum: 1 + maximum: 38 + scale: + type: integer + description: Number of digits after decimal point + minimum: 0 + required: + - type + - precision + - scale + example: + type: "decimal" + precision: 10 + scale: 2 + + ArrayType: + type: object + description: Array type containing elements of a specific type + properties: + type: + type: string + enum: ["array"] + description: Type identifier for array + elementType: + $ref: '#/components/schemas/DataType' + description: Data type of array elements (can be primitive or complex) + containsNull: + type: boolean + description: Whether array elements can be null + default: true + required: + - type + - elementType + - containsNull + example: + type: "array" + elementType: "string" + containsNull: true + + MapType: + type: object + description: Map type with key-value pairs + properties: + type: + type: string + enum: ["map"] + description: Type identifier for map + keyType: + $ref: '#/components/schemas/DataType' + description: Data type of map keys (can be primitive or complex) + valueType: + $ref: '#/components/schemas/DataType' + description: Data type of map values (can be primitive or complex) + valueContainsNull: + type: boolean + description: Whether map values can be null + default: true + required: + - type + - keyType + - valueType + - valueContainsNull + example: + type: "map" + keyType: "string" + valueType: "integer" + valueContainsNull: true + + StructType: + type: object + description: Struct type containing named fields + properties: + type: + type: string + enum: 
["struct"] + description: Type identifier for struct + fields: + type: array + description: Array of field definitions + items: + $ref: '#/components/schemas/StructField' + required: + - type + - fields + example: + type: "struct" + fields: + - name: "id" + type: "long" + nullable: false + metadata: {} + - name: "name" + type: "string" + nullable: true + metadata: {} + + StructField: + type: object + description: | + A field within a StructType, representing a column with name, type, and metadata. + This is the same structure returned by Spark's StructField.toJson. + properties: + name: + type: string + description: Field name + type: + description: | + Column data type. Can be either: + - A primitive type string: "string", "integer", "long", "short", "byte", "float", + "double", "boolean", "binary", "date", "timestamp", "timestamp_ntz", "null", etc. + - A DecimalType object: {"type": "decimal", "precision": 10, "scale": 2} + - An ArrayType object: {"type": "array", "elementType": <type>, "containsNull": true} + - A MapType object: {"type": "map", "keyType": <type>, "valueType": <type>, + "valueContainsNull": true} + - A StructType object: {"type": "struct", "fields": [<StructField>, ...]} + + Where <type> recursively follows the same pattern (primitive string or complex object). + + See the DecimalType, ArrayType, MapType, and StructType schemas above for details on + complex type structures.
+ example: "long" + nullable: + type: boolean + description: Whether this field can be null + default: true + metadata: + type: object + description: Additional field metadata (arbitrary key-value pairs) + additionalProperties: true + default: {} + comment: + type: string + description: Optional field comment/description + mask: + $ref: '#/components/schemas/ColumnMask' + description: Optional column-level data masking configuration + required: + - name + - type + - nullable + - metadata + example: + name: "user_id" + type: "long" + nullable: false + metadata: {} + + # Error Response Schema + ErrorResponse: + type: object + description: Standard error response for all error cases + properties: + error_code: + type: string + description: Error code identifying the error type + message: + type: string + description: Human-readable error message + details: + type: array + description: Additional error details + items: + type: object + properties: + "@type": + type: string + reason: + type: string + metadata: + type: object + additionalProperties: true + stack_trace: + type: string + description: Stack trace (only in debug mode, optional) + required: + - error_code + - message + example: + error_code: "NOT_FOUND" + message: "Table 'sales' not found in namespace 'accounting'" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "TABLE_NOT_FOUND" + metadata: {} + + # Reusable Response Definitions + responses: + BadRequestErrorResponse: + description: | + Bad Request - The request is malformed or contains invalid parameters. + This could be caused by invalid JSON, missing required fields, or validation failures. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "INVALID_ARGUMENT" + message: "Invalid request: missing required field 'name'" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "INVALID_ARGUMENT" + metadata: {} + + UnauthorizedResponse: + description: | + Unauthorized - Authentication is required or has failed. + The access token may be expired, revoked, malformed, or invalid. + Client should authenticate and retry the request. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "UNAUTHENTICATED" + message: "Authentication required" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "UNAUTHENTICATED" + metadata: {} + + ForbiddenResponse: + description: | + Forbidden - The authenticated user does not have permission to perform this operation. + The user may need additional privileges or access rights. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "PERMISSION_DENIED" + message: "User does not have permission to access this resource" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "PERMISSION_DENIED" + metadata: {} + + NotFoundResponse: + description: | + Not Found - The requested resource does not exist. + This could be a table, namespace, or other catalog object. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "NOT_FOUND" + message: "Resource not found" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "NOT_FOUND" + metadata: {} + + ConflictResponse: + description: | + Conflict - The resource already exists or there is a conflict with the current state. + For example, attempting to create a table that already exists. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "ALREADY_EXISTS" + message: "Table already exists" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "ALREADY_EXISTS" + metadata: {} + + PreconditionFailedResponse: + description: | + Precondition Failed - A requirement check failed (e.g., table UUID or etag mismatch). + The client should refresh the resource state and retry with updated requirements. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "FAILED_PRECONDITION" + message: "Table UUID does not match expected value" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "FAILED_PRECONDITION" + metadata: {} + + InternalServerErrorResponse: + description: | + Internal Server Error - An unexpected error occurred on the server. + This indicates a server-side problem that may not be addressable from the client. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "INTERNAL_ERROR" + message: "An internal server error occurred" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "INTERNAL_ERROR" + metadata: {} + + ServiceUnavailableResponse: + description: | + Service Unavailable - The service is temporarily unavailable. + The client should retry the request after a delay. + A Retry-After header may indicate when to retry. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error_code: "UNAVAILABLE" + message: "Service temporarily unavailable" + details: + - "@type": "google.rpc.ErrorInfo" + reason: "UNAVAILABLE" + metadata: {} diff --git a/build.sbt b/build.sbt index d7bf49ccd1..7b265f0af0 100644 --- a/build.sbt +++ b/build.sbt @@ -305,7 +305,7 @@ lazy val server = (project in file("server")) .dependsOn(client % "test->test") // Server and control models are added as provided to avoid them being added as maven dependencies // This is because the server and control models are included in the server jar - .dependsOn(serverModels % "provided", controlModels % "provided") + .dependsOn(serverModels % "provided", controlModels % "provided", deltaRestServerModels % "provided") .dependsOn(controlApi % "test->compile") .enablePlugins(CheckstylePlugin) .settings ( @@ -413,11 +413,12 @@ lazy val server = (project in file("server")) (Test / runMain).toTask(s" io.unitycatalog.server.utils.PopulateTestDatabase").value }, Test / javaOptions += s"-Duser.dir=${(ThisBuild / baseDirectory).value.getAbsolutePath}", - // Include server and control models in the bin package for server - // This will allow us to have a single maven artifact and not 3 (server, server models, control models) + // Include server, control, and delta rest models in the bin package for server + // This will allow us to have a single maven artifact and not multiple (server, server models, control models, deltarest models) Compile / packageBin / mappings ++= (Compile / packageBin / mappings).value ++ (serverModels / Compile / packageBin / mappings).value ++ - (controlModels / Compile / packageBin / mappings).value + (controlModels / Compile / packageBin / mappings).value ++ + (deltaRestServerModels / Compile / packageBin / mappings).value ) lazy val serverModels = (project in file("server") / "target" / "models") @@ -492,9 +493,107 @@ lazy val controlModels = (project in 
file("server") / "target" / "controlmodels" } ) +lazy val deltaRestServerModels = (project in file("server") / "target" / "deltarestmodels") + .enablePlugins(OpenApiGeneratorPlugin) + .disablePlugins(JavaFormatterPlugin, CheckstylePlugin) + .settings( + name := s"$artifactNamePrefix-deltarestservermodels", + commonSettings, + javaOnlyReleaseSettings, + (Compile / compile) := ((Compile / compile) dependsOn generate).value, + Compile / compile / javacOptions ++= javacRelease17, + libraryDependencies ++= Seq( + "jakarta.annotation" % "jakarta.annotation-api" % "3.0.0" % Provided, + "com.fasterxml.jackson.core" % "jackson-annotations" % jacksonVersion, + ), + // OpenAPI generation configs for generating model codes from the Delta REST Catalog spec + openApiInputSpec := (file(".") / "api" / "delta-rest.yaml").toString, + openApiGeneratorName := "java", + openApiOutputDir := (file("server") / "target" / "deltarestmodels").toString, + openApiValidateSpec := SettingEnabled, + openApiGenerateMetadata := SettingDisabled, + openApiModelPackage := s"$orgName.server.model.deltarest", + openApiAdditionalProperties := Map( + "library" -> "resteasy", // resteasy generates the most minimal models + "useJakartaEe" -> "true", + "hideGenerationTimestamp" -> "true" + ), + openApiGlobalProperties := Map("models" -> ""), + openApiGenerateApiTests := SettingDisabled, + openApiGenerateModelTests := SettingDisabled, + openApiGenerateApiDocumentation := SettingDisabled, + openApiGenerateModelDocumentation := SettingDisabled, + // Define the simple generate command to generate model codes + generate := { + val _ = openApiGenerate.value + } + ) + +lazy val deltaRestClient = (project in file("clients/delta-rest")) + .enablePlugins(OpenApiGeneratorPlugin) + .settings( + name := s"$artifactNamePrefix-deltarest-client", + commonSettings, + javaOnlyReleaseSettings, + Compile / compile / javacOptions ++= javacRelease11, + javaCheckstyleTestOnlySettings("dev/checkstyle-config.xml"), + // Include 
generated OpenAPI sources + Compile / unmanagedSourceDirectories += (file(".") / "clients" / "delta-rest" / "target" / "src" / "main" / "java"), + libraryDependencies ++= Seq( + "com.fasterxml.jackson.core" % "jackson-annotations" % jacksonVersion, + "com.fasterxml.jackson.core" % "jackson-core" % jacksonVersion, + "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, + "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % jacksonVersion, + "org.openapitools" % "jackson-databind-nullable" % openApiToolsJacksonBindNullableVersion, + "com.google.code.findbugs" % "jsr305" % "3.0.2", + "jakarta.annotation" % "jakarta.annotation-api" % "3.0.0" % Provided, + + // Test dependencies + "org.mockito" % "mockito-core" % "5.11.0" % Test, + "org.mockito" % "mockito-inline" % "5.2.0" % Test, + "org.mockito" % "mockito-junit-jupiter" % "5.12.0" % Test, + "org.junit.jupiter" % "junit-jupiter" % "5.10.3" % Test, + "net.aichler" % "jupiter-interface" % JupiterKeys.jupiterVersion.value % Test, + "org.assertj" % "assertj-core" % "3.26.3" % Test, + ), + (Compile / compile) := ((Compile / compile) dependsOn generate).value, + + // Add custom test sources from clients/delta-rest directory + Test / unmanagedSourceDirectories += (file(".") / "clients" / "delta-rest" / "src" / "test" / "java"), + + // OpenAPI generation specs + openApiInputSpec := (file(".") / "api" / "delta-rest.yaml").toString, + openApiGeneratorName := "java", + openApiOutputDir := (file(".") / "clients" / "delta-rest" / "target").toString, + openApiApiPackage := s"$orgName.client.deltarest.api", + openApiModelPackage := s"$orgName.client.deltarest.model", + openApiAdditionalProperties := Map( + "library" -> "native", + "useJakartaEe" -> "true", + "hideGenerationTimestamp" -> "true", + "openApiNullable" -> "false", + "enumUnknownDefaultCase" -> "true"), + openApiGenerateApiTests := SettingDisabled, + openApiGenerateModelTests := SettingDisabled, + openApiGenerateApiDocumentation := SettingDisabled, 
+ openApiGenerateModelDocumentation := SettingDisabled, + // Define the simple generate command to generate full client codes + generate := { + val _ = openApiGenerate.value + + // Delete the generated build.sbt file so that it is not used for our sbt config + val buildSbtFile = file(openApiOutputDir.value) / "build.sbt" + if (buildSbtFile.exists()) { + buildSbtFile.delete() + } + }, + ) + lazy val cli = (project in file("examples") / "cli") .dependsOn(server % "test->test") .dependsOn(serverModels) + .dependsOn(deltaRestServerModels) + .dependsOn(deltaRestClient) .dependsOn(client % "compile->compile;test->test") .dependsOn(controlApi % "compile->compile") .enablePlugins(CheckstylePlugin) @@ -562,7 +661,7 @@ lazy val serverShaded = (project in file("server-shaded")) ) lazy val spark = (project in file("connectors/spark")) - .dependsOn(client) + .dependsOn(client, deltaRestClient) .enablePlugins(CheckstylePlugin) .settings( name := s"$artifactNamePrefix-spark", @@ -687,7 +786,7 @@ lazy val integrationTests = (project in file("integration-tests")) ) lazy val root = (project in file(".")) - .aggregate(serverModels, client, pythonClient, server, cli, spark, controlApi, controlModels, apiDocs) + .aggregate(serverModels, client, pythonClient, server, cli, spark, controlApi, controlModels, deltaRestServerModels, deltaRestClient, apiDocs) .settings( name := s"$artifactNamePrefix", createTarballSettings(), diff --git a/connectors/spark/src/main/java/io/unitycatalog/spark/auth/CredPropsUtil.java b/connectors/spark/src/main/java/io/unitycatalog/spark/auth/CredPropsUtil.java index cff5fb9dbf..d9293d1393 100644 --- a/connectors/spark/src/main/java/io/unitycatalog/spark/auth/CredPropsUtil.java +++ b/connectors/spark/src/main/java/io/unitycatalog/spark/auth/CredPropsUtil.java @@ -5,12 +5,9 @@ import static io.unitycatalog.spark.UCHadoopConf.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; import io.unitycatalog.client.auth.TokenProvider; -import io.unitycatalog.client.model.AwsCredentials; 
-import io.unitycatalog.client.model.AzureUserDelegationSAS; -import io.unitycatalog.client.model.GcpOauthToken; +import io.unitycatalog.client.deltarest.model.StorageCredential; import io.unitycatalog.client.model.PathOperation; import io.unitycatalog.client.model.TableOperation; -import io.unitycatalog.client.model.TemporaryCredentials; import io.unitycatalog.spark.UCHadoopConf; import io.unitycatalog.spark.auth.storage.AbfsVendedTokenProvider; import io.unitycatalog.spark.auth.storage.AwsVendedTokenProvider; @@ -131,32 +128,32 @@ protected AbfsPropsBuilder self() { } } - private static Map s3FixedCredProps(TemporaryCredentials tempCreds) { - AwsCredentials awsCred = tempCreds.getAwsTempCredentials(); + private static Map s3FixedCredProps(StorageCredential cred) { + Map config = cred.getConfig(); return new S3PropsBuilder() - .set("fs.s3a.access.key", awsCred.getAccessKeyId()) - .set("fs.s3a.secret.key", awsCred.getSecretAccessKey()) - .set("fs.s3a.session.token", awsCred.getSessionToken()) + .set("fs.s3a.access.key", config.get("s3.access-key-id")) + .set("fs.s3a.secret.key", config.get("s3.secret-access-key")) + .set("fs.s3a.session.token", config.get("s3.session-token")) .build(); } private static S3PropsBuilder s3TempCredPropsBuilder( - String uri, TokenProvider tokenProvider, TemporaryCredentials tempCreds) { - AwsCredentials awsCred = tempCreds.getAwsTempCredentials(); + String uri, TokenProvider tokenProvider, StorageCredential cred) { + Map config = cred.getConfig(); S3PropsBuilder builder = new S3PropsBuilder() .set(UCHadoopConf.S3A_CREDENTIALS_PROVIDER, AwsVendedTokenProvider.class.getName()) .uri(uri) .tokenProvider(tokenProvider) .uid(UUID.randomUUID().toString()) - .set(UCHadoopConf.S3A_INIT_ACCESS_KEY, awsCred.getAccessKeyId()) - .set(UCHadoopConf.S3A_INIT_SECRET_KEY, awsCred.getSecretAccessKey()) - .set(UCHadoopConf.S3A_INIT_SESSION_TOKEN, awsCred.getSessionToken()); + .set(UCHadoopConf.S3A_INIT_ACCESS_KEY, config.get("s3.access-key-id")) + 
.set(UCHadoopConf.S3A_INIT_SECRET_KEY, config.get("s3.secret-access-key")) + .set(UCHadoopConf.S3A_INIT_SESSION_TOKEN, config.get("s3.session-token")); // For the static credential case, nullable expiration time is possible. - if (tempCreds.getExpirationTime() != null) { + if (cred.getExpirationTimeMs() != null) { builder.set( - UCHadoopConf.S3A_INIT_CRED_EXPIRED_TIME, String.valueOf(tempCreds.getExpirationTime())); + UCHadoopConf.S3A_INIT_CRED_EXPIRED_TIME, String.valueOf(cred.getExpirationTimeMs())); } return builder; @@ -167,8 +164,8 @@ private static Map s3TableTempCredProps( TokenProvider tokenProvider, String tableId, TableOperation tableOp, - TemporaryCredentials tempCreds) { - return s3TempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return s3TempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_TABLE_VALUE) .tableId(tableId) .tableOperation(tableOp) @@ -180,27 +177,27 @@ private static Map s3PathTempCredProps( TokenProvider tokenProvider, String path, PathOperation pathOp, - TemporaryCredentials tempCreds) { - return s3TempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return s3TempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_PATH_VALUE) .path(path) .pathOperation(pathOp) .build(); } - private static Map gsFixedCredProps(TemporaryCredentials tempCreds) { - GcpOauthToken gcpOauthToken = tempCreds.getGcpOauthToken(); + private static Map gsFixedCredProps(StorageCredential cred) { + Map config = cred.getConfig(); Long expirationTime = - tempCreds.getExpirationTime() == null ? Long.MAX_VALUE : tempCreds.getExpirationTime(); + cred.getExpirationTimeMs() == null ? 
Long.MAX_VALUE : cred.getExpirationTimeMs(); return new GcsPropsBuilder() - .set(GcsVendedTokenProvider.ACCESS_TOKEN_KEY, gcpOauthToken.getOauthToken()) + .set(GcsVendedTokenProvider.ACCESS_TOKEN_KEY, config.get("gcs.oauth-token")) .set(GcsVendedTokenProvider.ACCESS_TOKEN_EXPIRATION_KEY, String.valueOf(expirationTime)) .build(); } private static GcsPropsBuilder gcsTempCredPropsBuilder( - String uri, TokenProvider tokenProvider, TemporaryCredentials tempCreds) { - GcpOauthToken gcpToken = tempCreds.getGcpOauthToken(); + String uri, TokenProvider tokenProvider, StorageCredential cred) { + Map config = cred.getConfig(); GcsPropsBuilder builder = new GcsPropsBuilder() .set("fs.gs.auth.type", "ACCESS_TOKEN_PROVIDER") @@ -208,13 +205,13 @@ private static GcsPropsBuilder gcsTempCredPropsBuilder( .uri(uri) .tokenProvider(tokenProvider) .uid(UUID.randomUUID().toString()) - .set(UCHadoopConf.GCS_INIT_OAUTH_TOKEN, gcpToken.getOauthToken()); + .set(UCHadoopConf.GCS_INIT_OAUTH_TOKEN, config.get("gcs.oauth-token")); // For the static credential case, nullable expiration time is possible. 
- if (tempCreds.getExpirationTime() != null) { + if (cred.getExpirationTimeMs() != null) { builder.set( UCHadoopConf.GCS_INIT_OAUTH_TOKEN_EXPIRATION_TIME, - String.valueOf(tempCreds.getExpirationTime())); + String.valueOf(cred.getExpirationTimeMs())); } return builder; @@ -225,8 +222,8 @@ private static Map gsTableTempCredProps( TokenProvider tokenProvider, String tableId, TableOperation tableOp, - TemporaryCredentials tempCreds) { - return gcsTempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return gcsTempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_TABLE_VALUE) .tableId(tableId) .tableOperation(tableOp) @@ -238,37 +235,37 @@ private static Map gsPathTempCredProps( TokenProvider tokenProvider, String path, PathOperation pathOp, - TemporaryCredentials tempCreds) { - return gcsTempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return gcsTempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_PATH_VALUE) .path(path) .pathOperation(pathOp) .build(); } - private static Map abfsFixedCredProps(TemporaryCredentials tempCreds) { - AzureUserDelegationSAS azureSas = tempCreds.getAzureUserDelegationSas(); + private static Map abfsFixedCredProps(StorageCredential cred) { + Map config = cred.getConfig(); return new AbfsPropsBuilder() - .set(AbfsVendedTokenProvider.ACCESS_TOKEN_KEY, azureSas.getSasToken()) + .set(AbfsVendedTokenProvider.ACCESS_TOKEN_KEY, config.get("azure.sas-token")) .build(); } private static AbfsPropsBuilder abfsTempCredPropsBuilder( - String uri, TokenProvider tokenProvider, TemporaryCredentials tempCreds) { - AzureUserDelegationSAS azureSas = tempCreds.getAzureUserDelegationSas(); + String uri, TokenProvider tokenProvider, StorageCredential cred) { + Map config = cred.getConfig(); AbfsPropsBuilder builder = new AbfsPropsBuilder() .set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, AbfsVendedTokenProvider.class.getName()) 
.uri(uri) .tokenProvider(tokenProvider) .uid(UUID.randomUUID().toString()) - .set(UCHadoopConf.AZURE_INIT_SAS_TOKEN, azureSas.getSasToken()); + .set(UCHadoopConf.AZURE_INIT_SAS_TOKEN, config.get("azure.sas-token")); // For the static credential case, nullable expiration time is possible. - if (tempCreds.getExpirationTime() != null) { + if (cred.getExpirationTimeMs() != null) { builder.set( UCHadoopConf.AZURE_INIT_SAS_TOKEN_EXPIRED_TIME, - String.valueOf(tempCreds.getExpirationTime())); + String.valueOf(cred.getExpirationTimeMs())); } return builder; @@ -279,8 +276,8 @@ private static Map abfsTableTempCredProps( TokenProvider tokenProvider, String tableId, TableOperation tableOp, - TemporaryCredentials tempCreds) { - return abfsTempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return abfsTempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_TABLE_VALUE) .tableId(tableId) .tableOperation(tableOp) @@ -292,8 +289,8 @@ private static Map abfsPathTempCredProps( TokenProvider tokenProvider, String path, PathOperation pathOp, - TemporaryCredentials tempCreds) { - return abfsTempCredPropsBuilder(uri, tokenProvider, tempCreds) + StorageCredential cred) { + return abfsTempCredPropsBuilder(uri, tokenProvider, cred) .credentialType(UCHadoopConf.UC_CREDENTIALS_TYPE_PATH_VALUE) .path(path) .pathOperation(pathOp) @@ -307,26 +304,26 @@ public static Map createTableCredProps( TokenProvider tokenProvider, String tableId, TableOperation tableOp, - TemporaryCredentials tempCreds) { + StorageCredential cred) { switch (scheme) { case "s3": if (renewCredEnabled) { - return s3TableTempCredProps(uri, tokenProvider, tableId, tableOp, tempCreds); + return s3TableTempCredProps(uri, tokenProvider, tableId, tableOp, cred); } else { - return s3FixedCredProps(tempCreds); + return s3FixedCredProps(cred); } case "gs": if (renewCredEnabled) { - return gsTableTempCredProps(uri, tokenProvider, tableId, tableOp, tempCreds); + 
return gsTableTempCredProps(uri, tokenProvider, tableId, tableOp, cred); } else { - return gsFixedCredProps(tempCreds); + return gsFixedCredProps(cred); } case "abfss": case "abfs": if (renewCredEnabled) { - return abfsTableTempCredProps(uri, tokenProvider, tableId, tableOp, tempCreds); + return abfsTableTempCredProps(uri, tokenProvider, tableId, tableOp, cred); } else { - return abfsFixedCredProps(tempCreds); + return abfsFixedCredProps(cred); } default: return ImmutableMap.of(); @@ -340,26 +337,26 @@ public static Map createPathCredProps( TokenProvider tokenProvider, String path, PathOperation pathOp, - TemporaryCredentials tempCreds) { + StorageCredential cred) { switch (scheme) { case "s3": if (renewCredEnabled) { - return s3PathTempCredProps(uri, tokenProvider, path, pathOp, tempCreds); + return s3PathTempCredProps(uri, tokenProvider, path, pathOp, cred); } else { - return s3FixedCredProps(tempCreds); + return s3FixedCredProps(cred); } case "gs": if (renewCredEnabled) { - return gsPathTempCredProps(uri, tokenProvider, path, pathOp, tempCreds); + return gsPathTempCredProps(uri, tokenProvider, path, pathOp, cred); } else { - return gsFixedCredProps(tempCreds); + return gsFixedCredProps(cred); } case "abfss": case "abfs": if (renewCredEnabled) { - return abfsPathTempCredProps(uri, tokenProvider, path, pathOp, tempCreds); + return abfsPathTempCredProps(uri, tokenProvider, path, pathOp, cred); } else { - return abfsFixedCredProps(tempCreds); + return abfsFixedCredProps(cred); } default: return ImmutableMap.of(); diff --git a/connectors/spark/src/main/scala/io/unitycatalog/spark/UCSingleCatalog.scala b/connectors/spark/src/main/scala/io/unitycatalog/spark/UCSingleCatalog.scala index 4c5e389d55..ea68469198 100644 --- a/connectors/spark/src/main/scala/io/unitycatalog/spark/UCSingleCatalog.scala +++ b/connectors/spark/src/main/scala/io/unitycatalog/spark/UCSingleCatalog.scala @@ -1,8 +1,11 @@ package io.unitycatalog.spark -import io.unitycatalog.client.api.{SchemasApi, 
TablesApi, TemporaryCredentialsApi} +import io.unitycatalog.client.api.TablesApi import io.unitycatalog.client.auth.TokenProvider -import io.unitycatalog.client.model._ +import io.unitycatalog.client.deltarest.api.{ConfigurationApi => DeltaRestConfigurationApi, NamespacesApi => DeltaRestNamespacesApi, TablesApi => DeltaRestTablesApi} +import io.unitycatalog.client.deltarest.model.{CatalogConfig => DeltaRestCatalogConfig, CreateNamespaceRequest => DeltaRestCreateNamespaceRequest, CreateStagingTableRequest => DeltaRestCreateStagingTableRequest, CreateTableRequest => DeltaRestCreateTableRequest, DeltaColumn => DeltaRestDeltaColumn, DeltaProtocol => DeltaRestDeltaProtocol, DataSourceFormat => DeltaRestDataSourceFormat, ListNamespacesResponse => DeltaRestListNamespacesResponse, ListTablesResponse => DeltaRestListTablesResponse, LoadTableResponse => DeltaRestLoadTableResponse, NamespaceResponse => DeltaRestNamespaceResponse, StagingTableResponse => DeltaRestStagingTableResponse, StorageCredential => DeltaRestStorageCredential, StructField => DeltaRestStructField, TableIdentifierWithDataSourceFormat => DeltaRestTableIdentifierWithDataSourceFormat, TableMetadata => DeltaRestTableMetadata} +import io.unitycatalog.client.deltarest.{ApiClient => DeltaRestApiClient} +import io.unitycatalog.client.model.{ColumnInfo, ColumnTypeName, CreateTable, DataSourceFormat, TableType} import io.unitycatalog.client.retry.JitterDelayRetryPolicy import io.unitycatalog.client.{ApiClient, ApiException} import io.unitycatalog.spark.auth.{AuthConfigUtils, CredPropsUtil} @@ -35,9 +38,10 @@ class UCSingleCatalog private[this] var uri: URI = null private[this] var tokenProvider: TokenProvider = null private[this] var renewCredEnabled: Boolean = false - private[this] var apiClient: ApiClient = null; - private[this] var temporaryCredentialsApi: TemporaryCredentialsApi = null - private[this] var tablesApi: TablesApi = null + private[this] var apiClient: ApiClient = null + private[this] var 
deltaRestConfigurationApi: DeltaRestConfigurationApi = null + private[this] var deltaRestTablesApi: DeltaRestTablesApi = null + private[this] var catalogConfig: DeltaRestCatalogConfig = null @volatile private var delegate: TableCatalog = null @@ -51,12 +55,27 @@ class UCSingleCatalog OptionsUtil.RENEW_CREDENTIAL_ENABLED, OptionsUtil.DEFAULT_RENEW_CREDENTIAL_ENABLED) + // Create Unity Catalog API client for non-Delta/non-Iceberg tables apiClient = ApiClientFactory.createApiClient( - JitterDelayRetryPolicy.builder().build(),uri, tokenProvider) - temporaryCredentialsApi = new TemporaryCredentialsApi(apiClient) - tablesApi = new TablesApi(apiClient) - val proxy = new UCProxy(uri, tokenProvider, renewCredEnabled, apiClient, tablesApi, - temporaryCredentialsApi) + JitterDelayRetryPolicy.builder().build(), uri, tokenProvider) + + // Create Delta REST API client + val deltaRestUri = uri.toString + "/api/2.1/unity-catalog/delta-rest/v1" + val deltaRestApiClient = new DeltaRestApiClient() + deltaRestApiClient.updateBaseUri(deltaRestUri) + deltaRestConfigurationApi = new DeltaRestConfigurationApi(deltaRestApiClient) + deltaRestTablesApi = new DeltaRestTablesApi(deltaRestApiClient) + + // Get catalog configuration from Delta REST API + try { + catalogConfig = deltaRestConfigurationApi.getConfig(name) + logInfo(s"Delta REST Catalog configured with ${catalogConfig.getEndpoints.size()} endpoints") + } catch { + case e: Exception => + logWarning(s"Failed to get Delta REST catalog config: ${e.getMessage}") + } + + val proxy = new UCProxy(uri, tokenProvider, renewCredEnabled, apiClient) proxy.initialize(name, options) if (UCSingleCatalog.LOAD_DELTA_CATALOG.get()) { try { @@ -86,6 +105,7 @@ class UCSingleCatalog override def loadTable(ident: Identifier, timestamp: Long): Table = delegate.loadTable(ident, timestamp) override def tableExists(ident: Identifier): Boolean = { + UCSingleCatalog.checkUnsupportedNestedNamespace(ident.namespace()) delegate.tableExists(ident) } @@ -130,57 
+150,109 @@ class UCSingleCatalog s"Invalid property value '$v' for '$k'.")) }) - // Get staging table location and table id from UC - val createStagingTable = new CreateStagingTable() - .catalogName(name()) - .schemaName(ident.namespace().head) - .name(ident.name()) - val stagingTableInfo = tablesApi.createStagingTable(createStagingTable) - val stagingLocation = stagingTableInfo.getStagingLocation - val stagingTableId = stagingTableInfo.getId + // Get staging table location and table id from Delta REST API + val createStagingTableRequest = new DeltaRestCreateStagingTableRequest() + createStagingTableRequest.setName(ident.name()) + val stagingTableResponse: DeltaRestStagingTableResponse = + deltaRestTablesApi.createStagingTable(name(), ident.namespace().head, createStagingTableRequest) + val stagingLocation = stagingTableResponse.getLocation + val stagingTableId = stagingTableResponse.getTableId.toString val newProps = new util.HashMap[String, String] newProps.putAll(properties) - newProps.put(TableCatalog.PROP_LOCATION, stagingTableInfo.getStagingLocation) + newProps.put(TableCatalog.PROP_LOCATION, stagingLocation) // Sets both the new and old table ID property while it's being renamed. - newProps.put(UCTableProperties.UC_TABLE_ID_KEY, stagingTableInfo.getId) - newProps.put(UCTableProperties.UC_TABLE_ID_KEY_OLD, stagingTableInfo.getId) + newProps.put(UCTableProperties.UC_TABLE_ID_KEY, stagingTableId) + newProps.put(UCTableProperties.UC_TABLE_ID_KEY_OLD, stagingTableId) // `PROP_IS_MANAGED_LOCATION` is used to indicate that the table location is not // user-specified but system-generated, which is exactly the case here. 
newProps.put(TableCatalog.PROP_IS_MANAGED_LOCATION, "true") - val temporaryCredentials = temporaryCredentialsApi.generateTemporaryTableCredentials( - new GenerateTemporaryTableCredential().tableId(stagingTableId).operation(TableOperation.READ_WRITE)) - val credentialProps = CredPropsUtil.createTableCredProps( - renewCredEnabled, - CatalogUtils.stringToURI(stagingLocation).getScheme, - uri.toString, - tokenProvider, - stagingTableId, - TableOperation.READ_WRITE, - temporaryCredentials, - ) - UCSingleCatalog.setCredentialProps(newProps, credentialProps) + // Add Delta table feature requirements from catalog config + // Only apply Delta features to Delta tables + val provider = Option(properties.get(TableCatalog.PROP_PROVIDER)).getOrElse("delta") + if (provider.equalsIgnoreCase("delta")) { + // Each feature is added as a separate property: delta.feature.X=supported + // Filter out features that aren't supported in all Delta versions + val unsupportedFeatures = Set("catalogManaged", "catalogmanaged") + if (catalogConfig != null && catalogConfig.getManagedTablesRequiredFeatures != null) { + catalogConfig.getManagedTablesRequiredFeatures.asScala + .filterNot(f => unsupportedFeatures.contains(f)) + .foreach { feature => + newProps.put(s"delta.feature.$feature", "supported") + } + } + // Suggested features are not yet implemented + // if (catalogConfig != null && catalogConfig.getManagedTablesSuggestedFeatures != null) { + // catalogConfig.getManagedTablesSuggestedFeatures.asScala + // .filterNot(f => unsupportedFeatures.contains(f)) + // .foreach { feature => + // newProps.put(s"delta.feature.$feature", "supported") + // } + // } + } + + // Use storage credentials from staging table response + val storageCredentials = stagingTableResponse.getStorageCredentials + if (storageCredentials != null && !storageCredentials.isEmpty) { + val cred = storageCredentials.get(0) + val locationUri = new URI(stagingLocation) + val scheme = locationUri.getScheme + // Use CredPropsUtil to 
get proper Hadoop config + val credProps = io.unitycatalog.spark.auth.CredPropsUtil.createTableCredProps( + renewCredEnabled, + scheme, + uri.toString, + tokenProvider, + stagingTableId, + io.unitycatalog.client.model.TableOperation.UNKNOWN_TABLE_OPERATION, + cred + ) + credProps.asScala.foreach { case (key, value) => + newProps.put(key, value) + newProps.put(TableCatalog.OPTION_PREFIX + key, value) + } + } delegate.createTable(ident, columns, partitions, newProps) } else if (hasLocationClause) { + // For external tables with user-provided location, get credentials for the location val location = properties.get(TableCatalog.PROP_LOCATION) - assert(location != null) - val cred = temporaryCredentialsApi.generateTemporaryPathCredentials( - new GenerateTemporaryPathCredential().url(location).operation(PathOperation.PATH_CREATE_TABLE)) val newProps = new util.HashMap[String, String] newProps.putAll(properties) - val credentialProps = CredPropsUtil.createPathCredProps( - renewCredEnabled, - CatalogUtils.stringToURI(location).getScheme, - uri.toString, - tokenProvider, - location, - PathOperation.PATH_CREATE_TABLE, - cred) + // Get credentials for the external location from Delta REST API + try { + val response = deltaRestTablesApi.getTemporaryPathCredentials( + location, + "PATH_CREATE_TABLE" + ) + val storageCredentials = response.getStorageCredentials + if (storageCredentials != null && !storageCredentials.isEmpty) { + val cred = storageCredentials.get(0) + val locationUri = new URI(location) + val scheme = locationUri.getScheme + // Generate a temporary table ID for credential tracking + val tempTableId = java.util.UUID.randomUUID().toString + val credProps = io.unitycatalog.spark.auth.CredPropsUtil.createTableCredProps( + renewCredEnabled, + scheme, + uri.toString, + tokenProvider, + tempTableId, + io.unitycatalog.client.model.TableOperation.READ_WRITE, + cred + ) + credProps.asScala.foreach { case (key, value) => + newProps.put(key, value) + 
newProps.put(TableCatalog.OPTION_PREFIX + key, value) + } + } + } catch { + case e: Exception => + logWarning(s"Failed to get credentials for external table location $location: ${e.getMessage}") + } - UCSingleCatalog.setCredentialProps(newProps, credentialProps) delegate.createTable(ident, columns, partitions, newProps) } else { // TODO: for path-based tables, Spark should generate a location property using the qualified @@ -197,7 +269,10 @@ class UCSingleCatalog throw new UnsupportedOperationException("Altering a table is not supported yet") } - override def dropTable(ident: Identifier): Boolean = delegate.dropTable(ident) + override def dropTable(ident: Identifier): Boolean = { + UCSingleCatalog.checkUnsupportedNestedNamespace(ident.namespace()) + delegate.dropTable(ident) + } override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { throw new UnsupportedOperationException("Renaming a table is not supported yet") @@ -279,15 +354,24 @@ private class UCProxy( uri: URI, tokenProvider: TokenProvider, renewCredEnabled: Boolean, - apiClient: ApiClient, - tablesApi: TablesApi, - temporaryCredentialsApi: TemporaryCredentialsApi) extends TableCatalog with SupportsNamespaces { + apiClient: ApiClient) extends TableCatalog with SupportsNamespaces { private[this] var name: String = null - private[this] var schemasApi: SchemasApi = null + private[this] var tablesApi: TablesApi = null + private[this] var deltaRestTablesApi: DeltaRestTablesApi = null + private[this] var deltaRestNamespacesApi: DeltaRestNamespacesApi = null override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { this.name = name - schemasApi = new SchemasApi(apiClient) + + // Create Unity Catalog TablesApi for non-Delta/non-Iceberg tables + tablesApi = new TablesApi(apiClient) + + // Create Delta REST API client with Delta REST endpoint + val deltaRestUri = uri.toString + "/api/2.1/unity-catalog/delta-rest/v1" + val deltaRestApiClient = new DeltaRestApiClient() + 
deltaRestApiClient.updateBaseUri(deltaRestUri) + deltaRestTablesApi = new DeltaRestTablesApi(deltaRestApiClient) + deltaRestNamespacesApi = new DeltaRestNamespacesApi(deltaRestApiClient) } override def name(): String = { @@ -300,78 +384,82 @@ private class UCProxy( val catalogName = this.name val schemaName = namespace.head - val maxResults = 0 val pageToken = null - val response: ListTablesResponse = tablesApi.listTables(catalogName, schemaName, maxResults, pageToken) - response.getTables.toSeq.map(table => Identifier.of(namespace, table.getName)).toArray + val response: DeltaRestListTablesResponse = deltaRestTablesApi.listTables(catalogName, schemaName, pageToken) + response.getIdentifiers.asScala.map(tableIdent => Identifier.of(namespace, tableIdent.getName)).toArray } override def loadTable(ident: Identifier): Table = { - val t = try { - tablesApi.getTable( - UCSingleCatalog.fullTableNameForApi(this.name, ident), - /* readStreamingTableAsManaged = */ true, - /* readMaterializedViewAsManaged = */ true) + val response = try { + deltaRestTablesApi.loadTable( + this.name, + ident.namespace().head, + ident.name(), + /* withCredentials = */ true) } catch { - case e: ApiException if e.getCode == 404 => + case e: io.unitycatalog.client.deltarest.ApiException if e.getCode == 404 => throw new NoSuchTableException(ident) } - val identifier = TableIdentifier(t.getName, Some(t.getSchemaName), Some(t.getCatalogName)) + + val metadata = response.getMetadata + val identifier = TableIdentifier(ident.name(), Some(ident.namespace().head), Some(this.name)) val partitionCols = scala.collection.mutable.ArrayBuffer.empty[(String, Int)] - val fields = t.getColumns.asScala.map { col => - Option(col.getPartitionIndex).foreach { index => - partitionCols += col.getName -> index + + // Convert Delta REST schema to Spark schema + val fields = metadata.getSchema.asScala.zipWithIndex.map { case (deltaCol, index) => + val structField = deltaCol.getTypeJson + // Serialize type object to JSON 
string for Spark's DataType.fromJson + // For primitive types, we need to wrap the type name in quotes to make it valid JSON + val typeJson = structField.getType match { + case s: String => + // Wrap string in quotes to make it valid JSON: "string" becomes "\"string\"" + s"""\"$s\"""" + case obj => + // Complex types are already objects, serialize them as-is + new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(obj) } - StructField(col.getName, DataType.fromDDL(col.getTypeText), col.getNullable) - .withComment(col.getComment) + val dataType = DataType.fromJson(typeJson) + // Check if this is a partition column (assuming partition columns are marked in properties) + StructField(structField.getName, dataType, structField.getNullable) }.toArray - val locationUri = CatalogUtils.stringToURI(t.getStorageLocation) - val tableId = t.getTableId - var tableOp = TableOperation.READ_WRITE - val temporaryCredentials = { - try { - temporaryCredentialsApi - .generateTemporaryTableCredentials( - // TODO: at this time, we don't know if the table will be read or written. For now we always - // request READ_WRITE credentials as the server doesn't distinguish between READ and - // READ_WRITE credentials as of today. When loading a table, Spark should tell if it's - // for read or write, we can request the proper credential after fixing Spark. 
- new GenerateTemporaryTableCredential().tableId(tableId).operation(tableOp) - ) - } catch { - case _: ApiException => - tableOp = TableOperation.READ - temporaryCredentialsApi - .generateTemporaryTableCredentials( - new GenerateTemporaryTableCredential().tableId(tableId).operation(tableOp) - ) - } - } - val extraSerdeProps = CredPropsUtil.createTableCredProps( - renewCredEnabled, - locationUri.getScheme, - uri.toString, - tokenProvider, - tableId, - tableOp, - temporaryCredentials, - ) + val locationUri = CatalogUtils.stringToURI(metadata.getLocation) + val tableId = metadata.getTableUuid.toString + + // Get storage credentials from the response + val storageCredentials = response.getStorageCredentials + val extraSerdeProps = if (storageCredentials != null && !storageCredentials.isEmpty) { + val cred = storageCredentials.get(0) + val scheme = locationUri.getScheme + // Use CredPropsUtil to get proper Hadoop config + val credProps = io.unitycatalog.spark.auth.CredPropsUtil.createTableCredProps( + renewCredEnabled, + scheme, + uri.toString, + tokenProvider, + tableId, + io.unitycatalog.client.model.TableOperation.READ, + cred + ) + credProps.asScala.toMap + } else { + Map.empty[String, String] + } val sparkTable = CatalogTable( identifier, - tableType = if (t.getTableType == TableType.MANAGED) { + tableType = if (metadata.getTableType == DeltaRestTableMetadata.TableTypeEnum.MANAGED) { CatalogTableType.MANAGED } else { CatalogTableType.EXTERNAL }, storage = CatalogStorageFormat.empty.copy( locationUri = Some(locationUri), - properties = t.getProperties.asScala.toMap ++ extraSerdeProps + properties = metadata.getProperties.asScala.toMap ++ extraSerdeProps ), schema = StructType(fields), - provider = Some(t.getDataSourceFormat.getValue.toLowerCase()), - createTime = t.getCreatedAt, + provider = Some(metadata.getDataSourceFormat.getValue.toLowerCase()), + createTime = metadata.getCreateTime, tracksPartitionsInCatalog = false, partitionColumnNames = 
partitionCols.sortBy(_._2).map(_._1).toSeq ) @@ -388,50 +476,77 @@ private class UCProxy( UCSingleCatalog.checkUnsupportedNestedNamespace(ident.namespace()) assert(properties.get(TableCatalog.PROP_PROVIDER) != null) - val createTable = new CreateTable() - createTable.setName(ident.name()) - createTable.setSchemaName(ident.namespace().head) - createTable.setCatalogName(this.name) - val hasExternalClause = properties.containsKey(TableCatalog.PROP_EXTERNAL) val storageLocation = properties.get(TableCatalog.PROP_LOCATION) assert(storageLocation != null, "location should either be user specified or system generated.") val isManagedLocation = Option(properties.get(TableCatalog.PROP_IS_MANAGED_LOCATION)) .exists(_.equalsIgnoreCase("true")) val format = properties.get("provider") - if (isManagedLocation) { + + // Determine table type + val tableType = if (isManagedLocation) { assert(!hasExternalClause, "location is only generated for managed tables.") - if (!format.equalsIgnoreCase(DataSourceFormat.DELTA.name)) { + if (!format.equalsIgnoreCase("DELTA")) { throw new ApiException("Unity Catalog does not support non-Delta managed table.") } - createTable.setTableType(TableType.MANAGED) + DeltaRestCreateTableRequest.TableTypeEnum.MANAGED } else { - createTable.setTableType(TableType.EXTERNAL) + DeltaRestCreateTableRequest.TableTypeEnum.EXTERNAL } - createTable.setStorageLocation(storageLocation) - val columns: Seq[ColumnInfo] = schema.fields.toSeq.zipWithIndex.map { case (field, i) => - val column = new ColumnInfo() - column.setName(field.name) + // Convert schema to Delta REST format + val deltaColumns: Seq[DeltaRestDeltaColumn] = schema.fields.toSeq.zipWithIndex.map { case (field, i) => + val structField = new DeltaRestStructField() + structField.setName(field.name) + // Use Spark's JSON representation for the type + // For primitive types, Spark's json property returns a quoted string like "\"integer\"" + // We need to strip the quotes to get just the type name + val 
jsonType = field.dataType.json + val typeValue = if (jsonType.startsWith("\"") && jsonType.endsWith("\"")) { + // Remove surrounding quotes for primitive types + jsonType.substring(1, jsonType.length - 1) + } else { + // Complex types (structs, arrays, maps) - use as-is + jsonType + } + structField.setType(typeValue) + structField.setNullable(field.nullable) if (field.getComment().isDefined) { - column.setComment(field.getComment.get) + structField.setComment(field.getComment.get) } - column.setNullable(field.nullable) - column.setTypeText(field.dataType.simpleString) - column.setTypeName(convertDataTypeToTypeName(field.dataType)) - column.setTypeJson(field.dataType.json) - column.setPosition(i) - column + structField.setMetadata(new java.util.HashMap[String, AnyRef]()) + + val deltaColumn = new DeltaRestDeltaColumn() + deltaColumn.setTypeJson(structField) + deltaColumn } - val comment = Option(properties.get(TableCatalog.PROP_COMMENT)) - comment.foreach(createTable.setComment(_)) - createTable.setColumns(columns) - createTable.setDataSourceFormat(convertDatasourceFormat(format)) + + // Create Delta protocol + val protocol = new DeltaRestDeltaProtocol() + protocol.setMinReaderVersion(1) + protocol.setMinWriterVersion(2) + + // Create Delta REST request + val createTableRequest = new DeltaRestCreateTableRequest() + createTableRequest.setName(ident.name()) + createTableRequest.setLocation(storageLocation) + createTableRequest.setTableType(tableType) + createTableRequest.setDataSourceFormat(convertDatasourceFormatToDeltaRest(format)) + createTableRequest.setSchema(deltaColumns.asJava) + createTableRequest.setProtocol(protocol) + + // Set comment if present in properties + val comment = properties.get(TableCatalog.PROP_COMMENT) + if (comment != null) { + createTableRequest.setComment(comment) + } + // Do not send the V2 table properties as they are made part of the `createTable` already. 
val propertiesToServer = properties.view.filterKeys(!UCTableProperties.V2_TABLE_PROPERTIES.contains(_)).toMap - createTable.setProperties(propertiesToServer) - tablesApi.createTable(createTable) + createTableRequest.setProperties(propertiesToServer.asJava) + + deltaRestTablesApi.createTable(this.name, ident.namespace().head, createTableRequest) loadTable(ident) } @@ -448,6 +563,20 @@ private class UCProxy( } } + private def convertDatasourceFormatToDeltaRest(format: String): DeltaRestDataSourceFormat = { + format.toUpperCase match { + case "DELTA" => DeltaRestDataSourceFormat.DELTA + case "ICEBERG" => DeltaRestDataSourceFormat.ICEBERG + case "PARQUET" => DeltaRestDataSourceFormat.PARQUET + case "CSV" => DeltaRestDataSourceFormat.CSV + case "JSON" => DeltaRestDataSourceFormat.JSON + case "ORC" => DeltaRestDataSourceFormat.ORC + case "TEXT" => DeltaRestDataSourceFormat.TEXT + case "AVRO" => DeltaRestDataSourceFormat.AVRO + case _ => throw new ApiException("DataSourceFormat not supported for Delta REST: " + format) + } + } + private def convertDataTypeToTypeName(dataType: DataType): ColumnTypeName = { dataType match { case StringType => ColumnTypeName.STRING @@ -470,8 +599,9 @@ private class UCProxy( } override def dropTable(ident: Identifier): Boolean = { - val ret = tablesApi.deleteTable(UCSingleCatalog.fullTableNameForApi(this.name, ident)) - if (ret == 200) true else false + UCSingleCatalog.checkUnsupportedNestedNamespace(ident.namespace()) + deltaRestTablesApi.deleteTable(this.name, ident.namespace().head, ident.name()) + true } override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { @@ -479,8 +609,9 @@ private class UCProxy( } override def listNamespaces(): Array[Array[String]] = { - schemasApi.listSchemas(name, 0, null).getSchemas.asScala.map { schema => - Array(schema.getName) + val response: DeltaRestListNamespacesResponse = deltaRestNamespacesApi.listNamespaces(name, null) + response.getNamespaces.asScala.map { ns => + 
ns.asScala.toArray }.toArray } @@ -490,33 +621,22 @@ private class UCProxy( override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] = { UCSingleCatalog.checkUnsupportedNestedNamespace(namespace) - val schema = try { - schemasApi.getSchema(name + "." + namespace(0)) + val response = try { + deltaRestNamespacesApi.loadNamespace(name, namespace(0)) } catch { - case e: ApiException if e.getCode == 404 => + case e: io.unitycatalog.client.deltarest.ApiException if e.getCode == 404 => throw new NoSuchNamespaceException(namespace) } - // flatten the schema properties to a map, with the key prefixed by "properties:" - val metadata = schema.getProperties.asScala.map { - case (k, v) => SchemaInfo.JSON_PROPERTY_PROPERTIES + ":" + k -> v - } - metadata(SchemaInfo.JSON_PROPERTY_NAME) = schema.getName - metadata(SchemaInfo.JSON_PROPERTY_CATALOG_NAME) = schema.getCatalogName - metadata(SchemaInfo.JSON_PROPERTY_COMMENT) = schema.getComment - metadata(SchemaInfo.JSON_PROPERTY_FULL_NAME) = schema.getFullName - metadata(SchemaInfo.JSON_PROPERTY_CREATED_AT) = if (schema.getCreatedAt != null) {schema.getCreatedAt.toString} else {"null"} - metadata(SchemaInfo.JSON_PROPERTY_UPDATED_AT) = if (schema.getUpdatedAt != null) {schema.getUpdatedAt.toString} else {"null"} - metadata(SchemaInfo.JSON_PROPERTY_SCHEMA_ID) = schema.getSchemaId - metadata.asJava + // Return namespace properties + response.getProperties.asScala.toMap.asJava } override def createNamespace(namespace: Array[String], metadata: util.Map[String, String]): Unit = { UCSingleCatalog.checkUnsupportedNestedNamespace(namespace) - val createSchema = new CreateSchema() - createSchema.setCatalogName(this.name) - createSchema.setName(namespace.head) - createSchema.setProperties(metadata) - schemasApi.createSchema(createSchema) + val createRequest = new DeltaRestCreateNamespaceRequest() + createRequest.setNamespace(namespace.toSeq.asJava) + createRequest.setProperties(metadata) + 
deltaRestNamespacesApi.createNamespace(name, createRequest) } override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = { @@ -525,7 +645,7 @@ private class UCProxy( override def dropNamespace(namespace: Array[String], cascade: Boolean): Boolean = { UCSingleCatalog.checkUnsupportedNestedNamespace(namespace) - schemasApi.deleteSchema(name + "." + namespace.head, cascade) + deltaRestNamespacesApi.deleteNamespace(name, namespace.head) true } } diff --git a/server/src/main/java/io/unitycatalog/server/UnityCatalogServer.java b/server/src/main/java/io/unitycatalog/server/UnityCatalogServer.java index 923c3f98b3..63df22538b 100644 --- a/server/src/main/java/io/unitycatalog/server/UnityCatalogServer.java +++ b/server/src/main/java/io/unitycatalog/server/UnityCatalogServer.java @@ -2,11 +2,14 @@ import static io.unitycatalog.server.security.SecurityContext.Issuers.INTERNAL; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; import com.linecorp.armeria.common.HttpResponse; import com.linecorp.armeria.server.Server; import com.linecorp.armeria.server.ServerBuilder; @@ -18,10 +21,12 @@ import io.unitycatalog.server.auth.UnityCatalogAuthorizer; import io.unitycatalog.server.auth.decorator.UnityAccessDecorator; import io.unitycatalog.server.auth.decorator.UnityAccessUtil; +import io.unitycatalog.server.decorator.RpcLoggingDecorator; import io.unitycatalog.server.exception.BaseException; import io.unitycatalog.server.exception.ErrorCode; import io.unitycatalog.server.exception.ExceptionHandlingDecorator; import 
io.unitycatalog.server.exception.GlobalExceptionHandler; +import io.unitycatalog.server.model.deltarest.TableUpdate; import io.unitycatalog.server.persist.Repositories; import io.unitycatalog.server.persist.utils.HibernateConfigurator; import io.unitycatalog.server.security.SecurityConfiguration; @@ -49,6 +54,8 @@ import io.unitycatalog.server.service.VolumeService; import io.unitycatalog.server.service.credential.CloudCredentialVendor; import io.unitycatalog.server.service.credential.StorageCredentialVendor; +import io.unitycatalog.server.service.deltarest.DeltaRestCatalogService; +import io.unitycatalog.server.service.deltarest.TableUpdateDeserializer; import io.unitycatalog.server.service.iceberg.FileIOFactory; import io.unitycatalog.server.service.iceberg.MetadataService; import io.unitycatalog.server.service.iceberg.TableConfigService; @@ -121,6 +128,8 @@ private Server initializeServer(UnityCatalogServer.Builder unityCatalogServerBui // Init security decorators addSecurityDecorators( armeriaServerBuilder, unityCatalogServerBuilder.serverProperties, authorizer, repositories); + // Init RPC logging decorator (applied to all requests/responses) + addRpcLoggingDecorator(armeriaServerBuilder); return armeriaServerBuilder.build(); } @@ -248,6 +257,8 @@ private void addApiServices( schemaService, tableService, repositories); + addDeltaRestApiServices( + armeriaServerBuilder, authorizer, storageCredentialVendor, repositories); } private void addIcebergApiServices( @@ -284,6 +295,52 @@ private void addIcebergApiServices( icebergResponseConverter); } + /** + * Mixin to disable the generated @JsonTypeInfo annotation on TableUpdate. This allows our custom + * deserializer to handle polymorphic deserialization instead. 
+ */ + @JsonTypeInfo(use = JsonTypeInfo.Id.NONE) + @JsonIgnoreProperties({"action"}) + interface TableUpdateMixin {} + + private void addDeltaRestApiServices( + ServerBuilder armeriaServerBuilder, + UnityCatalogAuthorizer authorizer, + StorageCredentialVendor storageCredentialVendor, + Repositories repositories) { + LOGGER.info("Adding Delta REST Catalog services..."); + + // Add support for Delta REST Catalog APIs + ObjectMapper deltaRestMapper = + JsonMapper.builder() + .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + .serializationInclusion(JsonInclude.Include.NON_NULL) + .build(); + + // Disable the generated @JsonTypeInfo annotation on TableUpdate using a mixin + // This prevents Jackson from trying to use the annotation-based polymorphic deserialization + deltaRestMapper.addMixIn(TableUpdate.class, TableUpdateMixin.class); + + // Register custom deserializer for TableUpdate to handle polymorphic deserialization + // This is needed because OpenAPI Generator (with library: "resteasy") generates separate + // classes for each update type without creating an inheritance relationship + SimpleModule module = new SimpleModule(); + // Use raw type to bypass compile-time type checking since the generated classes don't + // share a common base class + module.addDeserializer((Class) TableUpdate.class, new TableUpdateDeserializer()); + deltaRestMapper.registerModule(module); + JacksonRequestConverterFunction deltaRestRequestConverter = + new JacksonRequestConverterFunction(deltaRestMapper); + JacksonResponseConverterFunction deltaRestResponseConverter = + new JacksonResponseConverterFunction(deltaRestMapper); + + armeriaServerBuilder.annotatedService( + BASE_PATH + "delta-rest", + new DeltaRestCatalogService(authorizer, repositories, storageCredentialVendor), + deltaRestRequestConverter, + deltaRestResponseConverter); + } + private void addSecurityDecorators( ServerBuilder armeriaServerBuilder, ServerProperties serverProperties, @@ -316,6 +373,14 @@ private 
void addSecurityDecorators( } } + private void addRpcLoggingDecorator(ServerBuilder armeriaServerBuilder) { + LOGGER.info("Enabling RPC request/response logging decorator..."); + // Apply RPC logging decorator to all API requests + RpcLoggingDecorator loggingDecorator = new RpcLoggingDecorator(); + armeriaServerBuilder.routeDecorator().pathPrefix(BASE_PATH).build(loggingDecorator); + armeriaServerBuilder.routeDecorator().pathPrefix(CONTROL_PATH).build(loggingDecorator); + } + public static void main(String[] args) { OptionParser options = new OptionParser(); options.parse(args); diff --git a/server/src/main/java/io/unitycatalog/server/auth/annotation/AuthorizeResourceKey.java b/server/src/main/java/io/unitycatalog/server/auth/annotation/AuthorizeResourceKey.java index 2bd7acbeb8..6f42b0fd59 100644 --- a/server/src/main/java/io/unitycatalog/server/auth/annotation/AuthorizeResourceKey.java +++ b/server/src/main/java/io/unitycatalog/server/auth/annotation/AuthorizeResourceKey.java @@ -13,8 +13,8 @@ *

Unlike {@link AuthorizeKey} which only exposes the raw value of ANY request field, this class * only annotates request fields that reference to resources and maps them to resource identifiers * (UUIDs). The resource key is used to retrieve the resource identifier, which is then used to - * authorize the request. As an example, suppose you are making a request the retrieve a schema, - * the parameter that contains the schema name might be defined in the request as: + * authorize the request. As an example, suppose you are making a request to retrieve a schema, the + * parameter that contains the schema name might be defined in the request as: * *

@AuthorizeResourceKey(SCHEMA) @Param("full_Name") String fullName * @@ -50,7 +50,7 @@ *

{@code
  * public void serviceMethod(
  *   @AuthorizeResourceKey(value = CATALOG, key = "catalog") CreateSchemaRequest request) { }
- * 
+ * } */ @Retention(RetentionPolicy.RUNTIME) @Target({ElementType.METHOD, ElementType.PARAMETER}) diff --git a/server/src/main/java/io/unitycatalog/server/decorator/RpcLoggingDecorator.java b/server/src/main/java/io/unitycatalog/server/decorator/RpcLoggingDecorator.java new file mode 100644 index 0000000000..f7798c8f58 --- /dev/null +++ b/server/src/main/java/io/unitycatalog/server/decorator/RpcLoggingDecorator.java @@ -0,0 +1,137 @@ +package io.unitycatalog.server.decorator; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linecorp.armeria.common.HttpRequest; +import com.linecorp.armeria.common.HttpResponse; +import com.linecorp.armeria.common.HttpStatus; +import com.linecorp.armeria.server.DecoratingHttpServiceFunction; +import com.linecorp.armeria.server.HttpService; +import com.linecorp.armeria.server.ServiceRequestContext; +import java.util.concurrent.CompletableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Decorator that logs all RPC requests and responses at a centralized point. + * + *

This decorator intercepts all HTTP requests and responses passing through the service layer
+ * and logs them using error level logging to ensure they are always captured. Values of
+ * credential-bearing request headers are replaced with a placeholder before they are logged.
+ *
+ * <p>Both the request and the response are aggregated in full so that their bodies can be logged.
+ */
+public class RpcLoggingDecorator implements DecoratingHttpServiceFunction {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(RpcLoggingDecorator.class);
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  /** Placeholder written to the log instead of a credential-bearing header value. */
+  private static final String REDACTED = "<redacted>";
+
+  /** Request headers whose values must never be written to the log. */
+  private static final String[] SENSITIVE_HEADERS = {
+    "authorization", "proxy-authorization", "cookie"
+  };
+
+  /**
+   * Logs the aggregated request, forwards a rebuilt request to the delegate, and logs the
+   * aggregated response before returning it.
+   *
+   * @param delegate the service being decorated
+   * @param ctx the context of the current request
+   * @param req the incoming request
+   * @return the delegate's response, or a 500 response if the delegate throws or any stage of
+   *     the asynchronous chain fails
+   */
+  @Override
+  public HttpResponse serve(HttpService delegate, ServiceRequestContext ctx, HttpRequest req)
+      throws Exception {
+
+    // Aggregate the request once and log it, then pass it to the delegate
+    return HttpResponse.of(
+        req.aggregate()
+            .thenCompose(
+                aggregatedRequest -> {
+                  // Log the request
+                  logRequest(ctx, req, aggregatedRequest.contentUtf8());
+
+                  // The original request body has been consumed by aggregate(), so rebuild a
+                  // request from the same headers and the aggregated content for the delegate.
+                  HttpRequest newReq = HttpRequest.of(req.headers(), aggregatedRequest.content());
+
+                  // Get the response from the delegate service
+                  HttpResponse response;
+                  try {
+                    response = delegate.serve(ctx, newReq);
+                  } catch (Exception e) {
+                    LOGGER.error(
+                        "\n[RPC Req] {} {}, Error in delegate: {}",
+                        req.method(),
+                        req.path(),
+                        e.getMessage(),
+                        e);
+                    return CompletableFuture.completedFuture(
+                        HttpResponse.of(HttpStatus.INTERNAL_SERVER_ERROR));
+                  }
+
+                  // Log the response
+                  return response
+                      .aggregate()
+                      .thenApply(
+                          aggregatedResponse -> {
+                            logResponse(
+                                ctx, aggregatedResponse.status(), aggregatedResponse.contentUtf8());
+                            return aggregatedResponse.toHttpResponse();
+                          });
+                })
+            .exceptionally(
+                throwable -> {
+                  LOGGER.error(
+                      "\n[RPC Error] {} {}, Error: {}",
+                      req.method(),
+                      req.path(),
+                      throwable.getMessage(),
+                      throwable);
+                  return HttpResponse.of(HttpStatus.INTERNAL_SERVER_ERROR);
+                }));
+  }
+
+  /**
+   * Returns {@code body} re-serialized as indented JSON, or {@code body} unchanged when it cannot
+   * be parsed as JSON.
+   */
+  private static String prettyPrintJson(String body) {
+    try {
+      Object json = OBJECT_MAPPER.readValue(body, Object.class);
+      return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(json);
+    } catch (Exception ignored) {
+      // Pretty-printing is best effort; a body that is not valid JSON is logged as received.
+      return body;
+    }
+  }
+
+  /**
+   * Logs method, path, headers and body of a request. JSON bodies are pretty-printed and the
+   * values of {@link #SENSITIVE_HEADERS} are replaced with {@link #REDACTED}.
+   */
+  private void logRequest(ServiceRequestContext ctx, HttpRequest req, String requestBody) {
+    try {
+      String prettyRequestBody = requestBody;
+
+      // Pretty print JSON if possible; a missing Content-Type is treated as JSON.
+      if (!requestBody.isEmpty()) {
+        String contentType = req.headers().get("Content-Type", "application/json");
+        if (contentType.contains("application/json")) {
+          prettyRequestBody = prettyPrintJson(requestBody);
+        }
+      }
+
+      // Work on a copy of the headers so the request forwarded to the delegate is untouched.
+      var loggedHeaders = req.headers().toBuilder();
+      for (String name : SENSITIVE_HEADERS) {
+        if (loggedHeaders.contains(name)) {
+          loggedHeaders.set(name, REDACTED);
+        }
+      }
+
+      LOGGER.error(
+          "\n[RPC Req] {} {}, Headers: {}, Body: {}",
+          req.method(),
+          req.path(),
+          loggedHeaders.build(),
+          prettyRequestBody.isEmpty() ? "" : "\n" + prettyRequestBody);
+    } catch (Exception e) {
+      LOGGER.error("\n[RPC Req] {} {}, Error: {}", req.method(), req.path(), e.getMessage(), e);
+    }
+  }
+
+  /** Logs method and path of the originating request with the response status and body. */
+  private void logResponse(ServiceRequestContext ctx, HttpStatus status, String responseBody) {
+    try {
+      // Pretty print JSON if possible
+      String prettyResponseBody =
+          responseBody.isEmpty() ? responseBody : prettyPrintJson(responseBody);
+
+      LOGGER.error(
+          "\n[RPC Resp] {} {}, Status: {}, Body: {}",
+          ctx.request().method(),
+          ctx.request().path(),
+          status,
+          prettyResponseBody.isEmpty() ? "" : "\n" + prettyResponseBody);
+    } catch (Exception e) {
+      LOGGER.error(
+          "\n[RPC Resp] {} {}, Status: {}, Error: {}",
+          ctx.request().method(),
+          ctx.request().path(),
+          status,
+          e.getMessage(),
+          e);
+    }
+  }
+}
diff --git a/server/src/main/java/io/unitycatalog/server/persist/DeltaCommitRepository.java b/server/src/main/java/io/unitycatalog/server/persist/DeltaCommitRepository.java
index e5f6701a0d..5a46869cb0 100644
--- a/server/src/main/java/io/unitycatalog/server/persist/DeltaCommitRepository.java
+++ b/server/src/main/java/io/unitycatalog/server/persist/DeltaCommitRepository.java
@@ -221,44 +221,56 @@ public DeltaGetCommitsResponse getCommits(DeltaGetCommits rpc) {
    *     exceeded
    */
   public void postCommit(DeltaCommit commit) {
-    serverProperties.checkManagedTableEnabled();
-    validateCommit(commit);
     TransactionManager.executeWithTransaction(
         sessionFactory,
         session -> {
-          UUID tableId = UUID.fromString(commit.getTableId());
-          TableInfoDAO tableInfoDAO = session.get(TableInfoDAO.class, tableId);
-          if (tableInfoDAO == null) {
-            throw new BaseException(ErrorCode.NOT_FOUND, "Table not found: " + commit.getTableId());
-          }
-          validateTableForCommit(commit, tableInfoDAO);
-          List<DeltaCommitDAO> firstAndLastCommits = getFirstAndLastCommits(session, tableId);
-          if (firstAndLastCommits.isEmpty()) {
-            handleOnboardingCommit(session, tableId, tableInfoDAO, commit);
-          } else {
-            DeltaCommitDAO firstCommitDAO = firstAndLastCommits.get(0);
-            DeltaCommitDAO lastCommitDAO = firstAndLastCommits.get(1);
-            assert firstCommitDAO.getCommitVersion() <= lastCommitDAO.getCommitVersion();
-            if (commit.getCommitInfo() == null) {
-              // This is already checked in validateCommit()
-              assert commit.getLatestBackfilledVersion() != null;
-              handleBackfillOnlyCommit(
-                  session,
-                  tableId,
-                  commit.getLatestBackfilledVersion(),
-                  firstCommitDAO.getCommitVersion(),
-                  lastCommitDAO.getCommitVersion());
-            } else {
-              handleNormalCommit(
-                  session, tableId, tableInfoDAO, commit, firstCommitDAO, lastCommitDAO);
-            }
-          }
+
postCommit(session, commit);
           return null;
         },
         "Error committing to table: " + commit.getTableId(),
         /* readOnly = */ false);
   }
 
+  /**
+   * Session-aware version of postCommit for use within an existing transaction.
+   *
+   * <p>Runs {@code checkManagedTableEnabled()} and {@code validateCommit(commit)} first, then loads
+   * the table and dispatches on the commits already recorded for it: no recorded commits means an
+   * onboarding commit, a request without commit info means a backfill-only commit, and anything
+   * else is handled as a normal commit.
+   *
+   * @param session the Hibernate session
+   * @param commit the DeltaCommit containing commit information
+   * @throws BaseException if the commit is invalid, table is not found, or commit limits are
+   *     exceeded
+   */
+  public void postCommit(Session session, DeltaCommit commit) {
+    serverProperties.checkManagedTableEnabled();
+    validateCommit(commit);
+
+    UUID tableId = UUID.fromString(commit.getTableId());
+    TableInfoDAO tableInfoDAO = session.get(TableInfoDAO.class, tableId);
+    if (tableInfoDAO == null) {
+      throw new BaseException(ErrorCode.NOT_FOUND, "Table not found: " + commit.getTableId());
+    }
+    validateTableForCommit(commit, tableInfoDAO);
+    // Either empty, or the first commit at index 0 and the last commit at index 1.
+    List<DeltaCommitDAO> firstAndLastCommits = getFirstAndLastCommits(session, tableId);
+    if (firstAndLastCommits.isEmpty()) {
+      handleOnboardingCommit(session, tableId, tableInfoDAO, commit);
+    } else {
+      DeltaCommitDAO firstCommitDAO = firstAndLastCommits.get(0);
+      DeltaCommitDAO lastCommitDAO = firstAndLastCommits.get(1);
+      assert firstCommitDAO.getCommitVersion() <= lastCommitDAO.getCommitVersion();
+      if (commit.getCommitInfo() == null) {
+        // This is already checked in validateCommit()
+        assert commit.getLatestBackfilledVersion() != null;
+        handleBackfillOnlyCommit(
+            session,
+            tableId,
+            commit.getLatestBackfilledVersion(),
+            firstCommitDAO.getCommitVersion(),
+            lastCommitDAO.getCommitVersion());
+      } else {
+        handleNormalCommit(session, tableId, tableInfoDAO, commit, firstCommitDAO, lastCommitDAO);
+      }
+    }
+  }
+
   /**
    * Handles an onboarding commit, which is the very first commit sent to Unity Catalog for a table.
*
diff --git a/server/src/main/java/io/unitycatalog/server/persist/TableRepository.java b/server/src/main/java/io/unitycatalog/server/persist/TableRepository.java
index e937a16396..5130b71ddf 100644
--- a/server/src/main/java/io/unitycatalog/server/persist/TableRepository.java
+++ b/server/src/main/java/io/unitycatalog/server/persist/TableRepository.java
@@ -5,9 +5,22 @@ import io.unitycatalog.server.model.ColumnInfo;
 import io.unitycatalog.server.model.CreateTable;
 import io.unitycatalog.server.model.DataSourceFormat;
+import io.unitycatalog.server.model.DeltaCommit;
 import io.unitycatalog.server.model.ListTablesResponse;
 import io.unitycatalog.server.model.TableInfo;
 import io.unitycatalog.server.model.TableType;
+import io.unitycatalog.server.model.deltarest.AddCommitUpdate;
+import io.unitycatalog.server.model.deltarest.DeltaColumn;
+import io.unitycatalog.server.model.deltarest.RemovePropertiesUpdate;
+import io.unitycatalog.server.model.deltarest.SetLatestBackfilledVersionUpdate;
+import io.unitycatalog.server.model.deltarest.SetPropertiesUpdate;
+import io.unitycatalog.server.model.deltarest.SetSchemaUpdate;
+import io.unitycatalog.server.model.deltarest.SetTableCommentUpdate;
+import io.unitycatalog.server.model.deltarest.StructField;
+import io.unitycatalog.server.model.deltarest.TableRequirement;
+import io.unitycatalog.server.model.deltarest.UpdateProtocolUpdate;
+import io.unitycatalog.server.model.deltarest.UpdateTableRequest;
+import io.unitycatalog.server.persist.dao.ColumnInfoDAO;
 import io.unitycatalog.server.persist.dao.PropertyDAO;
 import io.unitycatalog.server.persist.dao.SchemaInfoDAO;
 import io.unitycatalog.server.persist.dao.StagingTableDAO;
@@ -22,10 +35,15 @@ import io.unitycatalog.server.utils.ServerProperties;
 import io.unitycatalog.server.utils.ValidationUtils;
 import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
 import java.util.UUID;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.tuple.Pair;
@@ -87,6 +105,22 @@ public NormalizedURL getStorageLocationForTableOrStagingTable(UUID tableId) {
         /* readOnly = */ true);
   }
 
+  public NormalizedURL getStorageLocationForStagingTable(UUID tableId) {
+    return TransactionManager.executeWithTransaction(
+        sessionFactory,
+        session -> {
+          LOGGER.debug("Getting storage location of staging table by id: {}", tableId);
+          StagingTableDAO staged = session.get(StagingTableDAO.class, tableId);
+          if (staged == null) {
+            throw new BaseException(
+                ErrorCode.NOT_FOUND, "No staging table found with id: " + tableId);
+          }
+          return NormalizedURL.from(staged.getStagingLocation());
+        },
+        "Failed to get storage location of staging table",
+        /* readOnly = */ true);
+  }
+
   /**
    * Retrieves the schema ID and catalog ID for a table or staging table by its ID. First attempts
    * to get IDs associated with a regular table with the given ID, then falls back to searching for
@@ -132,30 +166,43 @@ public Pair getCatalogSchemaIdsByTableOrStagingTableId(UUID tableId)
   }
 
   public TableInfo getTable(String fullName) {
-    LOGGER.debug("Getting table: {}", fullName);
     return TransactionManager.executeWithTransaction(
         sessionFactory,
-        session -> {
-          String[] parts = fullName.split("\\.");
-          if (parts.length != 3) {
-            throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Invalid table name: " + fullName);
-          }
-          String catalogName = parts[0];
-          String schemaName = parts[1];
-          String tableName = parts[2];
-          TableInfoDAO tableInfoDAO = findTable(session, catalogName, schemaName, tableName);
-          if (tableInfoDAO == null) {
-            throw new BaseException(ErrorCode.NOT_FOUND, "Table not found: " + fullName);
-          }
-          TableInfo tableInfo = tableInfoDAO.toTableInfo(true, catalogName, schemaName);
-          RepositoryUtils.attachProperties(
-              tableInfo, tableInfo.getTableId(), Constants.TABLE, session);
-          return tableInfo;
-        },
+        session -> getTable(session, fullName),
         "Failed to get table",
         /* readOnly = */ true);
   }
 
+  /**
+   * Loads a table by its three-part name using the caller's session and transaction.
+   *
+   * @param session the Hibernate session to read through
+   * @param fullName the table name in the form catalog.schema.table
+   * @return the table, with its properties attached
+   */
+  public TableInfo getTable(Session session, String fullName) {
+    LOGGER.debug("Getting table: {}", fullName);
+    String[] nameParts = fullName.split("\\.");
+    if (nameParts.length != 3) {
+      throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Invalid table name: " + fullName);
+    }
+    String catalogName = nameParts[0];
+    String schemaName = nameParts[1];
+    String tableName = nameParts[2];
+
+    SchemaRepository schemas = repositories.getSchemaRepository();
+    UUID schemaId = schemas.getSchemaIdOrThrow(session, catalogName, schemaName);
+
+    TableInfoDAO dao = findBySchemaIdAndName(session, schemaId, tableName);
+    if (dao == null) {
+      throw new BaseException(ErrorCode.NOT_FOUND, "Table not found: " + fullName);
+    }
+
+    TableInfo loaded = dao.toTableInfo(true, catalogName, schemaName);
+    RepositoryUtils.attachProperties(loaded, loaded.getTableId(), Constants.TABLE, session);
+    return loaded;
+  }
+
   public String getTableUniformMetadataLocation(
       Session session, String catalogName, String schemaName, String tableName) {
     TableInfoDAO dao = findTable(session, catalogName, schemaName, tableName);
@@ -385,4 +432,442 @@ public void deleteTable(Session session, UUID schemaId, String tableName) {
         .forEach(session::remove);
     session.remove(tableInfoDAO);
   }
+
+  /**
+   * Updates table metadata including comment, properties, and columns.
+   *
+   * @param fullName the full table name (catalog.schema.table)
+   * @param comment optional new comment for the table; {@code null} keeps the current comment
+   * @param properties optional properties to set (replaces existing); {@code null} keeps the
+   *     current properties
+   * @param columns optional columns to set (replaces existing); {@code null} or an empty list
+   *     keeps the current columns
+   * @return the updated TableInfo
+   */
+  public TableInfo updateTable(
+      String fullName,
+      String comment,
+      Map<String, String> properties,
+      List<ColumnInfo> columns) {
+    // The session-aware overload logs the update, so it is not logged a second time here.
+    return TransactionManager.executeWithTransaction(
+        sessionFactory,
+        session -> updateTable(session, fullName, comment, properties, columns),
+        "Failed to update table: " + fullName,
+        /* readOnly = */ false);
+  }
+
+  /**
+   * Session-aware version of updateTable for use within an existing transaction.
+   *
+   * @param session the Hibernate session
+   * @param fullName the full table name (catalog.schema.table)
+   * @param comment optional new comment for the table; {@code null} keeps the current comment
+   * @param properties optional properties to set (replaces existing); {@code null} keeps the
+   *     current properties
+   * @param columns optional columns to set (replaces existing); {@code null} or an empty list
+   *     keeps the current columns
+   * @return the updated TableInfo
+   * @throws BaseException with INVALID_ARGUMENT if {@code fullName} does not have exactly three
+   *     dot-separated parts, or NOT_FOUND if the table does not exist
+   */
+  public TableInfo updateTable(
+      Session session,
+      String fullName,
+      String comment,
+      Map<String, String> properties,
+      List<ColumnInfo> columns) {
+    LOGGER.debug("Updating table: {}", fullName);
+    String callerId = IdentityUtils.findPrincipalEmailAddress();
+
+    String[] parts = fullName.split("\\.");
+    if (parts.length != 3) {
+      throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Invalid table name: " + fullName);
+    }
+    String catalogName = parts[0];
+    String schemaName = parts[1];
+    String tableName = parts[2];
+
+    UUID schemaId =
+        repositories.getSchemaRepository().getSchemaIdOrThrow(session, catalogName, schemaName);
+
+    TableInfoDAO tableInfoDAO = findBySchemaIdAndName(session, schemaId, tableName);
+    if (tableInfoDAO == null) {
+      throw new BaseException(ErrorCode.NOT_FOUND, "Table not found: " + fullName);
+    }
+
+    // Update comment if provided
+    if (comment != null) {
+      tableInfoDAO.setComment(comment);
+    }
+
+    // Update properties if provided: remove every stored property, then persist the new set.
+    if (properties != null) {
+      PropertyRepository.findProperties(session, tableInfoDAO.getId(), Constants.TABLE)
+          .forEach(session::remove);
+      // Flush so the removals are issued before the replacement rows are persisted.
+      session.flush();
+      PropertyDAO.from(properties, tableInfoDAO.getId(), Constants.TABLE).forEach(session::persist);
+    }
+
+    // Update columns if provided; an empty list is treated the same as "not provided".
+    if (columns != null && !columns.isEmpty()) {
+      List<ColumnInfoDAO> newColumns = ColumnInfoDAO.fromList(columns);
+      tableInfoDAO.getColumns().clear();
+      // Flush so the old columns are removed before the new ones are attached.
+      session.flush();
+      newColumns.forEach(
+          c -> {
+            c.setId(UUID.randomUUID());
+            c.setTable(tableInfoDAO);
+          });
+      tableInfoDAO.getColumns().addAll(newColumns);
+    }
+
+    tableInfoDAO.setUpdatedBy(callerId);
+    tableInfoDAO.setUpdatedAt(new Date());
+    session.merge(tableInfoDAO);
+
+    TableInfo result = tableInfoDAO.toTableInfo(true, catalogName, schemaName);
+    RepositoryUtils.attachProperties(result, result.getTableId(), Constants.TABLE, session);
+    return result;
+  }
+
+  /**
+   * Updates a table by processing Delta REST requirements and updates in a single transaction. This
+   * method handles the complete update flow: get table, validate requirements, process updates,
+   * return updated table.
+   *
+   * @param fullName the full table name (catalog.schema.table)
+   * @param request the Delta REST update request containing requirements and updates
+   * @return the updated TableInfo
+   * @throws BaseException with INVALID_ARGUMENT if {@code request} is null, or whatever the
+   *     requirement checks and update processing raise
+   */
+  public TableInfo updateTableWithDeltaRestRequest(String fullName, UpdateTableRequest request) {
+
+    LOGGER.debug("Updating table with Delta REST request: {}", fullName);
+
+    // Reject a missing request up front instead of failing with an NPE inside the transaction.
+    if (request == null) {
+      throw new BaseException(
+          ErrorCode.INVALID_ARGUMENT, "Update request cannot be null for table: " + fullName);
+    }
+
+    return TransactionManager.executeWithTransaction(
+        sessionFactory,
+        session -> {
+          // Get table within transaction
+          TableInfo info = getTable(session, fullName);
+
+          // Validate requirements before processing updates
+          List<TableRequirement> requirements = request.getRequirements();
+          if (requirements != null) {
+            for (TableRequirement requirement : requirements) {
+              validateTableRequirement(info, requirement);
+            }
+          }
+
+          // Process updates within the same transaction
+          // Note: updates are deserialized as concrete types (AddCommitUpdate, etc.)
+          // that don't extend TableUpdate, so we use List<Object> to avoid ClassCastException
+          @SuppressWarnings("unchecked")
+          List<Object> updates = (List<Object>) (List<?>) request.getUpdates();
+          if (updates != null && !updates.isEmpty()) {
+            // Phase 1: Extract and accumulate all updates
+            AccumulatedTableUpdates accumulated = accumulateTableUpdates(info, updates);
+
+            // Phase 2: Apply accumulated updates
+            processTableUpdates(session, info, fullName, accumulated);
+          }
+
+          // Reload table info after updates within same transaction
+          return getTable(session, fullName);
+        },
+        "Failed to update table: " + fullName,
+        /* readOnly = */ false);
+  }
+
+  /**
+   * Accumulates all table updates from a list into a single consolidated structure.
+   *
+   * <p>Each update action may appear at most once in the list. An update whose payload is null (or
+   * empty, for properties, removals and schema) is still counted as seen but contributes nothing.
+   *
+   * @param tableInfo the current table info
+   * @param updates the list of updates to accumulate (concrete types like AddCommitUpdate)
+   * @return accumulated updates ready for processing
+   * @throws BaseException if duplicate update actions are found, an update has an unknown type, or
+   *     an update's action value is null
+   */
+  private AccumulatedTableUpdates accumulateTableUpdates(
+      TableInfo tableInfo, List<Object> updates) {
+    AccumulatedTableUpdates accumulated = new AccumulatedTableUpdates(tableInfo);
+    Set<String> seenActions = new HashSet<>();
+
+    for (Object updateObj : updates) {
+      // Handle each concrete update type since they don't extend a common base class
+      String action = null;
+
+      if (updateObj instanceof AddCommitUpdate addCommit) {
+        action = addCommit.getAction().getValue();
+        if (addCommit.getCommit() != null) {
+          accumulated.setCommit(addCommit.getCommit());
+        }
+      } else if (updateObj instanceof SetLatestBackfilledVersionUpdate backfillUpdate) {
+        action = backfillUpdate.getAction().getValue();
+        if (backfillUpdate.getLatestPublishedVersion() != null) {
+          accumulated.setLatestBackfilledVersion(backfillUpdate.getLatestPublishedVersion());
+        }
+      } else if (updateObj instanceof SetPropertiesUpdate setProps) {
+        action = setProps.getAction().getValue();
+        if (setProps.getUpdates() != null && !setProps.getUpdates().isEmpty()) {
+          accumulated.setProperties(setProps.getUpdates());
+        }
+      } else if (updateObj instanceof RemovePropertiesUpdate removeProps) {
+        action = removeProps.getAction().getValue();
+        if (removeProps.getRemovals() != null && !removeProps.getRemovals().isEmpty()) {
+          accumulated.setRemoveProperties(removeProps.getRemovals());
+        }
+      } else if (updateObj instanceof SetSchemaUpdate setSchema) {
+        action = setSchema.getAction().getValue();
+        if (setSchema.getSchema() != null && !setSchema.getSchema().isEmpty()) {
+          accumulated.setSchema(setSchema.getSchema());
+        }
+      } else if (updateObj instanceof SetTableCommentUpdate setComment) {
+        action = setComment.getAction().getValue();
+        if (setComment.getComment() != null) {
+          accumulated.setComment(setComment.getComment());
+        }
+      } else if (updateObj instanceof UpdateProtocolUpdate protocolUpdate) {
+        action = protocolUpdate.getAction().getValue();
+        // Protocol updates are stored in Delta log, not in UC metadata
+      } else {
+        throw new BaseException(
+            ErrorCode.INVALID_ARGUMENT,
+            "Unknown update type: " + updateObj.getClass().getName());
+      }
+
+      if (action == null) {
+        throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Update action cannot be null");
+      }
+
+      // Check for duplicate actions: Set.add returns false when the action was already seen.
+      if (!seenActions.add(action)) {
+        throw new BaseException(
+            ErrorCode.INVALID_ARGUMENT,
+            String.format("Duplicate update action not allowed: %s", action));
+      }
+    }
+
+    return accumulated;
+  }
+
+  /**
+   * Container class for accumulated table updates extracted from multiple TableUpdate objects.
+   * Every slot starts out empty and is filled by the matching setter; Lombok generates the getters.
+   */
+  @lombok.Getter
+  private static class AccumulatedTableUpdates {
+    private final TableInfo tableInfo;
+    private Optional<io.unitycatalog.server.model.deltarest.DeltaCommit> commit = Optional.empty();
+    private Optional<Long> latestBackfilledVersion = Optional.empty();
+    private Optional<Map<String, String>> setProperties = Optional.empty();
+    private Optional<List<String>> removeProperties = Optional.empty();
+    private Optional<List<DeltaColumn>> schema = Optional.empty();
+    private Optional<String> comment = Optional.empty();
+
+    AccumulatedTableUpdates(TableInfo tableInfo) {
+      this.tableInfo = tableInfo;
+    }
+
+    void setCommit(io.unitycatalog.server.model.deltarest.DeltaCommit commit) {
+      this.commit = Optional.of(commit);
+    }
+
+    void setLatestBackfilledVersion(Long version) {
+      this.latestBackfilledVersion = Optional.of(version);
+    }
+
+    void setProperties(Map<String, String> properties) {
+      this.setProperties = Optional.of(properties);
+    }
+
+    void setRemoveProperties(List<String> properties) {
+      this.removeProperties = Optional.of(properties);
+    }
+
+    void setSchema(List<DeltaColumn> schema) {
+      this.schema = Optional.of(schema);
+    }
+
+    void setComment(String comment) {
+      this.comment = Optional.of(comment);
+    }
+  }
+
+  /**
+   * Validates a table requirement for Delta
REST updates.
+   *
+   * <p>Two requirement types are checked: {@code assert-table-uuid} compares the expected UUID with
+   * the table id, and {@code assert-etag} compares the expected etag with the one generated for the
+   * table. A requirement with no type, an unrecognised type, or no expected value is skipped.
+   *
+   * @param tableInfo the current table info
+   * @param requirement the requirement to validate
+   * @throws BaseException if the requirement is not met
+   */
+  private void validateTableRequirement(TableInfo tableInfo, TableRequirement requirement) {
+    String requirementType = requirement.getType();
+    if (requirementType == null) {
+      return;
+    }
+
+    if ("assert-table-uuid".equals(requirementType)) {
+      UUID wantedUuid = requirement.getUuid();
+      if (wantedUuid == null) {
+        return;
+      }
+      UUID currentUuid = UUID.fromString(tableInfo.getTableId());
+      if (!wantedUuid.equals(currentUuid)) {
+        throw new BaseException(
+            ErrorCode.FAILED_PRECONDITION,
+            String.format(
+                "Requirement failed: assert-table-uuid. Expected UUID %s but found %s",
+                wantedUuid, currentUuid));
+      }
+    } else if ("assert-etag".equals(requirementType)) {
+      String wantedEtag = requirement.getEtag();
+      if (wantedEtag == null) {
+        return;
+      }
+      String currentEtag = generateEtag(tableInfo);
+      if (!wantedEtag.equals(currentEtag)) {
+        throw new BaseException(
+            ErrorCode.ABORTED,
+            String.format(
+                "Requirement failed: assert-etag. Expected etag %s but found %s",
+                wantedEtag, currentEtag));
+      }
+    }
+    // Any other requirement type is skipped.
+  }
+
+  /**
+   * Applies accumulated table updates by persisting them to the database.
+   *
+   * <p>A commit and/or latest backfilled version is posted through the delta commit repository
+   * first; comment, property and schema changes are then applied with a single {@code updateTable}
+   * call.
+   *
+   * @param session the Hibernate session
+   * @param tableInfo the current table info
+   * @param fullName the full table name
+   * @param accumulated the accumulated updates to apply
+   */
+  private void processTableUpdates(
+      Session session, TableInfo tableInfo, String fullName, AccumulatedTableUpdates accumulated) {
+    DeltaCommit deltaCommit = new DeltaCommit();
+
+    // Apply delta commit operation: copy the Delta REST commit into the UC commit model.
+    accumulated
+        .getCommit()
+        .ifPresent(
+            commitInfo -> {
+              io.unitycatalog.server.model.DeltaCommitInfo ucCommitInfo =
+                  new io.unitycatalog.server.model.DeltaCommitInfo()
+                      .version(commitInfo.getVersion())
+                      .timestamp(commitInfo.getTimestamp())
+                      .fileName(commitInfo.getFileName())
+                      .fileSize(commitInfo.getFileSize())
+                      .fileModificationTimestamp(commitInfo.getFileModificationTimestamp());
+
+              deltaCommit.commitInfo(ucCommitInfo);
+            });
+
+    // Apply latest backfilled version operation
+    accumulated.getLatestBackfilledVersion().ifPresent(deltaCommit::setLatestBackfilledVersion);
+
+    // Only post when the request carried a commit or a backfilled version.
+    if (deltaCommit.getCommitInfo() != null || deltaCommit.getLatestBackfilledVersion() != null) {
+      deltaCommit.tableId(tableInfo.getTableId()).tableUri(tableInfo.getStorageLocation());
+      repositories.getDeltaCommitRepository().postCommit(session, deltaCommit);
+    }
+
+    // Apply table metadata updates (properties, schema, comment) in a single operation
+    boolean hasMetadataChanges =
+        accumulated.getSetProperties().isPresent()
+            || accumulated.getRemoveProperties().isPresent()
+            || accumulated.getSchema().isPresent()
+            || accumulated.getComment().isPresent();
+
+    if (hasMetadataChanges) {
+      String comment = accumulated.getComment().orElse(null);
+
+      // Compute final properties by applying set and remove operations on top of the current
+      // ones. Left null when no property change was requested, which updateTable treats as
+      // "keep the existing properties".
+      Map<String, String> properties = null;
+      if (accumulated.getSetProperties().isPresent()
+          || accumulated.getRemoveProperties().isPresent()) {
+        Map<String, String> finalProperties =
+            tableInfo.getProperties() != null
+                ? new HashMap<>(tableInfo.getProperties())
+                : new HashMap<>();
+
+        // Apply property additions
+        accumulated.getSetProperties().ifPresent(finalProperties::putAll);
+
+        // Apply property removals
+        accumulated
+            .getRemoveProperties()
+            .ifPresent(toRemove -> toRemove.forEach(finalProperties::remove));
+
+        properties = finalProperties;
+      }
+
+      List<ColumnInfo> columns =
+          accumulated
+              .getSchema()
+              .map(TableRepository::convertDeltaColumnsToColumnInfoList)
+              .orElse(null);
+
+      updateTable(session, fullName, comment, properties, columns);
+    }
+  }
+
+  /**
+   * Converts Delta REST DeltaColumn list to Unity Catalog ColumnInfo list.
+   *
+   * <p>Name, nullability and comment are taken from each column's type-json. When the type is a
+   * plain string it is mapped to a {@code ColumnTypeName}; names that are not recognised fall back
+   * to STRING. When the type is not a plain string, type name and type text are left unset.
+   * Positions are assigned from the list order, starting at 0.
+   *
+   * @param schemaColumns the Delta REST column list
+   * @return the Unity Catalog column info list
+   * @throws BaseException with INVALID_ARGUMENT if a column has no type-json
+   */
+  public static List<ColumnInfo> convertDeltaColumnsToColumnInfoList(
+      List<DeltaColumn> schemaColumns) {
+    List<ColumnInfo> columns = new ArrayList<>();
+    int position = 0;
+    for (DeltaColumn col : schemaColumns) {
+      ColumnInfo columnInfo = new ColumnInfo();
+
+      // Extract all information from type-json (StructField)
+      StructField typeJson = col.getTypeJson();
+      if (typeJson == null) {
+        // Fail with a client error rather than a NullPointerException further down.
+        throw new BaseException(
+            ErrorCode.INVALID_ARGUMENT,
+            "Column at position " + position + " is missing its type-json definition");
+      }
+      // NOTE(review): this stores StructField.toString(); confirm that it yields JSON text.
+      columnInfo.typeJson(typeJson.toString());
+      columnInfo.name(typeJson.getName());
+      Object typeObj = typeJson.getType();
+      if (typeObj instanceof String jsonType) {
+        // Map JSON type names to ColumnTypeName enum. Locale.ROOT keeps the lower-casing
+        // independent of the server's default locale.
+        io.unitycatalog.server.model.ColumnTypeName typeName =
+            switch (jsonType.toLowerCase(Locale.ROOT)) {
+              case "integer" -> io.unitycatalog.server.model.ColumnTypeName.INT;
+              case "long" -> io.unitycatalog.server.model.ColumnTypeName.LONG;
+              case "short" -> io.unitycatalog.server.model.ColumnTypeName.SHORT;
+              case "byte" -> io.unitycatalog.server.model.ColumnTypeName.BYTE;
+              case "float" -> io.unitycatalog.server.model.ColumnTypeName.FLOAT;
+              case "double" -> io.unitycatalog.server.model.ColumnTypeName.DOUBLE;
+              case "boolean" -> io.unitycatalog.server.model.ColumnTypeName.BOOLEAN;
+              case "string" -> io.unitycatalog.server.model.ColumnTypeName.STRING;
+              case "binary" -> io.unitycatalog.server.model.ColumnTypeName.BINARY;
+              case "date" -> io.unitycatalog.server.model.ColumnTypeName.DATE;
+              case "timestamp" -> io.unitycatalog.server.model.ColumnTypeName.TIMESTAMP;
+              case "timestamp_ntz" -> io.unitycatalog.server.model.ColumnTypeName.TIMESTAMP_NTZ;
+              // NOTE(review): every other type name is recorded as STRING; confirm intended.
+              default -> io.unitycatalog.server.model.ColumnTypeName.STRING;
+            };
+        columnInfo.typeName(typeName);
+        columnInfo.typeText(jsonType);
+      }
+      columnInfo.nullable(typeJson.getNullable() != null ? typeJson.getNullable() : true);
+      columnInfo.comment(typeJson.getComment());
+
+      // Position is computed on-demand from array index
+      columnInfo.position(position);
+      position++;
+      columns.add(columnInfo);
+    }
+    return columns;
+  }
+
+  /**
+   * Generates an etag for a table based on its ID and update timestamp.
+   *
+   * <p>The etag is the quoted hexadecimal {@link String#hashCode()} of {@code tableId:updatedAt},
+   * with 0 standing in for a missing timestamp.
+   *
+   * @param tableInfo the table info
+   * @return the generated etag
+   */
+  public String generateEtag(TableInfo tableInfo) {
+    // Generate a simple etag based on table info
+    String data =
+        tableInfo.getTableId()
+            + ":"
+            + (tableInfo.getUpdatedAt() != null ? tableInfo.getUpdatedAt() : 0);
+    return "\"" + Integer.toHexString(data.hashCode()) + "\"";
+  }
 }
diff --git a/server/src/main/java/io/unitycatalog/server/persist/utils/ExternalLocationUtils.java b/server/src/main/java/io/unitycatalog/server/persist/utils/ExternalLocationUtils.java
index 0660f3f64c..b54d7e3f13 100644
--- a/server/src/main/java/io/unitycatalog/server/persist/utils/ExternalLocationUtils.java
+++ b/server/src/main/java/io/unitycatalog/server/persist/utils/ExternalLocationUtils.java
@@ -77,14 +77,14 @@ SecurableType.REGISTERED_MODEL, new DaoClassInfo(RegisteredModelInfoDAO.class, "
    *
  • If the URL is a parent path of one or more securable, we can not figure out the actual * owner but have to deny the access *
  • If the URL is under or the same path of any data securable, we'll figure out the UUID of - * that data securable along with its catalog&schema UUIDs and return the result. + * that data securable along with its catalog and schema UUIDs and return the result. *
  • If the URL is not owned by any data securable but only by external locations, return UUID * of that external location. *
  • Lastly if no securable is found, return empty map. * * * @param url the input URL to search securables for - * @return A map of SecurableType->UUID. For external location, this will be a 1-entry map. For + * @return A map of SecurableType->UUID. For external location, this will be a 1-entry map. For * data securables, this will be a 3-entry map. */ public Map getMapResourceIdsForPath(NormalizedURL url) { diff --git a/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestCatalogService.java b/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestCatalogService.java new file mode 100644 index 0000000000..d9226eddf1 --- /dev/null +++ b/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestCatalogService.java @@ -0,0 +1,1017 @@ +package io.unitycatalog.server.service.deltarest; + +import static io.unitycatalog.server.model.SecurableType.CATALOG; +import static io.unitycatalog.server.model.SecurableType.METASTORE; +import static io.unitycatalog.server.model.SecurableType.SCHEMA; +import static io.unitycatalog.server.model.SecurableType.TABLE; +import static io.unitycatalog.server.service.credential.CredentialContext.Privilege.SELECT; +import static io.unitycatalog.server.service.credential.CredentialContext.Privilege.UPDATE; + +import com.linecorp.armeria.common.HttpResponse; +import com.linecorp.armeria.common.HttpStatus; +import com.linecorp.armeria.server.annotation.Delete; +import com.linecorp.armeria.server.annotation.ExceptionHandler; +import com.linecorp.armeria.server.annotation.Get; +import com.linecorp.armeria.server.annotation.Head; +import com.linecorp.armeria.server.annotation.Param; +import com.linecorp.armeria.server.annotation.Post; +import com.linecorp.armeria.server.annotation.ProducesJson; +import io.unitycatalog.server.auth.UnityCatalogAuthorizer; +import io.unitycatalog.server.auth.annotation.AuthorizeExpression; +import io.unitycatalog.server.auth.annotation.AuthorizeResourceKey; +import 
io.unitycatalog.server.exception.BaseException; +import io.unitycatalog.server.exception.ErrorCode; +import io.unitycatalog.server.model.CatalogInfo; +import io.unitycatalog.server.model.CreateSchema; +import io.unitycatalog.server.model.CreateStagingTable; +import io.unitycatalog.server.model.CreateTable; +import io.unitycatalog.server.model.DataSourceFormat; +import io.unitycatalog.server.model.DeltaGetCommits; +import io.unitycatalog.server.model.DeltaGetCommitsResponse; +import io.unitycatalog.server.model.ListSchemasResponse; +import io.unitycatalog.server.model.ListTablesResponse; +import io.unitycatalog.server.model.SchemaInfo; +import io.unitycatalog.server.model.StagingTableInfo; +import io.unitycatalog.server.model.TableInfo; +import io.unitycatalog.server.model.TableType; +import io.unitycatalog.server.model.TemporaryCredentials; +import io.unitycatalog.server.model.UpdateSchema; +import io.unitycatalog.server.model.deltarest.CatalogConfig; +import io.unitycatalog.server.model.deltarest.CatalogConfigOverrides; +import io.unitycatalog.server.model.deltarest.CreateNamespaceRequest; +import io.unitycatalog.server.model.deltarest.CreateStagingTableRequest; +import io.unitycatalog.server.model.deltarest.CreateTableRequest; +import io.unitycatalog.server.model.deltarest.CredentialsResponse; +import io.unitycatalog.server.model.deltarest.DeltaColumn; +import io.unitycatalog.server.model.deltarest.DeltaProtocol; +import io.unitycatalog.server.model.deltarest.ListNamespacesResponse; +import io.unitycatalog.server.model.deltarest.TableIdentifierWithDataSourceFormat; +import io.unitycatalog.server.model.deltarest.LoadTableResponse; +import io.unitycatalog.server.model.deltarest.NamespaceResponse; +import io.unitycatalog.server.model.deltarest.RenameTableRequest; +import io.unitycatalog.server.model.deltarest.ReportMetricsRequest; +import io.unitycatalog.server.model.deltarest.StagingTableResponse; +import io.unitycatalog.server.model.deltarest.StorageCredential; 
+import io.unitycatalog.server.model.deltarest.StructField; +import io.unitycatalog.server.model.deltarest.TableMetadata; +import io.unitycatalog.server.model.deltarest.UpdateNamespacePropertiesRequest; +import io.unitycatalog.server.model.deltarest.UpdateNamespacePropertiesResponse; +import io.unitycatalog.server.model.deltarest.UpdateTableRequest; +import io.unitycatalog.server.persist.CatalogRepository; +import io.unitycatalog.server.persist.DeltaCommitRepository; +import io.unitycatalog.server.persist.MetastoreRepository; +import io.unitycatalog.server.persist.Repositories; +import io.unitycatalog.server.persist.SchemaRepository; +import io.unitycatalog.server.persist.StagingTableRepository; +import io.unitycatalog.server.persist.TableRepository; +import io.unitycatalog.server.service.AuthorizedService; +import io.unitycatalog.server.service.credential.CredentialContext; +import io.unitycatalog.server.service.credential.StorageCredentialVendor; +import io.unitycatalog.server.utils.NormalizedURL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import lombok.SneakyThrows; + +/** + * Delta REST Catalog Service - IRC-style API for Delta tables. + * + *

    This service follows the Apache Iceberg REST Catalog (IRC) API style but is Delta-centric. + * Unlike the actual IRC APIs implemented for UniForm and Managed Iceberg tables, this API + * does not provide a translation layer to make Delta tables work like Iceberg tables. + */ +@ExceptionHandler(DeltaRestExceptionHandler.class) +public class DeltaRestCatalogService extends AuthorizedService { + + private static final String PREFIX_TEMPLATE = "catalogs/%s"; + + private static final List ENDPOINTS = List.of( + // Table operations + "POST /v1/{prefix}/namespaces/{namespace}/staging-tables", + "POST /v1/{prefix}/namespaces/{namespace}/tables", + "GET /v1/{prefix}/namespaces/{namespace}/tables", + "GET /v1/{prefix}/namespaces/{namespace}/tables/{table}", + "POST /v1/{prefix}/namespaces/{namespace}/tables/{table}", + "DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}", + "HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}", + "POST /v1/{prefix}/tables/rename", + "GET /v1/{prefix}/namespaces/{namespace}/tables/{table}/credentials", + "GET /v1/{prefix}/namespaces/{namespace}/staging-tables/{table_id}/credentials", + "POST /v1/temporary-path-credentials", + // Schema operations + "GET /v1/{prefix}/namespaces", + "GET /v1/{prefix}/namespaces/{namespace}", + "POST /v1/{prefix}/namespaces", + "POST /v1/{prefix}/namespaces/{namespace}/properties", + "DELETE /v1/{prefix}/namespaces/{namespace}", + "HEAD /v1/{prefix}/namespaces/{namespace}", + // Metrics + "POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics" + ); + + private final TableRepository tableRepository; + private final SchemaRepository schemaRepository; + private final CatalogRepository catalogRepository; + private final MetastoreRepository metastoreRepository; + private final StagingTableRepository stagingTableRepository; + private final DeltaCommitRepository deltaCommitRepository; + private final StorageCredentialVendor storageCredentialVendor; + + @SneakyThrows + public 
DeltaRestCatalogService( + UnityCatalogAuthorizer authorizer, + Repositories repositories, + StorageCredentialVendor storageCredentialVendor) { + super(authorizer, repositories); + this.tableRepository = repositories.getTableRepository(); + this.schemaRepository = repositories.getSchemaRepository(); + this.catalogRepository = repositories.getCatalogRepository(); + this.metastoreRepository = repositories.getMetastoreRepository(); + this.stagingTableRepository = repositories.getStagingTableRepository(); + this.deltaCommitRepository = repositories.getDeltaCommitRepository(); + this.storageCredentialVendor = storageCredentialVendor; + } + + // ==================== Configuration API ==================== + + @Get("/v1/config") + @ProducesJson + public HttpResponse getConfig(@Param("catalog") String catalog) { + if (catalog == null || catalog.isEmpty()) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, + "Must supply a proper catalog in catalog parameter."); + } + + // Verify catalog exists + catalogRepository.getCatalog(catalog); + + CatalogConfigOverrides overrides = new CatalogConfigOverrides(); + overrides.setPrefix(String.format(PREFIX_TEMPLATE, catalog)); + + CatalogConfig response = new CatalogConfig(); + response.setOverrides(overrides); + response.setEndpoints(ENDPOINTS); + response.setManagedTablesRequiredFeatures(List.of( + "appendOnly", "catalogManaged", "deletionVectors", "inCommitTimestamp", + "invariants", "v2Checkpoint", "vacuumProtocolCheck" + )); + response.setManagedTablesSuggestedFeatures(List.of( + "rowTracking", "domainMetadata" + )); + + return HttpResponse.ofJson(response); + } + + // ==================== Staging Table APIs ==================== + + @Post("/v1/catalogs/{catalog}/namespaces/{namespace}/staging-tables") + @ProducesJson + @AuthorizeExpression(""" + (#authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) + && #authorize(#principal, #schema, OWNER)) || + (#authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) + && 
#authorizeAll(#principal, #schema, USE_SCHEMA, CREATE_TABLE)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse createStagingTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + CreateStagingTableRequest request) { + + CreateStagingTable createStagingTable = new CreateStagingTable() + .catalogName(catalog) + .schemaName(namespace) + .name(request.getName()); + + StagingTableInfo stagingTableInfo = + stagingTableRepository.createStagingTable(createStagingTable); + + SchemaInfo schemaInfo = schemaRepository.getSchema(catalog + "." + namespace); + initializeHierarchicalAuthorization(stagingTableInfo.getId(), schemaInfo.getSchemaId()); + + // Build response in DRC format + StagingTableResponse response = new StagingTableResponse(); + response.setTableId(UUID.fromString(stagingTableInfo.getId())); + response.setTableType(StagingTableResponse.TableTypeEnum.MANAGED); + response.setLocation(stagingTableInfo.getStagingLocation()); + + // Vend initial credentials + NormalizedURL storageLocation = NormalizedURL.from(stagingTableInfo.getStagingLocation()); + TemporaryCredentials credentials = + storageCredentialVendor.vendCredential(storageLocation, Set.of(SELECT, UPDATE)); + response.setStorageCredentials( + buildStorageCredentials(stagingTableInfo.getStagingLocation(), credentials)); + + return HttpResponse.ofJson(response); + } + + @Get("/v1/catalogs/{catalog}/namespaces/{namespace}/staging-tables/{table_id}/credentials") + @ProducesJson + @AuthorizeExpression(""" + #authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, MODIFY) + """) + public HttpResponse getStagingTableCredentials( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table_id") 
@AuthorizeResourceKey(TABLE) String tableId) { + + NormalizedURL storageLocation = + tableRepository.getStorageLocationForStagingTable(UUID.fromString(tableId)); + TemporaryCredentials credentials = + storageCredentialVendor.vendCredential(storageLocation, Set.of(SELECT, UPDATE)); + + CredentialsResponse response = + new CredentialsResponse() + .storageCredentials(buildStorageCredentials(storageLocation.toString(), credentials)); + + return HttpResponse.ofJson(response); + } + + // ==================== Table CRUD APIs ==================== + + @Post("/v1/catalogs/{catalog}/namespaces/{namespace}/tables") + @ProducesJson + @AuthorizeExpression(""" + #authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) && + (#authorize(#principal, #schema, OWNER) || + #authorizeAll(#principal, #schema, USE_SCHEMA, CREATE_TABLE)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse createTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + CreateTableRequest request) { + + String tableName = request.getName(); + String location = request.getLocation(); + + if (tableName == null || tableName.isEmpty()) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Table name is required"); + } + if (location == null || location.isEmpty()) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Location is required"); + } + + // Get table type from request + CreateTableRequest.TableTypeEnum reqTableType = request.getTableType(); + if (reqTableType == null) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Table type is required"); + } + TableType tableType = TableType.fromValue(reqTableType.getValue()); + + io.unitycatalog.server.model.deltarest.DataSourceFormat reqFormat = + request.getDataSourceFormat(); + String formatValue = reqFormat != null ? 
reqFormat.getValue() : "DELTA"; + + CreateTable createTable = new CreateTable() + .catalogName(catalog) + .schemaName(namespace) + .name(tableName) + .tableType(tableType) + .dataSourceFormat(DataSourceFormat.fromValue(formatValue)) + .storageLocation(location); + + // Handle comment if provided + String comment = request.getComment(); + if (comment != null) { + createTable.comment(comment); + } + + // Handle columns/schema if provided + List schemaColumns = request.getSchema(); + if (schemaColumns != null && !schemaColumns.isEmpty()) { + createTable.columns(TableRepository.convertDeltaColumnsToColumnInfoList(schemaColumns)); + } + + // Handle properties if provided + Map properties = request.getProperties(); + if (properties != null) { + createTable.properties(properties); + } + + TableInfo tableInfo = tableRepository.createTable(createTable); + + SchemaInfo schemaInfo = schemaRepository.getSchema(catalog + "." + namespace); + initializeHierarchicalAuthorization(tableInfo.getTableId(), schemaInfo.getSchemaId()); + + return HttpResponse.ofJson(buildLoadTableResponse(tableInfo, false)); + } + + @Get("/v1/catalogs/{catalog}/namespaces/{namespace}/tables") + @ProducesJson + @AuthorizeExpression("#defer") + public HttpResponse listTables( + @Param("catalog") String catalog, + @Param("namespace") String namespace, + @Param("pageToken") Optional pageToken) { + + // Get UC tables response + ListTablesResponse ucResponse = tableRepository.listTables( + catalog, namespace, Optional.of(1000), pageToken, false, false); + + filterTables(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && #authorize(#principal, #catalog, USE_CATALOG)) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorize(#principal, #catalog, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, SELECT, MODIFY)) + """, ucResponse.getTables()); + + // Convert to DRC format + List identifiers = new 
ArrayList<>(); + if (ucResponse.getTables() != null) { + for (TableInfo table : ucResponse.getTables()) { + TableIdentifierWithDataSourceFormat identifier = new TableIdentifierWithDataSourceFormat(); + identifier.setNamespace(List.of(table.getSchemaName())); + identifier.setName(table.getName()); + + // Convert UC DataSourceFormat to DRC DataSourceFormat + io.unitycatalog.server.model.deltarest.DataSourceFormat drcFormat = + table.getDataSourceFormat() != null + ? io.unitycatalog.server.model.deltarest.DataSourceFormat.fromValue( + table.getDataSourceFormat().getValue()) + : io.unitycatalog.server.model.deltarest.DataSourceFormat.DELTA; + identifier.setDataSourceFormat(drcFormat); + + identifiers.add(identifier); + } + } + + // Build DRC response + io.unitycatalog.server.model.deltarest.ListTablesResponse drcResponse = + new io.unitycatalog.server.model.deltarest.ListTablesResponse(); + drcResponse.setIdentifiers(identifiers); + drcResponse.setNextPageToken(ucResponse.getNextPageToken()); + + return HttpResponse.ofJson(drcResponse); + } + + @Get("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}") + @ProducesJson + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && #authorize(#principal, #catalog, USE_CATALOG)) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorize(#principal, #catalog, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, SELECT, MODIFY)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse loadTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String table, + @Param("with_credentials") Optional withCredentials) { + + String fullName = catalog + "." + namespace + "." 
+ table; + TableInfo tableInfo = tableRepository.getTable(fullName); + + return HttpResponse.ofJson(buildLoadTableResponse(tableInfo, withCredentials.orElse(false))); + } + + @Post("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}") + @ProducesJson + @AuthorizeExpression(""" + #authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, MODIFY) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse updateTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String tableName, + UpdateTableRequest request) { + + String fullName = catalog + "." + namespace + "." + tableName; + + // Execute all operations in a single transaction via repository + TableInfo tableInfo = tableRepository.updateTableWithDeltaRestRequest(fullName, request); + + return HttpResponse.ofJson(buildLoadTableResponse(tableInfo, false)); + } + + @Delete("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}") + @AuthorizeExpression(""" + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && #authorize(#principal, #catalog, USE_CATALOG)) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorize(#principal, #catalog, USE_CATALOG) && + #authorize(#principal, #table, OWNER)) + """) + public HttpResponse deleteTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String table) { + + String fullName = catalog + "." + namespace + "." + table; + TableInfo tableInfo = tableRepository.getTable(fullName); + tableRepository.deleteTable(fullName); + + SchemaInfo schemaInfo = schemaRepository.getSchema(catalog + "." 
+ namespace); + removeHierarchicalAuthorizations(tableInfo.getTableId(), schemaInfo.getSchemaId()); + + return HttpResponse.of(HttpStatus.NO_CONTENT); + } + + @Head("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}") + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && #authorize(#principal, #catalog, USE_CATALOG)) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorize(#principal, #catalog, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, SELECT, MODIFY)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse tableExists( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String table) { + + String fullName = catalog + "." + namespace + "." + table; + try { + tableRepository.getTable(fullName); + return HttpResponse.of(HttpStatus.NO_CONTENT); + } catch (BaseException e) { + if (e.getErrorCode() == ErrorCode.NOT_FOUND) { + return HttpResponse.of(HttpStatus.NOT_FOUND); + } + throw e; + } + } + + @Post("/v1/catalogs/{catalog}/tables/rename") + @AuthorizeExpression(""" + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && #authorize(#principal, #catalog, USE_CATALOG)) + """) + public HttpResponse renameTable( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + RenameTableRequest request) { + + if (request.getSource() == null || request.getDestination() == null) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Source and destination are required"); + } + + List sourceNamespace = request.getSource().getNamespace(); + String sourceName = request.getSource().getName(); + List destNamespace = request.getDestination().getNamespace(); + String destName = request.getDestination().getName(); + + // For now, only support rename within 
same namespace + String sourceFullName = catalog + "." + sourceNamespace.get(0) + "." + sourceName; + String destFullName = catalog + "." + destNamespace.get(0) + "." + destName; + + // TODO: Implement table rename in TableRepository + throw new BaseException(ErrorCode.UNIMPLEMENTED, "Table rename not yet implemented"); + } + + // ==================== Table Credentials API ==================== + + @Get("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}/credentials") + @ProducesJson + @AuthorizeExpression(""" + #authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, SELECT, MODIFY) + """) + public HttpResponse getTableCredentials( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String table) { + + String fullName = catalog + "." + namespace + "." 
+ table; + TableInfo tableInfo = tableRepository.getTable(fullName); + + NormalizedURL storageLocation = NormalizedURL.from(tableInfo.getStorageLocation()); + TemporaryCredentials credentials = + storageCredentialVendor.vendCredential(storageLocation, Set.of(SELECT, UPDATE)); + + CredentialsResponse response = + new CredentialsResponse() + .storageCredentials( + buildStorageCredentials(tableInfo.getStorageLocation(), credentials)); + + return HttpResponse.ofJson(response); + } + + @Get("/v1/temporary-path-credentials") + @ProducesJson + @AuthorizeExpression("#authorize(#principal, #metastore, OWNER)") + @AuthorizeResourceKey(METASTORE) + public HttpResponse getTemporaryPathCredentials( + @Param("location") String location, + @Param("operation") String operation) { + + if (location == null || location.isEmpty()) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Location is required"); + } + + Set privileges = Set.of(SELECT, UPDATE); + if ("PATH_CREATE_TABLE".equals(operation)) { + privileges = Set.of(SELECT, UPDATE); + } + + NormalizedURL storageLocation = NormalizedURL.from(location); + TemporaryCredentials credentials = + storageCredentialVendor.vendCredential(storageLocation, privileges); + + CredentialsResponse response = + new CredentialsResponse() + .storageCredentials(buildStorageCredentials(location, credentials)); + + return HttpResponse.ofJson(response); + } + + // ==================== Namespace (Schema) APIs ==================== + + @Get("/v1/catalogs/{catalog}/namespaces") + @ProducesJson + @AuthorizeExpression("#defer") + public HttpResponse listNamespaces( + @Param("catalog") String catalog, + @Param("pageToken") Optional pageToken) { + + ListSchemasResponse response = schemaRepository.listSchemas( + catalog, Optional.of(1000), pageToken); + + filterSchemas(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, OWNER, 
USE_CATALOG)) + """, response.getSchemas()); + + List> namespaces = new ArrayList<>(); + if (response.getSchemas() != null) { + for (SchemaInfo schema : response.getSchemas()) { + namespaces.add(List.of(schema.getName())); + } + } + + ListNamespacesResponse result = new ListNamespacesResponse(); + result.setNamespaces(namespaces); + result.setNextPageToken(response.getNextPageToken()); + + return HttpResponse.ofJson(result); + } + + @Get("/v1/catalogs/{catalog}/namespaces/{namespace}") + @ProducesJson + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, USE_CATALOG)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse loadNamespace( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace) { + + String fullName = catalog + "." + namespace; + SchemaInfo schemaInfo = schemaRepository.getSchema(fullName); + + NamespaceResponse response = new NamespaceResponse(); + response.setNamespace(List.of(schemaInfo.getName())); + + Map properties = new HashMap<>(); + if (schemaInfo.getOwner() != null) { + properties.put("owner", schemaInfo.getOwner()); + } + if (schemaInfo.getCreatedAt() != null) { + properties.put("created_at", schemaInfo.getCreatedAt().toString()); + } + if (schemaInfo.getComment() != null) { + properties.put("description", schemaInfo.getComment()); + } + if (schemaInfo.getSchemaId() != null) { + properties.put("io.unitycatalog.schemaId", schemaInfo.getSchemaId()); + } + if (schemaInfo.getProperties() != null) { + properties.putAll(schemaInfo.getProperties()); + } + response.setProperties(properties); + + return HttpResponse.ofJson(response); + } + + @Post("/v1/catalogs/{catalog}/namespaces") + @ProducesJson + @AuthorizeExpression(""" + #authorize(#principal, #catalog, OWNER) || + #authorizeAll(#principal, 
#catalog, USE_CATALOG, CREATE_SCHEMA) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse createNamespace( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + CreateNamespaceRequest request) { + + List namespace = request.getNamespace(); + Map properties = request.getProperties(); + + if (namespace == null || namespace.isEmpty()) { + throw new BaseException(ErrorCode.INVALID_ARGUMENT, "Namespace is required"); + } + + String schemaName = namespace.get(0); + CreateSchema createSchema = new CreateSchema() + .catalogName(catalog) + .name(schemaName); + + if (properties != null) { + if (properties.containsKey("description")) { + createSchema.comment(properties.get("description")); + } + createSchema.properties(properties); + } + + SchemaInfo schemaInfo = schemaRepository.createSchema(createSchema); + + CatalogInfo catalogInfo = catalogRepository.getCatalog(catalog); + initializeHierarchicalAuthorization(schemaInfo.getSchemaId(), catalogInfo.getId()); + + NamespaceResponse response = new NamespaceResponse(); + response.setNamespace(List.of(schemaInfo.getName())); + + Map responseProperties = new HashMap<>(); + if (schemaInfo.getOwner() != null) { + responseProperties.put("owner", schemaInfo.getOwner()); + } + if (schemaInfo.getSchemaId() != null) { + responseProperties.put("io.unitycatalog.schemaId", schemaInfo.getSchemaId()); + } + if (properties != null) { + responseProperties.putAll(properties); + } + response.setProperties(responseProperties); + + return HttpResponse.ofJson(response); + } + + @Post("/v1/catalogs/{catalog}/namespaces/{namespace}/properties") + @ProducesJson + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #schema, OWNER) || + #authorizeAll(#principal, #catalog, USE_CATALOG, USE_SCHEMA) || + (#authorize(#principal, #schema, USE_SCHEMA) && + #authorize(#principal, #catalog, USE_CATALOG)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse 
updateNamespaceProperties( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + UpdateNamespacePropertiesRequest request) { + + String fullName = catalog + "." + namespace; + + Map updates = request.getUpdates(); + List removals = request.getRemovals(); + + UpdateSchema updateSchema = new UpdateSchema(); + if (updates != null) { + if (updates.containsKey("name")) { + updateSchema.newName(updates.get("name")); + } + if (updates.containsKey("comment") || updates.containsKey("description")) { + updateSchema.comment(updates.getOrDefault("comment", updates.get("description"))); + } + } + + schemaRepository.updateSchema(fullName, updateSchema); + + List updated = updates != null ? new ArrayList<>(updates.keySet()) : List.of(); + List removed = new ArrayList<>(); + List missing = new ArrayList<>(); + if (removals != null) { + // Properties that were requested to be removed but not found + missing.addAll(removals); + } + + UpdateNamespacePropertiesResponse response = new UpdateNamespacePropertiesResponse(); + response.setUpdated(updated); + response.setRemoved(removed); + response.setMissing(missing); + + return HttpResponse.ofJson(response); + } + + @Delete("/v1/catalogs/{catalog}/namespaces/{namespace}") + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorize(#principal, #schema, OWNER) && + #authorizeAny(#principal, #catalog, USE_CATALOG)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse deleteNamespace( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace) { + + String fullName = catalog + "." 
+ namespace; + SchemaInfo schemaInfo = schemaRepository.getSchema(fullName); + schemaRepository.deleteSchema(fullName, false); + + CatalogInfo catalogInfo = catalogRepository.getCatalog(catalog); + + // First remove any child table links + authorizer.removeHierarchyChildren(UUID.fromString(schemaInfo.getSchemaId())); + // Then remove schema from catalog and clear authorizations + removeHierarchicalAuthorizations(schemaInfo.getSchemaId(), catalogInfo.getId()); + + return HttpResponse.of(HttpStatus.NO_CONTENT); + } + + @Head("/v1/catalogs/{catalog}/namespaces/{namespace}") + @AuthorizeExpression(""" + #authorize(#principal, #metastore, OWNER) || + #authorize(#principal, #catalog, OWNER) || + (#authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, USE_CATALOG)) + """) + @AuthorizeResourceKey(METASTORE) + public HttpResponse namespaceExists( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace) { + + String fullName = catalog + "." 
+ namespace; + try { + schemaRepository.getSchema(fullName); + return HttpResponse.of(HttpStatus.NO_CONTENT); + } catch (BaseException e) { + if (e.getErrorCode() == ErrorCode.NOT_FOUND) { + return HttpResponse.of(HttpStatus.NOT_FOUND); + } + throw e; + } + } + + // ==================== Metrics API ==================== + + @Post("/v1/catalogs/{catalog}/namespaces/{namespace}/tables/{table}/metrics") + @AuthorizeExpression(""" + #authorizeAny(#principal, #schema, OWNER, USE_SCHEMA) && + #authorizeAny(#principal, #catalog, OWNER, USE_CATALOG) && + #authorizeAny(#principal, #table, OWNER, SELECT, MODIFY) + """) + public HttpResponse reportMetrics( + @Param("catalog") @AuthorizeResourceKey(CATALOG) String catalog, + @Param("namespace") @AuthorizeResourceKey(SCHEMA) String namespace, + @Param("table") @AuthorizeResourceKey(TABLE) String table, + ReportMetricsRequest metrics) { + // Accept and log metrics but don't process them for now + return HttpResponse.of(HttpStatus.NO_CONTENT); + } + + // ==================== Helper Methods ==================== + + private LoadTableResponse buildLoadTableResponse(TableInfo tableInfo, boolean withCredentials) { + LoadTableResponse response = new LoadTableResponse(); + + // Build metadata + TableMetadata metadata = new TableMetadata(); + metadata.setEtag(tableRepository.generateEtag(tableInfo)); + + // Data source format + io.unitycatalog.server.model.deltarest.DataSourceFormat drcFormat = + io.unitycatalog.server.model.deltarest.DataSourceFormat.DELTA; + if (tableInfo.getDataSourceFormat() != null) { + drcFormat = + io.unitycatalog.server.model.deltarest.DataSourceFormat.fromValue( + tableInfo.getDataSourceFormat().getValue()); + } + metadata.setDataSourceFormat(drcFormat); + + // Table type + if (tableInfo.getTableType() != null) { + metadata.setTableType( + TableMetadata.TableTypeEnum.fromValue(tableInfo.getTableType().getValue())); + } else { + metadata.setTableType(TableMetadata.TableTypeEnum.MANAGED); + } + + 
metadata.setTableUuid(UUID.fromString(tableInfo.getTableId())); + metadata.setLocation(tableInfo.getStorageLocation()); + metadata.setOwner(tableInfo.getOwner()); + metadata.setComment(tableInfo.getComment()); + metadata.setCreateTime(tableInfo.getCreatedAt()); + metadata.setCreatedBy(tableInfo.getCreatedBy()); + metadata.setUpdateTime(tableInfo.getUpdatedAt()); + metadata.setUpdatedBy(tableInfo.getUpdatedBy()); + metadata.setSecurableType("TABLE"); + + // Convert columns to DRC schema format + if (tableInfo.getColumns() != null && !tableInfo.getColumns().isEmpty()) { + List schema = + tableInfo.getColumns().stream() + .map(this::convertColumnToDeltaColumn) + .collect(Collectors.toList()); + metadata.setSchema(schema); + } else { + metadata.setSchema(List.of()); + } + + // Add protocol + DeltaProtocol protocol = new DeltaProtocol(); + protocol.setMinReaderVersion(1); + protocol.setMinWriterVersion(2); + protocol.setReaderFeatures(List.of()); + protocol.setWriterFeatures(List.of("appendOnly", "inCommitTimestamp")); + metadata.setProtocol(protocol); + + // Add properties + metadata.setProperties( + tableInfo.getProperties() != null ? 
tableInfo.getProperties() : Map.of());
+
+    response.setMetadata(metadata);
+
+    // Get commits if this is a managed Delta table
+    if (tableInfo.getTableType() == TableType.MANAGED
+        && tableInfo.getDataSourceFormat() == DataSourceFormat.DELTA) {
+      try {
+        DeltaGetCommits getCommitsRequest =
+            new DeltaGetCommits()
+                .tableId(tableInfo.getTableId())
+                .tableUri(tableInfo.getStorageLocation())
+                .startVersion(0L);
+        DeltaGetCommitsResponse commitsResponse =
+            deltaCommitRepository.getCommits(getCommitsRequest);
+        if (commitsResponse.getCommits() != null) {
+          List commits =
+              commitsResponse.getCommits().stream()
+                  .map(this::convertCommitInfoToDeltaCommit)
+                  .collect(Collectors.toList());
+          response.setCommits(commits);
+        } else {
+          response.setCommits(List.of());
+        }
+        response.setLatestTableVersion(commitsResponse.getLatestTableVersion());
+      } catch (Exception e) {
+        // If commits can't be retrieved, just set defaults
+        response.setCommits(List.of());
+        response.setLatestTableVersion(-1L);
+      }
+    } else {
+      response.setCommits(List.of());
+      response.setLatestTableVersion(-1L);
+    }
+
+    response.setConfig(Map.of());
+
+    // Add credentials if requested
+    if (withCredentials && tableInfo.getStorageLocation() != null) {
+      NormalizedURL storageLocation = NormalizedURL.from(tableInfo.getStorageLocation());
+      TemporaryCredentials credentials =
+          storageCredentialVendor.vendCredential(storageLocation, Set.of(SELECT, UPDATE));
+      response.setStorageCredentials(
+          buildStorageCredentials(tableInfo.getStorageLocation(), credentials));
+    }
+
+    return response;
+  }
+
+  /**
+   * Packages vended temporary credentials as the storage-credential list of a table response.
+   *
+   * <p>Only the cloud-specific sections that are present on {@code credentials} contribute
+   * entries to the credential's config map; absent sections and null fields are skipped.
+   *
+   * @param prefix value stored as the credential's prefix (the caller in this class passes the
+   *     table's storage location)
+   * @param credentials temporary credentials to translate; must not be null
+   * @return an immutable single-element list containing the populated credential
+   */
+  private List<StorageCredential> buildStorageCredentials(
+      String prefix, TemporaryCredentials credentials) {
+    StorageCredential credential = new StorageCredential();
+    credential.setPrefix(prefix);
+
+    Map<String, String> config = new HashMap<>();
+    var aws = credentials.getAwsTempCredentials();
+    if (aws != null) {
+      if (aws.getAccessKeyId() != null) {
+        config.put("s3.access-key-id", aws.getAccessKeyId());
+      }
+      if (aws.getSecretAccessKey() != null) {
+        config.put("s3.secret-access-key", aws.getSecretAccessKey());
+      }
+      if (aws.getSessionToken() != null) {
+        config.put("s3.session-token", aws.getSessionToken());
+      }
+    }
+    var azure = credentials.getAzureUserDelegationSas();
+    if (azure != null && azure.getSasToken() != null) {
+      config.put("azure.sas-token", azure.getSasToken());
+    }
+    var gcp = credentials.getGcpOauthToken();
+    if (gcp != null && gcp.getOauthToken() != null) {
+      config.put("gcs.oauth-token", gcp.getOauthToken());
+    }
+    credential.setConfig(config);
+
+    // Add expiration at top level to standardize across all cloud providers
+    if (credentials.getExpirationTime() != null) {
+      credential.setExpirationTimeMs(credentials.getExpirationTime());
+    }
+
+    return List.of(credential);
+  }
+
+  /**
+   * Converts a Unity Catalog column into a {@link DeltaColumn} that carries only a type-json
+   * {@link StructField}.
+   *
+   * <p>The field copies the column's name and comment, maps the column type to a Delta/Spark
+   * JSON type name, treats an unset nullability as nullable, and uses empty metadata. A missing
+   * or unmapped type name falls back to {@code "string"}.
+   *
+   * @param column source column
+   * @return the Delta column wrapping the generated struct field
+   */
+  private DeltaColumn convertColumnToDeltaColumn(io.unitycatalog.server.model.ColumnInfo column) {
+    // Build StructField for type-json
+    StructField typeJson = new StructField();
+    typeJson.setName(column.getName());
+
+    // Convert ColumnTypeName to Delta/Spark JSON type name
+    String jsonTypeName = "string"; // default
+    if (column.getTypeName() != null) {
+      jsonTypeName =
+          switch (column.getTypeName()) {
+            case INT -> "integer";
+            case LONG -> "long";
+            case SHORT -> "short";
+            case BYTE -> "byte";
+            case FLOAT -> "float";
+            case DOUBLE -> "double";
+            case BOOLEAN -> "boolean";
+            case STRING -> "string";
+            case BINARY -> "binary";
+            case DATE -> "date";
+            case TIMESTAMP -> "timestamp";
+            case TIMESTAMP_NTZ -> "timestamp_ntz";
+            // Parameterized and nested types (e.g. DECIMAL(10,2)) are described by typeText.
+            case DECIMAL, ARRAY, MAP, STRUCT -> column.getTypeText();
+            default -> "string";
+          };
+    }
+
+    typeJson.setType(jsonTypeName);
+    typeJson.setNullable(column.getNullable() != null ? column.getNullable() : true);
+    typeJson.setComment(column.getComment());
+    typeJson.setMetadata(Map.of());
+
+    // DeltaColumn only contains typeJson - all other fields are derived from it
+    DeltaColumn deltaColumn = new DeltaColumn();
+    deltaColumn.setTypeJson(typeJson);
+
+    return deltaColumn;
+  }
+
+  /**
+   * Copies version, timestamp, file name, file size and file modification timestamp from a
+   * stored commit record into the Delta REST commit model.
+   *
+   * @param commitInfo commit as returned by the commit repository
+   * @return the equivalent Delta REST commit
+   */
+  private io.unitycatalog.server.model.deltarest.DeltaCommit convertCommitInfoToDeltaCommit(
+      io.unitycatalog.server.model.DeltaCommitInfo commitInfo) {
+    io.unitycatalog.server.model.deltarest.DeltaCommit deltaCommit =
+        new io.unitycatalog.server.model.deltarest.DeltaCommit();
+    deltaCommit.setVersion(commitInfo.getVersion());
+    deltaCommit.setTimestamp(commitInfo.getTimestamp());
+    deltaCommit.setFileName(commitInfo.getFileName());
+    deltaCommit.setFileSize(commitInfo.getFileSize());
+    deltaCommit.setFileModificationTimestamp(commitInfo.getFileModificationTimestamp());
+    return deltaCommit;
+  }
+
+  /**
+   * Applies an authorization expression to a list of tables on behalf of the current principal.
+   *
+   * <p>Delegates to {@code evaluator.filter}, supplying for each table the ids of its metastore,
+   * catalog, schema and the table itself. Does nothing when {@code entries} is null.
+   * NOTE(review): the method returns nothing, so the evaluator presumably prunes {@code entries}
+   * in place - confirm against the evaluator implementation.
+   *
+   * @param expression authorization expression handed to the evaluator
+   * @param entries tables to filter; may be null
+   */
+  public void filterTables(
+      String expression, List<io.unitycatalog.server.model.TableInfo> entries) {
+    if (entries == null) return;
+    UUID principalId = userRepository.findPrincipalId();
+
+    evaluator.filter(
+        principalId,
+        expression,
+        entries,
+        ti -> {
+          CatalogInfo catalogInfo = catalogRepository.getCatalog(ti.getCatalogName());
+          SchemaInfo schemaInfo =
+              schemaRepository.getSchema(ti.getCatalogName() + "." + ti.getSchemaName());
+          return Map.of(
+              METASTORE,
+              metastoreRepository.getMetastoreId(),
+              CATALOG,
+              UUID.fromString(catalogInfo.getId()),
+              SCHEMA,
+              UUID.fromString(schemaInfo.getSchemaId()),
+              TABLE,
+              UUID.fromString(ti.getTableId()));
+        });
+  }
+
+  /**
+   * Applies an authorization expression to a list of schemas on behalf of the current principal.
+   *
+   * <p>Delegates to {@code evaluator.filter}, supplying for each schema the ids of its
+   * metastore, catalog and the schema itself. Does nothing when {@code entries} is null.
+   *
+   * @param expression authorization expression handed to the evaluator
+   * @param entries schemas to filter; may be null
+   */
+  public void filterSchemas(String expression, List<SchemaInfo> entries) {
+    if (entries == null) return;
+    UUID principalId = userRepository.findPrincipalId();
+
+    evaluator.filter(
+        principalId,
+        expression,
+        entries,
+        si -> {
+          CatalogInfo catalogInfo = catalogRepository.getCatalog(si.getCatalogName());
+          return Map.of(
+              METASTORE,
+              metastoreRepository.getMetastoreId(),
+              CATALOG,
+              UUID.fromString(catalogInfo.getId()),
+              SCHEMA,
+              UUID.fromString(si.getSchemaId()));
+        });
+  }
+}
diff --git a/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestExceptionHandler.java b/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestExceptionHandler.java
new file mode 100644
index 0000000000..c07245b462
--- /dev/null
+++ b/server/src/main/java/io/unitycatalog/server/service/deltarest/DeltaRestExceptionHandler.java
@@ -0,0 +1,82 @@
+package io.unitycatalog.server.service.deltarest;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.linecorp.armeria.common.HttpRequest;
+import com.linecorp.armeria.common.HttpResponse;
+import com.linecorp.armeria.common.HttpStatus;
+import com.linecorp.armeria.common.MediaType;
+import com.linecorp.armeria.server.ServiceRequestContext;
+import com.linecorp.armeria.server.annotation.ExceptionHandlerFunction;
+import io.unitycatalog.server.exception.BaseException;
+import io.unitycatalog.server.exception.ErrorCode;
+import java.util.HashMap;
+import java.util.Map;
+import lombok.SneakyThrows;
+
+/**
+ * Exception handler for Delta REST Catalog API.
+ *
+ *

    <p>Converts exceptions to JSON error responses following the Delta REST Catalog error format.
+ *
+ * <p>Each response body is a JSON object with a single "error" member that holds "code" (the
+ * numeric HTTP status), "type" and "message".
+ */
+public class DeltaRestExceptionHandler implements ExceptionHandlerFunction {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  /**
+   * Maps a thrown exception to an HTTP error response.
+   *
+   * <p>{@link BaseException} uses the status of its error code; {@link
+   * IllegalArgumentException} becomes 400 and {@link SecurityException} becomes 403. Anything
+   * else becomes 500. If building the JSON body itself fails, a body-less 500 is returned.
+   *
+   * @param ctx request context (unused)
+   * @param req the failed request (unused)
+   * @param cause the exception raised while serving the request
+   * @return the error response to send to the client
+   */
+  @Override
+  public HttpResponse handleException(ServiceRequestContext ctx, HttpRequest req, Throwable cause) {
+    try {
+      if (cause instanceof BaseException baseException) {
+        return handleBaseException(baseException);
+      }
+      if (cause instanceof IllegalArgumentException) {
+        return createErrorResponse(
+            HttpStatus.BAD_REQUEST, "BadRequestException", cause.getMessage());
+      }
+      if (cause instanceof SecurityException) {
+        return createErrorResponse(
+            HttpStatus.FORBIDDEN, "ForbiddenException", cause.getMessage());
+      }
+      // NOTE(review): this echoes the exception class name and message of unexpected failures
+      // to the client; confirm that is acceptable outside the POC.
+      return createErrorResponse(
+          HttpStatus.INTERNAL_SERVER_ERROR, cause.getClass().getSimpleName(), cause.getMessage());
+    } catch (Exception e) {
+      // Producing the error payload failed; fall back to a bare 500 rather than propagating.
+      return HttpResponse.of(HttpStatus.INTERNAL_SERVER_ERROR);
+    }
+  }
+
+  /** Builds the error response for a {@link BaseException} from its error code and message. */
+  private HttpResponse handleBaseException(BaseException exception) {
+    ErrorCode errorCode = exception.getErrorCode();
+    return createErrorResponse(
+        errorCode.getHttpStatus(), mapErrorCodeToType(errorCode), exception.getMessage());
+  }
+
+  /**
+   * Translates a Unity Catalog error code into the "type" string of the error payload.
+   *
+   * @param errorCode error code carried by the exception
+   * @return the matching type name, or {@code "UnknownException"} for unmapped codes
+   */
+  private String mapErrorCodeToType(ErrorCode errorCode) {
+    return switch (errorCode) {
+      case NOT_FOUND -> "NoSuchEntityException";
+      case ALREADY_EXISTS -> "AlreadyExistsException";
+      case INVALID_ARGUMENT -> "BadRequestException";
+      case PERMISSION_DENIED -> "ForbiddenException";
+      case UNAUTHENTICATED -> "UnauthorizedException";
+      case FAILED_PRECONDITION -> "PreconditionFailedException";
+      case RESOURCE_EXHAUSTED -> "ResourceExhaustedException";
+      case ABORTED -> "ConflictException";
+      case UNIMPLEMENTED -> "NotImplementedException";
+      case INTERNAL -> "InternalServerErrorException";
+      case DATA_LOSS -> "DataLossException";
+      default -> "UnknownException";
+    };
+  }
+
+  /**
+   * Serializes the error payload and wraps it in a JSON response with the given status.
+   *
+   * @param status HTTP status of the response; its numeric code is also written to the body
+   * @param errorType value of the "type" member
+   * @param message value of the "message" member; null is replaced by "Unknown error"
+   * @return the JSON error response
+   */
+  @SneakyThrows
+  private HttpResponse createErrorResponse(HttpStatus status, String errorType, String message) {
+    Map<String, Object> error = new HashMap<>();
+    error.put(
+        "error",
+        Map.of(
+            "code", status.code(),
+            "type", errorType,
+            "message", message != null ? message : "Unknown error"));
+
+    return HttpResponse.of(status, MediaType.JSON, MAPPER.writeValueAsString(error));
+  }
+}
diff --git a/server/src/main/java/io/unitycatalog/server/service/deltarest/TableUpdateDeserializer.java b/server/src/main/java/io/unitycatalog/server/service/deltarest/TableUpdateDeserializer.java
new file mode 100644
index 0000000000..d5061c7833
--- /dev/null
+++ b/server/src/main/java/io/unitycatalog/server/service/deltarest/TableUpdateDeserializer.java
@@ -0,0 +1,52 @@
+package io.unitycatalog.server.service.deltarest;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.unitycatalog.server.model.deltarest.AddCommitUpdate;
+import io.unitycatalog.server.model.deltarest.RemovePropertiesUpdate;
+import io.unitycatalog.server.model.deltarest.SetLatestBackfilledVersionUpdate;
+import io.unitycatalog.server.model.deltarest.SetPropertiesUpdate;
+import io.unitycatalog.server.model.deltarest.SetSchemaUpdate;
+import io.unitycatalog.server.model.deltarest.SetTableCommentUpdate;
+import io.unitycatalog.server.model.deltarest.UpdateProtocolUpdate;
+import java.io.IOException;
+
+/**
+ * Custom deserializer for TableUpdate that handles polymorphic deserialization based on the
+ * "action" field. This is needed because the OpenAPI Generator (with library: "resteasy")
+ * generates separate classes for each update type without creating an inheritance relationship,
+ * even though they are defined as oneOf with a discriminator in the OpenAPI spec.
+ */
+public class TableUpdateDeserializer extends JsonDeserializer<Object> {
+
+  /**
+   * Reads one update object and binds it to the model class selected by its "action" field.
+   *
+   * @param jsonParser parser positioned at the update object
+   * @param context deserialization context (unused)
+   * @return an instance of the update class that corresponds to the action
+   * @throws IOException if the "action" field is missing or null, names an unknown action, or
+   *     the object cannot be bound to the selected class
+   */
+  @Override
+  public Object deserialize(JsonParser jsonParser, DeserializationContext context)
+      throws IOException {
+    // The parser's codec is not guaranteed to be an ObjectMapper (it is an ObjectReader when
+    // deserialization was started from a reader), so use the ObjectCodec API instead of casting.
+    var codec = jsonParser.getCodec();
+    JsonNode node = codec.readTree(jsonParser);
+
+    // Extract the "action" field to determine which type to deserialize
+    JsonNode actionNode = node == null ? null : node.get("action");
+    if (actionNode == null || actionNode.isNull()) {
+      throw new IOException("Missing required 'action' field in TableUpdate");
+    }
+
+    String action = actionNode.asText();
+
+    // Deserialize based on the action type
+    return switch (action) {
+      case "delta-add-commit" -> codec.treeToValue(node, AddCommitUpdate.class);
+      case "delta-set-latest-backfilled-version" ->
+          codec.treeToValue(node, SetLatestBackfilledVersionUpdate.class);
+      case "delta-set-schema-and-column-masks" -> codec.treeToValue(node, SetSchemaUpdate.class);
+      case "delta-set-table-comment" -> codec.treeToValue(node, SetTableCommentUpdate.class);
+      case "delta-update-protocol" -> codec.treeToValue(node, UpdateProtocolUpdate.class);
+      case "remove-properties" -> codec.treeToValue(node, RemovePropertiesUpdate.class);
+      case "set-properties" -> codec.treeToValue(node, SetPropertiesUpdate.class);
+      default -> throw new IOException("Unknown action type: " + action);
+    };
+  }
+}
diff --git a/server/src/test/java/io/unitycatalog/server/sdk/deltacommits/SdkDeltaCommitsCRUDTest.java b/server/src/test/java/io/unitycatalog/server/sdk/deltacommits/SdkDeltaCommitsCRUDTest.java
index 59af82fdd9..c174f1fb0c 100644
--- a/server/src/test/java/io/unitycatalog/server/sdk/deltacommits/SdkDeltaCommitsCRUDTest.java
+++ b/server/src/test/java/io/unitycatalog/server/sdk/deltacommits/SdkDeltaCommitsCRUDTest.java
@@ -693,4 +693,73 @@ public void testCommitWithMetadata() throws ApiException {
         c -> c.setMetadata(new DeltaMetadata()),
         "At least one of description, properties, or schema must be set in commit.metadata");
   }
+
+  /**
+   * Posts a single "delta-add-commit" update to the Delta REST table endpoint and checks that the
+   * server accepts it, i.e. that the update's "action" is resolved to a concrete update type.
+   */
+  @Test
+  public void testDeltaRestApiCommit() throws Exception {
+    // Test the Delta REST API endpoint to verify polymorphic deserialization works
+    String catalog = TestUtils.CATALOG_NAME;
+    String schema = TestUtils.SCHEMA_NAME;
+    String table = TestUtils.TABLE_NAME;
+
+    String fullName =
+        tableInfo.getCatalogName()
+            + "."
+            + tableInfo.getSchemaName()
+            + "."
+            + tableInfo.getName();
+    System.out.println("Testing Delta REST API with table: " + fullName);
+    System.out.println("Table ID: " + tableInfo.getTableId());
+    System.out.println("Storage Location: " + tableInfo.getStorageLocation());
+
+    // Build the Delta REST API request JSON. The payload has no placeholders, so it is used as
+    // a plain text block rather than being passed through String.format.
+    String requestJson =
+        """
+        {
+          "requirements": [],
+          "updates": [
+            {
+              "action": "delta-add-commit",
+              "commit": {
+                "version": 1,
+                "timestamp": 1700000001,
+                "file-name": "00000000000000000001.json",
+                "file-size": 1024,
+                "file-modification-timestamp": 1700000001
+              }
+            }
+          ]
+        }
+        """;
+
+    // Make HTTP POST request to Delta REST API endpoint
+    java.net.http.HttpClient client = java.net.http.HttpClient.newHttpClient();
+    String url =
+        String.format(
+            "%s/api/2.1/unity-catalog/delta-rest/v1/catalogs/%s/namespaces/%s/tables/%s",
+            serverConfig.getServerUrl(),
+            catalog,
+            schema,
+            table);
+
+    System.out.println("Making request to URL: " + url);
+    System.out.println("Request JSON: " + requestJson);
+
+    java.net.http.HttpRequest request =
+        java.net.http.HttpRequest.newBuilder()
+            .uri(java.net.URI.create(url))
+            .header("Content-Type", "application/json")
+            .header("Authorization", "Bearer " + serverConfig.getAuthToken())
+            .POST(java.net.http.HttpRequest.BodyPublishers.ofString(requestJson))
+            .build();
+
+    java.net.http.HttpResponse<String> response =
+        client.send(request, java.net.http.HttpResponse.BodyHandlers.ofString());
+
+    System.out.println("Delta REST API Response Status: " + response.statusCode());
+    System.out.println("Delta REST API Response Body: " + response.body());
+
+    // The request should be successfully deserialized and processed
+    assertThat(response.statusCode()).isEqualTo(200);
+    assertThat(response.body()).doesNotContain("Could not resolve type id");
+    assertThat(response.body()).doesNotContain("InvalidTypeIdException");
+    assertThat(response.body()).doesNotContain("cannot be cast");
+  }
 }