-
Notifications
You must be signed in to change notification settings - Fork 88
[DO NOT MERGE] Standalone Nexus Operations #685
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6724dab
5c14da2
6e7c81c
0b8ceb6
7fa3ce5
44adc50
af1af0e
b0a71e4
50e415e
9b1baed
d1c5a6c
3cbc3bc
f494689
4f7081a
afe9ec5
1e570f8
d99cea3
31047d7
a1bac0c
6b6a666
c706e34
1077802
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,3 +20,63 @@ enum NexusHandlerErrorRetryBehavior { | |
| NEXUS_HANDLER_ERROR_RETRY_BEHAVIOR_NON_RETRYABLE = 2; | ||
| } | ||
|
|
||
| // Status of a standalone Nexus operation execution. | ||
| // The status is updated once, when the operation is originally scheduled, and again when the | ||
| // operation reaches a terminal status. | ||
| // (-- api-linter: core::0216::synonyms=disabled | ||
| // aip.dev/not-precedent: Named consistently with WorkflowExecutionStatus. --) | ||
| enum NexusOperationExecutionStatus { | ||
| NEXUS_OPERATION_EXECUTION_STATUS_UNSPECIFIED = 0; | ||
| // The operation is not in a terminal status. The operation may be attempting to start, | ||
| // backing off between attempts, or already started. | ||
| NEXUS_OPERATION_EXECUTION_STATUS_RUNNING = 1; | ||
| // The operation completed successfully. | ||
| NEXUS_OPERATION_EXECUTION_STATUS_COMPLETED = 2; | ||
| // The operation completed with failure. | ||
| NEXUS_OPERATION_EXECUTION_STATUS_FAILED = 3; | ||
| // The operation completed as canceled. | ||
| // Requesting to cancel an operation does not automatically transition the operation to canceled status; whether it | ||
| // does depends on the current operation status and the cancellation type used. | ||
| NEXUS_OPERATION_EXECUTION_STATUS_CANCELED = 4; | ||
| // The operation was terminated. Termination happens immediately without notifying the handler. | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note to future us: This comment may not be true if we add a Nexus equivalent of a "parent close policy". |
||
| NEXUS_OPERATION_EXECUTION_STATUS_TERMINATED = 5; | ||
| // The operation has timed out by reaching one of its configured timeouts (for example, schedule-to-close). | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be any timeout now that we've added start-to-close and schedule-to-close. |
||
| NEXUS_OPERATION_EXECUTION_STATUS_TIMED_OUT = 6; | ||
| } | ||
|
|
||
| // Stage that can be specified when waiting on a nexus operation. | ||
| enum NexusOperationWaitStage { | ||
| NEXUS_OPERATION_WAIT_STAGE_UNSPECIFIED = 0; | ||
| // Wait for the operation to be started. | ||
| NEXUS_OPERATION_WAIT_STAGE_STARTED = 1; | ||
| // Wait for the operation to be in a terminal state, either successful or unsuccessful. | ||
| NEXUS_OPERATION_WAIT_STAGE_CLOSED = 2; | ||
| } | ||
|
|
||
| // Defines whether to allow re-using an operation ID from a previously *closed* Nexus operation. | ||
| // If the request is denied, the server returns a `NexusOperationAlreadyStarted` error. | ||
| // | ||
| // See `NexusOperationIdConflictPolicy` for handling ID duplication with a *running* operation. | ||
| enum NexusOperationIdReusePolicy { | ||
| NEXUS_OPERATION_ID_REUSE_POLICY_UNSPECIFIED = 0; | ||
| // Always allow starting an operation using the same operation ID. | ||
| NEXUS_OPERATION_ID_REUSE_POLICY_ALLOW_DUPLICATE = 1; | ||
| // Allow starting an operation using the same ID only when the last operation's final state is one | ||
| // of {failed, canceled, terminated, timed out}. | ||
| NEXUS_OPERATION_ID_REUSE_POLICY_ALLOW_DUPLICATE_FAILED_ONLY = 2; | ||
| // Do not permit re-use of the ID for this operation. Future start requests could potentially change the policy, | ||
| // allowing re-use of the ID. | ||
| NEXUS_OPERATION_ID_REUSE_POLICY_REJECT_DUPLICATE = 3; | ||
| } | ||
|
|
||
| // Defines what to do when trying to start a Nexus operation with the same ID as a *running* operation. | ||
| // Note that it is *never* valid to have two running instances of the same operation ID. | ||
| // | ||
| // See `NexusOperationIdReusePolicy` for handling operation ID duplication with a *closed* operation. | ||
| enum NexusOperationIdConflictPolicy { | ||
| NEXUS_OPERATION_ID_CONFLICT_POLICY_UNSPECIFIED = 0; | ||
| // Don't start a new operation; instead return `NexusOperationAlreadyStarted` error. | ||
| NEXUS_OPERATION_ID_CONFLICT_POLICY_FAIL = 1; | ||
| // Don't start a new operation; instead return a handle for the running operation. | ||
| NEXUS_OPERATION_ID_CONFLICT_POLICY_USE_EXISTING = 2; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,10 +9,13 @@ option java_outer_classname = "MessageProto"; | |
| option ruby_package = "Temporalio::Api::Nexus::V1"; | ||
| option csharp_namespace = "Temporalio.Api.Nexus.V1"; | ||
|
|
||
| import "google/protobuf/duration.proto"; | ||
| import "google/protobuf/timestamp.proto"; | ||
| import "temporal/api/common/v1/message.proto"; | ||
| import "temporal/api/enums/v1/common.proto"; | ||
| import "temporal/api/enums/v1/nexus.proto"; | ||
| import "temporal/api/failure/v1/message.proto"; | ||
| import "temporal/api/sdk/v1/user_metadata.proto"; | ||
|
|
||
| // A general purpose failure message. | ||
| // See: https://github.com/nexus-rpc/api/blob/main/SPEC.md#failure | ||
|
|
@@ -199,7 +202,7 @@ message EndpointTarget { | |
| // Nexus task queue to route requests to. | ||
| string task_queue = 2; | ||
| } | ||
|
|
||
| // Target an external server by URL. | ||
| // At a later point, this will support providing credentials; in the meantime, an http.RoundTripper can be injected | ||
| // into the server to modify the request. | ||
|
|
@@ -213,3 +216,138 @@ message EndpointTarget { | |
| External external = 2; | ||
| } | ||
| } | ||
|
|
||
| // NexusOperationExecutionCancellationInfo contains the state of a Nexus operation cancellation. | ||
| message NexusOperationExecutionCancellationInfo { | ||
| // The time when cancellation was requested. | ||
| google.protobuf.Timestamp requested_time = 1; | ||
|
|
||
| temporal.api.enums.v1.NexusOperationCancellationState state = 2; | ||
|
|
||
| // The number of attempts made to deliver the cancel operation request. | ||
| // This number represents a minimum bound since the attempt is incremented after the request completes. | ||
| int32 attempt = 3; | ||
|
|
||
| // The time when the last attempt completed. | ||
| google.protobuf.Timestamp last_attempt_complete_time = 4; | ||
| // The last attempt's failure, if any. | ||
| temporal.api.failure.v1.Failure last_attempt_failure = 5; | ||
| // The time when the next attempt is scheduled. | ||
| google.protobuf.Timestamp next_attempt_schedule_time = 6; | ||
|
|
||
| // If the state is BLOCKED, blocked reason provides additional information. | ||
| string blocked_reason = 7; | ||
|
|
||
| // A reason that may be specified in the CancelNexusOperationRequest. | ||
| string reason = 8; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was set to
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I wonder why we don't have a linter for this?)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Copy pasta probably. Thanks for catching this. |
||
| } | ||
|
|
||
| // Full current state of a standalone Nexus operation, as of the time of the request. | ||
| message NexusOperationExecutionInfo { | ||
| // Unique identifier of this Nexus operation within its namespace along with run ID (below). | ||
| string operation_id = 1; | ||
| string run_id = 2; | ||
|
|
||
| // Endpoint name, resolved to a URL via the cluster's endpoint registry. | ||
| string endpoint = 3; | ||
| // Service name. | ||
| string service = 4; | ||
| // Operation name. | ||
| string operation = 5; | ||
|
|
||
| // A general status for this operation, indicates whether it is currently running or in one of the terminal statuses. | ||
| // Updated once when the operation is originally scheduled, and again when it reaches a terminal status. | ||
| temporal.api.enums.v1.NexusOperationExecutionStatus status = 6; | ||
| // More detailed breakdown of NEXUS_OPERATION_EXECUTION_STATUS_RUNNING. | ||
| temporal.api.enums.v1.PendingNexusOperationState state = 7; | ||
|
|
||
| // Schedule-to-close timeout for this operation. | ||
| // This is the only timeout settable for a Nexus operation. | ||
| // (-- api-linter: core::0140::prepositions=disabled | ||
| // aip.dev/not-precedent: "to" is used to indicate interval. --) | ||
| google.protobuf.Duration schedule_to_close_timeout = 8; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might as well add |
||
|
|
||
| // The number of attempts made to start/deliver the operation request. | ||
| // This number represents a minimum bound since the attempt is incremented after the request completes. | ||
| int32 attempt = 9; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is attempt to deliver the start request. Will we support overall operation retry in the future? Will this name be confusing if we do? Maybe we should call it
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to keep this for consistency with |
||
|
|
||
| // Time the operation was originally scheduled via a StartNexusOperation request. | ||
| google.protobuf.Timestamp schedule_time = 10; | ||
| // Schedule time + schedule-to-close timeout. | ||
| google.protobuf.Timestamp expiration_time = 11; | ||
| // Time when the operation transitioned to a closed state. | ||
| google.protobuf.Timestamp close_time = 12; | ||
|
|
||
| // The time when the last attempt completed. | ||
| google.protobuf.Timestamp last_attempt_complete_time = 13; | ||
| // The last attempt's failure, if any. | ||
| temporal.api.failure.v1.Failure last_attempt_failure = 14; | ||
| // The time when the next attempt is scheduled. | ||
| google.protobuf.Timestamp next_attempt_schedule_time = 15; | ||
|
|
||
| // Elapsed time from schedule_time to now for running operations or to close_time for closed | ||
| // operations, including all attempts and backoff between attempts. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarifies it works for running operations (as opposed to
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LGTM |
||
| google.protobuf.Duration execution_duration = 16; | ||
|
|
||
| NexusOperationExecutionCancellationInfo cancellation_info = 17; | ||
|
|
||
| // If the state is BLOCKED, blocked reason provides additional information. | ||
| string blocked_reason = 18; | ||
|
|
||
| // Server-generated request ID used as an idempotency token when submitting start requests to | ||
| // the handler. Distinct from the request_id in StartNexusOperationRequest, which is the | ||
| // caller-side idempotency key for the StartNexusOperation RPC itself. | ||
| string request_id = 19; | ||
|
|
||
| // Operation token. Only set for asynchronous operations after a successful StartOperation call. | ||
| string operation_token = 20; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought we said we didn't want to expose this to callers? They should only have one way of referencing their operations: their caller-side operation ID.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's still worth exposing this information as we do for workflow callers.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no horse in this race, but I'm curious, why is it useful to have?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's useful for debugging and can be used in the direct Nexus APIs to reattach to the same operation (future capability). |
||
|
|
||
| // Incremented each time the operation's state is mutated in persistence. | ||
| int64 state_transition_count = 21; | ||
|
|
||
| temporal.api.common.v1.SearchAttributes search_attributes = 22; | ||
|
|
||
| // Header for context propagation and tracing purposes. | ||
| map<string, string> nexus_header = 23; | ||
|
|
||
| // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the operation. | ||
| temporal.api.sdk.v1.UserMetadata user_metadata = 24; | ||
|
|
||
| // Links attached by the handler of this operation on start or completion. | ||
| repeated temporal.api.common.v1.Link links = 25; | ||
| } | ||
|
|
||
| // Limited Nexus operation information returned in the list response. | ||
| // When adding a field here, ensure that it is also present in NexusOperationExecutionInfo (note that it may already be present in | ||
| // NexusOperationExecutionInfo, but not at the top level). | ||
| message NexusOperationExecutionListInfo { | ||
| // A unique identifier of this operation within its namespace along with run ID (below). | ||
| string operation_id = 1; | ||
| // The run ID of the standalone Nexus operation. | ||
| string run_id = 2; | ||
|
|
||
| // Endpoint name. | ||
| string endpoint = 3; | ||
| // Service name. | ||
| string service = 4; | ||
| // Operation name. | ||
| string operation = 5; | ||
|
|
||
| // Time the operation was originally scheduled via a StartNexusOperation request. | ||
| google.protobuf.Timestamp schedule_time = 6; | ||
| // If the operation is in a terminal status, this field represents the time the operation transitioned to that status. | ||
| google.protobuf.Timestamp close_time = 7; | ||
| // The status is updated once, when the operation is originally scheduled, and again when the operation reaches a terminal status. | ||
| temporal.api.enums.v1.NexusOperationExecutionStatus status = 8; | ||
|
|
||
| // Search attributes from the start request. | ||
| temporal.api.common.v1.SearchAttributes search_attributes = 9; | ||
|
|
||
| // Updated on terminal status. | ||
| int64 state_transition_count = 10; | ||
| // Updated once on scheduled and once on terminal status. | ||
| int64 state_size_bytes = 11; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this intentionally a field only present in list? It was mentioned for standalone activities that everything in list was expected to be in describe. Also, for standalone activities it was mentioned there would be a tool that would make sure everything in list was also in describe result. Can we prioritize that? It's a lot of effort for me to have to continually confirm our assertion on every PR and find these issues since we chose not to reuse types.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just want to call this out that we don't have this guarantee for schedules or batch which are much older archetypes: https://github.com/temporalio/api/blob/master/temporal/api/schedule/v1/message.proto https://github.com/temporalio/api/blob/master/temporal/api/workflowservice/v1/request_response.proto#L1715-L1751. I don't think this guarantee needs to be high priority but we should keep track of it because I do think that it is nice to have. Ideally the SDKs would allow the types to have completely different fields, there's no need to reuse the models here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, but this guarantee/promise was made as part of not reusing models knowing the SDK will need this guarantee. Was not expecting a "nice to have" guarantee when the promise/guarantee was made.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's take this offline.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Discussed offline, we will write a tool soon. |
||
| // The difference between close time and scheduled time. | ||
| // This field is only populated if the operation is closed. | ||
| google.protobuf.Duration execution_duration = 12; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like the naming is inconsistent with `BatchJob` and `WorkflowEvent`. Neither of those use the term `Execution`. I'm on the fence whether this is acceptable or not.