Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions api/cluster/resource/templater.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,16 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
// Get user-configured probe settings
var userLivenessConfig *models.ProbeConfig
var userReadinessConfig *models.ProbeConfig
var userStartupConfig *models.ProbeConfig
if modelService.ResourceRequest != nil {
userLivenessConfig = modelService.ResourceRequest.LivenessProbe
userReadinessConfig = modelService.ResourceRequest.ReadinessProbe
userStartupConfig = modelService.ResourceRequest.StartupProbe
}

livenessProbeConfig := getLivenessProbeConfig(modelService.PredictorProtocol(), envVars, fmt.Sprintf("/v1/models/%s", modelService.Name), userLivenessConfig)
readinessProbeConfig := getReadinessProbeConfig(modelService.PredictorProtocol(), fmt.Sprintf("/v1/models/%s", modelService.Name), userReadinessConfig)
startupProbeConfig := getStartupProbeConfig(modelService.PredictorProtocol(), fmt.Sprintf("/v1/models/%s", modelService.Name), userStartupConfig)

containerPorts := createContainerPorts(modelService.PredictorProtocol(), modelService.DeploymentMode)
storageUri := utils.CreateModelLocation(modelService.ArtifactURI)
Expand All @@ -262,6 +265,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
Resources: resources,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
Env: envVars,
},
Expand All @@ -278,6 +282,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
Resources: resources,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
Env: envVars,
},
Expand All @@ -294,6 +299,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
Resources: resources,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
Env: envVars,
},
Expand All @@ -310,6 +316,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
Resources: resources,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
Env: envVars,
},
Expand Down Expand Up @@ -354,6 +361,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
Resources: resources,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
},
},
Expand Down Expand Up @@ -428,13 +436,16 @@ func (t *InferenceServiceTemplater) createTransformerSpec(
// Get user-configured probe settings for transformer
var userLivenessConfig *models.ProbeConfig
var userReadinessConfig *models.ProbeConfig
var userStartupConfig *models.ProbeConfig
if transformer.ResourceRequest != nil {
userLivenessConfig = transformer.ResourceRequest.LivenessProbe
userReadinessConfig = transformer.ResourceRequest.ReadinessProbe
userStartupConfig = transformer.ResourceRequest.StartupProbe
}

livenessProbeConfig := getLivenessProbeConfig(modelService.Protocol, envVars, "/", userLivenessConfig)
readinessProbeConfig := getReadinessProbeConfig(modelService.Protocol, "/", userReadinessConfig)
startupProbeConfig := getStartupProbeConfig(modelService.Protocol, "/", userStartupConfig)

containerPorts := createContainerPorts(modelService.Protocol, modelService.DeploymentMode)
transformerSpec := &kservev1beta1.TransformerSpec{
Expand All @@ -455,6 +466,7 @@ func (t *InferenceServiceTemplater) createTransformerSpec(
Args: transformerArgs,
LivenessProbe: livenessProbeConfig,
ReadinessProbe: readinessProbeConfig,
StartupProbe: startupProbeConfig,
Ports: containerPorts,
},
},
Expand Down Expand Up @@ -648,6 +660,59 @@ func createGRPCReadinessProbe(port int, userConfig *models.ProbeConfig) *corev1.
return probe
}

// getStartupProbeConfig builds the startup probe for the given protocol and
// HTTP path. It returns nil when the user supplied no startup probe
// configuration, in which case no startup probe is attached to the container
// (unlike liveness/readiness, there is no platform-default startup probe).
func getStartupProbeConfig(protocol prt.Protocol, httpPath string, userConfig *models.ProbeConfig) *corev1.Probe {
	if userConfig != nil {
		return createStartupProbeSpec(protocol, httpPath, userConfig)
	}
	return nil
}

// createStartupProbeSpec selects the startup probe flavor by serving
// protocol: UPI v1 services are probed via the gRPC health binary, every
// other protocol via an HTTP GET on httpPath.
func createStartupProbeSpec(protocol prt.Protocol, httpPath string, userConfig *models.ProbeConfig) *corev1.Probe {
	switch protocol {
	case prt.UpiV1:
		return createGRPCStartupProbe(defaultGRPCPort, userConfig)
	default:
		return createHTTPGetStartupProbe(httpPath, defaultHTTPPort, userConfig)
	}
}

// createHTTPGetStartupProbe builds an HTTP GET startup probe that polls
// httpPath on the given container port. The probe starts from the platform
// defaults shared with the liveness probe (liveProbe* constants) and is then
// overridden in place by any user-supplied settings via applyUserProbeConfig.
func createHTTPGetStartupProbe(httpPath string, port int, userConfig *models.ProbeConfig) *corev1.Probe {
	probe := &corev1.Probe{
		ProbeHandler: corev1.ProbeHandler{
			HTTPGet: &corev1.HTTPGetAction{
				Path: httpPath,
				// Typed corev1 constant instead of a raw "HTTP" string;
				// same wire value, but type-checked against the API.
				Scheme: corev1.URISchemeHTTP,
				// intstr.FromInt sets Type explicitly rather than relying
				// on the IntOrString zero value.
				Port: intstr.FromInt(port),
			},
		},
		// Defaults shared with the liveness probe; may be overridden below.
		InitialDelaySeconds: liveProbeInitialDelaySec,
		TimeoutSeconds:      liveProbeTimeoutSec,
		PeriodSeconds:       liveProbePeriodSec,
		SuccessThreshold:    liveProbeSuccessThreshold,
		FailureThreshold:    liveProbeFailureThreshold,
	}
	applyUserProbeConfig(probe, userConfig, port)
	return probe
}

// createGRPCStartupProbe builds an exec-based startup probe that runs the
// gRPC health-probe binary against the given local port. Platform defaults
// (shared with the liveness probe) are applied first, then overridden in
// place by any user-supplied settings via applyUserProbeConfig.
func createGRPCStartupProbe(port int, userConfig *models.ProbeConfig) *corev1.Probe {
	healthCheckCmd := []string{grpcHealthProbeCommand, fmt.Sprintf("-addr=:%d", port)}
	probe := &corev1.Probe{
		ProbeHandler: corev1.ProbeHandler{
			Exec: &corev1.ExecAction{Command: healthCheckCmd},
		},
		InitialDelaySeconds: liveProbeInitialDelaySec,
		TimeoutSeconds:      liveProbeTimeoutSec,
		PeriodSeconds:       liveProbePeriodSec,
		SuccessThreshold:    liveProbeSuccessThreshold,
		FailureThreshold:    liveProbeFailureThreshold,
	}
	applyUserProbeConfig(probe, userConfig, port)
	return probe
}

// applyUserProbeConfig applies user-provided probe configuration to the probe
func applyUserProbeConfig(probe *corev1.Probe, userConfig *models.ProbeConfig, _ int) {
if userConfig == nil {
Expand Down
2 changes: 2 additions & 0 deletions api/models/resource_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ type ResourceRequest struct {
LivenessProbe *ProbeConfig `json:"liveness_probe,omitempty"`
// Readiness probe configuration
ReadinessProbe *ProbeConfig `json:"readiness_probe,omitempty"`
// Startup probe configuration
StartupProbe *ProbeConfig `json:"startup_probe,omitempty"`
}

// ProbeConfig represents the configuration for Kubernetes liveness/readiness probes
Expand Down
2 changes: 2 additions & 0 deletions swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2121,6 +2121,8 @@ components:
"$ref": "#/components/schemas/ProbeConfig"
readiness_probe:
"$ref": "#/components/schemas/ProbeConfig"
startup_probe:
"$ref": "#/components/schemas/ProbeConfig"
ProbeConfig:
type: object
properties:
Expand Down
18 changes: 18 additions & 0 deletions ui/src/components/ResourcesConfigTable.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export const ResourcesConfigTable = ({
gpu_request,
liveness_probe,
readiness_probe,
startup_probe,
},
}) => {
const items = [
Expand Down Expand Up @@ -104,6 +105,23 @@ export const ResourcesConfigTable = ({
}
}

// Add startup probe info if configured
if (startup_probe && Object.keys(startup_probe).some(k => startup_probe[k])) {
const probeDetails = [];
if (startup_probe.initial_delay_seconds) probeDetails.push(`delay: ${startup_probe.initial_delay_seconds}s`);
if (startup_probe.timeout_seconds) probeDetails.push(`timeout: ${startup_probe.timeout_seconds}s`);
if (startup_probe.period_seconds) probeDetails.push(`period: ${startup_probe.period_seconds}s`);
if (startup_probe.failure_threshold) probeDetails.push(`failures: ${startup_probe.failure_threshold}`);
if (startup_probe.success_threshold) probeDetails.push(`successes: ${startup_probe.success_threshold}`);
if (startup_probe.path) probeDetails.push(`path: ${startup_probe.path}`);
if (probeDetails.length > 0) {
items.push({
title: "Startup Probe",
description: probeDetails.join(", "),
});
}
}

return (
<EuiDescriptionList
compressed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ export const DeployModelVersionForm = ({
delete versionEndpoint.resource_request.readiness_probe;
}
}
if (versionEndpoint?.resource_request?.startup_probe) {
const probe = versionEndpoint.resource_request.startup_probe;
if (!Object.keys(probe).some(k => probe[k])) {
delete versionEndpoint.resource_request.startup_probe;
}
}
if (versionEndpoint?.image_builder_resource_request?.cpu_request === "") {
delete versionEndpoint.image_builder_resource_request.cpu_request;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ export const ProbesFormGroup = ({
"Readiness Probe",
"Configure the readiness probe to determine if the container is ready to receive traffic. Empty values use platform defaults."
)}
<EuiSpacer size="m" />
{renderProbeFields(
"startup_probe",
"Startup Probe",
"Configure the startup probe to determine when the container has started. Useful for slow-starting containers. Empty values use platform defaults."
)}
</Fragment>
);
};
Expand Down
5 changes: 4 additions & 1 deletion ui/src/services/transformer/Transformer.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ export class Transformer {
max_replica: process.env.REACT_APP_ENVIRONMENT === "production" ? 4 : 2,
cpu_request: "500m",
cpu_limit: "",
memory_request: "512Mi"
memory_request: "512Mi",
liveness_probe: null,
readiness_probe: null,
startup_probe: null,
};

this.env_vars = [];
Expand Down
1 change: 1 addition & 0 deletions ui/src/services/version_endpoint/VersionEndpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export class VersionEndpoint {
memory_request: "512Mi",
liveness_probe: null,
readiness_probe: null,
startup_probe: null,
};

this.image_builder_resource_request = {
Expand Down
Loading