diff --git a/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md b/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md index aeacf1786..d537b3ef1 100644 --- a/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md +++ b/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md @@ -17,28 +17,30 @@ Grove's naming scheme serves two critical purposes: For PodCliques that are **not** part of a PodCliqueScalingGroup, the pod naming follows this pattern: ``` ---- +---- ``` **Components:** - ``: The name of the PodCliqueSet - ``: The replica index of the PodCliqueSet (0-based) - ``: The name of the PodClique template defined in the PodCliqueSet spec -- ``: A random 5-character suffix generated by Kubernetes +- ``: The pod index within the PodClique (0-based) +- ``: A random 5-character suffix generated by Grove -**Example:** `multinode-disaggregated-0-frontend-a7b3c` +**Example:** `multinode-disaggregated-0-frontend-0-a7b9x` Looking at this name, you can immediately tell: - It belongs to the `multinode-disaggregated` PodCliqueSet - It's part of PodCliqueSet replica 0 - It's from the `frontend` PodClique +- It is pod index 0 within the `frontend` PodClique ### PodCliques in a PodCliqueScalingGroup For PodCliques that **are** part of a PodCliqueScalingGroup, the pod naming includes the PCSG information: ``` ------ +------ ``` **Components:** @@ -47,39 +49,42 @@ For PodCliques that **are** part of a PodCliqueScalingGroup, the pod naming incl - ``: The name of the PodCliqueScalingGroup template - ``: The replica index of the PodCliqueScalingGroup (0-based) - ``: The name of the PodClique template within the PCSG -- ``: A random 5-character suffix generated by Kubernetes +- ``: The pod index within the PodClique (0-based) +- ``: A random 5-character suffix generated by Grove -**Example:** `multinode-disaggregated-0-prefill-1-pworker-m9n0o` +**Example:** `multinode-disaggregated-0-prefill-1-pworker-2-m9n2q` Looking at this name, you can immediately tell: - It belongs to the `multinode-disaggregated` PodCliqueSet (replica 0) - It's part of the `prefill` PodCliqueScalingGroup (replica 1) -- It's from the `pworker` PodClique (prefill worker) +- It's from the `pworker` PodClique (prefill worker, replica 2) ## Naming Best Practices -### Kubernetes Name Length Limit +### Generated Pod Name and Hostname Length Limits -Kubernetes has a **63-character limit** for pod names. Since Grove constructs full pod names by combining multiple components, you need to be mindful of name lengths when choosing names for your resources. +Grove validates the deterministic pod hostname and the generated pod object name separately. The pod hostname is used for service discovery and must stay within the Kubernetes **63-character DNS label limit**. The pod object name includes an additional 5-character suffix and can be longer than 63 characters because Kubernetes treats pod names as DNS subdomains. **How Grove constructs names:** For standalone PodCliques, the final pod name is: ``` ----<5-char-suffix> +----<5-char-suffix> ``` For PodCliques in a PCSG, the final pod name is: ``` ------<5-char-suffix> +------<5-char-suffix> ``` **Character budget breakdown:** -- `<5-char-suffix>`: 5 characters (fixed by Kubernetes) -- `-` separators: 3-5 characters depending on structure -- Replica indices: 1+ characters each (single digit for 0-9, two digits for 10-99, etc.) +- `<5-char-suffix>`: 5 characters +- `-` separators: 4-6 characters depending on structure +- Replica and pod indices: 1+ characters each (single digit for 0-9, two digits for 10-99, etc.) - Your chosen names: Remaining characters +When planning names, make sure the generated hostname fits within 63 characters. Grove allows the final pod object name to exceed 63 characters, but users should use Grove's environment variables and hostname-based DNS names for discovery rather than constructing DNS names from `metadata.name`. + ### Naming Guidelines 1. **Use Short, Descriptive Names**: Choose concise but meaningful names @@ -95,8 +100,8 @@ For PodCliques in a PCSG, the final pod name is: - ✅ Good: `ml-inference`, `web-app`, `data-pipeline` - ❌ Avoid: `machine-learning-inference-service`, `web-application-stack` -4. **Plan for Scaling**: Consider whether you'll need double-digit replica indices (adds 1 character per additional digit) - - If you plan to scale to 10+ or 100+ or 1000+ replicas, budget accordingly +4. **Plan for Scaling**: Consider whether you'll need double-digit PCS replica, PCSG replica, or PodClique pod indices (adds 1 character per additional digit) + - If you plan to scale to 10+ or 100+ or 1000+ replicas at any level, budget accordingly 5. **Unique PodClique Names Within a PodCliqueSet**: All PodClique names must be unique within a PodCliqueSet. We explain the rationale for this further in the [Hands-On Example](./03_hands-on-example.md#why-unique-podclique-names-matter). - If you have leader/worker patterns in multiple PCSGs, you **must** use different names (e.g., `pleader`/`pworker` and `dleader`/`dworker`) @@ -123,18 +128,18 @@ Let's plan names for a multi-node disaggregated inference system with a frontend - Worker PodClique: `dworker` (7 chars) **Resulting pod names:** -- Frontend: `mn-disagg-0-frontend-a7b3c` (26 chars) ✅ -- Prefill leader: `mn-disagg-0-prefill-0-pleader-a7b3c` (35 chars) ✅ -- Prefill worker: `mn-disagg-0-prefill-0-pworker-a7b3c` (35 chars) ✅ -- Decode leader: `mn-disagg-0-decode-0-dleader-a7b3c` (34 chars) ✅ -- Decode worker: `mn-disagg-0-decode-0-dworker-a7b3c` (34 chars) ✅ +- Frontend: `mn-disagg-0-frontend-0-a7b9x` (28 chars) ✅ +- Prefill leader: `mn-disagg-0-prefill-0-pleader-0-a7b9x` (37 chars) ✅ +- Prefill worker: `mn-disagg-0-prefill-0-pworker-0-a7b9x` (37 chars) ✅ +- Decode leader: `mn-disagg-0-decode-0-dleader-0-a7b9x` (36 chars) ✅ +- Decode worker: `mn-disagg-0-decode-0-dworker-0-a7b9x` (36 chars) ✅ -**Scaling headroom:** The longest name (`mn-disagg-0-prefill-0-pworker-a7b3c`) is 35 characters, leaving 28 characters of headroom. Each additional digit in a replica index adds 1 character: -- 2-digit indices for PCS and PCSG (10-99): 37 chars → scales to 99 PCS replicas × 99 PCSG replicas ✅ -- 3-digit indices for PCS and PCSG (100-999): 39 chars → scales to 999 × 999 replicas ✅ -- 7-digit indices for PCS and PCSG: 47 chars → scales to millions of replicas ✅ +**Scaling headroom:** The longest hostname (`mn-disagg-0-prefill-0-pworker-0`) is 31 characters, leaving 32 characters of hostname headroom. Each additional digit in a PCS replica, PCSG replica, or PodClique pod index adds 1 character: +- 2-digit indices for PCS, PCSG, and PodClique pods (10-99): 34 chars ✅ +- 3-digit indices for PCS, PCSG, and PodClique pods (100-999): 37 chars ✅ +- 7-digit indices for all three levels: 49 chars ✅ -With these name choices, you could scale to millions of replicas on both dimensions without hitting the limit. All names are well under the 63-character limit with room for scaling growth while remaining descriptive! +With these name choices, you could scale to millions of replicas across PCS replicas, PCSG replicas, and PodClique pods without hitting the hostname limit. All hostnames are well under the 63-character limit with room for scaling growth while remaining descriptive! To deploy a PodCliqueSet with this structure and explore the naming hierarchy through `kubectl`, continue to the [Hands-On Example](./03_hands-on-example.md). @@ -152,8 +157,8 @@ To deploy a PodCliqueSet with this structure and explore the naming hierarchy th | **PodClique (resource, standalone)** | - | ✅ | `--` | | **PodClique (resource, in PCSG)** | - | ✅ | `----` | | **PCSG (resource)** | - | ✅ | `--` | -| **Pod (standalone)** | - | ✅ | `---` | -| **Pod (in PCSG)** | - | ✅ | `-----` | +| **Pod (standalone)** | - | ✅ | `----` | +| **Pod (in PCSG)** | - | ✅ | `------` | **You control:** PodCliqueSet name, PodClique template names, PCSG template names **Grove generates:** All resource instances with hierarchical naming @@ -162,7 +167,7 @@ To deploy a PodCliqueSet with this structure and explore the naming hierarchy th 1. **Self-Documenting Hierarchy**: Pod names encode the complete hierarchy from PodCliqueSet → PCSG (if applicable) → PodClique → Pod, making `kubectl get pods` output immediately understandable. -2. **63-Character Limit**: Kubernetes enforces a 63-character limit on resource names. Use short, meaningful names for your resources, especially PodCliqueSet and PCSG names which appear in every generated name. +2. **Hostname Limit**: Generated pod hostnames must stay within the Kubernetes 63-character DNS label limit. Use short, meaningful names for your resources, especially PodCliqueSet and PCSG names which appear in every generated hostname and pod name. 3. **Unique PodClique Names**: All PodClique names must be unique within a PodCliqueSet. When you have similar roles in multiple PCSGs (e.g., leader/worker in both prefill and decode), use prefixes or abbreviations (e.g., `pleader`/`pworker` and `dleader`/`dworker`). @@ -177,4 +182,3 @@ Now that you understand Grove's naming scheme and best practices: - **See it in action**: Continue to the [Hands-On Example](./03_hands-on-example.md) to deploy an example system and observe the naming hierarchy firsthand. - **Learn programmatic discovery**: Head to the [Environment Variables guide](../03_environment-variables-for-pod-discovery/01_overview.md) to learn how to use these names programmatically for pod discovery, including how Grove injects environment variables and how to construct FQDNs for pod-to-pod communication. - diff --git a/docs/user-guide/02_pod-and-resource-naming-conventions/03_hands-on-example.md b/docs/user-guide/02_pod-and-resource-naming-conventions/03_hands-on-example.md index 647589324..17a52fb62 100644 --- a/docs/user-guide/02_pod-and-resource-naming-conventions/03_hands-on-example.md +++ b/docs/user-guide/02_pod-and-resource-naming-conventions/03_hands-on-example.md @@ -161,20 +161,20 @@ kubectl get pods -l app.kubernetes.io/part-of=mn-disagg -o wide You should see output like: ``` -NAME READY STATUS RESTARTS AGE -mn-disagg-0-decode-0-dleader-abc12 1/1 Running 0 45s -mn-disagg-0-decode-0-dworker-def34 1/1 Running 0 45s -mn-disagg-0-decode-0-dworker-ghi56 1/1 Running 0 45s -mn-disagg-0-frontend-jkl78 1/1 Running 0 45s -mn-disagg-0-frontend-mno90 1/1 Running 0 45s -mn-disagg-0-prefill-0-pleader-pqr12 1/1 Running 0 45s -mn-disagg-0-prefill-0-pworker-stu34 1/1 Running 0 45s -mn-disagg-0-prefill-0-pworker-vwx56 1/1 Running 0 45s -mn-disagg-0-prefill-0-pworker-yza78 1/1 Running 0 45s -mn-disagg-0-prefill-1-pleader-bcd90 1/1 Running 0 45s -mn-disagg-0-prefill-1-pworker-efg12 1/1 Running 0 45s -mn-disagg-0-prefill-1-pworker-hij34 1/1 Running 0 45s -mn-disagg-0-prefill-1-pworker-klm56 1/1 Running 0 45s +NAME READY STATUS RESTARTS AGE +mn-disagg-0-decode-0-dleader-0-abc7d 1/1 Running 0 45s +mn-disagg-0-decode-0-dworker-0-def8e 1/1 Running 0 45s +mn-disagg-0-decode-0-dworker-1-ghi9f 1/1 Running 0 45s +mn-disagg-0-frontend-0-jkl0g 1/1 Running 0 45s +mn-disagg-0-frontend-1-mno1h 1/1 Running 0 45s +mn-disagg-0-prefill-0-pleader-0-pqr2i 1/1 Running 0 45s +mn-disagg-0-prefill-0-pworker-0-stu3j 1/1 Running 0 45s +mn-disagg-0-prefill-0-pworker-1-vwx4k 1/1 Running 0 45s +mn-disagg-0-prefill-0-pworker-2-yza5l 1/1 Running 0 45s +mn-disagg-0-prefill-1-pleader-0-bcd6m 1/1 Running 0 45s +mn-disagg-0-prefill-1-pworker-0-efg7n 1/1 Running 0 45s +mn-disagg-0-prefill-1-pworker-1-hij8p 1/1 Running 0 45s +mn-disagg-0-prefill-1-pworker-2-klm9x 1/1 Running 0 45s ``` ## Parsing the Naming Hierarchy @@ -185,7 +185,7 @@ Looking at this output, you can immediately understand the system structure: ``` mn-disagg-0-frontend-* ``` -- Simpler naming: `---` +- Simpler naming: `----` - 2 frontend pods serving requests **2. PodCliqueScalingGroup (prefill) - 2 replicas:** @@ -193,7 +193,7 @@ mn-disagg-0-frontend-* mn-disagg-0-prefill-0-* mn-disagg-0-prefill-1-* ``` -- Deeper hierarchy: `-----` +- Deeper hierarchy: `------` - Each replica has 1 `pleader` + 3 `pworker` pods - Two independent prefill clusters @@ -255,12 +255,13 @@ The PCSG names clearly identify the two scaling groups. ## Name Length Analysis -Pod names have a 63-character limit (DNS label constraint). Let's verify our longest pod name fits: +Grove pod hostnames have a 63-character DNS label limit. Pod object names include an extra 5-character suffix and can be longer than 63 characters, but hostname length is the limit that matters for service discovery. Let's verify our longest generated hostname fits: -- Longest pod name: `mn-disagg-0-prefill-1-pworker-klm56` - - Characters: 35 (well under 63) ✅ +- Longest pod name: `mn-disagg-0-prefill-1-pworker-2-klm9x` +- Longest hostname: `mn-disagg-0-prefill-1-pworker-2` + - Characters: 31 (well under 63) ✅ -With 28 characters of headroom, this naming scheme can scale to millions of replicas on both PCS and PCSG dimensions without hitting the limit. +With 32 characters of hostname headroom, this naming scheme can scale to millions of replicas across the PCS, PCSG, and PodClique pod dimensions without hitting the limit. ## Cleanup @@ -275,4 +276,3 @@ kubectl delete pcs mn-disagg Now that you've seen the naming conventions in action, check out: - The [Key Takeaways](./02_naming-conventions.md#key-takeaways) section for a summary of naming best practices - The [Environment Variables guide](../03_environment-variables-for-pod-discovery/01_overview.md) to learn how to use these names programmatically for pod discovery - diff --git a/docs/user-guide/03_environment-variables-for-pod-discovery/02_env_var_reference.md b/docs/user-guide/03_environment-variables-for-pod-discovery/02_env_var_reference.md index d4ffd5478..e71510342 100644 --- a/docs/user-guide/03_environment-variables-for-pod-discovery/02_env_var_reference.md +++ b/docs/user-guide/03_environment-variables-for-pod-discovery/02_env_var_reference.md @@ -17,11 +17,11 @@ A common source of confusion is the difference between a pod's **name** and its ### Pod Name (Kubernetes Resource Identifier) -The **pod name** is the unique identifier for the Pod resource in Kubernetes (stored in `metadata.name`). When Grove creates pods, it uses Kubernetes' `generateName` feature, which appends a random 5-character suffix to ensure uniqueness: +The **pod name** is the unique identifier for the Pod resource in Kubernetes (stored in `metadata.name`). When Grove creates pods, it includes the PodClique pod index and appends a random 5-character suffix to ensure uniqueness: ``` -- -Example: env-demo-standalone-0-frontend-abc12 +-- +Example: env-demo-standalone-0-frontend-0-abc7d ``` This name is what you see when running `kubectl get pods`. However, you **cannot use this name for DNS-based pod discovery** because the random suffix is unpredictable. @@ -53,7 +53,7 @@ env-demo-standalone-0-frontend-0.env-demo-standalone-0.default.svc.cluster.local | Attribute | Pod Name | Hostname | |-----------|----------|----------| | Source | `metadata.name` | `spec.hostname` | -| Pattern | `-` | `-` | +| Pattern | `--` | `-` | | Predictable? | ❌ No (random suffix) | ✅ Yes (index-based) | | DNS resolvable? | ❌ No | ✅ Yes (with headless service) | | Use case | `kubectl` commands, logs | Pod discovery, pod-to-pod communication | @@ -100,4 +100,3 @@ If a pod belongs to a PodClique that is part of a PodCliqueScalingGroup, these a ## Next Steps Continue to the [Hands-On Examples](./03_hands-on-examples.md) to deploy example PodCliqueSets and use environment variables to construct FQDNs and discover other pods. We strongly recommend working through these examples as they demonstrate the practical techniques you'll need to implement pod discovery in your own applications. - diff --git a/docs/user-guide/03_environment-variables-for-pod-discovery/03_hands-on-examples.md b/docs/user-guide/03_environment-variables-for-pod-discovery/03_hands-on-examples.md index 6e1f34589..48e119102 100644 --- a/docs/user-guide/03_environment-variables-for-pod-discovery/03_hands-on-examples.md +++ b/docs/user-guide/03_environment-variables-for-pod-discovery/03_hands-on-examples.md @@ -71,9 +71,9 @@ kubectl get pods -l app.kubernetes.io/part-of=env-demo-standalone You should see output similar to: ``` -NAME READY STATUS RESTARTS AGE -env-demo-standalone-0-frontend-abc12 1/1 Running 0 30s -env-demo-standalone-0-frontend-def34 1/1 Running 0 30s +NAME READY STATUS RESTARTS AGE +env-demo-standalone-0-frontend-0-abc7d 1/1 Running 0 30s +env-demo-standalone-0-frontend-1-def8e 1/1 Running 0 30s ``` Now, let's check the logs of one of the pods to see the environment variables: @@ -96,7 +96,7 @@ GROVE_HEADLESS_SERVICE=env-demo-standalone-0.default.svc.cluster.local GROVE_PCLQ_POD_INDEX=0 === Pod Name vs Hostname === -Pod Name (random suffix): env-demo-standalone-0-frontend-abc12 +Pod Name (random suffix): env-demo-standalone-0-frontend-0-abc7d Hostname (deterministic): env-demo-standalone-0-frontend-0 My FQDN: env-demo-standalone-0-frontend-0.env-demo-standalone-0.default.svc.cluster.local @@ -105,9 +105,9 @@ Sleeping... ``` **Key Observations:** -- The **pod name** (`env-demo-standalone-0-frontend-abc12`) has a random suffix—this is the Kubernetes resource identifier, not used for DNS +- The **pod name** (`env-demo-standalone-0-frontend-0-abc7d`) includes the pod index and a random suffix—this is the Kubernetes resource identifier, not used for DNS - The **hostname** (constructed as `$GROVE_PCLQ_NAME-$GROVE_PCLQ_POD_INDEX`) is deterministic—this is what you use for pod discovery -- `GROVE_PCLQ_NAME` contains the fully qualified PodClique name without the random suffix +- `GROVE_PCLQ_NAME` contains the fully qualified PodClique name without the pod index or random suffix - `GROVE_PCLQ_POD_INDEX` tells us this is the first pod (index 0) in the PodClique - `GROVE_HEADLESS_SERVICE` provides the headless service domain, which you combine with the hostname to construct the pod's FQDN: `$GROVE_PCLQ_NAME-$GROVE_PCLQ_POD_INDEX.$GROVE_HEADLESS_SERVICE` @@ -222,15 +222,15 @@ kubectl get pods -l app.kubernetes.io/part-of=env-demo-pcsg -o wide You should see 8 pods (2 PCSG replicas × (1 leader + 3 workers)): ``` -NAME READY STATUS RESTARTS AGE -env-demo-pcsg-0-model-instance-0-leader-abc12 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-0-worker-def34 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-0-worker-ghi56 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-0-worker-jkl78 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-1-leader-mno90 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-1-worker-pqr12 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-1-worker-stu34 1/1 Running 0 45s -env-demo-pcsg-0-model-instance-1-worker-vwx56 1/1 Running 0 45s +NAME READY STATUS RESTARTS AGE +env-demo-pcsg-0-model-instance-0-leader-0-abc7d 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-0-worker-0-def8e 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-0-worker-1-ghi9f 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-0-worker-2-jkl0g 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-1-leader-0-mno1h 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-1-worker-0-pqr2i 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-1-worker-1-stu3j 1/1 Running 0 45s +env-demo-pcsg-0-model-instance-1-worker-2-vwx4k 1/1 Running 0 45s ``` Let's inspect the leader logs from the first PCSG replica: @@ -317,4 +317,3 @@ kubectl delete pcs env-demo-pcsg ## Next Steps Now that you've seen the environment variables in action, continue to [Common Patterns and Takeaways](./04_common-patterns-and-takeaways.md) for reusable patterns you can adapt for your applications and a summary of key concepts. - diff --git a/operator/api/common/namegen.go b/operator/api/common/namegen.go index 1d767da20..4851de100 100644 --- a/operator/api/common/namegen.go +++ b/operator/api/common/namegen.go @@ -20,6 +20,14 @@ import ( "fmt" "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" + + utilrand "k8s.io/apimachinery/pkg/util/rand" +) + +const ( + // PodNameRandomSuffixLength is the length of the random suffix Grove adds to + // generated Pod names. + PodNameRandomSuffixLength = 5 ) // ResourceNameReplica is a type that holds a resource name and its replica index. @@ -77,6 +85,21 @@ func GeneratePodCliqueScalingGroupName(pcsNameReplica ResourceNameReplica, pclqS return fmt.Sprintf("%s-%d-%s", pcsNameReplica.Name, pcsNameReplica.Replica, pclqScalingGroupName) } +// GeneratePodName generates a Pod name based on the PodClique FQN, Pod index, and a random suffix. +func GeneratePodName(pclqName string, podIndex int) string { + return GeneratePodNameWithSuffix(pclqName, podIndex, utilrand.String(PodNameRandomSuffixLength)) +} + +// GeneratePodNameWithSuffix generates a Pod name with a caller-provided suffix. +func GeneratePodNameWithSuffix(pclqName string, podIndex int, suffix string) string { + return fmt.Sprintf("%s-%s", GeneratePodHostname(pclqName, podIndex), suffix) +} + +// GeneratePodHostname generates the stable Pod hostname used for service discovery. +func GeneratePodHostname(pclqName string, podIndex int) string { + return fmt.Sprintf("%s-%d", pclqName, podIndex) +} + // GenerateBasePodGangName generates a base PodGang name for a PodCliqueSet replica. // This is used for PodGangs that are not part of scaled scaling group replicas. func GenerateBasePodGangName(pcsNameReplica ResourceNameReplica) string { diff --git a/operator/api/common/namegen_test.go b/operator/api/common/namegen_test.go index 68fd13d28..c532f8b90 100644 --- a/operator/api/common/namegen_test.go +++ b/operator/api/common/namegen_test.go @@ -17,6 +17,7 @@ package common import ( + "regexp" "testing" "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" @@ -115,6 +116,17 @@ func TestExtractScalingGroupNameFromPCSGFQN(t *testing.T) { } } +func TestGeneratePodName(t *testing.T) { + podName := GeneratePodName("workload-0-prefill-1-worker", 12) + + assert.Regexp(t, regexp.MustCompile(`^workload-0-prefill-1-worker-12-[a-z0-9]{5}$`), podName) + assert.Len(t, podName[len("workload-0-prefill-1-worker-12-"):], PodNameRandomSuffixLength) +} + +func TestGeneratePodHostname(t *testing.T) { + assert.Equal(t, "workload-0-prefill-1-worker-12", GeneratePodHostname("workload-0-prefill-1-worker", 12)) +} + func TestGenerateBasePodGangName(t *testing.T) { tests := []struct { name string diff --git a/operator/api/core/v1alpha1/podclique.go b/operator/api/core/v1alpha1/podclique.go index b34dda4a5..dd8b3e51b 100644 --- a/operator/api/core/v1alpha1/podclique.go +++ b/operator/api/core/v1alpha1/podclique.go @@ -80,6 +80,15 @@ type PodCliqueSpec struct { ScaleConfig *AutoScalingConfig `json:"autoScalingConfig,omitempty"` } +// MaxReplicas returns the maximum number of replicas for this PodClique. +func (p PodCliqueSpec) MaxReplicas() int32 { + replicas := p.Replicas + if p.ScaleConfig != nil { + replicas = max(replicas, p.ScaleConfig.MaxReplicas) + } + return replicas +} + // AutoScalingConfig defines the configuration for the horizontal pod autoscaler. type AutoScalingConfig struct { // MinReplicas is the lower limit for the number of replicas for the target resource. diff --git a/operator/api/core/v1alpha1/podcliqueset.go b/operator/api/core/v1alpha1/podcliqueset.go index 19ba9b36f..e7658334c 100644 --- a/operator/api/core/v1alpha1/podcliqueset.go +++ b/operator/api/core/v1alpha1/podcliqueset.go @@ -19,6 +19,7 @@ package v1alpha1 import ( resourcev1 "k8s.io/api/resource/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) // +genclient @@ -334,6 +335,15 @@ type PodCliqueScalingGroupConfig struct { TopologyConstraint *TopologyConstraint `json:"topologyConstraint,omitempty"` } +// MaxReplicas returns the maximum number of replicas allowed with this scaling group config. +func (p PodCliqueScalingGroupConfig) MaxReplicas() int32 { + replicas := max(1, ptr.Deref(p.Replicas, 0)) + if p.ScaleConfig != nil { + replicas = max(replicas, p.ScaleConfig.MaxReplicas) + } + return replicas +} + // ResourceSharingScope defines the sharing scope for resource claims. // +kubebuilder:validation:Enum=AllReplicas;PerReplica type ResourceSharingScope string diff --git a/operator/e2e/tests/update/utils.go b/operator/e2e/tests/update/utils.go index 433e0b3cb..ffd150089 100644 --- a/operator/e2e/tests/update/utils.go +++ b/operator/e2e/tests/update/utils.go @@ -557,7 +557,7 @@ func waitForOrdinalUpdating(tc *testctx.TestContext, ordinal int32) error { // 3. The new pod is created to fill the same logical position // 4. The new pod receives the same pod.Spec.Hostname (e.g., "workload1-pc-a-0-0") // -// Only the pod name changes (due to GenerateName), while pod.Spec.Hostname represents the pod's +// Only the pod name changes (due to its random suffix), while pod.Spec.Hostname represents the pod's // logical position in the workload hierarchy and remains constant. func getPodIdentifier(tc *testctx.TestContext, pod *corev1.Pod) string { tc.T.Helper() diff --git a/operator/internal/controller/podclique/components/pod/pod.go b/operator/internal/controller/podclique/components/pod/pod.go index 50c28635d..b09204521 100644 --- a/operator/internal/controller/podclique/components/pod/pod.go +++ b/operator/internal/controller/podclique/components/pod/pod.go @@ -150,10 +150,10 @@ func (r _resource) buildResource(pcs *grovecorev1alpha1.PodCliqueSet, pclq *grov labels := getLabels(pclq.ObjectMeta, pcsName, podGangName, pcsReplicaIndex, podIndex) pod.ObjectMeta = metav1.ObjectMeta{ - GenerateName: fmt.Sprintf("%s-", pclq.Name), - Namespace: pclq.Namespace, - Labels: labels, - Annotations: pclq.Annotations, + Name: apicommon.GeneratePodName(pclq.Name, podIndex), + Namespace: pclq.Namespace, + Labels: labels, + Annotations: pclq.Annotations, } if err = controllerutil.SetControllerReference(pclq, pod, r.scheme); err != nil { return groveerr.WrapError(err, @@ -357,7 +357,7 @@ func addEnvironmentVariables(pod *corev1.Pod, pclq *grovecorev1alpha1.PodClique, // configurePodHostname sets the pod hostname and subdomain for service discovery func configurePodHostname(pcsName string, pcsReplicaIndex int, pclqName string, pod *corev1.Pod, podIndex int) { // Set hostname for service discovery (e.g., "my-pclq-0") - pod.Spec.Hostname = fmt.Sprintf("%s-%d", pclqName, podIndex) + pod.Spec.Hostname = apicommon.GeneratePodHostname(pclqName, podIndex) // Set subdomain to headless service name (reusing existing logic) pod.Spec.Subdomain = apicommon.GenerateHeadlessServiceName( diff --git a/operator/internal/controller/podclique/components/pod/pod_test.go b/operator/internal/controller/podclique/components/pod/pod_test.go index 9da32fa8d..32a973c6c 100644 --- a/operator/internal/controller/podclique/components/pod/pod_test.go +++ b/operator/internal/controller/podclique/components/pod/pod_test.go @@ -18,15 +18,19 @@ package pod import ( "fmt" + "regexp" "testing" "github.com/ai-dynamo/grove/operator/api/common" "github.com/ai-dynamo/grove/operator/api/common/constants" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" + testutils "github.com/ai-dynamo/grove/operator/test/utils" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" ) // TestGetSelectorLabelsForPods_PCSGOwnedPodClique tests that getSelectorLabelsForPods @@ -124,6 +128,64 @@ func TestGetSelectorLabelsForPods_PCSGOwnedPodClique(t *testing.T) { } } +func TestBuildResourceSetsPodNameWithPodIndex(t *testing.T) { + scheme := runtime.NewScheme() + utilruntime.Must(grovecorev1alpha1.AddToScheme(scheme)) + utilruntime.Must(corev1.AddToScheme(scheme)) + + pcs := &grovecorev1alpha1.PodCliqueSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "workload", + Namespace: "default", + }, + Spec: grovecorev1alpha1.PodCliqueSetSpec{ + Template: grovecorev1alpha1.PodCliqueSetTemplateSpec{ + Cliques: []*grovecorev1alpha1.PodCliqueTemplateSpec{ + { + Name: "worker", + Spec: grovecorev1alpha1.PodCliqueSpec{ + PodSpec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "app", Image: "busybox"}}, + }, + Replicas: 1, + }, + }, + }, + }, + }, + } + pclq := &grovecorev1alpha1.PodClique{ + ObjectMeta: metav1.ObjectMeta{ + Name: "workload-0-worker", + Namespace: "default", + Labels: map[string]string{ + common.LabelPartOfKey: "workload", + common.LabelPodCliqueSetReplicaIndex: "0", + common.LabelPodClique: "workload-0-worker", + }, + }, + Spec: grovecorev1alpha1.PodCliqueSpec{ + PodSpec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "app", Image: "busybox"}}, + }, + Replicas: 1, + }, + } + + pod := &corev1.Pod{} + r := _resource{ + scheme: scheme, + schedRegistry: testutils.NewDefaultFakeRegistry(), + } + + err := r.buildResource(pcs, pclq, "workload-0", pod, 7) + + assert.NoError(t, err) + assert.Empty(t, pod.GenerateName) + assert.Regexp(t, regexp.MustCompile(`^workload-0-worker-7-[a-z0-9]{5}$`), pod.Name) + assert.Equal(t, "workload-0-worker-7", pod.Spec.Hostname) +} + func TestAddEnvironmentVariables(t *testing.T) { tests := []struct { name string diff --git a/operator/internal/controller/podclique/register.go b/operator/internal/controller/podclique/register.go index 05073841e..1ac8cae82 100644 --- a/operator/internal/controller/podclique/register.go +++ b/operator/internal/controller/podclique/register.go @@ -18,7 +18,6 @@ package podclique import ( "context" - "strings" "github.com/ai-dynamo/grove/operator/api/common/constants" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" @@ -317,22 +316,14 @@ func mapPodGangToPCLQs() handler.MapFunc { if len(podGroup.PodReferences) == 0 { continue } - podRefName := podGroup.PodReferences[0].Name - pclqFQN := extractPCLQNameFromPodName(podRefName) requests = append(requests, reconcile.Request{ - NamespacedName: types.NamespacedName{Name: pclqFQN, Namespace: podGang.Namespace}, + NamespacedName: types.NamespacedName{Name: podGroup.Name, Namespace: podGang.Namespace}, }) } return requests } } -// extractPCLQNameFromPodName extracts the PodClique name from a Pod name by removing the replica index suffix -func extractPCLQNameFromPodName(podName string) string { - endIndex := strings.LastIndex(podName, "-") - return podName[:endIndex] -} - // podGangPredicate filters PodGang events to trigger on initialization and spec updates func podGangPredicate() predicate.Predicate { return predicate.Funcs{ diff --git a/operator/internal/webhook/admission/pcs/validation/podcliqueset.go b/operator/internal/webhook/admission/pcs/validation/podcliqueset.go index 395a1631f..74a29d119 100644 --- a/operator/internal/webhook/admission/pcs/validation/podcliqueset.go +++ b/operator/internal/webhook/admission/pcs/validation/podcliqueset.go @@ -23,10 +23,12 @@ import ( "slices" "strings" + apicommon "github.com/ai-dynamo/grove/operator/api/common" groveconfigv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" "github.com/ai-dynamo/grove/operator/internal/clustertopology" componentutils "github.com/ai-dynamo/grove/operator/internal/controller/common/component/utils" + "github.com/ai-dynamo/grove/operator/internal/resourceclaim" "github.com/ai-dynamo/grove/operator/internal/scheduler" "github.com/ai-dynamo/grove/operator/internal/utils" @@ -42,10 +44,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -const ( - maxCombinedResourceNameLength = 45 -) - var allowedStartupTypes = sets.New(grovecorev1alpha1.CliqueStartupTypeInOrder, grovecorev1alpha1.CliqueStartupTypeAnyOrder, grovecorev1alpha1.CliqueStartupTypeExplicit) // pcsValidator validates PodCliqueSet resources for create and update operations. @@ -145,6 +143,7 @@ func (v *pcsValidator) validatePCSResourceSharing(refs []grovecorev1alpha1.PCSRe bases[i] = refs[i].ResourceSharingSpec } allErrs = append(allErrs, v.validateResourceSharingSpecs(bases, fldPath)...) + allErrs = append(allErrs, validatePodResourceClaimReferenceNames(v.pcs.Name, v.pcs.Spec.Replicas, bases, fldPath)...) cliqueNames := sets.New[string]() for _, c := range v.pcs.Spec.Template.Cliques { @@ -185,6 +184,11 @@ func (v *pcsValidator) validatePCSGResourceSharing(cfg grovecorev1alpha1.PodCliq bases[i] = cfg.ResourceSharing[i].ResourceSharingSpec } allErrs = append(allErrs, v.validateResourceSharingSpecs(bases, fldPath)...) + pcsgFQN := apicommon.GeneratePodCliqueScalingGroupName( + apicommon.ResourceNameReplica{Name: v.pcs.Name, Replica: int(max(0, v.pcs.Spec.Replicas-1))}, + cfg.Name, + ) + allErrs = append(allErrs, validatePodResourceClaimReferenceNames(pcsgFQN, cfg.MaxReplicas(), bases, fldPath)...) for i, ref := range cfg.ResourceSharing { if ref.Filter == nil { continue @@ -326,7 +330,7 @@ func (v *pcsValidator) validatePodCliqueNameConstraints(fldPath *field.Path, cli // validateStandalonePodClique validates pod naming constraints for PodCliques that are not part of any scaling group. func validateStandalonePodClique(fldPath *field.Path, v *pcsValidator, cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec) field.ErrorList { allErrs := field.ErrorList{} - if err := validatePodNameConstraints(v.pcs.Name, "", cliqueTemplateSpec.Name); err != nil { + if err := validatePodNameConstraints(v.pcs.Name, v.pcs.Spec.Replicas, "", 0, cliqueTemplateSpec.Name, cliqueTemplateSpec.Spec.MaxReplicas()); err != nil { // add error to each of filed paths that compose the podName in case of a PodCliqueTemplateSpec allErrs = append(allErrs, field.Invalid(fldPath.Child("name"), cliqueTemplateSpec.Name, err.Error())) allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), v.pcs.Name, err.Error())) @@ -352,7 +356,7 @@ func (v *pcsValidator) validatePodCliqueScalingGroupConfigs(fldPath *field.Path) pclqScalingGroupNames = append(pclqScalingGroupNames, scalingGroupConfig.Name) cliqueNamesAcrossAllScalingGroups = append(cliqueNamesAcrossAllScalingGroups, scalingGroupConfig.CliqueNames...) // validate that scaling groups only contains clique names that are defined in the PodCliqueSet. - allErrs = append(allErrs, v.validateScalingGroupPodCliqueNames(scalingGroupConfig.Name, allPodCliqueSetCliqueNames, + allErrs = append(allErrs, v.validateScalingGroupPodCliqueNames(scalingGroupConfig, allPodCliqueSetCliqueNames, scalingGroupConfig.CliqueNames, fldPath.Index(i).Child("cliqueNames"), fldPath.Index(i).Child("name"))...) // validate Replicas field @@ -439,6 +443,7 @@ func (v *pcsValidator) validatePodCliqueTemplateSpec(cliqueTemplateSpec *groveco allErrs = append(allErrs, apivalidation.ValidateAnnotations(cliqueTemplateSpec.Annotations, fldPath.Child("annotations"))...) allErrs = append(allErrs, v.validateResourceSharingSpecs(cliqueTemplateSpec.ResourceSharing, fldPath.Child("resourceSharing"))...) + allErrs = append(allErrs, v.validatePCLQResourceClaimReferenceNames(cliqueTemplateSpec, fldPath.Child("resourceSharing"))...) warnings, errs := v.validatePodCliqueSpec(cliqueTemplateSpec.Name, cliqueTemplateSpec.Spec, fldPath.Child("spec")) if len(errs) != 0 { allErrs = append(allErrs, errs...) @@ -484,7 +489,7 @@ func (v *pcsValidator) getScalingGroupCliqueNames() sets.Set[string] { } // validateScalingGroupPodCliqueNames validates that scaling group clique references exist and meet naming constraints. -func (v *pcsValidator) validateScalingGroupPodCliqueNames(pcsgName string, allPclqNames, pclqNameInScalingGrp []string, fldPath, pcsgNameFieldPath *field.Path) field.ErrorList { +func (v *pcsValidator) validateScalingGroupPodCliqueNames(scalingGroupConfig grovecorev1alpha1.PodCliqueScalingGroupConfig, allPclqNames, pclqNameInScalingGrp []string, fldPath, pcsgNameFieldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} _, unidentifiedPclqNames := lo.Difference(allPclqNames, lo.Uniq(pclqNameInScalingGrp)) @@ -494,7 +499,12 @@ func (v *pcsValidator) validateScalingGroupPodCliqueNames(pcsgName string, allPc // validate scaling group PodClique pods names are valid. for i, pclqName := range pclqNameInScalingGrp { - if err := validatePodNameConstraints(v.pcs.Name, pcsgName, pclqName); err != nil { + maxPCLQReplicas := int32(1) + if templateSpec := componentutils.FindPodCliqueTemplateSpecByName(v.pcs, pclqName); templateSpec != nil { + maxPCLQReplicas = templateSpec.Spec.MaxReplicas() + } + + if err := validatePodNameConstraints(v.pcs.Name, v.pcs.Spec.Replicas, scalingGroupConfig.Name, scalingGroupConfig.MaxReplicas(), pclqName, maxPCLQReplicas); err != nil { // add error to each of filed paths that compose the podName allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), pclqName, err.Error())) allErrs = append(allErrs, field.Invalid(pcsgNameFieldPath, pclqName, err.Error())) @@ -504,6 +514,66 @@ func (v *pcsValidator) validateScalingGroupPodCliqueNames(pcsgName string, allPc return allErrs } +func (v *pcsValidator) validatePCLQResourceClaimReferenceNames(cliqueTemplateSpec *grovecorev1alpha1.PodCliqueTemplateSpec, fldPath *field.Path) field.ErrorList { + if len(cliqueTemplateSpec.ResourceSharing) == 0 { + return nil + } + + pcsNameReplica := apicommon.ResourceNameReplica{Name: v.pcs.Name, Replica: int(max(0, v.pcs.Spec.Replicas-1))} + + var ownerNames []string + for _, scalingGroupConfig := range v.pcs.Spec.Template.PodCliqueScalingGroupConfigs { + if !slices.Contains(scalingGroupConfig.CliqueNames, cliqueTemplateSpec.Name) { + continue + } + pcsgFQN := apicommon.GeneratePodCliqueScalingGroupName(pcsNameReplica, scalingGroupConfig.Name) + ownerNames = append(ownerNames, apicommon.GeneratePodCliqueName( + apicommon.ResourceNameReplica{Name: pcsgFQN, Replica: int(max(0, scalingGroupConfig.MaxReplicas()-1))}, + cliqueTemplateSpec.Name, + )) + } + if len(ownerNames) == 0 { + ownerNames = append(ownerNames, apicommon.GeneratePodCliqueName(pcsNameReplica, cliqueTemplateSpec.Name)) + } + + allErrs := field.ErrorList{} + for _, ownerName := range ownerNames { + allErrs = append(allErrs, validatePodResourceClaimReferenceNames(ownerName, cliqueTemplateSpec.Spec.MaxReplicas(), cliqueTemplateSpec.ResourceSharing, fldPath)...) + } + return allErrs +} + +func validatePodResourceClaimReferenceNames(ownerName string, maxReplicas int32, refs []grovecorev1alpha1.ResourceSharingSpec, fldPath *field.Path) field.ErrorList { + allErrs := field.ErrorList{} + maxReplicaIndex := int(max(0, maxReplicas-1)) + + for i, ref := range refs { + var rcName string + + switch ref.Scope { + case grovecorev1alpha1.ResourceSharingScopeAllReplicas: + rcName = resourceclaim.RCName(ownerName, &ref, nil) + case grovecorev1alpha1.ResourceSharingScopePerReplica: + rcName = resourceclaim.RCName(ownerName, &ref, &maxReplicaIndex) + default: + continue + } + + if validationErrs := k8svalidation.IsDNS1123Label(rcName); len(validationErrs) > 0 { + allErrs = append(allErrs, field.Invalid( + fldPath.Index(i).Child("name"), + ref.Name, + fmt.Sprintf( + "generated pod resource claim reference name %q is invalid: %s. Pod resource claim names must fit DNS_LABEL because they are written to pod.spec.resourceClaims[].name and container resource claim references; shorten the resourceSharing name or generated owner name, or reduce max replicas", + rcName, strings.Join(validationErrs, "; "), + ), + )) + } + } + + return allErrs +} + // validatePodCliqueSpec validates the specification of a PodClique including replicas, minAvailable, dependencies, and autoscaling configuration. func (v *pcsValidator) validatePodCliqueSpec(name string, cliqueSpec grovecorev1alpha1.PodCliqueSpec, fldPath *field.Path) ([]string, field.ErrorList) { allErrs := field.ErrorList{} @@ -989,33 +1059,45 @@ func (v *pcsValidator) validatePodCliqueUpdate(oldCliques []*grovecorev1alpha1.P return allErrs } -// validatePodNameConstraints validates Grove pod name component constraints. -// This function validates the constraints for component names that will be used -// to construct pod names. +// validatePodNameConstraints validates the longest generated Grove pod name and hostname. // // Pod names that belong to a PCSG follow the format: -// ----- +// ------ // // Pod names that do not belong to a PCSG follow the format: -// --- -// -// Constraints: -// - Random string + hyphens: 10 chars for PCSG pods, 8 chars for non-PCSG pods -// - Max sum of all resource name characters: 45 chars -func validatePodNameConstraints(pcsName, pcsgName, pclqName string) error { - // Check resource name constraints - resourceNameLength := len(pcsName) + len(pclqName) +// ---- +func validatePodNameConstraints(pcsName string, pcsReplicas int32, pcsgName string, pcsgReplicas int32, pclqName string, pclqReplicas int32) error { + pclqOwnerNameReplica := apicommon.ResourceNameReplica{Name: pcsName, Replica: int(max(0, pcsReplicas-1))} if pcsgName != "" { - resourceNameLength += len(pcsgName) + pcsgFQN := apicommon.GeneratePodCliqueScalingGroupName(pclqOwnerNameReplica, pcsgName) + pclqOwnerNameReplica = apicommon.ResourceNameReplica{Name: pcsgFQN, Replica: int(max(0, pcsgReplicas-1))} } - if resourceNameLength > maxCombinedResourceNameLength { - if pcsgName != "" { - return fmt.Errorf("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodCliqueSet '%s', PodCliqueScalingGroup '%s', or PodClique '%s'", - resourceNameLength, pcsName, pcsgName, pclqName) - } - return fmt.Errorf("combined resource name length %d exceeds 45-character limit required for pod naming. Consider shortening: PodCliqueSet '%s' or PodClique '%s'", - resourceNameLength, pcsName, pclqName) + pclqFQN := apicommon.GeneratePodCliqueName(pclqOwnerNameReplica, pclqName) + podIndex := int(max(0, pclqReplicas-1)) + podHostname := apicommon.GeneratePodHostname(pclqFQN, podIndex) + if validationErrs := k8svalidation.IsDNS1123Label(podHostname); len(validationErrs) > 0 { + return generatedPodConstraintError("hostname", podHostname, strings.Join(validationErrs, "; "), pcsName, pcsgName, pclqName) + } + + podName := apicommon.GeneratePodNameWithSuffix( + pclqFQN, + podIndex, + strings.Repeat("x", apicommon.PodNameRandomSuffixLength), + ) + if validationErrs := k8svalidation.IsDNS1123Subdomain(podName); len(validationErrs) > 0 { + return generatedPodConstraintError("name", podName, strings.Join(validationErrs, "; "), pcsName, pcsgName, pclqName) } + return nil } + +func generatedPodConstraintError(fieldName, value, details, pcsName, pcsgName, pclqName string) error { + if pcsgName != "" { + return fmt.Errorf("generated pod %s %q is invalid: %s. Shorten PodCliqueSet %q, PodCliqueScalingGroup %q, or PodClique %q, or reduce max replicas", + fieldName, value, details, pcsName, pcsgName, pclqName) + } + + return fmt.Errorf("generated pod %s %q is invalid: %s. Shorten PodCliqueSet %q or PodClique %q, or reduce max replicas", + fieldName, value, details, pcsName, pclqName) +} diff --git a/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go b/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go index a8e353c79..0680be62c 100644 --- a/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go +++ b/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go @@ -162,6 +162,265 @@ func TestResourceNamingValidation(t *testing.T) { } } +func TestGeneratedPodNameValidationUsesMaxReplicaIndexes(t *testing.T) { + for _, tt := range []struct { + name string + pcs *grovecorev1alpha1.PodCliqueSet + wantErr bool + }{ + { + name: "allows generated pod name above DNS label limit when hostname is valid", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 30), "default", uuid.NewUUID()). + WithReplicas(101). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 26)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build(), + }, + { + name: "standalone PodClique checks PCS max replica index for hostname", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 30), "default", uuid.NewUUID()). + WithReplicas(1001). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 26)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build(), + wantErr: true, + }, + { + name: "allows generated pod name at length limit", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 30), "default", uuid.NewUUID()). + WithReplicas(11). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 23)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build(), + }, + { + name: "standalone PodClique checks PodClique max pod index", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 30), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 26)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + WithScaleConfig(ptr.To(int32(1)), 1001). + Build()). + Build(), + wantErr: true, + }, + { + name: "PCSG PodClique checks PCSG max replica index", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 20), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 15)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + WithPodCliqueScalingGroupConfig(grovecorev1alpha1.PodCliqueScalingGroupConfig{ + Name: strings.Repeat("c", 15), + CliqueNames: []string{strings.Repeat("b", 15)}, + Replicas: ptr.To(int32(1)), + MinAvailable: ptr.To(int32(1)), + ScaleConfig: &grovecorev1alpha1.AutoScalingConfig{ + MinReplicas: ptr.To(int32(1)), + MaxReplicas: 101, + }, + }). + Build(), + }, + { + name: "PCSG PodClique checks PCSG max replica index for hostname", + pcs: testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 20), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 19)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + WithPodCliqueScalingGroupConfig(grovecorev1alpha1.PodCliqueScalingGroupConfig{ + Name: strings.Repeat("c", 15), + CliqueNames: []string{strings.Repeat("b", 19)}, + Replicas: ptr.To(int32(1)), + MinAvailable: ptr.To(int32(1)), + ScaleConfig: &grovecorev1alpha1.AutoScalingConfig{ + MinReplicas: ptr.To(int32(1)), + MaxReplicas: 101, + }, + }). + Build(), + wantErr: true, + }, + { + name: "rejects generated hostnames with dots", + pcs: testutils.NewPodCliqueSetBuilder("workload.name", "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder("worker"). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build(), + wantErr: true, + }, + } { + t.Run(tt.name, func(t *testing.T) { + validator := newPCSValidator(tt.pcs, admissionv1.Create, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{ + Profiles: []groveconfigv1alpha1.SchedulerProfile{ + {Name: groveconfigv1alpha1.SchedulerNameKube}, + }, + DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube), + }, nil, testutils.NewDefaultFakeRegistry()) + + _, errs := validator.validate() + + if tt.wantErr { + require.Error(t, errs.ToAggregate()) + } else { + require.NoError(t, errs.ToAggregate()) + } + }) + } +} + +func TestGeneratedPodResourceClaimReferenceNameValidation(t *testing.T) { + for _, tt := range []struct { + name string + pcs *grovecorev1alpha1.PodCliqueSet + errorMatchers []testutils.ErrorMatcher + }{ + { + name: "allows PCLQ resource claim reference name at DNS label limit", + pcs: func() *grovecorev1alpha1.PodCliqueSet { + pcs := testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 44), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 8)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build() + pcs.Spec.Template.Cliques[0].ResourceSharing = []grovecorev1alpha1.ResourceSharingSpec{ + {Name: "gpu", Scope: grovecorev1alpha1.ResourceSharingScopeAllReplicas}, + } + return pcs + }(), + }, + { + name: "rejects overlong PCS resource claim reference name", + pcs: func() *grovecorev1alpha1.PodCliqueSet { + pcs := testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 56), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder("b"). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build() + pcs.Spec.Template.ResourceSharing = []grovecorev1alpha1.PCSResourceSharingSpec{ + {ResourceSharingSpec: grovecorev1alpha1.ResourceSharingSpec{Name: "gpu", Scope: grovecorev1alpha1.ResourceSharingScopeAllReplicas}}, + } + return pcs + }(), + errorMatchers: []testutils.ErrorMatcher{ + {ErrorType: field.ErrorTypeInvalid, Field: "spec.template.resourceSharing[0].name"}, + }, + }, + { + name: "rejects overlong PCSG resource claim reference name", + pcs: func() *grovecorev1alpha1.PodCliqueSet { + pcs := testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 45), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder("b"). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + WithPodCliqueScalingGroupConfig(grovecorev1alpha1.PodCliqueScalingGroupConfig{ + Name: strings.Repeat("c", 8), + CliqueNames: []string{"b"}, + Replicas: ptr.To(int32(1)), + MinAvailable: ptr.To(int32(1)), + ResourceSharing: []grovecorev1alpha1.PCSGResourceSharingSpec{ + {ResourceSharingSpec: grovecorev1alpha1.ResourceSharingSpec{Name: "gpu", Scope: grovecorev1alpha1.ResourceSharingScopeAllReplicas}}, + }, + }). + Build() + return pcs + }(), + errorMatchers: []testutils.ErrorMatcher{ + {ErrorType: field.ErrorTypeInvalid, Field: "spec.template.podCliqueScalingGroups[0].resourceSharing[0].name"}, + }, + }, + { + name: "rejects overlong PCLQ resource claim reference name", + pcs: func() *grovecorev1alpha1.PodCliqueSet { + pcs := testutils.NewPodCliqueSetBuilder(strings.Repeat("a", 50), "default", uuid.NewUUID()). + WithReplicas(1). + WithTerminationDelay(4 * time.Hour). + WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)). + WithPodCliqueTemplateSpec(testutils.NewPodCliqueTemplateSpecBuilder(strings.Repeat("b", 8)). + WithReplicas(1). + WithRoleName("worker"). + WithMinAvailable(1). + Build()). + Build() + pcs.Spec.Template.Cliques[0].ResourceSharing = []grovecorev1alpha1.ResourceSharingSpec{ + {Name: "gpu", Scope: grovecorev1alpha1.ResourceSharingScopeAllReplicas}, + } + return pcs + }(), + errorMatchers: []testutils.ErrorMatcher{ + {ErrorType: field.ErrorTypeInvalid, Field: "spec.template.cliques[0].resourceSharing[0].name"}, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + validator := newPCSValidator(tt.pcs, admissionv1.Create, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{ + Profiles: []groveconfigv1alpha1.SchedulerProfile{ + {Name: groveconfigv1alpha1.SchedulerNameKube}, + }, + DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube), + }, nil, testutils.NewDefaultFakeRegistry()) + + _, errs := validator.validate() + + if tt.errorMatchers != nil { + testutils.AssertErrorMatches(t, errs, tt.errorMatchers) + } else { + require.NoError(t, errs.ToAggregate()) + } + }) + } +} + func TestValidateSchedulerNames(t *testing.T) { specPath := field.NewPath("cliques").Child("spec").Child("podSpec").Child("schedulerName") tests := []struct { diff --git a/operator/samples/user-guide/03_environment-variables-for-pod-discovery/standalone-env-vars.yaml b/operator/samples/user-guide/03_environment-variables-for-pod-discovery/standalone-env-vars.yaml index 8b8aba9c9..808cfd814 100644 --- a/operator/samples/user-guide/03_environment-variables-for-pod-discovery/standalone-env-vars.yaml +++ b/operator/samples/user-guide/03_environment-variables-for-pod-discovery/standalone-env-vars.yaml @@ -22,7 +22,7 @@ spec: - name: app image: busybox:latest command: ["/bin/sh"] - args: + args: - "-c" - | echo "=== Grove Environment Variables ===" @@ -49,6 +49,3 @@ spec: requests: cpu: "10m" memory: "32Mi" - - -