From a5bed19cb5a55a1e515d7c7df53ede25cff0bf91 Mon Sep 17 00:00:00 2001 From: ColdsteelRail <574252631@qq.com> Date: Mon, 27 Apr 2026 17:31:41 +0800 Subject: [PATCH 1/5] feat: use kube-utils certmanager for webhook certificate auto-rotation - Replace manual certificate generation with kube-utils/webhook/certmanager - Add volumeMounts in Helm chart for webhook-certs Secret - Unify Secret name to "webhook-certs" (matching Helm chart) - Auto-detect and rotate expired certificates - Add design plan document in docs/plans/ Fixes TLS handshake timeout issue when Helm updates controller. Co-Authored-By: Claude Opus 4.6 --- charts/templates/statefulset.yaml | 4 + .../webhook-certificate-auto-rotation.md | 218 +++++++++++++++++ go.mod | 4 +- go.sum | 8 +- main.go | 15 +- pkg/utils/pki_helpers.go | 86 ------- pkg/webhook/webhook.go | 221 ++---------------- 7 files changed, 259 insertions(+), 297 deletions(-) create mode 100644 docs/plans/webhook-certificate-auto-rotation.md delete mode 100644 pkg/utils/pki_helpers.go diff --git a/charts/templates/statefulset.yaml b/charts/templates/statefulset.yaml index 925e965c..6186186c 100644 --- a/charts/templates/statefulset.yaml +++ b/charts/templates/statefulset.yaml @@ -52,6 +52,10 @@ spec: periodSeconds: 10 resources: {{- toYaml .Values.resources | nindent 10 }} + volumeMounts: + - name: webhook-certs + mountPath: /webhook-certs + readOnly: false serviceAccountName: {{ .Values.serviceAccountName }} terminationGracePeriodSeconds: 0 volumes: diff --git a/docs/plans/webhook-certificate-auto-rotation.md b/docs/plans/webhook-certificate-auto-rotation.md new file mode 100644 index 00000000..70195b64 --- /dev/null +++ b/docs/plans/webhook-certificate-auto-rotation.md @@ -0,0 +1,218 @@ +# Webhook Certificate Auto-Rotation Design + +## Background + +### Problem Description + +When users update kuperator via Helm, even after the controller starts normally, webhook calls fail with `TLS handshake timeout` error. + +### Root Cause Analysis + +1. 
**Missing volumeMounts in Helm Chart** + + The StatefulSet defines a volume `webhook-certs` but never mounts it: + ```yaml + volumes: + - name: webhook-certs + secret: + secretName: webhook-certs + # Missing: volumeMounts in container spec! + ``` + +2. **Secret Name Mismatch** + + - Code creates Secret: `kusionstack-webhook-certs` + - Helm chart references: `webhook-certs` + +3. **No Certificate Expiration Detection** + + Certificates are valid for 1 year, but there is no mechanism to detect expiration or rotate them automatically. + +4. **Timing Issue** + + Certificate generation happens after the webhook server tries to start, causing a TLS handshake timeout. + +5. **Temporary Directory Issue** + + Without a volumeMount, certificates are written to a temporary directory that disappears on pod restart. + +## Solution + +Use the `kusionstack.io/kube-utils/webhook/certmanager` package, which provides: + +- Automatic certificate expiration detection and rotation +- Unified Secret name configuration +- Continuous CABundle synchronization via a Watch mechanism +- Non-leader-election controller (all replicas run) + +## Implementation + +### Files Changed + +| File | Change | +|------|--------| +| `pkg/webhook/webhook.go` | Refactored to use certmanager | +| `main.go` | Updated Initialize call signature | +| `charts/templates/statefulset.yaml` | Added volumeMounts | +| `pkg/utils/pki_helpers.go` | Deleted (no longer needed) | +| `go.mod` | Bumped kube-utils; added afero and golib as indirect dependencies | + +### Code Changes + +#### 1. 
`pkg/webhook/webhook.go` + +Before: ~260 lines with manual certificate generation +After: ~75 lines using certmanager + +```go +func Initialize(ctx context.Context, mgr manager.Manager, dnsName string) error { + cfg := certmanager.CertConfig{ + Host: dnsName, + Namespace: getNamespace(), + SecretName: "webhook-certs", // Unified name + MutatingWebhookNames: []string{"kusionstack-controller-manager-mutating"}, + ValidatingWebhookNames: []string{"kusionstack-controller-manager-validating"}, + } + + certMgr := certmanager.New(mgr, cfg) + return certMgr.SetupWithManager(mgr) +} +``` + +#### 2. `main.go` + +```go +// Before +if err := webhook.Initialize(ctx, config, dnsName, certDir); err != nil { ... } + +// After +if err := webhook.Initialize(ctx, mgr, dnsName); err != nil { ... } +``` + +Default `cert-dir` changed from temp directory to `/webhook-certs`. + +#### 3. `charts/templates/statefulset.yaml` + +```yaml +containers: +- name: manager + volumeMounts: + - name: webhook-certs + mountPath: /webhook-certs + readOnly: false # Need write permission for cert rotation +``` + +## Certificate Workflow + +### Startup Sequence + +``` +1. ctrl.NewManager() + └── webhookServer created with CertDir="/webhook-certs" + +2. webhook.Initialize(ctx, mgr, dnsName) + └── certmanager.SetupWithManager + ├── FSCertProvider initialized (path="/webhook-certs") + ├── SecretCertProvider initialized (secret="webhook-certs") + └── Manual Reconcile called + ├── Load/Generate certs from Secret + ├── Write certs to filesystem + │ ├── tls.key (server private key) + │ ├── tls.crt (server certificate) + │ ├── ca.key (CA private key) + │ └── ca.crt (CA certificate) + └── Update CABundle in WebhookConfigurations + +3. mgr.Start(ctx) + └── webhook.Server.Start() + ├── certwatcher reads tls.key + tls.crt + ├── Starts fsnotify watcher for file changes + └── TLS server ready +``` + +### Certificate Auto-Rotation + +``` +Secret/WebhookConfiguration change triggers Reconcile: + +1. 
SecretCertProvider.Ensure() + ├── Load existing certs from Secret + └── Validate(host) - check expiration and DNSName + └── If invalid: GenerateSelfSignedCerts() + +2. FSCertProvider.Overwrite() + └── Write new certs to filesystem + └── fsnotify detects file change + +3. certwatcher.handleEvent() + └── ReadCertificate() - reload certs + └── Update currentCert + +4. New TLS connections automatically use new certificate + (no server restart needed) +``` + +## Certificate Files Explanation + +| File | Purpose | Location | +|------|---------|----------| +| `ca.key` | CA private key for signing certificates | Secret, filesystem (not used directly) | +| `ca.crt` | CA certificate = CABundle for verification | Secret, WebhookConfiguration.CABundle | +| `tls.key` | Server private key for TLS encryption | Secret, filesystem (webhook server) | +| `tls.crt` | Server certificate for TLS identity | Secret, filesystem (webhook server) | + +### TLS Handshake Flow + +``` +API Server (Client) Webhook Server +───────────────── ──────────────── + +1. Connect to port 9443 ────────────────────────▶ + +2. ◀─── Send tls.crt (server certificate) + +3. Verify using CABundle (ca.crt) ───────────────▶ + ✓ Signature valid + ✓ Not expired + ✓ DNSName matches + +4. Key exchange with tls.crt's public key ───────▶ + +5. ◀─── Decrypt with tls.key, establish encrypted channel + +6. Send AdmissionReview (encrypted) ────────────▶ + +7. 
◀─── Response (encrypted) +``` + +## Comparison + +| Feature | Old Approach | New Approach (certmanager) | +|---------|-------------|---------------------------| +| Certificate expiration detection | ❌ None | ✅ Validate() auto-detects | +| Auto rotation | ❌ Manual delete Secret | ✅ Expired → auto regenerate | +| Secret name consistency | ❌ Mismatch | ✅ Unified to `webhook-certs` | +| volumeMounts | ❌ Missing | ✅ Added | +| CABundle sync | Only at startup | ✅ Watch + continuous sync | +| TLS handshake | May timeout | ✅ Certs ready before server starts | +| Multi-replica support | Leader election exclusive | ✅ All replicas run (NeedLeaderElection=false) | + +## Dependencies Added + +- `github.com/spf13/afero v1.11.0` - filesystem abstraction +- `github.com/zoumo/golib v0.2.0` - certificate utilities +- `kusionstack.io/kube-utils/cert` - cert provider package +- `kusionstack.io/kube-utils/webhook/certmanager` - cert manager controller + +## Testing + +All webhook tests pass: +``` +ok kusionstack.io/kuperator/pkg/webhook/server/generic/collaset +ok kusionstack.io/kuperator/pkg/webhook/server/generic/pod/gracedelete +ok kusionstack.io/kuperator/pkg/webhook/server/generic/pod/opslifecycle +ok kusionstack.io/kuperator/pkg/webhook/server/generic/poddecoration +ok kusionstack.io/kuperator/pkg/webhook/server/generic/podtransitionrule +``` + +Build successful: `go build ./...` \ No newline at end of file diff --git a/go.mod b/go.mod index bbc0d796..8a5ad6da 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( k8s.io/kubernetes v0.0.0-00010101000000-000000000000 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 kusionstack.io/kube-api v0.7.5-0.20260127130112-9424ce325e09 - kusionstack.io/kube-utils v0.2.1-0.20251120063041-6043805ee00d + kusionstack.io/kube-utils v0.2.1-0.20251219073659-c81662b5b6a3 kusionstack.io/kube-xset v0.0.2-0.20260127130229-a7a010eba7e0 kusionstack.io/resourceconsist v0.0.1 sigs.k8s.io/controller-runtime v0.17.3 @@ -34,6 +34,8 @@ require ( 
github.com/golang/protobuf v1.5.3 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect github.com/samber/lo v1.47.0 // indirect + github.com/spf13/afero v1.11.0 // indirect + github.com/zoumo/golib v0.2.0 // indirect ) require ( diff --git a/go.sum b/go.sum index fcfdc7c4..51f37dfe 100644 --- a/go.sum +++ b/go.sum @@ -546,6 +546,8 @@ github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= +github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= @@ -594,6 +596,8 @@ github.com/yuin/goldmark v1.1.30/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zoumo/golib v0.2.0 h1:K6W8WWrgnl2bXRvUaiXjAaiFKsCTHwnrBkBHZoFr8lE= +github.com/zoumo/golib v0.2.0/go.mod h1:gOMPRvDgn9m49tfHoKUb2RO0NqplNoe/qj5/ZrczjgQ= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= @@ -1079,8 +1083,8 @@ k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 
h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6J k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= kusionstack.io/kube-api v0.7.5-0.20260127130112-9424ce325e09 h1:Kgc1N61F9KoBi1sHCrwoN8ax0j+0f1n6dQDQe2Luy9M= kusionstack.io/kube-api v0.7.5-0.20260127130112-9424ce325e09/go.mod h1:e1jtrQH2LK5fD2nTyfIXG6nYrYbU8VXShRxTRwVPaLk= -kusionstack.io/kube-utils v0.2.1-0.20251120063041-6043805ee00d h1:iQtnK03ia/MN4K/6O75EMI91ep7jpcIG0pWyeREBqtg= -kusionstack.io/kube-utils v0.2.1-0.20251120063041-6043805ee00d/go.mod h1:KEHTfo1Y8SWMODnckF6daO2cSIW0FJ8fkk8PBA5O2GU= +kusionstack.io/kube-utils v0.2.1-0.20251219073659-c81662b5b6a3 h1:0cXP1HAHG06Rf2Zztcep1LXkksd/mwppnozHy+mco6I= +kusionstack.io/kube-utils v0.2.1-0.20251219073659-c81662b5b6a3/go.mod h1:Lz5SBYWg9+jw+kP0CAyf/b62D5DeUPf6+jE1d0WC4cI= kusionstack.io/kube-xset v0.0.2-0.20260127130229-a7a010eba7e0 h1:mU1Jjdfgihju0xiYMmW/jSTGhovca/WEID7QzJrwkmw= kusionstack.io/kube-xset v0.0.2-0.20260127130229-a7a010eba7e0/go.mod h1:FceKgqapMHhwiyIqCziTQRW27fsSXpPS611AApnyiYI= kusionstack.io/resourceconsist v0.0.1 h1:+k/jriq5Ld7fQUYfWSMGynz/FesHtl3Rk2fmQPjBe0g= diff --git a/main.go b/main.go index 46855fb3..9f2118f6 100644 --- a/main.go +++ b/main.go @@ -20,7 +20,6 @@ import ( "context" "flag" "os" - "path/filepath" "github.com/spf13/pflag" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -62,8 +61,8 @@ func main() { flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") - flag.StringVar(&certDir, "cert-dir", webhookTempCertDir(), "The directory that contains the server key and certificate. 
If not set, webhook server would look up the server key and certificate in {TempDir}/k8s-webhook-server/serving-certs") - flag.StringVar(&dnsName, "dns-name", "kusionstack-controller-manager.kusionstack-system.svc", "The DNS name of the webhook server.") + flag.StringVar(&certDir, "cert-dir", "/webhook-certs", "The directory that contains the server key and certificate for webhook.") + flag.StringVar(&dnsName, "dns-name", "controller-manager.kusionstack-system.svc", "The DNS name of the webhook server.") klog.InitFlags(nil) defer klog.Flush() @@ -122,9 +121,9 @@ func main() { } // +kubebuilder:scaffold:builder - setupLog.Info("initialize webhook") - if err := webhook.Initialize(context.Background(), config, dnsName, certDir); err != nil { - setupLog.Error(err, "unable to initialize webhook") + setupLog.Info("initialize webhook cert manager") + if err := webhook.Initialize(context.Background(), mgr, dnsName); err != nil { + setupLog.Error(err, "unable to initialize webhook cert manager") os.Exit(1) } @@ -143,7 +142,3 @@ func main() { os.Exit(1) } } - -func webhookTempCertDir() string { - return filepath.Join(os.TempDir(), "k8s-webhook-server", "serving-certs") -} diff --git a/pkg/utils/pki_helpers.go b/pkg/utils/pki_helpers.go deleted file mode 100644 index 9836b632..00000000 --- a/pkg/utils/pki_helpers.go +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright 2019 The Kubernetes Authors. -Copyright 2023 The KusionStack Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package utils - -import ( - "crypto" - cryptorand "crypto/rand" - "crypto/rsa" - "crypto/x509" - "crypto/x509/pkix" - "encoding/pem" - "math" - "math/big" - "time" - - "github.com/pkg/errors" - certutil "k8s.io/client-go/util/cert" -) - -const ( - certificateBlockType = "CERTIFICATE" - rsaKeySize = 2048 - duration365d = time.Hour * 24 * 365 -) - -// NewPrivateKey creates an RSA private key -func NewPrivateKey() (*rsa.PrivateKey, error) { - return rsa.GenerateKey(cryptorand.Reader, rsaKeySize) -} - -// EncodeCertPEM returns PEM-encoded certificate data -func EncodeCertPEM(cert *x509.Certificate) []byte { - block := pem.Block{ - Type: certificateBlockType, - Bytes: cert.Raw, - } - return pem.EncodeToMemory(&block) -} - -// NewSignedCert creates a signed certificate using the given CA certificate and key -func NewSignedCert(cfg *certutil.Config, key crypto.Signer, caCert *x509.Certificate, caKey crypto.Signer) (*x509.Certificate, error) { - serial, err := cryptorand.Int(cryptorand.Reader, new(big.Int).SetInt64(math.MaxInt64)) - if err != nil { - return nil, err - } - if len(cfg.CommonName) == 0 { - return nil, errors.New("must specify a CommonName") - } - if len(cfg.Usages) == 0 { - return nil, errors.New("must specify at least one ExtKeyUsage") - } - - certTmpl := x509.Certificate{ - Subject: pkix.Name{ - CommonName: cfg.CommonName, - Organization: cfg.Organization, - }, - DNSNames: cfg.AltNames.DNSNames, - IPAddresses: cfg.AltNames.IPs, - SerialNumber: serial, - NotBefore: caCert.NotBefore, - NotAfter: time.Now().Add(duration365d).UTC(), - KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, - ExtKeyUsage: cfg.Usages, - } - certDERBytes, err := x509.CreateCertificate(cryptorand.Reader, &certTmpl, caCert, key.Public(), caKey) - if err != nil { - return nil, err - } - return x509.ParseCertificate(certDERBytes) -} diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index 3d93a971..b094be5d 100644 --- a/pkg/webhook/webhook.go +++ 
b/pkg/webhook/webhook.go @@ -18,31 +18,18 @@ package webhook import ( "context" - "crypto" - "crypto/rsa" - "crypto/x509" "os" - "path/filepath" - "github.com/pkg/errors" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/util/cert" - "k8s.io/client-go/util/keyutil" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/manager" - "kusionstack.io/kuperator/pkg/utils" + certmanager "kusionstack.io/kube-utils/webhook/certmanager" ) const ( mutatingWebhookConfigurationName = "kusionstack-controller-manager-mutating" validatingWebhookConfigurationName = "kusionstack-controller-manager-validating" - webhookCertsSecretName = "kusionstack-webhook-certs" + webhookCertsSecretName = "webhook-certs" ) // AddToManagerFuncs is a list of functions to add all Webhook Servers to the Manager @@ -61,198 +48,36 @@ func AddToManager(m manager.Manager) error { return nil } -func Initialize(ctx context.Context, config *rest.Config, dnsName, certDir string) error { - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - return err - } - return ensureWebhookCABundleAndCert(ctx, clientset, dnsName, certDir) -} - -func ensureWebhookCABundleAndCert(ctx context.Context, clientset *kubernetes.Clientset, dnsName, certDir string) error { - secret, err := ensureWebhookSecret(ctx, clientset, dnsName) - if err != nil { - return err - } - klog.Infof("webhook secret ensured, secret: %s", secret.Name) - - mwhc, err := clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Get(ctx, mutatingWebhookConfigurationName, metav1.GetOptions{}) - if err != nil { - return err - } - - for i := range mwhc.Webhooks { - if mwhc.Webhooks[i].ClientConfig.CABundle == nil { - mwhc.Webhooks[i].ClientConfig.CABundle = secret.Data["ca.crt"] - } - } - err = retry.RetryOnConflict(retry.DefaultRetry, func() error { - 
_, err := clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Update(ctx, mwhc, metav1.UpdateOptions{}) - return err - }) - if err != nil { +// Initialize sets up the webhook certificate manager with auto-rotation support. +// It uses kube-utils/certmanager which: +// - Automatically generates and rotates certificates when expired +// - Syncs certs from Secret to local filesystem +// - Updates CABundle in WebhookConfigurations +func Initialize(ctx context.Context, mgr manager.Manager, dnsName string) error { + cfg := certmanager.CertConfig{ + Host: dnsName, + Namespace: getNamespace(), + SecretName: webhookCertsSecretName, + MutatingWebhookNames: []string{mutatingWebhookConfigurationName}, + ValidatingWebhookNames: []string{validatingWebhookConfigurationName}, + } + + certMgr := certmanager.New(mgr, cfg) + if err := certMgr.SetupWithManager(mgr); err != nil { return err } - vwhc, err := clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(ctx, validatingWebhookConfigurationName, metav1.GetOptions{}) - if err != nil { - return err - } - - for i := range vwhc.Webhooks { - if vwhc.Webhooks[i].ClientConfig.CABundle == nil { - vwhc.Webhooks[i].ClientConfig.CABundle = secret.Data["ca.crt"] - } - } - err = retry.RetryOnConflict(retry.DefaultRetry, func() error { - _, err := clientset.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(ctx, vwhc, metav1.UpdateOptions{}) - return err - }) - if err != nil { - return err - } - klog.Infof("webhook ca bundle ensured, mutatingwebhookconfiguration: %s, validatingwebhookconfiguration: %s", mutatingWebhookConfigurationName, validatingWebhookConfigurationName) - - var tlsKey, tlsCert []byte - tlsKey, ok := secret.Data["tls.key"] - if !ok { - return errors.New("tls.key not found in secret") - } - tlsCert, ok = secret.Data["tls.crt"] - if !ok { - return errors.New("tls.crt not found in secret") - } - - err = ensureWebhookCert(certDir, tlsKey, tlsCert) - if err != nil { - return err - 
} - klog.Infof("webhook cert ensured, cert dir: %s", certDir) + klog.InfoS("webhook cert manager initialized", + "secret", webhookCertsSecretName, + "namespace", getNamespace(), + "host", dnsName) return nil } -func ensureWebhookSecret(ctx context.Context, clientset *kubernetes.Clientset, dnsName string) (secret *corev1.Secret, err error) { - var ( - found = true - dirty = false - ) - secret, err = clientset.CoreV1().Secrets(getNamespace()).Get(ctx, webhookCertsSecretName, metav1.GetOptions{}) - if err != nil { - if apierrors.IsNotFound(err) { - found = false - } else { - return - } - } - if found { - if secret.Data == nil || len(secret.Data) != 4 || - secret.Data["ca.key"] == nil || secret.Data["ca.crt"] == nil || - secret.Data["tls.key"] == nil || secret.Data["tls.crt"] == nil { - dirty = true - } - if !dirty { - return - } - } - - caKey, caCert, err := generateSelfSignedCACert() - if err != nil { - return - } - caKeyPEM, err := keyutil.MarshalPrivateKeyToPEM(caKey) - if err != nil { - return - } - caCertPEM := utils.EncodeCertPEM(caCert) - - privateKey, signedCert, err := generateSelfSignedCert(caCert, caKey, dnsName) - if err != nil { - return - } - privateKeyPEM, err := keyutil.MarshalPrivateKeyToPEM(privateKey) - if err != nil { - return - } - signedCertPEM := utils.EncodeCertPEM(signedCert) - - data := map[string][]byte{ - "ca.key": caKeyPEM, "ca.crt": caCertPEM, - "tls.key": privateKeyPEM, "tls.crt": signedCertPEM, - } - secret = &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: webhookCertsSecretName, - Namespace: getNamespace(), - }, - Data: data, - } - - var updatedSecret *corev1.Secret - err = retry.RetryOnConflict(retry.DefaultRetry, func() (err error) { - if dirty { - updatedSecret, err = clientset.CoreV1().Secrets(getNamespace()).Update(ctx, secret, metav1.UpdateOptions{}) - } else { - updatedSecret, err = clientset.CoreV1().Secrets(getNamespace()).Create(ctx, secret, metav1.CreateOptions{}) - } - return err - }) - return updatedSecret, err -} 
- -func generateSelfSignedCACert() (caKey *rsa.PrivateKey, caCert *x509.Certificate, err error) { - caKey, err = utils.NewPrivateKey() - if err != nil { - return - } - - caCert, err = cert.NewSelfSignedCACert(cert.Config{CommonName: "self-signed-k8s-cert"}, caKey) - - return -} - -func generateSelfSignedCert(caCert *x509.Certificate, caKey crypto.Signer, dnsName string) (privateKey *rsa.PrivateKey, signedCert *x509.Certificate, err error) { - privateKey, err = utils.NewPrivateKey() - if err != nil { - return - } - - signedCert, err = utils.NewSignedCert( - &cert.Config{ - CommonName: dnsName, - AltNames: cert.AltNames{DNSNames: []string{dnsName}}, - Usages: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, - }, - privateKey, caCert, caKey, - ) - - return -} - -func ensureWebhookCert(certDir string, tlsKey, tlsCert []byte) error { - if _, err := os.Stat(certDir); os.IsNotExist(err) { - err := os.MkdirAll(certDir, 0o777) - if err != nil { - return err - } - klog.Infof("cert dir is created: %s", certDir) - } - - keyFile := filepath.Join(certDir, "tls.key") - certFile := filepath.Join(certDir, "tls.crt") - - if err := os.WriteFile(keyFile, tlsKey, 0o644); err != nil { - return err - } - if err := os.WriteFile(certFile, tlsCert, 0o644); err != nil { - return err - } - return nil -} - func getNamespace() string { if ns := os.Getenv("POD_NAMESPACE"); len(ns) > 0 { return ns } return "kusionstack-system" -} +} \ No newline at end of file From 70c619a6888bfeca36d0bbb30707900cd2a4bf67 Mon Sep 17 00:00:00 2001 From: ColdsteelRail <574252631@qq.com> Date: Mon, 27 Apr 2026 17:56:07 +0800 Subject: [PATCH 2/5] docs: add CLAUDE.md with development guidelines - Document pre-commit checklist: make build lint fmt vet - Include project structure and key dependencies - Add code conventions and commit message style - Reference webhook certificate management design Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create 
mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..3bd12899 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,35 @@ +# Kuperator - Claude Code Project Guide + +## Development Requirements + +- Go 1.24+ +- Kubernetes 1.22+ (envtest uses 1.22.1) +- Docker (for container builds) +- Kind (for e2e testing) + +## Pre-commit Checklist + +**IMPORTANT**: Before committing and pushing code, always run: + +```bash +make build lint fmt vet +``` + +These commands ensure: +- `make build`: Compiles the manager binary (includes manifests, fmt, vet) +- `make lint`: Runs golangci-lint for code quality +- `make fmt`: Formats code with `go fmt` +- `make vet`: Runs `go vet` for static analysis + +## Key Dependencies + +- `kusionstack.io/kube-api`: CRD API definitions +- `kusionstack.io/kube-utils`: Utility packages (cert, certmanager, controller helpers) +- `kusionstack.io/kube-xset`: XSet workload framework +- `sigs.k8s.io/controller-runtime`: Kubernetes controller framework + +## Related Documentation + +- [Contribution Guide](docs/contributing.md) +- [Design Plans](docs/plans/) +- [Official Documentation](https://kusionstack.io/kuperator/introduction/) \ No newline at end of file From 962c0665b360e9d757b56b192f78831e1843c28f Mon Sep 17 00:00:00 2001 From: ColdsteelRail <574252631@qq.com> Date: Mon, 27 Apr 2026 17:58:56 +0800 Subject: [PATCH 3/5] docs: simplify CLAUDE.md, remove redundant explanations Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 3bd12899..33c14b6b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,12 +15,6 @@ make build lint fmt vet ``` -These commands ensure: -- `make build`: Compiles the manager binary (includes manifests, fmt, vet) -- `make lint`: Runs golangci-lint for code quality -- `make fmt`: Formats code with `go fmt` -- `make vet`: Runs `go vet` for static analysis - ## Key Dependencies - `kusionstack.io/kube-api`: CRD API definitions From 
b5b092a3fd335026b1815dde5bd14591abc2c4c3 Mon Sep 17 00:00:00 2001 From: ColdsteelRail <574252631@qq.com> Date: Mon, 27 Apr 2026 18:02:41 +0800 Subject: [PATCH 4/5] fix: correct import order for gci linter Co-Authored-By: Claude Opus 4.6 --- pkg/webhook/webhook.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index b094be5d..93482308 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -21,9 +21,8 @@ import ( "os" "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/manager" - certmanager "kusionstack.io/kube-utils/webhook/certmanager" + "sigs.k8s.io/controller-runtime/pkg/manager" ) const ( @@ -80,4 +79,4 @@ func getNamespace() string { return ns } return "kusionstack-system" -} \ No newline at end of file +} From 58602d141ec3d168d2bbc1b4cee05dc27d6ab039 Mon Sep 17 00:00:00 2001 From: ColdsteelRail <574252631@qq.com> Date: Mon, 27 Apr 2026 18:30:46 +0800 Subject: [PATCH 5/5] minor --- charts/templates/statefulset.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/charts/templates/statefulset.yaml b/charts/templates/statefulset.yaml index 6186186c..925e965c 100644 --- a/charts/templates/statefulset.yaml +++ b/charts/templates/statefulset.yaml @@ -52,10 +52,6 @@ spec: periodSeconds: 10 resources: {{- toYaml .Values.resources | nindent 10 }} - volumeMounts: - - name: webhook-certs - mountPath: /webhook-certs - readOnly: false serviceAccountName: {{ .Values.serviceAccountName }} terminationGracePeriodSeconds: 0 volumes: