diff --git a/agent/go/internal/history/history.go b/agent/go/internal/history/history.go new file mode 100644 index 00000000..73c5ea95 --- /dev/null +++ b/agent/go/internal/history/history.go @@ -0,0 +1,229 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package history + +import ( + "encoding/json" + "errors" + "fmt" + "io/fs" + "log/slog" + "os" + "path/filepath" + "time" + + "github.com/NVIDIA/nodewright/agent/internal/config" + "github.com/NVIDIA/nodewright/agent/internal/stage" +) + +const ( + UnknownVersion = "unknown" + UninstalledVersion = "uninstalled" + CurrentVersionEnv = "CURRENT_VERSION" + PreviousVersionEnv = "PREVIOUS_VERSION" + historyDirectoryMode = 0o755 + historyFileMode = 0o600 + historyEntryLimit = 100 +) + +// Versions is the current package version and the previously recorded host +// version. It is returned to callers instead of mutating a step in place. +type Versions struct { + Current string + Previous string +} + +func (v Versions) Environment() map[string]string { + return map[string]string{ + CurrentVersionEnv: v.Current, + PreviousVersionEnv: v.Previous, + } +} + +func (v Versions) UpgradeArguments() []string { + return []string{v.Previous, v.Current} +} + +// Store owns the per-package install-history files in one directory. +type Store struct { + dir string + logger *slog.Logger +} + +func NewStore(dir string, logger *slog.Logger) Store { + if logger == nil { + logger = slog.Default() + } + return Store{dir: dir, logger: logger} +} + +// Path returns the history path for a package. +func (s Store) Path(packageName string) (string, error) { + if packageName == "" || !filepath.IsLocal(packageName) || filepath.Base(packageName) != packageName { + return "", fmt.Errorf("package name %q must be a single path component", packageName) + } + return filepath.Join(s.dir, packageName+".json"), nil +} + +// Read returns the versions a step should receive. A package with no usable +// history is treated as an unknown prior installation. +func (s Store) Read(cfg config.Config) (Versions, error) { + if cfg.PackageVersion == "" { + return Versions{}, fmt.Errorf("package version must not be empty") + } + ledger, err := s.load(cfg.PackageName) + if err != nil { + return Versions{}, fmt.Errorf("reading versions for package %q: loading history: %w", cfg.PackageName, err) + } + return Versions{Current: cfg.PackageVersion, Previous: ledger.CurrentVersion}, nil +} + +// Record prepends a completed version transition to the package ledger. +// UninstallCheck records the package as uninstalled; every other valid stage +// records the configured package version. +func (s Store) Record(cfg config.Config, completedStage stage.Stage, at time.Time) error { + if _, err := stage.ParseStage(string(completedStage)); err != nil { + return fmt.Errorf("recording history for package %q: validating completed stage: %w", cfg.PackageName, err) + } + if cfg.PackageVersion == "" { + return fmt.Errorf("package version must not be empty") + } + if at.IsZero() { + return fmt.Errorf("history timestamp must not be zero") + } + + ledger, err := s.load(cfg.PackageName) + if err != nil { + return fmt.Errorf("recording history for package %q: loading history: %w", cfg.PackageName, err) + } + version := cfg.PackageVersion + if completedStage == stage.UninstallCheck { + version = UninstalledVersion + } + ledger.CurrentVersion = version + ledger.Entries = append([]entry{{Version: version, Time: at.UTC()}}, ledger.Entries...) + if len(ledger.Entries) > historyEntryLimit { + ledger.Entries = ledger.Entries[:historyEntryLimit] + } + + path, err := s.Path(cfg.PackageName) + if err != nil { + return fmt.Errorf("recording history for package %q: resolving history path: %w", cfg.PackageName, err) + } + if err := os.MkdirAll(filepath.Dir(path), historyDirectoryMode); err != nil { + return fmt.Errorf("creating history directory %q: %w", filepath.Dir(path), err) + } + data, err := json.MarshalIndent(ledger, "", " ") + if err != nil { + return fmt.Errorf("encoding history for package %q: %w", cfg.PackageName, err) + } + data = append(data, '\n') + if err := writeAtomic(path, data); err != nil { + return fmt.Errorf("writing history for package %q: %w", cfg.PackageName, err) + } + return nil +} + +type entry struct { + Version string `json:"version"` + Time time.Time `json:"time"` +} + +type ledger struct { + CurrentVersion string `json:"current-version"` + Entries []entry `json:"history"` +} + +func (s Store) load(packageName string) (ledger, error) { + path, err := s.Path(packageName) + if err != nil { + return ledger{}, fmt.Errorf("loading history for package %q: resolving history path: %w", packageName, err) + } + data, err := os.ReadFile(path) + if errors.Is(err, fs.ErrNotExist) { + s.logger.Info("package history does not exist", "package", packageName, "path", path) + return ledger{CurrentVersion: UnknownVersion, Entries: []entry{}}, nil + } + if err != nil { + return ledger{}, fmt.Errorf("reading history %q: %w", path, err) + } + + var result ledger + if err := json.Unmarshal(data, &result); err != nil { + backup := path + ".backup" + // Preserve the damaged bytes and continue from an unknown version so a + // corrupt file cannot permanently block package execution on this node. + if renameErr := os.Rename(path, backup); renameErr != nil { + return ledger{}, fmt.Errorf("moving corrupt history %q to %q: %w", path, backup, renameErr) + } + s.logger.Error( + "moved corrupt package history aside", + "package", packageName, + "path", path, + "backup", backup, + "error", err, + ) + return ledger{CurrentVersion: UnknownVersion, Entries: []entry{}}, nil + } + if result.CurrentVersion == "" { + result.CurrentVersion = UnknownVersion + } + if result.Entries == nil { + result.Entries = []entry{} + } + return result, nil +} + +func writeAtomic(path string, data []byte) (returnErr error) { + temporary, err := os.CreateTemp(filepath.Dir(path), "."+filepath.Base(path)+".tmp-*") + if err != nil { + return fmt.Errorf("creating temporary history file: %w", err) + } + temporaryPath := temporary.Name() + closed := false + defer func() { + if !closed { + if err := temporary.Close(); err != nil && returnErr == nil { + returnErr = fmt.Errorf("closing temporary history file %q: %w", temporaryPath, err) + } + } + if err := os.Remove(temporaryPath); err != nil && !errors.Is(err, fs.ErrNotExist) && returnErr == nil { + returnErr = fmt.Errorf("removing temporary history file %q: %w", temporaryPath, err) + } + }() + + if err := temporary.Chmod(historyFileMode); err != nil { + return fmt.Errorf("setting permissions on temporary history file %q: %w", temporaryPath, err) + } + if _, err := temporary.Write(data); err != nil { + return fmt.Errorf("writing temporary history file %q: %w", temporaryPath, err) + } + if err := temporary.Sync(); err != nil { + return fmt.Errorf("syncing temporary history file %q: %w", temporaryPath, err) + } + if err := temporary.Close(); err != nil { + closed = true + return fmt.Errorf("closing temporary history file %q: %w", temporaryPath, err) + } + closed = true + if err := os.Rename(temporaryPath, path); err != nil { + return fmt.Errorf("replacing history file %q: %w", path, err) + } + return nil +} diff --git a/agent/go/internal/history/history_test.go b/agent/go/internal/history/history_test.go new file mode 100644 index 00000000..bc2d9263 --- /dev/null +++ b/agent/go/internal/history/history_test.go @@ -0,0 +1,226 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package history + +import ( + "encoding/json" + "io" + "log/slog" + "os" + "path/filepath" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/NVIDIA/nodewright/agent/internal/config" + "github.com/NVIDIA/nodewright/agent/internal/stage" +) + +func TestHistory(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "History Suite") +} + +var _ = Describe("Store", func() { + var ( + dir string + store Store + cfg config.Config + now time.Time + ) + + BeforeEach(func() { + dir = filepath.Join(GinkgoT().TempDir(), "history") + store = NewStore(dir, slog.New(slog.NewTextHandler(io.Discard, nil))) + cfg = config.Config{PackageName: "driver", PackageVersion: "1.2.3"} + now = time.Date(2026, time.June, 30, 12, 34, 56, 123456789, time.FixedZone("test", -7*60*60)) + }) + + It("returns unknown without creating a missing history file", func() { + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions).To(Equal(Versions{Current: "1.2.3", Previous: UnknownVersion})) + Expect(versions.Environment()).To(Equal(map[string]string{ + CurrentVersionEnv: "1.2.3", + PreviousVersionEnv: UnknownVersion, + })) + Expect(versions.UpgradeArguments()).To(Equal([]string{UnknownVersion, "1.2.3"})) + Expect(dir).NotTo(BeADirectory()) + }) + + It("reads an existing ledger", func() { + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + data := `{ + "current-version": "1.1.0", + "history": [{"version":"1.1.0","time":"2024-08-28T14:33:20.123456+00:00"}] +}` + Expect(os.WriteFile(path, []byte(data), 0o600)).To(Succeed()) + + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions).To(Equal(Versions{Current: "1.2.3", Previous: "1.1.0"})) + }) + + It("treats empty or missing current versions as unknown", func() { + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + + for _, data := range []string{`{}`, `{"current-version":"","history":[]}`} { + Expect(os.WriteFile(path, []byte(data), 0o600)).To(Succeed()) + loaded, err := store.load(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + Expect(loaded.CurrentVersion).To(Equal(UnknownVersion)) + Expect(loaded.Entries).To(Equal([]entry{})) + + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions.Previous).To(Equal(UnknownVersion)) + } + }) + + It("moves corrupt history aside and returns unknown", func() { + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(path, []byte("{"), 0o600)).To(Succeed()) + + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions.Previous).To(Equal(UnknownVersion)) + Expect(path).NotTo(BeAnExistingFile()) + backup, err := os.ReadFile(path + ".backup") + Expect(err).NotTo(HaveOccurred()) + Expect(string(backup)).To(Equal("{")) + }) + + It("records a new installed version", func() { + Expect(store.Record(cfg, stage.ApplyCheck, now)).To(Succeed()) + + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + data, err := os.ReadFile(path) + Expect(err).NotTo(HaveOccurred()) + var saved ledger + Expect(json.Unmarshal(data, &saved)).To(Succeed()) + Expect(saved.CurrentVersion).To(Equal("1.2.3")) + Expect(saved.Entries).To(Equal([]entry{{Version: "1.2.3", Time: now.UTC()}})) + + info, err := os.Stat(path) + Expect(err).NotTo(HaveOccurred()) + Expect(info.Mode().Perm()).To(Equal(os.FileMode(0o600))) + temporaryFiles, err := filepath.Glob(filepath.Join(dir, ".driver.json.tmp-*")) + Expect(err).NotTo(HaveOccurred()) + Expect(temporaryFiles).To(BeEmpty()) + }) + + It("prepends entries without losing prior history", func() { + oldConfig := cfg + oldConfig.PackageVersion = "1.1.0" + Expect(store.Record(oldConfig, stage.ApplyCheck, now.Add(-time.Hour))).To(Succeed()) + Expect(store.Record(cfg, stage.UpgradeCheck, now)).To(Succeed()) + + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + data, err := os.ReadFile(path) + Expect(err).NotTo(HaveOccurred()) + var saved ledger + Expect(json.Unmarshal(data, &saved)).To(Succeed()) + Expect(saved.CurrentVersion).To(Equal("1.2.3")) + Expect(saved.Entries).To(Equal([]entry{ + {Version: "1.2.3", Time: now.UTC()}, + {Version: "1.1.0", Time: now.Add(-time.Hour).UTC()}, + })) + }) + + It("retains only the most recent history entries", func() { + entries := make([]entry, historyEntryLimit) + for i := range entries { + entries[i] = entry{Version: "old", Time: now.Add(-time.Duration(i+1) * time.Hour).UTC()} + } + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + data, err := json.Marshal(ledger{CurrentVersion: "old", Entries: entries}) + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(path, data, 0o600)).To(Succeed()) + + Expect(store.Record(cfg, stage.UpgradeCheck, now)).To(Succeed()) + data, err = os.ReadFile(path) + Expect(err).NotTo(HaveOccurred()) + var saved ledger + Expect(json.Unmarshal(data, &saved)).To(Succeed()) + Expect(saved.Entries).To(HaveLen(historyEntryLimit)) + Expect(saved.Entries[0]).To(Equal(entry{Version: cfg.PackageVersion, Time: now.UTC()})) + Expect(saved.Entries[1:]).To(Equal(entries[:historyEntryLimit-1])) + }) + + It("records uninstall completion distinctly", func() { + Expect(store.Record(cfg, stage.UninstallCheck, now)).To(Succeed()) + + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions.Previous).To(Equal(UninstalledVersion)) + }) + + It("recovers corrupt history before recording", func() { + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(path, []byte("{"), 0o600)).To(Succeed()) + + Expect(store.Record(cfg, stage.ApplyCheck, now)).To(Succeed()) + Expect(path + ".backup").To(BeAnExistingFile()) + versions, err := store.Read(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(versions.Previous).To(Equal("1.2.3")) + }) + + It("rejects invalid input before writing", func() { + err := store.Record(cfg, stage.Stage("bogus"), now) + Expect(err).To(MatchError(ContainSubstring(`recording history for package "driver": validating completed stage`))) + Expect(err).To(MatchError(ContainSubstring(`unknown stage "bogus"`))) + Expect(store.Record(cfg, stage.ApplyCheck, time.Time{})).To(MatchError("history timestamp must not be zero")) + cfg.PackageVersion = "" + _, err = store.Read(cfg) + Expect(err).To(MatchError("package version must not be empty")) + Expect(store.Record(cfg, stage.ApplyCheck, now)).To(MatchError("package version must not be empty")) + Expect(dir).NotTo(BeADirectory()) + cfg.PackageVersion = "1.2.3" + cfg.PackageName = "../driver" + _, err = store.Read(cfg) + Expect(err).To(MatchError(ContainSubstring(`reading versions for package "../driver": loading history`))) + Expect(err).To(MatchError(ContainSubstring(`loading history for package "../driver": resolving history path`))) + Expect(err).To(MatchError(ContainSubstring("single path component"))) + }) + + It("returns filesystem errors with context", func() { + Expect(os.MkdirAll(dir, 0o755)).To(Succeed()) + path, err := store.Path(cfg.PackageName) + Expect(err).NotTo(HaveOccurred()) + Expect(os.Mkdir(path, 0o755)).To(Succeed()) + + _, err = store.Read(cfg) + Expect(err).To(MatchError(ContainSubstring("reading history"))) + }) +})