Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 229 additions & 0 deletions agent/go/internal/history/history.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package history

import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"log/slog"
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"os"
"path/filepath"
"time"

"github.com/NVIDIA/nodewright/agent/internal/config"
"github.com/NVIDIA/nodewright/agent/internal/stage"
)

// Version markers, the keys under which versions are exposed to steps, and
// the limits applied to the on-disk history.
const (
	// UnknownVersion is reported as the previous version when no usable
	// history exists for a package.
	UnknownVersion = "unknown"

	// UninstalledVersion is the version recorded once the uninstall-check
	// stage has completed.
	UninstalledVersion = "uninstalled"

	// CurrentVersionEnv is the key Versions.Environment uses for the current
	// version.
	CurrentVersionEnv = "CURRENT_VERSION"

	// PreviousVersionEnv is the key Versions.Environment uses for the
	// previous version.
	PreviousVersionEnv = "PREVIOUS_VERSION"

	// historyDirectoryMode is the permission mode used when creating the
	// history directory.
	historyDirectoryMode = 0o755

	// historyFileMode is the permission mode applied to a history file
	// before its contents are written.
	historyFileMode = 0o600

	// historyEntryLimit is the maximum number of entries kept in a single
	// package ledger.
	historyEntryLimit = 100
)

// Versions pairs the package version being applied with the version last
// recorded for the host. It is handed back to callers as a value so that no
// step has to be mutated in place.
type Versions struct {
	// Current is the package version being applied.
	Current string

	// Previous is the version last recorded for the host.
	Previous string
}

// Environment returns the versions as a map keyed by CurrentVersionEnv and
// PreviousVersionEnv.
func (v Versions) Environment() map[string]string {
	env := make(map[string]string, 2)
	env[CurrentVersionEnv] = v.Current
	env[PreviousVersionEnv] = v.Previous
	return env
}

// UpgradeArguments returns the previous version followed by the current
// version.
func (v Versions) UpgradeArguments() []string {
	args := make([]string, 0, 2)
	args = append(args, v.Previous, v.Current)
	return args
}

// Store manages the install-history files for packages, one JSON file per
// package, all kept in a single directory.
type Store struct {
	// dir is the directory that holds the history files.
	dir string

	// logger receives history diagnostics; NewStore never leaves it nil.
	logger *slog.Logger
}

// NewStore returns a Store rooted at dir. A nil logger is replaced with
// slog.Default().
func NewStore(dir string, logger *slog.Logger) Store {
	store := Store{dir: dir, logger: logger}
	if store.logger == nil {
		store.logger = slog.Default()
	}
	return store
}

// Path returns the history file path for packageName, which is
// "<packageName>.json" inside the store directory.
//
// packageName must be a single, non-empty, local path component; anything
// else yields an error so that a package name can never address a file
// outside the store directory.
func (s Store) Path(packageName string) (string, error) {
	singleComponent := packageName != "" &&
		filepath.IsLocal(packageName) &&
		filepath.Base(packageName) == packageName
	if !singleComponent {
		return "", fmt.Errorf("package name %q must be a single path component", packageName)
	}
	fileName := packageName + ".json"
	return filepath.Join(s.dir, fileName), nil
}

// Read returns the versions a step should receive. A package with no usable
// history is treated as an unknown prior installation.
func (s Store) Read(cfg config.Config) (Versions, error) {
if cfg.PackageVersion == "" {
return Versions{}, fmt.Errorf("package version must not be empty")
}
ledger, err := s.load(cfg.PackageName)
if err != nil {
return Versions{}, fmt.Errorf("reading versions for package %q: loading history: %w", cfg.PackageName, err)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return Versions{Current: cfg.PackageVersion, Previous: ledger.CurrentVersion}, nil
}

// Record prepends a completed version transition to the package ledger.
// UninstallCheck records the package as uninstalled; every other valid stage
// records the configured package version.
func (s Store) Record(cfg config.Config, completedStage stage.Stage, at time.Time) error {
if _, err := stage.ParseStage(string(completedStage)); err != nil {
return fmt.Errorf("recording history for package %q: validating completed stage: %w", cfg.PackageName, err)
}
if cfg.PackageVersion == "" {
return fmt.Errorf("package version must not be empty")
}
if at.IsZero() {
return fmt.Errorf("history timestamp must not be zero")
}

ledger, err := s.load(cfg.PackageName)
if err != nil {
return fmt.Errorf("recording history for package %q: loading history: %w", cfg.PackageName, err)
}
version := cfg.PackageVersion
if completedStage == stage.UninstallCheck {
version = UninstalledVersion
}
ledger.CurrentVersion = version
ledger.Entries = append([]entry{{Version: version, Time: at.UTC()}}, ledger.Entries...)
if len(ledger.Entries) > historyEntryLimit {
ledger.Entries = ledger.Entries[:historyEntryLimit]
}

Comment thread
coderabbitai[bot] marked this conversation as resolved.
path, err := s.Path(cfg.PackageName)
if err != nil {
return fmt.Errorf("recording history for package %q: resolving history path: %w", cfg.PackageName, err)
}
if err := os.MkdirAll(filepath.Dir(path), historyDirectoryMode); err != nil {
return fmt.Errorf("creating history directory %q: %w", filepath.Dir(path), err)
}
data, err := json.MarshalIndent(ledger, "", " ")
if err != nil {
return fmt.Errorf("encoding history for package %q: %w", cfg.PackageName, err)
}
data = append(data, '\n')
if err := writeAtomic(path, data); err != nil {
return fmt.Errorf("writing history for package %q: %w", cfg.PackageName, err)
}
return nil
}

// entry is one recorded version transition in a package ledger.
type entry struct {
	// Version is the version that was recorded; Record stores
	// UninstalledVersion here when the uninstall-check stage completes.
	Version string `json:"version"`

	// Time is when the transition was recorded. Record stores it in UTC.
	Time time.Time `json:"time"`
}

// ledger is the JSON document stored in a package's history file.
type ledger struct {
	// CurrentVersion is the most recently recorded version. load substitutes
	// UnknownVersion when the stored value is empty or no file exists.
	CurrentVersion string `json:"current-version"`

	// Entries holds the recorded transitions. Record prepends new entries, so
	// the newest comes first, and trims the slice to historyEntryLimit.
	Entries []entry `json:"history"`
}

// load reads and decodes the ledger stored for packageName.
//
// A missing history file is not an error: an empty ledger whose
// CurrentVersion is UnknownVersion is returned. A file that cannot be decoded
// as JSON is renamed to "<path>.backup" and likewise replaced by an empty
// unknown-version ledger. A decoded ledger with an empty current version or
// nil entries is normalised to UnknownVersion and an empty slice.
//
// Errors are returned when the package name does not resolve to a history
// path, when the file exists but cannot be read, or when a corrupt file
// cannot be moved aside. In the last case the returned error wraps both the
// rename failure and the decode failure.
func (s Store) load(packageName string) (ledger, error) {
	path, err := s.Path(packageName)
	if err != nil {
		return ledger{}, fmt.Errorf("loading history for package %q: resolving history path: %w", packageName, err)
	}
	data, err := os.ReadFile(path)
	if errors.Is(err, fs.ErrNotExist) {
		s.logger.Info("package history does not exist", "package", packageName, "path", path)
		return ledger{CurrentVersion: UnknownVersion, Entries: []entry{}}, nil
	}
	if err != nil {
		return ledger{}, fmt.Errorf("reading history %q: %w", path, err)
	}

	var result ledger
	if decodeErr := json.Unmarshal(data, &result); decodeErr != nil {
		backup := path + ".backup"
		// Preserve the damaged bytes and continue from an unknown version so a
		// corrupt file cannot permanently block package execution on this node.
		// NOTE(review): os.Rename replaces an existing destination file, so an
		// earlier backup at this path is overwritten.
		if renameErr := os.Rename(path, backup); renameErr != nil {
			// Report why the file was considered corrupt as well as why it
			// could not be moved; otherwise the decode failure is lost.
			return ledger{}, fmt.Errorf(
				"moving corrupt history %q to %q: %w; history could not be decoded: %w",
				path, backup, renameErr, decodeErr,
			)
		}
		s.logger.Error(
			"moved corrupt package history aside",
			"package", packageName,
			"path", path,
			"backup", backup,
			"error", decodeErr,
		)
		// Return a fresh ledger rather than result, which may have been
		// partially populated before decoding failed.
		return ledger{CurrentVersion: UnknownVersion, Entries: []entry{}}, nil
	}
	if result.CurrentVersion == "" {
		result.CurrentVersion = UnknownVersion
	}
	if result.Entries == nil {
		result.Entries = []entry{}
	}
	return result, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// writeAtomic replaces the file at path with data by way of a temporary file.
//
// The temporary file is created in the same directory as path, given
// historyFileMode permissions, written, synced and closed, and only then
// renamed over path. On every exit the deferred cleanup closes the temporary
// file if it is still open and removes it; a temporary file that no longer
// exists (because the rename succeeded) is not treated as an error. Cleanup
// failures are reported only when no earlier error is being returned.
func writeAtomic(path string, data []byte) (returnErr error) {
	staged, err := os.CreateTemp(filepath.Dir(path), "."+filepath.Base(path)+".tmp-*")
	if err != nil {
		return fmt.Errorf("creating temporary history file: %w", err)
	}
	stagedPath := staged.Name()

	stillOpen := true
	defer func() {
		if stillOpen {
			closeErr := staged.Close()
			if closeErr != nil && returnErr == nil {
				returnErr = fmt.Errorf("closing temporary history file %q: %w", stagedPath, closeErr)
			}
		}
		removeErr := os.Remove(stagedPath)
		if removeErr == nil || errors.Is(removeErr, fs.ErrNotExist) || returnErr != nil {
			return
		}
		returnErr = fmt.Errorf("removing temporary history file %q: %w", stagedPath, removeErr)
	}()

	if err := staged.Chmod(historyFileMode); err != nil {
		return fmt.Errorf("setting permissions on temporary history file %q: %w", stagedPath, err)
	}
	if _, err := staged.Write(data); err != nil {
		return fmt.Errorf("writing temporary history file %q: %w", stagedPath, err)
	}
	if err := staged.Sync(); err != nil {
		return fmt.Errorf("syncing temporary history file %q: %w", stagedPath, err)
	}

	// Close is attempted exactly once here; whether or not it succeeds, the
	// deferred cleanup must not close the file a second time.
	stillOpen = false
	if err := staged.Close(); err != nil {
		return fmt.Errorf("closing temporary history file %q: %w", stagedPath, err)
	}

	if err := os.Rename(stagedPath, path); err != nil {
		return fmt.Errorf("replacing history file %q: %w", path, err)
	}
	return nil
}
Loading
Loading