From 35d66a5f4c671c03ba9234d2ccef23fecb66a2b9 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 5 Jun 2026 16:49:54 +0000 Subject: [PATCH 1/2] fileutils: add Reflink function for reflink-only cloning Add a Reflink() function that attempts a CoW file clone without falling back to io.Copy. Callers that need to know whether the filesystem supports reflinks can use this instead of ReflinkOrCopy. ReflinkOrCopy is refactored to call Reflink internally. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Giuseppe Scrivano --- storage/pkg/fileutils/reflink_linux.go | 11 ++++++++--- storage/pkg/fileutils/reflink_unsupported.go | 7 +++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/storage/pkg/fileutils/reflink_linux.go b/storage/pkg/fileutils/reflink_linux.go index 9f5c6c90bb..f0357da159 100644 --- a/storage/pkg/fileutils/reflink_linux.go +++ b/storage/pkg/fileutils/reflink_linux.go @@ -7,14 +7,19 @@ import ( "golang.org/x/sys/unix" ) +// Reflink attempts to reflink (CoW clone) the source to the destination fd. +// Returns an error if the filesystem does not support reflinks. +func Reflink(src, dst *os.File) error { + return unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())) +} + // ReflinkOrCopy attempts to reflink the source to the destination fd. // If reflinking fails or is unsupported, it falls back to io.Copy(). func ReflinkOrCopy(src, dst *os.File) error { - err := unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())) - if err == nil { + if err := Reflink(src, dst); err == nil { return nil } - _, err = io.Copy(dst, src) + _, err := io.Copy(dst, src) return err } diff --git a/storage/pkg/fileutils/reflink_unsupported.go b/storage/pkg/fileutils/reflink_unsupported.go index c0a30e670c..2da19d74be 100644 --- a/storage/pkg/fileutils/reflink_unsupported.go +++ b/storage/pkg/fileutils/reflink_unsupported.go @@ -3,10 +3,17 @@ package fileutils import ( + "errors" "io" "os" ) +// Reflink attempts to reflink (CoW clone) the source to the destination fd. +// Returns an error if the filesystem does not support reflinks. +func Reflink(src, dst *os.File) error { + return errors.ErrUnsupported +} + // ReflinkOrCopy attempts to reflink the source to the destination fd. // If reflinking fails or is unsupported, it falls back to io.Copy(). func ReflinkOrCopy(src, dst *os.File) error { From 838d9bf81232f564f8446046641f3d58f433b5a0 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 5 Jun 2026 16:50:01 +0000 Subject: [PATCH 2/2] chunked: use reflinks for chunk deduplication When reusing chunks from other layers, reflink the source file into a temporary directory under the staging dir. The reflinked copy shares data blocks (CoW, O(1)) but is a separate inode, so it survives concurrent deletion of the source layer. If the filesystem does not support reflinks, chunk deduplication is skipped and all chunks are fetched from the network. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Giuseppe Scrivano --- storage/drivers/driver.go | 5 ++ storage/drivers/overlay/overlay.go | 3 +- storage/pkg/chunked/storage_linux.go | 88 +++++++++++++++++++++++++++- 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/storage/drivers/driver.go b/storage/drivers/driver.go index 1ca50b6462..510ee700b2 100644 --- a/storage/drivers/driver.go +++ b/storage/drivers/driver.go @@ -310,6 +310,11 @@ type DifferOptions struct { // UseFsVerity defines whether fs-verity is used UseFsVerity DifferFsVerity + + // StagingDirectory is a writable directory the differ can use for + // temporary scratch data. It must reside on the same filesystem + // as the destination directory. + StagingDirectory string } // Differ defines the interface for using a custom differ. diff --git a/storage/drivers/overlay/overlay.go b/storage/drivers/overlay/overlay.go index 3ade1e7850..cd438e00c2 100644 --- a/storage/drivers/overlay/overlay.go +++ b/storage/drivers/overlay/overlay.go @@ -2242,7 +2242,8 @@ func (d *Driver) ApplyDiffWithDiffer(options *graphdriver.ApplyDiffWithDifferOpt logrus.Debugf("Applying differ in %s", applyDir) differOptions := graphdriver.DifferOptions{ - Format: graphdriver.DifferOutputFormatDir, + Format: graphdriver.DifferOutputFormatDir, + StagingDirectory: layerDir, } if d.usingComposefs { differOptions.Format = graphdriver.DifferOutputFormatFlat diff --git a/storage/pkg/chunked/storage_linux.go b/storage/pkg/chunked/storage_linux.go index ed9a5c7f07..a9f6caecba 100644 --- a/storage/pkg/chunked/storage_linux.go +++ b/storage/pkg/chunked/storage_linux.go @@ -36,6 +36,7 @@ import ( "go.podman.io/storage/pkg/chunked/internal/minimal" path "go.podman.io/storage/pkg/chunked/internal/path" "go.podman.io/storage/pkg/chunked/toc" + "go.podman.io/storage/pkg/fileutils" "go.podman.io/storage/pkg/fsverity" "go.podman.io/storage/pkg/idtools" "go.podman.io/storage/pkg/system" @@ -625,6 +626,40 @@ func collectIDs(entries []fileMetadata) ([]uint32, []uint32) { return mapToSlice(uids), mapToSlice(gids) } +func isReflinkNotSupported(err error) bool { + return errors.Is(err, unix.EOPNOTSUPP) || + errors.Is(err, unix.ENOSYS) || + errors.Is(err, unix.EXDEV) || + errors.Is(err, unix.EINVAL) +} + +func createReflink(srcRoot, srcPath, dstDir string) (string, error) { + parentDirfd, err := unix.Open(srcRoot, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", err + } + defer unix.Close(parentDirfd) + + srcFile, err := openFileUnderRoot(parentDirfd, srcPath, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", err + } + defer srcFile.Close() + + dstFile, err := os.CreateTemp(dstDir, "") + if err != nil { + return "", err + } + defer dstFile.Close() + + if err := fileutils.Reflink(srcFile, dstFile); err != nil { + os.Remove(dstFile.Name()) + return "", err + } + + return filepath.Base(dstFile.Name()), nil +} + type originFile struct { Root string Path string @@ -1777,6 +1812,29 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff wg.Wait() + // Reflink chunk dedup: when findChunkInOtherLayers locates a chunk + // in an existing layer, we cannot use that path directly because + // the source layer may be deleted before storeMissingFiles reads + // from it. Instead we reflink (CoW clone) the source file into a + // scratch directory so the copy is immune to concurrent deletion. + // If the filesystem does not support reflinks we skip chunk dedup + // entirely and fetch everything from the network. + var chunkRefsDir string + if differOpts != nil && differOpts.StagingDirectory != "" { + d, err := os.MkdirTemp(differOpts.StagingDirectory, "chunk-refs-") + if err != nil { + return output, fmt.Errorf("create chunk-refs directory: %w", err) + } + chunkRefsDir = d + defer os.RemoveAll(chunkRefsDir) + } + // reflinkMap caches reflinks: (source root, source path) → reflinked + // filename in chunkRefsDir. Multiple chunks at different offsets + // within the same source file share one reflink. + type reflinkKey struct{ root, path string } + reflinkMap := make(map[reflinkKey]string) + reflinkSupported := chunkRefsDir != "" + for _, res := range copyResults[:filesToWaitFor] { r := &mergedEntries[res.index] @@ -1820,15 +1878,39 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff switch chunk.ChunkType { case minimal.ChunkTypeData: + if !reflinkSupported { + break + } root, path, offset, err := c.layersCache.findChunkInOtherLayers(chunk) if err != nil { return output, err } - if offset >= 0 && validateChunkChecksum(chunk, root, path, offset, c.copyBuffer) { + if offset < 0 { + break + } + key := reflinkKey{root: root, path: path} + refName, ok := reflinkMap[key] + if !ok { + refName, err = createReflink(root, path, chunkRefsDir) + if err != nil { + if isReflinkNotSupported(err) { + reflinkSupported = false + break + } + // ENOENT is expected: the source layer can + // be deleted concurrently. + if errors.Is(err, unix.ENOENT) { + break + } + return output, fmt.Errorf("create reflink for %q: %w", path, err) + } + reflinkMap[key] = refName + } + if validateChunkChecksum(chunk, chunkRefsDir, refName, offset, c.copyBuffer) { missingPartsSize -= size mp.OriginFile = &originFile{ - Root: root, - Path: path, + Root: chunkRefsDir, + Path: refName, Offset: offset, } }