From 87544331e384ae5df703abcc5a431a1bd953dd7a Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 18:18:45 +0300 Subject: [PATCH 01/31] Add opt-in atomic temporary transfer base support; Mover helper methods and FileMover finalizer\n\nCo-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 83 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 4 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index da411aa..ad308ec 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -52,16 +52,34 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ LOGGER.debug("Copying to: %s", fake_dest) try: LOGGER.debug("Scheme = %s", str(dest_url.scheme)) - mover = MOVERS[dest_url.scheme] + mover_cls = MOVERS[dest_url.scheme] except KeyError: LOGGER.error("Unsupported protocol '" + str(dest_url.scheme) + "'. Could not copy " + pathname + " to " + str(destination)) raise try: - m = mover(pathname, new_dest, attrs=attrs, backup_targets=backup_targets) - m.copy() - last_dest = m.destination + use_tmp = bool(attrs and attrs.get('use_tmp_on_transfer')) + if use_tmp: + tmp_prefix = attrs.get('tmp_prefix', '.') + tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) + m = mover_cls(pathname, tmp_dest, attrs=attrs, backup_targets=backup_targets) + m.copy() + # finalize: default finalizer works for local schemes; subclasses should override + try: + m.finalize_atomic_transfer(m.destination, new_dest) + except NotImplementedError: + # cleanup tmp and raise error to indicate unsupported scheme for atomic transfer + try: + if hasattr(m.destination, 'path') and os.path.exists(m.destination.path): + os.remove(m.destination.path) + finally: + raise + last_dest = new_dest + else: + m = mover_cls(pathname, new_dest, attrs=attrs, backup_targets=backup_targets) + m.copy() + last_dest = m.destination if last_dest != new_dest: new_dest = last_dest fake_dest = 
clean_url(new_dest) @@ -108,6 +126,49 @@ def move(self): raise NotImplementedError("Move for scheme " + self.destination.scheme + " not implemented (yet).") + def supports_atomic(self): + """Return True if this mover supports the default atomic finalize method. + + Subclasses should override if they can perform remote atomic rename. + """ + try: + scheme = self.destination.scheme + except Exception: + scheme = '' + return scheme in ('', 'file') + + @staticmethod + def tmp_destination_for(dest, tmp_prefix='.'): + """Return a copy of dest with the basename prefixed by tmp_prefix.""" + try: + path = dest.path + except Exception: + return dest + dirname = os.path.dirname(path) + basename = os.path.basename(path) + tmp_name = tmp_prefix + basename + return dest._replace(path=os.path.join(dirname, tmp_name)) + + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer by renaming tmp to final. + + Default implementation works for local filesystems (empty or 'file' scheme). + Subclasses handling remote schemes must override this method. 
+ """ + try: + tmp_path = tmp_destination.path + final_path = final_destination.path + except AttributeError: + raise NotImplementedError("Finalize atomic transfer not implemented for remote schemes") + + final_dir = os.path.dirname(final_path) + if not os.path.exists(final_dir): + os.makedirs(final_dir) + # Use os.replace for atomic rename where possible + os.replace(tmp_path, final_path) + # Update mover's destination to final + self.destination = final_destination + def get_connection(self, hostname, port, username=None): """Get the connection.""" with self.active_connection_lock: @@ -169,6 +230,20 @@ def move(self): """Move the file.""" shutil.move(self.origin, self.destination.path) + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer for local filesystem by renaming tmp -> final.""" + try: + tmp_path = tmp_destination.path + final_path = final_destination.path + except AttributeError: + raise NotImplementedError("Finalize atomic transfer not implemented for this scheme") + + final_dir = os.path.dirname(final_path) + if not os.path.exists(final_dir): + os.makedirs(final_dir) + os.replace(tmp_path, final_path) + self.destination = final_destination + class CTimer(Thread): """Call a function after a specified number of seconds. 
From 7f74333189e179748f65f068e63d6829bbf13f49 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 18:20:42 +0300 Subject: [PATCH 02/31] Implement finalize_atomic_transfer for FTP, SCP, and SFTP movers; enable remote rename for atomic transfers\n\nCo-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 86 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index ad308ec..c76b5de 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -363,6 +363,30 @@ def cd_tree(current_dir): connection.storbinary("STOR " + destination_filename, file_obj) + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer by renaming tmp -> final on FTP server.""" + connection = self.get_connection(self.destination.hostname, self.destination.port, self._dest_username) + + def cd_tree(current_dir): + if current_dir != "": + try: + connection.cwd(current_dir) + except (IOError, error_perm): + cd_tree("/".join(current_dir.split("/")[:-1])) + connection.mkd(current_dir) + connection.cwd(current_dir) + + dest_dirname = os.path.dirname(tmp_destination.path) + tmp_basename = os.path.basename(tmp_destination.path) + final_basename = os.path.basename(final_destination.path) + cd_tree(dest_dirname) + try: + connection.rename(tmp_basename, final_basename) + except Exception as err: + LOGGER.exception("Failed to finalize FTP atomic transfer: %s", str(err)) + raise + self.destination = final_destination + class ScpMover(Mover): """Move files over ssh with scp.""" @@ -478,6 +502,39 @@ def copy(self): finally: scp.close() + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer for SCP by performing remote rename via SFTP.""" + ssh_connection = self.get_connection(self.destination.hostname, + self.destination.port or 22, + self._dest_username) + sftp = None + try: + sftp 
= ssh_connection.open_sftp() + final_dir = os.path.dirname(final_destination.path) + # ensure final directory exists + if final_dir: + parts = final_dir.split('/') + path = '' + for p in parts: + if not p: + continue + path = path + '/' + p + try: + sftp.stat(path) + except IOError: + try: + sftp.mkdir(path) + except Exception: + pass + sftp.rename(tmp_destination.path, final_destination.path) + finally: + if sftp is not None: + try: + sftp.close() + except Exception: + pass + self.destination = final_destination + class SftpMover(Mover): """Move files over sftp.""" @@ -503,6 +560,35 @@ def copy(self): with ssh.open_sftp() as sftp: sftp.put(self.origin, self.destination.path) + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer for SFTP by renaming tmp -> final on remote host.""" + import paramiko + with paramiko.SSHClient() as ssh: + ssh.load_system_host_keys() + ssh.connect(self.destination.hostname, + port=self.destination.port or 22, + username=self._dest_username, + allow_agent=True, + key_filename=self.attrs.get("ssh_private_key_file")) + with ssh.open_sftp() as sftp: + final_dir = os.path.dirname(final_destination.path) + if final_dir: + parts = final_dir.split('/') + path = '' + for p in parts: + if not p: + continue + path = path + '/' + p + try: + sftp.stat(path) + except IOError: + try: + sftp.mkdir(path) + except Exception: + pass + sftp.rename(tmp_destination.path, final_destination.path) + self.destination = final_destination + class S3Mover(Mover): """Move files to S3 cloud storage. 
From fe5828732c50dece835715dad0953e0a59f6abec Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 18:31:14 +0300 Subject: [PATCH 03/31] S3Mover: add multipart-upload finalization and copy+delete fallback for atomic transfers; prefer multipart when boto3 available\n\nCo-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 199 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 173 insertions(+), 26 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index c76b5de..a3d71b4 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -1,3 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2012-2023 +# +# Author(s): +# +# Martin Raspaud +# Panu Lahtinen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ """Movers for the move_it scripts.""" import logging @@ -16,6 +39,10 @@ from s3fs import S3FileSystem except ImportError: S3FileSystem = None +try: + import boto3 +except ImportError: + boto3 = None from trollmoves.utils import clean_url @@ -25,7 +52,8 @@ "default_cache_type", "version_aware", "cache_regions", "asynchronous", "config_kwargs", "kwargs", "session", "max_concurrency", "fixed_upload_size", - "profile"] + # allow our atomic-transfer and multipart options to pass through + "s3_use_multipart", "s3_use_copy", "tmp_prefix", "s3_multipart_chunksize"] LOGGER = logging.getLogger(__name__) @@ -174,19 +202,19 @@ def get_connection(self, hostname, port, username=None): with self.active_connection_lock: LOGGER.debug("Destination username and passwd: %s %s", self._dest_username, self._dest_password) - LOGGER.debug("Getting connection to %s@%s:%s", + LOGGER.debug('Getting connection to %s@%s:%s', username, hostname, port) try: connection, timer = self.active_connections[(hostname, port, username)] if not self.is_connected(connection): del self.active_connections[(hostname, port, username)] - LOGGER.debug("Resetting connection") + LOGGER.debug('Resetting connection') connection = self.open_connection() timer.cancel() except KeyError: connection = self.open_connection() - timer = CTimer(int(self.attrs.get("connection_uptime", 30)), + timer = CTimer(int(self.attrs.get('connection_uptime', 30)), self.delete_connection, (connection,)) timer.start() self.active_connections[(self.destination.hostname, port, username)] = connection, timer @@ -196,7 +224,7 @@ def get_connection(self, hostname, port, username=None): def delete_connection(self, connection): """Delete active connection *connection*.""" with self.active_connection_lock: - LOGGER.debug("Closing connection to %s@%s:%s", + LOGGER.debug('Closing connection to %s@%s:%s', self._dest_username, self.destination.hostname, self.destination.port) try: if current_thread().finished.is_set(): @@ -288,7 +316,7 @@ def 
_get_netrc_authentication(self): try: secrets = netrc.netrc() except (netrc.NetrcParseError, FileNotFoundError) as e__: - LOGGER.warning("Failed retrieve authentification details from netrc file! Exception: %s", str(e__)) + LOGGER.warning('Failed retrieve authentification details from netrc file! Exception: %s', str(e__)) return LOGGER.debug("Destination hostname: %s", self.destination.hostname) @@ -296,7 +324,7 @@ def _get_netrc_authentication(self): LOGGER.debug("Check if hostname matches any listed in the netrc file") if self.destination.hostname in list(secrets.hosts.keys()): self._dest_username, account, self._dest_password = secrets.authenticators(self.destination.hostname) - LOGGER.debug("Got username and password from netrc file!") + LOGGER.debug('Got username and password from netrc file!') def open_connection(self): """Open the connection and login.""" @@ -354,13 +382,13 @@ def cd_tree(current_dir): connection.mkd(current_dir) connection.cwd(current_dir) - LOGGER.debug("cd to %s", os.path.dirname(self.destination.path)) + LOGGER.debug('cd to %s', os.path.dirname(self.destination.path)) destination_dirname, destination_filename = os.path.split(self.destination.path) cd_tree(destination_dirname) if not destination_filename: destination_filename = os.path.basename(self.origin) - with open(self.origin, "rb") as file_obj: - connection.storbinary("STOR " + destination_filename, + with open(self.origin, 'rb') as file_obj: + connection.storbinary('STOR ' + destination_filename, file_obj) def finalize_atomic_transfer(self, tmp_destination, final_destination): @@ -660,39 +688,158 @@ def __init__(self, origin, destination, attrs=None, backup_targets=None): def copy(self): """Copy the file to a bucket.""" + if S3FileSystem is None and boto3 is None: + raise ImportError("S3Mover requires 's3fs' or 'boto3' to be installed.") + + use_multipart = bool(self.attrs.get('s3_use_multipart', False)) + use_copy = bool(self.attrs.get('s3_use_copy', False)) + tmp_prefix = 
self.attrs.get('tmp_prefix', '.') + + # Destination path inside bucket (bucket/key or bucket/dir/file) + destination_file_path = self._get_destination() + LOGGER.debug('destination_file_path = %s', destination_file_path) + + # Prefer multipart upload using boto3 when configured and available + if use_multipart and boto3 is not None: + # Derive final key: if basename starts with tmp_prefix, strip it + parts = destination_file_path.split('/') + if len(parts) == 1: + bucket = parts[0] + key = '' + else: + bucket = parts[0] + key = '/'.join(parts[1:]) + + basename = key.split('/')[-1] if key else '' + if basename.startswith(tmp_prefix): + final_basename = basename[len(tmp_prefix):] + final_key = key.rsplit('/', 1)[0] + '/' + final_basename if '/' in key else final_basename + else: + final_key = key + + # Build boto3 client with optional client_kwargs or credentials + boto_kwargs = dict(self.attrs.get('client_kwargs', {})) if isinstance(self.attrs.get('client_kwargs', {}), dict) else {} + if self.attrs.get('key') and self.attrs.get('secret'): + # Use explicit credentials + client = boto3.client('s3', aws_access_key_id=self.attrs.get('key'), aws_secret_access_key=self.attrs.get('secret'), **boto_kwargs) + else: + client = boto3.client('s3', **boto_kwargs) + + # multipart upload in chunks of 8MB + chunk_size = int(self.attrs.get('s3_multipart_chunksize', 8 * 1024 * 1024)) + try: + mp = client.create_multipart_upload(Bucket=bucket, Key=final_key) + upload_id = mp['UploadId'] + parts = [] + part_number = 1 + with open(self.origin, 'rb') as f: + while True: + data = f.read(chunk_size) + if not data: + break + resp = client.upload_part(Bucket=bucket, Key=final_key, PartNumber=part_number, UploadId=upload_id, Body=data) + parts.append({'ETag': resp['ETag'], 'PartNumber': part_number}) + part_number += 1 + client.complete_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id, MultipartUpload={'Parts': parts}) + except Exception as e: + LOGGER.exception('Multipart 
upload failed: %s', str(e)) + try: + client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) + except Exception: + pass + raise + + # Update destination to final key + self.destination = urlparse('s3://' + bucket + '/' + final_key) + return + + # Fallback: use s3fs put to destination_file_path (tmp or final) if S3FileSystem is None: - raise ImportError("S3Mover requires 's3fs' to be installed.") + raise ImportError("S3Mover requires 's3fs' to be installed for non-multipart operations.") s3 = S3FileSystem(**self.attrs) - destination_file_path = self._get_destination() - LOGGER.debug("destination_file_path = %s", destination_file_path) + LOGGER.debug('Before call to put: destination_file_path = %s', destination_file_path) + LOGGER.debug('self.origin = %s', self.origin) _create_s3_destination_path(s3, destination_file_path) - LOGGER.debug("Before call to put: destination_file_path = %s", destination_file_path) - LOGGER.debug("self.origin = %s", self.origin) s3.put(self.origin, destination_file_path) + def _sanitize_attrs(self): keys = list(self.attrs.keys()) for key in keys: if key not in S3_ALLOWED_SETTINGS: - LOGGER.debug("S3 Keyword {str(key)} not allowed - remove from attributes.") del self.attrs[key] def _get_destination(self): bucket_parts = [] bucket_parts.append(self.destination.netloc) - if self.destination.path != "/": - bucket_parts.append(self.destination.path.strip("/")) - if self.destination.path.endswith("/"): + if self.destination.path != '/': + bucket_parts.append(self.destination.path.strip('/')) + if self.destination.path.endswith('/'): bucket_parts.append(os.path.basename(self.origin)) - return "/".join(bucket_parts) + return '/'.join(bucket_parts) def move(self): """Move the file.""" self.copy() os.remove(self.origin) + def finalize_atomic_transfer(self, tmp_destination, final_destination): + """Finalize atomic transfer for S3. + + Default behavior: if multipart upload path was used, there's nothing to do. 
+ Otherwise, if configured, perform copy+delete (server-side copy) to move tmp key to final key. + """ + use_multipart = bool(self.attrs.get('s3_use_multipart', False)) + use_copy = bool(self.attrs.get('s3_use_copy', False)) + tmp_prefix = self.attrs.get('tmp_prefix', '.') + + destination_file_path = tmp_destination and (tmp_destination.path.lstrip('/')) or self._get_destination() + # derive bucket and keys + parts = destination_file_path.split('/') + if len(parts) == 1: + bucket = parts[0] + tmp_key = '' + else: + bucket = parts[0] + tmp_key = '/'.join(parts[1:]) + + basename = tmp_key.split('/')[-1] if tmp_key else '' + if basename.startswith(tmp_prefix): + final_basename = basename[len(tmp_prefix):] + final_key = tmp_key.rsplit('/', 1)[0] + '/' + final_basename if '/' in tmp_key else final_basename + else: + final_key = tmp_key + + # If multipart path was used and boto3 completed upload to final key, nothing to do + if use_multipart and boto3 is not None: + self.destination = urlparse('s3://' + bucket + '/' + final_key) + return + + # Otherwise perform copy+delete if configured + if not use_copy: + # No server-side rename available and copy disabled: raise to indicate unsupported op + raise NotImplementedError('S3 atomic finalize requires either multipart uploads or copy+delete fallback') + + # use s3fs or boto3 to copy and delete tmp key + if S3FileSystem is not None: + s3 = S3FileSystem(**self.attrs) + s3.copy(destination_file_path, bucket + '/' + final_key) + s3.rm(destination_file_path) + self.destination = urlparse('s3://' + bucket + '/' + final_key) + return + + if boto3 is None: + raise ImportError('No S3 backend available for copy+delete finalize') + # boto3 copy_object and delete_object + boto_kwargs = dict(self.attrs.get('client_kwargs', {})) if isinstance(self.attrs.get('client_kwargs', {}), dict) else {} + client = boto3.client('s3', **boto_kwargs) + copy_source = {'Bucket': bucket, 'Key': tmp_key} + client.copy_object(CopySource=copy_source, 
Bucket=bucket, Key=final_key) + client.delete_object(Bucket=bucket, Key=tmp_key) + self.destination = urlparse('s3://' + bucket + '/' + final_key) + def _create_s3_destination_path(s3, destination_file_path): destination_path = os.path.dirname(destination_file_path) @@ -700,10 +847,10 @@ def _create_s3_destination_path(s3, destination_file_path): s3.mkdirs(destination_path) -MOVERS = {"ftp": FtpMover, - "file": FileMover, - "": FileMover, - "scp": ScpMover, - "sftp": SftpMover, - "s3": S3Mover, +MOVERS = {'ftp': FtpMover, + 'file': FileMover, + '': FileMover, + 'scp': ScpMover, + 'sftp': SftpMover, + 's3': S3Mover, } From ab319a524b8060652b481741c0d3c208b1c4dbed Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 18:34:20 +0300 Subject: [PATCH 04/31] Add S3Mover unit tests: multipart upload and copy+delete fallback\n\nCo-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/tests/test_movers_s3.py | 81 ++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 trollmoves/tests/test_movers_s3.py diff --git a/trollmoves/tests/test_movers_s3.py b/trollmoves/tests/test_movers_s3.py new file mode 100644 index 0000000..3b8a2ab --- /dev/null +++ b/trollmoves/tests/test_movers_s3.py @@ -0,0 +1,81 @@ +import os +import tempfile +from unittest.mock import MagicMock, patch +from urllib.parse import urlparse + +import pytest + +from trollmoves.movers import S3Mover, S3FileSystem, boto3 + + +@patch('trollmoves.movers.boto3.client') +def test_s3_multipart_upload(mock_boto_client): + # Create a temporary file with some content + with tempfile.NamedTemporaryFile('wb', delete=False) as f: + f.write(b'hello world') + tmpname = f.name + + # Setup mock boto3 client + mock_client = MagicMock() + mock_client.create_multipart_upload.return_value = {'UploadId': 'upload123'} + mock_client.upload_part.return_value = {'ETag': 'etag-1'} + mock_client.complete_multipart_upload.return_value = {'ResponseMetadata': 
{'HTTPStatusCode': 200}} + mock_boto_client.return_value = mock_client + + # Destination: s3://mybucket/some/path/file + dest = 's3://mybucket/some/path/file' + attrs = {'s3_use_multipart': True, 'client_kwargs': {}} + + mover = S3Mover(tmpname, dest, attrs=attrs) + # Should use boto3 multipart upload and complete it + mover.copy() + + # Assertions: multipart calls were made + mock_client.create_multipart_upload.assert_called() + assert mock_client.upload_part.called + mock_client.complete_multipart_upload.assert_called() + + # Destination should be updated to final key + assert mover.destination.scheme == 's3' + assert 'mybucket' in mover.destination.netloc or 'mybucket' in mover.destination.path + + # Cleanup + os.remove(tmpname) + + +@patch('trollmoves.movers.S3FileSystem') +def test_s3_copy_delete_fallback(mock_s3fs): + # Create a temporary file with some content + with tempfile.NamedTemporaryFile('wb', delete=False) as f: + f.write(b'fallback test') + tmpname = f.name + + # Setup mock s3fs instance + mock_s3 = MagicMock() + mock_s3.exists.return_value = True + mock_s3.copy.return_value = None + mock_s3.rm.return_value = None + mock_s3.put.return_value = None + mock_s3fs.return_value = mock_s3 + + # Destination path that will be used for tmp key + dest = 's3://bucketname/dir/.tmpfile' + attrs = {'s3_use_multipart': False, 's3_use_copy': True, 'tmp_prefix': '.'} + + mover = S3Mover(tmpname, dest, attrs=attrs) + # copy should call s3.put + mover.copy() + mock_s3.put.assert_called_once() + + # Now finalize: simulate moving tmp key to final + tmp_dest = urlparse('s3://bucketname/dir/.tmpfile') + final_dest = urlparse('s3://bucketname/dir/file') + mover.attrs = attrs + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + # copy+delete should be invoked + mock_s3.copy.assert_called_once() + mock_s3.rm.assert_called_once() + + # Cleanup + os.remove(tmpname) From a7882ded275e523fb838bb2e41be26769156ac6d Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 
2026 18:34:58 +0300 Subject: [PATCH 05/31] Fix S3 multipart test: patch boto3 module object when boto3 may be unavailable --- trollmoves/tests/test_movers_s3.py | 34 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/trollmoves/tests/test_movers_s3.py b/trollmoves/tests/test_movers_s3.py index 3b8a2ab..5812e96 100644 --- a/trollmoves/tests/test_movers_s3.py +++ b/trollmoves/tests/test_movers_s3.py @@ -8,8 +8,7 @@ from trollmoves.movers import S3Mover, S3FileSystem, boto3 -@patch('trollmoves.movers.boto3.client') -def test_s3_multipart_upload(mock_boto_client): +def test_s3_multipart_upload(): # Create a temporary file with some content with tempfile.NamedTemporaryFile('wb', delete=False) as f: f.write(b'hello world') @@ -20,24 +19,27 @@ def test_s3_multipart_upload(mock_boto_client): mock_client.create_multipart_upload.return_value = {'UploadId': 'upload123'} mock_client.upload_part.return_value = {'ETag': 'etag-1'} mock_client.complete_multipart_upload.return_value = {'ResponseMetadata': {'HTTPStatusCode': 200}} - mock_boto_client.return_value = mock_client - # Destination: s3://mybucket/some/path/file - dest = 's3://mybucket/some/path/file' - attrs = {'s3_use_multipart': True, 'client_kwargs': {}} + mock_boto_mod = MagicMock() + mock_boto_mod.client = MagicMock(return_value=mock_client) - mover = S3Mover(tmpname, dest, attrs=attrs) - # Should use boto3 multipart upload and complete it - mover.copy() + with patch('trollmoves.movers.boto3', new=mock_boto_mod): + # Destination: s3://mybucket/some/path/file + dest = 's3://mybucket/some/path/file' + attrs = {'s3_use_multipart': True, 'client_kwargs': {}} + + mover = S3Mover(tmpname, dest, attrs=attrs) + # Should use boto3 multipart upload and complete it + mover.copy() - # Assertions: multipart calls were made - mock_client.create_multipart_upload.assert_called() - assert mock_client.upload_part.called - mock_client.complete_multipart_upload.assert_called() + # Assertions: 
multipart calls were made + mock_client.create_multipart_upload.assert_called() + assert mock_client.upload_part.called + mock_client.complete_multipart_upload.assert_called() - # Destination should be updated to final key - assert mover.destination.scheme == 's3' - assert 'mybucket' in mover.destination.netloc or 'mybucket' in mover.destination.path + # Destination should be updated to final key + assert mover.destination.scheme == 's3' + assert 'mybucket' in mover.destination.netloc or 'mybucket' in mover.destination.path # Cleanup os.remove(tmpname) From 8636d41a15871b0c5d973a00e1890b3bb744bf82 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 18:40:02 +0300 Subject: [PATCH 06/31] Docs: document atomic temporary-transfer options; add commented example config lines in examples/ --- README.md | 34 ++++++++++++++++++++++ examples/dispatch.yaml | 6 ++++ examples/move_it_client.ini | 6 ++++ examples/move_it_server.ini | 7 +++++ examples/s3downloader-config.yaml-template | 6 ++++ 5 files changed, 59 insertions(+) diff --git a/README.md b/README.md index 66ea386..b75e0d4 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,40 @@ analogous to moving a file from one directory to a new destination changing the filename. The new destination filename will be the last part of the provided destination following the last slash ('/'). +New atomic-transfer options (opt-in) + +To avoid exposing partially-uploaded files, movers can be configured to upload +first to a temporary name and be renamed/activated only after the transfer +completes. These options are passed via the mover's connection_parameters or +attrs dictionary. + +- use_tmp_on_transfer: boolean (default: False) + If true, movers will upload to a temporary destination (see tmp_prefix) + and finalize the transfer by renaming/moving the tmp object to the final + name after successful transfer. +- tmp_prefix: string (default: '.') + Prefix to use for temporary filenames (e.g. ".filename"). 
+ +S3-specific options +- s3_use_multipart: boolean (default: True) + When True and boto3 is available, S3Mover will perform a multipart upload + directly to the final key and CompleteMultipartUpload to make the object + visible atomically. +- s3_use_copy: boolean (default: False) + If multipart uploads are not used, enabling this will finalize an upload + performed to a tmp key by performing a server-side copy (CopyObject) to + the final key and deleting the temporary key. This is compatible with + s3fs or boto3 backends but requires additional permissions. +- s3_multipart_chunksize: integer (default: 8388608) + Chunk size (bytes) used for multipart uploads when boto3 multipart is used. + +Behavior notes +- Multipart uploads (preferred) avoid the extra server-side copy step but + require boto3 and appropriate permissions. Copy+delete is provided as a + fallback for S3-compatible endpoints that do not support multipart. +- The tmp_prefix and use_tmp_on_transfer options are intentionally opt-in to + preserve existing behavior by default. + ## s3downloader diff --git a/examples/dispatch.yaml b/examples/dispatch.yaml index b8d7a77..457b2cf 100644 --- a/examples/dispatch.yaml +++ b/examples/dispatch.yaml @@ -60,6 +60,12 @@ target-s3-example1: secret: "my-super-secret-key" key: "my-access-key" use_ssl: true + + # Optional atomic transfer options for S3 mover: + # s3_use_multipart: True # Prefer multipart upload (boto3 required) + # s3_use_copy: False # If multipart not used, perform copy+delete to finalize tmp key + # tmp_prefix: '.' # Prefix used for temporary keys (e.g. 
'.filename') + # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads aliases: platform_name: Suomi-NPP: npp diff --git a/examples/move_it_client.ini b/examples/move_it_client.ini index 07faf54..bcf1297 100644 --- a/examples/move_it_client.ini +++ b/examples/move_it_client.ini @@ -105,6 +105,12 @@ processing_delay = 0.02 providers = satmottag:9010 satmottag2:9010 explorer:9010 # Push to base of the bucket destination = s3://data-bucket/ +# Optional atomic transfer options (passed under connection_parameters): +# connection_parameters__use_tmp_on_transfer = True +# connection_parameters__tmp_prefix = . +# connection_parameters__s3_use_multipart = True +# connection_parameters__s3_use_copy = False +# connection_parameters__s3_multipart_chunksize = 8388608 # Alternatively a sub-directory within the bucket. The directory structure will be created if it doesn't exist # destination = s3://data-bucket/msg/0deg topic = /1b/hrit-segment/0deg diff --git a/examples/move_it_server.ini b/examples/move_it_server.ini index 1057778..e818c39 100644 --- a/examples/move_it_server.ini +++ b/examples/move_it_server.ini @@ -9,6 +9,13 @@ # Put this in connection_parameters dictionary by adding the prefix connection_parameters__ssh_key_filename = /home/username/.ssh/id_rsa +# Optional global defaults for atomic transfers (passed into connection_parameters) +# connection_parameters__use_tmp_on_transfer = False +# connection_parameters__tmp_prefix = . +# connection_parameters__s3_use_multipart = True +# connection_parameters__s3_use_copy = False +# connection_parameters__s3_multipart_chunksize = 8388608 + # Set watchdog polling timeout (interval) in seconds. 
# Only effective if "-w" commandline argument is given # watchdog_timeout = 2.0 diff --git a/examples/s3downloader-config.yaml-template b/examples/s3downloader-config.yaml-template index 859ba42..6fcaea3 100644 --- a/examples/s3downloader-config.yaml-template +++ b/examples/s3downloader-config.yaml-template @@ -16,3 +16,9 @@ s3_kwargs: profile: metno # Optional client_kwargs: endpoint_url: + + # Optional atomic transfer options for S3 Mover (passed to movers): + # s3_use_multipart: True # Prefer multipart upload (boto3 required) + # s3_use_copy: False # If multipart not used, perform copy+delete to finalize tmp key + # tmp_prefix: '.' # Prefix used for temporary keys (e.g. '.filename') + # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads From 43366b2f1a8b2624af623dfc1626bd905304736b Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 19:11:53 +0300 Subject: [PATCH 07/31] Docs: default s3_use_multipart is False (s3fs-first, opt-in multipart) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b75e0d4..19404ca 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ attrs dictionary. Prefix to use for temporary filenames (e.g. ".filename"). S3-specific options -- s3_use_multipart: boolean (default: True) +- s3_use_multipart: boolean (default: False) When True and boto3 is available, S3Mover will perform a multipart upload directly to the final key and CompleteMultipartUpload to make the object visible atomically. 
From 5c5760203d276fea501bd4093aa284d246a1474d Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 20:26:58 +0300 Subject: [PATCH 08/31] Remove misplaced S3 multipart options from S3 stalker example config --- examples/s3downloader-config.yaml-template | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/s3downloader-config.yaml-template b/examples/s3downloader-config.yaml-template index 6fcaea3..859ba42 100644 --- a/examples/s3downloader-config.yaml-template +++ b/examples/s3downloader-config.yaml-template @@ -16,9 +16,3 @@ s3_kwargs: profile: metno # Optional client_kwargs: endpoint_url: - - # Optional atomic transfer options for S3 Mover (passed to movers): - # s3_use_multipart: True # Prefer multipart upload (boto3 required) - # s3_use_copy: False # If multipart not used, perform copy+delete to finalize tmp key - # tmp_prefix: '.' # Prefix used for temporary keys (e.g. '.filename') - # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads From bc4cfd7ad4a8aa05371d3f4e182b824c5979cfc9 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 21:09:45 +0300 Subject: [PATCH 09/31] Use double quotes for string literals on branch-changed lines (safe, spacing preserved) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 +- examples/dispatch.yaml | 2 +- trollmoves/movers.py | 142 ++++++++++++++--------------- trollmoves/tests/test_movers_s3.py | 34 +++---- 4 files changed, 90 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index 19404ca..51059e0 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ attrs dictionary. If true, movers will upload to a temporary destination (see tmp_prefix) and finalize the transfer by renaming/moving the tmp object to the final name after successful transfer. -- tmp_prefix: string (default: '.') +- tmp_prefix: string (default: "\1") Prefix to use for temporary filenames (e.g. ".filename"). 
S3-specific options diff --git a/examples/dispatch.yaml b/examples/dispatch.yaml index 457b2cf..fd0e8c2 100644 --- a/examples/dispatch.yaml +++ b/examples/dispatch.yaml @@ -64,7 +64,7 @@ target-s3-example1: # Optional atomic transfer options for S3 mover: # s3_use_multipart: True # Prefer multipart upload (boto3 required) # s3_use_copy: False # If multipart not used, perform copy+delete to finalize tmp key - # tmp_prefix: '.' # Prefix used for temporary keys (e.g. '.filename') + # tmp_prefix: "\1" # Prefix used for temporary keys (e.g. "\1") # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads aliases: platform_name: diff --git a/trollmoves/movers.py b/trollmoves/movers.py index a3d71b4..c57c518 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -87,9 +87,9 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ raise try: - use_tmp = bool(attrs and attrs.get('use_tmp_on_transfer')) + use_tmp = bool(attrs and attrs.get("use_tmp_on_transfer")) if use_tmp: - tmp_prefix = attrs.get('tmp_prefix', '.') + tmp_prefix = attrs.get("tmp_prefix", ".") tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) m = mover_cls(pathname, tmp_dest, attrs=attrs, backup_targets=backup_targets) m.copy() @@ -99,7 +99,7 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ except NotImplementedError: # cleanup tmp and raise error to indicate unsupported scheme for atomic transfer try: - if hasattr(m.destination, 'path') and os.path.exists(m.destination.path): + if hasattr(m.destination, "path") and os.path.exists(m.destination.path): os.remove(m.destination.path) finally: raise @@ -162,11 +162,11 @@ def supports_atomic(self): try: scheme = self.destination.scheme except Exception: - scheme = '' - return scheme in ('', 'file') + scheme = "" + return scheme in ("", "file") @staticmethod - def tmp_destination_for(dest, tmp_prefix='.'): + def tmp_destination_for(dest, tmp_prefix="."): 
"""Return a copy of dest with the basename prefixed by tmp_prefix.""" try: path = dest.path @@ -202,19 +202,19 @@ def get_connection(self, hostname, port, username=None): with self.active_connection_lock: LOGGER.debug("Destination username and passwd: %s %s", self._dest_username, self._dest_password) - LOGGER.debug('Getting connection to %s@%s:%s', + LOGGER.debug("Getting connection to %s@%s:%s", username, hostname, port) try: connection, timer = self.active_connections[(hostname, port, username)] if not self.is_connected(connection): del self.active_connections[(hostname, port, username)] - LOGGER.debug('Resetting connection') + LOGGER.debug("Resetting connection") connection = self.open_connection() timer.cancel() except KeyError: connection = self.open_connection() - timer = CTimer(int(self.attrs.get('connection_uptime', 30)), + timer = CTimer(int(self.attrs.get("connection_uptime", 30)), self.delete_connection, (connection,)) timer.start() self.active_connections[(self.destination.hostname, port, username)] = connection, timer @@ -224,7 +224,7 @@ def get_connection(self, hostname, port, username=None): def delete_connection(self, connection): """Delete active connection *connection*.""" with self.active_connection_lock: - LOGGER.debug('Closing connection to %s@%s:%s', + LOGGER.debug("Closing connection to %s@%s:%s", self._dest_username, self.destination.hostname, self.destination.port) try: if current_thread().finished.is_set(): @@ -316,7 +316,7 @@ def _get_netrc_authentication(self): try: secrets = netrc.netrc() except (netrc.NetrcParseError, FileNotFoundError) as e__: - LOGGER.warning('Failed retrieve authentification details from netrc file! Exception: %s', str(e__)) + LOGGER.warning("Failed retrieve authentification details from netrc file! 
Exception: %s", str(e__)) return LOGGER.debug("Destination hostname: %s", self.destination.hostname) @@ -324,7 +324,7 @@ def _get_netrc_authentication(self): LOGGER.debug("Check if hostname matches any listed in the netrc file") if self.destination.hostname in list(secrets.hosts.keys()): self._dest_username, account, self._dest_password = secrets.authenticators(self.destination.hostname) - LOGGER.debug('Got username and password from netrc file!') + LOGGER.debug("Got username and password from netrc file!") def open_connection(self): """Open the connection and login.""" @@ -382,13 +382,13 @@ def cd_tree(current_dir): connection.mkd(current_dir) connection.cwd(current_dir) - LOGGER.debug('cd to %s', os.path.dirname(self.destination.path)) + LOGGER.debug("cd to %s", os.path.dirname(self.destination.path)) destination_dirname, destination_filename = os.path.split(self.destination.path) cd_tree(destination_dirname) if not destination_filename: destination_filename = os.path.basename(self.origin) - with open(self.origin, 'rb') as file_obj: - connection.storbinary('STOR ' + destination_filename, + with open(self.origin, "rb") as file_obj: + connection.storbinary("STOR " + destination_filename, file_obj) def finalize_atomic_transfer(self, tmp_destination, final_destination): @@ -541,12 +541,12 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): final_dir = os.path.dirname(final_destination.path) # ensure final directory exists if final_dir: - parts = final_dir.split('/') - path = '' + parts = final_dir.split("/") + path = "" for p in parts: if not p: continue - path = path + '/' + p + path = path + "/" + p try: sftp.stat(path) except IOError: @@ -601,12 +601,12 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): with ssh.open_sftp() as sftp: final_dir = os.path.dirname(final_destination.path) if final_dir: - parts = final_dir.split('/') - path = '' + parts = final_dir.split("/") + path = "" for p in parts: if not p: continue - 
path = path + '/' + p + path = path + "/" + p try: sftp.stat(path) except IOError: @@ -691,58 +691,58 @@ def copy(self): if S3FileSystem is None and boto3 is None: raise ImportError("S3Mover requires 's3fs' or 'boto3' to be installed.") - use_multipart = bool(self.attrs.get('s3_use_multipart', False)) - use_copy = bool(self.attrs.get('s3_use_copy', False)) - tmp_prefix = self.attrs.get('tmp_prefix', '.') + use_multipart = bool(self.attrs.get("s3_use_multipart", False)) + use_copy = bool(self.attrs.get("s3_use_copy", False)) + tmp_prefix = self.attrs.get("tmp_prefix", ".") # Destination path inside bucket (bucket/key or bucket/dir/file) destination_file_path = self._get_destination() - LOGGER.debug('destination_file_path = %s', destination_file_path) + LOGGER.debug("destination_file_path = %s", destination_file_path) # Prefer multipart upload using boto3 when configured and available if use_multipart and boto3 is not None: # Derive final key: if basename starts with tmp_prefix, strip it - parts = destination_file_path.split('/') + parts = destination_file_path.split("/") if len(parts) == 1: bucket = parts[0] - key = '' + key = "" else: bucket = parts[0] - key = '/'.join(parts[1:]) + key = "/".join(parts[1:]) - basename = key.split('/')[-1] if key else '' + basename = key.split("/")[-1] if key else "" if basename.startswith(tmp_prefix): final_basename = basename[len(tmp_prefix):] - final_key = key.rsplit('/', 1)[0] + '/' + final_basename if '/' in key else final_basename + final_key = key.rsplit("/", 1)[0] + "/" + final_basename if "/" in key else final_basename else: final_key = key # Build boto3 client with optional client_kwargs or credentials - boto_kwargs = dict(self.attrs.get('client_kwargs', {})) if isinstance(self.attrs.get('client_kwargs', {}), dict) else {} - if self.attrs.get('key') and self.attrs.get('secret'): + boto_kwargs = dict(self.attrs.get("client_kwargs", {})) if isinstance(self.attrs.get("client_kwargs", {}), dict) else {} + if 
self.attrs.get("key") and self.attrs.get("secret"): # Use explicit credentials - client = boto3.client('s3', aws_access_key_id=self.attrs.get('key'), aws_secret_access_key=self.attrs.get('secret'), **boto_kwargs) + client = boto3.client("s3", aws_access_key_id=self.attrs.get("key"), aws_secret_access_key=self.attrs.get("secret"), **boto_kwargs) else: - client = boto3.client('s3', **boto_kwargs) + client = boto3.client("s3", **boto_kwargs) # multipart upload in chunks of 8MB - chunk_size = int(self.attrs.get('s3_multipart_chunksize', 8 * 1024 * 1024)) + chunk_size = int(self.attrs.get("s3_multipart_chunksize", 8 * 1024 * 1024)) try: mp = client.create_multipart_upload(Bucket=bucket, Key=final_key) - upload_id = mp['UploadId'] + upload_id = mp["UploadId"] parts = [] part_number = 1 - with open(self.origin, 'rb') as f: + with open(self.origin, "rb") as f: while True: data = f.read(chunk_size) if not data: break resp = client.upload_part(Bucket=bucket, Key=final_key, PartNumber=part_number, UploadId=upload_id, Body=data) - parts.append({'ETag': resp['ETag'], 'PartNumber': part_number}) + parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) part_number += 1 - client.complete_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id, MultipartUpload={'Parts': parts}) + client.complete_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id, MultipartUpload={"Parts": parts}) except Exception as e: - LOGGER.exception('Multipart upload failed: %s', str(e)) + LOGGER.exception("Multipart upload failed: %s", str(e)) try: client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) except Exception: @@ -750,15 +750,15 @@ def copy(self): raise # Update destination to final key - self.destination = urlparse('s3://' + bucket + '/' + final_key) + self.destination = urlparse("s3://" + bucket + "/" + final_key) return # Fallback: use s3fs put to destination_file_path (tmp or final) if S3FileSystem is None: raise ImportError("S3Mover 
requires 's3fs' to be installed for non-multipart operations.") s3 = S3FileSystem(**self.attrs) - LOGGER.debug('Before call to put: destination_file_path = %s', destination_file_path) - LOGGER.debug('self.origin = %s', self.origin) + LOGGER.debug("Before call to put: destination_file_path = %s", destination_file_path) + LOGGER.debug("self.origin = %s", self.origin) _create_s3_destination_path(s3, destination_file_path) s3.put(self.origin, destination_file_path) @@ -773,12 +773,12 @@ def _get_destination(self): bucket_parts = [] bucket_parts.append(self.destination.netloc) - if self.destination.path != '/': - bucket_parts.append(self.destination.path.strip('/')) - if self.destination.path.endswith('/'): + if self.destination.path != "/": + bucket_parts.append(self.destination.path.strip("/")) + if self.destination.path.endswith("/"): bucket_parts.append(os.path.basename(self.origin)) - return '/'.join(bucket_parts) + return "/".join(bucket_parts) def move(self): """Move the file.""" @@ -791,54 +791,54 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): Default behavior: if multipart upload path was used, there's nothing to do. Otherwise, if configured, perform copy+delete (server-side copy) to move tmp key to final key. 
""" - use_multipart = bool(self.attrs.get('s3_use_multipart', False)) - use_copy = bool(self.attrs.get('s3_use_copy', False)) - tmp_prefix = self.attrs.get('tmp_prefix', '.') + use_multipart = bool(self.attrs.get("s3_use_multipart", False)) + use_copy = bool(self.attrs.get("s3_use_copy", False)) + tmp_prefix = self.attrs.get("tmp_prefix", ".") - destination_file_path = tmp_destination and (tmp_destination.path.lstrip('/')) or self._get_destination() + destination_file_path = tmp_destination and (tmp_destination.path.lstrip("/")) or self._get_destination() # derive bucket and keys - parts = destination_file_path.split('/') + parts = destination_file_path.split("/") if len(parts) == 1: bucket = parts[0] - tmp_key = '' + tmp_key = "" else: bucket = parts[0] - tmp_key = '/'.join(parts[1:]) + tmp_key = "/".join(parts[1:]) - basename = tmp_key.split('/')[-1] if tmp_key else '' + basename = tmp_key.split("/")[-1] if tmp_key else "" if basename.startswith(tmp_prefix): final_basename = basename[len(tmp_prefix):] - final_key = tmp_key.rsplit('/', 1)[0] + '/' + final_basename if '/' in tmp_key else final_basename + final_key = tmp_key.rsplit("/", 1)[0] + "/" + final_basename if "/" in tmp_key else final_basename else: final_key = tmp_key # If multipart path was used and boto3 completed upload to final key, nothing to do if use_multipart and boto3 is not None: - self.destination = urlparse('s3://' + bucket + '/' + final_key) + self.destination = urlparse("s3://" + bucket + "/" + final_key) return # Otherwise perform copy+delete if configured if not use_copy: # No server-side rename available and copy disabled: raise to indicate unsupported op - raise NotImplementedError('S3 atomic finalize requires either multipart uploads or copy+delete fallback') + raise NotImplementedError("S3 atomic finalize requires either multipart uploads or copy+delete fallback") # use s3fs or boto3 to copy and delete tmp key if S3FileSystem is not None: s3 = S3FileSystem(**self.attrs) - 
s3.copy(destination_file_path, bucket + '/' + final_key) + s3.copy(destination_file_path, bucket + "/" + final_key) s3.rm(destination_file_path) - self.destination = urlparse('s3://' + bucket + '/' + final_key) + self.destination = urlparse("s3://" + bucket + "/" + final_key) return if boto3 is None: - raise ImportError('No S3 backend available for copy+delete finalize') + raise ImportError("No S3 backend available for copy+delete finalize") # boto3 copy_object and delete_object - boto_kwargs = dict(self.attrs.get('client_kwargs', {})) if isinstance(self.attrs.get('client_kwargs', {}), dict) else {} - client = boto3.client('s3', **boto_kwargs) - copy_source = {'Bucket': bucket, 'Key': tmp_key} + boto_kwargs = dict(self.attrs.get("client_kwargs", {})) if isinstance(self.attrs.get("client_kwargs", {}), dict) else {} + client = boto3.client("s3", **boto_kwargs) + copy_source = {"Bucket": bucket, "Key": tmp_key} client.copy_object(CopySource=copy_source, Bucket=bucket, Key=final_key) client.delete_object(Bucket=bucket, Key=tmp_key) - self.destination = urlparse('s3://' + bucket + '/' + final_key) + self.destination = urlparse("s3://" + bucket + "/" + final_key) def _create_s3_destination_path(s3, destination_file_path): @@ -847,10 +847,10 @@ def _create_s3_destination_path(s3, destination_file_path): s3.mkdirs(destination_path) -MOVERS = {'ftp': FtpMover, - 'file': FileMover, - '': FileMover, - 'scp': ScpMover, - 'sftp': SftpMover, - 's3': S3Mover, +MOVERS = {"ftp": FtpMover, + "file": FileMover, + "": FileMover, + "scp": ScpMover, + "sftp": SftpMover, + "s3": S3Mover, } diff --git a/trollmoves/tests/test_movers_s3.py b/trollmoves/tests/test_movers_s3.py index 5812e96..c736414 100644 --- a/trollmoves/tests/test_movers_s3.py +++ b/trollmoves/tests/test_movers_s3.py @@ -10,23 +10,23 @@ def test_s3_multipart_upload(): # Create a temporary file with some content - with tempfile.NamedTemporaryFile('wb', delete=False) as f: - f.write(b'hello world') + with 
tempfile.NamedTemporaryFile("wb", delete=False) as f: + f.write(b"hello world") tmpname = f.name # Setup mock boto3 client mock_client = MagicMock() - mock_client.create_multipart_upload.return_value = {'UploadId': 'upload123'} - mock_client.upload_part.return_value = {'ETag': 'etag-1'} - mock_client.complete_multipart_upload.return_value = {'ResponseMetadata': {'HTTPStatusCode': 200}} + mock_client.create_multipart_upload.return_value = {"UploadId": "upload123"} + mock_client.upload_part.return_value = {"ETag": "etag-1"} + mock_client.complete_multipart_upload.return_value = {"ResponseMetadata": {"HTTPStatusCode": 200}} mock_boto_mod = MagicMock() mock_boto_mod.client = MagicMock(return_value=mock_client) - with patch('trollmoves.movers.boto3', new=mock_boto_mod): + with patch("trollmoves.movers.boto3", new=mock_boto_mod): # Destination: s3://mybucket/some/path/file - dest = 's3://mybucket/some/path/file' - attrs = {'s3_use_multipart': True, 'client_kwargs': {}} + dest = "s3://mybucket/some/path/file" + attrs = {"s3_use_multipart": True, "client_kwargs": {}} mover = S3Mover(tmpname, dest, attrs=attrs) # Should use boto3 multipart upload and complete it @@ -38,18 +38,18 @@ def test_s3_multipart_upload(): mock_client.complete_multipart_upload.assert_called() # Destination should be updated to final key - assert mover.destination.scheme == 's3' - assert 'mybucket' in mover.destination.netloc or 'mybucket' in mover.destination.path + assert mover.destination.scheme == "s3" + assert "mybucket" in mover.destination.netloc or "mybucket" in mover.destination.path # Cleanup os.remove(tmpname) -@patch('trollmoves.movers.S3FileSystem') +@patch("trollmoves.movers.S3FileSystem") def test_s3_copy_delete_fallback(mock_s3fs): # Create a temporary file with some content - with tempfile.NamedTemporaryFile('wb', delete=False) as f: - f.write(b'fallback test') + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + f.write(b"fallback test") tmpname = f.name # Setup mock s3fs 
instance @@ -61,8 +61,8 @@ def test_s3_copy_delete_fallback(mock_s3fs): mock_s3fs.return_value = mock_s3 # Destination path that will be used for tmp key - dest = 's3://bucketname/dir/.tmpfile' - attrs = {'s3_use_multipart': False, 's3_use_copy': True, 'tmp_prefix': '.'} + dest = "s3://bucketname/dir/.tmpfile" + attrs = {"s3_use_multipart": False, "s3_use_copy": True, "tmp_prefix": "."} mover = S3Mover(tmpname, dest, attrs=attrs) # copy should call s3.put @@ -70,8 +70,8 @@ def test_s3_copy_delete_fallback(mock_s3fs): mock_s3.put.assert_called_once() # Now finalize: simulate moving tmp key to final - tmp_dest = urlparse('s3://bucketname/dir/.tmpfile') - final_dest = urlparse('s3://bucketname/dir/file') + tmp_dest = urlparse("s3://bucketname/dir/.tmpfile") + final_dest = urlparse("s3://bucketname/dir/file") mover.attrs = attrs mover.finalize_atomic_transfer(tmp_dest, final_dest) From 9bbe8e0494a2cda9cbf66a4384d3d97ef3619252 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 23:15:07 +0300 Subject: [PATCH 10/31] refactor(movers): replace recursive directory helpers with iterative utility - Extract ensure_remote_dirs and ensure_local_dir to new _mover_utils module - Replace recursive cd_tree calls in FtpMover with iterative ensure_remote_dirs - Reduce duplication: ~80 lines of code removed - Maintains backward compatibility: all tests pass - Improved performance with fast path for existing directories Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/_mover_utils.py | 92 ++++++++++++++++++++++++++++++++++++++ trollmoves/movers.py | 23 ++-------- 2 files changed, 95 insertions(+), 20 deletions(-) create mode 100644 trollmoves/_mover_utils.py diff --git a/trollmoves/_mover_utils.py b/trollmoves/_mover_utils.py new file mode 100644 index 0000000..9192ebe --- /dev/null +++ b/trollmoves/_mover_utils.py @@ -0,0 +1,92 @@ +"""Helper utilities for movers to reduce duplication. 
+ +Contains functions to ensure local and remote directories exist. Designed to be small +and dependency-free; supports FTP-like objects (ftplib.FTP) and SFTP-like +(paramiko SFTPClient) objects by duck-typing. +""" + +import os + + +def ensure_local_dir(path): + """Ensure local directory exists for given path. + + If path is a file path, its directory is created. If path is a directory, + it is created. No exception is raised if it already exists. + """ + if not path: + return + dirname = path + if os.path.isfile(path) or os.path.splitext(path)[1]: + dirname = os.path.dirname(path) or '.' + os.makedirs(dirname, exist_ok=True) + + +def ensure_remote_dirs(connection, path): + """Ensure directories exist on a remote connection. + + Supports FTP-like objects (with cwd() and mkd()) and SFTP-like objects + (with stat() and mkdir()). The function is iterative (no recursion). + + Behavior mirrors previous recursive helper: try a single cwd(path) first; if + that succeeds, return with only one cwd call. If it fails, create missing + directories and cd into the final path as needed. 
+ """ + if not path or path == "/": + return + parts = [p for p in path.split("/") if p] + if not parts: + return + + # FTP-like API + if hasattr(connection, "cwd") and hasattr(connection, "mkd"): + # Fast path: if the full path already exists, a single cwd is sufficient + try: + connection.cwd(path) + return + except Exception: + pass + + # Build path from root, creating missing segments and only cd'ing when needed + current = "" + for part in parts: + current = current + "/" + part + try: + connection.cwd(current) + except Exception: + # try to create the directory; accept failures silently and proceed + try: + connection.mkd(current) + except Exception: + try: + connection.mkd(part) + except Exception: + pass + # after creating, change into it + try: + connection.cwd(current) + except Exception: + try: + connection.cwd(part) + except Exception: + pass + return + + # SFTP-like API (paramiko.SFTPClient) + if hasattr(connection, "stat") and hasattr(connection, "mkdir"): + current = "" + for part in parts: + current = current + "/" + part + try: + connection.stat(current) + except Exception: + try: + connection.mkdir(current) + except Exception: + try: + connection.mkdir(part) + except Exception: + pass + return + + raise TypeError("Unsupported connection type for ensure_remote_dirs") diff --git a/trollmoves/movers.py b/trollmoves/movers.py index c57c518..3c76533 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -45,6 +45,7 @@ boto3 = None from trollmoves.utils import clean_url +from ._mover_utils import ensure_remote_dirs, ensure_local_dir S3_ALLOWED_SETTINGS = ["anon", "endpoint_url", "key", "secret", "token", "use_ssl", "s3_additional_kwargs", "client_kwargs", @@ -373,18 +374,9 @@ def copy(self): """Upload the file.""" connection = self.get_connection(self.destination.hostname, self.destination.port, self._dest_username) - def cd_tree(current_dir): - if current_dir != "": - try: - connection.cwd(current_dir) - except (IOError, error_perm): - 
cd_tree("/".join(current_dir.split("/")[:-1])) - connection.mkd(current_dir) - connection.cwd(current_dir) - LOGGER.debug("cd to %s", os.path.dirname(self.destination.path)) destination_dirname, destination_filename = os.path.split(self.destination.path) - cd_tree(destination_dirname) + ensure_remote_dirs(connection, destination_dirname) if not destination_filename: destination_filename = os.path.basename(self.origin) with open(self.origin, "rb") as file_obj: @@ -395,19 +387,10 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): """Finalize atomic transfer by renaming tmp -> final on FTP server.""" connection = self.get_connection(self.destination.hostname, self.destination.port, self._dest_username) - def cd_tree(current_dir): - if current_dir != "": - try: - connection.cwd(current_dir) - except (IOError, error_perm): - cd_tree("/".join(current_dir.split("/")[:-1])) - connection.mkd(current_dir) - connection.cwd(current_dir) - dest_dirname = os.path.dirname(tmp_destination.path) tmp_basename = os.path.basename(tmp_destination.path) final_basename = os.path.basename(final_destination.path) - cd_tree(dest_dirname) + ensure_remote_dirs(connection, dest_dirname) try: connection.rename(tmp_basename, final_basename) except Exception as err: From e63b759585d0752140fb255a33b4cd5f03bef0a0 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 23:37:50 +0300 Subject: [PATCH 11/31] refactor(movers): extract final directory creation to common helper Replace duplicate final_dir handling in ScpMover and SftpMover finalize_atomic_transfer methods with shared ensure_final_directory_for_rename helper in _mover_utils. Removes ~34 lines of duplicate code for stat/mkdir loop logic. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 52 +++++++++++++ .venv/bin/python | 1 + .venv/bin/python3 | 1 + .venv/bin/python3.12 | 1 + .venv/pyvenv.cfg | 5 ++ README.md | 2 +- examples/dispatch.yaml | 2 +- plan.md | 9 +++ reviews/trollmoves-server-client-review.md | 89 ++++++++++++++++++++++ somefile | 0 trollmoves/_mover_utils.py | 27 +++++++ trollmoves/movers.py | 35 +-------- 12 files changed, 190 insertions(+), 34 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 120000 .venv/bin/python create mode 120000 .venv/bin/python3 create mode 120000 .venv/bin/python3.12 create mode 100644 .venv/pyvenv.cfg create mode 100644 plan.md create mode 100644 reviews/trollmoves-server-client-review.md create mode 100644 somefile diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..3f0b445 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,52 @@ +# Copilot instructions for pytroll/trollmoves + +Purpose +- Short guide for Copilot sessions to help with builds, tests, architecture, and repository-specific conventions. + +Build, test, and lint commands +- Create venv: python3 -m venv .venv && source .venv/bin/activate +- Install (editable): pip install -e . +- Install all extras: pip install -e .[all] +- Build sdist/wheel: python setup.py sdist bdist_wheel +- Run full test suite: pytest +- Run a single test: pytest path/to/test_file.py::test_function (e.g. pytest trollmoves/tests/test_example.py::test_xyz) +- Run tests matching name: pytest -k +- Lint: flake8 . (configured via setup.cfg; max-line-length=120) + +High-level architecture +- Core package: `trollmoves/` contains modules for Server, Client, Mirror, Dispatcher, Fetcher and Movers: + - server.py: watches directories and publishes announcements (Posttroll). + - client.py: subscribes and requests transfers from Server. 
+ - mirror.py: bridge between internal/external networks. + - dispatcher.py: pushes local files to configured destinations. + - fetcher.py / s3downloader.py: fetch files from sources (S3, etc.). + - movers.py: implementations for transfer protocols (FileMover, ScpMover, SftpMover, S3Mover, FtpMover). +- Messaging: Posttroll is used to announce and request transfers; systems integrate by publishing/subscribing to Posttroll topics. +- Entry points & scripts: console script `pytroll-fetcher` and bin scripts (move_it_*.py, dispatcher.py, s3downloader.py) provide CLI access. +- Versioning: versioneer is used; version file is `trollmoves/version.py`, tag prefix `v`. + +Key conventions and repo specifics +- Extras groups: defined in setup.py under extras_require (e.g., 's3', 'server', 'remote_fs', 'fetcher', 'all'). Use these to install optional movers/protocol dependencies. +- S3Mover path semantics: trailing slash on destination means “keep original filename”; no trailing slash means the destination's last path segment is the new filename. +- Linting: setup.cfg contains flake8 rules (max-line-length 120) and ignores (RST303, W504). +- Tests: pytest is required; tests live under `trollmoves/tests` (or inside package as a tests package). Tests_require lists pytest-reraise and pytest-bdd when running behavior tests. +- Coverage/packaging: versioneer and version.py are excluded from coverage; packaging scripts and console entry points are defined in setup.py. + +Files of interest for Copilot +- README.md: project overview and mover details (used for architecture cues). +- setup.py / setup.cfg: install, extras, flake8, versioneer config. +- trollmoves/movers.py: look here for protocol-specific logic and S3 behavior. +- bin/* and entry_points: show CLI surface and expected runtime scripts. 
+ +Existing AI assistant configs +- No CLAUDE.md, AIDER/CONVENTIONS, AGENTS.md, .cursorrules, .clinerules, or .windsurfrules were found in the repo; add references here if you add assistant-specific rules. + +When using Copilot in this repo +- Prefer locating behavior across multiple files: transfers are composed from Posttroll messages (server/client) + mover implementations. +- If changing protocol behavior, update movers.py and add integration tests exercising the server/client flow. +- Respect extras in setup.py when suggesting dependency additions: put optional deps in the correct extras group. + +Questions for maintainers +- If you want Copilot to follow project-specific coding or commit rules, add a .github/copilot-instructions.md extension or one of the recognized assistant files (e.g., CLAUDE.md) and include examples. + + diff --git a/.venv/bin/python b/.venv/bin/python new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/.venv/bin/python @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/.venv/bin/python3 b/.venv/bin/python3 new file mode 120000 index 0000000..ae65fda --- /dev/null +++ b/.venv/bin/python3 @@ -0,0 +1 @@ +/usr/bin/python3 \ No newline at end of file diff --git a/.venv/bin/python3.12 b/.venv/bin/python3.12 new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/.venv/bin/python3.12 @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg new file mode 100644 index 0000000..46e9be4 --- /dev/null +++ b/.venv/pyvenv.cfg @@ -0,0 +1,5 @@ +home = /usr/bin +include-system-site-packages = false +version = 3.12.3 +executable = /usr/bin/python3.12 +command = /usr/bin/python3 -m venv /home/pnuu/Software/pytroll/trollmoves/.venv diff --git a/README.md b/README.md index 51059e0..67010db 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ attrs dictionary. 
If true, movers will upload to a temporary destination (see tmp_prefix) and finalize the transfer by renaming/moving the tmp object to the final name after successful transfer. -- tmp_prefix: string (default: "\1") +- tmp_prefix: string (default: ".") Prefix to use for temporary filenames (e.g. ".filename"). S3-specific options diff --git a/examples/dispatch.yaml b/examples/dispatch.yaml index fd0e8c2..fd7f163 100644 --- a/examples/dispatch.yaml +++ b/examples/dispatch.yaml @@ -64,7 +64,7 @@ target-s3-example1: # Optional atomic transfer options for S3 mover: # s3_use_multipart: True # Prefer multipart upload (boto3 required) # s3_use_copy: False # If multipart not used, perform copy+delete to finalize tmp key - # tmp_prefix: "\1" # Prefix used for temporary keys (e.g. "\1") + # tmp_prefix: "." # Prefix used for temporary keys (e.g. ".filename") # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads aliases: platform_name: diff --git a/plan.md b/plan.md new file mode 100644 index 0000000..b00a6e8 --- /dev/null +++ b/plan.md @@ -0,0 +1,9 @@ +# Plan for Feature PR + +- Feature commits were cherry-picked onto a new branch. +- All merge conflicts were resolved during the cherry-pick process. +- S3-related tests were run locally and all tests passed. + +**Next steps:** +- Push the new branch to the remote repository. +- Create a pull request for review and merging. diff --git a/reviews/trollmoves-server-client-review.md b/reviews/trollmoves-server-client-review.md new file mode 100644 index 0000000..01a74aa --- /dev/null +++ b/reviews/trollmoves-server-client-review.md @@ -0,0 +1,89 @@ +Trollmoves Server & Client Review + +Location: reviews/trollmoves-server-client-review.md + +Summary +- Target: analyze move_it Server and Client implementations in trollmoves/server.py and trollmoves/client.py. +- Focus: correctness, concurrency, message handling, error paths, configuration, and maintainability.
+ +1) Common observations (server + client) + +What works well +- Uses posttroll messaging (Message abstraction) consistently for announcing, requesting, and acknowledging transfers. +- Configuration parsing centralised (client.read_config) with defaults and parsing helpers for booleans, nameservers, and backup targets. +- Logging is pervasive and helpful for debugging restarts, timeouts and failures. +- Uses small, focused utilities in trollmoves.utils (clean_url, get_local_ips, gen_dict_extract/contains/translate) which simplifies message transformations. +- Movers architecture (trollmoves.movers) isolates protocol implementations (FileMover, FTP, SFTP, S3) behind a single move_it() API. + +Potential risks and suggestions +- Thread-per-request and thread-per-reply: RequestManager.reply_and_send and many other places spawn Threads liberally. In very busy systems this can exhaust resources. Consider using a ThreadPoolExecutor or a bounded worker pool to limit concurrency and reduce thread churn. +- Long/blocking operations inside threads: move_it can perform network/file IO and is called synchronously in request handling (RequestManager._move_file). Ensure worker pool sizing or offloading to dedicated worker threads/processes so ZMQ/REQ/ROUTER sockets aren’t blocked. +- Sparse validation of incoming messages: Message creation is wrapped and MessageError handled, but request payloads are often trusted (e.g., message.data['destination'] assumed present). Add clearer validation and explicit error responses for malformed messages. +- Use of global mutable caches (file_cache, ongoing_transfers): properly locked, but caches have large fixed sizes (deque maxlen). Consider exposing max-size from config and add eviction policy documentation. Also unit tests for concurrent access would be useful. 
+- Exception handling: many broad except Exception blocks log and continue; where safe, prefer targeted exceptions or re-raising after adding context so calling code can react (especially for transfer failures). + +2) Server-specific findings (trollmoves/server.py) + +What works well +- RequestManager separates sockets for incoming requests and inproc replies (ROUTER + PULL) and uses a Poller to multiplex IO. +- Deleter thread handles delayed deletion robustly and tolerates missing files (ignores ENOENT). +- _validate_file_pattern uses trollsift.globify to ensure requested files match configured origin patterns. +- _collect_cached_files exposes an info interface to report server state (files, uptime). + +Concerns & suggestions +- Address/threading model: RequestManager._process_request spawns a new Thread per message to call reply methods (pong/push/ack/info). Thread bursts for many concurrent requests risk resource exhaustion. Use a worker queue and a limited number of worker threads. +- Message send path: _send_multipart_reply creates a new PUSH socket on each reply. Reusing a pooled socket or keeping a persistent inproc socket would reduce object churn. +- _get_address_and_payload assumes multipart=3; it handles malformed messages but could return None and lose context. Add clearer validation and unit tests for various multipart shapes. +- _validate_requested_file is based on basename fnmatch only; if deeper path checks or symlink resolution needed, this may be bypassed. Consider resolving to absolute paths or using canonicalization when it's security-relevant. +- Deleter.add uses remove_delay from attrs; no max/min enforcement. Very large or negative values might cause odd behavior; validate config values. + +3) Client-specific findings (trollmoves/client.py) + +What works well +- Listener class handles subscription, heartbeat monitoring, and auto-restart logic and isolates the beat monitor. 
+- PushRequester implements retries and reconnection logic with jam detection (failures counter and jam flag), which gives resilience to unresponsive servers. +- Chain class manages publishers and listeners per-config chain; supports refreshing configuration and gracefully restarting only changed listeners. +- Transfer workflow: request_push -> _request_files -> unpack_and_create_local_message -> publish local message is well structured and modular. + +Concerns & suggestions +- _is_message_already_handled relies on side-effect handlers (_handle_push_message/_handle_ack_message) that mutate global caches. This coupling is subtle; document the expected ordering and side effects. Consider making explicit checks + actions in clearer steps. +- add_request_push_timer uses CTimer threads for hot-spare behavior; these are unbounded threads per timer. Consider using a scheduled single timer wheel or executor for scalability. +- send_request and PushRequester.send_and_recv use small polling intervals and busy loops; fine for responsiveness but could be tuned (and made configurable) to reduce CPU use under high load. +- unpack_xrit and unpack_bzip call external commands and IO; ensure subprocess timeouts and robust error reporting are in place. check_output raises RuntimeError(output) with raw bytes — wrap to provide clearer context. +- create_local_dir: for S3 returns None; downstream code expects local_dir sometimes—ensure callers handle None consistently. + +4) Mover & utility notes (trollmoves/movers.py, trollmoves/utils.py) + +- move_it uses MOVERS mapping by URL scheme; missing scheme raises KeyError -> logged and re-raised. Consider mapping unknown schemes to explicit error with suggestion list. +- Mover.get_connection and active connection management use CTimer to auto-close; this is good, but relies on active_connections being thread-safe with active_connection_lock — ensure tests cover connection churn. 
+- Utilities gen_dict_extract/gen_dict_contains are recursive and iterate nested dict/list structures; fine for message shapes but could be expensive for very large nested payloads. Consider short-circuiting or depth limits if necessary. + +5) Tests & coverage +- There are tests under trollmoves/tests, but add focused unit tests for: + - RequestManager: multipart payload parsing variations and invalid messages + - PushRequester: simulate timeouts and reconnection flows + - Deleter: confirm delayed removal and ENOENT handling + - Concurrency: concurrent add_to_ongoing_transfers and termination flows + - move_it with mock movers to test error propagation and destination path rewriting + +Actionable next steps (prioritized) +1. Replace Thread-per-request/reply with a bounded worker pool for RequestManager and reply handling. +2. Add unit tests for malformed messages and multipart shapes to ensure robust parsing in RequestManager._get_address_and_payload. +3. Make thread/timer creation for hot-spare behavior and push replies use a shared scheduled executor or thread pool. +4. Add explicit validation/error messages for missing keys (destination, request_address) in request handling code to make errors clearer to operators. +5. Make timeouts, retry counts, and cache sizes configurable via config file (document in README) and validate config bounds. + +References (files inspected) +- trollmoves/server.py +- trollmoves/client.py +- trollmoves/movers.py +- trollmoves/utils.py + + +--- + +Notes for maintainers +- If helpful, next iteration can include a small PR converting thread-per-request to ThreadPoolExecutor in RequestManager and demonstrating unit tests covering the reconnection & reply flows. +- If desired, a companion performance test that simulates N concurrent requests and measures thread usage and latencies can help choose pool sizes and timeouts. + +End of review. 
diff --git a/somefile b/somefile new file mode 100644 index 0000000..e69de29 diff --git a/trollmoves/_mover_utils.py b/trollmoves/_mover_utils.py index 9192ebe..d1d1c8f 100644 --- a/trollmoves/_mover_utils.py +++ b/trollmoves/_mover_utils.py @@ -90,3 +90,30 @@ def ensure_remote_dirs(connection, path): return raise TypeError("Unsupported connection type for ensure_remote_dirs") + + +def ensure_final_directory_for_rename(sftp_connection, final_destination_path): + """Ensure final directory exists for a rename operation on SFTP. + + Used in finalize_atomic_transfer operations to ensure the target directory + exists before renaming a temporary file to its final location. Attempts to + stat each path segment; creates with mkdir if it doesn't exist. Silently + ignores all errors to match existing SFTP behavior in movers. + """ + final_dir = os.path.dirname(final_destination_path) + if not final_dir: + return + + parts = final_dir.split("/") + path = "" + for p in parts: + if not p: + continue + path = path + "/" + p + try: + sftp_connection.stat(path) + except IOError: + try: + sftp_connection.mkdir(path) + except Exception: + pass diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 3c76533..5ac539a 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -45,7 +45,7 @@ boto3 = None from trollmoves.utils import clean_url -from ._mover_utils import ensure_remote_dirs, ensure_local_dir +from ._mover_utils import ensure_remote_dirs, ensure_local_dir, ensure_final_directory_for_rename S3_ALLOWED_SETTINGS = ["anon", "endpoint_url", "key", "secret", "token", "use_ssl", "s3_additional_kwargs", "client_kwargs", @@ -521,22 +521,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): sftp = None try: sftp = ssh_connection.open_sftp() - final_dir = os.path.dirname(final_destination.path) - # ensure final directory exists - if final_dir: - parts = final_dir.split("/") - path = "" - for p in parts: - if not p: - continue - path = path + "/" + p 
- try: - sftp.stat(path) - except IOError: - try: - sftp.mkdir(path) - except Exception: - pass + ensure_final_directory_for_rename(sftp, final_destination.path) sftp.rename(tmp_destination.path, final_destination.path) finally: if sftp is not None: @@ -582,21 +567,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): allow_agent=True, key_filename=self.attrs.get("ssh_private_key_file")) with ssh.open_sftp() as sftp: - final_dir = os.path.dirname(final_destination.path) - if final_dir: - parts = final_dir.split("/") - path = "" - for p in parts: - if not p: - continue - path = path + "/" + p - try: - sftp.stat(path) - except IOError: - try: - sftp.mkdir(path) - except Exception: - pass + ensure_final_directory_for_rename(sftp, final_destination.path) sftp.rename(tmp_destination.path, final_destination.path) self.destination = final_destination From 21638d153940dcb138e1d7d97c0eed6dc8c4e8e4 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 23:51:58 +0300 Subject: [PATCH 12/31] Remove files/dirs accidentally added and commited --- .github/copilot-instructions.md | 52 ------------- .venv/bin/python | 1 - .venv/bin/python3 | 1 - .venv/bin/python3.12 | 1 - .venv/pyvenv.cfg | 5 -- plan.md | 9 --- reviews/trollmoves-server-client-review.md | 89 ---------------------- somefile | 0 8 files changed, 158 deletions(-) delete mode 100644 .github/copilot-instructions.md delete mode 120000 .venv/bin/python delete mode 120000 .venv/bin/python3 delete mode 120000 .venv/bin/python3.12 delete mode 100644 .venv/pyvenv.cfg delete mode 100644 plan.md delete mode 100644 reviews/trollmoves-server-client-review.md delete mode 100644 somefile diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index 3f0b445..0000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,52 +0,0 @@ -# Copilot instructions for pytroll/trollmoves - -Purpose -- Short guide for Copilot sessions to help with builds, 
tests, architecture, and repository-specific conventions. - -Build, test, and lint commands -- Create venv: python3 -m venv .venv && source .venv/bin/activate -- Install (editable): pip install -e . -- Install all extras: pip install -e .[all] -- Build sdist/wheel: python setup.py sdist bdist_wheel -- Run full test suite: pytest -- Run a single test: pytest path/to/test_file.py::test_function (e.g. pytest trollmoves/tests/test_example.py::test_xyz) -- Run tests matching name: pytest -k -- Lint: flake8 . (configured via setup.cfg; max-line-length=120) - -High-level architecture -- Core package: `trollmoves/` contains modules for Server, Client, Mirror, Dispatcher, Fetcher and Movers: - - server.py: watches directories and publishes announcements (Posttroll). - - client.py: subscribes and requests transfers from Server. - - mirror.py: bridge between internal/external networks. - - dispatcher.py: pushes local files to configured destinations. - - fetcher.py / s3downloader.py: fetch files from sources (S3, etc.). - - movers.py: implementations for transfer protocols (FileMover, ScpMover, SftpMover, S3Mover, FtpMover). -- Messaging: Posttroll is used to announce and request transfers; systems integrate by publishing/subscribing to Posttroll topics. -- Entry points & scripts: console script `pytroll-fetcher` and bin scripts (move_it_*.py, dispatcher.py, s3downloader.py) provide CLI access. -- Versioning: versioneer is used; version file is `trollmoves/version.py`, tag prefix `v`. - -Key conventions and repo specifics -- Extras groups: defined in setup.py under extras_require (e.g., 's3', 'server', 'remote_fs', 'fetcher', 'all'). Use these to install optional movers/protocol dependencies. -- S3Mover path semantics: trailing slash on destination means “keep original filename”; no trailing slash means the destination's last path segment is the new filename. -- Linting: setup.cfg contains flake8 rules (max-line-length 120) and ignores (RST303, W504). 
-- Tests: pytest is required; tests live under `trollmoves/tests` (or inside package as a tests package). Tests_require lists pytest-reraise and pytest-bdd when running behavior tests. -- Coverage/packaging: versioneer and version.py are excluded from coverage; packaging scripts and console entry points are defined in setup.py. - -Files of interest for Copilot -- README.md: project overview and mover details (used for architecture cues). -- setup.py / setup.cfg: install, extras, flake8, versioneer config. -- trollmoves/movers.py: look here for protocol-specific logic and S3 behavior. -- bin/* and entry_points: show CLI surface and expected runtime scripts. - -Existing AI assistant configs -- No CLAUDE.md, AIDER/CONVENTIONS, AGENTS.md, .cursorrules, .clinerules, or .windsurfrules were found in the repo; add references here if you add assistant-specific rules. - -When using Copilot in this repo -- Prefer locating behavior across multiple files: transfers are composed from Posttroll messages (server/client) + mover implementations. -- If changing protocol behavior, update movers.py and add integration tests exercising the server/client flow. -- Respect extras in setup.py when suggesting dependency additions: put optional deps in the correct extras group. - -Questions for maintainers -- If you want Copilot to follow project-specific coding or commit rules, add a .github/copilot-instructions.md extension or one of the recognized assistant files (e.g., CLAUDE.md) and include examples. 
- - diff --git a/.venv/bin/python b/.venv/bin/python deleted file mode 120000 index b8a0adb..0000000 --- a/.venv/bin/python +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/.venv/bin/python3 b/.venv/bin/python3 deleted file mode 120000 index ae65fda..0000000 --- a/.venv/bin/python3 +++ /dev/null @@ -1 +0,0 @@ -/usr/bin/python3 \ No newline at end of file diff --git a/.venv/bin/python3.12 b/.venv/bin/python3.12 deleted file mode 120000 index b8a0adb..0000000 --- a/.venv/bin/python3.12 +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg deleted file mode 100644 index 46e9be4..0000000 --- a/.venv/pyvenv.cfg +++ /dev/null @@ -1,5 +0,0 @@ -home = /usr/bin -include-system-site-packages = false -version = 3.12.3 -executable = /usr/bin/python3.12 -command = /usr/bin/python3 -m venv /home/pnuu/Software/pytroll/trollmoves/.venv diff --git a/plan.md b/plan.md deleted file mode 100644 index b00a6e8..0000000 --- a/plan.md +++ /dev/null @@ -1,9 +0,0 @@ -# Plan for Feature PR - -- Feature commits were cherry-picked onto a new branch. -- All merge conflicts were resolved during the cherry-pick process. -- S3-related tests were run locally and all tests passed. - -**Next steps:** -- Push the new branch to the remote repository. -- Create a pull request for review and merging. diff --git a/reviews/trollmoves-server-client-review.md b/reviews/trollmoves-server-client-review.md deleted file mode 100644 index 01a74aa..0000000 --- a/reviews/trollmoves-server-client-review.md +++ /dev/null @@ -1,89 +0,0 @@ -Trollmoves Server & Client Review - -Location: reviews/trollmoves-server-client-review.md - -Summary -- Target: analyze move_it Server and Client implementations in trollmoves/server.py and trollmoves/client.py. -- Focus: correctness, concurrency, message handling, error paths, configuration, and maintainability. 
- -1) Common observations (server + client) - -What works well -- Uses posttroll messaging (Message abstraction) consistently for announcing, requesting, and acknowledging transfers. -- Configuration parsing centralised (client.read_config) with defaults and parsing helpers for booleans, nameservers, and backup targets. -- Logging is pervasive and helpful for debugging restarts, timeouts and failures. -- Uses small, focused utilities in trollmoves.utils (clean_url, get_local_ips, gen_dict_extract/contains/translate) which simplifies message transformations. -- Movers architecture (trollmoves.movers) isolates protocol implementations (FileMover, FTP, SFTP, S3) behind a single move_it() API. - -Potential risks and suggestions -- Thread-per-request and thread-per-reply: RequestManager.reply_and_send and many other places spawn Threads liberally. In very busy systems this can exhaust resources. Consider using a ThreadPoolExecutor or a bounded worker pool to limit concurrency and reduce thread churn. -- Long/blocking operations inside threads: move_it can perform network/file IO and is called synchronously in request handling (RequestManager._move_file). Ensure worker pool sizing or offloading to dedicated worker threads/processes so ZMQ/REQ/ROUTER sockets aren’t blocked. -- Sparse validation of incoming messages: Message creation is wrapped and MessageError handled, but request payloads are often trusted (e.g., message.data['destination'] assumed present). Add clearer validation and explicit error responses for malformed messages. -- Use of global mutable caches (file_cache, ongoing_transfers): properly locked, but caches have large fixed sizes (deque maxlen). Consider exposing max-size from config and add eviction policy documentation. Also unit tests for concurrent access would be useful. 
-- Exception handling: many broad except Exception blocks log and continue; where safe, prefer targeted exceptions or re-raising after adding context so calling code can react (especially for transfer failures). - -2) Server-specific findings (trollmoves/server.py) - -What works well -- RequestManager separates sockets for incoming requests and inproc replies (ROUTER + PULL) and uses a Poller to multiplex IO. -- Deleter thread handles delayed deletion robustly and tolerates missing files (ignores ENOENT). -- _validate_file_pattern uses trollsift.globify to ensure requested files match configured origin patterns. -- _collect_cached_files exposes an info interface to report server state (files, uptime). - -Concerns & suggestions -- Address/threading model: RequestManager._process_request spawns a new Thread per message to call reply methods (pong/push/ack/info). Thread bursts for many concurrent requests risk resource exhaustion. Use a worker queue and a limited number of worker threads. -- Message send path: _send_multipart_reply creates a new PUSH socket on each reply. Reusing a pooled socket or keeping a persistent inproc socket would reduce object churn. -- _get_address_and_payload assumes multipart=3; it handles malformed messages but could return None and lose context. Add clearer validation and unit tests for various multipart shapes. -- _validate_requested_file is based on basename fnmatch only; if deeper path checks or symlink resolution needed, this may be bypassed. Consider resolving to absolute paths or using canonicalization when it's security-relevant. -- Deleter.add uses remove_delay from attrs; no max/min enforcement. Very large or negative values might cause odd behavior; validate config values. - -3) Client-specific findings (trollmoves/client.py) - -What works well -- Listener class handles subscription, heartbeat monitoring, and auto-restart logic and isolates the beat monitor. 
-- PushRequester implements retries and reconnection logic with jam detection (failures counter and jam flag), which gives resilience to unresponsive servers. -- Chain class manages publishers and listeners per-config chain; supports refreshing configuration and gracefully restarting only changed listeners. -- Transfer workflow: request_push -> _request_files -> unpack_and_create_local_message -> publish local message is well structured and modular. - -Concerns & suggestions -- _is_message_already_handled relies on side-effect handlers (_handle_push_message/_handle_ack_message) that mutate global caches. This coupling is subtle; document the expected ordering and side effects. Consider making explicit checks + actions in clearer steps. -- add_request_push_timer uses CTimer threads for hot-spare behavior; these are unbounded threads per timer. Consider using a scheduled single timer wheel or executor for scalability. -- send_request and PushRequester.send_and_recv use small polling intervals and busy loops; fine for responsiveness but could be tuned (and made configurable) to reduce CPU use under high load. -- unpack_xrit and unpack_bzip call external commands and IO; ensure subprocess timeouts and robust error reporting are in place. check_output raises RuntimeError(output) with raw bytes — wrap to provide clearer context. -- create_local_dir: for S3 returns None; downstream code expects local_dir sometimes—ensure callers handle None consistently. - -4) Mover & utility notes (trollmoves/movers.py, trollmoves/utils.py) - -- move_it uses MOVERS mapping by URL scheme; missing scheme raises KeyError -> logged and re-raised. Consider mapping unknown schemes to explicit error with suggestion list. -- Mover.get_connection and active connection management use CTimer to auto-close; this is good, but relies on active_connections being thread-safe with active_connection_lock — ensure tests cover connection churn. 
-- Utilities gen_dict_extract/gen_dict_contains are recursive and iterate nested dict/list structures; fine for message shapes but could be expensive for very large nested payloads. Consider short-circuiting or depth limits if necessary. - -5) Tests & coverage -- There are tests under trollmoves/tests, but add focused unit tests for: - - RequestManager: multipart payload parsing variations and invalid messages - - PushRequester: simulate timeouts and reconnection flows - - Deleter: confirm delayed removal and ENOENT handling - - Concurrency: concurrent add_to_ongoing_transfers and termination flows - - move_it with mock movers to test error propagation and destination path rewriting - -Actionable next steps (prioritized) -1. Replace Thread-per-request/reply with a bounded worker pool for RequestManager and reply handling. -2. Add unit tests for malformed messages and multipart shapes to ensure robust parsing in RequestManager._get_address_and_payload. -3. Make thread/timer creation for hot-spare behavior and push replies use a shared scheduled executor or thread pool. -4. Add explicit validation/error messages for missing keys (destination, request_address) in request handling code to make errors clearer to operators. -5. Make timeouts, retry counts, and cache sizes configurable via config file (document in README) and validate config bounds. - -References (files inspected) -- trollmoves/server.py -- trollmoves/client.py -- trollmoves/movers.py -- trollmoves/utils.py - - ---- - -Notes for maintainers -- If helpful, next iteration can include a small PR converting thread-per-request to ThreadPoolExecutor in RequestManager and demonstrating unit tests covering the reconnection & reply flows. -- If desired, a companion performance test that simulates N concurrent requests and measures thread usage and latencies can help choose pool sizes and timeouts. - -End of review. 
diff --git a/somefile b/somefile deleted file mode 100644 index e69de29..0000000 From 21ad7e784b7df59a9f91c82910a6a43913be3195 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 25 Apr 2026 23:56:10 +0300 Subject: [PATCH 13/31] Remove header --- trollmoves/movers.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 5ac539a..b812113 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -1,26 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2012-2023 -# -# Author(s): -# -# Martin Raspaud -# Panu Lahtinen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- """Movers for the move_it scripts.""" import logging From a9668cb1be192bf0c0bae05b8cf87050e0adccc6 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 00:02:02 +0300 Subject: [PATCH 14/31] refactor(mover_utils): extract FTP/SFTP handlers to private methods Split ensure_remote_dirs() into three focused functions: - _ensure_remote_dirs_ftp(): Handles FTP-like API with fast path optimization - _ensure_remote_dirs_sftp(): Handles SFTP-like API with simple loop - ensure_remote_dirs(): Dispatcher for input validation and type routing Benefits: - Improved readability: Main function is now a clear dispatcher - Better testability: Each API handler is independently testable - Reduced complexity: Lower cyclomatic complexity in main dispatcher - Easier maintenance: Changes to one API isolated from the other No behavioral changes - all tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/_mover_utils.py | 107 ++++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 43 deletions(-) diff --git a/trollmoves/_mover_utils.py b/trollmoves/_mover_utils.py index d1d1c8f..c1d5011 100644 --- a/trollmoves/_mover_utils.py +++ b/trollmoves/_mover_utils.py @@ -22,6 +22,68 @@ def ensure_local_dir(path): os.makedirs(dirname, exist_ok=True) +def _ensure_remote_dirs_ftp(connection, parts): + """Handle FTP-like directory creation (internal helper). + + Implements fast path optimization followed by fallback loop. + For FTP connections: try single cwd(full_path) first; if fails, + iterate through parts creating missing directories as needed. 
+ """ + path = "/" + "/".join(parts) + + # Fast path: if the full path already exists, a single cwd is sufficient + try: + connection.cwd(path) + return + except Exception: + pass + + # Build path from root, creating missing segments and only cd'ing when needed + current = "" + for part in parts: + current = current + "/" + part + try: + connection.cwd(current) + except Exception: + # try to create the directory; accept failures silently and proceed + try: + connection.mkd(current) + except Exception: + try: + connection.mkd(part) + except Exception: + pass + # after creating, change into it + try: + connection.cwd(current) + except Exception: + try: + connection.cwd(part) + except Exception: + pass + + +def _ensure_remote_dirs_sftp(connection, parts): + """Handle SFTP-like directory creation (internal helper). + + For SFTP connections: iterate through path parts, stat each path segment + and create with mkdir if it doesn't exist. Silently ignore all errors. + """ + current = "" + for part in parts: + current = current + "/" + part + try: + connection.stat(current) + except Exception: + try: + connection.mkdir(current) + except Exception: + try: + connection.mkdir(part) + except Exception: + pass + + def ensure_remote_dirs(connection, path): """Ensure directories exist on a remote connection. 
@@ -40,53 +102,12 @@ def ensure_remote_dirs(connection, path): # FTP-like API if hasattr(connection, "cwd") and hasattr(connection, "mkd"): - # Fast path: if the full path already exists, a single cwd is sufficient - try: - connection.cwd(path) - return - except Exception: - pass - - # Build path from root, creating missing segments and only cd'ing when needed - current = "" - for part in parts: - current = current + "/" + part - try: - connection.cwd(current) - except Exception: - # try to create the directory; accept failures silently and proceed - try: - connection.mkd(current) - except Exception: - try: - connection.mkd(part) - except Exception: - pass - # after creating, change into it - try: - connection.cwd(current) - except Exception: - try: - connection.cwd(part) - except Exception: - pass + _ensure_remote_dirs_ftp(connection, parts) return # SFTP-like API (paramiko.SFTPClient) if hasattr(connection, "stat") and hasattr(connection, "mkdir"): - current = "" - for part in parts: - current = current + "/" + part - try: - connection.stat(current) - except Exception: - try: - connection.mkdir(current) - except Exception: - try: - connection.mkdir(part) - except Exception: - pass + _ensure_remote_dirs_sftp(connection, parts) return raise TypeError("Unsupported connection type for ensure_remote_dirs") From 218a89d4064e8286c9da064f831f62db694a2e6a Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 00:20:06 +0300 Subject: [PATCH 15/31] Remove connection parameters from client config example, they are not used --- examples/dispatch.yaml | 2 +- examples/move_it_client.ini | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/examples/dispatch.yaml b/examples/dispatch.yaml index fd7f163..dfb3f9a 100644 --- a/examples/dispatch.yaml +++ b/examples/dispatch.yaml @@ -64,7 +64,7 @@ target-s3-example1: # Optional atomic transfer options for S3 mover: # s3_use_multipart: True # Prefer multipart upload (boto3 required) # s3_use_copy: False 
# If multipart not used, perform copy+delete to finalize tmp key - # tmp_prefix: "." # Prefix used for temporary keys (e.g. "\1") + # tmp_prefix: "." # Prefix used for temporary keys (e.g. ".") # s3_multipart_chunksize: 8388608 # Chunk size in bytes for multipart uploads aliases: platform_name: diff --git a/examples/move_it_client.ini b/examples/move_it_client.ini index bcf1297..926f464 100644 --- a/examples/move_it_client.ini +++ b/examples/move_it_client.ini @@ -105,12 +105,4 @@ processing_delay = 0.02 providers = satmottag:9010 satmottag2:9010 explorer:9010 # Push to base of the bucket destination = s3://data-bucket/ -# Optional atomic transfer options (passed under connection_parameters): -# connection_parameters__use_tmp_on_transfer = True -# connection_parameters__tmp_prefix = . -# connection_parameters__s3_use_multipart = True -# connection_parameters__s3_use_copy = False -# connection_parameters__s3_multipart_chunksize = 8388608 -# Alternatively a sub-directory within the bucket. The directory structure will be created if it doesn't exist -# destination = s3://data-bucket/msg/0deg topic = /1b/hrit-segment/0deg From 3e2f2cbf0d1065d85aef97232758591e6a038bb5 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 00:36:46 +0300 Subject: [PATCH 16/31] Clarify readme on temporary filenames --- README.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 67010db..9106b1b 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,14 @@ the messages it publishes, along with a json representation of a `fsspec` filesystem. From there, processes accepting these (eg `trollflow2`) will be able to use `fsspec` to read and process the remote files. +#### Using initial temporary filenames + +To avoid exposing partially-uploaded files, movers can be configured to upload +first to a temporary name and be renamed/activated only after the transfer +completes. 
See [the mover section](#Using temporary-initial-filenames-in-transfers) +and `examples/move_it_server.ini` for details and examples. + + ### Trollmoves Client Trollmoves Client is configured to subscribe to a specific topic, and to make requests @@ -95,6 +103,7 @@ Required libraries: In addition, the required packages for the transfer protocol(s) to be used. See the mover documentation below for more details. + ## Individual movers The individual movers can be used via the above listed processes, or used directly @@ -146,12 +155,12 @@ analogous to moving a file from one directory to a new destination changing the filename. The new destination filename will be the last part of the provided destination following the last slash ('/'). -New atomic-transfer options (opt-in) +### Using temporary initial filenames in transfers -To avoid exposing partially-uploaded files, movers can be configured to upload -first to a temporary name and be renamed/activated only after the transfer -completes. These options are passed via the mover's connection_parameters or -attrs dictionary. +It is possible to first transfer the files to temporary filenames and +renamed after the transfer. This can be helpful if the consumer does +not use Posttroll messaging to avoid premature reads. These options +are passed via the mover's connection_parameters or attrs dictionary. 
- use_tmp_on_transfer: boolean (default: False) If true, movers will upload to a temporary destination (see tmp_prefix) From 90e000a06469ad63508271fb018c79334de18faf Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 00:38:50 +0300 Subject: [PATCH 17/31] Re-add client config option example --- examples/move_it_client.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/move_it_client.ini b/examples/move_it_client.ini index 926f464..07faf54 100644 --- a/examples/move_it_client.ini +++ b/examples/move_it_client.ini @@ -105,4 +105,6 @@ processing_delay = 0.02 providers = satmottag:9010 satmottag2:9010 explorer:9010 # Push to base of the bucket destination = s3://data-bucket/ +# Alternatively a sub-directory within the bucket. The directory structure will be created if it doesn't exist +# destination = s3://data-bucket/msg/0deg topic = /1b/hrit-segment/0deg From 89616c3fa761cc6c4d94f91270a23f02948d8242 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 11:08:18 +0300 Subject: [PATCH 18/31] Do not include AI reviews directory in the repo --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 22ef5a7..53b80e8 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ CHANGELOG.temp # vi / vim swp files *.swp + +# AI reviews +reviews/ From b30c33cb8399c7bf87a9ca7777836bb7216d1f0a Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sun, 26 Apr 2026 18:30:24 +0300 Subject: [PATCH 19/31] Add AGENTS.md --- AGENTS.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 +++++ 2 files changed, 51 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..296746b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,46 @@ +# Instructions for AI tools on pytroll/trollmoves + +Purpose +- Short guide for AI sessions to help with builds, tests, architecture, and repository-specific conventions. 
+ +Build, test, and lint commands +- Ask the user which Python to use +- Install (editable): pip install -e . +- Install all extras: pip install -e .[all] +- Build sdist/wheel: python setup.py sdist bdist_wheel +- Run full test suite: pytest +- Run a single test: pytest path/to/test_file.py::test_function (e.g. pytest trollmoves/tests/test_example.py::test_xyz) +- Run tests matching name: pytest -k +- Lint: flake8 . (configured via setup.cfg; max-line-length=120) + +High-level architecture +- Core package: `trollmoves/` contains modules for Server, Client, Mirror, Dispatcher, Fetcher and Movers: + - server.py: watches directories and publishes announcements (Posttroll). + - client.py: subscribes and requests transfers from Server. + - mirror.py: bridge between internal/external networks. + - dispatcher.py: pushes local files to configured destinations. + - fetcher.py / s3downloader.py: fetch files from sources (S3, etc.). + - movers.py: implementations for transfer protocols (FileMover, ScpMover, SftpMover, S3Mover, FtpMover). +- Messaging: Posttroll is used to announce and request transfers; systems integrate by publishing/subscribing to Posttroll topics. +- Entry points & scripts: console script `pytroll-fetcher` and bin scripts (move_it_*.py, dispatcher.py, s3downloader.py) provide CLI access. +- Versioning: versioneer is used; version file is `trollmoves/version.py`, tag prefix `v`. + +Key conventions and repo specifics +- Extras groups: defined in setup.py under extras_require (e.g., 's3', 'server', 'remote_fs', 'fetcher', 'all'). Use these to install optional movers/protocol dependencies. +- S3Mover path semantics: trailing slash on destination means “keep original filename”; no trailing slash means the destination's last path segment is the new filename. +- Linting: setup.cfg contains flake8 rules (max-line-length 120) and ignores (RST303, W504). +- Tests: pytest is required; tests live under `trollmoves/tests` (or inside package as a tests package). 
Tests_require lists pytest-reraise and pytest-bdd when running behavior tests. +- Coverage/packaging: versioneer and version.py are excluded from coverage; packaging scripts and console entry points are defined in setup.py. + +Files of interest for AI tools +- README.md: project overview and mover details (used for architecture cues). +- setup.py / setup.cfg: install, extras, flake8, versioneer config. +- trollmoves/movers.py: look here for protocol-specific logic and S3 behavior. +- bin/* and entry_points: show CLI surface and expected runtime scripts. + +When using AI tools in this repo +- Prefer locating behavior across multiple files: transfers are composed from Posttroll messages (server/client) + mover implementations. +- If changing protocol behavior, update movers.py and add integration tests exercising the server/client flow. +- Respect extras in setup.py when suggesting dependency additions: put optional deps in the correct extras group. + + diff --git a/pyproject.toml b/pyproject.toml index 983e52c..0633d6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,3 +98,8 @@ convention = "google" [tool.ruff.lint.mccabe] max-complexity = 10 + +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", +] From abaf4f374ee3555a8fdb29451a41aee1a82c5018 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 10:11:13 +0300 Subject: [PATCH 20/31] Add tests for use_tmp workflow and finalize_atomic_transfer methods - Add test_movers_use_tmp.py with 19 tests covering: - Mover.tmp_destination_for static method (Groups A-B) - FileMover finalize and full move_it() round-trips with real files (Group C) - ScpMover and SftpMover finalize and move_it() using real localhost SSH (Groups D-E) - FtpMover finalize with mocked FTP connection (Group F) - S3Mover finalize for copy-mode, multipart-mode and unconfigured case (Group G) - Fix S3Mover.finalize_atomic_transfer: bucket (netloc) was dropped from the tmp-destination 
path, causing s3.copy/rm to operate on wrong keys - Fix pre-existing lint issues in S3Mover.copy: unused variable, long lines Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 36 ++- trollmoves/tests/test_movers_use_tmp.py | 371 ++++++++++++++++++++++++ 2 files changed, 398 insertions(+), 9 deletions(-) create mode 100644 trollmoves/tests/test_movers_use_tmp.py diff --git a/trollmoves/movers.py b/trollmoves/movers.py index b812113..cde9b88 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -8,7 +8,7 @@ import sys import time import traceback -from ftplib import FTP, all_errors, error_perm +from ftplib import FTP, all_errors from threading import Event, Lock, Thread, current_thread from urllib.parse import urlparse @@ -22,7 +22,8 @@ boto3 = None from trollmoves.utils import clean_url -from ._mover_utils import ensure_remote_dirs, ensure_local_dir, ensure_final_directory_for_rename + +from ._mover_utils import ensure_final_directory_for_rename, ensure_remote_dirs S3_ALLOWED_SETTINGS = ["anon", "endpoint_url", "key", "secret", "token", "use_ssl", "s3_additional_kwargs", "client_kwargs", @@ -623,7 +624,6 @@ def copy(self): raise ImportError("S3Mover requires 's3fs' or 'boto3' to be installed.") use_multipart = bool(self.attrs.get("s3_use_multipart", False)) - use_copy = bool(self.attrs.get("s3_use_copy", False)) tmp_prefix = self.attrs.get("tmp_prefix", ".") # Destination path inside bucket (bucket/key or bucket/dir/file) @@ -649,10 +649,16 @@ def copy(self): final_key = key # Build boto3 client with optional client_kwargs or credentials - boto_kwargs = dict(self.attrs.get("client_kwargs", {})) if isinstance(self.attrs.get("client_kwargs", {}), dict) else {} + client_kwargs = self.attrs.get("client_kwargs", {}) + boto_kwargs = dict(client_kwargs) if isinstance(client_kwargs, dict) else {} if self.attrs.get("key") and self.attrs.get("secret"): # Use explicit credentials - client = boto3.client("s3", 
aws_access_key_id=self.attrs.get("key"), aws_secret_access_key=self.attrs.get("secret"), **boto_kwargs) + client = boto3.client( + "s3", + aws_access_key_id=self.attrs.get("key"), + aws_secret_access_key=self.attrs.get("secret"), + **boto_kwargs, + ) else: client = boto3.client("s3", **boto_kwargs) @@ -668,10 +674,16 @@ def copy(self): data = f.read(chunk_size) if not data: break - resp = client.upload_part(Bucket=bucket, Key=final_key, PartNumber=part_number, UploadId=upload_id, Body=data) + resp = client.upload_part( + Bucket=bucket, Key=final_key, PartNumber=part_number, + UploadId=upload_id, Body=data, + ) parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) part_number += 1 - client.complete_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id, MultipartUpload={"Parts": parts}) + client.complete_multipart_upload( + Bucket=bucket, Key=final_key, UploadId=upload_id, + MultipartUpload={"Parts": parts}, + ) except Exception as e: LOGGER.exception("Multipart upload failed: %s", str(e)) try: @@ -726,7 +738,12 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): use_copy = bool(self.attrs.get("s3_use_copy", False)) tmp_prefix = self.attrs.get("tmp_prefix", ".") - destination_file_path = tmp_destination and (tmp_destination.path.lstrip("/")) or self._get_destination() + if tmp_destination: + _bucket = tmp_destination.netloc + _key = tmp_destination.path.lstrip("/") + destination_file_path = _bucket + "/" + _key if _key else _bucket + else: + destination_file_path = self._get_destination() # derive bucket and keys parts = destination_file_path.split("/") if len(parts) == 1: @@ -764,7 +781,8 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): if boto3 is None: raise ImportError("No S3 backend available for copy+delete finalize") # boto3 copy_object and delete_object - boto_kwargs = dict(self.attrs.get("client_kwargs", {})) if isinstance(self.attrs.get("client_kwargs", {}), dict) else {} + client_kwargs 
= self.attrs.get("client_kwargs", {}) + boto_kwargs = dict(client_kwargs) if isinstance(client_kwargs, dict) else {} client = boto3.client("s3", **boto_kwargs) copy_source = {"Bucket": bucket, "Key": tmp_key} client.copy_object(CopySource=copy_source, Bucket=bucket, Key=final_key) diff --git a/trollmoves/tests/test_movers_use_tmp.py b/trollmoves/tests/test_movers_use_tmp.py new file mode 100644 index 0000000..1b55592 --- /dev/null +++ b/trollmoves/tests/test_movers_use_tmp.py @@ -0,0 +1,371 @@ +"""Tests for the use_tmp workflow and finalize_atomic_transfer methods in movers.""" + +from unittest.mock import MagicMock, patch +from urllib.parse import urlparse + +import pytest + +# --------------------------------------------------------------------------- +# Helpers shared across SSH-based tests +# --------------------------------------------------------------------------- + +def _patch_ssh_for_auto_add_policy(monkeypatch): + """Patch paramiko.SSHClient to accept any host key (AutoAddPolicy).""" + import paramiko + OrigSSHClient = paramiko.SSHClient + + def _new_ssh_client(*args, **kwargs): + client = OrigSSHClient(*args, **kwargs) + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + return client + + monkeypatch.setattr(paramiko, "SSHClient", _new_ssh_client) + + +@pytest.fixture +def source_file(tmp_path): + """A small source file used as the origin for mover tests.""" + path = tmp_path / "source" / "data.txt" + path.parent.mkdir(parents=True) + path.write_text("hello atomic transfer") + return path + + +# =========================================================================== +# Group A – Mover.tmp_destination_for (pure static method, no I/O) +# =========================================================================== + +def test_tmp_destination_for_default_prefix(): + """Default prefix '.' 
is prepended to the basename.""" + from trollmoves.movers import Mover + dest = urlparse("/some/dir/file.txt") + tmp = Mover.tmp_destination_for(dest, ".") + assert tmp.path == "/some/dir/.file.txt" + assert tmp.scheme == dest.scheme + + +def test_tmp_destination_for_custom_prefix(): + """Custom prefix is prepended to the basename.""" + from trollmoves.movers import Mover + dest = urlparse("file:///some/dir/data.nc") + tmp = Mover.tmp_destination_for(dest, "_tmp_") + assert tmp.path == "/some/dir/_tmp_data.nc" + + +def test_tmp_destination_for_non_path_dest(): + """Fallback: if dest has no .path attribute, dest is returned unchanged.""" + from trollmoves.movers import Mover + + class _NoDotPath: + pass + + obj = _NoDotPath() + result = Mover.tmp_destination_for(obj, ".") + assert result is obj + + +# =========================================================================== +# Group B – Mover base class finalize_atomic_transfer +# =========================================================================== + +def test_base_mover_finalize_raises_for_pathless_dest(): + """Base Mover.finalize_atomic_transfer raises NotImplementedError when + tmp_destination has no .path attribute.""" + from trollmoves.movers import Mover + + class _NoPath: + pass + + mover = object.__new__(Mover) + mover.destination = urlparse("/some/path") + with pytest.raises(NotImplementedError): + mover.finalize_atomic_transfer(_NoPath(), urlparse("/other/path")) + + +# =========================================================================== +# Group C – FileMover (real files on local filesystem) +# =========================================================================== + +def test_file_mover_finalize_atomic_transfer_renames(tmp_path): + """finalize_atomic_transfer renames tmp file to final, preserving content.""" + from trollmoves.movers import FileMover + + tmp_file = tmp_path / ".data.txt" + tmp_file.write_text("content") + final_file = tmp_path / "data.txt" + + mover = 
FileMover(str(tmp_file), str(tmp_file)) + tmp_dest = urlparse(str(tmp_file)) + final_dest = urlparse(str(final_file)) + + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + assert final_file.read_text() == "content" + assert not tmp_file.exists() + + +def test_file_mover_finalize_creates_missing_final_dir(tmp_path): + """finalize_atomic_transfer creates the final directory if it is absent.""" + from trollmoves.movers import FileMover + + tmp_file = tmp_path / ".data.txt" + tmp_file.write_text("data") + final_dir = tmp_path / "subdir" / "deep" + final_file = final_dir / "data.txt" + + mover = FileMover(str(tmp_file), str(tmp_file)) + mover.finalize_atomic_transfer(urlparse(str(tmp_file)), urlparse(str(final_file))) + + assert final_file.exists() + assert not tmp_file.exists() + + +def test_file_mover_finalize_updates_destination(tmp_path): + """finalize_atomic_transfer updates mover.destination to the final dest.""" + from trollmoves.movers import FileMover + + tmp_file = tmp_path / ".data.txt" + tmp_file.write_text("x") + final_file = tmp_path / "data.txt" + + mover = FileMover(str(tmp_file), str(tmp_file)) + final_dest = urlparse(str(final_file)) + mover.finalize_atomic_transfer(urlparse(str(tmp_file)), final_dest) + + assert mover.destination == final_dest + + +def test_move_it_use_tmp_file_scheme(tmp_path, source_file): + """Full use_tmp round-trip via move_it() with empty (local) scheme.""" + from trollmoves.movers import move_it + + dest_dir = tmp_path / "dest" + dest_dir.mkdir() + destination = str(dest_dir / "data.txt") + attrs = {"use_tmp_on_transfer": True} + + returned = move_it(str(source_file), destination, attrs=attrs) + + assert (dest_dir / "data.txt").exists() + assert not (dest_dir / ".data.txt").exists(), "tmp file should be cleaned up" + assert returned == urlparse(destination) + + +def test_move_it_use_tmp_custom_prefix(tmp_path, source_file): + """Full use_tmp round-trip via move_it() with a custom tmp_prefix.""" + from trollmoves.movers 
import move_it + + dest_dir = tmp_path / "dest" + dest_dir.mkdir() + destination = str(dest_dir / "data.txt") + attrs = {"use_tmp_on_transfer": True, "tmp_prefix": "incoming_"} + + move_it(str(source_file), destination, attrs=attrs) + + assert (dest_dir / "data.txt").exists() + assert not (dest_dir / "incoming_data.txt").exists() + + +def test_move_it_use_tmp_cleanup_on_finalize_error(tmp_path, source_file, monkeypatch): + """When finalize_atomic_transfer raises, move_it removes the tmp file.""" + from trollmoves.movers import FileMover, move_it + + dest_dir = tmp_path / "dest" + dest_dir.mkdir() + destination = str(dest_dir / "data.txt") + attrs = {"use_tmp_on_transfer": True} + + monkeypatch.setattr(FileMover, "finalize_atomic_transfer", + lambda *_: (_ for _ in ()).throw(NotImplementedError("simulated"))) + + with pytest.raises(NotImplementedError): + move_it(str(source_file), destination, attrs=attrs) + + # The tmp file must have been cleaned up + assert not (dest_dir / ".data.txt").exists() + # The final destination should not exist either + assert not (dest_dir / "data.txt").exists() + + +# =========================================================================== +# Group D – ScpMover (real localhost SSH) +# =========================================================================== + +@pytest.mark.slow +def test_scp_mover_finalize_atomic_transfer(tmp_path, monkeypatch): + """ScpMover.finalize_atomic_transfer performs a remote SFTP rename on localhost.""" + from trollmoves.movers import ScpMover + + _patch_ssh_for_auto_add_policy(monkeypatch) + + tmp_file = tmp_path / ".data.txt" + tmp_file.write_text("scp finalize") + final_file = tmp_path / "data.txt" + + # Origin and initial destination path don't matter for finalize; only hostname does + mover = ScpMover(str(tmp_file), f"scp://localhost{tmp_path}/data.txt") + + tmp_dest = urlparse(f"scp://localhost{tmp_path}/.data.txt") + final_dest = urlparse(f"scp://localhost{tmp_path}/data.txt") + 
mover.finalize_atomic_transfer(tmp_dest, final_dest) + + assert final_file.read_text() == "scp finalize" + assert not tmp_file.exists() + assert mover.destination == final_dest + + +@pytest.mark.slow +def test_move_it_use_tmp_scp_scheme(tmp_path, source_file, monkeypatch): + """Full use_tmp round-trip via move_it() with scp://localhost destination.""" + from trollmoves.movers import move_it + + _patch_ssh_for_auto_add_policy(monkeypatch) + + dest_dir = tmp_path / "dest_scp" + dest_dir.mkdir() + destination = f"scp://localhost{dest_dir}/data.txt" + attrs = {"use_tmp_on_transfer": True} + + returned = move_it(str(source_file), destination, attrs=attrs) + + assert (dest_dir / "data.txt").exists() + assert not (dest_dir / ".data.txt").exists() + assert returned == urlparse(destination) + + +# =========================================================================== +# Group E – SftpMover (real localhost SSH) +# =========================================================================== + +@pytest.mark.slow +def test_sftp_mover_finalize_atomic_transfer(tmp_path, monkeypatch): + """SftpMover.finalize_atomic_transfer performs a remote SFTP rename on localhost.""" + from trollmoves.movers import SftpMover + + _patch_ssh_for_auto_add_policy(monkeypatch) + + tmp_file = tmp_path / ".data.txt" + tmp_file.write_text("sftp finalize") + final_file = tmp_path / "data.txt" + + mover = SftpMover(str(tmp_file), f"sftp://localhost{tmp_path}/data.txt") + tmp_dest = urlparse(f"sftp://localhost{tmp_path}/.data.txt") + final_dest = urlparse(f"sftp://localhost{tmp_path}/data.txt") + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + assert final_file.read_text() == "sftp finalize" + assert not tmp_file.exists() + assert mover.destination == final_dest + + +@pytest.mark.slow +def test_move_it_use_tmp_sftp_scheme(tmp_path, source_file, monkeypatch): + """Full use_tmp round-trip via move_it() with sftp://localhost destination.""" + from trollmoves.movers import move_it + + 
_patch_ssh_for_auto_add_policy(monkeypatch) + + dest_dir = tmp_path / "dest_sftp" + dest_dir.mkdir() + destination = f"sftp://localhost{dest_dir}/data.txt" + attrs = {"use_tmp_on_transfer": True} + + returned = move_it(str(source_file), destination, attrs=attrs) + + assert (dest_dir / "data.txt").exists() + assert not (dest_dir / ".data.txt").exists() + assert returned == urlparse(destination) + + +# =========================================================================== +# Group F – FtpMover (mocked FTP connection) +# =========================================================================== + +def test_ftp_mover_finalize_atomic_transfer_rename_args(): + """FtpMover.finalize_atomic_transfer calls rename with correct basenames.""" + from trollmoves.movers import FtpMover + + origin = "/path/to/source.txt" + destination = "ftp://ftphost/remote/dir/source.txt" + mover = FtpMover(origin, destination) + + mock_connection = MagicMock() + tmp_dest = urlparse("ftp://ftphost/remote/dir/.source.txt") + final_dest = urlparse("ftp://ftphost/remote/dir/source.txt") + + with patch.object(mover, "get_connection", return_value=mock_connection): + with patch("trollmoves.movers.ensure_remote_dirs"): + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + mock_connection.rename.assert_called_once_with(".source.txt", "source.txt") + + +def test_ftp_mover_finalize_updates_destination(): + """FtpMover.finalize_atomic_transfer updates mover.destination to final.""" + from trollmoves.movers import FtpMover + + mover = FtpMover("/origin.txt", "ftp://ftphost/dir/file.txt") + mock_connection = MagicMock() + tmp_dest = urlparse("ftp://ftphost/dir/.file.txt") + final_dest = urlparse("ftp://ftphost/dir/file.txt") + + with patch.object(mover, "get_connection", return_value=mock_connection): + with patch("trollmoves.movers.ensure_remote_dirs"): + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + assert mover.destination == final_dest + + +# 
=========================================================================== +# Group G – S3Mover (mocked S3 backend) +# =========================================================================== + +@patch("trollmoves.movers.S3FileSystem") +def test_s3_mover_finalize_copy_mode(mock_s3fs): + """s3_use_copy=True: finalize calls s3.copy and s3.rm with correct keys.""" + from trollmoves.movers import S3Mover + + mock_s3 = MagicMock() + mock_s3fs.return_value = mock_s3 + + mover = S3Mover("/local/file.txt", "s3://mybucket/dir/file.txt", + attrs={"s3_use_copy": True, "tmp_prefix": "."}) + + tmp_dest = urlparse("s3://mybucket/dir/.file.txt") + final_dest = urlparse("s3://mybucket/dir/file.txt") + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + mock_s3.copy.assert_called_once_with("mybucket/dir/.file.txt", "mybucket/dir/file.txt") + mock_s3.rm.assert_called_once_with("mybucket/dir/.file.txt") + assert mover.destination == urlparse("s3://mybucket/dir/file.txt") + + +@patch("trollmoves.movers.boto3") +def test_s3_mover_finalize_multipart_mode(mock_boto3): + """s3_use_multipart=True: finalize only updates destination, no S3 I/O.""" + from trollmoves.movers import S3Mover + + mover = S3Mover("/local/file.txt", "s3://mybucket/dir/.file.txt", + attrs={"s3_use_multipart": True, "tmp_prefix": "."}) + + tmp_dest = urlparse("s3://mybucket/dir/.file.txt") + final_dest = urlparse("s3://mybucket/dir/file.txt") + mover.finalize_atomic_transfer(tmp_dest, final_dest) + + # No boto3 client calls – the multipart copy() already wrote the final key + mock_boto3.client.assert_not_called() + assert mover.destination == urlparse("s3://mybucket/dir/file.txt") + + +@patch("trollmoves.movers.S3FileSystem", None) +@patch("trollmoves.movers.boto3", None) +def test_s3_mover_finalize_raises_if_unconfigured(): + """finalize raises NotImplementedError when neither multipart nor copy is set.""" + from trollmoves.movers import S3Mover + + mover = S3Mover("/local/file.txt", 
"s3://mybucket/dir/.file.txt", attrs={}) + tmp_dest = urlparse("s3://mybucket/dir/.file.txt") + final_dest = urlparse("s3://mybucket/dir/file.txt") + + with pytest.raises(NotImplementedError): + mover.finalize_atomic_transfer(tmp_dest, final_dest) From 87c570dacf2a268e8ff3fc4b411c9d4c15cfc355 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 10:59:56 +0300 Subject: [PATCH 21/31] Fix bugs in atomic transfer methods and improve tests - movers.py: pass tmp_dest (not mover.destination) to finalize_atomic_transfer in move_it(); broaden exception cleanup from NotImplementedError to all exceptions; add exist_ok=True and guard empty dir in Mover.finalize_atomic_transfer; remove duplicate FileMover.finalize_atomic_transfer (identical to base class); re-raise OSError errno 2 in ScpMover.copy() instead of silently swallowing it; add _S3_MOVER_INTERNAL_KEYS frozenset and filter internal keys before passing to S3FileSystem(); rewrite S3Mover.finalize_atomic_transfer to use final_destination directly instead of stripping tmp prefix; initialize upload_id=None before try block and guard abort on non-None; rename ambiguous 'parts' variable to 'path_parts'/'upload_parts' in S3Mover.copy() - _mover_utils.py: except IOError -> except OSError in ensure_final_directory_for_rename - test_movers_use_tmp.py: fix lambda anti-pattern for raising in monkeypatch; add OSError propagation test; update FTP tests to assert cwd call order instead of patching ensure_remote_dirs away; update S3 copy test to use distinct final directory proving final_destination is used directly; add FTP end-to-end move_it() test with use_tmp Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/_mover_utils.py | 4 +- trollmoves/movers.py | 127 +++++++++++------------- trollmoves/tests/test_movers_use_tmp.py | 87 +++++++++++++--- 3 files changed, 132 insertions(+), 86 deletions(-) diff --git a/trollmoves/_mover_utils.py b/trollmoves/_mover_utils.py index c1d5011..1aa002c 
100644 --- a/trollmoves/_mover_utils.py +++ b/trollmoves/_mover_utils.py @@ -18,7 +18,7 @@ def ensure_local_dir(path): return dirname = path if os.path.isfile(path) or os.path.splitext(path)[1]: - dirname = os.path.dirname(path) or '.' + dirname = os.path.dirname(path) or "." os.makedirs(dirname, exist_ok=True) @@ -133,7 +133,7 @@ def ensure_final_directory_for_rename(sftp_connection, final_destination_path): path = path + "/" + p try: sftp_connection.stat(path) - except IOError: + except OSError: try: sftp_connection.mkdir(path) except Exception: diff --git a/trollmoves/movers.py b/trollmoves/movers.py index cde9b88..4c5a75d 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -31,9 +31,12 @@ "default_cache_type", "version_aware", "cache_regions", "asynchronous", "config_kwargs", "kwargs", "session", "max_concurrency", "fixed_upload_size", - # allow our atomic-transfer and multipart options to pass through + # allow our atomic-transfer and multipart options to pass through sanitize "s3_use_multipart", "s3_use_copy", "tmp_prefix", "s3_multipart_chunksize"] +# Keys consumed by S3Mover logic; must not be forwarded to S3FileSystem or boto3 client +_S3_MOVER_INTERNAL_KEYS = frozenset({"s3_use_multipart", "s3_use_copy", "tmp_prefix", "s3_multipart_chunksize"}) + LOGGER = logging.getLogger(__name__) @@ -74,12 +77,12 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ m.copy() # finalize: default finalizer works for local schemes; subclasses should override try: - m.finalize_atomic_transfer(m.destination, new_dest) - except NotImplementedError: - # cleanup tmp and raise error to indicate unsupported scheme for atomic transfer + m.finalize_atomic_transfer(tmp_dest, new_dest) + except Exception: + # Clean up local tmp file on any error; remote tmp files are not cleaned up here try: - if hasattr(m.destination, "path") and os.path.exists(m.destination.path): - os.remove(m.destination.path) + if hasattr(tmp_dest, "path") and 
os.path.exists(tmp_dest.path): + os.remove(tmp_dest.path) finally: raise last_dest = new_dest @@ -169,8 +172,8 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): raise NotImplementedError("Finalize atomic transfer not implemented for remote schemes") final_dir = os.path.dirname(final_path) - if not os.path.exists(final_dir): - os.makedirs(final_dir) + if final_dir: + os.makedirs(final_dir, exist_ok=True) # Use os.replace for atomic rename where possible os.replace(tmp_path, final_path) # Update mover's destination to final @@ -237,20 +240,6 @@ def move(self): """Move the file.""" shutil.move(self.origin, self.destination.path) - def finalize_atomic_transfer(self, tmp_destination, final_destination): - """Finalize atomic transfer for local filesystem by renaming tmp -> final.""" - try: - tmp_path = tmp_destination.path - final_path = final_destination.path - except AttributeError: - raise NotImplementedError("Finalize atomic transfer not implemented for this scheme") - - final_dir = os.path.dirname(final_path) - if not os.path.exists(final_dir): - os.makedirs(final_dir) - os.replace(tmp_path, final_path) - self.destination = final_destination - class CTimer(Thread): """Call a function after a specified number of seconds. @@ -480,6 +469,7 @@ def copy(self): LOGGER.error("No such file or directory. File not transfered: " "%s. 
Original error message: %s", self.origin, str(osex)) + raise else: LOGGER.error("OSError in scp.put: %s", str(osex)) raise @@ -633,13 +623,13 @@ def copy(self): # Prefer multipart upload using boto3 when configured and available if use_multipart and boto3 is not None: # Derive final key: if basename starts with tmp_prefix, strip it - parts = destination_file_path.split("/") - if len(parts) == 1: - bucket = parts[0] + path_parts = destination_file_path.split("/") + if len(path_parts) == 1: + bucket = path_parts[0] key = "" else: - bucket = parts[0] - key = "/".join(parts[1:]) + bucket = path_parts[0] + key = "/".join(path_parts[1:]) basename = key.split("/")[-1] if key else "" if basename.startswith(tmp_prefix): @@ -664,10 +654,11 @@ def copy(self): # multipart upload in chunks of 8MB chunk_size = int(self.attrs.get("s3_multipart_chunksize", 8 * 1024 * 1024)) + upload_id = None try: mp = client.create_multipart_upload(Bucket=bucket, Key=final_key) upload_id = mp["UploadId"] - parts = [] + upload_parts = [] part_number = 1 with open(self.origin, "rb") as f: while True: @@ -678,18 +669,19 @@ def copy(self): Bucket=bucket, Key=final_key, PartNumber=part_number, UploadId=upload_id, Body=data, ) - parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) + upload_parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) part_number += 1 client.complete_multipart_upload( Bucket=bucket, Key=final_key, UploadId=upload_id, - MultipartUpload={"Parts": parts}, + MultipartUpload={"Parts": upload_parts}, ) except Exception as e: LOGGER.exception("Multipart upload failed: %s", str(e)) - try: - client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) - except Exception: - pass + if upload_id is not None: + try: + client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) + except Exception: + pass raise # Update destination to final key @@ -699,7 +691,8 @@ def copy(self): # Fallback: use s3fs put to destination_file_path (tmp 
or final) if S3FileSystem is None: raise ImportError("S3Mover requires 's3fs' to be installed for non-multipart operations.") - s3 = S3FileSystem(**self.attrs) + s3_attrs = {k: v for k, v in self.attrs.items() if k not in _S3_MOVER_INTERNAL_KEYS} + s3 = S3FileSystem(**s3_attrs) LOGGER.debug("Before call to put: destination_file_path = %s", destination_file_path) LOGGER.debug("self.origin = %s", self.origin) _create_s3_destination_path(s3, destination_file_path) @@ -731,63 +724,57 @@ def move(self): def finalize_atomic_transfer(self, tmp_destination, final_destination): """Finalize atomic transfer for S3. - Default behavior: if multipart upload path was used, there's nothing to do. - Otherwise, if configured, perform copy+delete (server-side copy) to move tmp key to final key. + If multipart upload was used, copy() already wrote to the final key — just update + self.destination. Otherwise perform a server-side copy+delete to move the tmp key + to the final key (requires s3_use_copy=True). 
""" use_multipart = bool(self.attrs.get("s3_use_multipart", False)) use_copy = bool(self.attrs.get("s3_use_copy", False)) - tmp_prefix = self.attrs.get("tmp_prefix", ".") + # Derive source (tmp) S3 path from tmp_destination if tmp_destination: - _bucket = tmp_destination.netloc - _key = tmp_destination.path.lstrip("/") - destination_file_path = _bucket + "/" + _key if _key else _bucket - else: - destination_file_path = self._get_destination() - # derive bucket and keys - parts = destination_file_path.split("/") - if len(parts) == 1: - bucket = parts[0] - tmp_key = "" + tmp_bucket = tmp_destination.netloc + tmp_key = tmp_destination.path.lstrip("/") + tmp_path = (tmp_bucket + "/" + tmp_key) if tmp_key else tmp_bucket else: - bucket = parts[0] - tmp_key = "/".join(parts[1:]) + tmp_path = self._get_destination() + tmp_parts = tmp_path.split("/") + tmp_bucket = tmp_parts[0] + tmp_key = "/".join(tmp_parts[1:]) if len(tmp_parts) > 1 else "" - basename = tmp_key.split("/")[-1] if tmp_key else "" - if basename.startswith(tmp_prefix): - final_basename = basename[len(tmp_prefix):] - final_key = tmp_key.rsplit("/", 1)[0] + "/" + final_basename if "/" in tmp_key else final_basename - else: - final_key = tmp_key + # Derive destination (final) S3 path from final_destination + final_bucket = final_destination.netloc + final_key = final_destination.path.lstrip("/") + final_path = (final_bucket + "/" + final_key) if final_key else final_bucket - # If multipart path was used and boto3 completed upload to final key, nothing to do + # If multipart upload was used, copy() already wrote to the final key if use_multipart and boto3 is not None: - self.destination = urlparse("s3://" + bucket + "/" + final_key) + self.destination = final_destination return - # Otherwise perform copy+delete if configured if not use_copy: - # No server-side rename available and copy disabled: raise to indicate unsupported op raise NotImplementedError("S3 atomic finalize requires either multipart uploads or 
copy+delete fallback") + s3_attrs = {k: v for k, v in self.attrs.items() if k not in _S3_MOVER_INTERNAL_KEYS} + # use s3fs or boto3 to copy and delete tmp key if S3FileSystem is not None: - s3 = S3FileSystem(**self.attrs) - s3.copy(destination_file_path, bucket + "/" + final_key) - s3.rm(destination_file_path) - self.destination = urlparse("s3://" + bucket + "/" + final_key) + s3 = S3FileSystem(**s3_attrs) + s3.copy(tmp_path, final_path) + s3.rm(tmp_path) + self.destination = final_destination return if boto3 is None: raise ImportError("No S3 backend available for copy+delete finalize") # boto3 copy_object and delete_object - client_kwargs = self.attrs.get("client_kwargs", {}) - boto_kwargs = dict(client_kwargs) if isinstance(client_kwargs, dict) else {} + boto_kwargs = dict(s3_attrs.get("client_kwargs", {})) client = boto3.client("s3", **boto_kwargs) - copy_source = {"Bucket": bucket, "Key": tmp_key} - client.copy_object(CopySource=copy_source, Bucket=bucket, Key=final_key) - client.delete_object(Bucket=bucket, Key=tmp_key) - self.destination = urlparse("s3://" + bucket + "/" + final_key) + copy_source = {"Bucket": tmp_bucket, "Key": tmp_key} + client.copy_object(CopySource=copy_source, Bucket=final_bucket, Key=final_key) + client.delete_object(Bucket=tmp_bucket, Key=tmp_key) + self.destination = final_destination + def _create_s3_destination_path(s3, destination_file_path): diff --git a/trollmoves/tests/test_movers_use_tmp.py b/trollmoves/tests/test_movers_use_tmp.py index 1b55592..86fb9d2 100644 --- a/trollmoves/tests/test_movers_use_tmp.py +++ b/trollmoves/tests/test_movers_use_tmp.py @@ -175,8 +175,10 @@ def test_move_it_use_tmp_cleanup_on_finalize_error(tmp_path, source_file, monkey destination = str(dest_dir / "data.txt") attrs = {"use_tmp_on_transfer": True} - monkeypatch.setattr(FileMover, "finalize_atomic_transfer", - lambda *_: (_ for _ in ()).throw(NotImplementedError("simulated"))) + def _raise(*_): + raise NotImplementedError("simulated") + + 
monkeypatch.setattr(FileMover, "finalize_atomic_transfer", _raise) with pytest.raises(NotImplementedError): move_it(str(source_file), destination, attrs=attrs) @@ -187,6 +189,27 @@ def test_move_it_use_tmp_cleanup_on_finalize_error(tmp_path, source_file, monkey assert not (dest_dir / "data.txt").exists() +def test_move_it_use_tmp_cleanup_on_oserror(tmp_path, source_file, monkeypatch): + """Non-NotImplementedError exceptions from finalize also trigger cleanup.""" + from trollmoves.movers import FileMover, move_it + + dest_dir = tmp_path / "dest" + dest_dir.mkdir() + destination = str(dest_dir / "data.txt") + attrs = {"use_tmp_on_transfer": True} + + def _raise_os(*_): + raise OSError("disk full") + + monkeypatch.setattr(FileMover, "finalize_atomic_transfer", _raise_os) + + with pytest.raises(OSError, match="disk full"): + move_it(str(source_file), destination, attrs=attrs) + + assert not (dest_dir / ".data.txt").exists() + assert not (dest_dir / "data.txt").exists() + + # =========================================================================== # Group D – ScpMover (real localhost SSH) # =========================================================================== @@ -282,7 +305,7 @@ def test_move_it_use_tmp_sftp_scheme(tmp_path, source_file, monkeypatch): # =========================================================================== def test_ftp_mover_finalize_atomic_transfer_rename_args(): - """FtpMover.finalize_atomic_transfer calls rename with correct basenames.""" + """FtpMover.finalize_atomic_transfer: cwd to dest dir then rename with basenames.""" from trollmoves.movers import FtpMover origin = "/path/to/source.txt" @@ -294,9 +317,10 @@ def test_ftp_mover_finalize_atomic_transfer_rename_args(): final_dest = urlparse("ftp://ftphost/remote/dir/source.txt") with patch.object(mover, "get_connection", return_value=mock_connection): - with patch("trollmoves.movers.ensure_remote_dirs"): - mover.finalize_atomic_transfer(tmp_dest, final_dest) + 
mover.finalize_atomic_transfer(tmp_dest, final_dest) + # ensure_remote_dirs cds to the directory; rename uses basenames relative to that cwd + mock_connection.cwd.assert_called_with("/remote/dir") mock_connection.rename.assert_called_once_with(".source.txt", "source.txt") @@ -310,8 +334,7 @@ def test_ftp_mover_finalize_updates_destination(): final_dest = urlparse("ftp://ftphost/dir/file.txt") with patch.object(mover, "get_connection", return_value=mock_connection): - with patch("trollmoves.movers.ensure_remote_dirs"): - mover.finalize_atomic_transfer(tmp_dest, final_dest) + mover.finalize_atomic_transfer(tmp_dest, final_dest) assert mover.destination == final_dest @@ -322,22 +345,27 @@ def test_ftp_mover_finalize_updates_destination(): @patch("trollmoves.movers.S3FileSystem") def test_s3_mover_finalize_copy_mode(mock_s3fs): - """s3_use_copy=True: finalize calls s3.copy and s3.rm with correct keys.""" + """s3_use_copy=True: finalize uses final_destination directly (not tmp-stripping). + + tmp is in a staging prefix; final is in a completely different path to prove + the implementation reads final_destination rather than reverse-engineering it + from the tmp path. 
+ """ from trollmoves.movers import S3Mover mock_s3 = MagicMock() mock_s3fs.return_value = mock_s3 - mover = S3Mover("/local/file.txt", "s3://mybucket/dir/file.txt", + mover = S3Mover("/local/file.txt", "s3://mybucket/staging/.file.txt", attrs={"s3_use_copy": True, "tmp_prefix": "."}) - tmp_dest = urlparse("s3://mybucket/dir/.file.txt") - final_dest = urlparse("s3://mybucket/dir/file.txt") + tmp_dest = urlparse("s3://mybucket/staging/.file.txt") + final_dest = urlparse("s3://mybucket/archive/2024/file.txt") # different dir mover.finalize_atomic_transfer(tmp_dest, final_dest) - mock_s3.copy.assert_called_once_with("mybucket/dir/.file.txt", "mybucket/dir/file.txt") - mock_s3.rm.assert_called_once_with("mybucket/dir/.file.txt") - assert mover.destination == urlparse("s3://mybucket/dir/file.txt") + mock_s3.copy.assert_called_once_with("mybucket/staging/.file.txt", "mybucket/archive/2024/file.txt") + mock_s3.rm.assert_called_once_with("mybucket/staging/.file.txt") + assert mover.destination == urlparse("s3://mybucket/archive/2024/file.txt") @patch("trollmoves.movers.boto3") @@ -369,3 +397,34 @@ def test_s3_mover_finalize_raises_if_unconfigured(): with pytest.raises(NotImplementedError): mover.finalize_atomic_transfer(tmp_dest, final_dest) + + +# =========================================================================== +# Group H – move_it() FTP end-to-end with use_tmp (mocked) +# =========================================================================== + +@patch("trollmoves.movers.FTP") +def test_move_it_use_tmp_ftp_scheme(mock_ftp_class, tmp_path): + """Full use_tmp round-trip via move_it() with ftp:// destination (mocked FTP).""" + from trollmoves.movers import move_it + + source = tmp_path / "upload.txt" + source.write_text("ftp content") + + mock_ftp = MagicMock() + mock_ftp_class.return_value.__enter__ = lambda s: mock_ftp + mock_ftp_class.return_value.__exit__ = MagicMock(return_value=False) + mock_ftp_class.return_value = mock_ftp + + destination = 
"ftp://ftphost/remote/dir/upload.txt" + attrs = {"use_tmp_on_transfer": True} + + move_it(str(source), destination, attrs=attrs) + + # copy step: storbinary was called once (for the tmp file) + assert mock_ftp.storbinary.call_count == 1 + store_call_args = mock_ftp.storbinary.call_args[0] + assert store_call_args[0] == "STOR .upload.txt" + + # finalize step: rename was called once from tmp to final + mock_ftp.rename.assert_called_once_with(".upload.txt", "upload.txt") From 9879c955cfe17a45ad05c1076700d2b4aab987c3 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 12:19:42 +0300 Subject: [PATCH 22/31] Fix ScpMover.copy() to return None on ENOENT; improve test isolation When scp.put() raises OSError with errno 2 (ENOENT/FileNotFoundError), ScpMover.copy() now logs the error and returns None instead of re-raising. This matches the intended behaviour: source file gone means nothing to transfer, so the operation is silently skipped. Also add @patch('paramiko.SSHClient') to test_scp_copy_oserror_exception and test_scp_copy_oserror_exception_errno_2 so they no longer rely on the ScpMover.active_connections class-level cache being populated by a prior test. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 2 +- trollmoves/tests/test_ssh_server.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 4c5a75d..98cf0cd 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -469,7 +469,7 @@ def copy(self): LOGGER.error("No such file or directory. File not transfered: " "%s. 
Original error message: %s", self.origin, str(osex)) - raise + return else: LOGGER.error("OSError in scp.put: %s", str(osex)) raise diff --git a/trollmoves/tests/test_ssh_server.py b/trollmoves/tests/test_ssh_server.py index 1e8d75e..01727de 100644 --- a/trollmoves/tests/test_ssh_server.py +++ b/trollmoves/tests/test_ssh_server.py @@ -237,8 +237,9 @@ def test_scp_copy_generic_exception(self, mock_scp_client, mock_sshclient): with pytest.raises(Exception): scp_mover.copy() + @patch("paramiko.SSHClient", autospec=True) @patch("scp.SCPClient", autospec=True) - def test_scp_copy_oserror_exception(self, mock_scp_client): + def test_scp_copy_oserror_exception(self, mock_scp_client, mock_sshclient): """Check scp copy for OSError.""" from trollmoves.movers import ScpMover @@ -248,8 +249,9 @@ def test_scp_copy_oserror_exception(self, mock_scp_client): with pytest.raises(OSError): scp_mover.copy() + @patch("paramiko.SSHClient", autospec=True) @patch("scp.SCPClient", autospec=True) - def test_scp_copy_oserror_exception_errno_2(self, mock_scp_client): + def test_scp_copy_oserror_exception_errno_2(self, mock_scp_client, mock_sshclient): """Check scp copy OSError errno 2.""" from trollmoves.movers import ScpMover From 73e82bf93228923212ccbbf40614bd86ac15b5bd Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 12:54:39 +0300 Subject: [PATCH 23/31] Replace catch-all except Exception with specific exception types Narrow broad except Exception clauses to the specific exceptions that each code path can actually raise: movers.py: - supports_atomic(): Exception -> AttributeError (scheme access on non-ParseResult) - tmp_destination_for(): Exception -> AttributeError (path access on non-ParseResult) - FtpMover.finalize_atomic_transfer rename: Exception -> all_errors (ftplib.Error, OSError, EOFError, ssl.SSLError; all_errors already imported) - ScpMover.copy() SCPClient init: Exception -> (TypeError, SSHException, OSError) - ScpMover.copy() scp.put fallback: Exception -> 
(SCPException, SSHException) - ScpMover.finalize_atomic_transfer sftp.close(): Exception -> (SSHException, OSError) - S3Mover.copy() multipart body: Exception -> (ClientError, BotoCoreError, OSError) - S3Mover.copy() abort_multipart: Exception -> (ClientError, BotoCoreError) - ScpMover.open_connection() connect fallback: keep Exception, add comment (SSHClient.connect() may raise unexpected exceptions from transport/agents) _mover_utils.py: - _ensure_remote_dirs_ftp() cwd/mkd: Exception -> (ftplib.Error, OSError) in all 6 spots - _ensure_remote_dirs_sftp() stat/mkdir: Exception -> OSError in all 3 spots - ensure_final_directory_for_rename() mkdir: Exception -> OSError The two intentionally broad handlers in move_it() (finalize cleanup + outer copy logger) are kept as Exception with clarifying comments. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/_mover_utils.py | 21 +++++++++++---------- trollmoves/movers.py | 28 ++++++++++++++++++---------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/trollmoves/_mover_utils.py b/trollmoves/_mover_utils.py index 1aa002c..102c053 100644 --- a/trollmoves/_mover_utils.py +++ b/trollmoves/_mover_utils.py @@ -5,6 +5,7 @@ (paramiko SFTPClient) objects by duck-typing. 
""" +import ftplib import os @@ -35,7 +36,7 @@ def _ensure_remote_dirs_ftp(connection, parts): try: connection.cwd(path) return - except Exception: + except (ftplib.Error, OSError): pass # Build path from root, creating missing segments and only cd'ing when needed @@ -44,22 +45,22 @@ def _ensure_remote_dirs_ftp(connection, parts): current = current + "/" + part try: connection.cwd(current) - except Exception: + except (ftplib.Error, OSError): # try to create the directory; accept failures silently and proceed try: connection.mkd(current) - except Exception: + except (ftplib.Error, OSError): try: connection.mkd(part) - except Exception: + except (ftplib.Error, OSError): pass # after creating, change into it try: connection.cwd(current) - except Exception: + except (ftplib.Error, OSError): try: connection.cwd(part) - except Exception: + except (ftplib.Error, OSError): pass @@ -74,13 +75,13 @@ def _ensure_remote_dirs_sftp(connection, parts): current = current + "/" + part try: connection.stat(current) - except Exception: + except OSError: try: connection.mkdir(current) - except Exception: + except OSError: try: connection.mkdir(part) - except Exception: + except OSError: pass @@ -136,5 +137,5 @@ def ensure_final_directory_for_rename(sftp_connection, final_destination_path): except OSError: try: sftp_connection.mkdir(path) - except Exception: + except OSError: pass diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 98cf0cd..d209656 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -79,7 +79,8 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ try: m.finalize_atomic_transfer(tmp_dest, new_dest) except Exception: - # Clean up local tmp file on any error; remote tmp files are not cleaned up here + # Intentionally broad: must clean up local tmp regardless of protocol error. + # Re-raises so the caller sees the original failure. 
try: if hasattr(tmp_dest, "path") and os.path.exists(tmp_dest.path): os.remove(tmp_dest.path) @@ -96,6 +97,7 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ if hook: hook(pathname, new_dest) except Exception as err: + # Intentionally broad: logs and re-raises any failure from copy/finalize across all protocols. exc_type, exc_value, exc_traceback = sys.exc_info() LOGGER.error("Something went wrong during copy of %s to %s: %s", pathname, str(fake_dest), str(err)) @@ -143,7 +145,7 @@ def supports_atomic(self): """ try: scheme = self.destination.scheme - except Exception: + except AttributeError: scheme = "" return scheme in ("", "file") @@ -152,7 +154,7 @@ def tmp_destination_for(dest, tmp_prefix="."): """Return a copy of dest with the basename prefixed by tmp_prefix.""" try: path = dest.path - except Exception: + except AttributeError: return dest dirname = os.path.dirname(path) basename = os.path.basename(path) @@ -360,7 +362,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): ensure_remote_dirs(connection, dest_dirname) try: connection.rename(tmp_basename, final_basename) - except Exception as err: + except all_errors as err: LOGGER.exception("Failed to finalize FTP atomic transfer: %s", str(err)) raise self.destination = final_destination @@ -408,6 +410,8 @@ def open_connection(self): except socket.timeout as sto: LOGGER.exception("SSH connection timed out: %s", str(sto)) except Exception as err: + # Intentionally broad: SSHClient.connect() may raise unexpected exceptions + # (e.g. from underlying transport or third-party SSH agents). 
LOGGER.exception("Unknown exception at init SSHClient: %s", str(err)) else: return ssh_connection @@ -450,14 +454,15 @@ def move(self): def copy(self): """Upload the file.""" - from scp import SCPClient + from paramiko import SSHException as _SSHException + from scp import SCPClient, SCPException ssh_connection = self.get_connection(self.destination.hostname, self.destination.port or 22, self._dest_username) try: scp = SCPClient(ssh_connection.get_transport()) - except Exception as err: + except (TypeError, _SSHException, OSError) as err: LOGGER.error("Failed to initiate SCPClient: %s", str(err)) ssh_connection.close() raise @@ -473,7 +478,7 @@ def copy(self): else: LOGGER.error("OSError in scp.put: %s", str(osex)) raise - except Exception as err: + except (SCPException, _SSHException) as err: LOGGER.error("Something went wrong with scp: %s", str(err)) LOGGER.error("Exception name %s", type(err).__name__) LOGGER.error("Exception args %s", str(err.args)) @@ -483,6 +488,7 @@ def copy(self): def finalize_atomic_transfer(self, tmp_destination, final_destination): """Finalize atomic transfer for SCP by performing remote rename via SFTP.""" + from paramiko import SSHException as _SSHException ssh_connection = self.get_connection(self.destination.hostname, self.destination.port or 22, self._dest_username) @@ -495,7 +501,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): if sftp is not None: try: sftp.close() - except Exception: + except (_SSHException, OSError): pass self.destination = final_destination @@ -622,6 +628,8 @@ def copy(self): # Prefer multipart upload using boto3 when configured and available if use_multipart and boto3 is not None: + from botocore.exceptions import BotoCoreError + from botocore.exceptions import ClientError as BotoCoreClientError # Derive final key: if basename starts with tmp_prefix, strip it path_parts = destination_file_path.split("/") if len(path_parts) == 1: @@ -675,12 +683,12 @@ def copy(self): Bucket=bucket, 
Key=final_key, UploadId=upload_id, MultipartUpload={"Parts": upload_parts}, ) - except Exception as e: + except (BotoCoreClientError, BotoCoreError, OSError) as e: LOGGER.exception("Multipart upload failed: %s", str(e)) if upload_id is not None: try: client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) - except Exception: + except (BotoCoreClientError, BotoCoreError): pass raise From e64e9d0800ce3ee7c58f16d60ae8c323d28a9436 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 13:25:48 +0300 Subject: [PATCH 24/31] refactor(S3Mover): extract multipart upload to private methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the inline 50-line multipart upload block in copy() into three focused private methods: - _build_boto3_client(): centralises boto3 client construction from self.attrs. Handles explicit key/secret credentials, session token, and client_kwargs. Shared by _multipart_upload() and finalize_atomic_transfer() (which previously missed key/secret and token support on the boto3 copy+delete path). - _do_multipart_upload(client, bucket, final_key): upload mechanics only — create, upload parts loop, complete; aborts best-effort on any error and re-raises. - _multipart_upload(destination_file_path): orchestrator — parses path into bucket/key, strips tmp_prefix from basename if present, builds client, delegates to _do_multipart_upload, updates self.destination. copy() is now a clean dispatcher (~10 lines): check imports, get destination, call _multipart_upload() or fall through to s3fs put. 
Tests added: - test_build_boto3_client_default_credentials - test_build_boto3_client_explicit_credentials - test_do_multipart_upload_aborts_on_error - test_do_multipart_upload_no_abort_when_create_fails Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 157 +++++++++++++++-------------- trollmoves/tests/test_movers_s3.py | 88 +++++++++++++++- 2 files changed, 170 insertions(+), 75 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index d209656..f8ded22 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -619,81 +619,11 @@ def copy(self): if S3FileSystem is None and boto3 is None: raise ImportError("S3Mover requires 's3fs' or 'boto3' to be installed.") - use_multipart = bool(self.attrs.get("s3_use_multipart", False)) - tmp_prefix = self.attrs.get("tmp_prefix", ".") - - # Destination path inside bucket (bucket/key or bucket/dir/file) destination_file_path = self._get_destination() LOGGER.debug("destination_file_path = %s", destination_file_path) - # Prefer multipart upload using boto3 when configured and available - if use_multipart and boto3 is not None: - from botocore.exceptions import BotoCoreError - from botocore.exceptions import ClientError as BotoCoreClientError - # Derive final key: if basename starts with tmp_prefix, strip it - path_parts = destination_file_path.split("/") - if len(path_parts) == 1: - bucket = path_parts[0] - key = "" - else: - bucket = path_parts[0] - key = "/".join(path_parts[1:]) - - basename = key.split("/")[-1] if key else "" - if basename.startswith(tmp_prefix): - final_basename = basename[len(tmp_prefix):] - final_key = key.rsplit("/", 1)[0] + "/" + final_basename if "/" in key else final_basename - else: - final_key = key - - # Build boto3 client with optional client_kwargs or credentials - client_kwargs = self.attrs.get("client_kwargs", {}) - boto_kwargs = dict(client_kwargs) if isinstance(client_kwargs, dict) else {} - if self.attrs.get("key") and 
self.attrs.get("secret"): - # Use explicit credentials - client = boto3.client( - "s3", - aws_access_key_id=self.attrs.get("key"), - aws_secret_access_key=self.attrs.get("secret"), - **boto_kwargs, - ) - else: - client = boto3.client("s3", **boto_kwargs) - - # multipart upload in chunks of 8MB - chunk_size = int(self.attrs.get("s3_multipart_chunksize", 8 * 1024 * 1024)) - upload_id = None - try: - mp = client.create_multipart_upload(Bucket=bucket, Key=final_key) - upload_id = mp["UploadId"] - upload_parts = [] - part_number = 1 - with open(self.origin, "rb") as f: - while True: - data = f.read(chunk_size) - if not data: - break - resp = client.upload_part( - Bucket=bucket, Key=final_key, PartNumber=part_number, - UploadId=upload_id, Body=data, - ) - upload_parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) - part_number += 1 - client.complete_multipart_upload( - Bucket=bucket, Key=final_key, UploadId=upload_id, - MultipartUpload={"Parts": upload_parts}, - ) - except (BotoCoreClientError, BotoCoreError, OSError) as e: - LOGGER.exception("Multipart upload failed: %s", str(e)) - if upload_id is not None: - try: - client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) - except (BotoCoreClientError, BotoCoreError): - pass - raise - - # Update destination to final key - self.destination = urlparse("s3://" + bucket + "/" + final_key) + if bool(self.attrs.get("s3_use_multipart", False)) and boto3 is not None: + self._multipart_upload(destination_file_path) return # Fallback: use s3fs put to destination_file_path (tmp or final) @@ -706,6 +636,86 @@ def copy(self): _create_s3_destination_path(s3, destination_file_path) s3.put(self.origin, destination_file_path) + def _build_boto3_client(self): + """Build and return a boto3 S3 client from attrs. + + Reads client_kwargs, key, secret, and token from self.attrs. + Falls back to boto3 default credential chain when key/secret are absent. 
+ """ + client_kwargs = self.attrs.get("client_kwargs", {}) + boto_kwargs = dict(client_kwargs) if isinstance(client_kwargs, dict) else {} + if self.attrs.get("key") and self.attrs.get("secret"): + return boto3.client( + "s3", + aws_access_key_id=self.attrs["key"], + aws_secret_access_key=self.attrs["secret"], + aws_session_token=self.attrs.get("token"), + **boto_kwargs, + ) + return boto3.client("s3", **boto_kwargs) + + def _do_multipart_upload(self, client, bucket, final_key): + """Perform a multipart upload of self.origin to bucket/final_key. + + Uploads in chunks of s3_multipart_chunksize bytes (default 8 MB). + On failure, aborts the multipart upload (best-effort) and re-raises. + """ + from botocore.exceptions import BotoCoreError + from botocore.exceptions import ClientError as BotoCoreClientError + + chunk_size = int(self.attrs.get("s3_multipart_chunksize", 8 * 1024 * 1024)) + upload_id = None + try: + mp = client.create_multipart_upload(Bucket=bucket, Key=final_key) + upload_id = mp["UploadId"] + upload_parts = [] + part_number = 1 + with open(self.origin, "rb") as f: + while True: + data = f.read(chunk_size) + if not data: + break + resp = client.upload_part( + Bucket=bucket, Key=final_key, PartNumber=part_number, + UploadId=upload_id, Body=data, + ) + upload_parts.append({"ETag": resp["ETag"], "PartNumber": part_number}) + part_number += 1 + client.complete_multipart_upload( + Bucket=bucket, Key=final_key, UploadId=upload_id, + MultipartUpload={"Parts": upload_parts}, + ) + except (BotoCoreClientError, BotoCoreError, OSError) as e: + LOGGER.exception("Multipart upload failed: %s", str(e)) + if upload_id is not None: + try: + client.abort_multipart_upload(Bucket=bucket, Key=final_key, UploadId=upload_id) + except (BotoCoreClientError, BotoCoreError): + pass + raise + + def _multipart_upload(self, destination_file_path): + """Orchestrate a boto3 multipart upload. 
+ + Parses destination_file_path into bucket and key, strips the tmp_prefix from the + basename if present to derive the final key, then uploads and updates self.destination. + """ + tmp_prefix = self.attrs.get("tmp_prefix", ".") + path_parts = destination_file_path.split("/") + bucket = path_parts[0] + key = "/".join(path_parts[1:]) if len(path_parts) > 1 else "" + + basename = key.split("/")[-1] if key else "" + if basename.startswith(tmp_prefix): + final_basename = basename[len(tmp_prefix):] + final_key = key.rsplit("/", 1)[0] + "/" + final_basename if "/" in key else final_basename + else: + final_key = key + + client = self._build_boto3_client() + self._do_multipart_upload(client, bucket, final_key) + self.destination = urlparse("s3://" + bucket + "/" + final_key) + def _sanitize_attrs(self): keys = list(self.attrs.keys()) @@ -776,8 +786,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): if boto3 is None: raise ImportError("No S3 backend available for copy+delete finalize") # boto3 copy_object and delete_object - boto_kwargs = dict(s3_attrs.get("client_kwargs", {})) - client = boto3.client("s3", **boto_kwargs) + client = self._build_boto3_client() copy_source = {"Bucket": tmp_bucket, "Key": tmp_key} client.copy_object(CopySource=copy_source, Bucket=final_bucket, Key=final_key) client.delete_object(Bucket=tmp_bucket, Key=tmp_key) diff --git a/trollmoves/tests/test_movers_s3.py b/trollmoves/tests/test_movers_s3.py index c736414..7b2d2f4 100644 --- a/trollmoves/tests/test_movers_s3.py +++ b/trollmoves/tests/test_movers_s3.py @@ -5,7 +5,7 @@ import pytest -from trollmoves.movers import S3Mover, S3FileSystem, boto3 +from trollmoves.movers import S3Mover def test_s3_multipart_upload(): @@ -81,3 +81,89 @@ def test_s3_copy_delete_fallback(mock_s3fs): # Cleanup os.remove(tmpname) + + +def test_build_boto3_client_default_credentials(): + """_build_boto3_client uses boto3 default chain when no key/secret present.""" + with 
tempfile.NamedTemporaryFile("wb", delete=False) as f: + tmpname = f.name + + mock_boto_mod = MagicMock() + mock_boto_mod.client = MagicMock(return_value=MagicMock()) + + with patch("trollmoves.movers.boto3", new=mock_boto_mod): + mover = S3Mover(tmpname, "s3://bucket/key", attrs={"client_kwargs": {"endpoint_url": "http://minio"}}) + mover._build_boto3_client() + mock_boto_mod.client.assert_called_once_with("s3", endpoint_url="http://minio") + + os.remove(tmpname) + + +def test_build_boto3_client_explicit_credentials(): + """_build_boto3_client passes key, secret, and token when present.""" + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + tmpname = f.name + + mock_boto_mod = MagicMock() + mock_boto_mod.client = MagicMock(return_value=MagicMock()) + + with patch("trollmoves.movers.boto3", new=mock_boto_mod): + attrs = {"key": "AKID", "secret": "SECRET", "token": "MYTOKEN"} + mover = S3Mover(tmpname, "s3://bucket/key", attrs=attrs) + mover._build_boto3_client() + mock_boto_mod.client.assert_called_once_with( + "s3", + aws_access_key_id="AKID", + aws_secret_access_key="SECRET", + aws_session_token="MYTOKEN", + ) + + os.remove(tmpname) + + +def test_do_multipart_upload_aborts_on_error(): + """_do_multipart_upload aborts the upload when an error occurs mid-upload.""" + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + f.write(b"data") + tmpname = f.name + + from botocore.exceptions import ClientError + + mock_client = MagicMock() + mock_client.create_multipart_upload.return_value = {"UploadId": "uid-abort"} + mock_client.upload_part.side_effect = ClientError( + {"Error": {"Code": "InternalError", "Message": "fail"}}, "UploadPart" + ) + + mock_boto_mod = MagicMock() + with patch("trollmoves.movers.boto3", new=mock_boto_mod): + mover = S3Mover(tmpname, "s3://bucket/key", attrs={}) + with pytest.raises(ClientError): + mover._do_multipart_upload(mock_client, "bucket", "key") + + mock_client.abort_multipart_upload.assert_called_once_with( + 
Bucket="bucket", Key="key", UploadId="uid-abort" + ) + os.remove(tmpname) + + +def test_do_multipart_upload_no_abort_when_create_fails(): + """_do_multipart_upload does not call abort if create_multipart_upload itself fails.""" + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + tmpname = f.name + + from botocore.exceptions import ClientError + + mock_client = MagicMock() + mock_client.create_multipart_upload.side_effect = ClientError( + {"Error": {"Code": "AccessDenied", "Message": "denied"}}, "CreateMultipartUpload" + ) + + mock_boto_mod = MagicMock() + with patch("trollmoves.movers.boto3", new=mock_boto_mod): + mover = S3Mover(tmpname, "s3://bucket/key", attrs={}) + with pytest.raises(ClientError): + mover._do_multipart_upload(mock_client, "bucket", "key") + + mock_client.abort_multipart_upload.assert_not_called() + os.remove(tmpname) From 56168983991ffd9164a4c24344243dcaebeb529b Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 14:59:38 +0300 Subject: [PATCH 25/31] Refactor movers.move_it() --- trollmoves/movers.py | 74 +++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index f8ded22..5d5f86e 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -58,7 +58,6 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ new_dest = dest_url._replace(path=new_path) fake_dest = clean_url(new_dest) - LOGGER.debug("new_dest = %s", new_dest) LOGGER.debug("Copying to: %s", fake_dest) try: LOGGER.debug("Scheme = %s", str(dest_url.scheme)) @@ -69,31 +68,9 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ raise try: - use_tmp = bool(attrs and attrs.get("use_tmp_on_transfer")) - if use_tmp: - tmp_prefix = attrs.get("tmp_prefix", ".") - tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) - m = mover_cls(pathname, tmp_dest, attrs=attrs, backup_targets=backup_targets) - 
m.copy() - # finalize: default finalizer works for local schemes; subclasses should override - try: - m.finalize_atomic_transfer(tmp_dest, new_dest) - except Exception: - # Intentionally broad: must clean up local tmp regardless of protocol error. - # Re-raises so the caller sees the original failure. - try: - if hasattr(tmp_dest, "path") and os.path.exists(tmp_dest.path): - os.remove(tmp_dest.path) - finally: - raise - last_dest = new_dest - else: - m = mover_cls(pathname, new_dest, attrs=attrs, backup_targets=backup_targets) - m.copy() - last_dest = m.destination - if last_dest != new_dest: - new_dest = last_dest - fake_dest = clean_url(new_dest) + tmp_dest = _get_tmp_destination(mover_cls, new_dest, attrs) + mover = _create_mover(mover_cls, pathname, new_dest, attrs, backup_targets, tmp_dest) + _copy(mover, new_dest, tmp_dest) if hook: hook(pathname, new_dest) except Exception as err: @@ -104,9 +81,39 @@ def move_it(pathname, destination, attrs=None, hook=None, rel_path=None, backup_ LOGGER.debug("".join(traceback.format_tb(exc_traceback))) raise err else: - LOGGER.info("Successfully copied %s to %s", - pathname, str(fake_dest)) - return m.destination + LOGGER.info("Successfully copied %s to %s", pathname, str(fake_dest)) + return mover.destination + + +def _create_mover(mover_cls, pathname, new_dest, attrs, backup_targets=None, tmp_dest=None): + if tmp_dest: + return mover_cls(pathname, tmp_dest, attrs=attrs, backup_targets=backup_targets) + return mover_cls(pathname, new_dest, attrs=attrs, backup_targets=backup_targets) + + +def _get_tmp_destination(mover_cls, new_dest, attrs): + use_tmp = bool(attrs and attrs.get("use_tmp_on_transfer")) + if use_tmp: + tmp_prefix = attrs.get("tmp_prefix", ".") + tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) + return tmp_dest + return None + + +def _copy(mover, new_dest, tmp_dest=None): + mover.copy() + if tmp_dest: + # finalize: default finalizer works for local schemes; subclasses should override + try: + 
mover.finalize_atomic_transfer(tmp_dest, new_dest) + except Exception: + # Intentionally broad: must clean up local tmp regardless of protocol error. + # Re-raises so the caller sees the original failure. + try: + if hasattr(tmp_dest, "path") and os.path.exists(tmp_dest.path): + os.remove(tmp_dest.path) + finally: + raise class Mover: @@ -184,10 +191,8 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): def get_connection(self, hostname, port, username=None): """Get the connection.""" with self.active_connection_lock: - LOGGER.debug("Destination username and passwd: %s %s", - self._dest_username, self._dest_password) - LOGGER.debug("Getting connection to %s@%s:%s", - username, hostname, port) + LOGGER.debug("Destination username and passwd: %s %s", self._dest_username, self._dest_password) + LOGGER.debug("Getting connection to %s@%s:%s", username, hostname, port) try: connection, timer = self.active_connections[(hostname, port, username)] if not self.is_connected(connection): @@ -198,8 +203,7 @@ def get_connection(self, hostname, port, username=None): except KeyError: connection = self.open_connection() - timer = CTimer(int(self.attrs.get("connection_uptime", 30)), - self.delete_connection, (connection,)) + timer = CTimer(int(self.attrs.get("connection_uptime", 30)), self.delete_connection, (connection,)) timer.start() self.active_connections[(self.destination.hostname, port, username)] = connection, timer From 1274f45a2569338a81274077c839b31717780011 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 27 Apr 2026 15:37:23 +0300 Subject: [PATCH 26/31] Use supports_atomic to guard tmp-file transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Convert Mover.supports_atomic from an instance method to a @classmethod(cls, attrs=None) with a safe default of False. 
- Add @classmethod supports_atomic overrides in every subclass: FileMover, FtpMover, ScpMover, SftpMover → always True S3Mover → True only when s3_use_multipart or s3_use_copy is set - Use the new classmethod in _get_tmp_destination: when the user requests use_tmp_on_transfer but the mover does not support atomic transfers, log LOGGER.error and return None so the transfer falls back to a direct (non-tmp) copy. - Add tests for supports_atomic on all mover classes and for the move_it fallback behaviour. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- trollmoves/movers.py | 49 +++++++++++++++--- trollmoves/tests/test_movers_s3.py | 35 +++++++++++++ trollmoves/tests/test_movers_use_tmp.py | 68 +++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 8 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 5d5f86e..e72feed 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -94,6 +94,13 @@ def _create_mover(mover_cls, pathname, new_dest, attrs, backup_targets=None, tmp def _get_tmp_destination(mover_cls, new_dest, attrs): use_tmp = bool(attrs and attrs.get("use_tmp_on_transfer")) if use_tmp: + if not mover_cls.supports_atomic(attrs): + LOGGER.error( + "Mover '%s' does not support atomic transfers. " + "Falling back to transfer without temporary files.", + mover_cls.__name__, + ) + return None tmp_prefix = attrs.get("tmp_prefix", ".") tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) return tmp_dest @@ -145,16 +152,15 @@ def move(self): raise NotImplementedError("Move for scheme " + self.destination.scheme + " not implemented (yet).") - def supports_atomic(self): - """Return True if this mover supports the default atomic finalize method. + @classmethod + def supports_atomic(cls, attrs=None): + """Return True if this mover class supports atomic tmp→final transfers. - Subclasses should override if they can perform remote atomic rename. + The default is False (conservative). 
Subclasses that implement + finalize_atomic_transfer should override and return True (or, in + the case of S3Mover, inspect *attrs* to decide). """ - try: - scheme = self.destination.scheme - except AttributeError: - scheme = "" - return scheme in ("", "file") + return False @staticmethod def tmp_destination_for(dest, tmp_prefix="."): @@ -232,6 +238,11 @@ def delete_connection(self, connection): class FileMover(Mover): """Move files in the filesystem.""" + @classmethod + def supports_atomic(cls, attrs=None): + """Local filesystem always supports atomic rename via os.replace.""" + return True + def copy(self): """Copy the file.""" dirname = os.path.dirname(self.destination.path) @@ -285,6 +296,11 @@ class FtpMover(Mover): active_connections = dict() active_connection_lock = Lock() + @classmethod + def supports_atomic(cls, attrs=None): + """FTP supports atomic rename via RNFR/RNTO.""" + return True + def _get_netrc_authentication(self): """Get login authentications from netrc file if available.""" try: @@ -378,6 +394,11 @@ class ScpMover(Mover): active_connections = dict() active_connection_lock = Lock() + @classmethod + def supports_atomic(cls, attrs=None): + """SCP supports atomic rename via SFTP rename over the same SSH connection.""" + return True + def open_connection(self): """Open a connection.""" import copy @@ -513,6 +534,11 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): class SftpMover(Mover): """Move files over sftp.""" + @classmethod + def supports_atomic(cls, attrs=None): + """SFTP supports atomic rename.""" + return True + def move(self): """Push the file.""" self.copy() @@ -618,6 +644,13 @@ def __init__(self, origin, destination, attrs=None, backup_targets=None): super().__init__(origin, destination, attrs, backup_targets) self._sanitize_attrs() + @classmethod + def supports_atomic(cls, attrs=None): + """S3 supports atomic transfers only when multipart upload or copy+delete is configured.""" + if not attrs: + return False + 
return bool(attrs.get("s3_use_multipart")) or bool(attrs.get("s3_use_copy")) + def copy(self): """Copy the file to a bucket.""" if S3FileSystem is None and boto3 is None: diff --git a/trollmoves/tests/test_movers_s3.py b/trollmoves/tests/test_movers_s3.py index 7b2d2f4..52e02bb 100644 --- a/trollmoves/tests/test_movers_s3.py +++ b/trollmoves/tests/test_movers_s3.py @@ -167,3 +167,38 @@ def test_do_multipart_upload_no_abort_when_create_fails(): mock_client.abort_multipart_upload.assert_not_called() os.remove(tmpname) + + +# =========================================================================== +# supports_atomic classmethod for S3Mover +# =========================================================================== + +def test_s3_mover_supports_atomic_no_attrs(): + """S3Mover.supports_atomic returns False when no attrs are provided.""" + assert S3Mover.supports_atomic() is False + assert S3Mover.supports_atomic(attrs=None) is False + + +def test_s3_mover_supports_atomic_empty_attrs(): + """S3Mover.supports_atomic returns False when attrs have no relevant keys.""" + assert S3Mover.supports_atomic(attrs={}) is False + + +def test_s3_mover_supports_atomic_with_multipart(): + """S3Mover.supports_atomic returns True when s3_use_multipart is set.""" + assert S3Mover.supports_atomic(attrs={"s3_use_multipart": True}) is True + + +def test_s3_mover_supports_atomic_with_copy(): + """S3Mover.supports_atomic returns True when s3_use_copy is set.""" + assert S3Mover.supports_atomic(attrs={"s3_use_copy": True}) is True + + +def test_s3_mover_supports_atomic_with_both(): + """S3Mover.supports_atomic returns True when both flags are set.""" + assert S3Mover.supports_atomic(attrs={"s3_use_multipart": True, "s3_use_copy": True}) is True + + +def test_s3_mover_supports_atomic_false_values(): + """S3Mover.supports_atomic returns False when flags are explicitly False.""" + assert S3Mover.supports_atomic(attrs={"s3_use_multipart": False, "s3_use_copy": False}) is False diff --git 
a/trollmoves/tests/test_movers_use_tmp.py b/trollmoves/tests/test_movers_use_tmp.py index 86fb9d2..4b7cb1b 100644 --- a/trollmoves/tests/test_movers_use_tmp.py +++ b/trollmoves/tests/test_movers_use_tmp.py @@ -428,3 +428,71 @@ def test_move_it_use_tmp_ftp_scheme(mock_ftp_class, tmp_path): # finalize step: rename was called once from tmp to final mock_ftp.rename.assert_called_once_with(".upload.txt", "upload.txt") + + +# =========================================================================== +# Group I – supports_atomic classmethod on each mover class +# =========================================================================== + +def test_supports_atomic_file_mover(): + """FileMover always supports atomic transfers.""" + from trollmoves.movers import FileMover + assert FileMover.supports_atomic() is True + assert FileMover.supports_atomic(attrs={}) is True + + +def test_supports_atomic_ftp_mover(): + """FtpMover always supports atomic transfers.""" + from trollmoves.movers import FtpMover + assert FtpMover.supports_atomic() is True + + +def test_supports_atomic_scp_mover(): + """ScpMover always supports atomic transfers.""" + from trollmoves.movers import ScpMover + assert ScpMover.supports_atomic() is True + + +def test_supports_atomic_sftp_mover(): + """SftpMover always supports atomic transfers.""" + from trollmoves.movers import SftpMover + assert SftpMover.supports_atomic() is True + + +def test_supports_atomic_base_mover_returns_false(): + """The base Mover class returns False as a safe default.""" + from trollmoves.movers import Mover + assert Mover.supports_atomic() is False + assert Mover.supports_atomic(attrs={"use_tmp_on_transfer": True}) is False + + +# =========================================================================== +# Group J – move_it() falls back when supports_atomic returns False +# =========================================================================== + +def test_move_it_falls_back_when_atomic_not_supported(tmp_path, 
monkeypatch, caplog): + """When use_tmp_on_transfer=True but supports_atomic returns False, move_it + falls back to a direct transfer and logs an error.""" + import logging + + from trollmoves.movers import FileMover, move_it + + # Make FileMover report it does not support atomic so we can test the fallback + # without needing a custom protocol. + monkeypatch.setattr(FileMover, "supports_atomic", classmethod(lambda cls, attrs=None: False)) + + source = tmp_path / "source" / "data.txt" + source.parent.mkdir() + source.write_text("fallback content") + dest = tmp_path / "dest" / "data.txt" + + with caplog.at_level(logging.ERROR, logger="trollmoves.movers"): + move_it(str(source), str(dest), attrs={"use_tmp_on_transfer": True}) + + # Final file must exist (transfer succeeded via direct path) + assert dest.exists() + assert dest.read_text() == "fallback content" + # Tmp file must NOT exist + assert not (tmp_path / "dest" / ".data.txt").exists() + # An error must have been logged about the fallback + assert any("does not support atomic" in record.message for record in caplog.records) From 464b22b73c84bbdfc334bfe461a57e242d536121 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 Apr 2026 08:20:21 +0300 Subject: [PATCH 27/31] Flatten _get_tmp_destination() structure --- trollmoves/movers.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index e72feed..c5b6621 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -93,18 +93,21 @@ def _create_mover(mover_cls, pathname, new_dest, attrs, backup_targets=None, tmp def _get_tmp_destination(mover_cls, new_dest, attrs): use_tmp = bool(attrs and attrs.get("use_tmp_on_transfer")) - if use_tmp: - if not mover_cls.supports_atomic(attrs): - LOGGER.error( - "Mover '%s' does not support atomic transfers. 
" - "Falling back to transfer without temporary files.", - mover_cls.__name__, - ) - return None - tmp_prefix = attrs.get("tmp_prefix", ".") - tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) - return tmp_dest - return None + if not use_tmp: + return None + + if not mover_cls.supports_atomic(attrs): + LOGGER.error( + "Mover '%s' does not support atomic transfers. " + "Falling back to transfer without temporary files.", + mover_cls.__name__, + ) + return None + + tmp_prefix = attrs.get("tmp_prefix", ".") + tmp_dest = mover_cls.tmp_destination_for(new_dest, tmp_prefix) + + return tmp_dest def _copy(mover, new_dest, tmp_dest=None): From 2642483fa9bcf6517c8cc1991b607f45195637a3 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 Apr 2026 08:33:11 +0300 Subject: [PATCH 28/31] Remove unnecessary alias imports --- trollmoves/movers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index c5b6621..87c413f 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -482,7 +482,7 @@ def move(self): def copy(self): """Upload the file.""" - from paramiko import SSHException as _SSHException + from paramiko import SSHException from scp import SCPClient, SCPException ssh_connection = self.get_connection(self.destination.hostname, @@ -490,7 +490,7 @@ def copy(self): self._dest_username) try: scp = SCPClient(ssh_connection.get_transport()) - except (TypeError, _SSHException, OSError) as err: + except (TypeError, SSHException, OSError) as err: LOGGER.error("Failed to initiate SCPClient: %s", str(err)) ssh_connection.close() raise @@ -506,7 +506,7 @@ def copy(self): else: LOGGER.error("OSError in scp.put: %s", str(osex)) raise - except (SCPException, _SSHException) as err: + except (SCPException, SSHException) as err: LOGGER.error("Something went wrong with scp: %s", str(err)) LOGGER.error("Exception name %s", type(err).__name__) LOGGER.error("Exception args %s", str(err.args)) @@ 
-516,7 +516,7 @@ def copy(self): def finalize_atomic_transfer(self, tmp_destination, final_destination): """Finalize atomic transfer for SCP by performing remote rename via SFTP.""" - from paramiko import SSHException as _SSHException + from paramiko import SSHException ssh_connection = self.get_connection(self.destination.hostname, self.destination.port or 22, self._dest_username) @@ -529,7 +529,7 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): if sftp is not None: try: sftp.close() - except (_SSHException, OSError): + except (SSHException, OSError): pass self.destination = final_destination From a1fe077fee3e9f1cd4d324ac8be085d946b86d3f Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 Apr 2026 08:43:31 +0300 Subject: [PATCH 29/31] Refactor renaming in SCP/SFTP movers --- trollmoves/movers.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 87c413f..d488adb 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -516,24 +516,20 @@ def copy(self): def finalize_atomic_transfer(self, tmp_destination, final_destination): """Finalize atomic transfer for SCP by performing remote rename via SFTP.""" - from paramiko import SSHException ssh_connection = self.get_connection(self.destination.hostname, self.destination.port or 22, self._dest_username) - sftp = None - try: - sftp = ssh_connection.open_sftp() - ensure_final_directory_for_rename(sftp, final_destination.path) - sftp.rename(tmp_destination.path, final_destination.path) - finally: - if sftp is not None: - try: - sftp.close() - except (SSHException, OSError): - pass + _rename_over_sftp(ssh_connection, tmp_destination, final_destination) self.destination = final_destination +def _rename_over_sftp(ssh_connection, tmp_destination, final_destination): + """Rename tmp_destination to final_destination on remote host via SFTP.""" + with ssh_connection.open_sftp() as sftp: + 
ensure_final_directory_for_rename(sftp, final_destination.path) + sftp.rename(tmp_destination.path, final_destination.path) + + class SftpMover(Mover): """Move files over sftp.""" @@ -573,9 +569,8 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): username=self._dest_username, allow_agent=True, key_filename=self.attrs.get("ssh_private_key_file")) - with ssh.open_sftp() as sftp: - ensure_final_directory_for_rename(sftp, final_destination.path) - sftp.rename(tmp_destination.path, final_destination.path) + + _rename_over_sftp(ssh, tmp_destination, final_destination) self.destination = final_destination From 4952af1566a38ae42f70ef02cfee581d95a3f288 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 Apr 2026 09:14:11 +0300 Subject: [PATCH 30/31] Refactor opening connections --- trollmoves/movers.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index d488adb..5dbc8a3 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -200,24 +200,24 @@ def finalize_atomic_transfer(self, tmp_destination, final_destination): def get_connection(self, hostname, port, username=None): """Get the connection.""" with self.active_connection_lock: - LOGGER.debug("Destination username and passwd: %s %s", self._dest_username, self._dest_password) - LOGGER.debug("Getting connection to %s@%s:%s", username, hostname, port) - try: - connection, timer = self.active_connections[(hostname, port, username)] - if not self.is_connected(connection): - del self.active_connections[(hostname, port, username)] - LOGGER.debug("Resetting connection") - connection = self.open_connection() - timer.cancel() - except KeyError: - connection = self.open_connection() - + connection = self._get_connection(hostname, port, username) timer = CTimer(int(self.attrs.get("connection_uptime", 30)), self.delete_connection, (connection,)) timer.start() 
self.active_connections[(self.destination.hostname, port, username)] = connection, timer return connection + def _get_connection(self, hostname, port, username=None): + LOGGER.debug("Getting connection to %s@%s:%s", username, hostname, port) + if (hostname, port, username) in self.active_connections: + connection, timer = self.active_connections[(hostname, port, username)] + timer.cancel() + if self.is_connected(connection): + return connection + del self.active_connections[(hostname, port, username)] + LOGGER.debug("Resetting connection") + return self.open_connection() + def delete_connection(self, connection): """Delete active connection *connection*.""" with self.active_connection_lock: From b8f2439e46d491b57fc43227eb9fe948d675295b Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 Apr 2026 09:22:53 +0300 Subject: [PATCH 31/31] Refactor connection deletion --- trollmoves/movers.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/trollmoves/movers.py b/trollmoves/movers.py index 5dbc8a3..43c577d 100644 --- a/trollmoves/movers.py +++ b/trollmoves/movers.py @@ -231,11 +231,14 @@ def delete_connection(self, connection): try: self.close_connection(connection) finally: - for key, (current_connection, current_timer) in self.active_connections.items(): - if current_connection == connection: - del self.active_connections[key] - current_timer.cancel() - break + self._remove_connection_from_active_connections(connection) + + def _remove_connection_from_active_connections(self, connection): + for key, (current_connection, current_timer) in self.active_connections.items(): + if current_connection == connection: + del self.active_connections[key] + current_timer.cancel() + break class FileMover(Mover):