From e4f09ac93d73d1c2a092b37f3f6f811545ac6feb Mon Sep 17 00:00:00 2001 From: David Stone Date: Tue, 31 Mar 2026 16:30:06 +0100 Subject: [PATCH 1/2] Change copyS3File to handle files that have already been persisted --- src/repositories/file-repository.js | 2 +- src/services/file-service.js | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/repositories/file-repository.js b/src/repositories/file-repository.js index a24f221..5a43ee8 100644 --- a/src/repositories/file-repository.js +++ b/src/repositories/file-repository.js @@ -62,7 +62,7 @@ export async function getByFileId(fileId) { /** * Updates the S3 Keys for a given set of files - * @param {{ fileId: string; s3Bucket: string; oldS3Key: string; newS3Key: string; }[]} updateFiles + * @param {{ fileId: string; s3Bucket: string; oldS3Key: string | undefined; newS3Key: string; }[]} updateFiles * @param {ClientSession} session */ export async function updateS3Keys(updateFiles, session) { diff --git a/src/services/file-service.js b/src/services/file-service.js index 352a655..0a09ee2 100644 --- a/src/services/file-service.js +++ b/src/services/file-service.js @@ -174,7 +174,7 @@ export async function persistFiles(files, persistedRetrievalKey) { const session = mongoClient.startSession() /** - * @type {Promise<{ fileId: string, s3Bucket: string; oldS3Key: string; newS3Key: string; }>[]} + * @type {Promise<{ fileId: string, s3Bucket: string; oldS3Key: string | undefined; newS3Key: string; }>[]} */ let updateFiles = [] @@ -232,7 +232,7 @@ export async function persistFiles(files, persistedRetrievalKey) { /** * Deletes old files in staging based on the provided keys. - * @param {Promise<{ fileId: string, s3Bucket: string; oldS3Key: string; newS3Key: string; }>[]} keys - an array of files to handle + * @param {Promise<{ fileId: string, s3Bucket: string; oldS3Key: string | undefined; newS3Key: string; }>[]} keys - an array of files to handle * @param {('oldS3Key'|'newS3Key')} lookupKey - the key to use to look up the S3 key * @param {S3Client} client - S3 client */ @@ -241,6 +241,7 @@ async function deleteOldFiles(keys, lookupKey, client) { const filteredKeys = settledKeys .filter((result) => result.status === 'fulfilled') .map((result) => result.value) + .filter((value) => value.oldS3Key !== undefined) // Filter out any undefined results (files that didn't need copying) // AWS do have the DeleteObjects command instead which would be preferable. However, S3 keys // are stored on a per-document basis not a global and so we can't batch these up in case of any @@ -271,7 +272,16 @@ async function copyS3File(fileId, initiatedRetrievalKey, client) { } if (fileStatus.s3Key.startsWith(loadedPrefix)) { - throw Boom.badRequest(`File ID ${fileId} has already been persisted`) + const msg = `File ${fileId} is already in the loaded directory, no need to copy` + + logger.info(`[copyS3File] ${msg}`) + + return { + fileId, + s3Bucket: fileStatus.s3Bucket, + oldS3Key: undefined, // Marker to indicate no copy was needed + newS3Key: fileStatus.s3Key + } } const oldS3Key = fileStatus.s3Key From 167f7318ea69c7ac7e3788509a22b872e50859ec Mon Sep 17 00:00:00 2001 From: David Stone Date: Tue, 31 Mar 2026 16:47:01 +0100 Subject: [PATCH 2/2] Add tests to copyS3File to handle files that have already been persisted --- src/services/file-service.test.js | 35 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/services/file-service.test.js b/src/services/file-service.test.js index 92c9be2..af56b2a 100644 --- a/src/services/file-service.test.js +++ b/src/services/file-service.test.js @@ -886,7 +886,7 @@ describe('Files service', () => { }) }) - it('should not allow a previously extended file to be extended again', async () => { + it('should allow a previously extended file to be extended again', async () => { /** @type {FormFileUploadStatus} */ const dummyData = { ...successfulFile, @@ -897,20 +897,25 @@ describe('Files service', () => { jest.mocked(verify).mockResolvedValueOnce(true) jest.mocked(repository.getByFileId).mockResolvedValueOnce(dummyData) - await expect( - persistFiles( - [ - { - fileId: dummyData.fileId, - initiatedRetrievalKey: dummyData.retrievalKey - } - ], - dummyData.retrievalKey - ) - ).rejects.toThrow( - Boom.badRequest( - `File ID ${dummyData.fileId} has already been persisted` - ) + jest.mocked(hash).mockResolvedValueOnce('caseSensitiveHash') + jest.mocked(verify).mockResolvedValueOnce(true) + + await persistFiles( + [ + { + fileId: dummyData.fileId, + initiatedRetrievalKey: dummyData.retrievalKey + } + ], + dummyData.retrievalKey + ) + + expect(hash).toHaveBeenCalledWith(dummyData.retrievalKey.toLowerCase()) + expect(repository.updateRetrievalKeys).toHaveBeenCalledWith( + [dummyData.fileId], + 'caseSensitiveHash', + false, + expect.any(Object) ) })