diff --git a/src/main/java/org/ecocean/api/bulk/BulkImporter.java b/src/main/java/org/ecocean/api/bulk/BulkImporter.java index 2388d7b60f..4cab1f072a 100644 --- a/src/main/java/org/ecocean/api/bulk/BulkImporter.java +++ b/src/main/java/org/ecocean/api/bulk/BulkImporter.java @@ -688,9 +688,21 @@ private void processRow(List fields) { if (bv == null) throw new RuntimeException("could not find fmap for key=" + maKey); if (bv.valueIsNull()) continue; MediaAsset ma = this.mediaAssetMap.get(bv.getValueString()); - if (ma == null) - throw new RuntimeException("could not find MediaAsset for maKey=" + maKey + - ", bv=" + bv.getValueString() + " in " + this.mediaAssetMap); + if (ma == null) { + // The referenced image has no MediaAsset — typically because it + // failed image validation at creation time (corrupt/unreadable; + // see AssetStore.isValidImage). Skip just this image so one bad + // file does not abort the whole import; the encounter still + // imports with its other (valid) images. Unlike the valueIsNull() + // skip above (an empty column), this column WAS occupied by an + // image, so we advance `offset` to consume its keyword/quality + // slot — otherwise a later valid image would inherit this + // corrupt image's positional metadata. + System.out.println("[WARN] processRow: skipping image with no MediaAsset (likely " + + "corrupt/unreadable) for maKey=" + maKey + ", value=" + bv.getValueString()); + offset++; + continue; + } Set kws = new HashSet(); if ((offset < kwFields.size()) && (kwFields.get(offset) != null)) kws.add(fmap.get(kwFields.get(offset)).getValueString()); diff --git a/src/main/java/org/ecocean/media/AssetStore.java b/src/main/java/org/ecocean/media/AssetStore.java index ffde682f18..6b65628769 100644 --- a/src/main/java/org/ecocean/media/AssetStore.java +++ b/src/main/java/org/ecocean/media/AssetStore.java @@ -22,7 +22,6 @@ import org.json.JSONObject; import java.awt.image.BufferedImage; -import java.io.EOFException; import java.io.FileInputStream; import java.io.InputStream; import java.nio.file.Files; @@ -684,10 +683,16 @@ public static boolean isValidImage(final File file) { System.out.println("WARNING: isValidImage(" + file + ") file not found"); return false; } catch (final IIOException e) { - if (e.getCause() instanceof EOFException) { - System.out.println("WARNING: isValidImage(" + file + ") is truncated[3]"); - return false; - } + // Any decode IIOException means the registered ImageReader cannot + // read this image (corrupt markers, bad scan data, or truncation). + // Such an image is not safe to send to the detection pipeline, where + // it can hang the decoder. Previously only EOFException-caused + // (truncation) returned false and every OTHER decode error fell + // through to `return true`, mis-classifying corrupt images such as + // "Unsupported marker type 0xNN" as valid. + System.out.println("WARNING: isValidImage(" + file + ") decode failed: " + + e.toString()); + return false; } catch (final IOException e) { System.out.println("WARNING: isValidImage(" + file + ") threw " + e.toString()); return false; @@ -696,7 +701,5 @@ public static boolean isValidImage(final File file) { digestInputStream.close(); } catch (Exception ex) {} } - System.out.println("INFO: isValidImage(" + file + ") fell thru[2] - success?"); - return true; } } diff --git a/src/test/java/org/ecocean/api/bulk/BulkImporterMissingAssetTest.java b/src/test/java/org/ecocean/api/bulk/BulkImporterMissingAssetTest.java new file mode 100644 index 0000000000..fd87978ac6 --- /dev/null +++ b/src/test/java/org/ecocean/api/bulk/BulkImporterMissingAssetTest.java @@ -0,0 +1,173 @@ +package org.ecocean; + +import org.ecocean.api.bulk.*; +import org.ecocean.media.AssetStore; +import org.ecocean.media.MediaAsset; +import org.ecocean.shepherd.core.Shepherd; +import org.ecocean.shepherd.core.ShepherdPMF; + +import javax.jdo.PersistenceManager; +import javax.jdo.PersistenceManagerFactory; +import javax.servlet.ServletException; + +import org.junit.jupiter.api.Test; + +import static org.junit.Assert.*; + +import org.json.JSONObject; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.ecocean.Keyword; + +import org.mockito.MockedConstruction; +import org.mockito.MockedStatic; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockConstruction; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +/** + * Covers the BulkImporter.processRow() tolerance guard: a row referencing a + * mediaAsset whose MediaAsset is absent from the maMap (which happens when an + * image failed validation at creation time -- see AssetStore.isValidImage) + * must NOT abort the import. Instead the bad image is skipped and the encounter + * still imports with its remaining valid images. + */ +class BulkImporterMissingAssetTest { + PersistenceManagerFactory mockPMF; + PersistenceManager mockPM = mock(PersistenceManager.class); + + @Test void missingAssetIsSkippedNotThrown() + throws ServletException { + Occurrence occ = mock(Occurrence.class); + User user = mock(User.class); + + // A row with two images: index 0 is present in the maMap, index 1 is + // NOT (simulating an image that failed isValidImage and so never got a + // MediaAsset created for it). + Map row = new HashMap(); + row.put("Encounter.submitterID", "fakeSubmitterId"); + row.put("Encounter.year", 2000); + row.put("Encounter.genus", "genus"); + row.put("Encounter.specificEpithet", "specificEpithet"); + row.put("Encounter.mediaAsset0", "good.jpg"); + row.put("Encounter.mediaAsset1", "corrupt.jpg"); + + // maMap holds ONLY the valid image; "corrupt.jpg" is intentionally absent. + // A mocked store keeps the diagnostic MediaAsset.toString() (logged during + // the persistence loop) from NPEing on an otherwise-bare asset. + MediaAsset goodMA = new MediaAsset(0, mock(AssetStore.class), null); + Map maMap = new HashMap(); + maMap.put("good.jpg", goodMA); + + try (MockedConstruction mockShepherd = mockConstruction(Shepherd.class, + (mockSh, context) -> { + when(mockSh.getContext()).thenReturn("context0"); + when(mockSh.getPM()).thenReturn(mockPM); + when(mockSh.getUser(any(String.class))).thenReturn(user); + when(mockSh.getOrCreateOccurrence(any(String.class))).thenReturn(occ); + when(mockSh.getOrCreateOccurrence(null)).thenReturn(occ); + when(mockSh.isValidTaxonomyName(any(String.class))).thenReturn(true); + })) { + try (MockedStatic mockService = mockStatic(ShepherdPMF.class)) { + mockService.when(() -> ShepherdPMF.getPMF(any(String.class))).thenReturn(mockPMF); + + Shepherd myShepherd = new Shepherd("context0"); + Map validated = BulkImportUtil.validateRow( + new JSONObject(row), myShepherd); + List > allRows = new ArrayList >(); + allRows.add(validated); + + BulkImporter imp = new BulkImporter("test-missing-asset", allRows, maMap, null, + myShepherd); + // Before the guard this threw RuntimeException (wrapped as + // ServletException) on the missing "corrupt.jpg" asset. + imp.createImport(); + + assertEquals(1, Util.collectionSize(imp.getEncounters())); + Encounter enc = imp.getEncounters().get(0); + // Only the present ("good.jpg") image yields an annotation; the + // missing one was skipped, not fatal. + assertEquals(1, Util.collectionSize(enc.getAnnotations())); + assertEquals(goodMA, enc.getAnnotations().get(0).getMediaAsset()); + } + } + } + + /** + * Alignment guard: when the CORRUPT/missing image is at index 0 and a VALID + * image is at index 1, the keyword/quality lists are POSITIONAL, so the + * surviving valid image must read keyword1/quality1 -- NOT keyword0/quality0 + * (the corrupt image's metadata). This proves processRow() advances `offset` + * when it skips a missing MediaAsset, consuming the corrupt image's column + * slot so later images stay column-aligned. + */ + @Test void corruptFirstKeepsKeywordQualityAligned() + throws ServletException { + Occurrence occ = mock(Occurrence.class); + User user = mock(User.class); + + // index 0 = corrupt (absent from maMap) with its own keyword/quality; + // index 1 = the valid image with DISTINCT keyword/quality. + Map row = new HashMap(); + row.put("Encounter.submitterID", "fakeSubmitterId"); + row.put("Encounter.year", 2000); + row.put("Encounter.genus", "genus"); + row.put("Encounter.specificEpithet", "specificEpithet"); + row.put("Encounter.mediaAsset0", "corrupt.jpg"); + row.put("Encounter.keyword0", "corruptKeyword"); + row.put("Encounter.quality0", 1.0); + row.put("Encounter.mediaAsset1", "good.jpg"); + row.put("Encounter.keyword1", "goodKeyword"); + row.put("Encounter.quality1", 5.0); + + MediaAsset goodMA = new MediaAsset(0, mock(AssetStore.class), null); + Map maMap = new HashMap(); + maMap.put("good.jpg", goodMA); // "corrupt.jpg" intentionally absent + + try (MockedConstruction mockShepherd = mockConstruction(Shepherd.class, + (mockSh, context) -> { + when(mockSh.getContext()).thenReturn("context0"); + when(mockSh.getPM()).thenReturn(mockPM); + when(mockSh.getUser(any(String.class))).thenReturn(user); + when(mockSh.getOrCreateOccurrence(any(String.class))).thenReturn(occ); + when(mockSh.getOrCreateOccurrence(null)).thenReturn(occ); + when(mockSh.isValidTaxonomyName(any(String.class))).thenReturn(true); + })) { + try (MockedStatic mockService = mockStatic(ShepherdPMF.class)) { + mockService.when(() -> ShepherdPMF.getPMF(any(String.class))).thenReturn(mockPMF); + + Shepherd myShepherd = new Shepherd("context0"); + Map validated = BulkImportUtil.validateRow( + new JSONObject(row), myShepherd); + List > allRows = new ArrayList >(); + allRows.add(validated); + + BulkImporter imp = new BulkImporter("test-corrupt-first", allRows, maMap, null, + myShepherd); + imp.createImport(); + + assertEquals(1, Util.collectionSize(imp.getEncounters())); + Encounter enc = imp.getEncounters().get(0); + // Only the present ("good.jpg") image yields an annotation. + assertEquals(1, Util.collectionSize(enc.getAnnotations())); + Annotation ann = enc.getAnnotations().get(0); + assertEquals(goodMA, ann.getMediaAsset()); + + // The CORE assertion: the surviving valid image must carry its + // OWN positional metadata (keyword1/quality1), not the corrupt + // image's (keyword0/quality0). Before the offset++ fix it would + // inherit "corruptKeyword"/1.0. + assertEquals(Double.valueOf(5.0), ann.getQuality()); + List kws = goodMA.getKeywords(); + assertEquals(1, Util.collectionSize(kws)); + assertEquals("goodKeyword", kws.get(0).getReadableName()); + } + } + } +} diff --git a/src/test/java/org/ecocean/media/AssetStoreIsValidImageTest.java b/src/test/java/org/ecocean/media/AssetStoreIsValidImageTest.java new file mode 100644 index 0000000000..d691818143 --- /dev/null +++ b/src/test/java/org/ecocean/media/AssetStoreIsValidImageTest.java @@ -0,0 +1,54 @@ +package org.ecocean.media; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.awt.image.BufferedImage; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.nio.file.Files; +import javax.imageio.ImageIO; + +import org.junit.jupiter.api.Test; + +public class AssetStoreIsValidImageTest { + + /** Write a small, structurally valid baseline JPEG to a temp file. */ + private static byte[] validJpegBytes() throws Exception { + BufferedImage img = new BufferedImage(64, 64, BufferedImage.TYPE_INT_RGB); + for (int x = 0; x < 64; x++) + for (int y = 0; y < 64; y++) img.setRGB(x, y, (x * 4) << 16 | (y * 4) << 8); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ImageIO.write(img, "jpg", bos); + return bos.toByteArray(); + } + + private static File writeTemp(byte[] bytes, String suffix) throws Exception { + File f = File.createTempFile("isvalidimg", suffix); + f.deleteOnExit(); + Files.write(f.toPath(), bytes); + return f; + } + + @Test + public void validJpegIsValid() throws Exception { + File f = writeTemp(validJpegBytes(), ".jpg"); + assertTrue(AssetStore.isValidImage(f), "a clean JPEG must validate as a valid image"); + } + + @Test + public void corruptMarkerJpegIsInvalid() throws Exception { + // Inject an unsupported marker (0xFF 0x99) into the scan data, after the + // header, to reproduce the real-world failure mode: ImageIO.read() throws + // IIOException "Unsupported marker type 0x99" (cause is NOT EOFException). + byte[] good = validJpegBytes(); + int spliceAt = Math.max(20, good.length - 40); // inside the entropy-coded data + good[spliceAt] = (byte) 0xFF; + good[spliceAt + 1] = (byte) 0x99; + good[spliceAt + 2] = (byte) 0xFF; + good[spliceAt + 3] = (byte) 0x99; + File f = writeTemp(good, ".jpg"); + assertFalse(AssetStore.isValidImage(f), + "a JPEG whose decode throws a non-EOF IIOException must be rejected as invalid"); + } +}