Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions src/main/java/org/ecocean/api/bulk/BulkImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -688,9 +688,21 @@ private void processRow(List<BulkValidator> fields) {
if (bv == null) throw new RuntimeException("could not find fmap for key=" + maKey);
if (bv.valueIsNull()) continue;
MediaAsset ma = this.mediaAssetMap.get(bv.getValueString());
if (ma == null)
throw new RuntimeException("could not find MediaAsset for maKey=" + maKey +
", bv=" + bv.getValueString() + " in " + this.mediaAssetMap);
if (ma == null) {
// The referenced image has no MediaAsset — typically because it
// failed image validation at creation time (corrupt/unreadable;
// see AssetStore.isValidImage). Skip just this image so one bad
// file does not abort the whole import; the encounter still
// imports with its other (valid) images. Unlike the valueIsNull()
// skip above (an empty column), this column WAS occupied by an
// image, so we advance `offset` to consume its keyword/quality
// slot — otherwise a later valid image would inherit this
// corrupt image's positional metadata.
System.out.println("[WARN] processRow: skipping image with no MediaAsset (likely "
+ "corrupt/unreadable) for maKey=" + maKey + ", value=" + bv.getValueString());
offset++;
continue;
}
Set<String> kws = new HashSet<String>();
if ((offset < kwFields.size()) && (kwFields.get(offset) != null))
kws.add(fmap.get(kwFields.get(offset)).getValueString());
Expand Down
17 changes: 10 additions & 7 deletions src/main/java/org/ecocean/media/AssetStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.json.JSONObject;

import java.awt.image.BufferedImage;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.file.Files;
Expand Down Expand Up @@ -684,10 +683,16 @@ public static boolean isValidImage(final File file) {
System.out.println("WARNING: isValidImage(" + file + ") file not found");
return false;
} catch (final IIOException e) {
if (e.getCause() instanceof EOFException) {
System.out.println("WARNING: isValidImage(" + file + ") is truncated[3]");
return false;
}
// Any decode IIOException means the registered ImageReader cannot
// read this image (corrupt markers, bad scan data, or truncation).
// Such an image is not safe to send to the detection pipeline, where
// it can hang the decoder. Previously only EOFException-caused
// (truncation) returned false and every OTHER decode error fell
// through to `return true`, mis-classifying corrupt images such as
// "Unsupported marker type 0xNN" as valid.
System.out.println("WARNING: isValidImage(" + file + ") decode failed: "
+ e.toString());
return false;
} catch (final IOException e) {
System.out.println("WARNING: isValidImage(" + file + ") threw " + e.toString());
return false;
Expand All @@ -696,7 +701,5 @@ public static boolean isValidImage(final File file) {
digestInputStream.close();
} catch (Exception ex) {}
}
System.out.println("INFO: isValidImage(" + file + ") fell thru[2] - success?");
return true;
}
}
173 changes: 173 additions & 0 deletions src/test/java/org/ecocean/api/bulk/BulkImporterMissingAssetTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package org.ecocean;

import org.ecocean.api.bulk.*;
import org.ecocean.media.AssetStore;
import org.ecocean.media.MediaAsset;
import org.ecocean.shepherd.core.Shepherd;
import org.ecocean.shepherd.core.ShepherdPMF;

import javax.jdo.PersistenceManager;
import javax.jdo.PersistenceManagerFactory;
import javax.servlet.ServletException;

import org.junit.jupiter.api.Test;

import static org.junit.Assert.*;

import org.json.JSONObject;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.ecocean.Keyword;

import org.mockito.MockedConstruction;
import org.mockito.MockedStatic;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockConstruction;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.when;

/**
* Covers the BulkImporter.processRow() tolerance guard: a row referencing a
* mediaAsset whose MediaAsset is absent from the maMap (which happens when an
* image failed validation at creation time -- see AssetStore.isValidImage)
* must NOT abort the import. Instead the bad image is skipped and the encounter
* still imports with its remaining valid images.
*/
class BulkImporterMissingAssetTest {
PersistenceManagerFactory mockPMF;
PersistenceManager mockPM = mock(PersistenceManager.class);

@Test void missingAssetIsSkippedNotThrown()
throws ServletException {
Occurrence occ = mock(Occurrence.class);
User user = mock(User.class);

// A row with two images: index 0 is present in the maMap, index 1 is
// NOT (simulating an image that failed isValidImage and so never got a
// MediaAsset created for it).
Map<String, Object> row = new HashMap<String, Object>();
row.put("Encounter.submitterID", "fakeSubmitterId");
row.put("Encounter.year", 2000);
row.put("Encounter.genus", "genus");
row.put("Encounter.specificEpithet", "specificEpithet");
row.put("Encounter.mediaAsset0", "good.jpg");
row.put("Encounter.mediaAsset1", "corrupt.jpg");

// maMap holds ONLY the valid image; "corrupt.jpg" is intentionally absent.
// A mocked store keeps the diagnostic MediaAsset.toString() (logged during
// the persistence loop) from NPEing on an otherwise-bare asset.
MediaAsset goodMA = new MediaAsset(0, mock(AssetStore.class), null);
Map<String, MediaAsset> maMap = new HashMap<String, MediaAsset>();
maMap.put("good.jpg", goodMA);

try (MockedConstruction<Shepherd> mockShepherd = mockConstruction(Shepherd.class,
(mockSh, context) -> {
when(mockSh.getContext()).thenReturn("context0");
when(mockSh.getPM()).thenReturn(mockPM);
when(mockSh.getUser(any(String.class))).thenReturn(user);
when(mockSh.getOrCreateOccurrence(any(String.class))).thenReturn(occ);
when(mockSh.getOrCreateOccurrence(null)).thenReturn(occ);
when(mockSh.isValidTaxonomyName(any(String.class))).thenReturn(true);
})) {
try (MockedStatic<ShepherdPMF> mockService = mockStatic(ShepherdPMF.class)) {
mockService.when(() -> ShepherdPMF.getPMF(any(String.class))).thenReturn(mockPMF);

Shepherd myShepherd = new Shepherd("context0");
Map<String, Object> validated = BulkImportUtil.validateRow(
new JSONObject(row), myShepherd);
List<Map<String, Object> > allRows = new ArrayList<Map<String, Object> >();
allRows.add(validated);

BulkImporter imp = new BulkImporter("test-missing-asset", allRows, maMap, null,
myShepherd);
// Before the guard this threw RuntimeException (wrapped as
// ServletException) on the missing "corrupt.jpg" asset.
imp.createImport();

assertEquals(1, Util.collectionSize(imp.getEncounters()));
Encounter enc = imp.getEncounters().get(0);
// Only the present ("good.jpg") image yields an annotation; the
// missing one was skipped, not fatal.
assertEquals(1, Util.collectionSize(enc.getAnnotations()));
assertEquals(goodMA, enc.getAnnotations().get(0).getMediaAsset());
}
}
}

/**
* Alignment guard: when the CORRUPT/missing image is at index 0 and a VALID
* image is at index 1, the keyword/quality lists are POSITIONAL, so the
* surviving valid image must read keyword1/quality1 -- NOT keyword0/quality0
* (the corrupt image's metadata). This proves processRow() advances `offset`
* when it skips a missing MediaAsset, consuming the corrupt image's column
* slot so later images stay column-aligned.
*/
@Test void corruptFirstKeepsKeywordQualityAligned()
throws ServletException {
Occurrence occ = mock(Occurrence.class);
User user = mock(User.class);

// index 0 = corrupt (absent from maMap) with its own keyword/quality;
// index 1 = the valid image with DISTINCT keyword/quality.
Map<String, Object> row = new HashMap<String, Object>();
row.put("Encounter.submitterID", "fakeSubmitterId");
row.put("Encounter.year", 2000);
row.put("Encounter.genus", "genus");
row.put("Encounter.specificEpithet", "specificEpithet");
row.put("Encounter.mediaAsset0", "corrupt.jpg");
row.put("Encounter.keyword0", "corruptKeyword");
row.put("Encounter.quality0", 1.0);
row.put("Encounter.mediaAsset1", "good.jpg");
row.put("Encounter.keyword1", "goodKeyword");
row.put("Encounter.quality1", 5.0);

MediaAsset goodMA = new MediaAsset(0, mock(AssetStore.class), null);
Map<String, MediaAsset> maMap = new HashMap<String, MediaAsset>();
maMap.put("good.jpg", goodMA); // "corrupt.jpg" intentionally absent

try (MockedConstruction<Shepherd> mockShepherd = mockConstruction(Shepherd.class,
(mockSh, context) -> {
when(mockSh.getContext()).thenReturn("context0");
when(mockSh.getPM()).thenReturn(mockPM);
when(mockSh.getUser(any(String.class))).thenReturn(user);
when(mockSh.getOrCreateOccurrence(any(String.class))).thenReturn(occ);
when(mockSh.getOrCreateOccurrence(null)).thenReturn(occ);
when(mockSh.isValidTaxonomyName(any(String.class))).thenReturn(true);
})) {
try (MockedStatic<ShepherdPMF> mockService = mockStatic(ShepherdPMF.class)) {
mockService.when(() -> ShepherdPMF.getPMF(any(String.class))).thenReturn(mockPMF);

Shepherd myShepherd = new Shepherd("context0");
Map<String, Object> validated = BulkImportUtil.validateRow(
new JSONObject(row), myShepherd);
List<Map<String, Object> > allRows = new ArrayList<Map<String, Object> >();
allRows.add(validated);

BulkImporter imp = new BulkImporter("test-corrupt-first", allRows, maMap, null,
myShepherd);
imp.createImport();

assertEquals(1, Util.collectionSize(imp.getEncounters()));
Encounter enc = imp.getEncounters().get(0);
// Only the present ("good.jpg") image yields an annotation.
assertEquals(1, Util.collectionSize(enc.getAnnotations()));
Annotation ann = enc.getAnnotations().get(0);
assertEquals(goodMA, ann.getMediaAsset());

// The CORE assertion: the surviving valid image must carry its
// OWN positional metadata (keyword1/quality1), not the corrupt
// image's (keyword0/quality0). Before the offset++ fix it would
// inherit "corruptKeyword"/1.0.
assertEquals(Double.valueOf(5.0), ann.getQuality());
List<Keyword> kws = goodMA.getKeywords();
assertEquals(1, Util.collectionSize(kws));
assertEquals("goodKeyword", kws.get(0).getReadableName());
}
}
}
}
54 changes: 54 additions & 0 deletions src/test/java/org/ecocean/media/AssetStoreIsValidImageTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.ecocean.media;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Files;
import javax.imageio.ImageIO;

import org.junit.jupiter.api.Test;

public class AssetStoreIsValidImageTest {

/** Write a small, structurally valid baseline JPEG to a temp file. */
private static byte[] validJpegBytes() throws Exception {
BufferedImage img = new BufferedImage(64, 64, BufferedImage.TYPE_INT_RGB);
for (int x = 0; x < 64; x++)
for (int y = 0; y < 64; y++) img.setRGB(x, y, (x * 4) << 16 | (y * 4) << 8);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ImageIO.write(img, "jpg", bos);
return bos.toByteArray();
}

private static File writeTemp(byte[] bytes, String suffix) throws Exception {
File f = File.createTempFile("isvalidimg", suffix);
f.deleteOnExit();
Files.write(f.toPath(), bytes);
return f;
}

@Test
public void validJpegIsValid() throws Exception {
File f = writeTemp(validJpegBytes(), ".jpg");
assertTrue(AssetStore.isValidImage(f), "a clean JPEG must validate as a valid image");
}

@Test
public void corruptMarkerJpegIsInvalid() throws Exception {
// Inject an unsupported marker (0xFF 0x99) into the scan data, after the
// header, to reproduce the real-world failure mode: ImageIO.read() throws
// IIOException "Unsupported marker type 0x99" (cause is NOT EOFException).
byte[] good = validJpegBytes();
int spliceAt = Math.max(20, good.length - 40); // inside the entropy-coded data
good[spliceAt] = (byte) 0xFF;
good[spliceAt + 1] = (byte) 0x99;
good[spliceAt + 2] = (byte) 0xFF;
good[spliceAt + 3] = (byte) 0x99;
File f = writeTemp(good, ".jpg");
assertFalse(AssetStore.isValidImage(f),
"a JPEG whose decode throws a non-EOF IIOException must be rejected as invalid");
}
}
Loading