From 6bc77f242bf2461819cca78006c447f249fb710c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Thu, 24 Apr 2025 17:16:48 +0200 Subject: [PATCH 1/8] Enable -Yexplicit-nulls and import scala.language.unsafeNulls where needed --- .../cp/meta/core/data/JsonSupport.scala | 2 +- .../nateko/cp/meta/KmlGeoJsonWorkbench.scala | 128 ------------------ 2 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala diff --git a/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala b/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala index b6a639bae..c31da684c 100644 --- a/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala +++ b/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala @@ -76,7 +76,7 @@ object JsonSupport extends CommonJsonSupport: case c: Circle => c.toJson case p: Pin => p.toJson case jsgf: FeatureWithGeoJson => jsgf.toJson - vanilla.pluss(TypeField -> geo.getClass.getSimpleName) + vanilla.pluss(TypeField -> geo.getClass.getSimpleName.nn) def read(value: JsValue): GeoFeature = value match case JsObject(fields) => diff --git a/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala b/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala deleted file mode 100644 index 7bca946a7..000000000 --- a/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala +++ /dev/null @@ -1,128 +0,0 @@ -package se.lu.nateko.cp.meta - -import scala.language.unsafeNulls - -import com.scalakml.io.KmzFileReader -import com.scalakml.kml.* -import se.lu.nateko.cp.meta.core.data.{Circle, FeatureCollection, GeoFeature, GeoJson, Polygon as GeoPolygon, Position} -import se.lu.nateko.cp.meta.core.etcupload.StationId -import spray.json.{JsNull, JsValue} - -import java.io.File -import java.net.{URI, URL} -import scala.io.Source - -object KmlGeoJsonWorkbench { - - val workDir = "/home/oleg/Downloads/ETC_kmz/" - - def saveKmzs = { - import sys.process.* - for((id, url) <- getKmzUrls){ - (url #> new File(workDir + id.id + ".kmz")).!! - } - } - - def parseKmzs: Iterable[(StationId, JsValue)] = { - new File(workDir).listFiles().map{file => - val id = StationId.unapply(file.getName.stripSuffix(".kmz")).get - id -> getGeoJson(file) - }.sortBy(_._1.id) - } - - def getKmzUrls: Iterable[(StationId, URL)] = { - val lines = Source.fromURL("http://gaia.agraria.unitus.it:89/cpmeta?type=station").getLines() - val header = lines.next().split("\t", -1) - val idIdx = header.indexOf("SITE_ID") - val kmzIdx = header.indexOf("URL_KMZ") - lines.map{line => - val cells = line.split("\t", -1) - val urlBase = cells(kmzIdx).trim - StationId.unapply(cells(idIdx).trim).filterNot(_ => urlBase.isEmpty).map{ - _ -> URI(urlBase + "/download").toURL - } - }.flatten.toIndexedSeq - } - - def getGeoJson(kmz: File): JsValue = { - val areas: Seq[GeoFeature] = new KmzFileReader() - .getKmlFromKmzFile(kmz).flatten.flatMap(_.feature) - .collect{ - case f: Folder => f.features - }.flatten.collect{ - case d: Document if isRelevantDoc(d) => d.features - }.flatten.collect{ - case f: Folder => f.features - case pm: Placemark => Seq(pm) - }.flatten.collect{ - case pm: Placemark => - val lbl = pm.featurePart.name.map(_.trim) - pm.geometry.toSeq.collect{ - case poly: Polygon => processPolygon(poly) - case p: Point => p.coordinates.collect{ - case Coordinate(Some(lon), Some(lat), _) => Position.ofLatLon(lat, lon) - } - }.flatten.map(_.withOptLabel(lbl)) - }.flatten.toList - - areas match{ - case Nil => JsNull - case feat :: Nil => GeoJson.fromFeature(feat) - case multi => GeoJson.fromFeature(FeatureCollection(multi, None, None)) - } - - } - - private def processPolygon(poly: Polygon): Option[GeoFeature] = { - poly.outerBoundaryIs.flatMap(_.linearRing).flatMap(_.coordinates).map{coords => - - val posOriginal = coords.collect{ - case Coordinate(Some(lon), Some(lat), altOpt) => Position(lat, lon, altOpt.filterNot(_ == 0).map(_.toFloat), None, None) - } - - val positions = (if(areClockwise(posOriginal)) posOriginal.reverse else posOriginal).dropRight(1) - - getCircle(positions).getOrElse(GeoPolygon(positions, None, None)) - } - } - - def isRelevantDoc(d: Document): Boolean = { - d.featurePart.name.fold(false){n => - n.trim == "CP areas" || - n.trim == "Target area" || - //n.contains("_LCT") || - //n.contains("_CP") || - //n.contains("(CPs)") || - n.contains("_TA") - } - } - - def areClockwise(pos: Seq[Position]): Boolean = { - pos.sliding(2, 1).map{ps => (ps(1).lon - ps(0).lon) * (ps(1).lat + ps(0).lat)}.sum >= 0 - } - - def getCircle(pos: Seq[Position]): Option[Circle] = if(pos.size < 8) None else{ - val n = pos.size - - val centerLat = pos.map(_.lat).sum / n - val centerLon = pos.map(_.lon).sum / n - val rLon = Math.cos(Math.toRadians(centerLat)) - - val centerDists = pos.map{ p => //good approximation only for small distances - val dlat = Math.toRadians(p.lat - centerLat) - val dlon = Math.toRadians(p.lon - centerLon) * rLon - Math.sqrt(dlat * dlat + dlon * dlon) - } - - val averDist = centerDists.sum / n - val deviations = centerDists.map(dist => Math.abs(averDist - dist)) - val maxDeviation = deviations.max / averDist - - if(maxDeviation < 0.03) - Some(Circle(Position.ofLatLon(centerLat, centerLon), (averDist * 6371000).toFloat, None, None)) - else - None - } - - -} From a0aee078c53fe86657c7c7ddc7fe449ba6ebb427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 16:38:02 +0200 Subject: [PATCH 2/8] Fix forEach callbacks in HierarchicalBitmap --- .../meta/core/algo/HierarchicalBitmap.scala | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala b/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala index 1dd2546a6..82891da45 100644 --- a/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala +++ b/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala @@ -4,6 +4,7 @@ import scala.language.unsafeNulls import org.roaringbitmap.buffer.ImmutableRoaringBitmap import org.roaringbitmap.buffer.MutableRoaringBitmap +import org.roaringbitmap.IntConsumer import java.io.DataInput import java.io.DataOutput @@ -51,7 +52,11 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: if(!seenDifferentKeys) assessDiversityOfKeys(key) if children.isEmpty && seenDifferentKeys && (n >= geo.spilloverThreshold) - then values.forEach{v => addToChild(geo.keyLookup(v), v)} + then values.forEach(new IntConsumer { + def accept(v: Int): Unit = { + addToChild(geo.keyLookup(v), v) + } + }) !wasPresent @@ -136,7 +141,12 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: val res = if(seenDifferentKeys && amount > 1){ val list = new ju.ArrayList[Int](amount) - filtered.forEach((i: Int) => {list.add(i);()}) + filtered.forEach(new IntConsumer { + def accept(i: Int): Unit = { + list.add(i); + } + }) + list.sort(iter.valComp) list.iterator.asScala } else @@ -187,10 +197,13 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: } } else { val filtered = emptyBitmap - values.forEach((v: Int) => { - val key = geo.keyLookup(v) - if(filterKey(key, req)) filtered.add(v) + values.forEach(new IntConsumer{ + def accept(v: Int) : Unit = { + val key = geo.keyLookup(v) + if(filterKey(key, req)) filtered.add(v) + } }) + //println(s"seen different keys, got ${filtered.getCardinality} results") filtered } From 33ff717b96de5f31e29ddfbdd70e830adb1ef788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 16:39:47 +0200 Subject: [PATCH 3/8] Fix forEach callback in GeoIndex --- .../nateko/cp/meta/services/sparql/magic/GeoIndex.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala index 0ef281b61..fa7d52f03 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala @@ -5,6 +5,7 @@ import scala.language.unsafeNulls import org.locationtech.jts.algorithm.hull.ConcaveHull import org.locationtech.jts.geom.{Envelope, Geometry, GeometryCollection, GeometryFactory} import org.roaringbitmap.buffer.{ImmutableRoaringBitmap, MutableRoaringBitmap} +import org.roaringbitmap.IntConsumer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -139,8 +140,11 @@ class DenseCluster(val area: Geometry, objectIds: MutableRoaringBitmap) extends else val currentDataCovs = new ArrayBuffer[DataObjCov]() - objectIds.forEach: objId => - currentDataCovs.addOne(DataObjCov(objId, area)) + objectIds.forEach(new IntConsumer{ + def accept(objId: Int) = { + currentDataCovs.addOne(DataObjCov(objId, area)) + } + }) currentDataCovs.addOne(dobjCov) From 63b7f7766b19ba2e954eea6f48484dea6e4a8fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 17:29:56 +0200 Subject: [PATCH 4/8] Remove accidental `.nn` change --- .../main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala b/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala index c31da684c..b6a639bae 100644 --- a/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala +++ b/core/src/main/scala/se/lu/nateko/cp/meta/core/data/JsonSupport.scala @@ -76,7 +76,7 @@ object JsonSupport extends CommonJsonSupport: case c: Circle => c.toJson case p: Pin => p.toJson case jsgf: FeatureWithGeoJson => jsgf.toJson - vanilla.pluss(TypeField -> geo.getClass.getSimpleName.nn) + vanilla.pluss(TypeField -> geo.getClass.getSimpleName) def read(value: JsValue): GeoFeature = value match case JsObject(fields) => From f141f81b2c8dec1f81b0cca466e501740de9868b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 17:31:33 +0200 Subject: [PATCH 5/8] Add accidentally removed file --- .../nateko/cp/meta/KmlGeoJsonWorkbench.scala | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala diff --git a/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala b/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala new file mode 100644 index 000000000..7bca946a7 --- /dev/null +++ b/src/test/scala/se/lu/nateko/cp/meta/KmlGeoJsonWorkbench.scala @@ -0,0 +1,128 @@ +package se.lu.nateko.cp.meta + +import scala.language.unsafeNulls + +import com.scalakml.io.KmzFileReader +import com.scalakml.kml.* +import se.lu.nateko.cp.meta.core.data.{Circle, FeatureCollection, GeoFeature, GeoJson, Polygon as GeoPolygon, Position} +import se.lu.nateko.cp.meta.core.etcupload.StationId +import spray.json.{JsNull, JsValue} + +import java.io.File +import java.net.{URI, URL} +import scala.io.Source + +object KmlGeoJsonWorkbench { + + val workDir = "/home/oleg/Downloads/ETC_kmz/" + + def saveKmzs = { + import sys.process.* + for((id, url) <- getKmzUrls){ + (url #> new File(workDir + id.id + ".kmz")).!! + } + } + + def parseKmzs: Iterable[(StationId, JsValue)] = { + new File(workDir).listFiles().map{file => + val id = StationId.unapply(file.getName.stripSuffix(".kmz")).get + id -> getGeoJson(file) + }.sortBy(_._1.id) + } + + def getKmzUrls: Iterable[(StationId, URL)] = { + val lines = Source.fromURL("http://gaia.agraria.unitus.it:89/cpmeta?type=station").getLines() + val header = lines.next().split("\t", -1) + val idIdx = header.indexOf("SITE_ID") + val kmzIdx = header.indexOf("URL_KMZ") + lines.map{line => + val cells = line.split("\t", -1) + val urlBase = cells(kmzIdx).trim + StationId.unapply(cells(idIdx).trim).filterNot(_ => urlBase.isEmpty).map{ + _ -> URI(urlBase + "/download").toURL + } + }.flatten.toIndexedSeq + } + + def getGeoJson(kmz: File): JsValue = { + val areas: Seq[GeoFeature] = new KmzFileReader() + .getKmlFromKmzFile(kmz).flatten.flatMap(_.feature) + .collect{ + case f: Folder => f.features + }.flatten.collect{ + case d: Document if isRelevantDoc(d) => d.features + }.flatten.collect{ + case f: Folder => f.features + case pm: Placemark => Seq(pm) + }.flatten.collect{ + case pm: Placemark => + val lbl = pm.featurePart.name.map(_.trim) + pm.geometry.toSeq.collect{ + case poly: Polygon => processPolygon(poly) + case p: Point => p.coordinates.collect{ + case Coordinate(Some(lon), Some(lat), _) => Position.ofLatLon(lat, lon) + } + }.flatten.map(_.withOptLabel(lbl)) + }.flatten.toList + + areas match{ + case Nil => JsNull + case feat :: Nil => GeoJson.fromFeature(feat) + case multi => GeoJson.fromFeature(FeatureCollection(multi, None, None)) + } + + } + + private def processPolygon(poly: Polygon): Option[GeoFeature] = { + poly.outerBoundaryIs.flatMap(_.linearRing).flatMap(_.coordinates).map{coords => + + val posOriginal = coords.collect{ + case Coordinate(Some(lon), Some(lat), altOpt) => Position(lat, lon, altOpt.filterNot(_ == 0).map(_.toFloat), None, None) + } + + val positions = (if(areClockwise(posOriginal)) posOriginal.reverse else posOriginal).dropRight(1) + + getCircle(positions).getOrElse(GeoPolygon(positions, None, None)) + } + } + + def isRelevantDoc(d: Document): Boolean = { + d.featurePart.name.fold(false){n => + n.trim == "CP areas" || + n.trim == "Target area" || + //n.contains("_LCT") || + //n.contains("_CP") || + //n.contains("(CPs)") || + n.contains("_TA") + } + } + + def areClockwise(pos: Seq[Position]): Boolean = { + pos.sliding(2, 1).map{ps => (ps(1).lon - ps(0).lon) * (ps(1).lat + ps(0).lat)}.sum >= 0 + } + + def getCircle(pos: Seq[Position]): Option[Circle] = if(pos.size < 8) None else{ + val n = pos.size + + val centerLat = pos.map(_.lat).sum / n + val centerLon = pos.map(_.lon).sum / n + val rLon = Math.cos(Math.toRadians(centerLat)) + + val centerDists = pos.map{ p => //good approximation only for small distances + val dlat = Math.toRadians(p.lat - centerLat) + val dlon = Math.toRadians(p.lon - centerLon) * rLon + Math.sqrt(dlat * dlat + dlon * dlon) + } + + val averDist = centerDists.sum / n + val deviations = centerDists.map(dist => Math.abs(averDist - dist)) + val maxDeviation = deviations.max / averDist + + if(maxDeviation < 0.03) + Some(Circle(Position.ofLatLon(centerLat, centerLon), (averDist * 6371000).toFloat, None, None)) + else + None + } + + +} From ef95fecdeb0fda034f30580291cf2aa2fd246d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 17:44:26 +0200 Subject: [PATCH 6/8] Benefit from explicit-nulls in CpIndex and IndexData --- .../meta/services/sparql/magic/CpIndex.scala | 20 +++---- .../services/sparql/magic/Filtering.scala | 14 ++--- .../sparql/magic/index/IndexData.scala | 55 ++++++++++++------- .../sparql/magic/index/ObjEntry.scala | 21 +++---- 4 files changed, 61 insertions(+), 49 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala index 6b251384f..ca711e114 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala @@ -1,7 +1,5 @@ package se.lu.nateko.cp.meta.services.sparql.magic -import scala.language.unsafeNulls - import org.eclipse.rdf4j.model.IRI import org.eclipse.rdf4j.model.ValueFactory import org.eclipse.rdf4j.sail.Sail @@ -36,10 +34,10 @@ trait ObjSpecific{ } trait ObjInfo extends ObjSpecific{ - def spec: IRI - def submitter: IRI - def station: IRI - def site: IRI + def spec: IRI | Null + def submitter: IRI | Null + def station: IRI | Null + def site: IRI | Null def fileName: Option[String] def sizeInBytes: Option[Long] def samplingHeightMeters: Option[Float] @@ -51,7 +49,7 @@ trait ObjInfo extends ObjSpecific{ } class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData)(using EnvriConfigs) extends ReadWriteLocking: - private val log = LoggerFactory.getLogger(getClass()) + private val log = LoggerFactory.getLogger(getClass()).nn private val filtering = Filtering(data, geo) import data.{contMap, stats, objs, initOk, idLookup} @@ -89,7 +87,7 @@ class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData)(using EnvriCon log.info("CpIndex got initialized with non-empty index data to use") reportDebugInfo() - given factory: ValueFactory = sail.getValueFactory + given factory: ValueFactory = sail.getValueFactory.nn val vocab = new CpmetaVocab(factory) private val queue = new ArrayBlockingQueue[RdfUpdate](UpdateQueueSize) @@ -100,11 +98,11 @@ class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData)(using EnvriCon def fetch(req: DataObjectFetch): Iterator[ObjInfo] = readLocked{ //val start = System.currentTimeMillis - val filter = filtering(req.filter).fold(initOk)(BufferFastAggregation.and(_, initOk)) + val filter = filtering(req.filter).fold(initOk)(BufferFastAggregation.and(_, initOk).nn) val idxIter: Iterator[Int] = req.sort match{ case None => - filter.iterator.asScala.drop(req.offset).map(_.intValue) + filter.iterator.nn.asScala.drop(req.offset).map(_.intValue) case Some(SortBy(prop, descending)) => data.bitmap(prop).iterateSorted(Some(filter), req.offset, descending) } @@ -131,7 +129,7 @@ class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData)(using EnvriCon } def getUniqueKeywords(req: DataObjectFetch): Iterable[String] = readLocked { - val objectIds = filtering(req.filter).fold(initOk)(BufferFastAggregation.and(_, initOk)) + val objectIds = filtering(req.filter).fold(initOk)(BufferFastAggregation.and(_, initOk).nn) data.getObjectKeywords(objectIds) } diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/Filtering.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/Filtering.scala index 66c88d139..36ca1e12f 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/Filtering.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/Filtering.scala @@ -1,7 +1,5 @@ package se.lu.nateko.cp.meta.services.sparql.magic -import scala.language.unsafeNulls - import org.eclipse.rdf4j.model.IRI import org.roaringbitmap.buffer.{BufferFastAggregation, ImmutableRoaringBitmap, MutableRoaringBitmap} import se.lu.nateko.cp.meta.services.sparql.index.* @@ -34,7 +32,7 @@ class Filtering(data: IndexData, geo: Future[GeoIndex]) { case Exists(prop) => prop match { case cp: ContProp => Some(data.bitmap(cp).all) - case optUriProp: OptUriProperty => + case optUriProp: OptUriProperty => // TODO: Not covered by tests, so unsure if it's correct right now. data.categoryBitmap(optUriProp, Seq(None)) match { case bm if bm.isEmpty => None @@ -53,7 +51,7 @@ class Filtering(data: IndexData, geo: Future[GeoIndex]) { .collect { case iri: IRI => iri } .collect { case CpVocab.DataObject(hash, _) => idLookup.get(hash) } .flatten - Some(ImmutableRoaringBitmap.bitmapOf(objIndices*)) + Some(ImmutableRoaringBitmap.bitmapOf(objIndices*).nn) case CategFilter(category, values) => Some(data.categoryBitmap(category, values)) @@ -94,8 +92,8 @@ class Filtering(data: IndexData, geo: Future[GeoIndex]) { case Some(Failure(exc)) => throw Exception("Geo indexing failed", exc) - private def negate(bm: ImmutableRoaringBitmap) = - if objs.length == 0 then emptyBitmap else ImmutableRoaringBitmap.flip(bm, 0, objs.length.toLong) + private def negate(bm: ImmutableRoaringBitmap): MutableRoaringBitmap = + if objs.length == 0 then emptyBitmap else ImmutableRoaringBitmap.flip(bm, 0, objs.length.toLong).nn private def collectUnless[T](iter: Iterator[T])(cond: T => Boolean): Option[Seq[T]] = { var condHappened = false @@ -107,8 +105,8 @@ class Filtering(data: IndexData, geo: Future[GeoIndex]) { } private def or(bms: Seq[ImmutableRoaringBitmap]): Option[MutableRoaringBitmap] = - if (bms.isEmpty) Some(emptyBitmap) else Some(BufferFastAggregation.or(bms*)) + if (bms.isEmpty) Some(emptyBitmap) else Some(BufferFastAggregation.or(bms*).nn) private def and(bms: Seq[ImmutableRoaringBitmap]): Option[MutableRoaringBitmap] = - if (bms.isEmpty) None else Some(BufferFastAggregation.and(bms*)) + if (bms.isEmpty) None else Some(BufferFastAggregation.and(bms*).nn) } diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index a8c5e43c6..407c3607f 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -46,12 +46,12 @@ final class DataStartGeo(objs: IndSeq[ObjEntry]) extends DateTimeGeo(objs(_).dat final class DataEndGeo(objs: IndSeq[ObjEntry]) extends DateTimeGeo(objs(_).dataEnd) final class SubmStartGeo(objs: IndSeq[ObjEntry]) extends DateTimeGeo(objs(_).submissionStart) final class SubmEndGeo(objs: IndSeq[ObjEntry]) extends DateTimeGeo(objs(_).submissionEnd) -final class FileNameGeo(objs: IndSeq[ObjEntry]) extends StringGeo(objs.apply(_).fName) +final class FileNameGeo(objs: IndSeq[ObjEntry]) extends StringGeo(objs.apply(_).nn.fName.nn) final case class StatKey(spec: IRI, submitter: IRI, station: Option[IRI], site: Option[IRI]) final case class StatEntry(key: StatKey, count: Int) -def emptyBitmap = MutableRoaringBitmap.bitmapOf() +def emptyBitmap: MutableRoaringBitmap = MutableRoaringBitmap.bitmapOf().nn final class IndexData(nObjects: Int)( // These members are public only because of serialization, and should not be accessed directly. @@ -65,7 +65,7 @@ final class IndexData(nObjects: Int)( val stats: AnyRefMap[StatKey, MutableRoaringBitmap] = AnyRefMap.empty, val initOk: MutableRoaringBitmap = emptyBitmap ) extends Serializable: - private val log = LoggerFactory.getLogger(getClass()) + private val log = LoggerFactory.getLogger(getClass()).nn private def dataStartBm = DatetimeHierarchicalBitmap(DataStartGeo(objs)) private def dataEndBm = DatetimeHierarchicalBitmap(DataEndGeo(objs)) @@ -109,7 +109,7 @@ final class IndexData(nObjects: Int)( keywordBitmap(values.asInstanceOf[Iterable[Keyword.ValueType]]) case _ => { val category = categMap(prop) - BufferFastAggregation.or(values.map(v => category.getOrElse(v, emptyBitmap)).toSeq*) + BufferFastAggregation.or(values.map(v => category.getOrElse(v, emptyBitmap)).toSeq*).nn } } } @@ -141,7 +141,7 @@ final class IndexData(nObjects: Int)( val objectMap = categMap(Keyword) val objects = keywords.flatMap(objectMap.get) - BufferFastAggregation.or(LazyList(specObjects, objects).flatten*) + BufferFastAggregation.or(LazyList(specObjects, objects).flatten*).nn } def processUpdate( @@ -164,10 +164,13 @@ final class IndexData(nObjects: Int)( if (filterByEnvri) EnvriResolver.infer(subj.toJava).foreach: envri => updateCategSet(EnvriProp, envri, oe.idx, isAssertion) if (isAssertion) { - if (oe.spec != null) removeStat(oe, initOk) + if (oe.spec != null) { + removeStat(oe, initOk) + } + oe.spec = spec addStat(oe, initOk) - } else if (spec === oe.spec) { + } else if (spec == oe.spec) { removeStat(oe, initOk) oe.spec = null } @@ -183,7 +186,7 @@ final class IndexData(nObjects: Int)( case `hasName` => getDataObject(subj).foreach { oe => - val fName = obj.stringValue + val fName: String = obj.stringValue.nn if (isAssertion) oe.fName = fName else if (oe.fName == fName) { oe.fName = null } handleContinuousPropUpdate(FileName, fName, oe.idx, isAssertion) @@ -310,7 +313,7 @@ final class IndexData(nObjects: Int)( val directPrevVers: IndexedSeq[Int] = StatementSource.getStatements(subj, isNextVersionOf, null) - .flatMap(st => getDataObject(st.getObject).map(_.idx)) + .flatMap(st => getDataObject(st.getObject.nn).map(_.idx)) .toIndexedSeq directPrevVers.foreach { oldIdx => @@ -437,7 +440,6 @@ final class IndexData(nObjects: Int)( val _ = mappings.remove(categ) } } - private def getSpecProjectKeywords(spec: IRI)(using CpmetaVocab, StatementSource): Set[String] = { StatementSource .getUriValues(spec, summon[CpmetaVocab].hasAssociatedProject) @@ -541,7 +543,7 @@ final class IndexData(nObjects: Int)( case CpVocab.DataObject(hash, prefix) => val entry = getObjEntry(hash) if (entry.prefix == "") { - entry.prefix = prefix.intern() + entry.prefix = prefix.nn.intern().nn } Some(entry) @@ -575,27 +577,40 @@ private def targetUri(obj: Value, isAssertion: Boolean) = then obj.asInstanceOf[IRI] else null +// TODO: Option.scala isn't currently written for explicit-nulls. Maybe changed in later scalac versions? +private def makeOption[A](arg: A | Null) = { + if (arg == null) { None } + else { Some(arg.nn) } +} + private def keyForDobj(obj: ObjEntry): Option[StatKey] = - if obj.spec == null || obj.submitter == null then None - else - Some( - StatKey(obj.spec, obj.submitter, Option(obj.station), Option(obj.site)) - ) + (obj.spec, obj.submitter) match { + case (spec: IRI, submitter: IRI) => { + Some( + StatKey( + spec, + submitter, + makeOption(obj.station), + makeOption(obj.site) + ) + ) + } + } private def ifDateTime(dt: Value)(mod: Long => Unit): Unit = dt match - case lit: Literal if lit.getDatatype === XSD.DATETIME => - try mod(Instant.parse(lit.stringValue).toEpochMilli) + case lit: Literal if lit.getDatatype == XSD.DATETIME => + try mod(Instant.parse(lit.stringValue).nn.toEpochMilli) catch case _: Throwable => () // ignoring wrong dateTimes case _ => private def ifLong(dt: Value)(mod: Long => Unit): Unit = dt match - case lit: Literal if lit.getDatatype === XSD.LONG => + case lit: Literal if lit.getDatatype == XSD.LONG => try mod(lit.longValue) catch case _: Throwable => () // ignoring wrong longs case _ => private def ifFloat(dt: Value)(mod: Float => Unit): Unit = dt match - case lit: Literal if lit.getDatatype === XSD.FLOAT => + case lit: Literal if lit.getDatatype == XSD.FLOAT => try mod(lit.floatValue) catch case _: Throwable => () // ignoring wrong floats case _ => diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala index ace326316..cf717851e 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala @@ -1,7 +1,5 @@ package se.lu.nateko.cp.meta.services.sparql.magic.index -import scala.language.unsafeNulls - import org.eclipse.rdf4j.model.{IRI, ValueFactory} import se.lu.nateko.cp.meta.core.crypto.Sha256Sum import se.lu.nateko.cp.meta.services.sparql.magic.ObjInfo @@ -10,12 +8,12 @@ import java.time.Instant import scala.compiletime.uninitialized final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends ObjInfo with Serializable { - var spec: IRI = uninitialized - var submitter: IRI = uninitialized - var station: IRI = uninitialized - var site: IRI = uninitialized + var spec: IRI | Null = uninitialized + var submitter: IRI | Null = uninitialized + var station: IRI | Null = uninitialized + var site: IRI | Null = uninitialized var size: Long = -1 - var fName: String = "" + var fName: String | Null = "" var samplingHeight: Float = Float.NaN var dataStart: Long = Long.MinValue var dataEnd: Long = Long.MinValue @@ -25,15 +23,18 @@ final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) exte private final def dateTimeFromLong(dt: Long): Option[Instant] = if (dt == Long.MinValue) None - else Some(Instant.ofEpochMilli(dt)) + else Some(Instant.ofEpochMilli(dt).nn) def sizeInBytes: Option[Long] = if (size >= 0) Some(size) else None - def fileName: Option[String] = Option(fName) + def fileName: Option[String] = { + if (fName == null) { None } + else { Some(fName.nn) } + } def samplingHeightMeters: Option[Float] = if (samplingHeight == Float.NaN) None else Some(samplingHeight) def dataStartTime: Option[Instant] = dateTimeFromLong(dataStart) def dataEndTime: Option[Instant] = dateTimeFromLong(dataEnd) def submissionStartTime: Option[Instant] = dateTimeFromLong(submissionStart) def submissionEndTime: Option[Instant] = dateTimeFromLong(submissionEnd) - def uri(factory: ValueFactory): IRI = factory.createIRI(prefix + hash.base64Url) + def uri(factory: ValueFactory): IRI = factory.createIRI(prefix + hash.base64Url).nn } From 4246bdaf8b48e38cdf6874954c156c6cf2627490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 16 May 2025 18:05:09 +0200 Subject: [PATCH 7/8] Actually remove unsafeNulls from IndexData.scala --- .../cp/meta/services/sparql/magic/index/IndexData.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index 407c3607f..dd112453b 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -1,7 +1,5 @@ package se.lu.nateko.cp.meta.services.sparql.magic.index -import scala.language.unsafeNulls - import org.eclipse.rdf4j.model.IRI import org.eclipse.rdf4j.model.Literal import org.eclipse.rdf4j.model.Statement @@ -83,7 +81,7 @@ final class IndexData(nObjects: Int)( def getObjectKeywords(objectIds: ImmutableRoaringBitmap): Iterable[String] = { categoryKeys(Keyword).collect { - case keyword if !BufferFastAggregation.and(objectIds, keywordBitmap(Seq(keyword))).isEmpty() => + case keyword if !BufferFastAggregation.and(objectIds, keywordBitmap(Seq(keyword))).nn.isEmpty() => keyword } } @@ -595,6 +593,7 @@ private def keyForDobj(obj: ObjEntry): Option[StatKey] = ) ) } + case _ => None } private def ifDateTime(dt: Value)(mod: Long => Unit): Unit = dt match From 18ee463511ca7f006a09d5876762b4b8b6ba7231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Wed, 27 Aug 2025 12:45:08 +0200 Subject: [PATCH 8/8] Merge fixup --- .../meta/core/algo/HierarchicalBitmap.scala | 23 ++++--------------- .../meta/services/sparql/magic/GeoIndex.scala | 8 ++----- .../sparql/magic/index/IndexData.scala | 1 - .../sparql/magic/index/ObjEntry.scala | 10 ++++---- 4 files changed, 13 insertions(+), 29 deletions(-) diff --git a/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala b/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala index 82891da45..1dd2546a6 100644 --- a/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala +++ b/core/src/main/scala/se/lu/nateko/cp/meta/core/algo/HierarchicalBitmap.scala @@ -4,7 +4,6 @@ import scala.language.unsafeNulls import org.roaringbitmap.buffer.ImmutableRoaringBitmap import org.roaringbitmap.buffer.MutableRoaringBitmap -import org.roaringbitmap.IntConsumer import java.io.DataInput import java.io.DataOutput @@ -52,11 +51,7 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: if(!seenDifferentKeys) assessDiversityOfKeys(key) if children.isEmpty && seenDifferentKeys && (n >= geo.spilloverThreshold) - then values.forEach(new IntConsumer { - def accept(v: Int): Unit = { - addToChild(geo.keyLookup(v), v) - } - }) + then values.forEach{v => addToChild(geo.keyLookup(v), v)} !wasPresent @@ -141,12 +136,7 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: val res = if(seenDifferentKeys && amount > 1){ val list = new ju.ArrayList[Int](amount) - filtered.forEach(new IntConsumer { - def accept(i: Int): Unit = { - list.add(i); - } - }) - + filtered.forEach((i: Int) => {list.add(i);()}) list.sort(iter.valComp) list.iterator.asScala } else @@ -197,13 +187,10 @@ class HierarchicalBitmap[K](val depth: Int, val coord: Option[Coord])(using geo: } } else { val filtered = emptyBitmap - values.forEach(new IntConsumer{ - def accept(v: Int) : Unit = { - val key = geo.keyLookup(v) - if(filterKey(key, req)) filtered.add(v) - } + values.forEach((v: Int) => { + val key = geo.keyLookup(v) + if(filterKey(key, req)) filtered.add(v) }) - //println(s"seen different keys, got ${filtered.getCardinality} results") filtered } diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala index fa7d52f03..0ef281b61 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/GeoIndex.scala @@ -5,7 +5,6 @@ import scala.language.unsafeNulls import org.locationtech.jts.algorithm.hull.ConcaveHull import org.locationtech.jts.geom.{Envelope, Geometry, GeometryCollection, GeometryFactory} import org.roaringbitmap.buffer.{ImmutableRoaringBitmap, MutableRoaringBitmap} -import org.roaringbitmap.IntConsumer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -140,11 +139,8 @@ class DenseCluster(val area: Geometry, objectIds: MutableRoaringBitmap) extends else val currentDataCovs = new ArrayBuffer[DataObjCov]() - objectIds.forEach(new IntConsumer{ - def accept(objId: Int) = { - currentDataCovs.addOne(DataObjCov(objId, area)) - } - }) + objectIds.forEach: objId => + currentDataCovs.addOne(DataObjCov(objId, area)) currentDataCovs.addOne(dobjCov) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index dd112453b..fee31c469 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -25,7 +25,6 @@ import se.lu.nateko.cp.meta.services.sparql.index.StringHierarchicalBitmap.Strin import se.lu.nateko.cp.meta.services.sparql.magic.ObjInfo import se.lu.nateko.cp.meta.utils.parseCommaSepList import se.lu.nateko.cp.meta.utils.parseJsonStringArray -import se.lu.nateko.cp.meta.utils.rdf4j.=== import se.lu.nateko.cp.meta.utils.rdf4j.Rdf4jStatement import se.lu.nateko.cp.meta.utils.rdf4j.asString import se.lu.nateko.cp.meta.utils.rdf4j.toJava diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala index cf717851e..958a6da3f 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala @@ -26,10 +26,7 @@ final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) exte else Some(Instant.ofEpochMilli(dt).nn) def sizeInBytes: Option[Long] = if (size >= 0) Some(size) else None - def fileName: Option[String] = { - if (fName == null) { None } - else { Some(fName.nn) } - } + def fileName: Option[String] = makeOption(fName) def samplingHeightMeters: Option[Float] = if (samplingHeight == Float.NaN) None else Some(samplingHeight) def dataStartTime: Option[Instant] = dateTimeFromLong(dataStart) def dataEndTime: Option[Instant] = dateTimeFromLong(dataEnd) @@ -37,4 +34,9 @@ final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) exte def submissionEndTime: Option[Instant] = dateTimeFromLong(submissionEnd) def uri(factory: ValueFactory): IRI = factory.createIRI(prefix + hash.base64Url).nn + + private def makeOption[T](arg: T | Null) : Option[T] = { + if (arg == null) { None } + else { Some(arg.nn) } + } }