Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,32 @@

<build>
<plugins>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
<version>2.44.5</version>
<executions>
<execution>
<goals>
<goal>apply</goal>
</goals>
<phase>validate</phase>
</execution>
</executions>
<configuration>
<java>
<includes>
<include>src/**/*.java</include>
</includes>
<googleJavaFormat>
<version>1.27.0</version>
<style>GOOGLE</style>
<reflowLongStrings>true</reflowLongStrings>
<formatJavadoc>false</formatJavadoc>
</googleJavaFormat>
</java>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,34 @@ public class BenchmarkConfiguration {
public boolean saveResultsOnDisk;
public String resultsDirectory;
public boolean hasColNames;
public String algoToRun; // keep as String
public String algoToRun; // keep as String
public String groundTruthFile;
public String cuvsIndexDirPath;
public String hnswIndexDirPath;
public boolean loadVectorsInMemory;
public boolean skipIndexing;
public int forceMerge;
public boolean enableTieredMerge;
public boolean enableIndexWriterInfoStream;
public int ramBufferSizeMB;

// Lucene HNSW parameters
public int hnswMaxConn; // 16 default (max 512)
public int hnswBeamWidth; // 100 default (max 3200)
public int hnswMaxConn; // 16 default (max 512)
public int hnswBeamWidth; // 100 default (max 3200)
public int hnswMergeThreads;

// CAGRA parameters
public int cagraIntermediateGraphDegree; // 128 default
public int cagraGraphDegree; // 64 default
public int cagraGraphDegree; // 64 default
public int cagraITopK;
public int cagraSearchWidth;
public int cagraHnswLayers; // layers in CAGRA->HNSW conversion
public int cagraHnswLayers; // layers in CAGRA->HNSW conversion
public int efSearch;

/** Returns {@code true} when the configured algorithm is Lucene's native HNSW. */
private boolean isLucene() {
  final String luceneAlgoName = "LUCENE_HNSW";
  return luceneAlgoName.equalsIgnoreCase(algoToRun);
}

/** Returns {@code true} when the configured algorithm is CAGRA (GPU graph converted to HNSW). */
private boolean isCagra() {
  final String cagraAlgoName = "CAGRA_HNSW";
  return cagraAlgoName.equalsIgnoreCase(algoToRun);
}
Expand Down Expand Up @@ -77,12 +81,22 @@ public String prettyString() {
sb.append("Has column names in the dataset file: ").append(hasColNames).append('\n');
sb.append("algoToRun {Choices: HNSW | CAGRA}: ").append(algoToRun).append('\n');
sb.append("Ground Truth file used is: ").append(groundTruthFile).append('\n');
if (cuvsIndexDirPath != null) sb.append("CuVS index directory path is: ").append(cuvsIndexDirPath).append('\n');
if (hnswIndexDirPath != null) sb.append("HNSW index directory path is: ").append(hnswIndexDirPath).append('\n');
if (cuvsIndexDirPath != null)
sb.append("CuVS index directory path is: ").append(cuvsIndexDirPath).append('\n');
if (hnswIndexDirPath != null)
sb.append("HNSW index directory path is: ").append(hnswIndexDirPath).append('\n');
sb.append("Load vectors in memory before indexing: ").append(loadVectorsInMemory).append('\n');
sb.append("Skip indexing (and use existing index for search): ").append(skipIndexing).append('\n');
sb.append("Do force merge while indexing documents [a value < 1 implies no force merge]: ").append(forceMerge).append('\n');

sb.append("Skip indexing (and use existing index for search): ")
.append(skipIndexing)
.append('\n');
sb.append("Do force merge while indexing documents [a value < 1 implies no force merge]: ")
.append(forceMerge)
.append('\n');
sb.append("Enable TieredMerge: ").append(enableTieredMerge).append('\n');
sb.append("Num merge threads: ").append(hnswMergeThreads).append('\n');
sb.append("enableIndexWriterInfoStream: ").append(enableIndexWriterInfoStream).append('\n');
sb.append("ramBufferSizeMB: ").append(ramBufferSizeMB).append('\n');

sb.append("------- algo parameters ------\n");
if (isLucene()) {
sb.append("hnswMaxConn: ").append(hnswMaxConn).append('\n');
Expand All @@ -98,7 +112,10 @@ public String prettyString() {
return sb.toString();
}

/**
 * Delegates to {@link #prettyString()} so that logging or printing a configuration
 * object emits the full human-readable summary.
 *
 * <p>Note: this span previously contained two copies of the method (diff residue of the
 * old one-liner plus the reformatted version); consolidated to a single definition.
 */
@Override
public String toString() {
  return prettyString();
}

public void debugPrintArguments() {
// keep a single source of truth for printing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.mapdb.IndexTreeList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// TODO: The three static methods share a lot of common logic; ideally they should be
// combined into one.
public class FBIvecsReader {

private static final Logger log = LoggerFactory.getLogger(FBIvecsReader.class.getName());
Expand Down Expand Up @@ -176,92 +175,91 @@ public static void readBvecs(String filePath, int numRows, List<float[]> vectors
}

// New method to read .fbin files (format: num_vectors, dimension, then vector data)
// Corrected readFbin method for Wiki-88M .fbin files
public static void readFbin(String filePath, int numRows, List<float[]> vectors) {
log.info("Reading {} from file: {}", numRows, filePath);

try (InputStream is = new FileInputStream(filePath)) {
// Read num_vectors (first 4 bytes, little endian)
byte[] numVecBytes = is.readNBytes(4);
ByteBuffer numVecBuffer = ByteBuffer.wrap(numVecBytes).order(ByteOrder.LITTLE_ENDIAN);
int numVectors = numVecBuffer.getInt();

// Read dimension (next 4 bytes, little endian)
byte[] dimBytes = is.readNBytes(4);
ByteBuffer dimBuffer = ByteBuffer.wrap(dimBytes).order(ByteOrder.LITTLE_ENDIAN);
int dimension = dimBuffer.getInt();

log.info("File header - total vectors: {}, dimension: {}", numVectors, dimension);

float[] row = new float[dimension];
int count = 0;

while (is.available() != 0) {
byte[] vectorBytes = is.readNBytes(dimension * 4);
if (vectorBytes.length != dimension * 4) break;
ByteBuffer bb = ByteBuffer.wrap(vectorBytes).order(ByteOrder.LITTLE_ENDIAN);
for (int i = 0; i < dimension; i++) row[i] = bb.getFloat();
vectors.add(row.clone());
count++;
if (numRows != -1 && count == numRows) break;
if (count % 1000 == 0) System.out.print(".");
// Corrected readFbin method for Wiki-88M .fbin files
public static void readFbin(String filePath, int numRows, List<float[]> vectors) {
log.info("Reading {} from file: {}", numRows, filePath);

try (InputStream is = new FileInputStream(filePath)) {
// Read num_vectors (first 4 bytes, little endian)
byte[] numVecBytes = is.readNBytes(4);
ByteBuffer numVecBuffer = ByteBuffer.wrap(numVecBytes).order(ByteOrder.LITTLE_ENDIAN);
int numVectors = numVecBuffer.getInt();

// Read dimension (next 4 bytes, little endian)
byte[] dimBytes = is.readNBytes(4);
ByteBuffer dimBuffer = ByteBuffer.wrap(dimBytes).order(ByteOrder.LITTLE_ENDIAN);
int dimension = dimBuffer.getInt();

log.info("File header - total vectors: {}, dimension: {}", numVectors, dimension);

float[] row = new float[dimension];
int count = 0;

while (is.available() != 0) {
byte[] vectorBytes = is.readNBytes(dimension * 4);
if (vectorBytes.length != dimension * 4) break;
ByteBuffer bb = ByteBuffer.wrap(vectorBytes).order(ByteOrder.LITTLE_ENDIAN);
for (int i = 0; i < dimension; i++) row[i] = bb.getFloat();
vectors.add(row.clone());
count++;
if (numRows != -1 && count == numRows) break;
if (count % 1000 == 0) System.out.print(".");
}
System.out.println();
log.info("Reading complete. Read {} vectors out of {} in file.", count, numVectors);
} catch (Exception e) {
log.error("Error reading fbin file", e);
}
System.out.println();
log.info("Reading complete. Read {} vectors out of {} in file.", count, numVectors);
} catch (Exception e) {
log.error("Error reading fbin file", e);
}
}

// Fixed method to read .ibin files (ground truth neighbors)
public static ArrayList<int[]> readIbin(String filePath, int numRows) {
log.info("Reading {} from file: {}", numRows, filePath);
ArrayList<int[]> vectors = new ArrayList<int[]>();
// Fixed method to read .ibin files (ground truth neighbors)
public static ArrayList<int[]> readIbin(String filePath, int numRows) {
log.info("Reading {} from file: {}", numRows, filePath);
ArrayList<int[]> vectors = new ArrayList<int[]>();

try {
InputStream is = new FileInputStream(filePath);
try {
InputStream is = new FileInputStream(filePath);

// For .ibin ground truth files: Read num_vectors first, then dimension
byte[] numVecBytes = is.readNBytes(4);
ByteBuffer numVecBuffer = ByteBuffer.wrap(numVecBytes).order(ByteOrder.LITTLE_ENDIAN);
int numVectors = numVecBuffer.getInt();
// For .ibin ground truth files: Read num_vectors first, then dimension
byte[] numVecBytes = is.readNBytes(4);
ByteBuffer numVecBuffer = ByteBuffer.wrap(numVecBytes).order(ByteOrder.LITTLE_ENDIAN);
int numVectors = numVecBuffer.getInt();

byte[] dimBytes = is.readNBytes(4);
ByteBuffer dimBuffer = ByteBuffer.wrap(dimBytes).order(ByteOrder.LITTLE_ENDIAN);
int dimension = dimBuffer.getInt();
byte[] dimBytes = is.readNBytes(4);
ByteBuffer dimBuffer = ByteBuffer.wrap(dimBytes).order(ByteOrder.LITTLE_ENDIAN);
int dimension = dimBuffer.getInt();

log.info("Ground truth file - total vectors: {}, dimension: {}", numVectors, dimension);
log.info("Ground truth file - total vectors: {}, dimension: {}", numVectors, dimension);

int count = 0;
while (is.available() != 0 && (numRows == -1 || count < numRows)) {
// Read dimension * 4 bytes (int values)
byte[] vectorBytes = is.readNBytes(dimension * 4);
if (vectorBytes.length != dimension * 4) {
break; // End of file
}
int count = 0;
while (is.available() != 0 && (numRows == -1 || count < numRows)) {
// Read dimension * 4 bytes (int values)
byte[] vectorBytes = is.readNBytes(dimension * 4);
if (vectorBytes.length != dimension * 4) {
break; // End of file
}

ByteBuffer bb = ByteBuffer.wrap(vectorBytes);
bb.order(ByteOrder.LITTLE_ENDIAN);
ByteBuffer bb = ByteBuffer.wrap(vectorBytes);
bb.order(ByteOrder.LITTLE_ENDIAN);

int[] row = new int[dimension];
for (int i = 0; i < dimension; i++) {
row[i] = bb.getInt();
}
int[] row = new int[dimension];
for (int i = 0; i < dimension; i++) {
row[i] = bb.getInt();
}

vectors.add(row);
count++;
vectors.add(row);
count++;

if (count % 1000 == 0) {
System.out.print(".");
if (count % 1000 == 0) {
System.out.print(".");
}
}
System.out.println();
is.close();
log.info("Reading complete. Read {} vectors out of {} total.", count, numVectors);
} catch (Exception e) {
e.printStackTrace();
}
System.out.println();
is.close();
log.info("Reading complete. Read {} vectors out of {} total.", count, numVectors);
} catch (Exception e) {
e.printStackTrace();
return vectors;
}
return vectors;
}

}
Loading