From 04aa1c286e6bf0508682a92f76ec2f43bddfe3b5 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 18 Apr 2026 22:47:26 +0200 Subject: [PATCH 1/8] feat: add healthcheck in docker image --- .dockerignore | 28 ++++++++ .github/workflows/ci-build.yml | 2 +- Dockerfile.datastet | 9 +++ build.gradle | 32 +++++++++ .../org/grobid/core/utilities/Versioner.java | 65 +++++++++++++++++++ .../controller/DatastetController.java | 9 +++ .../service/controller/DatastetPaths.java | 5 ++ src/main/resources/revision.txt | 1 + src/main/resources/version.txt | 1 + 9 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 src/main/java/org/grobid/core/utilities/Versioner.java create mode 100644 src/main/resources/revision.txt create mode 100644 src/main/resources/version.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..08125c6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,28 @@ +# Build outputs (regenerated inside the builder stage) +build/ +.gradle/ +out/ + +# IDE / editor / OS +.idea/ +.vscode/ +*.iml +.DS_Store + +# Runtime & logs (never needed at build time) +logs/ +tmp/ +*.log + +# Research / developer helpers never copied into the image +script/ +doc/ + +# Markdown & meta — not copied into the image +*.md +LICENSE +.claude/ +.github/ + +# Note: .git/ is intentionally NOT listed — Dockerfile.datastet COPYs it so +# Gradle can stamp revision.txt from `git rev-parse`. 
diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index 17119aa..c379690 100644 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -4,7 +4,7 @@ on: [push] concurrency: group: gradle -# cancel-in-progress: true + cancel-in-progress: true jobs: diff --git a/Dockerfile.datastet b/Dockerfile.datastet index 9de929f..1e7de38 100644 --- a/Dockerfile.datastet +++ b/Dockerfile.datastet @@ -84,6 +84,15 @@ ENV DATASTET_OPTS="-Djava.library.path=/opt/grobid/grobid-home/lib/lin-64:/usr/l CMD ["./datastet/bin/datastet", "server", "datastet/resources/config/config.yml"] +# Container-level health probe backed by the model-aware /service/health +# endpoint, which returns HTTP 503 until every classifier reports "loaded". +# The 180s start-period covers the worst case with modelPreload=true, where +# SciBERT + four context classifiers + DataType classifier all load before +# the first probe. Using python3 (required by JEP/DeLFT so guaranteed +# present) to avoid a dependency on curl/wget from the base image. 
+HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \ + CMD python3 -c "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8060/service/health', timeout=4).status == 200 else 1)" || exit 1 + LABEL \ authors="The contributors" \ org.label-schema.name="datastet" \ diff --git a/build.gradle b/build.gradle index 161328d..6a08787 100644 --- a/build.gradle +++ b/build.gradle @@ -30,6 +30,29 @@ apply plugin: 'java-library' apply plugin: 'base' apply plugin: 'com.github.kt3k.coveralls' +def getGitRevision() { + def gitRevision = "unknown" + try { + def result = providers.exec { + workingDir = rootProject.rootDir + commandLine 'git', 'describe', '--tags', '--always', '--first-parent' + } + gitRevision = result.standardOutput.asText.get().trim() + } catch (Exception e) { + println "Could not get Git revision: ${e}" + } + return gitRevision +} + +project.ext.gitRevision = getGitRevision() + +tasks.register('collectGitRevision') { + project.ext.gitRevision = getGitRevision() + doLast { + println "Git revision: ${project.ext.gitRevision}" + } +} + group = "org.grobid.datastet" version = '0.9.0' @@ -281,6 +304,15 @@ artifacts { archives shadowJar } +processResources { + filesMatching(["version.txt", "revision.txt"]) { + expand( + project_version: project.property('version') ?: "unknown", + project_revision: rootProject.ext.gitRevision + ) + } +} + task copyModels(type: Copy) { from "${rootDir}/resources/models" include "**/*.wapiti" diff --git a/src/main/java/org/grobid/core/utilities/Versioner.java b/src/main/java/org/grobid/core/utilities/Versioner.java new file mode 100644 index 0000000..1838ec8 --- /dev/null +++ b/src/main/java/org/grobid/core/utilities/Versioner.java @@ -0,0 +1,65 @@ +package org.grobid.core.utilities; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; 
+import java.nio.charset.StandardCharsets; + +public class Versioner { + public static final Logger LOGGER = LoggerFactory.getLogger(Versioner.class); + + private static final String VERSION_FILE = "/version.txt"; + private static final String REVISION_FILE = "/revision.txt"; + private static final String UNKNOWN_VERSION_STR = "unknown"; + + private static String VERSION = null; + private static String REVISION = null; + + public static String getVersion() { + if (VERSION != null) { + return VERSION; + } + synchronized (GrobidProperties.class) { + if (VERSION == null) { + VERSION = readFromSystemPropertyOrFromFile("project.version", VERSION_FILE); + } + } + return VERSION; + } + + public static String getRevision() { + if (REVISION != null) { + return REVISION; + } + synchronized (GrobidProperties.class) { + if (REVISION == null) { + REVISION = readFromSystemPropertyOrFromFile("gitRevision", REVISION_FILE); + } + } + return REVISION; + } + + private static String readFromSystemPropertyOrFromFile(String systemPropertyName, String filePath) { + String value = UNKNOWN_VERSION_STR; + String systemPropertyValue = System.getProperty(systemPropertyName); + if (systemPropertyValue != null) { + value = systemPropertyValue; + } else { + try (InputStream is = GrobidProperties.class.getResourceAsStream(filePath)) { + if (is != null) { + String tmp = IOUtils.toString(is, StandardCharsets.UTF_8); + if (!StringUtils.startsWithIgnoreCase(tmp, "${project_")) { + value = tmp.trim(); + } + } + } catch (IOException e) { + LOGGER.error("Cannot read {} from resources", filePath, e); + } + } + return value; + } +} diff --git a/src/main/java/org/grobid/service/controller/DatastetController.java b/src/main/java/org/grobid/service/controller/DatastetController.java index 955d4e1..8a5494f 100644 --- a/src/main/java/org/grobid/service/controller/DatastetController.java +++ b/src/main/java/org/grobid/service/controller/DatastetController.java @@ -6,8 +6,10 @@ import 
jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; import org.glassfish.jersey.media.multipart.FormDataParam; +import org.grobid.core.utilities.Versioner; import org.grobid.service.configuration.DatastetConfiguration; import org.grobid.service.configuration.DatastetServiceConfiguration; +import org.grobid.service.data.ServiceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +57,13 @@ public Response isAlive() { return DatastetRestProcessGeneric.isAlive(); } + @GET + @Path(PATH_VERSION) + @Produces(MediaType.APPLICATION_JSON) + public ServiceInfo getVersion() { + return new ServiceInfo(Versioner.getVersion(), Versioner.getRevision()); + } + @Path(PATH_DATASET_SENTENCE) @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") @POST diff --git a/src/main/java/org/grobid/service/controller/DatastetPaths.java b/src/main/java/org/grobid/service/controller/DatastetPaths.java index 697a2a3..6220f49 100644 --- a/src/main/java/org/grobid/service/controller/DatastetPaths.java +++ b/src/main/java/org/grobid/service/controller/DatastetPaths.java @@ -40,4 +40,9 @@ public interface DatastetPaths { */ public static final String PATH_RESYNC_DATATYPE_JSON = "resyncJsonDataTypes"; + /** + * path extension returning the running version + git revision of the service. 
+ */ + public static final String PATH_VERSION = "version"; + } diff --git a/src/main/resources/revision.txt b/src/main/resources/revision.txt new file mode 100644 index 0000000..de0de9e --- /dev/null +++ b/src/main/resources/revision.txt @@ -0,0 +1 @@ +${project_revision} \ No newline at end of file diff --git a/src/main/resources/version.txt b/src/main/resources/version.txt new file mode 100644 index 0000000..040cc9a --- /dev/null +++ b/src/main/resources/version.txt @@ -0,0 +1 @@ +${project_version} \ No newline at end of file From 8356ec598da65008e5c81bee13669ea4c4c8f279 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 18 Apr 2026 22:50:08 +0200 Subject: [PATCH 2/8] feat: harden converter.py subprocess handling --- .../controller/DatastetDataTypeService.java | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/grobid/service/controller/DatastetDataTypeService.java b/src/main/java/org/grobid/service/controller/DatastetDataTypeService.java index 54b11d8..92512ca 100644 --- a/src/main/java/org/grobid/service/controller/DatastetDataTypeService.java +++ b/src/main/java/org/grobid/service/controller/DatastetDataTypeService.java @@ -77,30 +77,43 @@ public Response getResyncJsonDataTypes() { processBuilder.command("python3", "script/converter.py", "resources/dataset/dataseer/csv/all-1.csv"); // ensure we are using the right path to the script processBuilder.directory(new File(this.defaultPath)); + // Merge stderr into stdout so a single drained pipe keeps the child + // process from stalling on a full error buffer. 
+ processBuilder.redirectErrorStream(true); LOGGER.info("calling script:" + processBuilder.command()); + + Process process = null; try { long start = System.currentTimeMillis(); - Process process = processBuilder.start(); - - BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); + process = processBuilder.start(); StringBuilder builder = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) { - builder.append(line); - builder.append(System.getProperty("line.separator")); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + builder.append(line); + builder.append(System.getProperty("line.separator")); + } } int exitCode = process.waitFor(); long end = System.currentTimeMillis(); LOGGER.info("Exit code : " + exitCode); - LOGGER.info("Sync with online DataSeer wiki made in " + ((end - start)/1000) + " seconds"); + LOGGER.info("Sync with online DataSeer wiki made in " + ((end - start) / 1000) + " seconds"); - if (builder.length()>0) + if (builder.length() > 0) jsonDataTypeResource = builder.toString(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.warn("Interrupted while waiting for converter.py", e); } catch (Exception e) { - e.printStackTrace(); - } + LOGGER.error("Failure running converter.py for resyncJsonDataTypes", e); + } finally { + if (process != null && process.isAlive()) { + process.destroyForcibly(); + } + } return Response.status(Status.OK).entity(jsonDataTypeResource).type(MediaType.APPLICATION_JSON).build(); } From c0b9bba08848b3f6fe2b10904f9f40a177a98d99 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 18 Apr 2026 22:50:32 +0200 Subject: [PATCH 3/8] feat: avoid exception when disambiguation and glutton are not reachable --- .../core/engines/DatasetDisambiguator.java | 61 ++------ 
.../core/utilities/ArticleUtilities.java | 136 ++++++++---------- 2 files changed, 77 insertions(+), 120 deletions(-) diff --git a/src/main/java/org/grobid/core/engines/DatasetDisambiguator.java b/src/main/java/org/grobid/core/engines/DatasetDisambiguator.java index 80741c1..84723c7 100644 --- a/src/main/java/org/grobid/core/engines/DatasetDisambiguator.java +++ b/src/main/java/org/grobid/core/engines/DatasetDisambiguator.java @@ -146,7 +146,6 @@ public boolean checkIfAlive() { public void ensureCustomizationReady() { boolean result = false; URL url = null; - CloseableHttpResponse response = null; try { if ((nerd_port != null) && (nerd_port.length() > 0)) if (nerd_port.equals("443")) @@ -157,24 +156,15 @@ public void ensureCustomizationReady() { url = new URL("http://" + nerd_host + "/service/customisation/dataset"); LOGGER.debug("Calling: " + url.toString()); -//System.out.println("Calling: " + url.toString()); - CloseableHttpClient httpClient = HttpClients.createDefault(); HttpGet get = new HttpGet(url.toString()); - Scanner in = null; - try { - response = httpClient.execute(get); -//System.out.println(response.getStatusLine()); + try (CloseableHttpClient httpClient = HttpClients.createDefault(); + CloseableHttpResponse response = httpClient.execute(get)) { int code = response.getStatusLine().getStatusCode(); if (code != 200) { LOGGER.info("Failed customization lookup service: HTTP error code : " + code + " - the customization will be loaded"); } else { result = true; } - } finally { - if (in != null) - in.close(); - if (response != null) - response.close(); } } catch (MalformedURLException e) { LOGGER.warn("entity-fishing URL is malformed, customization skipped"); @@ -196,44 +186,34 @@ public void ensureCustomizationReady() { url = new URL("http://" + nerd_host + "/service/customisations"); LOGGER.debug("Calling: " + url.toString()); -//System.out.println("Calling: " + url.toString()); // load the dataset customisation File cutomisationFile = new 
File("resources/config/customisation-dataset.json"); cutomisationFile = new File(cutomisationFile.getAbsolutePath()); String json = FileUtils.readFileToString(cutomisationFile, "UTF-8"); - CloseableHttpClient httpClient = HttpClients.createDefault(); HttpPost post = new HttpPost(url.toString()); - //StringBody stringValue = new StringBody(json, ContentType.MULTIPART_FORM_DATA); - //StringBody stringName = new StringBody("dataset", ContentType.MULTIPART_FORM_DATA); MultipartEntityBuilder builder = MultipartEntityBuilder.create(); builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE); builder.addTextBody("value", json); builder.addTextBody("name", "dataset"); - //builder.addPart("value", stringValue); - //builder.addPart("name", stringName); HttpEntity entity = builder.build(); - try { - post.setEntity(entity); - response = httpClient.execute(post); -//System.out.println(response.getStatusLine()); + post.setEntity(entity); + try (CloseableHttpClient httpClient = HttpClients.createDefault(); + CloseableHttpResponse response = httpClient.execute(post)) { int code = response.getStatusLine().getStatusCode(); if (code != 200) { LOGGER.error("Failed loading dataset customisation: HTTP error code : " + code); } else { LOGGER.info("Dataset customisation loaded"); } - } finally { - if (response != null) - response.close(); } } catch (MalformedURLException e) { - e.printStackTrace(); + LOGGER.warn("MalformedURLException while loading dataset customisation", e); } catch (IOException e) { - e.printStackTrace(); + LOGGER.warn("I/O error while loading dataset customisation", e); } } } @@ -489,8 +469,6 @@ public String runNerd(List entities, List subtokens, Strin url = new URL("http://" + nerd_host + ":" + nerd_port + "/service/" + RESOURCEPATH); else url = new URL("http://" + nerd_host + "/service/" + RESOURCEPATH); -//System.out.println("calling... 
" + url.toString()); - CloseableHttpClient httpClient = HttpClients.createDefault(); HttpPost post = new HttpPost(url.toString()); //post.addHeader("Content-Type", "application/json"); //post.addHeader("Accept", "application/json"); @@ -567,14 +545,9 @@ public String runNerd(List entities, List subtokens, Strin builder.addPart("query", stringBody); HttpEntity entity = builder.build(); - CloseableHttpResponse response = null; - Scanner in = null; - try { - //post.setEntity(new UrlEncodedFormEntity(params)); - post.setEntity(entity); - response = httpClient.execute(post); - // System.out.println(response.getStatusLine()); - + post.setEntity(entity); + try (CloseableHttpClient httpClient = HttpClients.createDefault(); + CloseableHttpResponse response = httpClient.execute(post)) { int code = response.getStatusLine().getStatusCode(); if (code != 200) { LOGGER.warn("entity-fishing annotation returned HTTP " + code + ", disambiguation skipped"); @@ -582,17 +555,13 @@ public String runNerd(List entities, List subtokens, Strin } HttpEntity entityResp = response.getEntity(); - in = new Scanner(entityResp.getContent()); - while (in.hasNext()) { - output.append(in.next()); - output.append(" "); + try (Scanner in = new Scanner(entityResp.getContent())) { + while (in.hasNext()) { + output.append(in.next()); + output.append(" "); + } } EntityUtils.consume(entityResp); - } finally { - if (in != null) - in.close(); - if (response != null) - response.close(); } } catch (MalformedURLException e) { LOGGER.warn("entity-fishing URL is malformed, disambiguation skipped"); diff --git a/src/main/java/org/grobid/core/utilities/ArticleUtilities.java b/src/main/java/org/grobid/core/utilities/ArticleUtilities.java index ebb050d..620e151 100644 --- a/src/main/java/org/grobid/core/utilities/ArticleUtilities.java +++ b/src/main/java/org/grobid/core/utilities/ArticleUtilities.java @@ -3,10 +3,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; 
import org.apache.commons.io.FileUtils; -import org.apache.http.HttpResponse; -import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; import org.grobid.service.configuration.DatastetServiceConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,6 +24,11 @@ public class ArticleUtilities { private static final Logger logger = LoggerFactory.getLogger(ArticleUtilities.class); + // Shared Jackson mapper: ObjectMapper is thread-safe after configuration + // and retains compiled bean descriptors, so creating one per request is + // wasteful under load. See https://github.com/FasterXML/jackson-docs/wiki/Presentation:-Jackson-Performance + private static final ObjectMapper JSON = new ObjectMapper(); + private DatastetServiceConfiguration configuration; private static String halURL = "https://hal.archives-ouvertes.fr"; @@ -143,42 +148,18 @@ private String getUnpaywallOAUrl(String doi) throws Exception { doi = doi.replace(" ", ""); String queryUrl = "https://api.unpaywall.org/v2/" + doi + "?email=patrice.lopez@science-miner.com"; - HttpClient client = new DefaultHttpClient(); - HttpGet request = new HttpGet(queryUrl); - - // add request header - //request.addHeader("User-Agent", USER_AGENT); - - HttpResponse response = client.execute(request); - - System.out.println("\nSending 'GET' request to URL : " + queryUrl); - System.out.println("Response Code : " + - response.getStatusLine().getStatusCode()); + logger.debug("GET {}", queryUrl); + String json = httpGetAsString(queryUrl); - BufferedReader rd = new BufferedReader( - new InputStreamReader(response.getEntity().getContent())); - - StringBuffer result = new StringBuffer(); - String line = ""; - while ((line = rd.readLine()) != null) { - 
result.append(line); - } - String json = result.toString(); - //System.out.println(result.toString()); - - // get the best oa url if it exists - ObjectMapper objectMapper = new ObjectMapper(); - JsonNode jsonNode = objectMapper.readTree(json); - // json path is best_oa_location / url_for_pdf + JsonNode jsonNode = JSON.readTree(json); JsonNode bestOALocation = jsonNode.path("best_oa_location"); - String urlForPdf = null; if (!bestOALocation.isMissingNode()) { JsonNode urlForPdfNode = bestOALocation.path("url_for_pdf"); if (!urlForPdfNode.isMissingNode()) { - urlForPdf = urlForPdfNode.asText(); + return urlForPdfNode.asText(); } } - return urlForPdf; + return null; } private String getGluttonOAUrl(String doi) throws Exception { @@ -188,35 +169,36 @@ private String getGluttonOAUrl(String doi) throws Exception { if (port != null) queryUrl += ":" + port; queryUrl += "/service/oa?doi=" + doi; - HttpClient client = new DefaultHttpClient(); - HttpGet request = new HttpGet(queryUrl); + logger.debug("GET {}", queryUrl); + String json = httpGetAsString(queryUrl); - HttpResponse response = client.execute(request); - - System.out.println("\nSending 'GET' request to URL : " + queryUrl); - System.out.println("Response Code : " + - response.getStatusLine().getStatusCode()); - - BufferedReader rd = new BufferedReader( - new InputStreamReader(response.getEntity().getContent())); - - StringBuffer result = new StringBuffer(); - String line = ""; - while ((line = rd.readLine()) != null) { - result.append(line); - } - String json = result.toString(); - - // get the best oa url if it exists - ObjectMapper objectMapper = new ObjectMapper(); - JsonNode jsonNode = objectMapper.readTree(json); - // json path is best_oa_location / url_for_pdf + JsonNode jsonNode = JSON.readTree(json); JsonNode urlForPdfNode = jsonNode.path("oaLink"); - String urlForPdf = null; if (!urlForPdfNode.isMissingNode()) { - urlForPdf = urlForPdfNode.asText(); + return urlForPdfNode.asText(); + } + return null; + } + 
+ /** + * Shared single-use HTTP-GET helper. Every resource (client, response, + * reader) is closed in a try-with-resources chain so a thrown exception + * cannot leak the connection pool or the response entity stream. + */ + private static String httpGetAsString(String url) throws IOException { + HttpGet request = new HttpGet(url); + try (CloseableHttpClient client = HttpClients.createDefault(); + CloseableHttpResponse response = client.execute(request); + BufferedReader rd = new BufferedReader( + new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) { + logger.debug("Response Code : {}", response.getStatusLine().getStatusCode()); + StringBuilder result = new StringBuilder(); + String line; + while ((line = rd.readLine()) != null) { + result.append(line); + } + return result.toString(); } - return urlForPdf; } private static File uploadFile(String urll, String path, String name) throws Exception { @@ -268,33 +250,39 @@ public static String applyPub2TEI(String inputFilePath, String outputFilePath, S String xsl = "-xsl:" + dirToPub2TEI.getAbsolutePath() + "/Stylesheets/Publishers.xsl"; String o = "-o:" + outputFilePath; processBuilder.command("java", "-jar", dirToPub2TEI.getAbsolutePath() + "/Samples/saxon9he.jar", s, xsl, o, "-dtd:off", "-a:off", "-expand:off", "-t"); - //processBuilder.directory(new File(pathToPub2TEI)); - //System.out.println(processBuilder.command().toString()); + // Merge stderr into stdout so we consume a single pipe — otherwise a + // chatty Saxon can fill the OS stderr buffer and deadlock the child. 
+ processBuilder.redirectErrorStream(true); + + Process process = null; try { - Process process = processBuilder.start(); + process = processBuilder.start(); StringBuilder output = new StringBuilder(); - BufferedReader reader = new BufferedReader( - new InputStreamReader(process.getInputStream())); - - String line; - while ((line = reader.readLine()) != null) { - output.append(line + "\n"); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + output.append(line).append("\n"); + } } int exitVal = process.waitFor(); if (exitVal == 0) { - System.out.println("XML transformation done"); + logger.info("XML transformation done"); } else { - // abnormal... - System.out.println("XML transformation failed"); + logger.warn("XML transformation failed (exit code {}): {}", exitVal, output); outputFilePath = null; } - } catch (IOException e) { - e.printStackTrace(); - outputFilePath = null; - } catch (InterruptedException e) { - e.printStackTrace(); + } catch (IOException | InterruptedException e) { + logger.error("Failure running Pub2TEI transformation", e); + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } outputFilePath = null; + } finally { + if (process != null && process.isAlive()) { + process.destroyForcibly(); + } } return outputFilePath; } From 90ade2a69248b509d25c61cf82f22ed377bb86e0 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 18 Apr 2026 22:50:50 +0200 Subject: [PATCH 4/8] feat: revise UI --- src/main/resources/web/datastet/datastet.js | 67 ++++++++++++++----- src/main/resources/web/index.html | 15 +++-- .../resources/web/resources/css/style.css | 14 ++++ 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/src/main/resources/web/datastet/datastet.js b/src/main/resources/web/datastet/datastet.js index 371546b..5df41b8 100644 --- a/src/main/resources/web/datastet/datastet.js 
+++ b/src/main/resources/web/datastet/datastet.js @@ -11,13 +11,13 @@ var grobid = (function ($) { var entities = null; // for complete Wikidata concept information, resulting of additional calls to the knowledge base service - var conceptMap = new Object(); + var conceptMap = {}; // store the current entities extracted by the service - var entityMap = new Object(); + var entityMap = {}; // store the references attached to the entities and extracted by the service - var referenceMap = new Object(); + var referenceMap = {}; function defineBaseURL(ext) { var baseUrl = null; @@ -40,6 +40,42 @@ var grobid = (function ($) { $('#gbdForm').attr('action', baseUrl); } + /** + * Polls /service/health and reflects the result on the #healthIndicator + * span in the header. Datastet returns HTTP 503 while models warm up, + * so a red circle during startup is expected — it goes green once + * every classifier reports "loaded". + */ + function startHealthCheck(intervalMs) { + intervalMs = intervalMs || 30000; + var url = defineBaseURL('health'); + function probe() { + var $indicator = $('#healthIndicator'); + if ($indicator.length === 0) return; + $indicator.removeClass('health-unknown health-healthy health-unhealthy') + .addClass('health-checking'); + $.ajax({ + url: url, + method: 'GET', + cache: false, + timeout: 4000 + }).done(function (_data, _status, jqXHR) { + var ok = jqXHR.status === 200; + $indicator.removeClass('health-checking') + .addClass(ok ? 'health-healthy' : 'health-unhealthy') + .attr('title', 'Service status: ' + (ok ? 'healthy' : 'HTTP ' + jqXHR.status) + + ' (checked ' + new Date().toLocaleTimeString() + ')'); + }).fail(function (jqXHR) { + $indicator.removeClass('health-checking').addClass('health-unhealthy') + .attr('title', 'Service status: ' + + (jqXHR.status ? 
'HTTP ' + jqXHR.status : 'unreachable') + + ' (checked ' + new Date().toLocaleTimeString() + ')'); + }); + } + probe(); + setInterval(probe, intervalMs); + } + $(document).ready(function () { $("#subTitle").html("About"); @@ -47,6 +83,8 @@ var grobid = (function ($) { $("#divRestI").hide(); $("#divDoc").hide(); + startHealthCheck(); + createInputTextArea(); $("#selectedService").val('annotateDatasetSentence'); @@ -109,20 +147,19 @@ var grobid = (function ($) { function ShowRequest(formData, jqForm, options) { var queryString = $.param(formData); - $('#infoResult').html('Requesting server...'); + $('#infoResult').html(' Requesting server\u2026'); return true; } function AjaxError(jqXHR, textStatus, errorThrown) { - $('#infoResult').html("Error encountered while requesting the server.
" + jqXHR.responseText + "
"); + var responseText = (jqXHR && jqXHR.responseText) ? htmll(String(jqXHR.responseText)) : ""; + $('#infoResult').html("Error encountered while requesting the server.
" + responseText + "
"); entities = null; } function AjaxError3(message) { - if (!message) - message = ""; - message += " - The PDF document cannot be annotated. Please check the server logs."; - $('#infoResult').html("Error encountered while requesting the server.
"+message+"
"); + var safeMessage = htmll(String(message || "")); + $('#infoResult').html("Error encountered while requesting the server.
" + safeMessage + " - The PDF document cannot be annotated. Please check the server logs.
"); entities = null; return true; } @@ -132,13 +169,13 @@ var grobid = (function ($) { } function submitQuery() { - $('#infoResult').html('Requesting server...'); + $('#infoResult').html(' Requesting server\u2026'); $('#requestResult').html(''); // re-init the entity map - entityMap = new Object(); - conceptMap = new Object(); - referenceMap = new Object(); + entityMap = {}; + conceptMap = {}; + referenceMap = {}; var selected = $('#selectedService option:selected').attr('value'); var urlLocal = $('#gbdForm').attr('action'); @@ -763,7 +800,7 @@ var grobid = (function ($) { type_map[datasetNameRaw] = entity['type'] if (!local_map.has(datasetNameRaw)) - local_map.set(datasetNameRaw, new Array()); + local_map.set(datasetNameRaw, []); var localArray = local_map.get(datasetNameRaw) localArray.push(the_id) @@ -776,7 +813,7 @@ var grobid = (function ($) { } }); - var span_ids = new Array(); + var span_ids = []; var allTableContentNamed = ""; var allTableContentImplicit = ""; diff --git a/src/main/resources/web/index.html b/src/main/resources/web/index.html index 387be18..4849caf 100644 --- a/src/main/resources/web/index.html +++ b/src/main/resources/web/index.html @@ -3,8 +3,7 @@ Dataset Identification and Classification Web Application - - +