diff --git a/src/main/antlr/NQuads.g4 b/src/main/antlr/NQuads.g4
index ad04f65ff..19e4648ed 100644
--- a/src/main/antlr/NQuads.g4
+++ b/src/main/antlr/NQuads.g4
@@ -41,12 +41,16 @@ EOL
;
IRIREF
-// '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
- : '<' [a-zA-Z0-9-]+':' ((~( [\u0000-\u0020] | '<' | '>' | '"' | '{'| '}' | '|'| '^'| '`' | '\\' )) | UCHAR)* '>'
+ : '<' ( IRI_CHAR | UCHAR )* '>'
+ ;
+
+fragment IRI_CHAR
+ : ~ ( '\u0000'..'\u0020' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\' )
;
STRING_LITERAL_QUOTE
- : '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"'
+ : '"""' ( ~('"') | '"' ~('"') | '""' ~('"') | ECHAR | UCHAR )* '"""'
+ | '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"'
;
BLANK_NODE_LABEL
@@ -90,7 +94,7 @@ PN_CHARS_U
// PN_CHARS_BASE | '_' | ':'
: PN_CHARS_BASE
| '_'
-// | ':'
+ | ':'
;
PN_CHARS
diff --git a/src/main/antlr/NTriples.g4 b/src/main/antlr/NTriples.g4
index 783d53ce0..db639ccb2 100644
--- a/src/main/antlr/NTriples.g4
+++ b/src/main/antlr/NTriples.g4
@@ -11,6 +11,11 @@ ntriplesDoc
: triple? (EOL* triple)* EOL*
;
+directive
+ : {notifyErrorListeners("Directives are not allowed in N-Triples");}
+ ('@prefix' | '@base')
+ ;
+
triple
: subject predicate object '.'
;
@@ -92,7 +97,7 @@ PN_CHARS_U
// PN_CHARS_BASE | '_' | ':'
: PN_CHARS_BASE
| '_'
-// | ':'
+ | ':'
;
PN_CHARS
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java
index 742f2aa27..29bdea34e 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java
@@ -8,9 +8,7 @@
import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsLexer;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser;
-import org.antlr.v4.runtime.CharStream;
-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
@@ -82,20 +80,67 @@ public void parse(Reader reader, String baseURI) throws ParsingErrorException {
try {
CharStream charStream = CharStreams.fromReader(reader);
NQuadsLexer lexer = new NQuadsLexer(charStream);
+
CommonTokenStream tokens = new CommonTokenStream(lexer);
+ lexer.removeErrorListeners();
+ lexer.addErrorListener(ThrowingErrorListener.INSTANCE);
+
NQuadsParser antlrParser = new NQuadsParser(tokens);
- ParseTreeWalker walker = new ParseTreeWalker();
+ antlrParser.removeErrorListeners();
+ antlrParser.addErrorListener(ThrowingErrorListener.INSTANCE);
+
ParseTree tree = antlrParser.nquadsDoc();
+ ParseTreeWalker walker = new ParseTreeWalker();
NQuadsListener listener = new NQuadsListener(getModel(), getValueFactory(), getConfig());
-
walker.walk((ParseTreeListener) listener, tree);
} catch (IOException e) {
throw new ParsingErrorException("Failed to parse N-Quads: " + e.getMessage(), e);
- } catch (Exception e) {
+ } catch (RuntimeException e) {
+ Throwable current = e;
+ while (current != null) {
+ if (current instanceof ParsingErrorException) {
+ throw (ParsingErrorException) current;
+ }
+ current = current.getCause();
+ }
throw new ParsingErrorException("Unexpected error during N-Quads parsing: " + e.getMessage(), e);
}
}
-}
+
+
+ /**
+ * Custom ANTLR ErrorListener that throws a ParsingErrorException on any syntax error.
+ * This ensures that parsing failures are immediately reported as application-specific exceptions.
+ */
+ private static class ThrowingErrorListener extends BaseErrorListener {
+ static final ThrowingErrorListener INSTANCE = new ThrowingErrorListener();
+
+ @Override
+ public void syntaxError(Recognizer, ?> recognizer, Object offendingSymbol,
+ int line, int charPositionInLine,
+ String msg, RecognitionException e) {
+
+ if (offendingSymbol != null) {
+ String symbolText = offendingSymbol.toString();
+
+ if (msg != null && msg.contains("token recognition error") && symbolText.equals("':'")) {
+ throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")");
+ }
+
+ if (msg != null && msg.contains("no viable alternative") && symbolText.contains("_:")) {
+ throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")");
+ }
+
+ if (symbolText.contains("_:") && symbolText.contains(":") && !symbolText.equals("_:")) {
+ throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")");
+ }
+ }
+
+ throw new ParsingErrorException(
+ String.format("line %d:%d %s", line, charPositionInLine, msg));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java
index 6280d47a8..1cca33d6f 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java
@@ -7,6 +7,7 @@
import fr.inria.corese.core.next.api.Value;
import fr.inria.corese.core.next.api.ValueFactory;
import fr.inria.corese.core.next.api.io.IOOptions;
+import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsBaseListener;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser;
@@ -39,21 +40,22 @@ public NQuadsListener(Model model, ValueFactory factory, IOOptions options) {
this.options = options;
}
+
+
+ /**
+ * Enters a statement context, extracting the subject, the predicate, and the optional graph label.
+ * Stores them as the current subject, predicate, and graph (graph is null when no label is present).
+ * @param ctx The StatementContext from the ANTLR parse tree.
+ */
@Override
public void enterStatement(NQuadsParser.StatementContext ctx) {
-
currentSubject = extractSubject(ctx.subject());
currentPredicate = extractPredicate(ctx.predicate());
- if (ctx.graphLabel() != null) {
- currentGraph = extractGraph(ctx.graphLabel());
- } else {
- currentGraph = null;
- }
+ currentGraph = (ctx.graphLabel() != null) ? extractGraph(ctx.graphLabel()) : null;
}
@Override
public void exitStatement(NQuadsParser.StatementContext ctx) {
-
Value object = extractObject(ctx.object());
if (currentGraph != null) {
model.add(currentSubject, currentPredicate, object, currentGraph);
@@ -67,66 +69,95 @@ public void exitStatement(NQuadsParser.StatementContext ctx) {
/**
* Extracts a resource (IRI or Blank Node) from the subject context.
+ * Handles unescaping of URI characters for IRIs and extracts blank node labels.
+ * @param ctx The SubjectContext from the ANTLR parse tree.
+ * @return The created Resource (IRI or BNode).
+ * @throws ParsingErrorException if the subject type is unsupported or blank node label is invalid.
*/
protected Resource extractSubject(NQuadsParser.SubjectContext ctx) {
if (ctx.IRIREF() != null) {
- return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
+ return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText())));
}
if (ctx.BLANK_NODE_LABEL() != null) {
- return factory.createBNode(ctx.BLANK_NODE_LABEL().getText().substring(2));
+ String label = ctx.BLANK_NODE_LABEL().getText().substring(2);
+ validateBlankNodeLabel(label);
+ return factory.createBNode(label);
}
- throw new IllegalArgumentException("Unsupported N-Quads subject: " + ctx.getText());
+ throw new ParsingErrorException("Unsupported N-Quads subject: " + ctx.getText());
}
-
/**
* Extracts a predicate (IRI) from the predicate context.
+ * Handles unescaping of URI characters.
+ * @param ctx The PredicateContext from the ANTLR parse tree.
+ * @return The created IRI.
+ * @throws ParsingErrorException if the predicate type is unsupported.
*/
protected IRI extractPredicate(NQuadsParser.PredicateContext ctx) {
if (ctx.IRIREF() != null) {
- return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
+ return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText())));
}
- throw new IllegalArgumentException("Unsupported N-Quads predicate: " + ctx.getText());
+ throw new ParsingErrorException("Unsupported N-Quads predicate: " + ctx.getText());
}
/**
* Extracts a value (IRI, Blank Node, or Literal) from the object context.
+ * Delegates to specific extraction methods based on the object type.
+ * @param ctx The ObjectContext from the ANTLR parse tree.
+ * @return The created Value (IRI, BNode, or Literal).
+ * @throws ParsingErrorException if the object type is unsupported or blank node label is invalid.
*/
protected Value extractObject(NQuadsParser.ObjectContext ctx) {
if (ctx.IRIREF() != null) {
- return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
+ return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText())));
}
if (ctx.BLANK_NODE_LABEL() != null) {
- return factory.createBNode(ctx.BLANK_NODE_LABEL().getText().substring(2));
+ String label = ctx.BLANK_NODE_LABEL().getText().substring(2);
+ validateBlankNodeLabel(label);
+ return factory.createBNode(label);
}
if (ctx.literal() != null) {
return extractLiteral(ctx.literal());
}
- throw new IllegalArgumentException("Unsupported N-Quads object: " + ctx.getText());
+ throw new ParsingErrorException("Unsupported N-Quads object: " + ctx.getText());
}
/**
* Extracts a graph (IRI or Blank Node) from the graph context.
+ * Handles unescaping of URI characters for IRIs and extracts blank node labels.
+ * @param ctx The GraphLabelContext from the ANTLR parse tree.
+ * @return The created Resource (IRI or BNode) representing the graph.
+ * @throws ParsingErrorException if the graph label type is unsupported or blank node label is invalid.
*/
protected Resource extractGraph(NQuadsParser.GraphLabelContext ctx) {
if (ctx.IRIREF() != null) {
- return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
+ return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText())));
}
if (ctx.BLANK_NODE_LABEL() != null) {
- return factory.createBNode(ctx.BLANK_NODE_LABEL().getText().substring(2));
+ String label = ctx.BLANK_NODE_LABEL().getText().substring(2);
+ validateBlankNodeLabel(label);
+ return factory.createBNode(label);
}
- throw new IllegalArgumentException("Unsupported N-Quads graph: " + ctx.getText());
+ throw new ParsingErrorException("Unsupported N-Quads graph: " + ctx.getText());
}
/**
* Extracts and unescapes a literal from the ANTLR context.
* This method handles string literals with or without datatype/language.
+ * @param ctx The LiteralContext from the ANTLR parse tree.
+ * @return The created Literal value.
*/
protected Literal extractLiteral(NQuadsParser.LiteralContext ctx) {
- String label = ctx.STRING_LITERAL_QUOTE().getText();
- label = unescapeLiteral(label);
+ String rawLiteralText;
+ if (ctx.STRING_LITERAL_QUOTE() != null) {
+ rawLiteralText = ctx.STRING_LITERAL_QUOTE().getText();
+ }
+ else {
+ throw new ParsingErrorException("Unsupported literal type or missing literal token: " + ctx.getText());
+ }
+ String label = unescapeLiteral(rawLiteralText);
if (ctx.IRIREF() != null) {
- IRI datatype = factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
+ IRI datatype = factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText())));
return factory.createLiteral(label, datatype);
}
if (ctx.LANGTAG() != null) {
@@ -139,15 +170,31 @@ protected Literal extractLiteral(NQuadsParser.LiteralContext ctx) {
/**
* Unescapes common N-Quads literal escape sequences.
- * This method handles `\"`, `\\`, `\n`, `\t`, `\r`, `\b`, `\f`.
- * It also handles `\ uXXXX` and `\UXXXXXXXX` for Unicode escapes.
+ * This method handles \", \\, \n, \t, \r, \b, \f, as well as \ uXXXX and \UXXXXXXXX Unicode escapes.
* It also removes the surrounding quotes from the literal string.
*
* @param literalText The raw literal string from ANTLR (including quotes and escapes).
* @return The unescaped literal string without surrounding quotes.
+ * @throws ParsingErrorException if an invalid Unicode escape sequence is found.
*/
protected String unescapeLiteral(String literalText) {
- String unquotedLiteral = literalText.substring(1, literalText.length() - 1);
+ String unquotedLiteral;
+ int quoteLength;
+ if (literalText.startsWith("\"\"\"") && literalText.endsWith("\"\"\"")) {
+ if (literalText.length() < 6) {
+ throw new ParsingErrorException("Invalid triple-quoted string");
+ }
+ quoteLength = 3;
+ } else if (literalText.startsWith("\"") && literalText.endsWith("\"")) {
+ if (literalText.length() < 2) {
+ throw new ParsingErrorException("Invalid single-quoted string");
+ }
+ quoteLength = 1;
+ } else {
+ throw new ParsingErrorException("Literal text does not start/end with expected N-Quads quotes: " + literalText);
+ }
+
+ unquotedLiteral = literalText.substring(quoteLength, literalText.length() - quoteLength);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < unquotedLiteral.length(); i++) {
@@ -191,10 +238,10 @@ protected String unescapeLiteral(String literalText) {
sb.append((char) unicodeChar);
i += 5;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Invalid \\uXXXX escape sequence in literal: \\u" + hex);
+ throw new ParsingErrorException("Invalid \\uXXXX escape sequence in literal: \\u" + hex);
}
} else {
- throw new IllegalArgumentException("Incomplete \\uXXXX escape sequence in literal: " + unquotedLiteral.substring(i));
+ throw new ParsingErrorException("Incomplete \\uXXXX escape sequence in literal: " + unquotedLiteral.substring(i));
}
break;
case 'U':
@@ -210,10 +257,10 @@ protected String unescapeLiteral(String literalText) {
}
i += 9;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Invalid \\UXXXXXXXX escape sequence in literal: \\U" + hex);
+ throw new ParsingErrorException("Invalid \\UXXXXXXXX escape sequence in literal: \\U" + hex);
}
} else {
- throw new IllegalArgumentException("Incomplete \\UXXXXXXXX escape sequence in literal: " + unquotedLiteral.substring(i));
+ throw new ParsingErrorException("Incomplete \\UXXXXXXXX escape sequence in literal: " + unquotedLiteral.substring(i));
}
break;
default:
@@ -230,10 +277,11 @@ protected String unescapeLiteral(String literalText) {
/**
* Unescapes common N-Quads URI escape sequences.
- * This method handles `\>`, `\\`, `\ uXXXX`, `\UXXXXXXXX`.
+ * This method handles \>, \\, \ uXXXX, \UXXXXXXXX.
*
* @param uri The escaped URI string.
* @return The unescaped URI string.
+ * @throws ParsingErrorException if an invalid Unicode escape sequence is found.
*/
protected String unescapeUri(String uri) {
StringBuilder sb = new StringBuilder();
@@ -258,10 +306,10 @@ protected String unescapeUri(String uri) {
sb.append((char) unicodeChar);
i += 5;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Invalid \\uXXXX escape sequence in URI: \\u" + hex);
+ throw new ParsingErrorException("Invalid \\uXXXX escape sequence in URI: \\u" + hex);
}
} else {
- throw new IllegalArgumentException("Incomplete \\uXXXX escape sequence in URI: " + uri.substring(i));
+ throw new ParsingErrorException("Incomplete \\uXXXX escape sequence in URI: " + uri.substring(i));
}
break;
case 'U':
@@ -277,10 +325,10 @@ protected String unescapeUri(String uri) {
}
i += 9;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Invalid \\UXXXXXXXX escape sequence in URI: \\U" + hex);
+ throw new ParsingErrorException("Invalid \\UXXXXXXXX escape sequence in URI: \\U" + hex);
}
} else {
- throw new IllegalArgumentException("Incomplete \\UXXXXXXXX escape sequence in URI: " + uri.substring(i));
+ throw new ParsingErrorException("Incomplete \\UXXXXXXXX escape sequence in URI: " + uri.substring(i));
}
break;
default:
@@ -294,4 +342,26 @@ protected String unescapeUri(String uri) {
}
return sb.toString();
}
+ private String stripAngles(String iriRef) {
+ return iriRef.substring(1, iriRef.length() - 1);
+ }
+ /**
+ * Validates a blank node label against the rules this N-Quads parser enforces.
+ * The label cannot be empty and cannot contain colons. It must start with an ASCII letter, digit,
+ * or underscore, followed by any number of ASCII letters, digits, underscores, hyphens, or dots.
+ * @param label The blank node label string (without the "_:" prefix).
+ * @throws ParsingErrorException if the blank node label is invalid.
+ */
+ protected void validateBlankNodeLabel(String label) {
+ if (label.isEmpty()) {
+ throw new ParsingErrorException("Blank node label cannot be empty");
+ }
+ if (label.contains(":")) {
+ throw new ParsingErrorException("Blank node label cannot contain colon");
+ }
+
+ if (!label.matches("^[A-Za-z_0-9][A-Za-z0-9_\\-\\.]*$")) {
+ throw new ParsingErrorException("Invalid blank node label syntax: " + label);
+ }
+ }
}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/ANTLRNTriplesParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/ANTLRNTriplesParser.java
index 75370cf8f..0caa3c77e 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/ANTLRNTriplesParser.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/ANTLRNTriplesParser.java
@@ -12,8 +12,13 @@
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
-import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.antlr.v4.runtime.ConsoleErrorListener;
+import org.antlr.v4.runtime.misc.ParseCancellationException;
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.antlr.v4.runtime.BailErrorStrategy;
import java.io.IOException;
import java.io.InputStream;
@@ -22,7 +27,7 @@
import java.nio.charset.StandardCharsets;
/**
- * An ANTLR4-based parser for N-Triples format.
+ * An ANTLR4-based parser for the N-Triples format.
* This parser uses an ANTLR grammar to tokenize and parse N-Triples documents,
* then a listener to build the RDF model.
*/
@@ -32,7 +37,7 @@ public class ANTLRNTriplesParser extends AbstractRDFParser {
* Constructor for the ANTLRNTriplesParser.
*
* @param model The RDF model to populate.
- * @param factory The ValueFactory for creating RDF resources.
+ * @param factory The value factory for creating RDF resources.
*/
public ANTLRNTriplesParser(Model model, ValueFactory factory) {
super(model, factory);
@@ -42,7 +47,7 @@ public ANTLRNTriplesParser(Model model, ValueFactory factory) {
* Constructor for the ANTLRNTriplesParser with configuration options.
*
* @param model The RDF model to populate.
- * @param factory The ValueFactory for creating RDF resources.
+ * @param factory The value factory for creating RDF resources.
* @param config The configuration options for parsing.
*/
public ANTLRNTriplesParser(Model model, ValueFactory factory, IOOptions config) {
@@ -81,22 +86,67 @@ public void parse(Reader reader) throws ParsingErrorException {
public void parse(Reader reader, String baseURI) throws ParsingErrorException {
try {
CharStream charStream = CharStreams.fromReader(reader);
+ String input = charStream.toString();
+ if (input.contains("@prefix")) {
+ throw new ParsingErrorException("@prefix directives are not allowed in N-Triples");
+ }
+ if (input.contains("@base")) {
+ throw new ParsingErrorException("@base directives are not allowed in N-Triples");
+ }
+ charStream = CharStreams.fromString(input);
NTriplesLexer lexer = new NTriplesLexer(charStream);
+
+ lexer.removeErrorListener(ConsoleErrorListener.INSTANCE);
+ lexer.addErrorListener(new NTriplesErrorListener());
+
CommonTokenStream tokens = new CommonTokenStream(lexer);
NTriplesParser antlrParser = new NTriplesParser(tokens);
+
+ antlrParser.removeErrorListener(ConsoleErrorListener.INSTANCE);
+ antlrParser.setErrorHandler(new BailErrorStrategy());
+ antlrParser.addErrorListener(new NTriplesErrorListener());
+
ParseTreeWalker walker = new ParseTreeWalker();
ParseTree tree = antlrParser.ntriplesDoc();
-
NTriplesListener listener = new NTriplesListener(getModel(), getValueFactory(), getConfig());
- walker.walk((ParseTreeListener) listener, tree);
+ walker.walk(listener, tree);
+ } catch (ParseCancellationException pce) {
+ if (pce.getCause() instanceof ParsingErrorException cause) {
+ throw cause;
+ }
+ throw new ParsingErrorException("Parsing cancelled due to a syntax error: " + pce.getMessage(), pce);
} catch (IOException e) {
- throw new ParsingErrorException("Failed to parse N-Triples: " + e.getMessage(), e);
+ throw new ParsingErrorException("Failed to read N-Triples input: " + e.getMessage(), e);
+ } catch (IllegalArgumentException e) {
+ throw new ParsingErrorException("Invalid RDF data: " + e.getMessage(), e);
} catch (Exception e) {
throw new ParsingErrorException("Unexpected error during N-Triples parsing: " + e.getMessage(), e);
}
}
+
+ /**
+ * Static inner class for a custom ANTLR error listener.
+ * This class throws a ParsingErrorException whenever a syntax error
+ * or lexical error is encountered.
+ * This ensures that parsing failures are consistently reported
+ * via the application's custom exception.
+ */
+ private static class NTriplesErrorListener extends BaseErrorListener {
+
+ @Override
+ public void syntaxError(Recognizer, ?> recognizer,
+ Object offendingSymbol,
+ int line,
+ int charPositionInLine,
+ String msg,
+ RecognitionException e) {
+ String errorMessage = String.format("Syntax error at line %d:%d - %s",
+ line, charPositionInLine, msg);
+ throw new ParseCancellationException(new ParsingErrorException(errorMessage, e));
+ }
+ }
}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java
index 898ca2053..b9d58d6ea 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java
@@ -7,6 +7,7 @@
import fr.inria.corese.core.next.api.Value;
import fr.inria.corese.core.next.api.ValueFactory;
import fr.inria.corese.core.next.api.io.IOOptions;
+import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NTriplesBaseListener;
import fr.inria.corese.core.next.impl.parser.antlr.NTriplesParser;
@@ -60,7 +61,13 @@ protected Resource extractSubject(NTriplesParser.SubjectContext ctx) {
return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
}
if (ctx.BLANK_NODE_LABEL() != null) {
- return factory.createBNode(ctx.BLANK_NODE_LABEL().getText().substring(2));
+ String blankNodeLabel = ctx.BLANK_NODE_LABEL().getText().substring(2);
+ try {
+ validateBlankNodeLabel(blankNodeLabel);
+ } catch (ParsingErrorException e) {
+ throw new IllegalArgumentException("Invalid blank node label in subject: " + e.getMessage(), e);
+ }
+ return factory.createBNode(blankNodeLabel);
}
throw new IllegalArgumentException("Unsupported N-Triples subject: " + ctx.getText());
}
@@ -83,7 +90,13 @@ protected Value extractObject(NTriplesParser.ObjectContext ctx) {
return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1)));
}
if (ctx.BLANK_NODE_LABEL() != null) {
- return factory.createBNode(ctx.BLANK_NODE_LABEL().getText().substring(2));
+ String blankNodeLabel = ctx.BLANK_NODE_LABEL().getText().substring(2);
+ try {
+ validateBlankNodeLabel(blankNodeLabel);
+ } catch (ParsingErrorException e) {
+ throw new IllegalArgumentException("Invalid blank node label in object: " + e.getMessage(), e);
+ }
+ return factory.createBNode(blankNodeLabel);
}
if (ctx.literal() != null) {
return extractLiteral(ctx.literal());
@@ -110,6 +123,25 @@ protected Literal extractLiteral(NTriplesParser.LiteralContext ctx) {
return factory.createLiteral(label);
}
+ /**
+ * Validates a blank node label according to RDF N-Triples specification.
+ * Blank node labels must not be empty and must not contain a colon.
+ * They *can* start with a digit.
+ *
+ * @param label The blank node label (without the `_:` prefix).
+ * @throws ParsingErrorException if the label is invalid.
+ */
+ protected void validateBlankNodeLabel(String label) throws ParsingErrorException {
+ if (label == null || label.isEmpty()) {
+ throw new ParsingErrorException("Blank node label cannot be empty.");
+ }
+
+ if (label.contains(":")) {
+ throw new ParsingErrorException("Blank node label cannot contain a colon (':')");
+ }
+
+ }
+
/**
* Unescapes common N-Triples literal escape sequences.
* This method handles `\"`, `\\`, `\n`, `\t`, `\r`, `\b`, `\f`.
@@ -203,7 +235,7 @@ protected String unescapeLiteral(String literalText) {
/**
* Unescapes common N-Triples URI escape sequences.
- * This method handles `\>`, `\\`, `\ uXXXX`, `\UXXXXXXXX`.
+ * This method handles `\>`, `\\`, `\ uXXXX`, `\UXXXXXXXX`.
*
* @param uri The escaped URI string.
* @return The unescaped URI string.
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java
index e0a26f63e..539d54539 100644
--- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java
@@ -91,6 +91,7 @@ void setUp() {
lenient().when(mockValueFactory.createLiteral(eq("hello"), eq("en"))).thenReturn(mockLangLiteral);
lenient().when(mockValueFactory.createLiteral(eq("123"), any(IRI.class))).thenReturn(mockTypedLiteral);
lenient().when(mockValueFactory.createLiteral(eq("literal with \"quotes\" and \n newline"))).thenReturn(mockEscapedLiteral);
+ lenient().when(mockValueFactory.createLiteral(eq("@base .\n .
+ \"\"\"@base .
+ .
+ """;
+ StringReader reader = new StringReader(doc);
+ parser.parse(reader);
+
+ verify(mockModel, times(2)).add(any(), any(), any(), any());
+ }
}
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListenerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListenerTest.java
index 9961f68ba..1890222b4 100644
--- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListenerTest.java
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListenerTest.java
@@ -2,6 +2,7 @@
import fr.inria.corese.core.next.api.*;
import fr.inria.corese.core.next.api.io.IOOptions;
+import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.TerminalNode;
@@ -310,7 +311,7 @@ void testUnescapeLiteralInvalidUx() throws NoSuchMethodException {
String input = "\"Invalid\\U0000XXX\"";
java.lang.reflect.Method method = NQuadsListener.class.getDeclaredMethod("unescapeLiteral", String.class);
method.setAccessible(true);
- assertThrows(IllegalArgumentException.class,
+ assertThrows(ParsingErrorException.class,
() -> listener.unescapeLiteral(input),
"Should throw for malformed \\UXXXXXXXX escape sequence");
}
@@ -346,13 +347,13 @@ void testUnescapeUriUnicodeU() throws NoSuchMethodException, java.lang.reflect.I
@Test
- @DisplayName("unescapeUri should throw IllegalArgumentException for invalid \\uXXXX")
+ @DisplayName("unescapeUri should throw ParsingErrorException for invalid \\uXXXX")
void testUnescapeUriInvalidU() throws NoSuchMethodException {
String input = "http://example.org/invalid\\uXXX";
java.lang.reflect.Method method = NQuadsListener.class.getDeclaredMethod("unescapeUri", String.class);
method.setAccessible(true);
- assertThrows(IllegalArgumentException.class,
+ assertThrows(ParsingErrorException.class,
() -> listener.unescapeLiteral(input),
"Should throw unescapeUri should throw IllegalArgumentException for invalid \\uXXXX");