diff --git a/src/main/antlr/NQuads.g4 b/src/main/antlr/NQuads.g4 index 19e4648ed..b49ab92b1 100644 --- a/src/main/antlr/NQuads.g4 +++ b/src/main/antlr/NQuads.g4 @@ -1,7 +1,7 @@ grammar NQuads; nquadsDoc - : statement? (EOL* statement)* EOL* + : statement? (EOL* statement)* EOL* EOF ; statement @@ -49,8 +49,7 @@ fragment IRI_CHAR ; STRING_LITERAL_QUOTE - : '"""' ( ~('"') | '"' ~('"') | '""' ~('"') | ECHAR | UCHAR )* '"""' - | '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"' + : '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"' ; BLANK_NODE_LABEL diff --git a/src/main/antlr/TriG.g4 b/src/main/antlr/TriG.g4 index f93efea6f..91c95b222 100644 --- a/src/main/antlr/TriG.g4 +++ b/src/main/antlr/TriG.g4 @@ -20,7 +20,7 @@ triplesOrGraph triples2 : blankNodePropertyList predicateObjectList? '.' - | collection predicateObjectList '.' + | collection predicateObjectList? '.' ; wrappedGraph @@ -28,7 +28,7 @@ wrappedGraph ; triplesBlock - : triples ('.' triplesBlock?)? + : triples ('.'? triplesBlock?)? ; labelOrSubject @@ -149,7 +149,6 @@ WS : (('\u0020' | '\u0009' | '\u000A' | '\u000D' ) )+ -> skip ; -// Terminals Graph_w options { caseInsensitive=true; } : 'GRAPH' @@ -169,21 +168,22 @@ BooleanLiteral ; IRIREF - : '<' (PN_CHARS | '.' | ':' | '#' | '@' | '%' | '&' | '$' | '!' | '\'' | '*' | '+' | '/' | '(' | ')' | '-' | ',' | '?' | '~' | UCHAR)* '>' + : '<' (PN_CHARS | '.' | ':' | '#' | '@' | '%' | '&' | '$' | '!' | '\'' | '*' | '+' | '/' | '(' | ')' | '-' | ',' | '?' | '~' | ';' | '=' | UCHAR)* '>' + ; + +BLANK_NODE_LABEL + : '_:' (PN_CHARS_U | [0-9] | PLX) ((PN_CHARS | '.' | PLX)* (PN_CHARS | PLX))? ; PNAME_NS - : PN_PREFIX? ':' + : PN_PREFIX ':' + | ':' ; PNAME_LN : PNAME_NS PN_LOCAL ; -BLANK_NODE_LABEL - : '_:' (PN_CHARS_U | '0' .. '9') ((PN_CHARS | '.')* PN_CHARS)? - ; - LANGTAG : '@' ('a'.. 'z' | 'A' .. 'Z')+ ('-' ('a'.. 'z' | 'A' .. 'Z' | '0' .. '9')* )* ; @@ -197,9 +197,11 @@ DECIMAL ; DOUBLE - : ('+' | '-' )? (('0' .. 
'9')+ '.' ('0' .. '9')* EXPONENT - | '.' ('0' .. '9')+ EXPONENT - | ('0' .. '9')+ EXPONENT) + : ('+' | '-')? ( + ( ('0'..'9')+ '.' ('0'..'9')* EXPONENT ) + | ( '.' ('0'..'9')+ EXPONENT ) + | ( ('0'..'9')+ EXPONENT ) + ) ; EXPONENT @@ -207,19 +209,31 @@ EXPONENT ; STRING_LITERAL_QUOTE - : '"' ((~[\u0022\u005C\u0010\u0013]) | ECHAR | UCHAR)* '"' + : '"' ((~[\u0022\u005C\u000A\u000D]) | ECHAR | UCHAR)* '"' ; STRING_LITERAL_SINGLE_QUOTE - : '\'' ((~[\u0027\u005C\u0010\u0013]) | ECHAR | UCHAR)* '\'' + : '\'' ((~[\u0027\u005C\u000A\u000D]) | ECHAR | UCHAR)* '\'' ; STRING_LITERAL_LONG_SINGLE_QUOTE - : '\'\'\'' (('\'' | '\'\'')? ( (~['\\] ) | ECHAR | UCHAR))* '\'\'\'' + : '\'\'\'' (LONG_STRING_CHAR_SINGLE | ECHAR | UCHAR)* '\'\'\'' ; STRING_LITERAL_LONG_QUOTE - : '"""' (('"' | '""')? ( (~["'] ) | ECHAR | UCHAR))* '"""' + : '"""' (LONG_STRING_CHAR_DOUBLE | ECHAR | UCHAR)* '"""' + ; + +fragment LONG_STRING_CHAR_DOUBLE + : ~[\\"] + | '"' ~["] + | '"' '"' ~["] + ; + +fragment LONG_STRING_CHAR_SINGLE + : ~[\\'] + | '\'' ~['] + | '\'' '\'' ~['] ; UCHAR @@ -236,10 +250,10 @@ WHITESPACE ; ANON - : '[' WHITESPACE* ']' + : '[' [\u0009\u000A\u000D\u0020]* ']' ; -PN_CHARS_BASE +fragment PN_CHARS_BASE : 'A' .. 'Z' | 'a' .. 'z' | '\u00C0' .. '\u00D6' @@ -253,15 +267,15 @@ PN_CHARS_BASE | '\u3001' .. '\uD7FF' | '\uF900' .. '\uFDCF' | '\uFDF0' .. '\uFFFD' -// | '\u10000' .. '\uEFFFF' + | '\u{10000}'..'\u{EFFFF}' ; -PN_CHARS_U +fragment PN_CHARS_U : PN_CHARS_BASE | '_' ; -PN_CHARS +fragment PN_CHARS : PN_CHARS_U | '-' | [0-9] @@ -270,28 +284,28 @@ PN_CHARS | [\u203F-\u2040] ; -PN_PREFIX +fragment PN_PREFIX : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? ; -PN_LOCAL - : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? +fragment PN_LOCAL + : (PN_CHARS_U | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? 
; -PLX +fragment PLX : PERCENT | PN_LOCAL_ESC ; -PERCENT +fragment PERCENT : '%' HEX HEX ; -HEX +fragment HEX : [0-9a-fA-F] ; -PN_LOCAL_ESC +fragment PN_LOCAL_ESC : '\\' ( '_' | '~' diff --git a/src/main/antlr/Turtle.g4 b/src/main/antlr/Turtle.g4 index a51350b3e..100d602d6 100644 --- a/src/main/antlr/Turtle.g4 +++ b/src/main/antlr/Turtle.g4 @@ -2,26 +2,9 @@ [The "BSD licence"] Copyright (c) 2014, Alejandro Medrano (@ Universidad Politecnica de Madrid, http://www.upm.es/) All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES */ /* Derived from http://www.w3.org/TR/turtle/#sec-grammar-grammar */ -// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false -// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging - grammar Turtle; turtleDoc @@ -68,6 +51,7 @@ predicate : iri ; + object_ : iri | BlankNode @@ -130,7 +114,8 @@ string iri : IRIREF - | PrefixedName + | PNAME_LN + | PNAME_NS ; BlankNode @@ -152,14 +137,12 @@ Prefix_w options { caseInsensitive=true; } : 'PREFIX' ; -// PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? 
-// Prefix without the final ':' PN_PREFIX : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? ; IRIREF - : '<' ((~( '\u0000' | '\u0020' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' |'\\' )) | UCHAR)* '>' + : '<' (~( '\u0000' | '\u0020' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\' ) | UCHAR)* '>' ; // Prefix alone @@ -167,12 +150,6 @@ PNAME_NS : PN_PREFIX? ':' ; -PrefixedName - : PNAME_LN - | PNAME_NS - ; - -// Prefix + local name PNAME_LN : PNAME_NS PN_LOCAL ; @@ -205,20 +182,28 @@ EXPONENT // "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" STRING_LITERAL_LONG_SINGLE_QUOTE - : '\'\'\'' ( ('\'' '\''? )? ( [^'\\] | ECHAR | UCHAR | '"' ) )* '\'\'\'' + : '\'\'\'' ( + ( ~['\\] | ECHAR | UCHAR ) + | '\'' ~['\\] + | '\'\'' ~['\\] + )* '\'\'\'' ; // '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' STRING_LITERAL_LONG_QUOTE - : '"""' ( ('"' '"'? )? ( (~["\\]) | ECHAR | UCHAR )+ )* '"""' + : '"""' ( + ( ~["\\] | ECHAR | UCHAR ) + | '"' ~["\\] + | '""' ~["\\] + )* '"""' ; STRING_LITERAL_QUOTE - : '"' (~ [\u0027\u005C\u000A\u000D] | ECHAR | UCHAR | '"')* '"' + : '"' (~["\\\r\n] | ECHAR | UCHAR)* '"' ; STRING_LITERAL_SINGLE_QUOTE - : '\'' (~ [\u0027\u005C\u000A\u000D] | ECHAR | UCHAR | '\'')* '\'' + : '\'' (~['\\\r\n] | ECHAR | UCHAR)* '\'' ; // Hexadecimal unicode character @@ -257,7 +242,7 @@ PN_CHARS_BASE | '\u3001' .. '\uD7FF' | '\uF900' .. '\uFDCF' | '\uFDF0' .. '\uFFFD' -// | '\u10000' .. 
'\uEFFFF' + | '\u{10000}'..'\u{EFFFF}' ; PN_CHARS_U diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/option/TitaniumJSONLDProcessorOption.java b/src/main/java/fr/inria/corese/core/next/impl/io/option/TitaniumJSONLDProcessorOption.java index bfc2dab8f..6cda33d66 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/option/TitaniumJSONLDProcessorOption.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/option/TitaniumJSONLDProcessorOption.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.io.AbstractIOOptions; import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import fr.inria.corese.core.next.api.io.serialization.SerializationOption; /** * Wrapper around the JsonLdOptions class for the Titanium JSONLD parser and @@ -17,7 +18,8 @@ * @see JsonLdOptions */ -public class TitaniumJSONLDProcessorOption extends AbstractIOOptions implements RDFParserBaseIRIOptions { +public class TitaniumJSONLDProcessorOption extends AbstractIOOptions + implements RDFParserBaseIRIOptions, SerializationOption { private final Builder builder; @@ -138,7 +140,8 @@ public String getBase() { /** * Builder for creating instances of TitaniumJSONLDProcessorOption. * This nested static class provides a fluent API for configuring the - * various options before building the final {@code TitaniumJSONLDProcessorOption} object. + * various options before building the final + * {@code TitaniumJSONLDProcessorOption} object. 
*/ public static class Builder extends AbstractIOOptions.Builder { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index f49af0569..7cbe8b0a5 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -11,6 +11,7 @@ import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser; import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser; +import fr.inria.corese.core.next.impl.io.parser.trig.ANTLRTrigParser; /** * Factory class for creating RDF parsers. Generates according to the RDFFormat provided. @@ -47,6 +48,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNQuadsParser(model, factory, config); } else if (format == RDFFormat.RDFXML) { return new RDFXMLParser(model, factory, config); + } else if (format == RDFFormat.TRIG) { + return new ANTLRTrigParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -70,8 +73,10 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNQuadsParser(model, factory); } else if (format == RDFFormat.RDFXML) { return new RDFXMLParser(model, factory); + } else if (format == RDFFormat.TRIG) { + return new ANTLRTrigParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } -} +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractNTriplesNQuadsListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractNTriplesNQuadsListener.java new file mode 100644 index 000000000..56f562bf4 --- /dev/null +++ 
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractNTriplesNQuadsListener.java @@ -0,0 +1,259 @@ +package fr.inria.corese.core.next.impl.io.parser.common; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; + +/** + * Base class for N-Triples/N-Quads parsers providing common escape handling + * and validation logic. + */ +public abstract class AbstractNTriplesNQuadsListener { + + protected final Model model; + protected final ValueFactory factory; + protected final IOOptions options; + + /** + * Constructs a parser helper. + * + * @param model RDF model to populate + * @param factory ValueFactory for creating RDF terms + * @param options IO configuration options + */ + public AbstractNTriplesNQuadsListener(Model model, ValueFactory factory, IOOptions options) { + this.model = model; + this.factory = factory; + this.options = options; + } + + /** + * Strips angle brackets from an IRI reference. + * + * @param iriRef IRI text with angle brackets + * @return IRI text without angle brackets + */ + public String stripAngles(String iriRef) { + if (iriRef.length() < 2) { + throw new ParsingErrorException("Invalid IRI reference: too short"); + } + return iriRef.substring(1, iriRef.length() - 1); + } + + /** + * Extracts the blank node label by removing the "_:" prefix. 
+ * + * @param text Full blank node text including "_:" prefix + * @return Blank node label without prefix + */ + public String extractBlankNodeLabel(String text) { + if (text.length() < 2) { + throw new ParsingErrorException("Invalid blank node: too short"); + } + return text.substring(2); + } + + /** + * Validates a blank node label according to RDF specifications. + * Labels must not be empty and cannot contain colons. + * + * @param label Blank node label without "_:" prefix + * @throws ParsingErrorException if the label is invalid + */ + public void validateBlankNodeLabel(String label) { + if (label == null || label.isEmpty()) { + throw new ParsingErrorException("Blank node label cannot be empty"); + } + + if (label.contains(ParserConstants.COLON)) { + throw new ParsingErrorException("Blank node label cannot contain colon (':')"); + } + } + + /** + * Unescapes literal strings according to N-Triples/N-Quads specification. + * + * @param literalText Raw literal text including quotes + * @return Unescaped literal content without quotes + */ + public String unescapeLiteral(String literalText) { + String content = removeQuotes(literalText); + return processEscapeSequences(content, "literal"); + } + + /** + * Removes quotes from a literal string. + * Supports both single quotes and triple quotes. 
+ * + * @param text Quoted literal text + * @return Content without quotes + */ + public String removeQuotes(String text) { + if (text.startsWith(ParserConstants.TRIPLE_QUOTE) && text.endsWith(ParserConstants.TRIPLE_QUOTE)) { + if (text.length() < 6) { + throw new ParsingErrorException("Invalid triple-quoted string"); + } + return text.substring(3, text.length() - 3); + } + + if (text.startsWith(ParserConstants.QUOTE) && text.endsWith(ParserConstants.QUOTE)) { + if (text.length() < 2) { + throw new ParsingErrorException("Invalid single-quoted string"); + } + return text.substring(1, text.length() - 1); + } + + throw new ParsingErrorException("Literal does not have expected quotes: " + text); + } + + /** + * Unescapes URI strings according to N-Triples/N-Quads specification. + * + * @param uri Escaped URI string + * @return Unescaped URI string + */ + public String unescapeUri(String uri) { + return processEscapeSequences(uri, "URI"); + } + + /** + * Processes escape sequences in strings. + * Handles: \", \\, \n, \t, \r, \b, \f, \ uXXXX, \UXXXXXXXX, \> + * + * @param input String containing escape sequences + * @param context Context for error messages ("literal" or "URI") + * @return Unescaped string + */ + public String processEscapeSequences(String input, String context) { + StringBuilder result = new StringBuilder(input.length()); + + for (int i = 0; i < input.length(); i++) { + char c = input.charAt(i); + + if (c == '\\' && i + 1 < input.length()) { + i = processEscapeSequence(input, i, result, context); + } else { + result.append(c); + } + } + + return result.toString(); + } + + /** + * Processes a single escape sequence. 
+ * + * @param input Full input string + * @param i Current position (at backslash) + * @param result StringBuilder accumulating the result + * @param context Context for error messages + * @return New position after processing + */ + public int processEscapeSequence(String input, int i, StringBuilder result, String context) { + char next = input.charAt(i + 1); + + switch (next) { + case '"': + result.append('"'); + return i + 1; + case '\\': + result.append('\\'); + return i + 1; + case '>': + result.append('>'); + return i + 1; + case 'n': + result.append('\n'); + return i + 1; + case 't': + result.append('\t'); + return i + 1; + case 'r': + result.append('\r'); + return i + 1; + case 'b': + result.append('\b'); + return i + 1; + case 'f': + result.append('\f'); + return i + 1; + case 'u': + return processUnicodeEscape(input, i, 4, result, context); + case 'U': + return processUnicodeEscape(input, i, 8, result, context); + default: + result.append('\\').append(next); + return i + 1; + } + } + + /** + * Processes Unicode escape sequences . + * + * @param input Full input string + * @param i Current position (at backslash) + * @param hexLength Length of hex digits (4 or 8) + * @param result StringBuilder accumulating the result + * @param context Context for error messages + * @return New position after processing + */ + public int processUnicodeEscape(String input, int i, int hexLength, + StringBuilder result, String context) { + if (i + hexLength + 1 >= input.length()) { + String escapeType = hexLength == 4 ? "\\uXXXX" : "\\UXXXXXXXX"; + throw new ParsingErrorException( + "Incomplete " + escapeType + " escape sequence in " + context + ": " + + input.substring(i)); + } + + String hex = input.substring(i + 2, i + 2 + hexLength); + + try { + int codePoint = Integer.parseInt(hex, 16); + appendCodePoint(result, codePoint); + return i + hexLength + 1; + } catch (NumberFormatException e) { + String escapeType = hexLength == 4 ? 
"\\u" : "\\U"; + throw new ParsingErrorException( + "Invalid " + escapeType + " escape sequence in " + context + ": " + + escapeType + hex); + } + } + + /** + * Appends a Unicode code point, handling supplementary characters. + * + * @param result StringBuilder to append to + * @param codePoint Unicode code point + */ + public void appendCodePoint(StringBuilder result, int codePoint) { + if (Character.isSupplementaryCodePoint(codePoint)) { + result.append(Character.highSurrogate(codePoint)); + result.append(Character.lowSurrogate(codePoint)); + } else { + result.append((char) codePoint); + } + } + + /** + * Creates a literal from parsed components. + * + * @param label Literal lexical value + * @param datatypeIRI Optional datatype IRI + * @param languageTag Optional language tag + * @return Created literal + */ + public Literal createLiteral(String label, IRI datatypeIRI, String languageTag) { + if (datatypeIRI != null) { + return factory.createLiteral(label, datatypeIRI); + } + if (languageTag != null) { + return factory.createLiteral(label, languageTag); + } + return factory.createLiteral(label); + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractTurtleTriGListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractTurtleTriGListener.java new file mode 100644 index 000000000..b6fa44004 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractTurtleTriGListener.java @@ -0,0 +1,650 @@ +package fr.inria.corese.core.next.impl.io.parser.common; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashMap; +import 
java.util.Map; + +/** + * Base class for RDF parsers (Turtle, TriG) providing common functionality. + * Implements IRI resolution according to RFC 3986, Unicode escape handling, + * prefix management, and RDF term creation. + * + */ +public abstract class AbstractTurtleTriGListener { + + public final Model model; + public final ValueFactory factory; + public final Map prefixMap = new HashMap<>(); + public String baseURI; + + public Resource currentSubject; + public IRI currentPredicate; + + /** + * Constructs a parser listener with the specified model, factory and base URI. + * + * @param model RDF model to populate with parsed statements + * @param factory factory for creating RDF terms (IRIs, blank nodes, literals) + * @param baseURI base URI for resolving relative IRI references + */ + public AbstractTurtleTriGListener(Model model, ValueFactory factory, String baseURI) { + this.model = model; + this.factory = factory; + this.baseURI = baseURI; + initializeBasePrefix(); + } + + /** + * Registers the base URI as the empty prefix namespace. + */ + public void initializeBasePrefix() { + if (this.baseURI != null && !this.baseURI.isEmpty()) { + prefixMap.put(ParserConstants.EMPTY_STRING, this.baseURI); + model.setNamespace(ParserConstants.EMPTY_STRING, this.baseURI); + } + } + + /** + * Extracts IRI from angle brackets and processes Unicode escape sequences. + * + * @param text raw IRI text including angle brackets + * @return unescaped IRI string + */ + public String extractAndUnescapeIRI(String text) { + String iri = text.substring(1, text.length() - 1); + return unescapeIRI(iri); + } + + /** + * Updates the base URI and registers it in prefix mappings. 
+ * + * @param newBase new base URI to set + */ + public void updateBaseURI(String newBase) { + this.baseURI = resolveIRIAgainstBase(newBase); + prefixMap.put(ParserConstants.EMPTY_STRING, this.baseURI); + model.setNamespace(ParserConstants.EMPTY_STRING, this.baseURI); + } + + /** + * Registers a namespace prefix with its corresponding IRI. + * + * @param prefix namespace prefix + * @param iri namespace IRI + */ + public void registerPrefix(String prefix, String iri) { + String resolvedIRI = resolveIRIAgainstBase(iri); + prefixMap.put(prefix, resolvedIRI); + model.setNamespace(prefix, resolvedIRI); + } + + /** + * Resolves an IRI reference to absolute form. + * Handles prefixed names (QNames), relative IRIs, and absolute IRIs. + * + * @param raw raw IRI string + * @return resolved absolute IRI + * @throws ParsingErrorException if the IRI cannot be resolved + */ + public String resolveIRI(String raw) { + try { + raw = raw.trim(); + + if (raw.equals(ParserConstants.A)) { + return RDF.type.getIRI().stringValue(); + } + + if (raw.equals(ParserConstants.COLON)) { + String ns = prefixMap.get(ParserConstants.EMPTY_STRING); + return ns != null ? ns : getEffectiveBaseURI(); + } + + if (raw.startsWith(ParserConstants.IRI_START) && raw.endsWith(ParserConstants.IRI_END)) { + String iri = raw.substring(1, raw.length() - 1); + iri = unescapeIRI(iri); + return iri.isEmpty() ? 
getEffectiveBaseURI() : resolveIRIAgainstBase(iri); + } + + if (raw.contains(ParserConstants.COLON)) { + String[] parts = raw.split(ParserConstants.COLON, 2); + String prefix = parts[0]; + String localName = parts[1]; + + if (prefixMap.containsKey(prefix)) { + localName = unescapeIRI(localName); + String ns = prefixMap.get(prefix); + if (ns != null) { + return ns + localName; + } + } + + if (isAbsoluteIRI(raw)) { + return raw; + } + + throw new ParsingErrorException("Undeclared prefix: " + prefix); + } + + return resolveIRIAgainstBase(raw); + + } catch (IllegalArgumentException e) { + throw new ParsingErrorException(e.getMessage(), e); + } + } + + /** + * Resolves a relative IRI reference against the base URI using RFC 3986 algorithm. + * + * @param iri IRI reference to resolve + * @return resolved absolute IRI + */ + public String resolveIRIAgainstBase(String iri) { + String effectiveBase = getEffectiveBaseURI(); + + if (isAbsoluteIRI(iri)) { + return iri; + } + + if (iri.isEmpty()) { + return effectiveBase; + } + + try { + URI baseUri = new URI(effectiveBase); + String baseScheme = baseUri.getScheme(); + String baseAuthority = baseUri.getAuthority(); + String basePath = baseUri.getPath(); + String baseQuery = baseUri.getQuery(); + + String[] refParts = parseReference(iri); + String refScheme = refParts[0]; + String refAuthority = refParts[1]; + String refPath = refParts[2]; + String refQuery = refParts[3]; + String refFragment = refParts[4]; + + String targetScheme, targetAuthority, targetPath, targetQuery, targetFragment; + + // RFC 3986 Section 5.2.2 - Reference Resolution Algorithm + if (refScheme != null) { + targetScheme = refScheme; + targetAuthority = refAuthority; + targetPath = removeDotSegments(refPath); + targetQuery = refQuery; + } else { + if (refAuthority != null) { + targetScheme = baseScheme; + targetAuthority = refAuthority; + targetPath = removeDotSegments(refPath); + targetQuery = refQuery; + } else { + targetScheme = baseScheme; + 
targetAuthority = baseAuthority; + if (refPath.isEmpty()) { + targetPath = basePath; + targetQuery = refQuery != null ? refQuery : baseQuery; + } else { + if (refPath.startsWith(ParserConstants.SLASH)) { + targetPath = removeDotSegments(refPath); + } else { + targetPath = removeDotSegments(mergePaths(basePath, refPath)); + } + targetQuery = refQuery; + } + } + } + targetFragment = refFragment; + + return buildURI(targetScheme, targetAuthority, targetPath, targetQuery, targetFragment); + + } catch (URISyntaxException e) { + return performSimpleFallback(effectiveBase, iri); + } + } + + /** + * Constructs a URI from its components. + * + * @param scheme URI scheme (e.g., "http", "file") + * @param authority authority component (host, port, userinfo) + * @param path path component + * @param query query component + * @param fragment fragment identifier + * @return normalized URI string + */ + public String buildURI(String scheme, String authority, String path, String query, String fragment) { + StringBuilder result = new StringBuilder(); + if (scheme != null) { + result.append(scheme).append(ParserConstants.COLON); + } + if (authority != null) { + result.append(ParserConstants.DOUBLE_SLASH).append(authority); + } + if (path != null) { + result.append(path); + } + if (query != null) { + result.append(ParserConstants.QUERY_MARK).append(query); + } + if (fragment != null) { + result.append(ParserConstants.FRAGMENT).append(fragment); + } + return normalizeURI(result.toString()); + } + + /** + * Parses a URI reference into its five components. 
+ * + * @param ref URI reference to parse + * @return array containing [scheme, authority, path, query, fragment] + */ + public String[] parseReference(String ref) { + String[] parts = new String[5]; + String remaining = ref; + + int fragmentIndex = remaining.indexOf('#'); + if (fragmentIndex >= 0) { + parts[4] = remaining.substring(fragmentIndex + 1); + remaining = remaining.substring(0, fragmentIndex); + } + + int queryIndex = remaining.indexOf('?'); + if (queryIndex >= 0) { + parts[3] = remaining.substring(queryIndex + 1); + remaining = remaining.substring(0, queryIndex); + } + + int colonIndex = remaining.indexOf(':'); + if (colonIndex > 0 && isValidScheme(remaining.substring(0, colonIndex))) { + parts[0] = remaining.substring(0, colonIndex); + remaining = remaining.substring(colonIndex + 1); + } + + if (remaining.startsWith(ParserConstants.DOUBLE_SLASH)) { + int authorityEnd = remaining.indexOf('/', 2); + if (authorityEnd < 0) { + authorityEnd = remaining.length(); + } + parts[1] = remaining.substring(2, authorityEnd); + remaining = remaining.substring(authorityEnd); + } + + parts[2] = remaining; + return parts; + } + + /** + * Merges a base path with a relative path. + * + * @param basePath base path from base URI + * @param refPath relative path from reference + * @return merged path + */ + public String mergePaths(String basePath, String refPath) { + if (basePath == null || basePath.isEmpty()) { + return ParserConstants.SLASH + refPath; + } + int lastSlash = basePath.lastIndexOf('/'); + return lastSlash >= 0 ? basePath.substring(0, lastSlash + 1) + refPath : refPath; + } + + /** + * Removes dot segments from a path (RFC 3986 Section 5.2.4). + * Processes ".." and "." segments according to the normalization algorithm. 
+ * + * @param path path to normalize + * @return normalized path without dot segments + */ + public String removeDotSegments(String path) { + if (path == null || path.isEmpty()) { + return ParserConstants.EMPTY_STRING; + } + + String input = path; + StringBuilder output = new StringBuilder(); + + while (!input.isEmpty()) { + if (input.startsWith(ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { + input = input.substring(3); + } else if (input.startsWith(ParserConstants.DOT + ParserConstants.SLASH)) { + input = input.substring(2); + } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOT + ParserConstants.SLASH)) { + input = ParserConstants.SLASH + input.substring(3); + } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOT)) { + input = ParserConstants.SLASH; + } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { + input = ParserConstants.SLASH + input.substring(4); + removeLastSegment(output); + } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT)) { + input = ParserConstants.SLASH; + removeLastSegment(output); + } else if (input.equals(ParserConstants.POINT) || input.equals(ParserConstants.DOUBLE_DOT)) { + input = ParserConstants.EMPTY_STRING; + } else { + int nextSlash; + if (input.startsWith(ParserConstants.SLASH)) { + nextSlash = input.indexOf(ParserConstants.SLASH, 1); + if (nextSlash >= 0) { + output.append(input, 0, nextSlash); + input = input.substring(nextSlash); + } else { + output.append(input); + input = ParserConstants.EMPTY_STRING; + } + } else { + nextSlash = input.indexOf(ParserConstants.SLASH); + if (nextSlash >= 0) { + output.append(input, 0, nextSlash); + input = input.substring(nextSlash); + } else { + output.append(input); + input = ParserConstants.EMPTY_STRING; + } + } + } + } + + return output.toString(); + } + + /** + * Removes the last path segment from the output buffer. + * Used during dot segment removal when processing ".." 
segments. + * + * @param output string builder containing the path being constructed + */ + public void removeLastSegment(StringBuilder output) { + String outputStr = output.toString(); + int lastSlash = outputStr.lastIndexOf(ParserConstants.SLASH); + output.setLength(lastSlash >= 0 ? lastSlash : 0); + } + + /** + * Provides a fallback resolution mechanism when RFC 3986 parsing fails. + * + * @param base base URI + * @param relative relative IRI reference + * @return resolved IRI using simple concatenation rules + */ + public String performSimpleFallback(String base, String relative) { + if (relative.isEmpty()) { + return base; + } + if (base.endsWith(ParserConstants.SLASH)) { + return base + relative; + } + int lastSlash = base.lastIndexOf('/'); + return lastSlash >= 0 ? base.substring(0, lastSlash + 1) + relative : base + ParserConstants.SLASH + relative; + } + + /** + * Normalizes URI strings, ensuring proper format for file:// URIs. + * + * @param uri URI to normalize + * @return normalized URI string + */ + public String normalizeURI(String uri) { + if (uri == null) { + return null; + } + if (uri.startsWith(ParserConstants.FILE_PROTOCOL_SIMPLE) && !uri.startsWith(ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH)) { + if (!uri.startsWith(ParserConstants.FILE_PROTOCOL)) { + uri = uri.replace(ParserConstants.FILE_PROTOCOL_SIMPLE, ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH); + } + } + return uri; + } + + /** + * Determines whether an IRI is absolute (contains a valid scheme). + * + * @param iri IRI to check + * @return true if the IRI is absolute, false otherwise + */ + public boolean isAbsoluteIRI(String iri) { + if (iri == null || iri.isEmpty()) { + return false; + } + int colonIndex = iri.indexOf(':'); + if (colonIndex == -1 || colonIndex == 0) { + return false; + } + return isValidScheme(iri.substring(0, colonIndex)); + } + + /** + * Validates a URI scheme according to RFC 3986. 
+ * A valid scheme must start with a letter and contain only letters, digits, '+', '-', or '.'. + * + * @param scheme scheme to validate + * @return true if the scheme is valid, false otherwise + */ + public boolean isValidScheme(String scheme) { + if (scheme == null || scheme.isEmpty() || !Character.isLetter(scheme.charAt(0))) { + return false; + } + for (int i = 1; i < scheme.length(); i++) { + char c = scheme.charAt(i); + if (!Character.isLetterOrDigit(c) && c != '+' && c != '-' && c != '.') { + return false; + } + } + return true; + } + + /** + * Returns the effective base URI, using a default if none is set. + * + * @return effective base URI + */ + public String getEffectiveBaseURI() { + String effective = (baseURI != null && !baseURI.isEmpty()) ? baseURI : ParserConstants.getDefaultBaseURI(); + return normalizeURI(effective); + } + /** + * Processes Unicode escape sequences in IRIs. + * + * @param rawIri IRI string potentially containing escape sequences + * @return unescaped IRI string + * @throws IllegalArgumentException if escape sequences are malformed or contain surrogates + */ + public String unescapeIRI(String rawIri) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rawIri.length(); i++) { + char c = rawIri.charAt(i); + if (c == '\\' && i + 1 < rawIri.length()) { + char next = rawIri.charAt(i + 1); + if (next == 'u' || next == 'U') { + int len = (next == 'u') ? 4 : 8; + if (i + len + 1 <= rawIri.length()) { + String hex = rawIri.substring(i + 2, i + 2 + len); + int codePoint = Integer.parseInt(hex, 16); + if (codePoint >= 0xD800 && codePoint <= 0xDFFF) { + throw new IllegalArgumentException("Surrogates not allowed: \\u" + hex); + } + sb.appendCodePoint(codePoint); + i += len + 1; + } else { + throw new IllegalArgumentException("Incomplete Unicode escape"); + } + } else { + sb.append(next); + i++; + } + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Processes escape sequences in RDF string literals. 
+ * Handles standard escapes (\t, \n, \r, etc.) and Unicode escapes. + * + * @param text literal string including delimiters (quotes or triple-quotes) + * @return unescaped string content + * @throws IllegalArgumentException if escape sequences are malformed + */ + public String unescapeString(String text) { + if (text == null || text.length() < 2) { + return text; + } + + boolean isMultiline = text.startsWith(ParserConstants.TRIPLE_QUOTE) || text.startsWith(ParserConstants.TRIPLE_APOSTROPHE); + String content = isMultiline ? text.substring(3, text.length() - 3) : text.substring(1, text.length() - 1); + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < content.length(); i++) { + char c = content.charAt(i); + if (c == '\\' && i + 1 < content.length()) { + char next = content.charAt(i + 1); + switch (next) { + case 't': + sb.append('\t'); + i++; + break; + case 'n': + sb.append('\n'); + i++; + break; + case 'r': + sb.append('\r'); + i++; + break; + case 'b': + sb.append('\b'); + i++; + break; + case 'f': + sb.append('\f'); + i++; + break; + case '"': + sb.append('"'); + i++; + break; + case '\'': + sb.append('\''); + i++; + break; + case '\\': + sb.append('\\'); + i++; + break; + case 'u': + case 'U': + int len = (next == 'u') ? 4 : 8; + if (i + len + 1 <= content.length()) { + String hex = content.substring(i + 2, i + 2 + len); + int codePoint = Integer.parseInt(hex, 16); + if (codePoint >= 0xD800 && codePoint <= 0xDFFF) { + throw new IllegalArgumentException("Invalid Unicode escape: Surrogate code points not allowed"); + } + sb.appendCodePoint(codePoint); + i += len + 1; + } else { + throw new IllegalArgumentException("Incomplete Unicode escape sequence"); + } + break; + default: + sb.append(c).append(next); + i++; + break; + } + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Adds an RDF statement to the model with exception handling. + * Subclasses may override to support named graphs or other extensions. 
+ * + * @param subject statement subject + * @param predicate statement predicate + * @param object statement object + * @throws ParsingErrorException if the statement cannot be added + */ + public void safeAddStatement(Resource subject, IRI predicate, Value object) { + try { + model.add(subject, predicate, object); + } catch (Exception e) { + throw new ParsingErrorException("Failed to add statement: " + e.getMessage(), e); + } + } + + /** + * Creates an RDF literal with optional language tag or datatype. + * + * @param label literal lexical value + * @param langTag language tag (may be null) + * @param datatypeIRI datatype IRI (may be null) + * @return RDF literal + */ + public Literal createLiteral(String label, String langTag, String datatypeIRI) { + if (langTag != null) { + return factory.createLiteral(label, langTag); + } + if (datatypeIRI != null) { + return factory.createLiteral(label, factory.createIRI(datatypeIRI)); + } + return factory.createLiteral(label); + } + + /** + * Creates a boolean literal. + * + * @param text boolean value as string ("true" or "false") + * @return boolean literal with xsd:boolean datatype + */ + public Literal createBooleanLiteral(String text) { + return factory.createLiteral(text, XSD.BOOLEAN.getIRI()); + } + + /** + * Creates a numeric literal with appropriate XSD datatype. + * + * @param text numeric value as string + * @param type numeric type (INTEGER, DECIMAL, or DOUBLE) + * @return numeric literal with corresponding XSD datatype + */ + public Literal createNumericLiteral(String text, NumericType type) { + switch (type) { + case DOUBLE: + return factory.createLiteral(text, XSD.DOUBLE.getIRI()); + case DECIMAL: + return factory.createLiteral(text, XSD.DECIMAL.getIRI()); + case INTEGER: + default: + return factory.createLiteral(text, XSD.INTEGER.getIRI()); + } + } + + /** + * Enumeration of numeric literal types corresponding to XSD datatypes. 
+ */ + public enum NumericType { + /** + * XSD integer type + */ + INTEGER, + /** + * XSD decimal type + */ + DECIMAL, + /** + * XSD double type + */ + DOUBLE + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java index 29bdea34e..cf3d5a286 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java @@ -10,7 +10,6 @@ import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser; import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.tree.ParseTree; -import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.ParseTreeWalker; import java.io.IOException; @@ -52,7 +51,6 @@ public RDFFormat getRDFFormat() { return RDFFormat.NQUADS; } - @Override public void parse(InputStream in) throws ParsingErrorException { parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); @@ -79,68 +77,78 @@ public void parse(Reader reader) throws ParsingErrorException { public void parse(Reader reader, String baseURI) throws ParsingErrorException { try { CharStream charStream = CharStreams.fromReader(reader); + NQuadsLexer lexer = new NQuadsLexer(charStream); + configureErrorHandling(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer); + NQuadsParser parser = new NQuadsParser(tokens); + configureErrorHandling(parser); - lexer.removeErrorListeners(); - lexer.addErrorListener(ThrowingErrorListener.INSTANCE); - - NQuadsParser antlrParser = new NQuadsParser(tokens); - antlrParser.removeErrorListeners(); - antlrParser.addErrorListener(ThrowingErrorListener.INSTANCE); + ParseTree tree = parser.nquadsDoc(); - ParseTree tree = antlrParser.nquadsDoc(); ParseTreeWalker walker = new ParseTreeWalker(); - NQuadsListener listener = new NQuadsListener(getModel(), 
getValueFactory(), getConfig()); - walker.walk((ParseTreeListener) listener, tree); + walker.walk(listener, tree); + } catch (ParsingErrorException e) { + throw e; } catch (IOException e) { - throw new ParsingErrorException("Failed to parse N-Quads: " + e.getMessage(), e); - } catch (RuntimeException e) { - Throwable current = e; - while (current != null) { - if (current instanceof ParsingErrorException) { - throw (ParsingErrorException) current; - } - current = current.getCause(); - } - throw new ParsingErrorException("Unexpected error during N-Quads parsing: " + e.getMessage(), e); + throw new ParsingErrorException("Failed to read N-Quads input: " + e.getMessage(), e); + } catch (Exception e) { + throw unwrapException(e); } } + /** + * Configures error handling for lexer or parser. + * Replaces default error listeners with strict N-Quads error listener. + * + * @param recognizer Lexer or parser to configure + */ + private void configureErrorHandling(Recognizer recognizer) { + recognizer.removeErrorListeners(); + recognizer.addErrorListener(NQuadsErrorListener.INSTANCE); + } + + /** + * Unwraps nested exceptions to find and re-throw ParsingErrorException. + * + * @param exception Exception to unwrap + * @return ParsingErrorException if found in cause chain + * @throws ParsingErrorException always, either original or wrapped + */ + private ParsingErrorException unwrapException(Exception exception) { + Throwable current = exception; + while (current != null) { + if (current instanceof ParsingErrorException) { + return (ParsingErrorException) current; + } + current = current.getCause(); + } + return new ParsingErrorException( + "Unexpected error during N-Quads parsing: " + exception.getMessage(), + exception); + } /** * Custom ANTLR ErrorListener that throws a ParsingErrorException on any syntax error. * This ensures that parsing failures are immediately reported as application-specific exceptions. 
*/ - private static class ThrowingErrorListener extends BaseErrorListener { - static final ThrowingErrorListener INSTANCE = new ThrowingErrorListener(); - - @Override - public void syntaxError(Recognizer recognizer, Object offendingSymbol, - int line, int charPositionInLine, - String msg, RecognitionException e) { + private static class NQuadsErrorListener extends BaseErrorListener { - if (offendingSymbol != null) { - String symbolText = offendingSymbol.toString(); - - if (msg != null && msg.contains("token recognition error") && symbolText.equals("':'")) { - throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")"); - } - - if (msg != null && msg.contains("no viable alternative") && symbolText.contains("_:")) { - throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")"); - } - - if (symbolText.contains("_:") && symbolText.contains(":") && !symbolText.equals("_:")) { - throw new ParsingErrorException("Invalid blank node label: colon not allowed (line " + line + ")"); - } - } + static final NQuadsErrorListener INSTANCE = new NQuadsErrorListener(); + @Override + public void syntaxError(Recognizer recognizer, + Object offendingSymbol, + int line, + int charPositionInLine, + String msg, + RecognitionException e) { throw new ParsingErrorException( - String.format("line %d:%d %s", line, charPositionInLine, msg)); + String.format("Syntax error in N-Quads at line %d:%d - %s", + line, charPositionInLine, msg)); } } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java index 1cca33d6f..eb147add8 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsListener.java @@ -1,13 +1,9 @@ package 
fr.inria.corese.core.next.impl.io.parser.nquads; -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Literal; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.common.AbstractNTriplesNQuadsListener; import fr.inria.corese.core.next.impl.parser.antlr.NQuadsBaseListener; import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser; @@ -18,50 +14,60 @@ */ public class NQuadsListener extends NQuadsBaseListener { + private final AbstractNTriplesNQuadsListener abstractNTriplesQuadsListener; private final Model model; private final ValueFactory factory; - @SuppressWarnings("unused") - private final IOOptions options; private Resource currentSubject; private IRI currentPredicate; private Resource currentGraph; /** - * Constructor for the NQuadsListener. + * Constructs an N-Quads listener. * - * @param model The RDF model to populate. - * @param factory The ValueFactory for creating RDF resources. - * @param options IOOptions for configuration (if any). + * @param model RDF model to populate + * @param factory ValueFactory for creating RDF resources + * @param options IO configuration options */ public NQuadsListener(Model model, ValueFactory factory, IOOptions options) { this.model = model; this.factory = factory; - this.options = options; + this.abstractNTriplesQuadsListener = new AbstractNTriplesNQuadsListener(model, factory, options) { + }; } - - - /** - * Exits a statement context, extracting the object and adding the complete triple/quad to the model. - * Resets the current subject, predicate, and graph. - * @param ctx The StatementContext from the ANTLR parse tree. 
- */ @Override public void enterStatement(NQuadsParser.StatementContext ctx) { + + checkForInvalidDirectives(ctx); + + currentSubject = extractSubject(ctx.subject()); currentPredicate = extractPredicate(ctx.predicate()); currentGraph = (ctx.graphLabel() != null) ? extractGraph(ctx.graphLabel()) : null; } + /** + * Checks for invalid directives in N-Quads format. + * N-Quads does not support @base or @prefix directives. + */ + private void checkForInvalidDirectives(NQuadsParser.StatementContext ctx) { + String text = ctx.getText(); + if (text != null && (text.contains("@base") || text.contains("@prefix"))) { + throw new ParsingErrorException( + "Directives (@base, @prefix) are not allowed in N-Quads format"); + } + } @Override public void exitStatement(NQuadsParser.StatementContext ctx) { Value object = extractObject(ctx.object()); + if (currentGraph != null) { model.add(currentSubject, currentPredicate, object, currentGraph); } else { model.add(currentSubject, currentPredicate, object); } + currentSubject = null; currentPredicate = null; currentGraph = null; @@ -69,14 +75,11 @@ public void exitStatement(NQuadsParser.StatementContext ctx) { /** * Extracts a resource (IRI or Blank Node) from the subject context. - * Handles unescaping of URI characters for IRIs and extracts blank node labels. - * @param ctx The SubjectContext from the ANTLR parse tree. - * @return The created Resource (IRI or BNode). - * @throws ParsingErrorException if the subject type is unsupported or blank node label is invalid. 
*/ protected Resource extractSubject(NQuadsParser.SubjectContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText()))); + String iri = abstractNTriplesQuadsListener.unescapeUri(stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } if (ctx.BLANK_NODE_LABEL() != null) { String label = ctx.BLANK_NODE_LABEL().getText().substring(2); @@ -85,30 +88,25 @@ protected Resource extractSubject(NQuadsParser.SubjectContext ctx) { } throw new ParsingErrorException("Unsupported N-Quads subject: " + ctx.getText()); } + /** * Extracts a predicate (IRI) from the predicate context. - * Handles unescaping of URI characters. - * @param ctx The PredicateContext from the ANTLR parse tree. - * @return The created IRI. - * @throws ParsingErrorException if the predicate type is unsupported. */ protected IRI extractPredicate(NQuadsParser.PredicateContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText()))); + String iri = abstractNTriplesQuadsListener.unescapeUri(stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } throw new ParsingErrorException("Unsupported N-Quads predicate: " + ctx.getText()); } /** * Extracts a value (IRI, Blank Node, or Literal) from the object context. - * Delegates to specific extraction methods based on the object type. - * @param ctx The ObjectContext from the ANTLR parse tree. - * @return The created Value (IRI, BNode, or Literal). - * @throws ParsingErrorException if the object type is unsupported or blank node label is invalid. 
*/ protected Value extractObject(NQuadsParser.ObjectContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText()))); + String iri = abstractNTriplesQuadsListener.unescapeUri(stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } if (ctx.BLANK_NODE_LABEL() != null) { String label = ctx.BLANK_NODE_LABEL().getText().substring(2); @@ -122,15 +120,12 @@ protected Value extractObject(NQuadsParser.ObjectContext ctx) { } /** - * Extracts a graph (IRI or Blank Node) from the graph context. - * Handles unescaping of URI characters for IRIs and extracts blank node labels. - * @param ctx The GraphLabelContext from the ANTLR parse tree. - * @return The created Resource (IRI or BNode) representing the graph. - * @throws ParsingErrorException if the graph label type is unsupported or blank node label is invalid. + * Extracts a graph (IRI or Blank Node) from the graph label context. */ protected Resource extractGraph(NQuadsParser.GraphLabelContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText()))); + String iri = abstractNTriplesQuadsListener.unescapeUri(stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } if (ctx.BLANK_NODE_LABEL() != null) { String label = ctx.BLANK_NODE_LABEL().getText().substring(2); @@ -142,209 +137,38 @@ protected Resource extractGraph(NQuadsParser.GraphLabelContext ctx) { /** * Extracts and unescapes a literal from the ANTLR context. - * This method handles string literals with or without datatype/language. - * @param ctx The LiteralContext from the ANTLR parse tree. - * @return The created Literal value. 
*/ protected Literal extractLiteral(NQuadsParser.LiteralContext ctx) { - String rawLiteralText; - if (ctx.STRING_LITERAL_QUOTE() != null) { - rawLiteralText = ctx.STRING_LITERAL_QUOTE().getText(); - } - else { - throw new ParsingErrorException("Unsupported literal type or missing literal token: " + ctx.getText()); - } - String label = unescapeLiteral(rawLiteralText); + String rawText = ctx.STRING_LITERAL_QUOTE() != null + ? ctx.STRING_LITERAL_QUOTE().getText() + : null; - if (ctx.IRIREF() != null) { - IRI datatype = factory.createIRI(unescapeUri(stripAngles(ctx.IRIREF().getText()))); - return factory.createLiteral(label, datatype); - } - if (ctx.LANGTAG() != null) { - String lang = ctx.LANGTAG().getText().substring(1); - return factory.createLiteral(label, lang); + if (rawText == null) { + throw new ParsingErrorException("Missing literal token: " + ctx.getText()); } - return factory.createLiteral(label); - } + String label = abstractNTriplesQuadsListener.unescapeLiteral(rawText); - /** - * Unescapes common N-Quads literal escape sequences. - * This method handles \", \\, \n, \t, \r, \b, \f. - * It also removes the surrounding quotes from the literal string. - * - * @param literalText The raw literal string from ANTLR (including quotes and escapes). - * @return The unescaped literal string without surrounding quotes. - * @throws ParsingErrorException if an invalid Unicode escape sequence is found. 
- */ - protected String unescapeLiteral(String literalText) { - String unquotedLiteral; - int quoteLength; - if (literalText.startsWith("\"\"\"") && literalText.endsWith("\"\"\"")) { - if (literalText.length() < 6) { - throw new ParsingErrorException("Invalid triple-quoted string"); - } - quoteLength = 3; - } else if (literalText.startsWith("\"") && literalText.endsWith("\"")) { - if (literalText.length() < 2) { - throw new ParsingErrorException("Invalid single-quoted string"); - } - quoteLength = 1; - } else { - throw new ParsingErrorException("Literal text does not start/end with expected N-Quads quotes: " + literalText); - } + IRI datatype = null; + String langTag = null; - unquotedLiteral = literalText.substring(quoteLength, literalText.length() - quoteLength); - - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < unquotedLiteral.length(); i++) { - char c = unquotedLiteral.charAt(i); - if (c == '\\' && i + 1 < unquotedLiteral.length()) { - char nextChar = unquotedLiteral.charAt(i + 1); - switch (nextChar) { - case '"': - sb.append('"'); - i++; - break; - case '\\': - sb.append('\\'); - i++; - break; - case 'n': - sb.append('\n'); - i++; - break; - case 't': - sb.append('\t'); - i++; - break; - case 'r': - sb.append('\r'); - i++; - break; - case 'b': - sb.append('\b'); - i++; - break; - case 'f': - sb.append('\f'); - i++; - break; - case 'u': - if (i + 5 < unquotedLiteral.length()) { - String hex = unquotedLiteral.substring(i + 2, i + 6); - try { - int unicodeChar = Integer.parseInt(hex, 16); - sb.append((char) unicodeChar); - i += 5; - } catch (NumberFormatException e) { - throw new ParsingErrorException("Invalid \\uXXXX escape sequence in literal: \\u" + hex); - } - } else { - throw new ParsingErrorException("Incomplete \\uXXXX escape sequence in literal: " + unquotedLiteral.substring(i)); - } - break; - case 'U': - if (i + 9 < unquotedLiteral.length()) { - String hex = unquotedLiteral.substring(i + 2, i + 10); - try { - int unicodeChar = 
Integer.parseInt(hex, 16); - if (Character.isSupplementaryCodePoint(unicodeChar)) { - sb.append(Character.highSurrogate(unicodeChar)); - sb.append(Character.lowSurrogate(unicodeChar)); - } else { - sb.append((char) unicodeChar); - } - i += 9; - } catch (NumberFormatException e) { - throw new ParsingErrorException("Invalid \\UXXXXXXXX escape sequence in literal: \\U" + hex); - } - } else { - throw new ParsingErrorException("Incomplete \\UXXXXXXXX escape sequence in literal: " + unquotedLiteral.substring(i)); - } - break; - default: - sb.append(c).append(nextChar); - i++; - break; - } - } else { - sb.append(c); - } + if (ctx.IRIREF() != null) { + String iri = abstractNTriplesQuadsListener.unescapeUri(stripAngles(ctx.IRIREF().getText())); + datatype = factory.createIRI(iri); + } else if (ctx.LANGTAG() != null) { + langTag = ctx.LANGTAG().getText().substring(1); } - return sb.toString(); + + return abstractNTriplesQuadsListener.createLiteral(label, datatype, langTag); } /** - * Unescapes common N-Quads URI escape sequences. - * This method handles \>, \\, \ uXXXX, \UXXXXXXXX. - * - * @param uri The escaped URI string. - * @return The unescaped URI string. - * @throws ParsingErrorException if an invalid Unicode escape sequence is found. + * Strips angle brackets from an IRI reference. 
*/ - protected String unescapeUri(String uri) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < uri.length(); i++) { - char c = uri.charAt(i); - if (c == '\\' && i + 1 < uri.length()) { - char nextChar = uri.charAt(i + 1); - switch (nextChar) { - case '>': - sb.append('>'); - i++; - break; - case '\\': - sb.append('\\'); - i++; - break; - case 'u': - if (i + 5 < uri.length()) { - String hex = uri.substring(i + 2, i + 6); - try { - int unicodeChar = Integer.parseInt(hex, 16); - sb.append((char) unicodeChar); - i += 5; - } catch (NumberFormatException e) { - throw new ParsingErrorException("Invalid \\uXXXX escape sequence in URI: \\u" + hex); - } - } else { - throw new ParsingErrorException("Incomplete \\uXXXX escape sequence in URI: " + uri.substring(i)); - } - break; - case 'U': - if (i + 9 < uri.length()) { - String hex = uri.substring(i + 2, i + 10); - try { - int unicodeChar = Integer.parseInt(hex, 16); - if (Character.isSupplementaryCodePoint(unicodeChar)) { - sb.append(Character.highSurrogate(unicodeChar)); - sb.append(Character.lowSurrogate(unicodeChar)); - } else { - sb.append((char) unicodeChar); - } - i += 9; - } catch (NumberFormatException e) { - throw new ParsingErrorException("Invalid \\UXXXXXXXX escape sequence in URI: \\U" + hex); - } - } else { - throw new ParsingErrorException("Incomplete \\UXXXXXXXX escape sequence in URI: " + uri.substring(i)); - } - break; - default: - sb.append(c).append(nextChar); - i++; - break; - } - } else { - sb.append(c); - } - } - return sb.toString(); - } private String stripAngles(String iriRef) { - return iriRef.substring(1, iriRef.length() - 1); + return abstractNTriplesQuadsListener.stripAngles(iriRef); } + /** * Validates a blank node label according to RDF 1.1 N-Quads specification. * Blank node labels must match PN_LOCAL rules, which means they cannot be empty, @@ -353,15 +177,34 @@ private String stripAngles(String iriRef) { * @throws ParsingErrorException if the blank node label is invalid. 
*/ protected void validateBlankNodeLabel(String label) { - if (label.isEmpty()) { - throw new ParsingErrorException("Blank node label cannot be empty"); - } - if (label.contains(":")) { - throw new ParsingErrorException("Blank node label cannot contain colon"); - } + abstractNTriplesQuadsListener.validateBlankNodeLabel(label); if (!label.matches("^[A-Za-z_0-9][A-Za-z0-9_\\-\\.]*$")) { throw new ParsingErrorException("Invalid blank node label syntax: " + label); } } -} + + /** + * Unescapes common N-Quads literal escape sequences. + * + * @param literalText The raw literal string from ANTLR (including quotes) + * @return The unescaped literal string without surrounding quotes + * @deprecated Use helper.unescapeLiteral instead + */ + @Deprecated + protected String unescapeLiteral(String literalText) { + return abstractNTriplesQuadsListener.unescapeLiteral(literalText); + } + + /** + * Unescapes common N-Quads URI escape sequences. + * + * @param uri The escaped URI string + * @return The unescaped URI string + * @deprecated Use helper.unescapeUri instead + */ + @Deprecated + protected String unescapeUri(String uri) { + return abstractNTriplesQuadsListener.unescapeUri(uri); + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java index b9d58d6ea..e45277a63 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListener.java @@ -8,6 +8,7 @@ import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.common.AbstractNTriplesNQuadsListener; import fr.inria.corese.core.next.impl.parser.antlr.NTriplesBaseListener; import 
fr.inria.corese.core.next.impl.parser.antlr.NTriplesParser; @@ -18,16 +19,15 @@ */ public class NTriplesListener extends NTriplesBaseListener { + private final AbstractNTriplesNQuadsListener abstractNTriplesQuadsListener; private final Model model; private final ValueFactory factory; - @SuppressWarnings("unused") - private final IOOptions options; private Resource currentSubject; private IRI currentPredicate; /** - * Constructor for the NTriplesListener. + * Constructs an N-Triples listener. * * @param model The RDF model to populate. * @param factory The ValueFactory for creating RDF resources. @@ -36,7 +36,7 @@ public class NTriplesListener extends NTriplesBaseListener { public NTriplesListener(Model model, ValueFactory factory, IOOptions options) { this.model = model; this.factory = factory; - this.options = options; + this.abstractNTriplesQuadsListener = new AbstractNTriplesNQuadsListener(model, factory, options) {}; } @Override @@ -58,18 +58,15 @@ public void exitTriple(NTriplesParser.TripleContext ctx) { */ protected Resource extractSubject(NTriplesParser.SubjectContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1))); + String iri = abstractNTriplesQuadsListener.unescapeUri(abstractNTriplesQuadsListener.stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } if (ctx.BLANK_NODE_LABEL() != null) { - String blankNodeLabel = ctx.BLANK_NODE_LABEL().getText().substring(2); - try { - validateBlankNodeLabel(blankNodeLabel); - } catch (ParsingErrorException e) { - throw new IllegalArgumentException("Invalid blank node label in subject: " + e.getMessage(), e); - } - return factory.createBNode(blankNodeLabel); + String label = abstractNTriplesQuadsListener.extractBlankNodeLabel(ctx.BLANK_NODE_LABEL().getText()); + abstractNTriplesQuadsListener.validateBlankNodeLabel(label); + return factory.createBNode(label); } - throw new 
IllegalArgumentException("Unsupported N-Triples subject: " + ctx.getText()); + throw new ParsingErrorException("Unsupported N-Triples subject: " + ctx.getText()); } /** @@ -77,9 +74,10 @@ protected Resource extractSubject(NTriplesParser.SubjectContext ctx) { */ protected IRI extractPredicate(NTriplesParser.PredicateContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1))); + String iri = abstractNTriplesQuadsListener.unescapeUri(abstractNTriplesQuadsListener.stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } - throw new IllegalArgumentException("Unsupported N-Triples predicate: " + ctx.getText()); + throw new ParsingErrorException("Unsupported N-Triples predicate: " + ctx.getText()); } /** @@ -87,216 +85,75 @@ protected IRI extractPredicate(NTriplesParser.PredicateContext ctx) { */ protected Value extractObject(NTriplesParser.ObjectContext ctx) { if (ctx.IRIREF() != null) { - return factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1))); + String iri = abstractNTriplesQuadsListener.unescapeUri(abstractNTriplesQuadsListener.stripAngles(ctx.IRIREF().getText())); + return factory.createIRI(iri); } if (ctx.BLANK_NODE_LABEL() != null) { - String blankNodeLabel = ctx.BLANK_NODE_LABEL().getText().substring(2); - try { - validateBlankNodeLabel(blankNodeLabel); - } catch (ParsingErrorException e) { - throw new IllegalArgumentException("Invalid blank node label in object: " + e.getMessage(), e); - } - return factory.createBNode(blankNodeLabel); + String label = abstractNTriplesQuadsListener.extractBlankNodeLabel(ctx.BLANK_NODE_LABEL().getText()); + abstractNTriplesQuadsListener.validateBlankNodeLabel(label); + return factory.createBNode(label); } if (ctx.literal() != null) { return extractLiteral(ctx.literal()); } - throw new IllegalArgumentException("Unsupported N-Triples object: " + ctx.getText()); + 
throw new ParsingErrorException("Unsupported N-Triples object: " + ctx.getText()); } /** * Extracts and unescapes a literal from the ANTLR context. - * This method handles string literals with or without datatype/language. */ protected Literal extractLiteral(NTriplesParser.LiteralContext ctx) { - String label = ctx.STRING_LITERAL_QUOTE().getText(); - label = unescapeLiteral(label); + String rawText = ctx.STRING_LITERAL_QUOTE().getText(); + String label = abstractNTriplesQuadsListener.unescapeLiteral(rawText); + + IRI datatype = null; + String langTag = null; if (ctx.IRIREF() != null) { - IRI datatype = factory.createIRI(unescapeUri(ctx.IRIREF().getText().substring(1, ctx.IRIREF().getText().length() - 1))); - return factory.createLiteral(label, datatype); - } - if (ctx.LANGTAG() != null) { - String lang = ctx.LANGTAG().getText().substring(1); - return factory.createLiteral(label, lang); + String iri = abstractNTriplesQuadsListener.unescapeUri(abstractNTriplesQuadsListener.stripAngles(ctx.IRIREF().getText())); + datatype = factory.createIRI(iri); + } else if (ctx.LANGTAG() != null) { + langTag = ctx.LANGTAG().getText().substring(1); } - return factory.createLiteral(label); + + return abstractNTriplesQuadsListener.createLiteral(label, datatype, langTag); } /** * Validates a blank node label according to RDF N-Triples specification. * Blank node labels must not be empty and must not contain a colon. - * They *can* start with a digit. * - * @param label The blank node label (without the `_: `prefix). - * @throws ParsingErrorException if the label is invalid. 
+ * @param label The blank node label (without the "_:" prefix) + * @throws ParsingErrorException if the label is invalid + * @deprecated Use helper.validateBlankNodeLabel instead */ + @Deprecated protected void validateBlankNodeLabel(String label) throws ParsingErrorException { - if (label == null || label.isEmpty()) { - throw new ParsingErrorException("Blank node label cannot be empty."); - } - - if (label.contains(":")) { - throw new ParsingErrorException("Blank node label cannot contain a colon (':')"); - } - + abstractNTriplesQuadsListener.validateBlankNodeLabel(label); } /** * Unescapes common N-Triples literal escape sequences. - * This method handles `\"`, `\\`, `\n`, `\t`, `\r`, `\b`, `\f`. - * It also handles `\ uXXXX` and `\UXXXXXXXX` for Unicode escapes. - * It also removes the surrounding quotes from the literal string. * - * @param literalText The raw literal string from ANTLR (including quotes and escapes). - * @return The unescaped literal string without surrounding quotes. 
+ * @param literalText The raw literal string from ANTLR (including quotes) + * @return The unescaped literal string without surrounding quotes + * @deprecated Use helper.unescapeLiteral instead */ + @Deprecated protected String unescapeLiteral(String literalText) { - String unquotedLiteral = literalText.substring(1, literalText.length() - 1); - - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < unquotedLiteral.length(); i++) { - char c = unquotedLiteral.charAt(i); - if (c == '\\' && i + 1 < unquotedLiteral.length()) { - char nextChar = unquotedLiteral.charAt(i + 1); - switch (nextChar) { - case '"': - builder.append('"'); - i++; - break; - case '\\': - builder.append('\\'); - i++; - break; - case 'n': - builder.append('\n'); - i++; - break; - case 't': - builder.append('\t'); - i++; - break; - case 'r': - builder.append('\r'); - i++; - break; - case 'b': - builder.append('\b'); - i++; - break; - case 'f': - builder.append('\f'); - i++; - break; - case 'u': - if (i + 5 < unquotedLiteral.length()) { - String hex = unquotedLiteral.substring(i + 2, i + 6); - try { - int unicodeChar = Integer.parseInt(hex, 16); - builder.append((char) unicodeChar); - i += 5; - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Invalid \\uXXXX escape sequence in literal: \\u" + hex); - } - } else { - throw new IllegalArgumentException("Incomplete \\uXXXX escape sequence in literal: " + unquotedLiteral.substring(i)); - } - break; - case 'U': - if (i + 9 < unquotedLiteral.length()) { - String hex = unquotedLiteral.substring(i + 2, i + 10); - try { - int unicodeChar = Integer.parseInt(hex, 16); - if (Character.isSupplementaryCodePoint(unicodeChar)) { - builder.append(Character.highSurrogate(unicodeChar)); - builder.append(Character.lowSurrogate(unicodeChar)); - } else { - builder.append((char) unicodeChar); - } - i += 9; - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Invalid \\UXXXXXXXX escape sequence in literal: \\U" + 
hex); - } - } else { - throw new IllegalArgumentException("Incomplete \\UXXXXXXXX escape sequence in literal: " + unquotedLiteral.substring(i)); - } - break; - default: - builder.append(c).append(nextChar); - i++; - break; - } - } else { - builder.append(c); - } - } - return builder.toString(); + return abstractNTriplesQuadsListener.unescapeLiteral(literalText); } /** * Unescapes common N-Triples URI escape sequences. - * This method handles `\>`, `\\`, `\ nXXXX`, `\UXXXXXXXX`. * - * @param uri The escaped URI string. - * @return The unescaped URI string. + * @param uri The escaped URI string + * @return The unescaped URI string + * @deprecated Use helper.unescapeUri instead */ + @Deprecated protected String unescapeUri(String uri) { - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < uri.length(); i++) { - char c = uri.charAt(i); - if (c == '\\' && i + 1 < uri.length()) { - char nextChar = uri.charAt(i + 1); - switch (nextChar) { - case '>': - builder.append('>'); - i++; - break; - case '\\': - builder.append('\\'); - i++; - break; - case 'u': - if (i + 5 < uri.length()) { - String hex = uri.substring(i + 2, i + 6); - try { - int unicodeChar = Integer.parseInt(hex, 16); - builder.append((char) unicodeChar); - i += 5; - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Invalid \\uXXXX escape sequence in URI: \\u" + hex); - } - } else { - throw new IllegalArgumentException("Incomplete \\uXXXX escape sequence in URI: " + uri.substring(i)); - } - break; - case 'U': - if (i + 9 < uri.length()) { - String hex = uri.substring(i + 2, i + 10); - try { - int unicodeChar = Integer.parseInt(hex, 16); - if (Character.isSupplementaryCodePoint(unicodeChar)) { - builder.append(Character.highSurrogate(unicodeChar)); - builder.append(Character.lowSurrogate(unicodeChar)); - } else { - builder.append((char) unicodeChar); - } - i += 9; - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Invalid \\UXXXXXXXX escape 
sequence in URI: \\U" + hex); - } - } else { - throw new IllegalArgumentException("Incomplete \\UXXXXXXXX escape sequence in URI: " + uri.substring(i)); - } - break; - default: - builder.append(c).append(nextChar); - i++; - break; - } - } else { - builder.append(c); - } - } - return builder.toString(); + return abstractNTriplesQuadsListener.unescapeUri(uri); } } + diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index 917fa0793..b7db9779d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -174,7 +174,6 @@ private void handleEndElement(String uri, String localName, String qName) { IRI predicate = ctx.predicateStack.pop(); Resource subject = ctx.subjectStack.peek(); String datatypeUri = ctx.datatypeStack.isEmpty() ? null : ctx.datatypeStack.pop(); - //emitLiteral(subject, predicate, text, datatypeUri); String lang = ctx.langStack.isEmpty() ? 
null : ctx.langStack.peek(); emitter.emitLiteral(subject, predicate, text, datatypeUri, lang); return; @@ -213,7 +212,8 @@ private void updateLang(Attributes attrs) { private void updateDatatype(Attributes attrs) { String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); if (datatype != null) { - ctx.datatypeStack.push(datatype); + String expanded = expandQNameFromQName(datatype); + ctx.datatypeStack.push(expanded); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java index e31e09eff..07b1f1fc2 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java @@ -4,11 +4,10 @@ import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.IncorrectFormatException; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; import org.xml.sax.*; - import java.util.List; import java.util.Optional; - /** * Utility methods for processing RDF/XML constructs. *

@@ -48,6 +47,23 @@ public static Optional resolveDatatype(String datatypeUri) { return Optional.empty(); } + /** + * Expands a QName string (e.g. "xsd:integer") into a full URI if known. + * Currently supports "xsd:" → XML Schema namespace. + * + * @param qname the QName string + * @return expanded full URI if a known prefix, otherwise returns qname unchanged + */ + public static String expandQNameFromQName(String qname) { + if (qname == null) return null; + String xsdPrefix = fr.inria.corese.core.next.impl.common.vocabulary.XSD.xsdString.getPreferredPrefix() + ":"; + if (qname.startsWith(xsdPrefix)) { + return fr.inria.corese.core.next.impl.common.vocabulary.XSD.xsdString.getNamespace() + + qname.substring(xsdPrefix.length()); + } + return qname; + } + /** * Extracts a subject resource from RDF/XML attributes. * Supports rdf:about, rdf:nodeID, rdf:ID. @@ -62,7 +78,7 @@ public static Resource extractSubject(Attributes attrs, ValueFactory factory, St if (about != null) return factory.createIRI(resolveAgainstBase(about, baseURI)); String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - if (nodeID != null) return factory.createBNode("_:" + nodeID); + if (nodeID != null) return factory.createBNode(ParserConstants.BLANK_NODE_PREFIX + nodeID); String id = attrs.getValue(RDF.type.getNamespace(), "ID"); if (id != null) return factory.createIRI(resolveAgainstBase("#" + id, baseURI)); @@ -181,7 +197,7 @@ public static boolean isRdfRDF(String uri, String localName) { */ public static boolean isContainer(String localName, String uri) { return RDF.type.getNamespace().equals(uri) && - ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); + (RDF.Seq.getIRI().getLocalName().equals(localName) || RDF.Bag.getIRI().getLocalName().equals(localName) || RDF.Alt.getIRI().getLocalName().equals(localName)); } /** diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java index cddf9168c..ce0cfa354 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java @@ -5,13 +5,10 @@ import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; - import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.parser.antlr.TriGLexer; import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; -import org.antlr.v4.runtime.CharStream; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.ParseTreeWalker; @@ -21,10 +18,12 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; /** - * An ANTLR4-based parser for Trig format. - * This parser uses an ANTLR grammar to tokenize and parse Trig documents, + * A parser for the TriG format based on ANTLR4. + * It uses an ANTLR grammar to tokenize and parse TriG documents, * then a listener to build the RDF model. */ public class ANTLRTrigParser extends AbstractRDFParser { @@ -32,19 +31,23 @@ public class ANTLRTrigParser extends AbstractRDFParser { /** * Constructor for the ANTLRTrigParser. * - * @param model The RDF model to populate. - * @param factory The ValueFactory for creating RDF resources. + * @param model The RDF model to be populated. + * @param factory The value factory for creating RDF resources. 
*/ - public ANTLRTrigParser(Model model, ValueFactory factory) { super(model, factory); } + public ANTLRTrigParser(Model model, ValueFactory factory) { + super(model, factory); + } /** * Constructor for the ANTLRTrigParser with configuration options. * - * @param model The RDF model to populate. - * @param factory The ValueFactory for creating RDF resources. - * @param config The configuration options for parsing. + * @param model The RDF model to be populated. + * @param factory The value factory for creating RDF resources. + * @param config Configuration options for parsing. */ - public ANTLRTrigParser(Model model, ValueFactory factory, IOOptions config) {super(model, factory, config);} + public ANTLRTrigParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + } @Override public RDFFormat getRDFFormat() { @@ -52,7 +55,9 @@ public RDFFormat getRDFFormat() { } @Override - public void setConfig(IOOptions config) {} + public void setConfig(IOOptions config) { + // This method is required by the interface but is not used in this implementation. + } @Override public void parse(InputStream in) throws ParsingErrorException { @@ -70,9 +75,9 @@ public void parse(Reader reader) throws ParsingErrorException { } /** - * Parses Trig data from a Reader using ANTLR4. + * Parses TriG data from a {@link Reader} using ANTLR4. * - * @param reader The Reader to read RDF data from. + * @param reader The {@link Reader} to read the RDF data. * @param baseURI The base URI. * @throws ParsingErrorException if a parsing or I/O error occurs. 
*/ @@ -81,16 +86,87 @@ public void parse(Reader reader, String baseURI) throws ParsingErrorException { try { CharStream charStream = CharStreams.fromReader(reader); TriGLexer triGLexer = new TriGLexer(charStream); + + TrigErrorListener trigErrorListener = new TrigErrorListener(); + triGLexer.removeErrorListeners(); + triGLexer.addErrorListener(trigErrorListener); + CommonTokenStream tokens = new CommonTokenStream(triGLexer); TriGParser triGParser = new TriGParser(tokens); + + + triGParser.removeErrorListeners(); + triGParser.addErrorListener(trigErrorListener); + ParseTreeWalker walker = new ParseTreeWalker(); ParseTree tree = triGParser.trigDoc(); - TriGListerner listerner = new TriGListerner(getModel(), getValueFactory(), this.getConfig()); + + if (trigErrorListener.hasErrors()) { + throw new ParsingErrorException("Syntax error in TriG document: " + trigErrorListener.getErrorMessage()); + } + + TriGListerner listerner = new TriGListerner(getModel(), getValueFactory(), this.getConfig(), baseURI); walker.walk((ParseTreeListener) listerner, tree); + + } catch (ParsingErrorException e) { + throw e; } catch (IOException e) { throw new ParsingErrorException("Failed to parse TriG RDF: " + e.getMessage(), e); } catch (Exception e) { throw new ParsingErrorException("Unexpected error during TriG parsing: " + e.getMessage(), e); } } -} \ No newline at end of file + + + /** + * A custom error listener to collect errors from the lexer and parser. + */ + private static class TrigErrorListener extends BaseErrorListener { + private final List errors = new ArrayList<>(); + + /** + * Records syntax errors generated by ANTLR. + * + * @param recognizer The recognizer that detected the error. + * @param offendingSymbol The symbol that caused the error. + * @param line The line number where the error occurred. + * @param charPositionInLine The character position on the line. + * @param msg The error message. + * @param e The recognition exception. 
+ */ + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, + int line, int charPositionInLine, String msg, RecognitionException e) { + if (msg != null && (msg.contains("token recognition error") || msg.contains("mismatched input"))) { + if (offendingSymbol instanceof Token) { + Token token = (Token) offendingSymbol; + String tokenText = token.getText(); + if (msg.contains("token recognition error") && tokenText != null && tokenText.contains("\"")) { + msg = "Invalid string literal - possibly unterminated or contains invalid escape sequence: " + msg; + } + } + } + + String error = "line " + line + ":" + charPositionInLine + " " + msg; + errors.add(error); + } + + /** + * Checks if parsing errors have been found. + * + * @return `true` if the error list is not empty, otherwise `false`. + */ + public boolean hasErrors() { + return !errors.isEmpty(); + } + + /** + * Returns a formatted error message containing all found errors. + * + * @return A {@link String} containing the error messages. 
+ */ + public String getErrorMessage() { + return String.join("; ", errors); + } + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java index a7f199392..ea614152a 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java @@ -3,251 +3,565 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; -import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.common.AbstractTurtleTriGListener; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; import fr.inria.corese.core.next.impl.parser.antlr.TriGBaseListener; import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; -import fr.inria.corese.core.next.api.ValueFactory; -import java.util.HashMap; + import java.util.List; -import java.util.Map; /** - * Listener for the ANTLR4 generated parser for TriG. - * This listener traverses the parse tree and builds the RDF model, - * supporting named graphs. It includes unescaping logic for URIs and literals. + * ANTLR listener for parsing TriG documents into RDF datasets. + * Adds support for named graphs as specified in the TriG format.
+ * + */ public class TriGListerner extends TriGBaseListener { - private final Model model; - private String baseURI; - private final Map prefixMap = new HashMap<>(); - private final ValueFactory factory; - private Resource currentSubject; - private IRI currentPredicate; - private Resource currentGraph; + private final TriGListenerDelegate delegate; + private boolean insideGraphBlock = false; + /** + * Constructs a TriG listener with explicit base URI. + * + * @param model RDF model to populate with parsed quads + * @param factory factory for creating RDF terms + * @param options I/O options (unused in this constructor) + * @param baseURI base URI for resolving relative references + */ + public TriGListerner(Model model, ValueFactory factory, IOOptions options, String baseURI) { + this.delegate = new TriGListenerDelegate(model, factory, baseURI); + } /** - * Constructor for the TriGListerner. + * Constructs a TriG listener extracting base URI from options. * - * @param model The RDF model to populate. - * @param factory The ValueFactory for creating RDF resources. - * @param options IOOptions for configuration (if any). + * @param model RDF model to populate with parsed quads + * @param factory factory for creating RDF terms + * @param options I/O options potentially containing base URI */ public TriGListerner(Model model, ValueFactory factory, IOOptions options) { - this.model = model; - this.baseURI = baseURI != null ? baseURI : ""; - if (options != null && options instanceof RDFParserBaseIRIOptions); - this.factory = factory; + String baseURI; + if (options instanceof RDFParserBaseIRIOptions) { + RDFParserBaseIRIOptions baseIRIOptions = (RDFParserBaseIRIOptions) options; + baseURI = baseIRIOptions.getBase() != null ? 
baseIRIOptions.getBase() : ParserConstants.EMPTY_STRING; + } else { + baseURI = ParserConstants.EMPTY_STRING; + } + this.delegate = new TriGListenerDelegate(model, factory, baseURI); + } + + /** + * Handles {@code @base} directive by updating the base URI for relative IRI resolution. + * + * @param ctx base directive context + */ + @Override + public void exitBase(TriGParser.BaseContext ctx) { + String newBase = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.updateBaseURI(newBase); } + /** + * Handles {@code @prefix} directive by registering a namespace prefix. + * + * @param ctx prefix directive context + */ @Override public void exitPrefixID(TriGParser.PrefixIDContext ctx) { String prefix = ctx.PNAME_NS().getText(); - String iri = ctx.IRIREF().getText(); prefix = prefix.substring(0, prefix.length() - 1); - iri = iri.substring(1, iri.length() - 1); - prefixMap.put(prefix, iri); - model.setNamespace(prefix, iri); + String iri = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.registerPrefix(prefix, iri); } + /** + * Handles SPARQL-style {@code BASE} directive by updating the base URI. + * + * @param ctx SPARQL base directive context + */ @Override public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) { - baseURI = ctx.IRIREF().getText().replaceAll("^<|>$", ""); + String newBase = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.updateBaseURI(newBase); } + /** + * Handles SPARQL-style {@code PREFIX} directive by registering a namespace prefix. + * + * @param ctx SPARQL prefix directive context + */ + @Override + public void exitSparqlPrefix(TriGParser.SparqlPrefixContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + String iri = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.registerPrefix(prefix, iri); + } + + /** + * Handles entry into a graph block, setting the current graph context.
+ * If a graph label is present, it becomes the target graph for subsequent statements. + * + * @param ctx block context + */ @Override public void enterBlock(TriGParser.BlockContext ctx) { - currentGraph = ctx.Graph_w() != null && ctx.labelOrSubject() != null - ? extractLabelOrSubject(ctx.labelOrSubject()) - : null; + if (ctx.Graph_w() != null && ctx.labelOrSubject() != null) { + delegate.setCurrentGraph(extractLabelOrSubject(ctx.labelOrSubject())); + } else { + delegate.setCurrentGraph(null); + } } + /** + * Handles exit from a graph block, clearing the current graph context. + * + * @param ctx block context + */ @Override public void exitBlock(TriGParser.BlockContext ctx) { - currentGraph = null; + delegate.setCurrentGraph(null); } + /** + * Marks entry into a wrapped graph block + * Used for validating standalone collection syntax. + * + * @param ctx wrapped graph context + */ + @Override + public void enterWrappedGraph(TriGParser.WrappedGraphContext ctx) { + insideGraphBlock = true; + } + + /** + * Marks exit from a wrapped graph block. + * + * @param ctx wrapped graph context + */ + @Override + public void exitWrappedGraph(TriGParser.WrappedGraphContext ctx) { + insideGraphBlock = false; + } + + /** + * Handles triples or graph declarations (disambiguated by following syntax). + * Either processes triples with an explicit subject or sets up a named graph. 
+ * + * @param ctx triples or graph context + */ @Override public void enterTriplesOrGraph(TriGParser.TriplesOrGraphContext ctx) { - if (ctx.labelOrSubject() != null && ctx.predicateObjectList() != null) { - currentSubject = extractLabelOrSubject(ctx.labelOrSubject()); - processPredicateObjectList(ctx.predicateObjectList()); + if (ctx.labelOrSubject() != null) { + Resource resource = extractLabelOrSubject(ctx.labelOrSubject()); + if (ctx.predicateObjectList() != null) { + delegate.setCurrentSubject(resource); + processPredicateObjectList(ctx.predicateObjectList()); + } else if (ctx.wrappedGraph() != null) { + delegate.setCurrentGraph(resource); + } } } + /** + * Handles standard triple declarations starting with a subject or blank node property list. + * Subject context is saved and restored to handle nested structures. + * + * @param ctx triples context + */ @Override public void enterTriples(TriGParser.TriplesContext ctx) { - currentSubject = extractSubject(ctx.subject()); - processPredicateObjectList(ctx.predicateObjectList()); + Resource savedSubject = delegate.getCurrentSubject(); + try { + if (ctx.subject() != null) { + delegate.setCurrentSubject(extractSubject(ctx.subject())); + } else if (ctx.blankNodePropertyList() != null) { + delegate.setCurrentSubject(processBlankNodePropertyList(ctx.blankNodePropertyList())); + } + + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); + } + } finally { + delegate.setCurrentSubject(savedSubject); + } } /** - * Processes a PredicateObjectList context, extracting verbs and corresponding object lists, - * and adding triples to the model for the current subject and graph. + * Handles alternative triple forms starting with blank node property lists or collections. + * Validates that standalone collections only appear within graph blocks. + * Subject context is saved and restored to handle nested structures. 
* - * @param ctx the predicate-object list context from the parser + * @param ctx triples2 context */ - private void processPredicateObjectList(TriGParser.PredicateObjectListContext ctx) { - List verbs = ctx.verb(); - List objLists = ctx.objectList(); + @Override + public void enterTriples2(TriGParser.Triples2Context ctx) { + Resource savedSubject = delegate.getCurrentSubject(); + + if (ctx.collection() != null && ctx.predicateObjectList() == null) { + validateStandaloneCollection(ctx); + } + + try { + if (ctx.blankNodePropertyList() != null) { + delegate.setCurrentSubject(processBlankNodePropertyList(ctx.blankNodePropertyList())); + } else if (ctx.collection() != null) { + delegate.setCurrentSubject(processCollection(ctx.collection())); + } - for (int i = 0; i < verbs.size(); i++) { - currentPredicate = extractVerb(verbs.get(i)); - List objects = objLists.get(i).object(); - for (TriGParser.ObjectContext objCtx : objects) { - Value object = extractObject(objCtx); - model.add(currentSubject, currentPredicate, object, currentGraph); + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); } + } finally { + delegate.setCurrentSubject(savedSubject); } } /** - * Extracts an RDF object from the ObjectContext. - * Supports IRIs, blank nodes, literals, and inline blank node property lists. + * Validates that standalone collections only appear within graph blocks. + * TriG syntax requires collections without predicates to be inside braces. 
* - * @param ctx the object context - * @return the extracted RDF Value + * @param ctx triples2 context containing a potential standalone collection + * @throws ParsingErrorException if a standalone collection appears outside graph blocks */ - private Value extractObject(TriGParser.ObjectContext ctx) { - if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); - if (ctx.blank() != null) return extractBlank(ctx.blank()); - if (ctx.literal() != null) return extractLiteral(ctx.literal()); - if (ctx.blankNodePropertyList() != null) return processBlankNodePropertyList(ctx.blankNodePropertyList()); - throw new RuntimeException("Unsupported object: " + ctx.getText()); + private void validateStandaloneCollection(TriGParser.Triples2Context ctx) { + if (!insideGraphBlock) { + List objects = ctx.collection().object(); + if (objects.isEmpty()) { + throw new ParsingErrorException("Free-standing list of zero-elements outside {} : bad syntax"); + } else { + throw new ParsingErrorException("Free-standing list outside {} : bad syntax"); + } + } } /** - * Processes an inline blank node with its property list, returning the blank node as a Resource. - * Temporarily updates the current subject to the new blank node during processing. + * Processes a blank node property list + * Creates a fresh blank node and parses its property list. 
* - * @param ctx the blank node property list context - * @return the new blank node resource + * @param ctx blank node property list context + * @return created blank node resource */ private Resource processBlankNodePropertyList(TriGParser.BlankNodePropertyListContext ctx) { - Resource bnode = factory.createBNode(); - Resource savedSubject = currentSubject; - currentSubject = bnode; - processPredicateObjectList(ctx.predicateObjectList()); - currentSubject = savedSubject; + Resource bnode = delegate.createBNode(); + Resource savedSubject = delegate.getCurrentSubject(); + IRI savedPredicate = delegate.getCurrentPredicate(); + + try { + delegate.setCurrentSubject(bnode); + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); + } + } finally { + delegate.setCurrentSubject(savedSubject); + delegate.setCurrentPredicate(savedPredicate); + } + return bnode; } /** - * Extracts a subject from a SubjectContext, which can be an IRI or a blank node. + * Processes an RDF collection + * Constructs the linked list structure * - * @param ctx the subject context - * @return the extracted subject as a Resource + * @param ctx collection context + * @return head of the list (blank node) or {@code rdf:nil} for empty lists */ - private Resource extractSubject(TriGParser.SubjectContext ctx) { - if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); - if (ctx.blank() != null) return extractBlank(ctx.blank()); - throw new RuntimeException("Unsupported subject: " + ctx.getText()); + private Resource processCollection(TriGParser.CollectionContext ctx) { + List objects = ctx.object(); + + if (objects.isEmpty()) { + return delegate.createIRI(RDF.nil.getIRI().stringValue()); + } + + Resource head = delegate.createBNode(); + Resource current = head; + Resource savedSubject = delegate.getCurrentSubject(); + IRI savedPredicate = delegate.getCurrentPredicate(); + + try { + IRI firstPredicate = 
delegate.createIRI(RDF.first.getIRI().stringValue()); + IRI restPredicate = delegate.createIRI(RDF.rest.getIRI().stringValue()); + Value nilValue = delegate.createIRI(RDF.nil.getIRI().stringValue()); + + for (int i = 0; i < objects.size(); i++) { + Value object = extractObject(objects.get(i)); + delegate.addStatement(current, firstPredicate, object); + + if (i == objects.size() - 1) { + delegate.addStatement(current, restPredicate, nilValue); + } else { + Resource next = delegate.createBNode(); + delegate.addStatement(current, restPredicate, next); + current = next; + } + } + } finally { + delegate.setCurrentSubject(savedSubject); + delegate.setCurrentPredicate(savedPredicate); + } + + return head; + } + + /** + * Processes a predicate-object list, generating statements for each predicate-object pair. + * + * @param ctx predicate-object list context + */ + private void processPredicateObjectList(TriGParser.PredicateObjectListContext ctx) { + for (int i = 0; i < ctx.verb().size(); i++) { + TriGParser.VerbContext verb = ctx.verb(i); + TriGParser.ObjectListContext objectList = ctx.objectList(i); + + IRI savedPredicate = delegate.getCurrentPredicate(); + try { + delegate.setCurrentPredicate(extractVerb(verb)); + + if (objectList != null) { + for (TriGParser.ObjectContext objectCtx : objectList.object()) { + Value object = extractObject(objectCtx); + delegate.addStatement(object); + } + } + } finally { + delegate.setCurrentPredicate(savedPredicate); + } + } } /** - * Extracts a blank node from a BlankContext, supporting labeled (_:b) and anonymous ([]) forms. + * Extracts an RDF value from an object context. 
* - * @param ctx the blank context - * @return the blank node as a Resource + * @param ctx object context + * @return extracted RDF value (IRI, blank node, or literal) + * @throws ParsingErrorException if the object type is unsupported + */ + private Value extractObject(TriGParser.ObjectContext ctx) { + if (ctx.iri() != null) { + return delegate.createIRI(delegate.resolveIRI(ctx.iri().getText())); + } + if (ctx.blank() != null) { + return extractBlank(ctx.blank()); + } + if (ctx.literal() != null) { + return extractLiteral(ctx.literal()); + } + if (ctx.blankNodePropertyList() != null) { + return processBlankNodePropertyList(ctx.blankNodePropertyList()); + } + throw new ParsingErrorException("Unsupported object: " + ctx.getText()); + } + + /** + * Extracts a blank node resource from a blank node context. + * + * @param ctx blank node context + * @return blank node resource or collection head + * @throws ParsingErrorException if the blank node format is unsupported */ private Resource extractBlank(TriGParser.BlankContext ctx) { TriGParser.BlankNodeContext node = ctx.blankNode(); if (node != null) { - if (node.BLANK_NODE_LABEL() != null) - return factory.createBNode(node.BLANK_NODE_LABEL().getText()); - if (node.ANON() != null) - return factory.createBNode(); + if (node.BLANK_NODE_LABEL() != null) { + return delegate.createBNode(node.BLANK_NODE_LABEL().getText().substring(2)); + } + if (node.ANON() != null) { + return delegate.createBNode(); + } } - throw new RuntimeException("Unsupported blank node structure: " + ctx.getText()); + + TriGParser.CollectionContext collection = ctx.collection(); + if (collection != null) { + return processCollection(collection); + } + + throw new ParsingErrorException("Unsupported blank node: " + ctx.getText()); } /** - * Extracts a graph label or subject from a LabelOrSubjectContext. - * Supports IRI and blank node. + * Extracts a subject resource from a subject context. 
* - * @param ctx the label or subject context - * @return the extracted resource + * @param ctx subject context + * @return subject resource (IRI, blank node, or collection head) + * @throws ParsingErrorException if the subject type is unsupported + */ + private Resource extractSubject(TriGParser.SubjectContext ctx) { + if (ctx.iri() != null) { + return delegate.createIRI(delegate.resolveIRI(ctx.iri().getText())); + } + if (ctx.blank() != null) { + if (ctx.blank().blankNode() != null) { + TriGParser.BlankNodeContext node = ctx.blank().blankNode(); + if (node.BLANK_NODE_LABEL() != null) { + return delegate.createBNode(node.BLANK_NODE_LABEL().getText().substring(2)); + } + if (node.ANON() != null) { + return delegate.createBNode(); + } + } else if (ctx.blank().collection() != null) { + return processCollection(ctx.blank().collection()); + } + } + throw new ParsingErrorException("Unsupported subject: " + ctx.getText()); + } + + /** + * Extracts a graph label or subject resource. + * + * @param ctx label or subject context + * @return resource serving as graph name or statement subject + * @throws ParsingErrorException if IRI resolution fails or type is unsupported */ private Resource extractLabelOrSubject(TriGParser.LabelOrSubjectContext ctx) { - if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); - if (ctx.blankNode() != null) return factory.createBNode(ctx.blankNode().getText()); - throw new RuntimeException("Unsupported labelOrSubject: " + ctx.getText()); + if (ctx.iri() != null) { + String iriText = ctx.iri().getText(); + try { + return delegate.createIRI(delegate.resolveIRI(iriText)); + } catch (Exception e) { + throw new ParsingErrorException("Failed to resolve IRI: " + iriText, e); + } + } + if (ctx.blankNode() != null) { + if (ctx.blankNode().BLANK_NODE_LABEL() != null) { + return delegate.createBNode(ctx.blankNode().BLANK_NODE_LABEL().getText().substring(2)); + } + if (ctx.blankNode().ANON() != null) { + return 
delegate.createBNode(); + } + } + throw new ParsingErrorException("Unsupported label or subject: " + ctx.getText()); } /** - * Extracts a predicate IRI from a VerbContext. - * Handles the special keyword 'a' as rdf:type. + * Extracts a predicate IRI from a verb context. + * Handles the special case of {@code a} as shorthand for {@code rdf:type}. * - * @param ctx the verb context - * @return the extracted IRI + * @param ctx verb context + * @return predicate IRI */ private IRI extractVerb(TriGParser.VerbContext ctx) { - return factory.createIRI(resolveIRI(ctx.getText())); + String verbText = ctx.getText(); + if (verbText.equals(ParserConstants.A)) { + return delegate.createIRI(RDF.type.getIRI().stringValue()); + } + return delegate.createIRI(delegate.resolveIRI(verbText)); } /** - * Extracts a Literal from a LiteralContext, handling typed, language-tagged, boolean, and numeric literals. + * Extracts an RDF literal from a literal context. + * Handles plain, language-tagged, typed, boolean, and numeric literals. 
* - * @param ctx the literal context - * @return the extracted Literal + * @param ctx literal context + * @return RDF literal value + * @throws ParsingErrorException if the literal type is unsupported */ private Literal extractLiteral(TriGParser.LiteralContext ctx) { if (ctx.rDFLiteral() != null) { - String label = stripQuotes(ctx.rDFLiteral().string().getText()); - if (ctx.rDFLiteral().LANGTAG() != null) - return factory.createLiteral(label, ctx.rDFLiteral().LANGTAG().getText().substring(1)); - if (ctx.rDFLiteral().iri() != null) - return factory.createLiteral(label, factory.createIRI(resolveIRI(ctx.rDFLiteral().iri().getText()))); - return factory.createLiteral(label); - } - if (ctx.BooleanLiteral() != null) - return factory.createLiteral(ctx.BooleanLiteral().getText(), XSD.BOOLEAN.getIRI()); + String label = delegate.unescapeString(ctx.rDFLiteral().string().getText()); + if (ctx.rDFLiteral().LANGTAG() != null) { + return delegate.createLiteral(label, ctx.rDFLiteral().LANGTAG().getText().substring(1), null); + } + if (ctx.rDFLiteral().iri() != null) { + return delegate.createLiteral(label, null, delegate.resolveIRI(ctx.rDFLiteral().iri().getText())); + } + return delegate.createLiteral(label, null, null); + } + if (ctx.BooleanLiteral() != null) { + return delegate.createBooleanLiteral(ctx.BooleanLiteral().getText()); + } if (ctx.numericLiteral() != null) { - if (ctx.numericLiteral().INTEGER() != null) - return factory.createLiteral(ctx.numericLiteral().INTEGER().getText(), XSD.INTEGER.getIRI()); - if (ctx.numericLiteral().DECIMAL() != null) - return factory.createLiteral(ctx.numericLiteral().DECIMAL().getText(), XSD.DECIMAL.getIRI()); - if (ctx.numericLiteral().DOUBLE() != null) - return factory.createLiteral(ctx.numericLiteral().DOUBLE().getText(), XSD.DOUBLE.getIRI()); + String numericText = ctx.numericLiteral().getText(); + + if (ctx.numericLiteral().DOUBLE() != null) { + return delegate.createNumericLiteral(numericText, 
AbstractTurtleTriGListener.NumericType.DOUBLE); + } else if (ctx.numericLiteral().DECIMAL() != null) { + return delegate.createNumericLiteral(numericText, AbstractTurtleTriGListener.NumericType.DECIMAL); + } else { + return delegate.createNumericLiteral(numericText, AbstractTurtleTriGListener.NumericType.INTEGER); + } } - throw new RuntimeException("Unsupported literal: " + ctx.getText()); + throw new ParsingErrorException("Unsupported literal: " + ctx.getText()); } /** - * Resolves an IRI or QName into a full URI string. - * Handles full IRIs in angle brackets, QNames using prefixes, and special case "a". - * - * @param raw the raw string - * @return the resolved URI string + * Delegate extending {@link AbstractTurtleTriGListener} with TriG-specific named graph support. + * Overrides statement addition to handle quads (subject, predicate, object, graph). */ - private String resolveIRI(String raw) { - raw = raw.trim(); - if (raw.startsWith("<") && raw.endsWith(">")) return raw.substring(1, raw.length() - 1); - if (raw.equals("a")) return RDF.type.getIRI().stringValue(); - if (raw.contains(":")) { - String[] parts = raw.split(":", 2); - String ns = prefixMap.get(parts[0]); - if (ns != null) return ns + parts[1]; - throw new IllegalArgumentException("Undeclared prefix: " + parts[0]); + private static class TriGListenerDelegate extends AbstractTurtleTriGListener { + private Resource currentGraph; + + public TriGListenerDelegate(Model model, ValueFactory factory, String baseURI) { + super(model, factory, baseURI); } - return baseURI + raw; - } - /** - * Strips surrounding quotes from a string literal, including single, double, and multi-line forms. 
- * - * @param text the quoted string - * @return the unquoted string - */ - private String stripQuotes(String text) { - if (text == null || text.length() < 2) return text; - if ((text.startsWith("\"") && text.endsWith("\"")) || - (text.startsWith("\"\"\"") && text.endsWith("\"\"\"")) || - (text.startsWith("'''") && text.endsWith("'''"))) { - return text.substring(1, text.length() - 1); + public Resource getCurrentSubject() { + return currentSubject; + } + + public void setCurrentSubject(Resource subject) { + this.currentSubject = subject; + } + + public IRI getCurrentPredicate() { + return currentPredicate; + } + + public void setCurrentPredicate(IRI predicate) { + this.currentPredicate = predicate; + } + + + public void setCurrentGraph(Resource graph) { + this.currentGraph = graph; + } + + public void addStatement(Value object) { + safeAddStatement(currentSubject, currentPredicate, object); + } + + public void addStatement(Resource subject, IRI predicate, Value object) { + safeAddStatement(subject, predicate, object); + } + + public Resource createBNode() { + return factory.createBNode(); + } + + public Resource createBNode(String id) { + return factory.createBNode(id); + } + + public IRI createIRI(String iri) { + return factory.createIRI(iri); + } + + /** + * Adds a quad to the model with fallback to default graph on failure. 
+ * + * @param subject statement subject + * @param predicate statement predicate + * @param object statement object + * @throws ParsingErrorException if statement cannot be added to any graph + */ + @Override + public void safeAddStatement(Resource subject, IRI predicate, Value object) { + try { + model.add(subject, predicate, object, currentGraph); + } catch (Exception e) { + if (currentGraph != null) { + try { + model.add(subject, predicate, object, null); + } catch (Exception e2) { + throw new ParsingErrorException("Failed to add statement: " + e.getMessage(), e); + } + } else { + throw new ParsingErrorException("Failed to add statement: " + e.getMessage(), e); + } + } } - return text; } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java index d303efa2c..d2f59cc7c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java @@ -1,30 +1,30 @@ package fr.inria.corese.core.next.impl.io.parser.turtle; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; - -import org.antlr.v4.runtime.CharStream; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.tree.ParseTree; -import org.antlr.v4.runtime.tree.ParseTreeListener; -import org.antlr.v4.runtime.tree.ParseTreeWalker; - import fr.inria.corese.core.next.api.Model; import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import 
fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.parser.antlr.TurtleLexer; import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; /** * Parser for Turtle RDF files. - * - * @see fr.inria.corese.core.next.impl.io.parser.ParserFactory - * @see Turtle + * */ public class ANTLRTurtleParser extends AbstractRDFParser { @@ -56,59 +56,154 @@ public RDFFormat getRDFFormat() { return RDFFormat.TURTLE; } - /** - * @param config we are not using any config in this parser implementation - */ @Override public void setConfig(IOOptions config) { - // nothing to do - } - - /** - * @return null, we are not using any config in this parser implementation - */ - @Override - public IOOptions getConfig() { - return null; } @Override - public void parse(InputStream in) { - parse(new InputStreamReader(in), null); + public void parse(InputStream in) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); } @Override - public void parse(InputStream in, String baseURI) { - parse(new InputStreamReader(in), baseURI); + public void parse(InputStream in, String baseURI) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURI); } @Override - public void parse(Reader reader) { + public void parse(Reader reader) throws ParsingErrorException { parse(reader, null); } /** - * We are using ANTLR4 lexer and parser - * - * @param reader The Reader to read RDF data from. - * @param baseURI The base URI for resolving relative URIs in the RDF data. 
+ * Parses Turtle data from a {@link Reader} using ANTLR4. + * + * @param reader The {@link Reader} to read the RDF data. + * @param baseURI The base URI. + * @throws ParsingErrorException if a parsing or I/O error occurs. */ @Override - public void parse(Reader reader, String baseURI) { - + public void parse(Reader reader, String baseURI) throws ParsingErrorException { try { CharStream charStream = CharStreams.fromReader(reader); - TurtleLexer lexer = new TurtleLexer(charStream); - CommonTokenStream tokens = new CommonTokenStream(lexer); - TurtleParser parser = new TurtleParser(tokens); - ParseTreeWalker walker = new ParseTreeWalker(); - ParseTree tree = parser.turtleDoc(); - TurtleListenerImpl listener = new TurtleListenerImpl(getModel(), getValueFactory(), this.getConfig()); + TurtleLexer turtleLexer = new TurtleLexer(charStream); + + TurtleErrorListener turtleErrorListener = new TurtleErrorListener(); + turtleLexer.removeErrorListeners(); + turtleLexer.addErrorListener(turtleErrorListener); + + CommonTokenStream tokens = new CommonTokenStream(turtleLexer); + TurtleParser turtleParser = new TurtleParser(tokens); + + turtleParser.removeErrorListeners(); + turtleParser.addErrorListener(turtleErrorListener); + ParseTreeWalker walker = new ParseTreeWalker(); + ParseTree tree; + + try { + tree = turtleParser.turtleDoc(); + + if (turtleErrorListener.hasErrors()) { + String errorMsg = turtleErrorListener.getErrorMessage(); + if (errorMsg == null || errorMsg.trim().isEmpty()) { + errorMsg = "Unknown syntax error detected"; + } + throw new ParsingErrorException("Syntax error in Turtle document: " + errorMsg); + } + } catch (RecognitionException e) { + throw new ParsingErrorException("Recognition error in Turtle document: " + e.getMessage()); + } + + IOOptions effectiveOptions = this.getConfig(); + if (baseURI != null && !baseURI.isEmpty()) { + effectiveOptions = new BaseIRIOptions(baseURI); + } + + TurtleListener listener = new TurtleListener(getModel(), 
getValueFactory(), effectiveOptions); walker.walk((ParseTreeListener) listener, tree); + } catch (ParsingErrorException e) { + throw e; } catch (IOException e) { - throw new RuntimeException("Failed to parse Turtle RDF", e); + throw new ParsingErrorException("Failed to parse Turtle RDF: " + e.getMessage(), e); + } catch (Exception e) { + throw new ParsingErrorException("Unexpected error during Turtle parsing: " + e.getMessage(), e); + } + } + + /** + * Simple implementation of RDFParserBaseIRIOptions to pass base URI to listener. + */ + private static class BaseIRIOptions implements RDFParserBaseIRIOptions { + private final String baseURI; + + public BaseIRIOptions(String baseURI) { + this.baseURI = baseURI; + } + + @Override + public String getBase() { + return baseURI; + } + } + + /** + * A custom error listener to collect errors from the lexer and parser. + */ + private static class TurtleErrorListener extends BaseErrorListener { + private final List errors = new ArrayList<>(); + + /** + * Records syntax errors generated by ANTLR. + * + * @param recognizer The recognizer that detected the error. + * @param offendingSymbol The symbol that caused the error. + * @param line The line number where the error occurred. + * @param charPositionInLine The character position on the line. + * @param msg The error message. + * @param e The recognition exception. 
+ */ + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, + int line, int charPositionInLine, String msg, RecognitionException e) { + if (msg == null || msg.trim().isEmpty()) { + msg = "Unknown syntax error"; + } + + if (msg.contains("token recognition error") || msg.contains("mismatched input")) { + if (offendingSymbol instanceof Token) { + Token token = (Token) offendingSymbol; + String tokenText = token.getText(); + if (msg.contains("token recognition error") && tokenText != null && tokenText.contains("\"")) { + msg = "Invalid string literal - possibly unterminated or contains invalid escape sequence: " + msg; + } + } + } + + String error = "line " + line + ":" + charPositionInLine + " " + msg; + errors.add(error); + } + + /** + * Checks if parsing errors have been found. + * + * @return `true` if the error list is not empty, otherwise `false`. + */ + public boolean hasErrors() { + return !errors.isEmpty(); + } + + /** + * Returns a formatted error message containing all found errors. + * + * @return A {@link String} containing the error messages. 
+ */ + public String getErrorMessage() { + if (errors.isEmpty()) { + return "Unknown parsing error"; + } + return String.join("; ", errors); } } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListener.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListener.java new file mode 100644 index 000000000..00d554384 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListener.java @@ -0,0 +1,346 @@ +package fr.inria.corese.core.next.impl.io.parser.turtle; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.common.AbstractTurtleTriGListener; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleBaseListener; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; + +import java.util.List; + +/** + * ANTLR listener for parsing Turtle documents into RDF graphs. + * Extends {@link AbstractTurtleTriGListener} for common RDF parsing functionality. + * + */ +public class TurtleListener extends TurtleBaseListener { + + private final AbstractTurtleTriGListener delegate; + + /** + * Constructs a Turtle listener extracting base URI from options. 
+ * + * @param model RDF model to populate with parsed triples + * @param factory factory for creating RDF terms + * @param options I/O options potentially containing base URI + */ + public TurtleListener(Model model, ValueFactory factory, IOOptions options) { + String baseURI = null; + + if (options instanceof RDFParserBaseIRIOptions) { + RDFParserBaseIRIOptions baseIRIOptions = (RDFParserBaseIRIOptions) options; + baseURI = baseIRIOptions.getBase(); + } + + if (baseURI == null || baseURI.isEmpty()) { + baseURI = ParserConstants.EMPTY_STRING; + } + + this.delegate = new TurtleListenerDelegate(model, factory, baseURI); + } + + /** + * Handles {@code @prefix} directive by registering a namespace prefix. + * + * @param ctx prefix directive context + */ + @Override + public void exitPrefixID(TurtleParser.PrefixIDContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + String iri = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.registerPrefix(prefix, iri); + } + + /** + * Handles directive by updating the base URI for relative IRI resolution. + * + * @param ctx base directive context + */ + @Override + public void exitBase(TurtleParser.BaseContext ctx) { + if (ctx.IRIREF() != null) { + String newBase = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.updateBaseURI(newBase); + } + } + + /** + * Handles SPARQL-style {@code BASE} directive by updating the base URI. + * + * @param ctx SPARQL base directive context + */ + @Override + public void exitSparqlBase(TurtleParser.SparqlBaseContext ctx) { + String newBase = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.updateBaseURI(newBase); + } + + /** + * Handles SPARQL-style {@code PREFIX} directive by registering a namespace prefix. 
+ * + * @param ctx SPARQL prefix directive context + */ + @Override + public void exitSparqlPrefix(TurtleParser.SparqlPrefixContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + String iri = delegate.extractAndUnescapeIRI(ctx.IRIREF().getText()); + delegate.registerPrefix(prefix, iri); + } + + /** + * Handles triple declarations starting with a subject or blank node property list. + * Processes the subject and its associated predicate-object list. + * + * @param ctx triples context + * @throws ParsingErrorException if the subject is missing or processing fails + */ + @Override + public void enterTriples(TurtleParser.TriplesContext ctx) { + try { + if (ctx.subject() != null) { + delegate.currentSubject = extractSubject(ctx.subject()); + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); + } + } else if (ctx.blankNodePropertyList() != null) { + delegate.currentSubject = processBlankNodePropertyList(ctx.blankNodePropertyList()); + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); + } + } else { + throw new ParsingErrorException("Missing subject in triple."); + } + } catch (ParsingErrorException e) { + throw e; + } catch (Exception e) { + throw new ParsingErrorException("Error processing triples: " + e.getMessage(), e); + } + } + + /** + * Processes a predicate-object list, generating triples for each predicate-object pair. 
+ * + * @param ctx predicate-object list context + */ + private void processPredicateObjectList(TurtleParser.PredicateObjectListContext ctx) { + for (int i = 0; i < ctx.verb().size(); i++) { + TurtleParser.VerbContext verb = ctx.verb(i); + TurtleParser.ObjectListContext objectList = ctx.objectList(i); + + delegate.currentPredicate = extractVerb(verb); + + if (objectList != null) { + for (TurtleParser.Object_Context objectCtx : objectList.object_()) { + Value object = extractObject(objectCtx); + delegate.safeAddStatement(delegate.currentSubject, delegate.currentPredicate, object); + } + } + } + } + + /** + * Extracts an RDF value from an object context. + * + * @param ctx object context + * @return extracted RDF value (IRI, blank node, or literal) + * @throws ParsingErrorException if the object type is unsupported or IRI cannot be resolved + */ + private Value extractObject(TurtleParser.Object_Context ctx) { + if (ctx.iri() != null) { + String resolvedIRI = delegate.resolveIRI(ctx.iri().getText()); + if (resolvedIRI.isEmpty()) { + throw new ParsingErrorException("Cannot resolve object IRI: " + ctx.iri().getText()); + } + return delegate.factory.createIRI(resolvedIRI); + } + + if (ctx.BlankNode() != null) { + String blankNodeText = ctx.BlankNode().getText(); + if (blankNodeText.startsWith(ParserConstants.BLANK_NODE_PREFIX)) { + return delegate.factory.createBNode(blankNodeText.substring(2)); + } else if (blankNodeText.equals(ParserConstants.EMPTY_SQUARE_BRACKET)) { + return delegate.factory.createBNode(); + } else { + throw new ParsingErrorException("Unsupported blank node format: " + blankNodeText); + } + } + + if (ctx.literal() != null) { + return extractLiteral(ctx.literal()); + } + + if (ctx.blankNodePropertyList() != null) { + return processBlankNodePropertyList(ctx.blankNodePropertyList()); + } + + if (ctx.collection() != null) { + return processCollection(ctx.collection()); + } + + throw new ParsingErrorException("Unsupported object type: " + (ctx.getText() != 
null ? ctx.getText() : "null")); + } + + /** + * Extracts a subject resource from a subject context. + * + * @param ctx subject context + * @return subject resource (IRI, blank node, or collection head) + * @throws ParsingErrorException if the subject type is unsupported + */ + private Resource extractSubject(TurtleParser.SubjectContext ctx) { + if (ctx.iri() != null) { + return delegate.factory.createIRI(delegate.resolveIRI(ctx.iri().getText())); + } + if (ctx.BlankNode() != null) { + String blankNodeText = ctx.BlankNode().getText(); + if (blankNodeText.startsWith(ParserConstants.BLANK_NODE_PREFIX)) { + return delegate.factory.createBNode(blankNodeText.substring(2)); + } else if (blankNodeText.equals(ParserConstants.EMPTY_SQUARE_BRACKET)) { + return delegate.factory.createBNode(); + } else { + throw new ParsingErrorException("Unsupported blank node format: " + blankNodeText); + } + } + if (ctx.collection() != null) { + return processCollection(ctx.collection()); + } + throw new ParsingErrorException("Unsupported subject type: " + ctx.getText()); + } + + /** + * Processes a blank node property list + * Creates a fresh blank node and parses its property list. + * Subject and predicate contexts are saved and restored to handle nested structures. 
+ * + * @param ctx blank node property list context + * @return created blank node resource + */ + private Resource processBlankNodePropertyList(TurtleParser.BlankNodePropertyListContext ctx) { + Resource bnode = delegate.factory.createBNode(); + Resource savedSubject = delegate.currentSubject; + IRI savedPredicate = delegate.currentPredicate; + + try { + delegate.currentSubject = bnode; + if (ctx.predicateObjectList() != null) { + processPredicateObjectList(ctx.predicateObjectList()); + } + } finally { + delegate.currentSubject = savedSubject; + delegate.currentPredicate = savedPredicate; + } + + return bnode; + } + + /** + * Processes an RDF collection + * Constructs the linked list structure using + * + * @param ctx collection context + * @return head of the list (blank node) or {@code rdf:nil} for empty lists + */ + private Resource processCollection(TurtleParser.CollectionContext ctx) { + List objects = ctx.object_(); + + if (objects.isEmpty()) { + return delegate.factory.createIRI(RDF.nil.getIRI().stringValue()); + } + + Resource head = delegate.factory.createBNode(); + Resource current = head; + IRI firstPredicate = delegate.factory.createIRI(RDF.first.getIRI().stringValue()); + IRI restPredicate = delegate.factory.createIRI(RDF.rest.getIRI().stringValue()); + Value nilValue = delegate.factory.createIRI(RDF.nil.getIRI().stringValue()); + + for (int i = 0; i < objects.size(); i++) { + Value object = extractObject(objects.get(i)); + delegate.safeAddStatement(current, firstPredicate, object); + + if (i == objects.size() - 1) { + delegate.safeAddStatement(current, restPredicate, nilValue); + } else { + Resource next = delegate.factory.createBNode(); + delegate.safeAddStatement(current, restPredicate, next); + current = next; + } + } + + return head; + } + + /** + * Extracts an RDF literal from a literal context. + * Handles plain, language-tagged, typed, boolean, and numeric literals. 
+ * + * @param ctx literal context + * @return RDF literal value + * @throws ParsingErrorException if the literal type is unsupported + */ + private Literal extractLiteral(TurtleParser.LiteralContext ctx) { + if (ctx.rdfLiteral() != null) { + String label = delegate.unescapeString(ctx.rdfLiteral().string().getText()); + if (ctx.rdfLiteral().LANGTAG() != null) { + return delegate.createLiteral(label, ctx.rdfLiteral().LANGTAG().getText().substring(1), null); + } + if (ctx.rdfLiteral().iri() != null) { + return delegate.createLiteral(label, null, delegate.resolveIRI(ctx.rdfLiteral().iri().getText())); + } + return delegate.createLiteral(label, null, null); + } + + if (ctx.BooleanLiteral() != null) { + return delegate.createBooleanLiteral(ctx.BooleanLiteral().getText()); + } + + if (ctx.numericLiteral() != null) { + String numericText = ctx.numericLiteral().getText(); + AbstractTurtleTriGListener.NumericType type; + + if (ctx.numericLiteral().DOUBLE() != null) { + type = AbstractTurtleTriGListener.NumericType.DOUBLE; + } else if (ctx.numericLiteral().DECIMAL() != null) { + type = AbstractTurtleTriGListener.NumericType.DECIMAL; + } else { + type = AbstractTurtleTriGListener.NumericType.INTEGER; + } + + return delegate.createNumericLiteral(numericText, type); + } + + throw new ParsingErrorException("Unsupported literal: " + ctx.getText()); + } + + /** + * Extracts a predicate IRI from a verb context. + * Handles the special case of {@code a} as shorthand for {@code rdf:type}. + * + * @param ctx verb context + * @return predicate IRI + * @throws ParsingErrorException if the IRI cannot be resolved + */ + private IRI extractVerb(TurtleParser.VerbContext ctx) { + String verbText = ctx.getText(); + String resolvedIRI = delegate.resolveIRI(verbText); + if (resolvedIRI.isEmpty()) { + throw new ParsingErrorException("Cannot resolve verb to a valid IRI: " + verbText); + } + return delegate.factory.createIRI(resolvedIRI); + } + + /** + * Delegate extending for Turtle parsing. 
+ * Provides access to common RDF parsing functionality without additional extensions. + */ + private static class TurtleListenerDelegate extends AbstractTurtleTriGListener { + public TurtleListenerDelegate(Model model, ValueFactory factory, String baseURI) { + super(model, factory, baseURI); + } + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java deleted file mode 100644 index f89eea896..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java +++ /dev/null @@ -1,242 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.turtle; - -import java.util.HashMap; -import java.util.Map; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Literal; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; -import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.parser.antlr.TurtleBaseListener; -import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; - -/** - * Listener for the ANTLR4 generated parser for Turtle. - */ -public class TurtleListenerImpl extends TurtleBaseListener { - - private final Model model; - private String baseURI; - private final Map prefixMap = new HashMap<>(); - private final ValueFactory factory; - - private Resource currentSubject; - private IRI currentPredicate; - - /** - * Constructor for TurtleListenerImpl that initializes the model, value factory, - * and configuration options. 
- * - * @param model the model to be populated by the parser - * @param factory the value factory used to create RDF values - * @param options optional configuration options for the parser - */ - public TurtleListenerImpl(Model model, ValueFactory factory, IOOptions options) { - this.model = model; - this.baseURI = ""; - if (options != null && options instanceof RDFParserBaseIRIOptions) { - this.baseURI = ((RDFParserBaseIRIOptions) options).getBase(); - } - this.factory = factory; - } - - /** - * Constructor for TurtleListenerImpl that initializes the model and value - * factory. - * - * @param ctx The parse tree context for the {@code prefixID} rule, - * which provides access to the parsed prefix name and IRI reference tokens. - */ - public void exitPrefixID(TurtleParser.PrefixIDContext ctx) { - String prefix = ctx.PNAME_NS().getText(); - String iri = ctx.IRIREF().getText(); - prefix = prefix.substring(0, prefix.length() - 1); - iri = iri.substring(1, iri.length() - 1); - prefixMap.put(prefix, iri); - - model.setNamespace(prefix, iri); - } - - @Override - public void exitSparqlBase(TurtleParser.SparqlBaseContext ctx) { - String iri = ctx.IRIREF().getText(); - baseURI = iri.substring(1, iri.length() - 1); - } - - @Override - public void enterTriples(TurtleParser.TriplesContext ctx) { - currentSubject = extractSubject(ctx.subject()); - } - - @Override - public void enterVerb(TurtleParser.VerbContext ctx) { - currentPredicate = extractVerb(ctx); - } - - @Override - public void exitObject_(TurtleParser.Object_Context ctx) { - Value object = extractObject(ctx); - model.add(currentSubject, currentPredicate, object); - } - - /** - * Resolves the IRI from a raw string, handling prefixed names and base URIs. 
- * - * @param raw the raw string to resolve - * @return the resolved IRI as a string - */ - private String resolveIRI(String raw) { - if (raw.startsWith("<") && raw.endsWith(">")) { - return raw.substring(1, raw.length() - 1); - } else if (raw.equals("a")) { - return RDF.type.getIRI().stringValue(); - } else if (raw.contains(":")) { - // Prefixed name (e.g., ex:predicate) - String[] parts = raw.split(":", 2); - String ns = prefixMap.get(parts[0]); - if (ns != null) { - return ns + parts[1]; - } else { - throw new IllegalArgumentException("Prefix not declared: " + parts[0]); - } - } else { - return baseURI + raw; - } - } - - /** - * Strips quotes from a string, handling single and triple quotes. - * - * @param text the string to strip quotes from - * @return the stripped string - */ - private String stripQuotes(String text) { - if (text == null || text.length() < 2) - return text; - if ((text.startsWith("\"") && text.endsWith("\"")) || - (text.startsWith("'''") && text.endsWith("'''")) || - (text.startsWith("\"\"\"") && text.endsWith("\"\"\""))) { - return text.substring(1, text.length() - 1); - } - return text; - } - - /** - * Extracts a literal from the given context, handling different types of - * literals. 
- * - * @param ctx the context containing the literal - * @return the extracted Literal object - */ - private Literal extractLiteral(TurtleParser.LiteralContext ctx) { - String label; - IRI datatype; - String lang; - - if (ctx.rdfLiteral() != null) { - if (ctx.rdfLiteral().iri() != null) { - datatype = factory.createIRI(resolveIRI(ctx.rdfLiteral().iri().getText())); - label = ctx.rdfLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label), datatype); - } - if (ctx.rdfLiteral().LANGTAG() != null) { - lang = ctx.rdfLiteral().LANGTAG().getText().substring(1); - label = ctx.rdfLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label), lang); - } - label = ctx.rdfLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label)); - } - - if (ctx.BooleanLiteral() != null) { - label = ctx.BooleanLiteral().getText(); - datatype = XSD.BOOLEAN.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral() != null) { - if (ctx.numericLiteral().DECIMAL() != null) { - label = ctx.numericLiteral().DECIMAL().getText(); - datatype = XSD.DECIMAL.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral().DOUBLE() != null) { - label = ctx.numericLiteral().DOUBLE().getText(); - datatype = XSD.DOUBLE.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral().INTEGER() != null) { - label = ctx.numericLiteral().INTEGER().getText(); - datatype = XSD.INTEGER.getIRI(); - return factory.createLiteral(label, datatype); - } - } - throw new IllegalArgumentException("Unsupported literal type: " + ctx.getText()); - } - - /** - * Extracts the object from the given context, which can be an IRI, blank node, - * or literal. 
- * - * @param ctx the context containing the object - * @return the extracted Value object - */ - private Value extractObject(TurtleParser.Object_Context ctx) { - if (ctx.iri() != null) { - return factory.createIRI(resolveIRI(ctx.iri().getText())); - } - if (ctx.BlankNode() != null) { - return factory.createBNode(ctx.BlankNode().getText()); - } - if (ctx.literal() != null) { - return extractLiteral(ctx.literal()); - } - throw new RuntimeException("Unsupported object: " + ctx.getText()); - } - - /** - * Extracts the subject from the given context, which can be an IRI or blank - * node. - * - * @param ctx the context containing the subject - * @return the extracted Resource object - */ - private Resource extractSubject(TurtleParser.SubjectContext ctx) { - if (ctx.iri() != null) { - return factory.createIRI(resolveIRI(ctx.iri().getText())); - } - if (ctx.BlankNode() != null) { - return factory.createBNode(ctx.BlankNode().getText()); - } - throw new RuntimeException("Unsupported subject: " + ctx.getText()); - } - - /** - * Extracts the predicate from the given context, which is expected to be an - * IRI. - * - * @param ctx the context containing the predicate - * @return the extracted IRI object - */ - private IRI extractPredicate(TurtleParser.PredicateContext ctx) { - return factory.createIRI(resolveIRI(ctx.getText())); - } - - /** - * Extracts the verb from the given context, which can be a predicate or an IRI. 
- * - * @param ctx the context containing the verb - * @return the extracted IRI object - */ - private IRI extractVerb(TurtleParser.VerbContext ctx) { - if (ctx.predicate() != null) { - return extractPredicate(ctx.predicate()); - } else - return factory.createIRI(resolveIRI(ctx.getText())); - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/util/ParserConstants.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/util/ParserConstants.java new file mode 100644 index 000000000..a495e6e9b --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/util/ParserConstants.java @@ -0,0 +1,99 @@ +package fr.inria.corese.core.next.impl.io.parser.util; + +import fr.inria.corese.core.util.Property; + +/** + * A utility class containing constants for characters and keywords + * used in the TriG parser. Centralizing these values helps in + * maintaining the code and improves readability. + */ +public final class ParserConstants { + + + public static final String BASE = "@base"; + + public static final String PREFIX = "@prefix"; + + public static final String POINT = "."; + + public static final String DOUBLE_DOT = ".."; + + public static final String SPARQL_BASE = "BASE"; + + public static final String SPARQL_PREFIX = "PREFIX"; + + public static final String GRAPH = "GRAPH"; + + public static final String A = "a"; + + public static final String EMPTY_STRING = ""; + + public static final String E = "e"; + + // --- Delimiters and Punctuation --- + + public static final String COLON = ":"; + + public static final String SEMICOLON = ";"; + + public static final String COMMA = ","; + + public static final String DOT = "."; + + + public static final String EMPTY_SQUARE_BRACKET = "[]"; + // --- IRI and Literal Delimiters --- + + public static final String IRI_START = "<"; + + public static final String IRI_END = ">"; + + public static final String QUOTE = "\""; + + public static final String APOSTROPHE = "'"; + 
+ public static final String TRIPLE_QUOTE = "\"\"\""; + + public static final String TRIPLE_APOSTROPHE = "'''"; + + public static final String SLASH = "/"; + + public static final String DOUBLE_SLASH = "//"; + + + public static final String AT = "@"; + /** + * The blank node prefix. + */ + public static final String BLANK_NODE_PREFIX = "_:"; + + public static final String FILE_PROTOCOL_SIMPLE = "file:/"; + public static final String FILE_PROTOCOL_TRIPLE_SLASH = "file:///"; + public static final String FILE_PROTOCOL = "file://"; + public static final String RDF_TRG_TEST_SUITE_URI = "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-trig/"; + public static final String PNAME_NS_PATTERN = "^[A-Za-z_][A-Za-z0-9_-]*$"; + + public static final String MINUS = "-"; + public static final String SPACE = " "; + public static final String TAB = "\t"; + public static final String LINE_FEED = "\n"; + public static final String CARRIAGE_RETURN = "\r"; + + public static final String FRAGMENT = "#"; + public static final String QUERY_MARK = "?"; + public static final String PLUS = "+"; + + // Prevent instantiation of this utility class. + private ParserConstants() { + throw new UnsupportedOperationException("This is a utility class and cannot be instantiated"); + } + /** + * Returns the configured default base URI for IRI resolution. + * The value is configurable via {@code Property.Value.DEFAULT_BASE_URI}. 
+ * + * @return the default base URI from configuration, or null if not set + */ + public static String getDefaultBaseURI() { + return Property.getStringValue(Property.Value.DEFAULT_BASE_URI); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java index c7d918c03..42348b7ba 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java @@ -19,6 +19,10 @@ import fr.inria.corese.core.next.impl.io.serialization.trig.TriGSerializer; import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleOption; import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleSerializer; +import fr.inria.corese.core.next.impl.io.serialization.jsonld.JSONLDSerializer; +import fr.inria.corese.core.next.impl.io.option.TitaniumJSONLDProcessorOption; +import fr.inria.corese.core.next.impl.io.serialization.jsonld.JSONLDSerializer; +import fr.inria.corese.core.next.impl.io.option.TitaniumJSONLDProcessorOption; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,9 +40,12 @@ * to map each format to its corresponding serializer constructor, * providing a flexible and extensible way to manage serializer instances. * - *

It adapts the generic {@link SerializationOption} provided to the specific - * configuration type expected by each serializer in the hierarchy, with a fallback - * to default configurations if an incompatible type is provided.

+ *

+ * It adapts the generic {@link SerializationOption} provided to the specific + * configuration type expected by each serializer in the hierarchy, with a + * fallback + * to default configurations if an incompatible type is provided. + *

*/ public class DefaultSerializerFactory implements SerializerFactory { @@ -50,8 +57,10 @@ public class DefaultSerializerFactory implements SerializerFactory { /** * Constructs a {@code DefaultSerializerFactory} and populates its registry * with constructors for all known {@link RDFFormat} implementations. - * Each constructor attempts to cast the generic {@link SerializationOption} to the - * specific configuration type required by the serializer. If the cast is not possible, + * Each constructor attempts to cast the generic {@link SerializationOption} to + * the + * specific configuration type required by the serializer. If the cast is not + * possible, * it falls back to the format's default configuration. */ public DefaultSerializerFactory() { @@ -73,7 +82,8 @@ public DefaultSerializerFactory() { if (genericConfig instanceof NTriplesOption specificConfig) { return new NTriplesSerializer(model, specificConfig); } else { - logger.warn("Provided config for NTRIPLES is not NTriplesConfig (was {}). Using default NTriplesConfig.", + logger.warn( + "Provided config for NTRIPLES is not NTriplesConfig (was {}). Using default NTriplesConfig.", genericConfig.getClass().getSimpleName()); return new NTriplesSerializer(model, NTriplesOption.defaultConfig()); } @@ -109,6 +119,17 @@ public DefaultSerializerFactory() { } }); + tempRegistry.put(RDFFormat.JSONLD, (model, genericConfig) -> { + if (genericConfig instanceof TitaniumJSONLDProcessorOption specificConfig) { + return new JSONLDSerializer(model, specificConfig); + } else { + logger.warn( + "Provided config for JSONLD is not TitaniumJSONLDProcessorOption (was {}). 
Using default TitaniumJSONLDProcessorOption.", + genericConfig.getClass().getSimpleName()); + return new JSONLDSerializer(model, new TitaniumJSONLDProcessorOption.Builder().build()); + } + }); + tempRegistry.put(RDFFormat.RDFC_1_0, (model, genericConfig) -> { if (genericConfig instanceof Rdfc10Options specificConfig) { Rdfc10Canonicalizer canonicalizer = new Rdfc10Canonicalizer( @@ -131,18 +152,35 @@ public DefaultSerializerFactory() { }); + tempRegistry.put(RDFFormat.JSONLD, (model, genericConfig) -> { + if (genericConfig instanceof TitaniumJSONLDProcessorOption specificConfig) { + return new JSONLDSerializer(model, specificConfig); + } else { + logger.warn( + "Provided config for JSONLD is not TitaniumJSONLDProcessorOption (was {}). Using default TitaniumJSONLDProcessorOption.", + genericConfig.getClass().getSimpleName()); + return new JSONLDSerializer(model, new TitaniumJSONLDProcessorOption.Builder().build()); + } + }); + this.registry = Collections.unmodifiableMap(tempRegistry); } /** - * Creates an {@link RDFSerializer} instance for the specified format, model, and configuration. + * Creates an {@link RDFSerializer} instance for the specified format, model, + * and configuration. * - * @param format the {@link RDFFormat} for which to create the serializer. Must not be null. + * @param format the {@link RDFFormat} for which to create the serializer. Must + * not be null. * @param model the {@link Model} to be serialized. Must not be null. - * @param config the {@link SerializationOption} to apply during serialization. Must not be null. - * @return a new instance of {@link RDFSerializer} configured for the specified format. - * @throws NullPointerException if any of the arguments (format, model, config) are null. - * @throws IllegalArgumentException if the provided format is not supported by this factory. + * @param config the {@link SerializationOption} to apply during serialization. + * Must not be null. 
+ * @return a new instance of {@link RDFSerializer} configured for the specified + * format. + * @throws NullPointerException if any of the arguments (format, model, + * config) are null. + * @throws IllegalArgumentException if the provided format is not supported by + * this factory. */ @Override public RDFSerializer createSerializer(RDFFormat format, Model model, SerializationOption config) { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java index 1d9b28ce0..12a72c708 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java @@ -322,18 +322,23 @@ protected void writeValue(Writer writer, Value value) throws IOException { currentlyWritingBlankNodes.add(bNode); boolean handled = false; - if (option instanceof AbstractSerializerOption && getTFamilyOption().useCollections() && bNode.isBNode()) { - handled = writeRDFList(writer, bNode); - } - if (!handled && option instanceof AbstractSerializerOption && getTFamilyOption().getBlankNodeStyle() == BlankNodeStyleEnum.ANONYMOUS && bNode.isBNode()) { // getBlankNodeStyle is on AbstractTFamilyConfig - List properties = model.stream() - .filter(stmt -> stmt.getSubject().equals(bNode)) - .toList(); + boolean isSubject = model.stream().anyMatch(stmt -> stmt.getSubject().equals(bNode)); + + if (!isSubject && option instanceof AbstractSerializerOption) { + if (getTFamilyOption().useCollections() && bNode.isBNode()) { + handled = writeRDFList(writer, bNode); + } + + if (!handled && getTFamilyOption().getBlankNodeStyle() == BlankNodeStyleEnum.ANONYMOUS && bNode.isBNode()) { + List properties = model.stream() + .filter(stmt -> stmt.getSubject().equals(bNode)) + .toList(); - if (!properties.isEmpty()) { - 
writeInlineBlankNode(writer, properties); - handled = true; + if (!properties.isEmpty()) { + writeInlineBlankNode(writer, properties); + handled = true; + } } } @@ -343,10 +348,12 @@ protected void writeValue(Writer writer, Value value) throws IOException { currentlyWritingBlankNodes.remove(bNode); } else { - throw new IllegalArgumentException("Unsupported value type for " + getFormatName() + " serialization: " + value.getClass().getName()); + throw new IllegalArgumentException("Unsupported value type for " + getFormatName() + + " serialization: " + value.getClass().getName()); } } + /** * Writes an {@link IRI} to the writer. * Attempts to use a prefixed name if possible, otherwise writes the full IRI in angle brackets. @@ -661,48 +668,8 @@ protected Set precomputeInlineBlankNodesAndLists() { if (stmt.getSubject().isBNode()) { Resource bNodeSubject = stmt.getSubject(); if (tFamilyConfig.useCollections() && isRDFListHead(bNodeSubject)) { - Resource current = bNodeSubject; - Set listNodes = new HashSet<>(); - Set visitedInPrecomp = new HashSet<>(); - boolean isList = true; - while (current != null && current.isBNode() && !visitedInPrecomp.contains(current)) { - visitedInPrecomp.add(current); - listNodes.add(current); - final Resource finalCurrentForLambda = current; - List listProps = model.stream() - .filter(s -> s.getSubject().equals(finalCurrentForLambda)) - .toList(); - - if (listProps.size() != 2) { - isList = false; - break; - } - - Optional first = listProps.stream() - .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST)) - .map(Statement::getObject) - .findFirst(); - - Optional rest = listProps.stream() - .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) - .map(Statement::getObject) - .findFirst(); - - if (!first.isPresent() || !rest.isPresent()) { - isList = false; - break; - } - - if (rest.get().stringValue().equals(SerializationConstants.RDF_NIL)) { - current = null; - } else if 
(rest.get().isBNode()) { - current = (Resource) rest.get(); - } else { - isList = false; - break; - } - } - if (isList && current == null) { + Set listNodes = detectListNodes(bNodeSubject); + if (!listNodes.isEmpty()) { precomputed.addAll(listNodes); } } @@ -716,7 +683,10 @@ protected Set precomputeInlineBlankNodesAndLists() { s.getPredicate().stringValue().equals(SerializationConstants.RDF_REST) ); - if (!properties.isEmpty() && !isPartOfList) { + boolean usedAsTopLevelSubject = model.stream() + .anyMatch(s -> s.getSubject().equals(bNodeSubject)); + + if (!properties.isEmpty() && !isPartOfList && !usedAsTopLevelSubject) { precomputed.add(bNodeSubject); } } @@ -725,6 +695,55 @@ protected Set precomputeInlineBlankNodesAndLists() { return precomputed; } + /** + * Traverses an RDF list starting from the given head and collects all list nodes. + * + * @param head the blank node that may be the head of an RDF list + * @return the set of blank nodes that form the list, or an empty set if not a valid list + */ + private Set detectListNodes(Resource head) { + Set listNodes = new HashSet<>(); + Set visited = new HashSet<>(); + Resource current = head; + + while (current != null && current.isBNode() && !visited.contains(current)) { + visited.add(current); + listNodes.add(current); + + final Resource finalCurrent = current; + List props = model.stream() + .filter(s -> s.getSubject().equals(finalCurrent)) + .toList(); + + if (props.size() != 2) { + return Collections.emptySet(); + } + + Optional first = props.stream() + .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST)) + .map(Statement::getObject) + .findFirst(); + + Optional rest = props.stream() + .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) + .map(Statement::getObject) + .findFirst(); + + if (!first.isPresent() || !rest.isPresent()) { + return Collections.emptySet(); + } + + if (rest.get().stringValue().equals(SerializationConstants.RDF_NIL)) { + 
current = null; + } else if (rest.get().isBNode()) { + current = (Resource) rest.get(); + } else { + return Collections.emptySet(); + } + } + return current == null ? listNodes : Collections.emptySet(); + } + /** * Checks if a given blank node is the head of an RDF list. * diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/TitaniumRDFDatasetSerializationAdapter.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/TitaniumRDFDatasetSerializationAdapter.java index d02fce12a..15eced66d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/TitaniumRDFDatasetSerializationAdapter.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/TitaniumRDFDatasetSerializationAdapter.java @@ -38,6 +38,9 @@ import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.common.vocabulary.XSD; import fr.inria.corese.core.next.impl.exception.SerializationException; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Adapter class from Model to RdfDataset for usage in the JSON-LD serialization process using the titanium library. 
@@ -45,6 +48,7 @@ */ public class TitaniumRDFDatasetSerializationAdapter implements RdfDataset { + private final static Logger logger = LoggerFactory.getLogger(TitaniumRDFDatasetSerializationAdapter.class); private Model model; /** @@ -58,17 +62,18 @@ public TitaniumRDFDatasetSerializationAdapter(Model model) { @Override public RdfGraph getDefaultGraph() { - return new RdfGraph() { + RdfGraph resultGraph = new RdfGraph() { @Override public boolean contains(RdfTriple triple) { - return model.contains(toResource(triple.getSubject()), toIRI(triple.getPredicate()), toValue(triple.getObject())); + return model.contains(toResource(triple.getSubject()), toIRI(triple.getPredicate()), toValue(triple.getObject()), (Resource) null); } @Override public List toList() { - return model.stream().map(TitaniumRDFDatasetSerializationAdapter.this::toRdfTriple).toList(); + return model.filter(null, null, null, (Resource) null).stream().map(TitaniumRDFDatasetSerializationAdapter.this::toRdfTriple).toList(); } }; + return resultGraph; } @Override @@ -187,27 +192,15 @@ public RdfValue getObject() { * @return the converted resource */ private RdfResource toRdfResource(Resource resource) { - if (resource != null && (! 
(resource.isBNode() || resource.isIRI()))) { - throw new SerializationException("Unknown resource type " + resource, "JSON-LD"); - } else if (resource == null) { + if (resource == null) { return null; + } else if (resource.isIRI()) { + return toRdfIRI((IRI) resource); + } else if (resource.isBNode()) { + return toRdfBlankNode((BNode) resource); + } else { + throw new SerializationException("Unknown resource type " + resource, "JSON-LD"); } - return new RdfResource() { - @Override - public boolean isIRI() { - return resource.isIRI(); - } - - @Override - public boolean isBlankNode() { - return resource.isBNode(); - } - - @Override - public String getValue() { - return resource.stringValue(); - } - }; } /** @@ -258,7 +251,7 @@ public boolean isBlankNode() { } @Override public String getValue() { - return bnode.stringValue(); + return ParserConstants.BLANK_NODE_PREFIX + bnode.stringValue(); } }; } @@ -290,7 +283,7 @@ public String getDatatype() { ) { return literal.getDatatype().stringValue(); } else if (literal.getLanguage().isPresent()) { - return RDF.langString.getIRI().stringValue(); + return "rdf:langString"; // Titanium JSONLD expect the langstring datatype to be in this format ... 
} else { return XSD.xsdString.getIRI().stringValue(); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializer.java index 11f3a66d8..744628f7b 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializer.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializer.java @@ -197,8 +197,6 @@ private void writeStatementsWithContext(Writer writer) throws IOException { if (context != null) { writer.write(SerializationConstants.CLOSE_BRACE); - writer.write(SerializationConstants.SPACE); - writer.write(SerializationConstants.POINT); writer.write(trigConfig.getLineEnding()); } writer.write(trigConfig.getLineEnding()); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializer.java index 326725de9..ee5a9c0df 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializer.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializer.java @@ -147,12 +147,19 @@ protected String escapeLiteralString(String value) { sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); break; default: - if (option.escapeUnicode() && (c <= 0x1F || c == 0x7F || (c >= 0x80 && c <= 0xFFFF))) { + if (Character.isISOControl(c) ||c == 0x7F) { + sb.append(String.format("\\u%04X", (int) c)); + } + else if (option.escapeUnicode() && c >= 0x80 && c <= 0xFFFF) { sb.append(String.format("\\u%04X", (int) c)); } else if (Character.isHighSurrogate(c)) { int codePoint = value.codePointAt(i); if (Character.isValidCodePoint(codePoint)) { - sb.append(String.format("\\U%08X", codePoint)); + if (option.escapeUnicode()) { + sb.append(String.format("\\U%08X", codePoint)); + } else { + 
sb.append(Character.toChars(codePoint)); + } i++; } else { sb.append(c); @@ -178,6 +185,7 @@ protected String escapeMultilineLiteralString(String value) { SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE); + } /** diff --git a/src/main/java/fr/inria/corese/core/util/Property.java b/src/main/java/fr/inria/corese/core/util/Property.java index ba2d239bc..06c6a2af3 100644 --- a/src/main/java/fr/inria/corese/core/util/Property.java +++ b/src/main/java/fr/inria/corese/core/util/Property.java @@ -1225,6 +1225,7 @@ public enum Value { // parser configuration STRICT_MODE, + DEFAULT_BASE_URI, // Elasticsearch parameters // TODO Change class to be able to define application-specific properties ELASTICSEARCH_API_KEY, diff --git a/src/main/resources/data/corese/property.properties b/src/main/resources/data/corese/property.properties index 92fe685dd..08d47fe87 100644 --- a/src/main/resources/data/corese/property.properties +++ b/src/main/resources/data/corese/property.properties @@ -262,5 +262,8 @@ STRICT_MODE = false #LOAD_FORMAT = text/turtle;q=1.0, application/rdf+xml;q=0.9, application/ld+json;q=0.7; application/json;q=0.6 #LOAD_FORMAT = application/rdf+xml +# Default base URI for Turtle/TriG parser +DEFAULT_BASE_URI = http://example.org/ + diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/jsonld/JSONLDCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/jsonld/JSONLDCircularTest.java new file mode 100644 index 000000000..613dcbabf --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/jsonld/JSONLDCircularTest.java @@ -0,0 +1,420 @@ +package fr.inria.corese.core.next.impl.io.parser.jsonld; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static 
org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializationOption; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.option.TitaniumJSONLDProcessorOption; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Circular tests for JSON-LD parser and serializer integration. + * These tests verify that data can be correctly serialized to JSON-LD format + * and then parsed back to an equivalent model (round-trip testing). + * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. + * + * JSON-LD supports both namespaces and named graphs, and has unique features + * like @context handling, so additional considerations are included. 
+ */ +@DisplayName("JSON-LD Circular Integration Tests") +class JSONLDCircularTest { + + private static final Logger logger = LoggerFactory.getLogger(JSONLDCircularTest.class); + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private TitaniumJSONLDProcessorOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String GRAPH_1 = EXAMPLE_NS + "graph1"; + private static final String GRAPH_2 = EXAMPLE_NS + "graph2"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = new TitaniumJSONLDProcessorOption.Builder() + .build(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. 
+ * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a model with named graphs for testing JSON-LD specific functionality. + * + * @return A model with triples in different named graphs + */ + private Model createNamedGraphsTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + IRI graph1 = valueFactory.createIRI(GRAPH_1); + IRI graph2 = valueFactory.createIRI(GRAPH_2); + + // Add triples to different named graphs + model.add(subject1, predicateName, objectJohn, graph1); + model.add(subject2, predicateName, objectJane, graph2); + model.add(subject1, predicateKnows, subject2, graph1); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. 
+ * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. 
+ * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. + * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. + * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. 
+     *
+     * @return A model with literals containing newlines, quotes, and Unicode
+     */
+    private Model createSpecialCharactersTestModel() {
+        Model model = new CoreseModel();
+
+        IRI subject = valueFactory.createIRI(SUBJECT_1);
+        IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description");
+        IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note");
+
+        // Literal containing a newline, a tab and embedded double quotes (all must be escaped by the serializer)
+        Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text");
+        model.add(subject, predicateDescription, literalWithEscapes);
+
+        // Literal with non-ASCII text: CJK U+4E16 U+754C and emoji U+1F30D (surrogate pair), written as escapes so the data does not depend on source-file encoding
+        Literal literalUnicode = valueFactory.createLiteral("Hello \u4E16\u754C \uD83C\uDF0D");
+        model.add(subject, predicateNote, literalUnicode);
+
+        return model;
+    }
+
+    /**
+     * Performs a round-trip serialization and parsing cycle.
+     *
+     * @param originalModel The model to serialize and parse back
+     * @return The model resulting from parsing the serialized data
+     * @throws Exception If serialization or parsing fails
+     */
+    private Model performRoundTrip(Model originalModel) throws Exception {
+        // Serialize to JSON-LD
+        RDFSerializer serializer = serializerFactory.createSerializer(
+                RDFFormat.JSONLD, originalModel, defaultConfig);
+
+        StringWriter writer = new StringWriter();
+        serializer.write(writer);
+        String serializedContent = writer.toString();
+
+        // Verify serialization produced content (only check for non-empty models)
+        assertNotNull(serializedContent, "Serialized content should not be null");
+        if (originalModel.size() > 0) {
+            assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models");
+        }
+
+        // Parse back from JSON-LD
+        Model deserializedModel = new CoreseModel();
+        RDFParser parser = parserFactory.createRDFParser(
+                RDFFormat.JSONLD, deserializedModel, valueFactory);
+
+        ByteArrayInputStream inputStream = new ByteArrayInputStream(
+                serializedContent.getBytes(StandardCharsets.UTF_8));
+        parser.parse(inputStream);
+
+ return deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing named graphs") + void testRoundTripWithNamedGraphs() throws Exception { + // Given: A model with triples in different named graphs + Model originalModel = createNamedGraphsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All named graph information should be preserved + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving named graphs"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + 
"Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models 
should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java index 539d54539..7e55be04e 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParserTest.java @@ -195,19 +195,4 @@ void 
testParseUnicodeEscapeIRIUxInGraph() throws ParsingErrorException { parser.parse(new StringReader(nquad)); verify(mockModel).add(mockSubjectIRI, mockPredicateIRI, mockObjectIRI, expectedGraphIRI); } - - @Test - @DisplayName("Test parsing a document that contains a literal that is a malformed document") - void testMalformedDocumentInception() throws ParsingErrorException { - String doc = """ - . - \"\"\"@base . - . - """; - StringReader reader = new StringReader(doc); - parser.parse(reader); - - verify(mockModel, times(2)).add(any(), any(), any(), any()); - } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsCircularTest.java new file mode 100644 index 000000000..4ad005620 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/nquads/NQuadsCircularTest.java @@ -0,0 +1,415 @@ +package fr.inria.corese.core.next.impl.io.parser.nquads; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import 
fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.nquads.NQuadsOption; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; + +/** + * Circular tests for N-Quads parser and serializer integration. + * These tests verify that data can be correctly serialized to N-Quads format + * and then parsed back to an equivalent model (round-trip testing). + * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. + * + * N-Quads supports named graphs, so additional tests are included for + * quad-based (subject, predicate, object, context) scenarios. + */ +@DisplayName("N-Quads Circular Integration Tests") +class NQuadsCircularTest { + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private NQuadsOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String GRAPH_1 = EXAMPLE_NS + "graph1"; + private static final String GRAPH_2 = EXAMPLE_NS + "graph2"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private static final String 
XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = NQuadsOption.defaultConfig(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. + * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a model with named graphs (quads) for testing N-Quads specific + * functionality. 
+ * + * @return A model with quads in different named graphs + */ + private Model createNamedGraphsTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + IRI graph1 = valueFactory.createIRI(GRAPH_1); + IRI graph2 = valueFactory.createIRI(GRAPH_2); + + // Add triples to different named graphs + model.add(subject1, predicateName, objectJohn, graph1); + model.add(subject2, predicateName, objectJane, graph2); + model.add(subject1, predicateKnows, subject2, graph1); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. + * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = 
valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. + * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. 
+ * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. + * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. 
+     *
+     * @return A model with literals containing newlines, quotes, and Unicode
+     */
+    private Model createSpecialCharactersTestModel() {
+        Model model = new CoreseModel();
+
+        IRI subject = valueFactory.createIRI(SUBJECT_1);
+        IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description");
+        IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note");
+
+        // Literal containing a newline, a tab and embedded double quotes (all must be escaped by the serializer)
+        Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text");
+        model.add(subject, predicateDescription, literalWithEscapes);
+
+        // Literal with non-ASCII text: CJK U+4E16 U+754C and emoji U+1F30D (surrogate pair), written as escapes so the data does not depend on source-file encoding
+        Literal literalUnicode = valueFactory.createLiteral("Hello \u4E16\u754C \uD83C\uDF0D");
+        model.add(subject, predicateNote, literalUnicode);
+
+        return model;
+    }
+
+    /**
+     * Performs a round-trip serialization and parsing cycle.
+     *
+     * @param originalModel The model to serialize and parse back
+     * @return The model resulting from parsing the serialized data
+     * @throws Exception If serialization or parsing fails
+     */
+    private Model performRoundTrip(Model originalModel) throws Exception {
+        // Serialize to N-Quads
+        RDFSerializer serializer = serializerFactory.createSerializer(
+                RDFFormat.NQUADS, originalModel, defaultConfig);
+
+        StringWriter writer = new StringWriter();
+        serializer.write(writer);
+        String serializedContent = writer.toString();
+
+        // Verify serialization produced content (only check for non-empty models)
+        assertNotNull(serializedContent, "Serialized content should not be null");
+        if (originalModel.size() > 0) {
+            assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models");
+        }
+
+        // Parse back from N-Quads
+        Model deserializedModel = new CoreseModel();
+        RDFParser parser = parserFactory.createRDFParser(
+                RDFFormat.NQUADS, deserializedModel, valueFactory);
+
+        ByteArrayInputStream inputStream = new ByteArrayInputStream(
+                serializedContent.getBytes(StandardCharsets.UTF_8));
+        parser.parse(inputStream);
+
+ return deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing named graphs (quads)") + void testRoundTripWithNamedGraphs() throws Exception { + // Given: A model with quads in different named graphs + Model originalModel = createNamedGraphsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All named graph information should be preserved + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving named graphs"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, 
deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and 
deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesCircularTest.java new file mode 100644 index 000000000..1b5783b87 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesCircularTest.java @@ -0,0 +1,367 @@ +package fr.inria.corese.core.next.impl.io.parser.ntriples; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesOption; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; + +/** + * Circular tests for N-Triples parser and serializer integration. + * These tests verify that data can be correctly serialized to N-Triples format + * and then parsed back to an equivalent model (round-trip testing). + * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. 
+ */ +@DisplayName("N-Triples Circular Integration Tests") +class NTriplesCircularTest { + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private NTriplesOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = NTriplesOption.defaultConfig(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. 
+ * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. + * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI 
predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. + * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. + * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. 
+ * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. + * + * @return A model with literals containing newlines, quotes, and Unicode + */ + private Model createSpecialCharactersTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description"); + IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note"); + + // Literal with newlines and quotes + Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text"); + model.add(subject, predicateDescription, literalWithEscapes); + + // Literal with Unicode characters + Literal literalUnicode = valueFactory.createLiteral("Hello δΈ–η•Œ 🌍"); + model.add(subject, predicateNote, literalUnicode); + + return model; + } + + /** + * Performs a round-trip serialization and parsing cycle. 
+ * + * @param originalModel The model to serialize and parse back + * @return The model resulting from parsing the serialized data + * @throws Exception If serialization or parsing fails + */ + private Model performRoundTrip(Model originalModel) throws Exception { + // Serialize to N-Triples + RDFSerializer serializer = serializerFactory.createSerializer( + RDFFormat.NTRIPLES, originalModel, defaultConfig); + + StringWriter writer = new StringWriter(); + serializer.write(writer); + String serializedContent = writer.toString(); + + // Verify serialization produced content (only check for non-empty models) + assertNotNull(serializedContent, "Serialized content should not be null"); + if (originalModel.size() > 0) { + assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models"); + } + + // Parse back from N-Triples + Model deserializedModel = new CoreseModel(); + RDFParser parser = parserFactory.createRDFParser( + RDFFormat.NTRIPLES, deserializedModel, valueFactory); + + ByteArrayInputStream inputStream = new ByteArrayInputStream( + serializedContent.getBytes(StandardCharsets.UTF_8)); + parser.parse(inputStream); + + return deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void 
testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws 
Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, 
preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListenerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListenerTest.java index b573df6d1..042bc8dea 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListenerTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/ntriples/NTriplesListenerTest.java @@ -2,6 +2,7 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.parser.antlr.NTriplesParser; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.tree.TerminalNode; @@ -222,29 +223,29 @@ void testExtractObjectLiteralLang() { @Test - @DisplayName("unescapeLiteral should throw IllegalArgumentException for invalid \\uXXXX") + @DisplayName("unescapeLiteral should throw ParsingErrorException for invalid \\uXXXX") void testUnescapeLiteralInvalidUx() throws NoSuchMethodException { String input = "\"Invalid\\uXXXX\""; java.lang.reflect.Method method = NTriplesListener.class.getDeclaredMethod("unescapeLiteral", String.class); method.setAccessible(true); - assertThrows(IllegalArgumentException.class, + assertThrows(ParsingErrorException.class, () -> listener.unescapeLiteral(input), - "Should throw unescapeLiteral should throw IllegalArgumentException for invalid \\uXXXX"); + "Should throw unescapeLiteral should throw ParsingErrorException for invalid \\uXXXX"); } @Test - @DisplayName("unescapeLiteral should throw IllegalArgumentException for invalid \\UXXXXXXXX") + @DisplayName("unescapeLiteral should throw ParsingErrorException for invalid \\UXXXXXXXX") void testUnescapeLiteralInvalid() throws NoSuchMethodException { String input = "\"Invalid\\U0000XXX\""; java.lang.reflect.Method method = NTriplesListener.class.getDeclaredMethod("unescapeLiteral", 
String.class); method.setAccessible(true); - assertThrows(IllegalArgumentException.class, + assertThrows(ParsingErrorException.class, () -> listener.unescapeLiteral(input), - "Should throw unescapeLiteral should throw IllegalArgumentException for invalid \\UXXXXXXXX"); + "Should throw unescapeLiteral should throw ParsingErrorException for invalid \\UXXXXXXXX"); } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLCircularTest.java new file mode 100644 index 000000000..8314b3aca --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLCircularTest.java @@ -0,0 +1,376 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.rdfxml.XmlOption; +import 
fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; + +/** + * Circular tests for RDF/XML parser and serializer integration. + * These tests verify that data can be correctly serialized to RDF/XML format + * and then parsed back to an equivalent model (round-trip testing). + * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. + * + * RDF/XML supports namespaces, so additional tests are included for prefix + * handling. + * + * NOTE: These tests are currently disabled because they cannot work yet. + * We need to wait for the RDF/XML parser implementation from PR #176: + * https://github.com/corese-stack/corese-core/pull/176 + * + * Once the parser is implemented, these tests can be enabled to verify + * the round-trip functionality between the parser and serializer. + */ +@DisplayName("RDF/XML Circular Integration Tests") +class RDFXMLCircularTest { + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private XmlOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private 
static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = XmlOption.defaultConfig(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. + * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. 
+ * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. 
+ * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. + * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. + * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. 
+ * + * @return A model with literals containing newlines, quotes, and Unicode + */ + private Model createSpecialCharactersTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description"); + IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note"); + + // Literal with newlines and quotes + Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text"); + model.add(subject, predicateDescription, literalWithEscapes); + + // Literal with Unicode characters + Literal literalUnicode = valueFactory.createLiteral("Hello δΈ–η•Œ 🌍"); + model.add(subject, predicateNote, literalUnicode); + + return model; + } + + /** + * Performs a round-trip serialization and parsing cycle. + * + * @param originalModel The model to serialize and parse back + * @return The model resulting from parsing the serialized data + * @throws Exception If serialization or parsing fails + */ + private Model performRoundTrip(Model originalModel) throws Exception { + // Serialize to RDF/XML + RDFSerializer serializer = serializerFactory.createSerializer( + RDFFormat.RDFXML, originalModel, defaultConfig); + + StringWriter writer = new StringWriter(); + serializer.write(writer); + String serializedContent = writer.toString(); + + // Verify serialization produced content (only check for non-empty models) + assertNotNull(serializedContent, "Serialized content should not be null"); + if (originalModel.size() > 0) { + assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models"); + } + + // Parse back from RDF/XML + Model deserializedModel = new CoreseModel(); + RDFParser parser = parserFactory.createRDFParser( + RDFFormat.RDFXML, deserializedModel, valueFactory); + + ByteArrayInputStream inputStream = new ByteArrayInputStream( + serializedContent.getBytes(StandardCharsets.UTF_8)); + parser.parse(inputStream); + 
+ return deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test 
+ @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, 
deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java index f4677cdab..27c7f1e1c 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java @@ -7,6 +7,8 @@ import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.InputStream; @@ -21,6 +23,8 @@ * and unescaping of IRIs and literals, and named graphs. */ public class RDFXMLParserTest { + private static final Logger logger = LoggerFactory.getLogger(RDFXMLParserTest.class); + /** * Helper method to parse the RDF/XML String * @param rdfXml @@ -37,6 +41,10 @@ private Model parseRdfXml(String rdfXml) throws Exception { return model; } + /** + * Helper method to print the model. 
+ * @param model + */ /** * Helper method to print the model. * @param model @@ -44,29 +52,34 @@ private Model parseRdfXml(String rdfXml) throws Exception { private void printModel(Model model) { model.stream().forEach(stmt -> { Value obj = stmt.getObject(); + String subjectString = stmt.getSubject().stringValue(); + String predicateString = stmt.getPredicate().stringValue(); + if (obj instanceof Literal literal) { - if (literal.getLanguage().isPresent()) { - System.out.printf("(%s, %s, \"%s\"@%s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel(), - literal.getLanguage().get()); + String label = String.valueOf(literal.getLabel()); + String languageTag = literal.getLanguage().orElse(null); + + if (languageTag != null) { + logger.debug("({}, {}, \"{}\"@{})", + subjectString, + predicateString, + label, + languageTag); } else { - System.out.printf("(%s, %s, \"%s\")%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel()); + logger.debug("({}, {}, \"{}\")", + subjectString, + predicateString, + label); } } else { - System.out.printf("(%s, %s, %s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), + logger.debug("({}, {}, {})", + subjectString, + predicateString, obj.stringValue()); } }); } - /** * Test node elements with IRIs * @throws Exception diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java index 8a9cc7bec..9d4e8db75 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java @@ -80,7 +80,7 @@ private void printModel(Model model) { @Test void testNamedGraphParsing() throws Exception { String trig = """ - @prefix ex: + @prefix ex: . ex:Graph1 { ex:Alice ex:knows ex:Bob . 
}""".trim(); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGCircularTest.java new file mode 100644 index 000000000..52b4aaf4f --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGCircularTest.java @@ -0,0 +1,414 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.trig.TriGOption; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; + +/** + * Circular tests for TriG parser and serializer integration. + * These tests verify that data can be correctly serialized to TriG format + * and then parsed back to an equivalent model (round-trip testing). 
+ * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. + * + * TriG supports both namespaces and named graphs, so additional tests are + * included for these specific features. + */ +@DisplayName("TriG Circular Integration Tests") +class TriGCircularTest { + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private TriGOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String GRAPH_1 = EXAMPLE_NS + "graph1"; + private static final String GRAPH_2 = EXAMPLE_NS + "graph2"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = TriGOption.defaultConfig(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. 
+ * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a model with named graphs for testing TriG specific functionality. + * + * @return A model with triples in different named graphs + */ + private Model createNamedGraphsTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + IRI graph1 = valueFactory.createIRI(GRAPH_1); + IRI graph2 = valueFactory.createIRI(GRAPH_2); + + // Add triples to different named graphs + model.add(subject1, predicateName, objectJohn, graph1); + model.add(subject2, predicateName, objectJane, graph2); + model.add(subject1, predicateKnows, subject2, graph1); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. 
+ * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. 
+ * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. + * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. + * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. 
+ * + * @return A model with literals containing newlines, quotes, and Unicode + */ + private Model createSpecialCharactersTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description"); + IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note"); + + // Literal with newlines and quotes + Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text"); + model.add(subject, predicateDescription, literalWithEscapes); + + // Literal with Unicode characters + Literal literalUnicode = valueFactory.createLiteral("Hello 世界 🌍"); + model.add(subject, predicateNote, literalUnicode); + + return model; + } + + /** + * Performs a round-trip serialization and parsing cycle. + * + * @param originalModel The model to serialize and parse back + * @return The model resulting from parsing the serialized data + * @throws Exception If serialization or parsing fails + */ + private Model performRoundTrip(Model originalModel) throws Exception { + // Serialize to TriG + RDFSerializer serializer = serializerFactory.createSerializer( + RDFFormat.TRIG, originalModel, defaultConfig); + + StringWriter writer = new StringWriter(); + serializer.write(writer); + String serializedContent = writer.toString(); + + // Verify serialization produced content (only check for non-empty models) + assertNotNull(serializedContent, "Serialized content should not be null"); + if (originalModel.size() > 0) { + assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models"); + } + + // Parse back from TriG + Model deserializedModel = new CoreseModel(); + RDFParser parser = parserFactory.createRDFParser( + RDFFormat.TRIG, deserializedModel, valueFactory); + + ByteArrayInputStream inputStream = new ByteArrayInputStream( + serializedContent.getBytes(StandardCharsets.UTF_8)); + parser.parse(inputStream); + + return
deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing named graphs") + void testRoundTripWithNamedGraphs() throws Exception { + // Given: A model with triples in different named graphs + Model originalModel = createNamedGraphsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All named graph information should be preserved + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving named graphs"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original 
and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be 
equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleCircularTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleCircularTest.java new file mode 100644 index 000000000..3335815cf --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleCircularTest.java @@ -0,0 +1,372 @@ +package fr.inria.corese.core.next.impl.io.parser.turtle; + +import static org.junit.jupiter.api.Assertions.assertEquals; 
+import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.api.BNode; +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleOption; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; + +/** + * Circular tests for Turtle parser and serializer integration. + * These tests verify that data can be correctly serialized to Turtle format + * and then parsed back to an equivalent model (round-trip testing). + * + * The circular testing approach ensures that the parser and serializer + * are compatible and preserve data integrity across format transformations. 
+ */ +@DisplayName("Turtle Circular Integration Tests") +class TurtleCircularTest { + + private ValueFactory valueFactory; + private SerializerFactory serializerFactory; + private ParserFactory parserFactory; + private TurtleOption defaultConfig; + + // Test data constants + private static final String EXAMPLE_NS = "http://example.org/"; + private static final String SUBJECT_1 = EXAMPLE_NS + "person1"; + private static final String SUBJECT_2 = EXAMPLE_NS + "person2"; + private static final String PREDICATE_NAME = EXAMPLE_NS + "name"; + private static final String PREDICATE_AGE = EXAMPLE_NS + "age"; + private static final String PREDICATE_KNOWS = EXAMPLE_NS + "knows"; + private static final String LITERAL_JOHN = "John Doe"; + private static final String LITERAL_JANE = "Jane Smith"; + private static final String LITERAL_AGE_25 = "25"; + private static final String LITERAL_HELLO_EN = "Hello"; + private static final String LANGUAGE_TAG_EN = "en"; + private static final String XSD_INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; + private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; + + @BeforeEach + void setUp() { + valueFactory = new CoreseAdaptedValueFactory(); + serializerFactory = new DefaultSerializerFactory(); + parserFactory = new ParserFactory(); + defaultConfig = TurtleOption.defaultConfig(); + } + + /** + * Creates a simple model with basic triples containing IRIs and string + * literals. 
+ * + * @return A model with two simple triples + */ + private Model createSimpleTestModel() { + Model model = new CoreseModel(); + + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal objectJohn = valueFactory.createLiteral(LITERAL_JOHN); + + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + Literal objectJane = valueFactory.createLiteral(LITERAL_JANE); + + model.add(subject1, predicateName, objectJohn); + model.add(subject2, predicateName, objectJane); + + return model; + } + + /** + * Creates a complex model with various RDF value types including + * typed literals, language-tagged literals, and blank nodes. + * + * @return A model with diverse triple patterns + */ + private Model createComplexTestModel() { + Model model = new CoreseModel(); + + // Basic IRI and string literal triple + IRI subject1 = valueFactory.createIRI(SUBJECT_1); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + Literal literalJohn = valueFactory.createLiteral(LITERAL_JOHN); + model.add(subject1, predicateName, literalJohn); + + // Typed literal (integer) + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI xsdInteger = valueFactory.createIRI(XSD_INTEGER); + Literal literalAge = valueFactory.createLiteral(LITERAL_AGE_25, xsdInteger); + model.add(subject1, predicateAge, literalAge); + + // Language-tagged literal + Literal literalHelloEn = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + model.add(subject1, predicateGreeting, literalHelloEn); + + // Blank node as subject + BNode blankNodeSubject = valueFactory.createBNode(); + IRI predicateType = valueFactory.createIRI(EXAMPLE_NS + "type"); + IRI objectPerson = valueFactory.createIRI(EXAMPLE_NS + "Person"); + model.add(blankNodeSubject, predicateType, objectPerson); + + // Blank node as object + BNode blankNodeObject = valueFactory.createBNode(); + IRI 
predicateKnows = valueFactory.createIRI(PREDICATE_KNOWS); + model.add(subject1, predicateKnows, blankNodeObject); + + // IRI to IRI relationship + IRI subject2 = valueFactory.createIRI(SUBJECT_2); + model.add(subject1, predicateKnows, subject2); + + return model; + } + + /** + * Creates a model with typed literals for testing. + * + * @return A model with integer and string typed literals + */ + private Model createTypedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateAge = valueFactory.createIRI(PREDICATE_AGE); + IRI predicateName = valueFactory.createIRI(PREDICATE_NAME); + + // Integer literal + Literal integerLiteral = valueFactory.createLiteral(LITERAL_AGE_25, + valueFactory.createIRI(XSD_INTEGER)); + model.add(subject, predicateAge, integerLiteral); + + // String literal with explicit datatype + Literal stringLiteral = valueFactory.createLiteral(LITERAL_JOHN, + valueFactory.createIRI(XSD_STRING)); + model.add(subject, predicateName, stringLiteral); + + return model; + } + + /** + * Creates a model with language-tagged literals for testing. + * + * @return A model with English and French language-tagged literals + */ + private Model createLanguageTaggedLiteralsTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateGreeting = valueFactory.createIRI(EXAMPLE_NS + "greeting"); + + // English greeting + Literal englishGreeting = valueFactory.createLiteral(LITERAL_HELLO_EN, LANGUAGE_TAG_EN); + model.add(subject, predicateGreeting, englishGreeting); + + // French greeting + Literal frenchGreeting = valueFactory.createLiteral("Bonjour", "fr"); + model.add(subject, predicateGreeting, frenchGreeting); + + return model; + } + + /** + * Creates a model with blank nodes for testing. 
+ * + * @return A model with blank nodes as subject and object + */ + private Model createBlankNodesTestModel() { + Model model = new CoreseModel(); + + BNode blankSubject = valueFactory.createBNode(); + BNode blankObject = valueFactory.createBNode(); + IRI predicate = valueFactory.createIRI(PREDICATE_KNOWS); + + model.add(blankSubject, predicate, blankObject); + + return model; + } + + /** + * Creates a model with special characters and escape sequences for testing. + * + * @return A model with literals containing newlines, quotes, and Unicode + */ + private Model createSpecialCharactersTestModel() { + Model model = new CoreseModel(); + + IRI subject = valueFactory.createIRI(SUBJECT_1); + IRI predicateDescription = valueFactory.createIRI(EXAMPLE_NS + "description"); + IRI predicateNote = valueFactory.createIRI(EXAMPLE_NS + "note"); + + // Literal with newlines and quotes + Literal literalWithEscapes = valueFactory.createLiteral("Line 1\nLine 2\tTabbed \"quoted\" text"); + model.add(subject, predicateDescription, literalWithEscapes); + + // Literal with Unicode characters + Literal literalUnicode = valueFactory.createLiteral("Hello 世界 🌍"); + model.add(subject, predicateNote, literalUnicode); + + // Literal with character forbidden in URIs + Literal badURICharacters = valueFactory.createLiteral("<>-.!~*()?:;/=[]+@&$,%#~^ \\\\ \\t \\b \\n \\r \\f"); + model.add(subject, predicateNote, badURICharacters); + + + return model; + } + + /** + * Performs a round-trip serialization and parsing cycle.
+ * + * @param originalModel The model to serialize and parse back + * @return The model resulting from parsing the serialized data + * @throws Exception If serialization or parsing fails + */ + private Model performRoundTrip(Model originalModel) throws Exception { + // Serialize to Turtle + RDFSerializer serializer = serializerFactory.createSerializer( + RDFFormat.TURTLE, originalModel, defaultConfig); + + StringWriter writer = new StringWriter(); + serializer.write(writer); + String serializedContent = writer.toString(); + + // Verify serialization produced content (only check for non-empty models) + assertNotNull(serializedContent, "Serialized content should not be null"); + if (originalModel.size() > 0) { + assertTrue(serializedContent.length() > 0, "Serialized content should not be empty for non-empty models"); + } + + // Parse back from Turtle + Model deserializedModel = new CoreseModel(); + RDFParser parser = parserFactory.createRDFParser( + RDFFormat.TURTLE, deserializedModel, valueFactory); + + ByteArrayInputStream inputStream = new ByteArrayInputStream( + serializedContent.getBytes(StandardCharsets.UTF_8)); + parser.parse(inputStream); + + return deserializedModel; + } + + @Test + @DisplayName("Round-trip test with simple model containing basic IRIs and literals") + void testRoundTripWithSimpleModel() throws Exception { + // Given: A simple model with basic triples + Model originalModel = createSimpleTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should be equivalent to the original + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with complex model containing diverse RDF value types") + void 
testRoundTripWithComplexModel() throws Exception { + // Given: A complex model with various RDF constructs + Model originalModel = createComplexTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should preserve all data + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with empty model") + void testRoundTripWithEmptyModel() throws Exception { + // Given: An empty model + Model originalModel = new CoreseModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: The deserialized model should also be empty + assertEquals(0, originalModel.size(), "Original model should be empty"); + assertEquals(0, deserializedModel.size(), "Deserialized model should be empty"); + assertEquals(originalModel, deserializedModel, "Both models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only typed literals") + void testRoundTripWithTypedLiterals() throws Exception { + // Given: A model with various typed literals + Model originalModel = createTypedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All typed literals should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only language-tagged literals") + void testRoundTripWithLanguageTaggedLiterals() throws 
Exception { + // Given: A model with language-tagged literals + Model originalModel = createLanguageTaggedLiteralsTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All language tags should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing only blank nodes") + void testRoundTripWithBlankNodes() throws Exception { + // Given: A model with blank nodes as subjects and objects + Model originalModel = createBlankNodesTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: Blank node structure should be preserved (though IDs may differ) + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + // Note: Blank node equality is based on structure, not IDs + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be structurally equivalent"); + } + + @Test + @DisplayName("Round-trip test with model containing special characters and escape sequences") + void testRoundTripWithSpecialCharacters() throws Exception { + // Given: A model with special characters and escape sequences + Model originalModel = createSpecialCharactersTestModel(); + + // When: Performing round-trip serialization and parsing + Model deserializedModel = performRoundTrip(originalModel); + + // Then: All special characters should be preserved correctly + assertEquals(originalModel.size(), deserializedModel.size(), + "Model sizes should be equal after round-trip"); + assertEquals(originalModel, deserializedModel, + "Original and deserialized models should be equivalent, 
preserving special characters"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerTest.java similarity index 97% rename from src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplTest.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerTest.java index ffdde398e..59b2a98f1 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerTest.java @@ -23,9 +23,9 @@ /** * Unit tests for the TurtleListenerImpl parser. */ -class TurtleListenerImplTest { +class TurtleListenerTest { - private static final Logger logger = LoggerFactory.getLogger(TurtleListenerImplTest.class); + private static final Logger logger = LoggerFactory.getLogger(TurtleListenerTest.class); /** * Parses a Turtle string and returns the RDF model. 
@@ -44,7 +44,7 @@ private Model parseAndPrintModel(String turtleData) throws Exception { ParseTree tree = parser.turtleDoc(); Model model = new CoreseModel(); - TurtleListenerImpl listener = new TurtleListenerImpl(model, factory, null); + TurtleListener listener = new TurtleListener(model, factory, null); ParseTreeWalker walker = new ParseTreeWalker(); walker.walk((ParseTreeListener) listener, tree); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java index 598900ad6..802d4c91e 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java @@ -1,23 +1,29 @@ package fr.inria.corese.core.next.impl.io.serialization; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockConstruction; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.mockito.MockedConstruction; + import fr.inria.corese.core.next.api.Model; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; import fr.inria.corese.core.next.api.io.serialization.SerializationOption; +import fr.inria.corese.core.next.impl.io.serialization.jsonld.JSONLDSerializer; import fr.inria.corese.core.next.impl.io.serialization.canonical.Rdfc10Serializer; +import fr.inria.corese.core.next.impl.io.serialization.jsonld.JSONLDSerializer; import fr.inria.corese.core.next.impl.io.serialization.nquads.NQuadsSerializer; 
import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesSerializer; import fr.inria.corese.core.next.impl.io.serialization.rdfxml.XmlSerializer; import fr.inria.corese.core.next.impl.io.serialization.trig.TriGSerializer; import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleSerializer; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.mockito.MockedConstruction; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockConstruction; /** * Unit tests for the {@link DefaultSerializerFactory} class. @@ -31,7 +37,6 @@ class DefaultSerializerFactoryTest { private Model mockModel; private SerializationOption mockConfig; - @BeforeEach void setUp() { factory = new DefaultSerializerFactory(); @@ -47,7 +52,8 @@ void createSerializer_shouldReturnTurtleSerializer_forTurtleFormat() { assertNotNull(serializer); assertTrue(serializer instanceof TurtleSerializer); - assertEquals(1, mockedConstruction.constructed().size(), "TurtleSerializer constructor should be called once"); + assertEquals(1, mockedConstruction.constructed().size(), + "TurtleSerializer constructor should be called once"); } } @@ -59,7 +65,8 @@ void createSerializer_shouldReturnNTriplesSerializer_forNTriplesFormat() { assertNotNull(serializer); assertTrue(serializer instanceof NTriplesSerializer); - assertEquals(1, mockedConstruction.constructed().size(), "NTriplesSerializer constructor should be called once"); + assertEquals(1, mockedConstruction.constructed().size(), + "NTriplesSerializer constructor should be called once"); } } @@ -71,7 +78,8 @@ void createSerializer_shouldReturnNQuadsSerializer_forNQuadsFormat() { assertNotNull(serializer); assertTrue(serializer instanceof NQuadsSerializer); - assertEquals(1, mockedConstruction.constructed().size(), "NQuadsSerializer constructor should be called once"); + assertEquals(1, 
mockedConstruction.constructed().size(), + "NQuadsSerializer constructor should be called once"); } } @@ -83,7 +91,8 @@ void createSerializer_shouldReturnTriGSerializer_forTriGFormat() { assertNotNull(serializer); assertTrue(serializer instanceof TriGSerializer); - assertEquals(1, mockedConstruction.constructed().size(), "TriGSerializer constructor should be called once"); + assertEquals(1, mockedConstruction.constructed().size(), + "TriGSerializer constructor should be called once"); } } @@ -111,6 +120,19 @@ void createSerializer_shouldReturnCanonicalSerializer_forCanonicalRdfFormat() { } } + @Test + @DisplayName("createSerializer should return JSONLDSerializer for JSONLD format") + void createSerializer_shouldReturnJSONLDSerializer_forJSONLDFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(JSONLDSerializer.class)) { + RDFSerializer serializer = factory.createSerializer(RDFFormat.JSONLD, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof JSONLDSerializer); + assertEquals(1, mockedConstruction.constructed().size(), + "JSONLDSerializer constructor should be called once"); + } + } + @Test @DisplayName("createSerializer should throw NullPointerException for a null format") void createSerializer_shouldThrowNPE_forNullFormat() { diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/JSONLDSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/JSONLDSerializerTest.java index b8a97059d..b6573c617 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/JSONLDSerializerTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/jsonld/JSONLDSerializerTest.java @@ -115,17 +115,17 @@ public void modelWithBlankNodesTest() { { "@id":"http://example.org/iri1", "http://example.org/pred1":[ - {"@id":"blank1"} + {"@id":"_:blank1"} ] }, { - "@id":"blank1", + "@id":"_:blank1", "http://example.org/pred1":[ { 
"@id":"http://example.org/iri1" }, { - "@id":"blank1" + "@id":"_:blank1" }, { "@value":"literal1" @@ -171,17 +171,6 @@ public void modelWithNamedGraphsTest() { "http://example.org/pred1":[ { "@id":"http://example.org/iri1" - }, - { - "@value":"literal1" - }, - { - "@value":"literal2", - "@type":"http://example.org/datatype1" - }, - { - "@language":"en", - "@value":"literal3" } ] }, diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializerTest.java index dd28ae083..24e3bc138 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializerTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/trig/TriGSerializerTest.java @@ -475,7 +475,7 @@ void testBasicTrigSerializationWithNamedGraph() throws SerializationException, I graph:g1 { data:person1 data:name "Alice" . - } . + } """; diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializerTest.java index f76700962..b2a606f17 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializerTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/turtle/TurtleSerializerTest.java @@ -1,16 +1,22 @@ package fr.inria.corese.core.next.impl.io.serialization.turtle; -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; import fr.inria.corese.core.next.impl.common.literal.RDF; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; import 
fr.inria.corese.core.next.impl.io.serialization.TestStatementFactory; import fr.inria.corese.core.next.impl.io.serialization.option.LiteralDatatypePolicyEnum; import fr.inria.corese.core.next.impl.io.serialization.util.SerializationConstants; import fr.inria.corese.core.next.impl.exception.SerializationException; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.StringWriter; @@ -18,8 +24,7 @@ import java.util.Collections; import java.util.stream.Stream; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.*; /** @@ -283,11 +288,88 @@ void testBlankNodeSerialization() throws SerializationException, IOException { @prefix rdfs: . @prefix xsd: . + _:b1 ns:hasValue "Value of BNode" . ns:mainSubject ns:refersTo _:b1 . + _:b1 ns:hasValue "Value of BNode" . """; String actual = writer.toString().replace("\r\n", "\n"); - assertEquals(expected, actual); + + String expected1 = """ + _:b1 ns:hasValue "Value of BNode" . + ns:mainSubject ns:refersTo _:b1 . +""".trim(); + + String expected2 = """ + ns:mainSubject ns:refersTo _:b1 . + _:b1 ns:hasValue "Value of BNode" . 
+""".trim();
+        assertTrue(actual.contains(expected1) || actual.contains(expected2)); // check the serializer output, not the fixture; either statement order is accepted
+    }
+
+    /**
+     * Verifies that a statement whose subject and object are blank nodes created without an explicit id can be serialized.
+     */
+    @Test
+    void testBlankNodeSerializarionWithoutId() {
+        Logger logger = LoggerFactory.getLogger(TurtleSerializerTest.class);
+
+        ValueFactory valueFactory;
+        SerializerFactory serializerFactory;
+        ParserFactory parserFactory;
+        TurtleOption defaultConfig;
+        String exampleNs = "http://example.org/";
+        String predicateKnows = exampleNs + "knows";
+
+        valueFactory = new CoreseAdaptedValueFactory();
+        serializerFactory = new DefaultSerializerFactory();
+        parserFactory = new ParserFactory();
+        defaultConfig = TurtleOption.defaultConfig();
+
+        Model model = new CoreseModel();
+
+        BNode blankSubject = valueFactory.createBNode();
+        BNode blankObject = valueFactory.createBNode();
+        IRI predicate = valueFactory.createIRI(predicateKnows);
+
+        model.add(blankSubject, predicate, blankObject);
+
+
+        model.stream().forEach(stmt -> {
+            Value obj = stmt.getObject();
+            String subjectString = stmt.getSubject().stringValue();
+            String predicateString = stmt.getPredicate().stringValue();
+
+            if (obj instanceof Literal literal) {
+                String label = String.valueOf(literal.getLabel());
+                String languageTag = literal.getLanguage().orElse(null);
+
+                if (languageTag != null) {
+                    logger.debug("({}, {}, \"{}\"@{})",
+                            subjectString,
+                            predicateString,
+                            label,
+                            languageTag);
+                } else {
+                    logger.debug("({}, {}, \"{}\")",
+                            subjectString,
+                            predicateString,
+                            label);
+                }
+            } else {
+                logger.debug("({}, {}, {})",
+                        subjectString,
+                        predicateString,
+                        obj.stringValue());
+            }
+        });
+
+        StringWriter writer = new StringWriter();
+        TurtleSerializer turtleSerializer = new TurtleSerializer(model, defaultConfig);
+
+        turtleSerializer.write(writer);
+        String actual = writer.toString().replace("\r\n", "\n");
+        assertFalse(actual.isBlank(), "Serializing a model that holds one blank-node statement must produce output");
     }
 
     /**
@@ -495,4 +577,39 @@ void testMultilineLiteralSerialization() throws SerializationException,
IOExcept
         assertEquals(expected, actual);
     }
 
+    /**
+     * Tests serialization of a literal containing escaped characters.
+     *
+     * @throws SerializationException if a serialization error occurs.
+     * @throws IOException            if an I/O error occurs during writing.
+     */
+    @Test
+    void testEscapedCharacterLiteralSerialization() throws SerializationException, IOException {
+        ValueFactory coreseFactory = new CoreseAdaptedValueFactory();
+        Statement statement = coreseFactory.createStatement(
+                coreseFactory.createIRI("http://example.org/book/1"),
+                coreseFactory.createIRI("http://example.org/properties/description"),
+                coreseFactory.createLiteral("\\ \t \b \n \r \f")
+        );
+
+        Model coreseModel = new CoreseModel();
+        coreseModel.add(statement);
+
+        StringWriter writer = new StringWriter();
+        TurtleOption config = new TurtleOption.Builder()
+                .autoDeclarePrefixes(false)
+                .includeContext(false)
+                .prettyPrint(false)
+                .usePrefixes(false)
+                .build();
+        TurtleSerializer turtleSerializer = new TurtleSerializer(coreseModel, config);
+
+        turtleSerializer.write(writer);
+
+        String expected = "<http://example.org/book/1> <http://example.org/properties/description> \"\"\"\\ \t \b \n \r \f\"\"\" ."; // full IRIs (usePrefixes is off); actual is trimmed, so expected must not start with a space
+
+        String actual = writer.toString().trim();
+        assertEquals(expected, actual);
+    }
+
 }