diff --git a/src/main/antlr/TriG.g4 b/src/main/antlr/TriG.g4
new file mode 100644
index 000000000..f93efea6f
--- /dev/null
+++ b/src/main/antlr/TriG.g4
@@ -0,0 +1,323 @@
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
+// TriG 1.1 grammar. Parser rules (lower case) come first, the lexer rules follow the "Terminals" marker.
+// Lexer helper rules are declared as fragments so that they can never be emitted as tokens of their own.
+grammar TriG;
+
+trigDoc
+    : ( directive | block )* EOF
+    ;
+
+block
+    : triplesOrGraph
+    | wrappedGraph
+    | triples2
+    | Graph_w labelOrSubject wrappedGraph
+    ;
+
+triplesOrGraph
+    : labelOrSubject (wrappedGraph | predicateObjectList '.')
+    ;
+
+triples2
+    : blankNodePropertyList predicateObjectList? '.'
+    | collection predicateObjectList '.'
+    ;
+
+wrappedGraph
+    : '{' triplesBlock? '}'
+    ;
+
+triplesBlock
+    : triples ('.' triplesBlock?)?
+    ;
+
+labelOrSubject
+    : iri
+    | blankNode
+    ;
+
+directive
+    : prefixID
+    | base
+    | sparqlPrefix
+    | sparqlBase
+    ;
+
+prefixID
+    : '@prefix' PNAME_NS IRIREF '.'
+    ;
+
+base
+    : '@base' IRIREF '.'
+    ;
+
+sparqlPrefix
+    : Prefix_w PNAME_NS IRIREF
+    ;
+
+sparqlBase
+    : Base_w IRIREF
+    ;
+
+triples
+    : subject predicateObjectList
+    | blankNodePropertyList predicateObjectList?
+    ;
+
+predicateObjectList
+    : verb objectList (';' (verb objectList)?)*
+    ;
+
+objectList
+    : object (',' object)*
+    ;
+
+verb
+    : predicate
+    | 'a'
+    ;
+
+subject
+    : iri
+    | blank
+    ;
+
+predicate
+    : iri
+    ;
+
+object
+    : iri
+    | blank
+    | blankNodePropertyList
+    | literal
+    ;
+
+literal
+    : rDFLiteral
+    | numericLiteral
+    | BooleanLiteral
+    ;
+
+blank
+    : blankNode
+    | collection
+    ;
+
+blankNodePropertyList
+    : '[' predicateObjectList ']'
+    ;
+
+collection
+    : '(' object* ')'
+    ;
+
+numericLiteral
+    : INTEGER
+    | DECIMAL
+    | DOUBLE
+    ;
+
+rDFLiteral
+    : string LANGTAG
+    | string ('^^' iri)?
+    ;
+
+string
+    : STRING_LITERAL_QUOTE
+    | STRING_LITERAL_SINGLE_QUOTE
+    | STRING_LITERAL_LONG_SINGLE_QUOTE
+    | STRING_LITERAL_LONG_QUOTE
+    ;
+
+iri
+    : prefixedName
+    | IRIREF
+    ;
+
+prefixedName
+    : PNAME_LN
+    | PNAME_NS
+    ;
+
+blankNode
+    : BLANK_NODE_LABEL
+    | ANON
+    ;
+
+WS
+    : (('\u0020' | '\u0009' | '\u000A' | '\u000D' ) )+ -> skip
+    ;
+
+// Terminals
+
+Graph_w options { caseInsensitive=true; }
+    : 'GRAPH'
+    ;
+
+Base_w options { caseInsensitive=true; }
+    : 'BASE'
+    ;
+
+Prefix_w options { caseInsensitive=true; }
+    : 'PREFIX'
+    ;
+
+BooleanLiteral
+    : 'true'
+    | 'false'
+    ;
+
+IRIREF
+    : '<' (~[\u0000-\u0020<>"{}|^`\\] | UCHAR)* '>' // anything but controls, space, < > " { } | ^ ` and backslash
+    ;
+
+PNAME_NS
+    : PN_PREFIX? ':'
+    ;
+
+PNAME_LN
+    : PNAME_NS PN_LOCAL
+    ;
+
+BLANK_NODE_LABEL
+    : '_:' (PN_CHARS_U | '0' .. '9') ((PN_CHARS | '.')* PN_CHARS)?
+    ;
+
+LANGTAG
+    : '@' ('a'.. 'z' | 'A' .. 'Z')+ ('-' ('a'.. 'z' | 'A' .. 'Z' | '0' .. '9')+ )* // a subtag needs at least one character
+    ;
+
+INTEGER
+    : ('+' | '-' )? ('0' .. '9')+
+    ;
+
+DECIMAL
+    : ('+' | '-' )? ('0' .. '9')* '.' ('0' .. '9')+
+    ;
+
+DOUBLE
+    : ('+' | '-' )? (('0' .. '9')+ '.' ('0' .. '9')* EXPONENT
+    | '.' ('0' .. '9')+ EXPONENT
+    | ('0' .. '9')+ EXPONENT)
+    ;
+
+fragment EXPONENT
+    : ('e' | 'E') ('+' | '-' )? ('0' .. '9')+
+    ;
+
+STRING_LITERAL_QUOTE
+    : '"' ((~[\u0022\u005C\u000A\u000D]) | ECHAR | UCHAR)* '"' // no raw ", backslash, LF (U+000A) or CR (U+000D)
+    ;
+
+STRING_LITERAL_SINGLE_QUOTE
+    : '\'' ((~[\u0027\u005C\u000A\u000D]) | ECHAR | UCHAR)* '\'' // no raw ', backslash, LF (U+000A) or CR (U+000D)
+    ;
+
+STRING_LITERAL_LONG_SINGLE_QUOTE
+    : '\'\'\'' (('\'' | '\'\'')? ( (~['\\] ) | ECHAR | UCHAR))* '\'\'\''
+    ;
+
+STRING_LITERAL_LONG_QUOTE
+    : '"""' (('"' | '""')? ( (~["\\] ) | ECHAR | UCHAR))* '"""' // a backslash may only start an ECHAR / UCHAR
+    ;
+
+fragment UCHAR
+    : '\\u' HEX HEX HEX HEX
+    | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
+    ;
+
+fragment ECHAR
+    : '\\' [tbnrf"'\\] // escape letters are matched case-sensitively
+    ;
+
+fragment WHITESPACE
+    : [\u0020\u0009\u000A\u000D]
+    ;
+
+ANON
+    : '[' WHITESPACE* ']'
+    ;
+
+fragment PN_CHARS_BASE
+    : 'A' .. 'Z'
+    | 'a' .. 'z'
+    | '\u00C0' .. '\u00D6'
+    | '\u00D8' .. '\u00F6'
+    | '\u00F8' .. '\u02FF'
+    | '\u0370' .. '\u037D'
+    | '\u037F' .. '\u1FFF'
+    | '\u200C' .. '\u200D'
+    | '\u2070' .. '\u218F'
+    | '\u2C00' .. '\u2FEF'
+    | '\u3001' .. '\uD7FF'
+    | '\uF900' .. '\uFDCF'
+    | '\uFDF0' .. '\uFFFD'
+    | [\u{10000}-\u{EFFFF}] // supplementary planes, written with the extended \u{...} escape
+    ;
+
+fragment PN_CHARS_U
+    : PN_CHARS_BASE
+    | '_'
+    ;
+
+fragment PN_CHARS
+    : PN_CHARS_U
+    | '-'
+    | [0-9]
+    | [\u00B7]
+    | [\u0300-\u036F]
+    | [\u203F-\u2040]
+    ;
+
+fragment PN_PREFIX
+    : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
+    ;
+
+fragment PN_LOCAL
+    : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
+    ;
+
+fragment PLX
+    : PERCENT
+    | PN_LOCAL_ESC
+    ;
+
+fragment PERCENT
+    : '%' HEX HEX
+    ;
+
+fragment HEX
+    : [0-9a-fA-F]
+    ;
+
+fragment PN_LOCAL_ESC
+    : '\\' (
+        '_'
+        | '~'
+        | '.'
+        | '-'
+        | '!'
+        | '$'
+        | '&'
+        | '\''
+        | '('
+        | ')'
+        | '*'
+        | '+'
+        | ','
+        | ';'
+        | '='
+        | '/'
+        | '?'
+        | '#'
+        | '@'
+        | '%'
+    )
+    ;
+
+LC
+    : '#' ~[\r\n]* -> channel(HIDDEN) // '*': a comment may be empty
+    ;
\ No newline at end of file
diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java
index bb1f83146..7c4a819b3 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java
@@ -10,9 +10,10 @@
  */
 public class IRIUtils {
 
-    private static final Pattern IRI_PATTERN = Pattern.compile("^(([\\w\\-]+:\\/\\/([\\w\\-_:]+\\.)*[\\w\\-_:]*)(\\/([\\w\\-\\._\\:]+\\/)*))([\\w\\-\\._\\:]+)?(\\?[\\w\\-_\\:\\?\\=]+)?((\\#)?([\\w\\-_]+))?$");
+    private static final Pattern IRI_PATTERN = Pattern.compile("^(?<origin>(?<protocol>[\\w\\-]+):(?<dblSlashes>\\/\\/)?(?<domain>([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?<path>\\/([\\w\\-\\._\\:]+\\/)*)(?<finalPath>[\\w\\-\\._\\:]+)?(?<query>\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?<fragment>([\\w\\-_]+))?)?$"); // NOTE(review): group names "origin" and "query" are placeholders - confirm against the committed file
     private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?");
 
+
     /**
      * Prevent instantiation of the utility class.
*/ @@ -29,15 +30,24 @@ public static String guessNamespace(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if((matcher.group(8) == null) || (matcher.group(6) == null && matcher.group(9) == null) ) { // If the IRI has no fragment or ends with a slash - - return matcher.group(1); - } else { - // 1: Domain and path ending with a slash, 6: final path element without slash, 9: final # if there is a fragment - return matcher.group(1) + matcher.group(6) + matcher.group(9); + if(matcher.group("protocol") != null && matcher.group("protocol").equals("_")) { + return ""; + } + StringBuilder namespace = new StringBuilder(); + namespace.append(matcher.group("protocol")).append(":"); + if(matcher.group("dblSlashes") != null) { + namespace.append(matcher.group("dblSlashes")); } + namespace.append(matcher.group("domain")); + if(matcher.group("path") != null) { + namespace.append(matcher.group("path")); + } + if(matcher.group("fragment") != null && matcher.group("finalPath") != null) { + namespace.append(matcher.group("finalPath")).append("#"); + } + return namespace.toString(); } else { - return ""; + throw new IllegalStateException("No namespace found for the given IRI: " + iri + "."); } } catch (IllegalStateException e) { return ""; @@ -54,10 +64,10 @@ public static String guessLocalName(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if(matcher.group(10) != null){ // If the IRI has a fragment - return matcher.group(10); - } else if(matcher.group(6) != null ) { // If the IRI has no fragment but do not ends with a slash - return matcher.group(6); + if(matcher.group("fragment") != null){ // If the IRI has a fragment + return matcher.group("fragment"); + } else if(matcher.group("finalPath") != null ) { // If the IRI has no fragment but do not ends with a slash + return matcher.group("finalPath"); } else { // If the URI ends with a slash return ""; } diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java
new file mode 100644
index 000000000..cddf9168c
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java
@@ -0,0 +1,112 @@
+package fr.inria.corese.core.next.impl.io.parser.trig;
+
+import fr.inria.corese.core.next.api.Model;
+import fr.inria.corese.core.next.api.ValueFactory;
+import fr.inria.corese.core.next.api.base.io.RDFFormat;
+import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser;
+import fr.inria.corese.core.next.api.io.IOOptions;
+
+import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGLexer;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGParser;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeListener;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * TriG parser built on the ANTLR4-generated {@link TriGLexer} and {@link TriGParser}.
+ * The input is first parsed into a tree; a {@link TriGListerner} then walks that tree and
+ * adds the statements it finds to the model.
+ */
+public class ANTLRTrigParser extends AbstractRDFParser {
+
+    /**
+     * Creates a parser that is given no configuration options.
+     *
+     * @param model   The RDF model to populate.
+     * @param factory The ValueFactory for creating RDF resources.
+     */
+    public ANTLRTrigParser(Model model, ValueFactory factory) {
+        super(model, factory);
+    }
+
+    /**
+     * Creates a parser with configuration options.
+     *
+     * @param model   The RDF model to populate.
+     * @param factory The ValueFactory for creating RDF resources.
+     * @param config  The configuration options for parsing.
+     */
+    public ANTLRTrigParser(Model model, ValueFactory factory, IOOptions config) {
+        super(model, factory, config);
+    }
+
+    /** @return always {@link RDFFormat#TRIG} */
+    @Override
+    public RDFFormat getRDFFormat() {
+        return RDFFormat.TRIG;
+    }
+
+    /** Does nothing: the given options are ignored. */
+    @Override
+    public void setConfig(IOOptions config) {
+        // NOTE(review): empty on purpose? Options can only be supplied through the constructor as it stands.
+    }
+
+    /** Parses TriG from {@code in}, decoded as UTF-8, without a base URI. */
+    @Override
+    public void parse(InputStream in) throws ParsingErrorException {
+        parse(utf8Reader(in), null);
+    }
+
+    /** Parses TriG from {@code in}, decoded as UTF-8; {@code baseURI} is forwarded unchanged. */
+    @Override
+    public void parse(InputStream in, String baseURI) throws ParsingErrorException {
+        parse(utf8Reader(in), baseURI);
+    }
+
+    /** Parses TriG from {@code reader}, without a base URI. */
+    @Override
+    public void parse(Reader reader) throws ParsingErrorException {
+        parse(reader, null);
+    }
+
+    /**
+     * Lexes and parses the whole input, then walks the parse tree with a {@link TriGListerner}
+     * that fills the model.
+     * NOTE(review): no ANTLR error listener is registered here - confirm how syntax errors should surface.
+     *
+     * @param reader  The Reader to read RDF data from.
+     * @param baseURI The base URI. NOTE(review): not used by this method - it is never handed to the listener.
+     * @throws ParsingErrorException wrapping any {@link IOException} or other exception raised on the way.
+     */
+    @Override
+    public void parse(Reader reader, String baseURI) throws ParsingErrorException {
+        try {
+            CharStream input = CharStreams.fromReader(reader);
+            TriGLexer lexer = new TriGLexer(input);
+            TriGParser parser = new TriGParser(new CommonTokenStream(lexer));
+            ParseTree document = parser.trigDoc();
+            ParseTreeListener modelBuilder = new TriGListerner(getModel(), getValueFactory(), this.getConfig());
+            new ParseTreeWalker().walk(modelBuilder, document);
+        } catch (IOException e) {
+            throw new ParsingErrorException("Failed to parse TriG RDF: " + e.getMessage(), e);
+        } catch (Exception e) {
+            throw new ParsingErrorException("Unexpected error during TriG parsing: " + e.getMessage(), e);
+        }
+    }
+
+    /** Wraps {@code in} in a reader that decodes UTF-8. */
+    private static Reader utf8Reader(InputStream in) {
+        return new InputStreamReader(in, StandardCharsets.UTF_8);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java
new file mode 100644
index
000000000..a7f199392
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java
@@ -0,0 +1,413 @@
+package fr.inria.corese.core.next.impl.io.parser.trig;
+
+import fr.inria.corese.core.next.api.*;
+import fr.inria.corese.core.next.api.io.IOOptions;
+import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions;
+import fr.inria.corese.core.next.impl.common.literal.XSD;
+import fr.inria.corese.core.next.impl.common.vocabulary.RDF;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGBaseListener;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGParser;
+import fr.inria.corese.core.next.api.ValueFactory;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Listener for the ANTLR4 generated parser for TriG.
+ * <p>
+ * Only the directive rules and the top-level statement rules ({@code triplesOrGraph}, {@code triples2},
+ * {@code triples}) are hooked; what is nested below them (object lists, blank node property lists,
+ * collections) is expanded by explicit recursion from those hooks. Statements are added with the graph
+ * that is current when they are met, or with a {@code null} graph outside of any named graph.
+ */
+public class TriGListerner extends TriGBaseListener {
+    /** Namespace of the RDF vocabulary, used for rdf:first / rdf:rest / rdf:nil. */
+    private static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+    /** Matches the leading "scheme:" that makes an IRI reference absolute. */
+    private static final java.util.regex.Pattern SCHEME =
+            java.util.regex.Pattern.compile("^[A-Za-z][A-Za-z0-9+.-]*:");
+
+    private final Model model;
+    private String baseURI;
+    private final Map<String, String> prefixMap = new HashMap<>();
+    private final ValueFactory factory;
+
+    private Resource currentSubject;
+    private Resource currentGraph;
+
+    /**
+     * Constructor for the TriGListerner.
+     *
+     * @param model   The RDF model to populate.
+     * @param factory The ValueFactory for creating RDF resources.
+     * @param options IOOptions for configuration; not read at the moment (see the note in the body).
+     */
+    public TriGListerner(Model model, ValueFactory factory, IOOptions options) {
+        this.model = model;
+        this.factory = factory;
+        // The base is empty until a @base / BASE directive is met.
+        // NOTE(review): the initial base was presumably meant to come from RDFParserBaseIRIOptions, but its
+        // accessor is not visible from this file - read it here once confirmed.
+        this.baseURI = "";
+    }
+
+    @Override
+    public void exitPrefixID(TriGParser.PrefixIDContext ctx) {
+        declarePrefix(ctx.PNAME_NS().getText(), ctx.IRIREF().getText());
+    }
+
+    @Override
+    public void exitSparqlPrefix(TriGParser.SparqlPrefixContext ctx) {
+        declarePrefix(ctx.PNAME_NS().getText(), ctx.IRIREF().getText());
+    }
+
+    @Override
+    public void exitBase(TriGParser.BaseContext ctx) {
+        baseURI = resolveIRI(ctx.IRIREF().getText());
+    }
+
+    @Override
+    public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) {
+        baseURI = resolveIRI(ctx.IRIREF().getText());
+    }
+
+    @Override
+    public void enterBlock(TriGParser.BlockContext ctx) {
+        // Only the "GRAPH label { ... }" alternative carries the label on the block itself.
+        currentGraph = ctx.Graph_w() != null && ctx.labelOrSubject() != null
+                ? extractLabelOrSubject(ctx.labelOrSubject())
+                : null;
+    }
+
+    @Override
+    public void exitBlock(TriGParser.BlockContext ctx) {
+        currentGraph = null;
+    }
+
+    @Override
+    public void enterTriplesOrGraph(TriGParser.TriplesOrGraphContext ctx) {
+        if (ctx.labelOrSubject() == null) {
+            return;
+        }
+        Resource labelOrSubject = extractLabelOrSubject(ctx.labelOrSubject());
+        if (ctx.wrappedGraph() != null) {
+            // "label { ... }": the label names the graph even without the GRAPH keyword.
+            currentGraph = labelOrSubject;
+        } else if (ctx.predicateObjectList() != null) {
+            currentSubject = labelOrSubject;
+            processPredicateObjectList(ctx.predicateObjectList());
+        }
+    }
+
+    @Override
+    public void enterTriples2(TriGParser.Triples2Context ctx) {
+        if (ctx.blankNodePropertyList() != null) {
+            currentSubject = processBlankNodePropertyList(ctx.blankNodePropertyList());
+        } else if (ctx.collection() != null) {
+            currentSubject = extractCollection(ctx.collection());
+        } else {
+            return;
+        }
+        if (ctx.predicateObjectList() != null) {
+            processPredicateObjectList(ctx.predicateObjectList());
+        }
+    }
+
+    @Override
+    public void enterTriples(TriGParser.TriplesContext ctx) {
+        if (ctx.subject() != null) {
+            currentSubject = extractSubject(ctx.subject());
+        } else if (ctx.blankNodePropertyList() != null) {
+            // "[ ... ] p o" form: the property list is the subject and the tail is optional.
+            currentSubject = processBlankNodePropertyList(ctx.blankNodePropertyList());
+        } else {
+            return;
+        }
+        if (ctx.predicateObjectList() != null) {
+            processPredicateObjectList(ctx.predicateObjectList());
+        }
+    }
+
+    /**
+     * Records a prefix declaration, for resolving prefixed names and as a namespace of the model.
+     *
+     * @param pnameNs the PNAME_NS token text: the prefix followed by ':'
+     * @param iriRef  the IRIREF token text, angle brackets included
+     */
+    private void declarePrefix(String pnameNs, String iriRef) {
+        String prefix = pnameNs.substring(0, pnameNs.length() - 1);
+        String namespace = resolveIRI(iriRef);
+        prefixMap.put(prefix, namespace);
+        model.setNamespace(prefix, namespace);
+    }
+
+    /**
+     * Adds one statement per (verb, object) pair of the list, using the current subject and graph.
+     *
+     * @param ctx the predicate-object list context from the parser
+     */
+    private void processPredicateObjectList(TriGParser.PredicateObjectListContext ctx) {
+        List<TriGParser.VerbContext> verbs = ctx.verb();
+        List<TriGParser.ObjectListContext> objectLists = ctx.objectList();
+        // Subject and predicate live in locals: extracting an object may recurse into a nested "[ ... ]",
+        // which re-enters this method and must not change what the enclosing statement uses.
+        Resource subject = currentSubject;
+
+        for (int i = 0; i < verbs.size() && i < objectLists.size(); i++) {
+            IRI predicate = extractVerb(verbs.get(i));
+            for (TriGParser.ObjectContext objCtx : objectLists.get(i).object()) {
+                Value object = extractObject(objCtx);
+                model.add(subject, predicate, object, currentGraph);
+            }
+        }
+    }
+
+    /**
+     * Extracts an RDF object from the ObjectContext.
+     * Supports IRIs, blank nodes, collections, literals, and inline blank node property lists.
+     *
+     * @param ctx the object context
+     * @return the extracted RDF Value
+     */
+    private Value extractObject(TriGParser.ObjectContext ctx) {
+        if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText()));
+        if (ctx.blank() != null) return extractBlank(ctx.blank());
+        if (ctx.literal() != null) return extractLiteral(ctx.literal());
+        if (ctx.blankNodePropertyList() != null) return processBlankNodePropertyList(ctx.blankNodePropertyList());
+        throw new RuntimeException("Unsupported object: " + ctx.getText());
+    }
+
+    /**
+     * Processes an inline blank node with its property list, returning the blank node as a Resource.
+     * The current subject is switched to the new blank node while its properties are added, then restored.
+     *
+     * @param ctx the blank node property list context
+     * @return the new blank node resource
+     */
+    private Resource processBlankNodePropertyList(TriGParser.BlankNodePropertyListContext ctx) {
+        Resource bnode = factory.createBNode();
+        Resource savedSubject = currentSubject;
+        currentSubject = bnode;
+        processPredicateObjectList(ctx.predicateObjectList());
+        currentSubject = savedSubject;
+        return bnode;
+    }
+
+    /**
+     * Builds the rdf:first / rdf:rest chain of a collection, in the current graph.
+     *
+     * @param ctx the collection context
+     * @return the head of the list, or rdf:nil for an empty collection
+     */
+    private Resource extractCollection(TriGParser.CollectionContext ctx) {
+        IRI first = factory.createIRI(RDF_NS + "first");
+        IRI rest = factory.createIRI(RDF_NS + "rest");
+        Resource nil = factory.createIRI(RDF_NS + "nil");
+        List<TriGParser.ObjectContext> items = ctx.object();
+        if (items.isEmpty()) return nil;
+
+        Resource head = factory.createBNode();
+        Resource cell = head;
+        for (int i = 0; i < items.size(); i++) {
+            model.add(cell, first, extractObject(items.get(i)), currentGraph);
+            Resource next = i + 1 < items.size() ? factory.createBNode() : nil;
+            model.add(cell, rest, next, currentGraph);
+            cell = next;
+        }
+        return head;
+    }
+
+    /**
+     * Extracts a subject from a SubjectContext, which can be an IRI, a blank node or a collection.
+     *
+     * @param ctx the subject context
+     * @return the extracted subject as a Resource
+     */
+    private Resource extractSubject(TriGParser.SubjectContext ctx) {
+        if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText()));
+        if (ctx.blank() != null) return extractBlank(ctx.blank());
+        throw new RuntimeException("Unsupported subject: " + ctx.getText());
+    }
+
+    /**
+     * Extracts a blank node or a collection from a BlankContext.
+     *
+     * @param ctx the blank context
+     * @return the blank node, or the head of the collection
+     */
+    private Resource extractBlank(TriGParser.BlankContext ctx) {
+        if (ctx.blankNode() != null) return extractBlankNode(ctx.blankNode());
+        if (ctx.collection() != null) return extractCollection(ctx.collection());
+        throw new RuntimeException("Unsupported blank node structure: " + ctx.getText());
+    }
+
+    /**
+     * Creates the blank node of a BlankNodeContext: labelled (_:b) or anonymous ([]).
+     *
+     * @param ctx the blank node context
+     * @return the blank node as a Resource
+     */
+    private Resource extractBlankNode(TriGParser.BlankNodeContext ctx) {
+        if (ctx.BLANK_NODE_LABEL() != null) return factory.createBNode(ctx.BLANK_NODE_LABEL().getText());
+        // "[]" stands for a fresh node; its text must not be used as an identifier.
+        if (ctx.ANON() != null) return factory.createBNode();
+        throw new RuntimeException("Unsupported blank node structure: " + ctx.getText());
+    }
+
+    /**
+     * Extracts a graph label or subject from a LabelOrSubjectContext.
+     * Supports IRI and blank node.
+     *
+     * @param ctx the label or subject context
+     * @return the extracted resource
+     */
+    private Resource extractLabelOrSubject(TriGParser.LabelOrSubjectContext ctx) {
+        if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText()));
+        if (ctx.blankNode() != null) return extractBlankNode(ctx.blankNode());
+        throw new RuntimeException("Unsupported labelOrSubject: " + ctx.getText());
+    }
+
+    /**
+     * Extracts a predicate IRI from a VerbContext.
+     * Handles the special keyword 'a' as rdf:type.
+     *
+     * @param ctx the verb context
+     * @return the extracted IRI
+     */
+    private IRI extractVerb(TriGParser.VerbContext ctx) {
+        return factory.createIRI(resolveIRI(ctx.getText()));
+    }
+
+    /**
+     * Extracts a Literal from a LiteralContext, handling typed, language-tagged, boolean, and numeric literals.
+     * Escape sequences in quoted strings are decoded before the literal is created.
+     *
+     * @param ctx the literal context
+     * @return the extracted Literal
+     */
+    private Literal extractLiteral(TriGParser.LiteralContext ctx) {
+        if (ctx.rDFLiteral() != null) {
+            String label = unescape(stripQuotes(ctx.rDFLiteral().string().getText()));
+            if (ctx.rDFLiteral().LANGTAG() != null)
+                return factory.createLiteral(label, ctx.rDFLiteral().LANGTAG().getText().substring(1));
+            if (ctx.rDFLiteral().iri() != null)
+                return factory.createLiteral(label, factory.createIRI(resolveIRI(ctx.rDFLiteral().iri().getText())));
+            return factory.createLiteral(label);
+        }
+        if (ctx.BooleanLiteral() != null)
+            return factory.createLiteral(ctx.BooleanLiteral().getText(), XSD.BOOLEAN.getIRI());
+        if (ctx.numericLiteral() != null) {
+            if (ctx.numericLiteral().INTEGER() != null)
+                return factory.createLiteral(ctx.numericLiteral().INTEGER().getText(), XSD.INTEGER.getIRI());
+            if (ctx.numericLiteral().DECIMAL() != null)
+                return factory.createLiteral(ctx.numericLiteral().DECIMAL().getText(), XSD.DECIMAL.getIRI());
+            if (ctx.numericLiteral().DOUBLE() != null)
+                return factory.createLiteral(ctx.numericLiteral().DOUBLE().getText(), XSD.DOUBLE.getIRI());
+        }
+        throw new RuntimeException("Unsupported literal: " + ctx.getText());
+    }
+
+    /**
+     * Resolves an IRI reference or a prefixed name into a full IRI string.
+     * Handles IRIs in angle brackets (resolved against the base), prefixed names, and the keyword "a".
+     *
+     * @param raw the raw string
+     * @return the resolved IRI string
+     * @throws IllegalArgumentException if a prefixed name uses an undeclared prefix
+     */
+    private String resolveIRI(String raw) {
+        raw = raw.trim();
+        if (raw.startsWith("<") && raw.endsWith(">")) {
+            return resolveAgainstBase(unescape(raw.substring(1, raw.length() - 1)));
+        }
+        if (raw.equals("a")) return RDF.type.getIRI().stringValue();
+        int colon = raw.indexOf(':');
+        if (colon >= 0) {
+            String prefix = raw.substring(0, colon);
+            String ns = prefixMap.get(prefix);
+            if (ns != null) return ns + raw.substring(colon + 1);
+            throw new IllegalArgumentException("Undeclared prefix: " + prefix);
+        }
+        return baseURI + raw;
+    }
+
+    /**
+     * Resolves an IRI reference against the current base.
+     *
+     * @param reference the reference, without angle brackets
+     * @return the reference itself when it is absolute or no base is set, the resolved IRI otherwise
+     */
+    private String resolveAgainstBase(String reference) {
+        if (baseURI == null || baseURI.isEmpty() || SCHEME.matcher(reference).find()) return reference;
+        if (reference.isEmpty()) return baseURI;
+        try {
+            java.net.URI base = java.net.URI.create(baseURI);
+            // java.net.URI glues a relative path straight onto an authority-only base; give it a root path.
+            if (base.getRawAuthority() != null && "".equals(base.getRawPath())) base = base.resolve("/");
+            return base.resolve(reference).toString();
+        } catch (IllegalArgumentException e) {
+            // Not parseable by java.net.URI: fall back to plain concatenation.
+            return baseURI + reference;
+        }
+    }
+
+    /**
+     * Strips the surrounding quotes from a string token, in its short ("..." or '...') or long (triple-quoted) form.
+     *
+     * @param text the quoted string
+     * @return the unquoted string, or the input unchanged if it is not quoted
+     */
+    private String stripQuotes(String text) {
+        if (text == null || text.length() < 2) return text;
+        // Long forms first: a triple-quoted token also starts and ends with a single quote character.
+        int width = text.length() >= 6 && (text.startsWith("\"\"\"") || text.startsWith("'''")) ? 3 : 1;
+        String delimiter = text.substring(0, width);
+        boolean quoted = delimiter.charAt(0) == '"' || delimiter.charAt(0) == '\'';
+        if (!quoted || !text.endsWith(delimiter)) return text;
+        return text.substring(width, text.length() - width);
+    }
+
+    /**
+     * Decodes the character escapes (backslash + one of t b n r f " ' backslash) and the numeric escapes
+     * (backslash + 'u' and 4 hex digits, or backslash + 'U' and 8 hex digits) of a token.
+     *
+     * @param text the text with escapes
+     * @return the decoded text
+     * @throws IllegalArgumentException if a numeric escape is truncated, malformed or not a valid code point
+     */
+    private static String unescape(String text) {
+        if (text == null || text.indexOf('\\') < 0) return text;
+        StringBuilder out = new StringBuilder(text.length());
+        int i = 0;
+        while (i < text.length()) {
+            char c = text.charAt(i);
+            if (c != '\\' || i + 1 >= text.length()) {
+                out.append(c);
+                i++;
+                continue;
+            }
+            char code = text.charAt(i + 1);
+            int hexDigits = code == 'u' ? 4 : code == 'U' ? 8 : 0;
+            if (hexDigits > 0) {
+                int end = i + 2 + hexDigits;
+                if (end > text.length()) throw new IllegalArgumentException("Truncated Unicode escape in: " + text);
+                int codePoint = (int) Long.parseLong(text.substring(i + 2, end), 16);
+                if (!Character.isValidCodePoint(codePoint)) throw new IllegalArgumentException("Invalid Unicode escape in: " + text);
+                out.appendCodePoint(codePoint);
+                i = end;
+                continue;
+            }
+            switch (code) {
+                case 't': out.append('\t'); break;
+                case 'b': out.append('\b'); break;
+                case 'n': out.append('\n'); break;
+                case 'r': out.append('\r'); break;
+                case 'f': out.append('\f'); break;
+                case '"': case '\'': case '\\': out.append(code); break;
+                default: out.append('\\').append(code); // not an escape handled here: kept verbatim
+            }
+            i += 2;
+        }
+        return out.toString();
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java
new file mode 100644
index 000000000..8a9cc7bec
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java
@@ -0,0 +1,183 @@
+package fr.inria.corese.core.next.impl.io.parser.trig;
+
+import fr.inria.corese.core.next.api.Literal;
+import fr.inria.corese.core.next.api.Model;
+import fr.inria.corese.core.next.api.Value;
+import fr.inria.corese.core.next.api.ValueFactory;
+import fr.inria.corese.core.next.api.io.parser.RDFParser;
+import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory;
+import fr.inria.corese.core.next.impl.temp.CoreseModel;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import
java.io.StringReader;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Unit tests for the ANTLRTrigParser class.
+ * These tests parse TriG documents from strings and check how many statements,
+ * namespaces and contexts end up in the Model, covering named graphs,
+ * the default graph and nested blank node property lists.
+ */
+class ANTLRTrigParserTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(ANTLRTrigParserTest.class);
+
+    /**
+     * Helper method to parse TriG data into a Corese model.
+     *
+     * @param trigData a string of rdf data in trig format
+     * @param baseURI the base uri
+     * @return Corese rdf model
+     * @throws Exception if the parser fails
+     */
+
+    private Model parseFromString(String trigData, String baseURI) throws Exception {
+        Model model = new CoreseModel();
+        ValueFactory factory = new CoreseAdaptedValueFactory();
+        RDFParser parser = new ANTLRTrigParser(model, factory);
+        parser.parse(new StringReader(trigData), baseURI);
+        return model;
+    }
+
+    /**
+     * Helper method to print the model.
+     * @param model the model whose statements are logged at debug level
+     */
+    private void printModel(Model model) {
+        model.stream().forEach(stmt -> {
+            Value obj = stmt.getObject();
+            String subjectString = stmt.getSubject().stringValue();
+            String predicateString = stmt.getPredicate().stringValue();
+
+            if (obj instanceof Literal literal) {
+                String label = String.valueOf(literal.getLabel());
+                String languageTag = literal.getLanguage().orElse(null);
+
+                if (languageTag != null) {
+                    logger.debug("({}, {}, \"{}\"@{})",
+                            subjectString,
+                            predicateString,
+                            label,
+                            languageTag);
+                } else {
+                    logger.debug("({}, {}, \"{}\")",
+                            subjectString,
+                            predicateString,
+                            label);
+                }
+            } else {
+                logger.debug("({}, {}, {})",
+                        subjectString,
+                        predicateString,
+                        obj.stringValue());
+            }
+        });
+    }
+
+    @Test
+    void testNamedGraphParsing() throws Exception { // NOTE(review): namespace IRI reconstructed, '.' terminator added
+        String trig = """
+                @prefix ex: <http://example.org/> .
+                ex:Graph1 {
+                    ex:Alice ex:knows ex:Bob .
+                }""".trim();
+
+        Model model = parseFromString(trig, null);
+        printModel(model);
+        assertEquals(1, model.size());
+
+        assertEquals(1, model.getNamespaces().size());
+
+        assertEquals(1, model.contexts().size());
+    }
+
+    @Test
+    void testDocumentThatContainsOneGraphExample1() throws Exception { // IRIs as in the W3C TriG example
+        String trig = """
+                # This document encodes one graph.
+                @prefix ex: <http://www.example.org/vocabulary#> .
+                @prefix : <http://www.example.org/exampleDocument#> .
+
+                :G1 { :Monica a ex:Person ;
+                              ex:name "Monica Murphy" ;
+                              ex:homepage <http://www.monicamurphy.org> ;
+                              ex:email <mailto:monica@monicamurphy.org> ;
+                              ex:hasSkill ex:Management ,
+                                          ex:Programming . }
+                """.trim();
+
+        Model model = parseFromString(trig, null);
+        printModel(model);
+
+        assertEquals(6, model.size());
+
+        assertEquals(2, model.getNamespaces().size());
+
+        assertEquals(1, model.contexts().size());
+    }
+
+    @Test
+    void testDocumentThatContainsTwoGraphExample() throws Exception { // IRIs as in the W3C TriG example
+        String trig = """
+                # This document contains a same data as the
+                # previous example.
+
+                @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+                @prefix dc: <http://purl.org/dc/terms/> .
+                @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+
+                # default graph - no {} used.
+                <http://example.org/bob> dc:publisher "Bob" .
+                <http://example.org/alice> dc:publisher "Alice" .
+
+                # GRAPH keyword to highlight a named graph
+                # Abbreviation of triples using ;
+                GRAPH <http://example.org/bob>
+                {
+                   [] foaf:name "Bob" ;
+                      foaf:mbox <mailto:bob@oldcorp.example.org> ;
+                      foaf:knows _:b .
+                }
+
+                GRAPH <http://example.org/alice>
+                {
+                    _:b foaf:name "Alice" ;
+                        foaf:mbox <mailto:alice@work.example.org>
+                }
+                """.trim();
+
+        Model model = parseFromString(trig, null);
+        printModel(model);
+
+        assertEquals(7, model.size());
+
+        assertEquals(3, model.getNamespaces().size());
+
+        assertEquals(3, model.contexts().size());
+    }
+
+    @Test
+    void testNestedBlankNodesWithSharedIdentifiers() throws Exception { // NOTE(review): namespace IRI reconstructed
+        String trig = """
+                @prefix ex: <http://example.org/> .
+
+                GRAPH ex:graph1 {
+                    ex:Alice ex:knows [
+                        ex:name "Bob" ;
+                        ex:knows [
+                            ex:name "Charlie"
+                        ]
+                    ] ;
+                    ex:email "alice@example.org" .
+                }
+                """.trim();
+        Model model = parseFromString(trig, null);
+        printModel(model);
+
+        assertEquals(5, model.size());
+
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java
new file mode 100644
index 000000000..869f5ff5f
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java
@@ -0,0 +1,102 @@
+package fr.inria.corese.core.next.impl.io.parser.trig;
+
+import fr.inria.corese.core.next.api.Model;
+import fr.inria.corese.core.next.api.ValueFactory;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGLexer;
+import fr.inria.corese.core.next.impl.parser.antlr.TriGParser;
+import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory;
+import fr.inria.corese.core.next.impl.temp.CoreseModel;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.junit.jupiter.api.Test;
+
+import java.io.StringReader;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for the {@link TriGListerner} class.
+ * These tests drive the listener over ANTLR parse trees built from TriG strings
+ * and check how many statements and contexts it adds to the model.
+ */
+class TriGListenerImplTest {
+    private Model parseTrig(String trigData) throws Exception {
+        ValueFactory factory = new CoreseAdaptedValueFactory();
+
+        CharStream input = CharStreams.fromReader(new StringReader(trigData));
+        TriGLexer lexer = new TriGLexer(input);
+        CommonTokenStream tokens = new CommonTokenStream(lexer);
+        TriGParser parser = new TriGParser(tokens);
+        ParseTree tree = parser.trigDoc();
+
+        Model model = new CoreseModel();
+        TriGListerner listener = new TriGListerner(model, factory, null);
+        ParseTreeWalker.DEFAULT.walk(listener, tree);
+
+        return model;
+    }
+
+    @Test
+    void testSimpleNamedGraph() throws Exception { // NOTE(review): namespace IRI reconstructed
+        String trig = """
+            @prefix ex: <http://example.org/> .
+
+            GRAPH ex:graph {
+                ex:subject ex:predicate "Hello" .
+            }
+            """;
+
+        Model model = parseTrig(trig);
+        assertEquals(1, model.size());
+        assertEquals(1, model.contexts().size());
+    }
+
+    @Test
+    void testBlankNodeWithProperties() throws Exception { // NOTE(review): namespace IRI reconstructed
+        String trig = """
+            @prefix ex: <http://example.org/> .
+            GRAPH ex:graph {
+                ex:Bob ex:knows [ ex:name "Charlie" ] .
+            }
+            """;
+
+        Model model = parseTrig(trig);
+        assertEquals(2, model.size());
+    }
+
+    @Test
+    void testMultipleGraphsAndBase() throws Exception { // NOTE(review): base, namespace and subject IRIs reconstructed
+        String trig = """
+            @base <http://example.org/> .
+            @prefix dc: <http://purl.org/dc/terms/> .
+            @prefix ex: <http://example.org/ns#> .
+
+            <http://example.org/bob> dc:creator "Bob" .
+
+            GRAPH ex:other {
+                <http://example.org/alice> dc:creator "Alice" .
+            }
+            """;
+
+        Model model = parseTrig(trig);
+        assertEquals(2, model.contexts().size());
+        assertEquals(2, model.size());
+    }
+
+    @Test
+    void testTypedLiteralsAndLang() throws Exception { // NOTE(review): ex: namespace IRI reconstructed
+        String trig = """
+            @prefix ex: <http://example.org/> .
+            @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+            ex:subject ex:age "30"^^xsd:integer ;
+                       ex:name "Jean"@fr .
+            """;
+
+        Model model = parseTrig(trig);
+        assertEquals(2, model.size());
+    }
+}
diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java
index 9519cd4f0..77bfc928c 100644
--- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java
+++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java
@@ -30,6 +30,21 @@ public void constructorStringTest() {
         assertEquals("test", coreseIRI.getLocalName());
     }
 
+    @Test
+    public void constructorStringTest_otherURIS() {
+        CoreseIRI noSlashIri = new CoreseIRI("http://www.monicamurphy.org"); // no path: the whole IRI is the namespace
+        assertEquals("http://www.monicamurphy.org", noSlashIri.stringValue());
+        assertEquals("http://www.monicamurphy.org", noSlashIri.getCoreseNode().getLabel());
+        assertEquals("http://www.monicamurphy.org", noSlashIri.getNamespace());
+        assertEquals("", noSlashIri.getLocalName());
+
+        CoreseIRI mailtoIri = new CoreseIRI("mailto:monica@monicamurphy.org"); // no "//" after the scheme
+        assertEquals("mailto:monica@monicamurphy.org", mailtoIri.stringValue());
+        assertEquals("mailto:monica@monicamurphy.org", mailtoIri.getCoreseNode().getLabel());
+        assertEquals("mailto:monica@monicamurphy.org", mailtoIri.getNamespace());
+        assertEquals("", mailtoIri.getLocalName());
+    }
+
     @Test
     public void constructorIriTest() {
         CoreseIRI coreseIRI = new CoreseIRI("http://example.org/test");