From a24d3da7b80786726217318859149f3799c66a2c Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 4 Jun 2025 17:02:52 +0200 Subject: [PATCH 1/6] RDF formats --- .../core/next/api/base/parser/RDFFormat.java | 114 ++++++++ .../core/next/api/base/parser/RDFFormats.java | 255 ++++++++++++++++++ .../core/next/api/base/parser/RDFParser.java | 9 + .../api/base/parser/RDFParserFactory.java | 6 + .../impl/exception/ParsingErrorException.java | 18 ++ 5 files changed, 402 insertions(+) create mode 100644 src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java create mode 100644 src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java create mode 100644 src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java new file mode 100644 index 000000000..4f44df5c5 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java @@ -0,0 +1,114 @@ +package main.java.fr.inria.corese.core.next.api.base.parser; + +import java.nio.charset.Charset; +import java.util.*; +import fr.inria.corese.core.next.api.IRI; + +public interface RDFFormat { + + /** + * Gets the name of this file format. + * + * @return A human-readable format name, e.g. "PLAIN TEXT". + */ + String getName(); + + + /** + * Gets the default MIME type for this file format. + * + * @return A MIME type string, e.g. "text/plain". + */ + String getDefaultMIMEType() ; + + + /** + * Checks if the specified MIME type matches the FileFormat's default MIME type. The MIME types are compared + * ignoring upper/lower-case differences. 
+ * + * @param mimeType The MIME type to compare to the FileFormat's default MIME type. + * @return true if the specified MIME type matches the FileFormat's default MIME type. + */ + boolean hasDefaultMIMEType(String mimeType); + + /** + * Gets the file format's MIME types. + * + * @return An unmodifiable list of MIME type strings, e.g. "text/plain". + */ + List getMIMETypes(); + + + + /** + * Checks if specified MIME type matches one of the FileFormat's MIME types. The MIME types are compared ignoring + * upper/lower-case differences. + * + * @param mimeType The MIME type to compare to the FileFormat's MIME types. + * @return true if the specified MIME type matches one of the FileFormat's MIME types. + */ + boolean hasMIMEType(String mimeType); + + /** + * Gets the default file name extension for this file format. + * + * @return A file name extension (excluding the dot), e.g. "txt", or null if there is no common file + * extension for the format. + */ + String getDefaultFileExtension(); + + /** + * Checks if the specified file name extension matches the FileFormat's default file name extension. The file name + * extensions are compared ignoring upper/lower-case differences. + * + * @param extension The file extension to compare to the FileFormat's file extension. + * @return true if the file format has a default file name extension and if it matches the specified + * extension, false otherwise. + */ + boolean hasDefaultFileExtension(String extension); + + /** + * Gets the file format's file extensions. + * + * @return An unmodifiable list of file extension strings, e.g. "txt". + */ + List getFileExtensions(); + + /** + * Checks if the FileFormat's file extension is equal to the specified file extension. The file extensions are + * compared ignoring upper/lower-case differences. + * + * @param extension The file extension to compare to the FileFormat's file extension. 
+ * @return true if the specified file extension is equal to the FileFormat's file extension. + */ + boolean hasFileExtension(String extension); + + /** + * Get the (default) charset for this file format. + * + * @return the (default) charset for this file format, or null if this format does not have a default charset. + */ + Charset getCharset(); + + /** + * Checks if the FileFormat has a (default) charset. + * + * @return true if the FileFormat has a (default) charset. + */ + boolean hasCharset(); + + /** + * Return true if the RDFFormat supports the encoding of namespace/prefix information. + */ + boolean supportsNamespaces(); + + /** + * Return true if the RDFFormat supports the encoding of contexts/named graphs. + */ + boolean supportsContexts(); + + /** + * Return true if the RDFFormat supports the encoding of RDF-star triples natively. + */ + boolean supportsRDFStar(); +} diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java new file mode 100644 index 000000000..3895d6d00 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java @@ -0,0 +1,255 @@ +package main.java.fr.inria.corese.core.next.api.base.parser; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; + +public enum RDFFormats implements RDFFormat { + + TURTLE("Turtle", + List.of("text/turtle"), + List.of("ttl"), + true, + false, + false), + N3("N3", + List.of("text/n3"), + List.of("n3"), + true, + false, + false), + RDF_XML("RDF/XML", + List.of("application/rdf+xml"), + List.of("rdf", "xml"), + true, + false, + false), + JSON_LD("JSON-LD", + List.of("application/ld+json"), + List.of("jsonld", "json"), + true, + true, + false), + N_TRIPLES("N-Triples", + List.of("application/n-triples"), + List.of("nt"), + false, + false, + false), + TRIG("TriG", + List.of("application/trig"), + 
List.of("trig"), + true, + true, + false), + NQUADS("N-Quads", + List.of("application/n-quads"), + List.of("nq"), + true, + true, + false); + + public static final boolean DEFAULT_SUPPORTS_NAMESPACES = true; + public static final boolean DEFAULT_SUPPORTS_CONTEXTS = true; + public static final boolean DEFAULT_SUPPORTS_RDF_STAR = false; + + /** + * The file format human-readable name. + */ + private final String name; + + /** + * The file format's MIME types. The first item in the list is interpreted as the default MIME type for the format. + */ + private final List mimeTypes; + + /** + * The file format's (default) charset. + */ + private final Charset charset; + + /** + * The file format's file extensions. The first item in the list is interpreted as the default file extension for + * the format. + */ + private final List fileExtensions; + + /** + * Flag indicating whether the RDFFormat can encode namespace information. + */ + private final boolean supportsNamespaces; + + /** + * Flag indicating whether the RDFFormat can encode context information (ex: Graphs or quads). + */ + private final boolean supportsContexts; + + /** + * Flag indicating whether the RDFFormat can encode RDF-star triples natively. 
+ */ + private final boolean supportsRDFStar; + + RDFFormats(String name, + List mimeTypes, + Charset charset, + List fileExtensions, + boolean supportsNamespaces, + boolean supportsContexts, + boolean supportsRDFStar) { + this.name = name; + this.mimeTypes = mimeTypes; + this.charset = charset; + this.fileExtensions = fileExtensions; + this.supportsNamespaces = supportsNamespaces; + this.supportsContexts = supportsContexts; + this.supportsRDFStar = supportsRDFStar; + } + + RDFFormats(String name, + List mimeTypes, + Charset charset, + List fileExtensions) { + this(name, mimeTypes, charset, fileExtensions, DEFAULT_SUPPORTS_NAMESPACES, DEFAULT_SUPPORTS_CONTEXTS, DEFAULT_SUPPORTS_RDF_STAR); + } + + RDFFormats(String name, + List mimeTypes, + List fileExtensions) { + this(name, mimeTypes, StandardCharsets.UTF_8, fileExtensions, DEFAULT_SUPPORTS_NAMESPACES, DEFAULT_SUPPORTS_CONTEXTS, DEFAULT_SUPPORTS_RDF_STAR); + } + + RDFFormats(String name, + List mimeTypes, + List fileExtensions, + boolean supportsNamespaces, + boolean supportsContexts, + boolean supportsRDFStar) { + this(name, mimeTypes, StandardCharsets.UTF_8, fileExtensions, supportsNamespaces, supportsContexts, supportsRDFStar); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getDefaultMIMEType() { + return mimeTypes.get(0); + } + + @Override + public boolean hasDefaultMIMEType(String mimeType) { + return getDefaultMIMEType().equalsIgnoreCase(mimeType); + } + + @Override + public List getMIMETypes() { + return Collections.unmodifiableList(mimeTypes); + } + + @Override + public boolean hasMIMEType(String mimeType) { + if (mimeType == null) { + return false; + } + String type = mimeType; + if (mimeType.indexOf(';') > 0) { + type = mimeType.substring(0, mimeType.indexOf(';')); + } + for (String mt : this.mimeTypes) { + if (mt.equalsIgnoreCase(mimeType)) { + return true; + } + if (mimeType != type && mt.equalsIgnoreCase(type)) { + return true; + } + } + + return false; 
+ } + + @Override + public String getDefaultFileExtension() { + if (fileExtensions.isEmpty()) { + return null; + } else { + return fileExtensions.get(0); + } + } + + @Override + public boolean hasDefaultFileExtension(String extension) { + String ext = getDefaultFileExtension(); + return ext != null && ext.equalsIgnoreCase(extension); + } + + @Override + public List getFileExtensions() { + return Collections.unmodifiableList(fileExtensions); + } + + @Override + public boolean hasFileExtension(String extension) { + for (String ext : fileExtensions) { + if (ext.equalsIgnoreCase(extension)) { + return true; + } + } + + return false; + } + + @Override + public Charset getCharset() { + return charset; + } + + @Override + public boolean hasCharset() { + return charset != null; + } + + @Override + public boolean supportsNamespaces() { + return supportsNamespaces; + } + + @Override + public boolean supportsContexts() { + return supportsContexts; + } + + @Override + public boolean supportsRDFStar() { + return supportsRDFStar; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(64); + + sb.append(name); + + sb.append(" (mimeTypes="); + for (int i = 0; i < mimeTypes.size(); i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(mimeTypes.get(i)); + } + + sb.append("; ext="); + for (int i = 0; i < fileExtensions.size(); i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(fileExtensions.get(i)); + } + + sb.append(")"); + + return sb.toString(); + } + +} diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java new file mode 100644 index 000000000..f619b1430 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java @@ -0,0 +1,9 @@ +package main.java.fr.inria.corese.core.next.api.base.parser; + +public interface RDFParser { + + /** + * Gets the RDF format that this parser can parse. 
+ */ + RDFFormat getRDFFormat(); +} diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java new file mode 100644 index 000000000..52baf8603 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java @@ -0,0 +1,6 @@ +package main.java.fr.inria.corese.core.next.api.base.parser; + +public interface RDFParserFactory { + + +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java b/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java new file mode 100644 index 000000000..786a062bd --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java @@ -0,0 +1,18 @@ +package main.java.fr.inria.corese.core.next.impl.exception; + +public class ParsingErrorException extends RuntimeException { + + private static final long serialVersionUID = -2053549958572141648L; + + public ParsingErrorException(String message) { + super(message); + } + + public ParsingErrorException(String message, Throwable cause) { + super(message, cause); + } + + public ParsingErrorException(Throwable cause) { + super(cause); + } +} From 46eccb8992f6384089c8b1e6189cc921b2e4b3fa Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 5 Jun 2025 11:31:16 +0200 Subject: [PATCH 2/6] Barebone RDFParser Factory --- .../core/next/api/base/parser/RDFFormat.java | 2 +- .../core/next/api/base/parser/RDFFormats.java | 8 ++++---- .../core/next/api/base/parser/RDFParser.java | 2 +- .../api/base/parser/RDFParserFactory.java | 12 +++++++++++- .../impl/exception/ParsingErrorException.java | 2 +- .../UnsupportedFileFormatException.java | 19 +++++++++++++++++++ 6 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/exception/UnsupportedFileFormatException.java diff --git 
a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java index 4f44df5c5..379fba182 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormat.java @@ -1,4 +1,4 @@ -package main.java.fr.inria.corese.core.next.api.base.parser; +package fr.inria.corese.core.next.api.base.parser; import java.nio.charset.Charset; import java.util.*; diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java index 3895d6d00..d8d99d8b0 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFFormats.java @@ -1,4 +1,4 @@ -package main.java.fr.inria.corese.core.next.api.base.parser; +package fr.inria.corese.core.next.api.base.parser; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -50,9 +50,9 @@ public enum RDFFormats implements RDFFormat { true, false); - public static final boolean DEFAULT_SUPPORTS_NAMESPACES = true; - public static final boolean DEFAULT_SUPPORTS_CONTEXTS = true; - public static final boolean DEFAULT_SUPPORTS_RDF_STAR = false; + private static final boolean DEFAULT_SUPPORTS_NAMESPACES = true; + private static final boolean DEFAULT_SUPPORTS_CONTEXTS = true; + private static final boolean DEFAULT_SUPPORTS_RDF_STAR = false; /** * The file format human-readable name. 
diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java index f619b1430..301ed6211 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java @@ -1,4 +1,4 @@ -package main.java.fr.inria.corese.core.next.api.base.parser; +package fr.inria.corese.core.next.api.base.parser; public interface RDFParser { diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java index 52baf8603..950de673e 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java @@ -1,6 +1,16 @@ -package main.java.fr.inria.corese.core.next.api.base.parser; +package fr.inria.corese.core.next.api.base.parser; + +import fr.inria.corese.core.next.api.Model; public interface RDFParserFactory { + /** + * Creates a new RDF parser for the specified format and model. + * + * @param format The RDF format to use for parsing. + * @param model The model to which the parsed data will be added. + * @return A new instance of an RDF parser for the specified format and model. 
+ */ + RDFParser createRDFParser(RDFFormat format, Model model); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java b/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java index 786a062bd..cc6bf4eee 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java +++ b/src/main/java/fr/inria/corese/core/next/impl/exception/ParsingErrorException.java @@ -1,4 +1,4 @@ -package main.java.fr.inria.corese.core.next.impl.exception; +package fr.inria.corese.core.next.impl.exception; public class ParsingErrorException extends RuntimeException { diff --git a/src/main/java/fr/inria/corese/core/next/impl/exception/UnsupportedFileFormatException.java b/src/main/java/fr/inria/corese/core/next/impl/exception/UnsupportedFileFormatException.java new file mode 100644 index 000000000..b44687d51 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/exception/UnsupportedFileFormatException.java @@ -0,0 +1,19 @@ +package fr.inria.corese.core.next.impl.exception; + +public class UnsupportedFileFormatException extends Exception { + + private static final long serialVersionUID = 7963163989802143570L; + + public UnsupportedFileFormatException(String message) { + super(message); + } + + public UnsupportedFileFormatException(String message, Throwable cause) { + super(message, cause); + } + + public UnsupportedFileFormatException(Throwable cause) { + super(cause); + } + +} From 2b19ee8ce4ea7737a109811f439a16153dcfaf42 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 5 Jun 2025 11:40:16 +0200 Subject: [PATCH 3/6] barebone parser interface --- .../core/next/api/base/parser/RDFParser.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java index 301ed6211..cf8da275c 100644 --- 
a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParser.java @@ -1,9 +1,42 @@ package fr.inria.corese.core.next.api.base.parser; +import java.io.InputStream; +import java.io.Reader; + public interface RDFParser { /** * Gets the RDF format that this parser can parse. */ RDFFormat getRDFFormat(); + + /** + * Parses RDF data from the specified InputStream and adds it to the model. + * + * @param in The InputStream to read RDF data from. + */ + void parse(InputStream in); + + /** + * Parses RDF data from the specified InputStream and adds it to the model. + * + * @param in The InputStream to read RDF data from. + * @param baseURI The base URI for resolving relative URIs in the RDF data. + */ + void parse(InputStream in, String baseURI); + + /** + * Parses RDF data from the specified Reader and adds it to the model. + * + * @param reader The Reader to read RDF data from. + */ + void parse(Reader reader); + + /** + * Parses RDF data from the specified Reader and adds it to the model. + * + * @param reader The Reader to read RDF data from. + * @param baseURI The base URI for resolving relative URIs in the RDF data. 
+ */ + void parse(Reader reader, String baseURI); } From b7f8e7fb7d951bf9babd7b10445de94b202d5277 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 20 Jun 2025 13:43:56 +0200 Subject: [PATCH 4/6] TurtleParser and turtle g4 --- .../core/next/impl/parser/turtle/Turtle.g4 | 324 ++++++++++++++++++ .../next/impl/parser/turtle/TurtleParser.java | 46 +++ .../parser/turtle/TurtleParserFactory.java | 18 + 3 files changed, 388 insertions(+) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 create mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 new file mode 100644 index 000000000..a51350b3e --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 @@ -0,0 +1,324 @@ +/* + [The "BSD licence"] + Copyright (c) 2014, Alejandro Medrano (@ Universidad Politecnica de Madrid, http://www.upm.es/) + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +*/ +/* Derived from http://www.w3.org/TR/turtle/#sec-grammar-grammar */ + +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + +grammar Turtle; + +turtleDoc + : statement* EOF + ; + +statement + : directive + | triples '.' + ; + +directive + : prefixID + | base + | sparqlPrefix + | sparqlBase + ; + +triples + : subject predicateObjectList + | blankNodePropertyList predicateObjectList? + ; + +predicateObjectList + : verb objectList (';' (verb objectList)?)* + ; + +objectList + : object_ (',' object_)* + ; + +verb + : predicate + | 'a' + ; + +subject + : iri + | BlankNode + | collection + ; + +predicate + : iri + ; + +object_ + : iri + | BlankNode + | collection + | blankNodePropertyList + | literal + ; + +literal + : rdfLiteral + | numericLiteral + | BooleanLiteral + ; + +blankNodePropertyList + : '[' predicateObjectList ']' + ; + +collection + : '(' object_* ')' + ; + +prefixID + : '@prefix' PNAME_NS IRIREF '.' + ; + +base + : '@base' IRIREF '.' + ; + +sparqlBase + : Base_w IRIREF + ; + +sparqlPrefix + : Prefix_w PNAME_NS IRIREF + ; + +numericLiteral + : INTEGER + | DECIMAL + | DOUBLE + ; + +rdfLiteral + : string (LANGTAG | '^^' iri)? 
+ ; + +BooleanLiteral + : 'true' + | 'false' + ; + +string + : STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE + | STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE + ; + +iri + : IRIREF + | PrefixedName + ; + +BlankNode + : BLANK_NODE_LABEL + | ANON + ; + +WS + : ([\t\r\n\u000C] | ' ')+ -> skip + ; + +// LEXER + +Base_w options { caseInsensitive=true; } + : 'BASE' + ; + +Prefix_w options { caseInsensitive=true; } + : 'PREFIX' + ; + +// PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? +// Prefix without the final ':' +PN_PREFIX + : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? + ; + +IRIREF + : '<' ((~( '\u0000' | '\u0020' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' |'\\' )) | UCHAR)* '>' + ; + +// Prefix alone +PNAME_NS + : PN_PREFIX? ':' + ; + +PrefixedName + : PNAME_LN + | PNAME_NS + ; + +// Prefix + local name +PNAME_LN + : PNAME_NS PN_LOCAL + ; + +BLANK_NODE_LABEL + : '_:' (PN_CHARS_U | '0' .. '9') ((PN_CHARS | '.')* PN_CHARS)? + ; + +LANGTAG + : '@' ('a'.. 'z' | 'A' .. 'Z')+ ('-' ('a'.. 'z' | 'A' .. 'Z' | '0' .. '9')* )* + ; + +INTEGER + : ('+' | '-' )? ('0' .. '9')+ + ; + +DECIMAL + : ('+' | '-' )? ('0' .. '9')* '.' ('0' .. '9')+ + ; + +DOUBLE + : ('+' | '-' )? (('0' .. '9')+ '.' ('0' .. '9')* EXPONENT + | '.' ('0' .. '9')+ EXPONENT + | ('0' .. '9')+ EXPONENT) + ; + +EXPONENT + : ('e' | 'E') ('+' | '-' )? ('0' .. '9')+ + ; + +// "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" +STRING_LITERAL_LONG_SINGLE_QUOTE + : '\'\'\'' ( ('\'' '\''? )? ( [^'\\] | ECHAR | UCHAR | '"' ) )* '\'\'\'' + ; + +// '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' +STRING_LITERAL_LONG_QUOTE + : '"""' ( ('"' '"'? )? 
( (~["\\]) | ECHAR | UCHAR )+ )* '"""' + ; + +STRING_LITERAL_QUOTE + : '"' (~ [\u0027\u005C\u000A\u000D] | ECHAR | UCHAR | '"')* '"' + ; + +STRING_LITERAL_SINGLE_QUOTE + : '\'' (~ [\u0027\u005C\u000A\u000D] | ECHAR | UCHAR | '\'')* '\'' + ; + +// Hexadecimal unicode character +UCHAR + : '\\u' HEX HEX HEX HEX + | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX + ; + +// Escaped character +ECHAR + : '\\' [tbnrf"'\\] + ; + +ANON_WS + : ' ' + | '\t' + | '\r' + | '\n' + ; + +ANON + : '[' ANON_WS* ']' + ; + +PN_CHARS_BASE + : 'A' .. 'Z' + | 'a' .. 'z' + | '\u00C0' .. '\u00D6' + | '\u00D8' .. '\u00F6' + | '\u00F8' .. '\u02FF' + | '\u0370' .. '\u037D' + | '\u037F' .. '\u1FFF' + | '\u200C' .. '\u200D' + | '\u2070' .. '\u218F' + | '\u2C00' .. '\u2FEF' + | '\u3001' .. '\uD7FF' + | '\uF900' .. '\uFDCF' + | '\uFDF0' .. '\uFFFD' +// | '\u10000' .. '\uEFFFF' + ; + +PN_CHARS_U + : PN_CHARS_BASE + | '_' + ; + +PN_CHARS + : PN_CHARS_U + | '-' + | [0-9] + | [\u00B7] + | [\u0300-\u036F] + | [\u203F-\u2040] + ; + +PN_LOCAL + : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? + ; + +PLX + : PERCENT + | PN_LOCAL_ESC + ; + +PERCENT + : '%' HEX HEX + ; + +HEX + : [0-9] + | [A-F] + | [a-f] + ; + +PN_LOCAL_ESC + : '\\' ( + '_' + | '~' + | '.' + | '-' + | '!' + | '$' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | ';' + | '=' + | '/' + | '?' 
+ | '#' + | '@' + | '%' + ) + ; + +LC + : '#' ~[\r\n]* -> channel(HIDDEN) + ; + diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java new file mode 100644 index 000000000..bae3779fa --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java @@ -0,0 +1,46 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; + +import java.io.InputStream; +import java.io.Reader; + +public class TurtleParser implements RDFParser { + + private final Model model; + private final RDFFormat format = RDFFormats.TURTLE; + + public TurtleParser(Model model) { + this.model = model; + } + + + @Override + public RDFFormat getRDFFormat() { + return format; + } + + @Override + public void parse(InputStream in) { + + } + + @Override + public void parse(InputStream in, String baseURI) { + + } + + @Override + public void parse(Reader reader) { + + } + + @Override + public void parse(Reader reader, String baseURI) { + + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java new file mode 100644 index 000000000..69be4314f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java @@ -0,0 +1,18 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import 
fr.inria.corese.core.next.api.base.parser.RDFParserFactory; + +public class TurtleParserFactory implements RDFParserFactory { + + @Override + public RDFParser createRDFParser(RDFFormat format, Model model) { + if (!format.equals(RDFFormats.TURTLE)) { + throw new IllegalArgumentException("Unsupported format : " + format); + } + return new TurtleParser(model); + } +} From 1070c1b5cecc9bd9af7ff2312edcb00639be7e16 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Wed, 25 Jun 2025 17:01:43 +0200 Subject: [PATCH 5/6] TurtleParser and turtle g4 --- build.gradle.kts | 47 ++++- .../impl/parser/turtle => antlr}/Turtle.g4 | 0 .../impl/parser/turtle/ANTLRTurtleParser.java | 74 +++++++ .../parser/turtle/TurtleListenerImpl.java | 180 ++++++++++++++++++ .../parser/turtle/TurtleParserFactory.java | 2 +- .../parser/turtle/ANTLRTurtleParserSpec.java | 30 +++ .../parser/turtle/TurtleListenerImplSpec.java | 99 ++++++++++ 7 files changed, 427 insertions(+), 5 deletions(-) rename src/main/{java/fr/inria/corese/core/next/impl/parser/turtle => antlr}/Turtle.g4 (100%) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java diff --git a/build.gradle.kts b/build.gradle.kts index 5f8a27673..55fa51cbb 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -15,6 +15,7 @@ plugins { id("com.gradleup.shadow") version "8.3.7" id("org.sonarqube") version "6.1.0.5360" // SonarQube integration id("com.intershop.gradle.javacc") version "5.0.1" // JavaCC plugin for parsing JavaCC files + id("antlr") // Antlr plugin for generating parsers from grammar files } // SonarQube configuration @@ -69,11 +70,11 @@ object Meta { // Project description const val 
desc = "Corese is a Semantic Web Factory (triple store and SPARQL endpoint) implementing RDF, RDFS, SPARQL 1.1 Query and Update, Shacl. STTL. LDScript." const val githubRepo = "corese-stack/corese-core" - + // License information const val license = "CeCILL-C License" const val licenseUrl = "https://opensource.org/licenses/CeCILL-C" - + // Sonatype OSSRH publishing settings const val release = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" const val snapshot = "https://oss.sonatype.org/content/repositories/snapshots/" @@ -113,6 +114,10 @@ dependencies { implementation("fr.inria.corese.org.semarglproject:semargl-rdfa:0.7.2") // RDFa parser (Semargl) implementation("com.github.jsonld-java:jsonld-java:0.13.4") // JSON-LD processing + // === Antlr dependencies === + antlr("org.antlr:antlr4:4.13.2") // Antlr for parsing (ANTLR 4) + implementation("org.antlr:antlr4-runtime:4.13.2") // Antlr runtime for parsing + // === HTTP and XML === implementation("org.glassfish.jersey.core:jersey-client:3.1.10") // HTTP client (Jersey) implementation("org.glassfish.jersey.inject:jersey-hk2:3.1.10") // Dependency injection for Jersey @@ -143,7 +148,7 @@ publishing { // Configure the publication to include JAR, sources, and Javadoc from(components["java"]) - // Configures version mapping to control how dependency versions are resolved + // Configures version mapping to control how dependency versions are resolved // for different usage contexts (API and runtime). versionMapping { // Defines version mapping for Java API usage. @@ -270,7 +275,7 @@ tasks.withType { tasks { shadowJar { this.archiveClassifier = "jar-with-dependencies" - } + } } // Configure Javadoc tasks to disable doclint warnings. 
@@ -321,3 +326,37 @@ tasks.withType().configureEach { tasks.withType().configureEach { dependsOn(tasks.withType()) } + +// === Antlr generated sources configuration === + +// Path where Antlr will generate sources +val generatedSourcesPath = "src/main/generated" + +// Add the generated sources directory to the main source set +sourceSets["main"].java.srcDir(file(generatedSourcesPath)) + +// Configure the Antlr task to generate parser code with specific arguments +tasks.named("generateGrammarSource") { + arguments.addAll(listOf("-visitor", "-long-messages", "-package", "fr.inria.corese.core.next.impl.parser.antlr")) + outputDirectory = file("$buildDir/generated-src/antlr/main") + outputs.dirs(outputDirectory) +} + +// Ensure Java compilation depends on Antlr code generation +tasks.named("compileJava") { + dependsOn("generateGrammarSource" /*, "copyAntlrGenerated" */) +} + +// Ensure sources JAR includes generated sources and depends on Antlr code generation +tasks.named("sourcesJar") { + dependsOn("generateGrammarSource" /*, "copyAntlrGenerated" */) + from(generatedSourcesPath) + includeEmptyDirs = false +} + +// Clean up generated sources on clean +tasks.clean { + doLast { + file(generatedSourcesPath).deleteRecursively() + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 b/src/main/antlr/Turtle.g4 similarity index 100% rename from src/main/java/fr/inria/corese/core/next/impl/parser/turtle/Turtle.g4 rename to src/main/antlr/Turtle.g4 diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java new file mode 100644 index 000000000..875d6c738 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java @@ -0,0 +1,74 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import 
fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleLexer; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; + +public class ANTLRTurtleParser implements RDFParser { + + private final Model model; + private final RDFFormat format = RDFFormats.TURTLE; + + public ANTLRTurtleParser(Model model) { + this.model = model; + } + + @Override + public RDFFormat getRDFFormat() { + return format; + } + + @Override + public void parse(InputStream in) { + parse(new InputStreamReader(in), null); + } + + @Override + public void parse(InputStream in, String baseURI) { + parse(new InputStreamReader(in), baseURI); + } + + @Override + public void parse(Reader reader) { + parse(reader, null); + } + + /** + * We are using ANTLR4 lexer and parser + * @param reader The Reader to read RDF data from. + * @param baseURI The base URI for resolving relative URIs in the RDF data. 
+ */ + @Override + public void parse(Reader reader, String baseURI) { + + try { + CharStream charStream = CharStreams.fromReader(reader); + TurtleLexer lexer = new TurtleLexer(charStream); + CommonTokenStream tokens = new CommonTokenStream(lexer); + TurtleParser parser = new TurtleParser(tokens); + ParseTreeWalker walker = new ParseTreeWalker(); + ParseTree tree = parser.turtleDoc(); + TurtleListenerImpl listener = new TurtleListenerImpl(model, baseURI); + + walker.walk((ParseTreeListener) listener, tree); + + } catch (IOException e) { + throw new RuntimeException("Failed to parse Turtle RDF", e); + } + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java new file mode 100644 index 000000000..8488f8968 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java @@ -0,0 +1,180 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleBaseListener; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseIRI; +import fr.inria.corese.core.next.impl.temp.ModelNamespace; +import fr.inria.corese.core.next.impl.temp.literal.CoreseBNode; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import java.util.HashMap; +import java.util.Map; + +public class TurtleListenerImpl extends TurtleBaseListener { + + private final Model model; + private String baseURI; + private final Map prefixMap = new HashMap<>(); + private final ValueFactory 
factory = new CoreseAdaptedValueFactory(); + + private Resource currentSubject; + private IRI currentPredicate; + + public TurtleListenerImpl(Model model, String baseURI) { + this.model = model; + this.baseURI = baseURI != null ? baseURI : ""; + } + + public void exitPrefixID(TurtleParser.PrefixIDContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + String iri = ctx.IRIREF().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + iri = iri.substring(1, iri.length() - 1); + prefixMap.put(prefix, iri); + + Namespace ns = new ModelNamespace(prefix, iri); + model.setNamespace(prefix, iri); + } + + public void exitSparqlBase(TurtleParser.SparqlBaseContext ctx) { + String iri = ctx.IRIREF().getText(); + baseURI = iri.substring(1, iri.length() - 1); + } + + public void enterTriples(TurtleParser.TriplesContext ctx) { + currentSubject = extractSubject(ctx.subject()); + } + + public void enterVerb(TurtleParser.VerbContext ctx) { + currentPredicate = extractVerb(ctx); + } + + public void exitObject_(TurtleParser.Object_Context ctx) { + Value object = extractObject(ctx); + model.add(currentSubject, currentPredicate, object); + } + + + @Override public void enterEveryRule(ParserRuleContext ctx) {} + + @Override public void exitEveryRule(ParserRuleContext ctx) { } + + @Override public void visitTerminal(TerminalNode node) { } + + @Override public void visitErrorNode(ErrorNode node) { } + + + private String resolveIRI(String raw) { + if (raw.startsWith("<") && raw.endsWith(">")) { + return raw.substring(1, raw.length() - 1); + } else if (raw.equals("a")) { + return RDF.type.getIRI().stringValue(); + } else if (raw.contains(":")) { + // Prefixed name (e.g., ex:predicate) + String[] parts = raw.split(":", 2); + String ns = prefixMap.get(parts[0]); + if (ns != null) { + return ns + parts[1]; + } else { + throw new IllegalArgumentException("Prefix not declared: " + parts[0]); + } + } else { + return baseURI + raw; + } + } + + private String stripQuotes(String text) { 
+ if (text == null || text.length() < 2) return text; + if ((text.startsWith("\"") && text.endsWith("\"")) || + (text.startsWith("'''") && text.endsWith("'''")) || + (text.startsWith("\"\"\"") && text.endsWith("\"\"\""))) { + return text.substring(1, text.length() - 1); + } + return text; + } + + private Literal extractLiteral(TurtleParser.LiteralContext ctx) { + String label; + IRI datatype; + String lang; + + if (ctx.rdfLiteral() != null) { + if (ctx.rdfLiteral().iri() != null) { + datatype = factory.createIRI(resolveIRI(ctx.rdfLiteral().iri().getText())); + label = ctx.rdfLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label), datatype); + } + if (ctx.rdfLiteral().LANGTAG() != null) { + lang = ctx.rdfLiteral().LANGTAG().getText().substring(1); + label = ctx.rdfLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label), lang); + } + label = ctx.rdfLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label)); + } + + if (ctx.BooleanLiteral() != null) { + label = ctx.BooleanLiteral().getText(); + datatype = XSD.BOOLEAN.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral() != null) { + if (ctx.numericLiteral().DECIMAL() != null) { + label = ctx.numericLiteral().DECIMAL().getText(); + datatype = XSD.DECIMAL.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral().DOUBLE() != null) { + label = ctx.numericLiteral().DOUBLE().getText(); + datatype = XSD.DOUBLE.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral().INTEGER() != null) { + label = ctx.numericLiteral().INTEGER().getText(); + datatype = XSD.INTEGER.getIRI(); + return factory.createLiteral(label, datatype); + } + } + throw new IllegalArgumentException("Unsupported literal type: " + ctx.getText()); + } + + private Value extractObject(TurtleParser.Object_Context ctx) { + if (ctx.iri() != null) { + return new 
CoreseIRI(resolveIRI(ctx.iri().getText())); + } + if (ctx.BlankNode() != null) { + return new CoreseBNode(ctx.BlankNode().getText()); + } + if (ctx.literal() != null) { + return extractLiteral(ctx.literal()); + } + throw new RuntimeException("Unsupported object: " + ctx.getText()); + } + + private Resource extractSubject(TurtleParser.SubjectContext ctx) { + if (ctx.iri() != null) { + return factory.createIRI(resolveIRI(ctx.iri().getText())); + } + if (ctx.BlankNode() != null) { + return factory.createBNode(ctx.BlankNode().getText()); + } + throw new RuntimeException("Unsupported subject: " + ctx.getText()); + } + + private IRI extractPredicate(TurtleParser.PredicateContext ctx) { + return factory.createIRI(resolveIRI(ctx.getText())); + } + + private IRI extractVerb(TurtleParser.VerbContext ctx) { + if (ctx.predicate() != null) { + return extractPredicate(ctx.predicate()); + } + else return factory.createIRI(resolveIRI(ctx.getText())); + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java index 69be4314f..c20045dab 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java @@ -13,6 +13,6 @@ public RDFParser createRDFParser(RDFFormat format, Model model) { if (!format.equals(RDFFormats.TURTLE)) { throw new IllegalArgumentException("Unsupported format : " + format); } - return new TurtleParser(model); + return new ANTLRTurtleParser(model); } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java b/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java new file mode 100644 index 000000000..3f8724d2d --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java @@ 
-0,0 +1,30 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; + +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ANTLRTurtleParserSpec { + private Model parseFromString(String turtleData, String baseURI) throws Exception { + Model model = new CoreseModel(); + RDFParser parser = new ANTLRTurtleParser(model); + parser.parse(new StringReader(turtleData), baseURI); + return model; + } + + @Test + public void testParseWithPrefixAndTriple() throws Exception { + String turtle = " @prefix ex: . " + + "ex:Alice ex:knows ex:Bob ."; + + Model model = parseFromString(turtle, null); + assertEquals(1, model.size()); + assertEquals(1, model.getNamespaces().size()); + } + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java b/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java new file mode 100644 index 000000000..fb85e80b6 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java @@ -0,0 +1,99 @@ +package fr.inria.corese.core.next.impl.parser.turtle; + +import fr.inria.corese.core.next.impl.parser.antlr.TurtleLexer; +import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.junit.jupiter.api.Test; + +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.assertEquals; 
+ +public class TurtleListenerImplSpec { + private Model parseAndPrintModel(String turtleData) throws Exception { + CharStream input = CharStreams.fromReader(new StringReader(turtleData)); + TurtleLexer lexer = new TurtleLexer(input); + CommonTokenStream tokens = new CommonTokenStream(lexer); + TurtleParser parser = new TurtleParser(tokens); + ParseTreeWalker walker = new ParseTreeWalker(); + ParseTree tree = parser.turtleDoc(); + + Model model = new CoreseModel(); + TurtleListenerImpl listener = new TurtleListenerImpl(model, null); + walker.walk((ParseTreeListener) listener, tree); + + + model.forEach(stmt -> { + System.out.println(stmt.getSubject().stringValue() + " " + + stmt.getPredicate().stringValue() + " " + + stmt.getObject().stringValue()); + }); + + + return model; + } + + @Test + public void testNamespace() throws Exception { + String turtleData = " @prefix ex: . " + + "ex:subject ex:predicate 1 . "; + + Model model = parseAndPrintModel(turtleData); + assertEquals(model.getNamespaces().size(), 1); + } + + @Test + public void testTypedLiteral() throws Exception { + String turtleData = "@prefix ex: .\n" + + "@prefix xsd: .\n" + + "ex:subject ex:age \"27\"^^xsd:integer ."; + + Model model = parseAndPrintModel(turtleData); + assertEquals(model.size(), 1); + assertEquals(model.getNamespaces().size(), 2); + + } + + @Test + public void testMultipleObjects() throws Exception { + String turtleData = "@prefix ex: .\n" + + "ex:subject ex:knows ex:Alice , ex:Bob ; ex:likes ex:Pizza ."; + + Model model = parseAndPrintModel(turtleData); + assertEquals(model.size(), 3); + assertEquals(model.getNamespaces().size(), 1); + + } + + @Test + public void testRDFtype() throws Exception { + String turtleData = "@prefix ex: .\n" + + "ex:Alice a ex:Person .\n" + + "ex:subject ex:knows ex:Alice , ex:Bob ; ex:likes ex:Pizza ."; + + Model model = parseAndPrintModel(turtleData); + assertEquals(model.size(), 4); + assertEquals(model.getNamespaces().size(), 1); + } + + @Test + public 
void testBaseIRI() throws Exception { + String turtleData = "@base .\n" + + "@prefix : .\n" + + "@prefix rdf: . \n" + + "\n" + + " rdf:type rdf:Property .\n" + + ":phone rdf:type rdf:Property ."; + + Model model = parseAndPrintModel(turtleData); + assertEquals(model.size(), 2); + assertEquals(model.getNamespaces().size(), 2); + } +} \ No newline at end of file From 274b1d9b30bb7ceaa583bb2c65360682018e7909 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 27 Jun 2025 18:04:39 +0200 Subject: [PATCH 6/6] - changes in the package structure - add factory to listenerImpl --- .../api/base/parser/RDFParserFactory.java | 3 +- .../parser/turtle/ANTLRTurtleParser.java | 9 ++-- .../parser/turtle/TurtleListenerImpl.java | 23 +++------- .../parser/turtle/TurtleParserFactory.java | 11 +++-- .../next/impl/parser/turtle/TurtleParser.java | 46 ------------------- .../parser/turtle/ANTLRTurtleParserSpec.java | 7 ++- .../parser/turtle/TurtleListenerImplSpec.java | 37 +++++++++++++-- 7 files changed, 62 insertions(+), 74 deletions(-) rename src/main/java/fr/inria/corese/core/next/impl/{ => io}/parser/turtle/ANTLRTurtleParser.java (89%) rename src/main/java/fr/inria/corese/core/next/impl/{ => io}/parser/turtle/TurtleListenerImpl.java (90%) rename src/main/java/fr/inria/corese/core/next/impl/{ => io}/parser/turtle/TurtleParserFactory.java (70%) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java rename src/test/java/fr/inria/corese/core/next/impl/{ => io}/parser/turtle/ANTLRTurtleParserSpec.java (74%) rename src/test/java/fr/inria/corese/core/next/impl/{ => io}/parser/turtle/TurtleListenerImplSpec.java (73%) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java index 950de673e..9124ee5e8 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java +++ 
b/src/main/java/fr/inria/corese/core/next/api/base/parser/RDFParserFactory.java @@ -1,6 +1,7 @@ package fr.inria.corese.core.next.api.base.parser; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; public interface RDFParserFactory { @@ -11,6 +12,6 @@ public interface RDFParserFactory { * @param model The model to which the parsed data will be added. * @return A new instance of an RDF parser for the specified format and model. */ - RDFParser createRDFParser(RDFFormat format, Model model); + RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java similarity index 89% rename from src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java index 875d6c738..7fe167569 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParser.java @@ -1,5 +1,6 @@ -package fr.inria.corese.core.next.impl.parser.turtle; +package fr.inria.corese.core.next.impl.io.parser.turtle; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.parser.RDFParser; import fr.inria.corese.core.next.api.Model; import fr.inria.corese.core.next.api.base.parser.RDFFormat; @@ -23,9 +24,11 @@ public class ANTLRTurtleParser implements RDFParser { private final Model model; private final RDFFormat format = RDFFormats.TURTLE; + private final ValueFactory factory; - public ANTLRTurtleParser(Model model) { + public ANTLRTurtleParser(Model model, ValueFactory factory) { this.model = model; + this.factory = factory; } @Override @@ -63,7 +66,7 @@ public void parse(Reader reader, String baseURI) { 
TurtleParser parser = new TurtleParser(tokens); ParseTreeWalker walker = new ParseTreeWalker(); ParseTree tree = parser.turtleDoc(); - TurtleListenerImpl listener = new TurtleListenerImpl(model, baseURI); + TurtleListenerImpl listener = new TurtleListenerImpl(model, baseURI, factory); walker.walk((ParseTreeListener) listener, tree); diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java similarity index 90% rename from src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java index 8488f8968..ae3f28f95 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImpl.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImpl.java @@ -1,4 +1,4 @@ -package fr.inria.corese.core.next.impl.parser.turtle; +package fr.inria.corese.core.next.impl.io.parser.turtle; import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.impl.common.literal.XSD; @@ -21,14 +21,15 @@ public class TurtleListenerImpl extends TurtleBaseListener { private final Model model; private String baseURI; private final Map prefixMap = new HashMap<>(); - private final ValueFactory factory = new CoreseAdaptedValueFactory(); + private final ValueFactory factory; private Resource currentSubject; private IRI currentPredicate; - public TurtleListenerImpl(Model model, String baseURI) { + public TurtleListenerImpl(Model model, String baseURI, ValueFactory factory) { this.model = model; this.baseURI = baseURI != null ? 
baseURI : ""; + this.factory = factory; } public void exitPrefixID(TurtleParser.PrefixIDContext ctx) { @@ -47,7 +48,7 @@ public void exitSparqlBase(TurtleParser.SparqlBaseContext ctx) { baseURI = iri.substring(1, iri.length() - 1); } - public void enterTriples(TurtleParser.TriplesContext ctx) { + public void enterTriples(TurtleParser.TriplesContext ctx) { currentSubject = extractSubject(ctx.subject()); } @@ -60,16 +61,6 @@ public void exitObject_(TurtleParser.Object_Context ctx) { model.add(currentSubject, currentPredicate, object); } - - @Override public void enterEveryRule(ParserRuleContext ctx) {} - - @Override public void exitEveryRule(ParserRuleContext ctx) { } - - @Override public void visitTerminal(TerminalNode node) { } - - @Override public void visitErrorNode(ErrorNode node) { } - - private String resolveIRI(String raw) { if (raw.startsWith("<") && raw.endsWith(">")) { return raw.substring(1, raw.length() - 1); @@ -146,10 +137,10 @@ private Literal extractLiteral(TurtleParser.LiteralContext ctx) { private Value extractObject(TurtleParser.Object_Context ctx) { if (ctx.iri() != null) { - return new CoreseIRI(resolveIRI(ctx.iri().getText())); + return factory.createIRI(resolveIRI(ctx.iri().getText())); } if (ctx.BlankNode() != null) { - return new CoreseBNode(ctx.BlankNode().getText()); + return factory.createBNode(ctx.BlankNode().getText()); } if (ctx.literal() != null) { return extractLiteral(ctx.literal()); diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleParserFactory.java similarity index 70% rename from src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleParserFactory.java index c20045dab..1c9a9de66 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParserFactory.java +++ 
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleParserFactory.java @@ -1,6 +1,7 @@ -package fr.inria.corese.core.next.impl.parser.turtle; +package fr.inria.corese.core.next.impl.io.parser.turtle; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.parser.RDFFormat; import fr.inria.corese.core.next.api.base.parser.RDFFormats; import fr.inria.corese.core.next.api.base.parser.RDFParser; @@ -8,11 +9,15 @@ public class TurtleParserFactory implements RDFParserFactory { + public TurtleParserFactory() { + super(); + } + @Override - public RDFParser createRDFParser(RDFFormat format, Model model) { + public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) { if (!format.equals(RDFFormats.TURTLE)) { throw new IllegalArgumentException("Unsupported format : " + format); } - return new ANTLRTurtleParser(model); + return new ANTLRTurtleParser(model, factory); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java b/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java deleted file mode 100644 index bae3779fa..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleParser.java +++ /dev/null @@ -1,46 +0,0 @@ -package fr.inria.corese.core.next.impl.parser.turtle; - -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.base.parser.RDFFormat; -import fr.inria.corese.core.next.api.base.parser.RDFFormats; -import fr.inria.corese.core.next.api.base.parser.RDFParser; - -import java.io.InputStream; -import java.io.Reader; - -public class TurtleParser implements RDFParser { - - private final Model model; - private final RDFFormat format = RDFFormats.TURTLE; - - public TurtleParser(Model model) { - this.model = model; - } - - - @Override - public RDFFormat getRDFFormat() { - return format; - 
} - - @Override - public void parse(InputStream in) { - - } - - @Override - public void parse(InputStream in, String baseURI) { - - } - - @Override - public void parse(Reader reader) { - - } - - @Override - public void parse(Reader reader, String baseURI) { - - } -} diff --git a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserSpec.java similarity index 74% rename from src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserSpec.java index 3f8724d2d..6d8f12c83 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/ANTLRTurtleParserSpec.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserSpec.java @@ -1,7 +1,9 @@ -package fr.inria.corese.core.next.impl.parser.turtle; +package fr.inria.corese.core.next.impl.io.parser.turtle; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; @@ -12,7 +14,8 @@ public class ANTLRTurtleParserSpec { private Model parseFromString(String turtleData, String baseURI) throws Exception { Model model = new CoreseModel(); - RDFParser parser = new ANTLRTurtleParser(model); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser parser = new ANTLRTurtleParser(model, factory); parser.parse(new StringReader(turtleData), baseURI); return model; } diff --git a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplSpec.java similarity index 73% rename from 
src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplSpec.java index fb85e80b6..3f7326452 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/parser/turtle/TurtleListenerImplSpec.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/TurtleListenerImplSpec.java @@ -1,8 +1,10 @@ -package fr.inria.corese.core.next.impl.parser.turtle; +package fr.inria.corese.core.next.impl.io.parser.turtle; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.impl.parser.antlr.TurtleLexer; import fr.inria.corese.core.next.impl.parser.antlr.TurtleParser; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; @@ -14,10 +16,12 @@ import java.io.StringReader; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; public class TurtleListenerImplSpec { private Model parseAndPrintModel(String turtleData) throws Exception { + ValueFactory factory = new CoreseAdaptedValueFactory(); + CharStream input = CharStreams.fromReader(new StringReader(turtleData)); TurtleLexer lexer = new TurtleLexer(input); CommonTokenStream tokens = new CommonTokenStream(lexer); @@ -26,16 +30,19 @@ private Model parseAndPrintModel(String turtleData) throws Exception { ParseTree tree = parser.turtleDoc(); Model model = new CoreseModel(); - TurtleListenerImpl listener = new TurtleListenerImpl(model, null); + TurtleListenerImpl listener = new TurtleListenerImpl(model, null, factory); walker.walk((ParseTreeListener) listener, tree); + /* model.forEach(stmt -> { System.out.println(stmt.getSubject().stringValue() + " " + stmt.getPredicate().stringValue() + " " + stmt.getObject().stringValue()); 
}); + */ + return model; } @@ -96,4 +103,28 @@ public void testBaseIRI() throws Exception { assertEquals(model.size(), 2); assertEquals(model.getNamespaces().size(), 2); } + + @Test + public void testTypedIntegerLiteral() throws Exception { + String turtleData = + "@prefix : .\n" + + "@prefix xsd: .\n" + + ":John :age \"42\"^^xsd:integer ."; + + Model model = parseAndPrintModel(turtleData); + model.objects().forEach(obj -> { + assertTrue(obj.isLiteral(), "Expected object to be a literal"); + // test if we can parse the literal to int. Should be ok + try { + int value = Integer.parseInt(obj.stringValue()); + System.out.println("Parsed integer: " + value); + } catch (NumberFormatException e) { + fail("Literal is not a valid integer: " + obj.stringValue()); + } + }); + + + assertEquals(model.size(), 1); + assertEquals(model.getNamespaces().size(), 2); + } } \ No newline at end of file