From 8c575cb9b7c91c683ca9e4e5f58713016c60b936 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 1 Jul 2025 15:47:13 +0200 Subject: [PATCH 01/64] factory --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 81 +++++++++++++++++++ .../io/parser/rdfxml/RdfXmlParserFactory.java | 23 ++++++ 2 files changed, 104 insertions(+) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java new file mode 100644 index 000000000..b923f125c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -0,0 +1,81 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.InputStream; +import java.io.Reader; + +public class RdfXmlParser extends DefaultHandler implements RDFParser { + + private final Model model; + private final RDFFormat format = RDFFormats.RDF_XML; + private final ValueFactory factory; + private String baseURI; + + public RdfXmlParser(Model model, ValueFactory factory) { + this.model = model; + this.factory = factory; + } + + @Override + public RDFFormat getRDFFormat() { + return format; + } + + @Override + public void parse(InputStream in) { + parse(in, null); + } + + @Override + public void parse(InputStream in, String baseURI) { + this.baseURI = baseURI; + try { + 
SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(in, this); + } catch (Exception e) { + throw new RuntimeException("Failed to parse RDF/XML input stream", e); + } + } + + @Override + public void parse(Reader reader) { + parse(reader, null); + } + + @Override + public void parse(Reader reader, String baseURI) { + this.baseURI = baseURI; + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + InputSource inputSource = new InputSource(reader); + saxParser.parse(inputSource, this); + } catch (Exception e) { + throw new RuntimeException("Failed to parse RDF/XML input stream", e); + } + } + + // SAX: element start + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) { + + } + + // SAX: element end + @Override + public void endElement(String uri, String localName, String qName) { + System.out.println("End: " + qName); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java new file mode 100644 index 000000000..5a3f59b46 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java @@ -0,0 +1,23 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.api.base.parser.RDFParserFactory; + +public class RdfXmlParserFactory implements RDFParserFactory { + + public RdfXmlParserFactory() { + super(); + } + + @Override + 
public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) { + if (!format.equals(RDFFormats.RDF_XML)) { + throw new IllegalArgumentException("Unsupported format : " + format); + } + return new RdfXmlParser(model, factory); + } +} \ No newline at end of file From 4d5d09ed7215eb5326354ce20b23bb5e3b89aff6 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Mon, 7 Jul 2025 17:08:16 +0200 Subject: [PATCH 02/64] build with Alpakka --- build.gradle.kts | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index b8e91e7e1..24ce19206 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -78,11 +78,11 @@ object Meta { // Project description const val desc = "Corese is a Semantic Web Factory (triple store and SPARQL endpoint) implementing RDF, RDFS, SPARQL 1.1 Query and Update, Shacl. STTL. LDScript." const val githubRepo = "corese-stack/corese-core" - + // License information const val license = "CeCILL-C License" const val licenseUrl = "https://opensource.org/licenses/CeCILL-C" - + // Sonatype OSSRH publishing settings const val release = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" const val snapshot = "https://oss.sonatype.org/content/repositories/snapshots/" @@ -118,7 +118,7 @@ dependencies { runtimeOnly("org.apache.logging.log4j:log4j-slf4j2-impl:2.25.0") // SLF4J binding for Log4j2 (runtime) // === Core dependencies === - api("fr.com.hp.hpl.jena.rdf.arp:arp:2.2.b") // RDF/XML parser (Jena ARP) + api("fr.com.hp.hpl.jena.rdf.arp:arp:2.2.b") // RDF/XML parser (Jena ARP) implementation("fr.inria.corese.org.semarglproject:semargl-rdfa:0.7.2") // RDFa parser (Semargl) implementation("com.github.jsonld-java:jsonld-java:0.13.4") // JSON-LD processing @@ -126,25 +126,28 @@ dependencies { antlr("org.antlr:antlr4:4.13.2") // Antlr for parsing (ANTLR 4) implementation("org.antlr:antlr4-runtime:4.13.2") // Antlr runtime for parsing - - // === JSONLD - 
implementation("com.apicatalog:titanium-json-ld:1.6.0") - implementation("com.apicatalog:titanium-rdf-api:1.0.0") - implementation("org.eclipse.parsson:parsson:1.1.7") - implementation("jakarta.json:jakarta.json-api:2.1.3") - // === HTTP and XML === implementation("org.glassfish.jersey.core:jersey-client:3.1.10") // HTTP client (Jersey) implementation("org.glassfish.jersey.inject:jersey-hk2:3.1.10") // Dependency injection for Jersey implementation("com.sun.activation:jakarta.activation:2.0.1") // MIME type handling (Jakarta Activation) + // === JSONLD Parsing === + implementation("com.apicatalog:titanium-json-ld:1.6.0") // JSON-LD processing library + implementation("com.apicatalog:titanium-rdf-api:1.0.0") // Titanium RDF API for JSON-LD processing + implementation("org.eclipse.parsson:parsson:1.1.7") // JSON parser for JSON-LD + implementation("jakarta.json:jakarta.json-api:2.1.3") // Jakarta JSON API for JSON processing + + + // === XML parsing === + implementation("com.typesafe.akka:akka-stream_2.13:2.6.20") // Akka Streams for reactive streams processing + implementation("com.lightbend.akka:akka-stream-alpakka-xml_2.13:3.0.4") // Alpakka XML for XML processing with Akka Streams + + // === Utilities === implementation("org.apache.commons:commons-text:1.13.1") // Text manipulation utilities (Commons Text) implementation("org.json:json:20250517") // JSON processing implementation("com.typesafe:config:1.4.3") // Configuration library (Typesafe Config) - - // === Test dependencies === testImplementation(platform("org.junit:junit-bom:5.13.2")) // JUnit BOM for consistent test versions testImplementation("org.junit.jupiter:junit-jupiter:5.13.2") // JUnit Jupiter API and engine @@ -165,7 +168,7 @@ publishing { // Configure the publication to include JAR, sources, and Javadoc from(components["java"]) - // Configures version mapping to control how dependency versions are resolved + // Configures version mapping to control how dependency versions are resolved // for 
different usage contexts (API and runtime). versionMapping { // Defines version mapping for Java API usage. @@ -292,7 +295,7 @@ tasks.withType { tasks { shadowJar { this.archiveClassifier = "jar-with-dependencies" - } + } } // Configure Javadoc tasks to disable doclint warnings. @@ -392,4 +395,4 @@ tasks.named("generateGrammarSource") { tasks.named("javaccSparqlCorese") { dependsOn("createGeneratedDirs") -} +} \ No newline at end of file From acba4aec32add45941510635cafe85df28adb285 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 8 Jul 2025 14:02:50 +0200 Subject: [PATCH 03/64] xml parser setup --- .../util/SerializationConstants.java | 99 +++++++++++++++++++ .../impl/io/parser/rdfxml/RdfXmlParser.java | 95 ++++++++++++++++-- .../io/parser/rdfxml/RdfxmlParserTest.java | 35 +++++++ 3 files changed, 222 insertions(+), 7 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java new file mode 100644 index 000000000..58c0090c0 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java @@ -0,0 +1,99 @@ +package fr.inria.corese.core.next.impl.common.serializer.util; + +/** + * Provides common constants used throughout the RDF serialization process. + * This includes URIs for common RDF, RDFS, XSD, and OWL vocabularies, + * as well as various special characters and strings used in serialization formats + * like Turtle, N-Triples, and N-Quads. 
+ */ +public final class SerializationConstants { + + private SerializationConstants() { + // Private constructor to prevent instantiation + } + + // --- Standard RDF/RDFS/XSD/OWL URIs --- + public static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public static final String RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"; + public static final String XSD_NS = "http://www.w3.org/2001/XMLSchema#"; + public static final String OWL_NS = "http://www.w3.org/2002/07/owl#"; + + public static final String RDF_TYPE = RDF_NS + "type"; + public static final String RDF_FIRST = RDF_NS + "first"; + public static final String RDF_REST = RDF_NS + "rest"; + public static final String RDF_NIL = RDF_NS + "nil"; + + public static final String XSD_STRING = XSD_NS + "string"; + public static final String XSD_INTEGER = XSD_NS + "integer"; + public static final String XSD_DECIMAL = XSD_NS + "decimal"; + public static final String XSD_DOUBLE = XSD_NS + "double"; + public static final String XSD_BOOLEAN = XSD_NS + "boolean"; + public static final String XSD_DATETIME = XSD_NS + "dateTime"; + + // Nouveau namespace FOAF + public static final String FOAF_NS = "http://xmlns.com/foaf/0.1/"; + + + // --- Common Delimiters and Special Characters in Serialization --- + public static final String SPACE = " "; + public static final String TAB = "\t"; + public static final String LINE_FEED = "\n"; + public static final String CARRIAGE_RETURN = "\r"; + public static final String NEWLINE = LINE_FEED; + + public static final String POINT = "."; + public static final String SEMICOLON = ";"; + public static final String COMMA = ","; + public static final String AT_SIGN = "@"; + public static final String CARET = "^"; + public static final String LT = "<"; // Less than + public static final String GT = ">"; // Greater than + public static final String QUOTE = "\""; + public static final String COLON = ":"; + public static final String BACK_SLASH = "\\"; + + // Nouveaux 
délimiteurs + public static final String HASH = "#"; + public static final String SLASH = "/"; + + + // Turtle-specific + public static final String RDF_TYPE_SHORTCUT = "a"; + public static final String BNODE_PREFIX = "_:"; + public static final String DATATYPE_SEPARATOR = "^^"; + public static final String BLANK_NODE_START = "["; + public static final String BLANK_NODE_END = "]"; + + public static final String OPEN_PARENTHESIS = "("; + public static final String CLOSE_PARENTHESIS = ")"; + + // --- Default Values for Configuration --- + public static final String DEFAULT_INDENTATION = " "; // Two spaces + public static final String DEFAULT_LINE_ENDING = "\n"; // Unix-style + + public static final String EMPTY_STRING = ""; + + // TriG-specific + public static final String OPEN_BRACE = "{"; + public static final String CLOSE_BRACE = "}"; + + // XML-specific constants + public static final String XML_DECLARATION_START = ""; + public static final String RDF_ROOT_START = " statementStack = new ArrayDeque<>(); public RdfXmlParser(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } + // used for my play test class + public RdfXmlParser() { + this(new CoreseModel(), new CoreseAdaptedValueFactory()); + } + @Override public RDFFormat getRDFFormat() { return format; @@ -67,15 +81,82 @@ public void parse(Reader reader, String baseURI) { } } - // SAX: element start @Override - public void startElement(String uri, String localName, String qName, Attributes attributes) { + public void startElement(String uri, String localName, String qName, Attributes attrs) { + characters.setLength(0); + + if (isRdfRDF(uri, localName)) { + return; // skip root element + } + + // nodeElement + if (currentSubject == null) { + currentSubject = extractSubject(attrs); + return; + } + + // propertyElement → create statement and push it + IRI predicate = factory.createIRI(qName); // TODO: resolve properly + var resourceAttr = attrs.getValue(RDF.type.getNamespace(), 
"resource"); + + if (resourceAttr != null) { + Value object = factory.createLiteral(resourceAttr); + Statement stmt = factory.createStatement(currentSubject, predicate, object); + model.add(stmt); + return; + } + // literal content will be handled in endElement + Statement stub = factory.createStatement(currentSubject, predicate, null); + statementStack.push(stub); } - // SAX: element end @Override public void endElement(String uri, String localName, String qName) { - System.out.println("End: " + qName); + if (!statementStack.isEmpty()) { + Statement stmt = statementStack.pop(); + String content = characters.toString().trim(); + if (!content.isEmpty()) { + Value literal = factory.createLiteral(content); + Statement complete = factory.createStatement( + stmt.getSubject(), stmt.getPredicate(), literal + ); + model.add(complete); + } + } else { + // end of nodeElement + currentSubject = null; + } + } + + @Override + public void characters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + private Resource extractSubject(Attributes attrs) { + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(about); + + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + if (nodeID != null) return factory.createBNode("_:" + nodeID); + + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI("#" + id); + + return factory.createBNode(); + } + + private boolean isRdfRDF(String uri, String localName) { + return RDF.type.getNamespace().equals(uri) && "RDF".equals(localName); + } + + private void emitTripleString(String subject, String predicate, String object) { + System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); + } + + + private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { + this.statement = factory.createStatement(subj, pred, obj, context); } } diff --git 
a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java new file mode 100644 index 000000000..825997f25 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -0,0 +1,35 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import org.junit.jupiter.api.Test; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; + +public class RdfxmlParserTest { + + @Test + public void testBasicRdfParsing() throws Exception { + String rdfXml = "" + + "" + + "" + + " " + + " John Smith" + + " 2025-07-07" + + " " + + ""; + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + RdfXmlParser handler = new RdfXmlParser(); + + saxParser.parse(inputStream, handler); + } + + +} From 7bcf3165bf7b9de488f468e9750869bdf3c291d0 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 15:07:30 +0200 Subject: [PATCH 04/64] rdf xml parser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 159 ++++++++++++++---- 1 file changed, 122 insertions(+), 37 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index 61c4d6065..d3f0718bd 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -14,7 +14,6 @@ import java.io.Reader; import java.util.ArrayDeque; import java.util.Deque; -import java.util.Stack; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; 
public class RdfXmlParser extends DefaultHandler implements RDFParser { @@ -22,19 +21,27 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private final Model model; private final RDFFormat format = RDFFormats.RDF_XML; private final ValueFactory factory; + + private StringBuilder characters = new StringBuilder(); + private String baseURI; private Resource currentSubject; private Statement statement; - private StringBuilder characters = new StringBuilder(); private final Deque statementStack = new ArrayDeque<>(); + private final Deque subjectStack = new ArrayDeque<>(); + private final Deque predicateStack = new ArrayDeque<>(); + + + private boolean inContainer = false; + private int liIndex = 1; public RdfXmlParser(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } - // used for my play test class + // used for test class and can be removed public RdfXmlParser() { this(new CoreseModel(), new CoreseAdaptedValueFactory()); } @@ -85,47 +92,103 @@ public void parse(Reader reader, String baseURI) { public void startElement(String uri, String localName, String qName, Attributes attrs) { characters.setLength(0); - if (isRdfRDF(uri, localName)) { - return; // skip root element - } + // Ignore rdf:RDF + if (isRdfRDF(uri, localName)) return; - // nodeElement - if (currentSubject == null) { - currentSubject = extractSubject(attrs); + // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt + if (isContainer(localName, uri)) { + Resource subject = extractSubject(attrs); + subjectStack.push(subject); + inContainer = true; + liIndex = 1; return; } - // propertyElement → create statement and push it - IRI predicate = factory.createIRI(qName); // TODO: resolve properly - var resourceAttr = attrs.getValue(RDF.type.getNamespace(), "resource"); + // Handle container children: rdf:li → rdf:_n + if (inContainer && RDF.type.getNamespace().equals(uri)) { + String pred = null; + if ("li".equals(localName)) { + pred = 
RDF.type.getNamespace() + "_" + liIndex++; + } else if (localName.matches("_\\d+")) { + pred = RDF.type.getNamespace() + localName; + } + + if (pred != null) { + IRI predicate = factory.createIRI(pred); + String resource = attrs.getValue("rdf:resource"); + if (resource != null) { + model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + } + return; + } + } + + // Handle + if (isDescription(localName, uri)) { + Resource subject = extractSubject(attrs); + + if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { + Resource parent = subjectStack.peek(); + IRI predicate = predicateStack.peek(); + model.add(factory.createStatement(parent, predicate, subject)); + } + + subjectStack.push(subject); + + for (int i = 0; i < attrs.getLength(); i++) { + String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + String value = attrs.getValue(i); + + if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) { + continue; // skip core syntax attributes + } + + IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); + model.add(factory.createStatement(subject, pred, factory.createLiteral(value))); + } - if (resourceAttr != null) { - Value object = factory.createLiteral(resourceAttr); - Statement stmt = factory.createStatement(currentSubject, predicate, object); - model.add(stmt); return; } - // literal content will be handled in endElement - Statement stub = factory.createStatement(currentSubject, predicate, null); - statementStack.push(stub); + + // Handle regular property elements + IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); + predicateStack.push(predicate); + + // Check for rdf:resource + String resource = attrs.getValue("rdf:resource"); + if (resource != null) { + model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + } } @Override public void endElement(String uri, String localName, 
String qName) { - if (!statementStack.isEmpty()) { - Statement stmt = statementStack.pop(); - String content = characters.toString().trim(); - if (!content.isEmpty()) { - Value literal = factory.createLiteral(content); - Statement complete = factory.createStatement( - stmt.getSubject(), stmt.getPredicate(), literal - ); - model.add(complete); - } - } else { - // end of nodeElement - currentSubject = null; + String text = characters.toString().trim(); + characters.setLength(0); + + if (isContainer(localName, uri)) { + subjectStack.pop(); + inContainer = false; + liIndex = 1; + return; + } + + if (isDescription(localName, uri)) { + subjectStack.pop(); + return; + } + + // Closing a property element with literal content + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + model.add(factory.createStatement(subject, predicate, factory.createLiteral(text))); + + } else if (!predicateStack.isEmpty()) { + predicateStack.pop(); // still clean up } } @@ -135,27 +198,49 @@ public void characters(char[] ch, int start, int length) { } private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue(RDF.type.getNamespace(), "about"); + String about = attrs.getValue("rdf:about"); if (about != null) return factory.createIRI(about); - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + String nodeID = attrs.getValue("rdf:nodeID"); if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + String id = attrs.getValue("rdf:ID"); if (id != null) return factory.createIRI("#" + id); return factory.createBNode(); } private boolean isRdfRDF(String uri, String localName) { - return RDF.type.getNamespace().equals(uri) && "RDF".equals(localName); + return RDF.type.equals(uri) && "RDF".equals(localName); + } + + private boolean isDescription(String localName, String uri) { + return 
RDF.type.getNamespace().equals(uri) && "Description".equals(localName); + } + + private boolean isContainer(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && + ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); + } + + private String expandQName(String uri, String localName, String qName) { + return (uri != null && !uri.isEmpty()) ? uri + localName : qName; + } + + private boolean isSyntaxAttribute(String uri, String localName, String qName) { + if (uri != null && RDF.type.getNamespace().equals(uri)) { + return switch (localName) { + case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; + default -> false; + }; + } + return qName.startsWith("xml:"); } private void emitTripleString(String subject, String predicate, String object) { System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); } - private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, context); } From 8d8193ef5aa24496c3299af954b5a37419361da1 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 16:50:49 +0200 Subject: [PATCH 05/64] rdf xml parser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index d3f0718bd..f9c24658f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -31,7 +31,7 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private final Deque statementStack = new ArrayDeque<>(); private final Deque subjectStack = new ArrayDeque<>(); private final Deque predicateStack = new ArrayDeque<>(); - + private final Deque 
langStack = new ArrayDeque<>(); private boolean inContainer = false; private int liIndex = 1; @@ -95,6 +95,16 @@ public void startElement(String uri, String localName, String qName, Attributes // Ignore rdf:RDF if (isRdfRDF(uri, localName)) return; + // Handle xml:lang + String xmlLang = attrs.getValue("xml:lang"); + if (xmlLang != null) { + // "" means no language + langStack.push(xmlLang.isEmpty() ? null : xmlLang); + } else { + // Inherit from parent + langStack.push(langStack.peek()); + } + // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt if (isContainer(localName, uri)) { Resource subject = extractSubject(attrs); @@ -169,29 +179,46 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); + // Handle language cleanup + if (!langStack.isEmpty()) { + langStack.pop(); + } + + // End of a container (rdf:Seq, rdf:Bag, rdf:Alt) if (isContainer(localName, uri)) { - subjectStack.pop(); + if (!subjectStack.isEmpty()) { + subjectStack.pop(); + } inContainer = false; liIndex = 1; return; } + // End of rdf:Description if (isDescription(localName, uri)) { - subjectStack.pop(); + if (!subjectStack.isEmpty()) { + subjectStack.pop(); + } return; } - // Closing a property element with literal content - if (!predicateStack.isEmpty() && !text.isEmpty()) { + // Closing a property element with text content + if (!predicateStack.isEmpty()) { IRI predicate = predicateStack.pop(); Resource subject = subjectStack.peek(); - model.add(factory.createStatement(subject, predicate, factory.createLiteral(text))); - } else if (!predicateStack.isEmpty()) { - predicateStack.pop(); // still clean up + if (!text.isEmpty()) { + String lang = langStack.peek(); + Value literal = (lang != null) + ? 
factory.createLiteral(text, lang) + : factory.createLiteral(text); + + model.add(factory.createStatement(subject, predicate, literal)); + } } } + @Override public void characters(char[] ch, int start, int length) { characters.append(ch, start, length); From fd3596207acf826b3e0ad851f4ba3e72f965fe26 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 17:47:00 +0200 Subject: [PATCH 06/64] rdf xml parser test --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 85 +++++++++++++------ .../io/parser/rdfxml/RdfxmlParserTest.java | 37 +++++--- 2 files changed, 83 insertions(+), 39 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index f9c24658f..b1e6d08bd 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -4,6 +4,7 @@ import fr.inria.corese.core.next.api.base.parser.RDFFormat; import fr.inria.corese.core.next.api.base.parser.RDFFormats; import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.xml.sax.*; @@ -14,6 +15,8 @@ import java.io.Reader; import java.util.ArrayDeque; import java.util.Deque; +import java.util.Optional; + import fr.inria.corese.core.next.impl.common.vocabulary.RDF; public class RdfXmlParser extends DefaultHandler implements RDFParser { @@ -25,13 +28,13 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private StringBuilder characters = new StringBuilder(); private String baseURI; - private Resource currentSubject; + private Statement statement; - private final Deque statementStack = new ArrayDeque<>(); private final Deque subjectStack = new ArrayDeque<>(); 
private final Deque predicateStack = new ArrayDeque<>(); private final Deque langStack = new ArrayDeque<>(); + private final Deque datatypeStack = new ArrayDeque<>(); private boolean inContainer = false; private int liIndex = 1; @@ -92,6 +95,9 @@ public void parse(Reader reader, String baseURI) { public void startElement(String uri, String localName, String qName, Attributes attrs) { characters.setLength(0); + String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); + datatypeStack.push(datatype); + // Ignore rdf:RDF if (isRdfRDF(uri, localName)) return; @@ -179,43 +185,57 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); - // Handle language cleanup - if (!langStack.isEmpty()) { - langStack.pop(); + if (!langStack.isEmpty()) langStack.pop(); + if (!datatypeStack.isEmpty()) { + String datatypeUri = datatypeStack.pop(); + + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + + Value literal; + + if (datatypeUri != null) { + Optional known = fromURI(datatypeUri); + + if (known.isPresent()) { + // normalized + IRI normalizedDatatype = known.get().getIRI(); + literal = factory.createLiteral(text, normalizedDatatype); + } else { + // unknown datatype – fallback or warning + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + IRI fallbackDatatype = factory.createIRI(datatypeUri); + literal = factory.createLiteral(text, fallbackDatatype); + } + } else { + // no datatype – use xml:lang if set + String lang = langStack.peek(); + literal = (lang != null) + ? 
factory.createLiteral(text, lang) + : factory.createLiteral(text); + } + + model.add(factory.createStatement(subject, predicate, literal)); + } else if (!predicateStack.isEmpty()) { + predicateStack.pop(); // cleanup + } + + return; } - // End of a container (rdf:Seq, rdf:Bag, rdf:Alt) + // other cases (containers, descriptions, etc.) if (isContainer(localName, uri)) { - if (!subjectStack.isEmpty()) { - subjectStack.pop(); - } + if (!subjectStack.isEmpty()) subjectStack.pop(); inContainer = false; liIndex = 1; return; } - // End of rdf:Description if (isDescription(localName, uri)) { - if (!subjectStack.isEmpty()) { - subjectStack.pop(); - } + if (!subjectStack.isEmpty()) subjectStack.pop(); return; } - - // Closing a property element with text content - if (!predicateStack.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - if (!text.isEmpty()) { - String lang = langStack.peek(); - Value literal = (lang != null) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); - - model.add(factory.createStatement(subject, predicate, literal)); - } - } } @@ -271,4 +291,13 @@ private void emitTripleString(String subject, String predicate, String object) { private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, context); } + + public Optional fromURI(String uri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(uri)) { + return Optional.of(xsd); + } + } + return Optional.empty(); + } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index 825997f25..a78a94193 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,5 +1,6 @@ package 
fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; @@ -11,24 +12,38 @@ public class RdfxmlParserTest { @Test public void testBasicRdfParsing() throws Exception { - String rdfXml = "" + - "" + - "" + - " " + - " John Smith" + - " 2025-07-07" + - " " + - ""; + String rdfXml = """ + + + + John Smith + 2025-07-07 + + + """; + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + // Set up the parser SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - RdfXmlParser handler = new RdfXmlParser(); + // Provide an explicit model + CoreseModel model = new CoreseModel(); + RdfXmlParser handler = new RdfXmlParser(model); + // Parse the input saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected two RDF statements"); + + model.statements().forEach(stmt -> { + System.out.println(stmt); + }); } From 6ac245b89568f92cc750a3231aa2df65d9f5e56d Mon Sep 17 00:00:00 2001 From: pierrerene Date: Wed, 16 Jul 2025 10:14:16 +0200 Subject: [PATCH 07/64] abstractstatement rdfxml updates --- .../api/base/model/AbstractStatement.java | 8 +- .../impl/io/parser/rdfxml/RdfXmlParser.java | 172 ++-- .../io/parser/rdfxml/RdfxmlParserTest.java | 784 +++++++++++++++++- 3 files changed, 891 insertions(+), 73 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java index 19707d6c7..3b5a5749a 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java +++ 
b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java @@ -59,10 +59,10 @@ public int hashCode() { @Override public String toString() { return "(" - + getSubject() - + ", " + getPredicate() - + ", " + getObject() - + (getContext() == null ? "" : ", " + getContext()) + + getSubject().stringValue() + + ", " + getPredicate().stringValue() + + ", " + getObject().stringValue() + + (getContext() == null ? "" : ", " + getContext().stringValue()) + ")"; } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index b1e6d08bd..0ac1cfa29 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -93,25 +93,30 @@ public void parse(Reader reader, String baseURI) { @Override public void startElement(String uri, String localName, String qName, Attributes attrs) { + // Ignore rdf:RDF root element + if (isRdfRDF(uri, localName)) return; characters.setLength(0); + // Handle datatype String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - datatypeStack.push(datatype); + if (datatype != null) { + datatypeStack.push(datatype); + } - // Ignore rdf:RDF - if (isRdfRDF(uri, localName)) return; // Handle xml:lang String xmlLang = attrs.getValue("xml:lang"); if (xmlLang != null) { - // "" means no language - langStack.push(xmlLang.isEmpty() ? 
null : xmlLang); - } else { - // Inherit from parent - langStack.push(langStack.peek()); + langStack.push(xmlLang); + } + + // Handle xml:base + String xmlBase = attrs.getValue("xml:base"); + if (xmlBase != null) { + baseURI = xmlBase; } - // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt + // Handle RDF containers if (isContainer(localName, uri)) { Resource subject = extractSubject(attrs); subjectStack.push(subject); @@ -120,7 +125,7 @@ public void startElement(String uri, String localName, String qName, Attributes return; } - // Handle container children: rdf:li → rdf:_n + // Handle container items: rdf:li → rdf:_n if (inContainer && RDF.type.getNamespace().equals(uri)) { String pred = null; if ("li".equals(localName)) { @@ -133,50 +138,66 @@ public void startElement(String uri, String localName, String qName, Attributes IRI predicate = factory.createIRI(pred); String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + model.add(factory.createStatement( + subjectStack.peek(), + predicate, + factory.createIRI(resolveAgainstBase(resource)) + )); } return; } } - // Handle - if (isDescription(localName, uri)) { - Resource subject = extractSubject(attrs); - + // Handle (typed or untyped) + if (isDescription(localName, uri) || isNodeElement(attrs)) { + Resource newSubject = extractSubject(attrs); + // If this or typed element is the object of a property if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { Resource parent = subjectStack.peek(); - IRI predicate = predicateStack.peek(); - model.add(factory.createStatement(parent, predicate, subject)); + IRI predicate = predicateStack.pop(); // consume the predicate + model.add(factory.createStatement(parent, predicate, newSubject)); } - subjectStack.push(subject); + subjectStack.push(newSubject); + + // If it's a typed node (e.g., ), add rdf:type triple + if (!isDescription(localName, uri)) { + 
IRI typeIRI = factory.createIRI(expandQName(uri, localName, qName)); + model.add(factory.createStatement( + newSubject, + factory.createIRI(RDF.type.getIRI().stringValue()), + typeIRI + )); + } + // Handle property attributes for (int i = 0; i < attrs.getLength(); i++) { String attrURI = attrs.getURI(i); String attrLocal = attrs.getLocalName(i); String attrQName = attrs.getQName(i); String value = attrs.getValue(i); - if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) { - continue; // skip core syntax attributes - } + if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); - model.add(factory.createStatement(subject, pred, factory.createLiteral(value))); + model.add(factory.createStatement(newSubject, pred, factory.createLiteral(value))); } return; } - - // Handle regular property elements + // Handle regular property elements (e.g., ) IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); predicateStack.push(predicate); - // Check for rdf:resource + // Handle rdf:resource object (IRI) String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + model.add(factory.createStatement( + subjectStack.peek(), + predicate, + factory.createIRI(resolveAgainstBase(resource)) + )); } } @@ -185,46 +206,48 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); + // Always pop lang/datatype if pushed if (!langStack.isEmpty()) langStack.pop(); - if (!datatypeStack.isEmpty()) { - String datatypeUri = datatypeStack.pop(); - - if (!predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - Value literal; - - if (datatypeUri != null) { - Optional known = fromURI(datatypeUri); - - if (known.isPresent()) { - // 
normalized - IRI normalizedDatatype = known.get().getIRI(); - literal = factory.createLiteral(text, normalizedDatatype); - } else { - // unknown datatype – fallback or warning - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - IRI fallbackDatatype = factory.createIRI(datatypeUri); - literal = factory.createLiteral(text, fallbackDatatype); - } + String datatypeUri = !datatypeStack.isEmpty() ? datatypeStack.pop() : null; + + // Property literal + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + + Value literal; + + if (datatypeUri != null && !datatypeUri.isBlank()) { + Optional known = fromURI(datatypeUri); + + if (known.isPresent()) { + // normalized datatype + IRI normalizedDatatype = known.get().getIRI(); + literal = factory.createLiteral(text, normalizedDatatype); } else { - // no datatype – use xml:lang if set - String lang = langStack.peek(); - literal = (lang != null) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); + // fallback datatype + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + IRI fallbackDatatype = factory.createIRI(datatypeUri); + literal = factory.createLiteral(text, fallbackDatatype); } - - model.add(factory.createStatement(subject, predicate, literal)); - } else if (!predicateStack.isEmpty()) { - predicateStack.pop(); // cleanup + } else { + // no datatype – use language tag if any + String lang = langStack.isEmpty() ? null : langStack.peek(); + literal = (lang != null && !lang.equals("__NO_LANG__")) + ? factory.createLiteral(text, lang) + : factory.createLiteral(text); } + model.add(factory.createStatement(subject, predicate, literal)); return; } - // other cases (containers, descriptions, etc.) 
+ // Clean up stray predicates + if (!predicateStack.isEmpty()) { + predicateStack.pop(); + } + + // Handle containers if (isContainer(localName, uri)) { if (!subjectStack.isEmpty()) subjectStack.pop(); inContainer = false; @@ -232,9 +255,9 @@ public void endElement(String uri, String localName, String qName) { return; } + // Handle end of rdf:Description if (isDescription(localName, uri)) { if (!subjectStack.isEmpty()) subjectStack.pop(); - return; } } @@ -245,18 +268,33 @@ public void characters(char[] ch, int start, int length) { } private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue("rdf:about"); - if (about != null) return factory.createIRI(about); + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(resolveAgainstBase(about)); - String nodeID = attrs.getValue("rdf:nodeID"); + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue("rdf:ID"); - if (id != null) return factory.createIRI("#" + id); + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI(resolveAgainstBase("#" + id)); + // Default to blank node return factory.createBNode(); } + private String resolveAgainstBase(String iri) { + if (iri == null) return null; + if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { + // Absolute IRI or no base, return as-is + return iri; + } + + try { + return new java.net.URI(baseURI).resolve(iri).toString(); + } catch (Exception e) { + throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + } + } + private boolean isRdfRDF(String uri, String localName) { return RDF.type.equals(uri) && "RDF".equals(localName); } @@ -292,6 +330,12 @@ private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, 
context); } + private boolean isNodeElement(Attributes attrs) { + return attrs.getValue(RDF.type.getNamespace(), "about") != null || + attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || + attrs.getValue(RDF.type.getNamespace(), "ID") != null; + } + public Optional fromURI(String uri) { for (XSD xsd : XSD.values()) { if (xsd.getIRI().stringValue().equals(uri)) { diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index a78a94193..e0cd7e2dd 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,6 +1,10 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.apache.jena.base.Sys; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; @@ -8,10 +12,58 @@ import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class RdfxmlParserTest { + @Test + public void testNodeElementsWithIRIs() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + + """; + + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // 
Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected two RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + + } @Test public void testBasicRdfParsing() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ { - System.out.println(stmt); + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample3CompleteDescriptionOfAllGraphPaths() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + + + + Dave Beckett + + + + + + RDF 1.2 XML Syntax + + + """.trim(); + + String inTriG = """ + @prefix dcterms: . + + { + _:b15, _:b16 ; + dcterms:title "RDF 1.2 XML Syntax" . + + _:b15 . + + _:b16 "Dave Beckett" . + } + """; + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected five RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample4UsingMultiplePropertyElements() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + Dave Beckett + + + RDF 1.2 XML Syntax + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new 
ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample5EmptyPropertyElements() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + Dave Beckett + + + RDF 1.2 XML Syntax + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample6ReplacingPropertyElementsWithStringLiteral() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + 
factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample7CompleteRDFXML() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample8CompleteExampleXmlLang() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + RDF 1.2 XML Syntax + RDF 1.2 XML Syntax + RDF 1.2 XML Syntax + + + + Der Baum + Das Buch ist außergewöhnlich + The Tree + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + 
RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(8, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample11CompleteExamplerdfDatatype() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + 123 + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); }); } + @Test + public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); -} + // Assert or inspect the result + assertEquals(6, model.size(), "Expected four RDF 
statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample13CompleteExampleUsingRdfparseTypeResource() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + Dave Beckett + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(4, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample14CompleteExampleOfPorpertyAttributesOnAnEmptyPropertyElement() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample15CompleteExampleWithRdfType() 
throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + A marvelous thing + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample16CompleteExampleUsingATypedNodeElementToReplaceAnRdfType() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + A marvelous thing + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample17CompleteExampleUsingRdfIDAndXmlbase() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + 
""".trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample18ComplexExampleUsingRdfListProperties() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = 
SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample20CompleteExampleOfARdfCollectionOfNodes() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(4, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample21CompleteExampleOfRdfID() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + blah + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, 
valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } +} \ No newline at end of file From ae753422fe63dc0a3ab2629c8894ae626c458e9f Mon Sep 17 00:00:00 2001 From: pierrerene Date: Wed, 16 Jul 2025 10:56:34 +0200 Subject: [PATCH 08/64] build --- build.gradle.kts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 24ce19206..17c2ff9e5 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -78,11 +78,11 @@ object Meta { // Project description const val desc = "Corese is a Semantic Web Factory (triple store and SPARQL endpoint) implementing RDF, RDFS, SPARQL 1.1 Query and Update, Shacl. STTL. LDScript." const val githubRepo = "corese-stack/corese-core" - + // License information const val license = "CeCILL-C License" const val licenseUrl = "https://opensource.org/licenses/CeCILL-C" - + // Sonatype OSSRH publishing settings const val release = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" const val snapshot = "https://oss.sonatype.org/content/repositories/snapshots/" @@ -142,7 +142,6 @@ dependencies { implementation("com.typesafe.akka:akka-stream_2.13:2.6.20") // Akka Streams for reactive streams processing implementation("com.lightbend.akka:akka-stream-alpakka-xml_2.13:3.0.4") // Alpakka XML for XML processing with Akka Streams - // === Utilities === implementation("org.apache.commons:commons-text:1.13.1") // Text manipulation utilities (Commons Text) implementation("org.json:json:20250517") // JSON processing @@ -168,7 +167,7 @@ publishing { // Configure the publication to include JAR, sources, and Javadoc from(components["java"]) - // Configures version mapping to control how dependency versions are resolved + // Configures version mapping to control how dependency versions 
are resolved // for different usage contexts (API and runtime). versionMapping { // Defines version mapping for Java API usage. @@ -295,7 +294,7 @@ tasks.withType { tasks { shadowJar { this.archiveClassifier = "jar-with-dependencies" - } + } } // Configure Javadoc tasks to disable doclint warnings. From 72fbf4f89947f4f0cd1dd8eee5d1cc335647b6df Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 18 Jul 2025 15:31:45 +0200 Subject: [PATCH 09/64] adding RDF-XML Context and Utils --- .../impl/io/parser/rdfxml/RdfXmlUtils.java | 117 ++++ .../parser/rdfxml/context/RdfXmlContext.java | 83 +++ .../io/parser/rdfxml/RdfxmlParserTest.java | 613 ++++++------------ 3 files changed, 386 insertions(+), 427 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java new file mode 100644 index 000000000..fc52f588a --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java @@ -0,0 +1,117 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import org.xml.sax.*; + +import java.util.List; +import java.util.Optional; + + +public class RdfXmlUtils { + private RdfXmlUtils() { + } + + public static String expandQName(String uri, String localName, String qName) { + return (uri != null && !uri.isEmpty()) ? 
uri + localName : qName; + } + + public static Optional resolveDatatype(String datatypeUri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(datatypeUri)) return Optional.of(xsd); + } + return Optional.empty(); + } + + public static Resource extractSubject(Attributes attrs, ValueFactory factory, String baseURI) { + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(resolveAgainstBase(about, baseURI)); + + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + if (nodeID != null) return factory.createBNode("_:" + nodeID); + + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI(resolveAgainstBase("#" + id, baseURI)); + + // Default to blank node + return factory.createBNode(); + } + + public static String resolveAgainstBase(String iri, String baseURI) { + if (iri == null) return null; + if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { + // Absolute IRI or no base, return as-is + return iri; + } + + try { + return new java.net.URI(baseURI).resolve(iri).toString(); + } catch (Exception e) { + throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + } + } + + public static boolean isDescription(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); + } + + public static boolean isNodeElement(Attributes attrs) { + return attrs.getValue(RDF.type.getNamespace(), "about") != null || + attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || + attrs.getValue(RDF.type.getNamespace(), "ID") != null; + } + + public static boolean isSyntaxAttribute(String uri, String localName, String qName) { + if (uri != null && RDF.type.getNamespace().equals(uri)) { + return switch (localName) { + case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; + default -> false; + }; + } + return 
qName.startsWith("xml:"); + } + + public static Optional fromURI(String uri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(uri)) { + return Optional.of(xsd); + } + } + return Optional.empty(); + } + + public static boolean isRdfRDF(String uri, String localName) { + return RDF.type.equals(uri) && "RDF".equals(localName); + } + + public static boolean isContainer(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && + ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); + } + + public static Resource createRdfCollection(List items, Model model, ValueFactory factory) { + Resource head = factory.createBNode(); + Resource current = head; + + for (int i = 0; i < items.size(); i++) { + Resource next = (i < items.size() - 1) + ? factory.createBNode() + : RDF.nil.getIRI(); // rdf:nil + + model.add(factory.createStatement(current, + RDF.first.getIRI(), + items.get(i))); + + model.add(factory.createStatement(current, + RDF.rest.getIRI(), + next)); + + current = next; + } + + return head; + } + + +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java new file mode 100644 index 000000000..c542ff4ea --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java @@ -0,0 +1,83 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml.context; + +import fr.inria.corese.core.next.api.*; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; + +/** + * Holds shared parsing state during RDF/XML parsing. + * + *

This class acts as a context holder for the SAX-based RDF/XML parser, + * allowing multiple elements and handlers to share and manipulate parsing state. + * It stores stacks for subjects, predicates, datatypes, and languages, + * as well as temporary collections used during the construction of RDF lists and containers.

+ * + *

This context is typically instantiated once per parsing session and passed + * throughout the parsing logic.

+ */ +public class RdfXmlContext { + + /** The RDF model to which parsed triples will be added. */ + public Model model; + + /** The factory used to create IRIs, literals, blank nodes, and statements. */ + public ValueFactory factory; + + /** The base URI against which relative IRIs are resolved. */ + public String baseURI; + + /** A single statement buffer (optional use). */ + public Statement statement; + + /** Builder list for rdf:parseType="Collection" elements. */ + public List collectionBuilder = new ArrayList<>(); + + /** The subject associated with the current RDF collection. */ + public Resource collectionSubject; + + /** The predicate that connects the collection subject to the list head. */ + public IRI collectionPredicate; + + /** Stack of subject resources to manage nesting of elements. */ + public final Deque subjectStack = new ArrayDeque<>(); + + /** Stack of predicates for tracking current RDF properties. */ + public final Deque predicateStack = new ArrayDeque<>(); + + /** Stack for xml:lang values scoped by element depth. */ + public final Deque langStack = new ArrayDeque<>(); + + /** Stack for rdf:datatype URIs associated with literals. */ + public final Deque datatypeStack = new ArrayDeque<>(); + + /** Temporary holder for RDF collection items (unused or optional). */ + public final Deque collectionItems = new ArrayDeque<>(); + + /** Whether the parser is currently inside an RDF container (rdf:Seq, rdf:Bag, rdf:Alt). */ + public boolean inContainer = false; + + /** Whether the parser is currently inside an RDF collection (rdf:parseType="Collection"). */ + public boolean inCollection = false; + + /** If true, skips pushing a subject onto the stack (used for collection items). */ + public boolean suppressSubject = false; + + /** Counter for rdf:li to rdf:_n expansion. */ + public int liIndex = 1; + + /** Optional parseType value for the current element. */ + public String parseType; + + /** + * Constructs a new context for RDF/XML parsing. 
+ * + * @param model the RDF model to populate with triples + * @param factory the value factory used to create RDF terms + */ + public RdfXmlContext(Model model, ValueFactory factory) { + this.model = model; + this.factory = factory; + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index e0cd7e2dd..e1bfa306b 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,24 +1,79 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.api.Literal; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Value; import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.apache.jena.base.Sys; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import static org.junit.jupiter.api.Assertions.assertEquals; public class RdfxmlParserTest { - @Test - public void testNodeElementsWithIRIs() throws Exception { + /** + * Helper method to parse the RDF/XML String + * @param rdfXml + * @return model + * @throws Exception + */ + private Model parseRdfXml(String rdfXml) throws Exception { Model model = new CoreseModel(); ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + + try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + RdfXmlParser handler = new 
RdfXmlParser(model, valueFactory); + saxParser.parse(inputStream, handler); + } + + return model; + } + + /** + * Helper method to print the model. + * @param model + */ + private void printModel(Model model) { + model.stream().forEach(stmt -> { + Value obj = stmt.getObject(); + if (obj instanceof Literal literal) { + if (literal.getLanguage().isPresent()) { + System.out.printf("(%s, %s, \"%s\"@%s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel(), + literal.getLanguage().get()); + } else { + System.out.printf("(%s, %s, \"%s\")%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel()); + } + } else { + System.out.printf("(%s, %s, %s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + obj.stringValue()); + } + }); + } + + + /** + * Test node elements with IRIs + * @throws Exception + */ + @Test + public void testNodeElementsWithIRIs() throws Exception { String rdfXml = """ - + @@ -36,34 +91,18 @@ public void testNodeElementsWithIRIs() throws Exception { """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(2, model.size(), "Expected two RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); - } + /** + * Test a basic RDF/XML file + * @throws Exception + */ @Test public void testBasicRdfParsing() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new 
CoreseAdaptedValueFactory(); String rdfXml = """ """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(2, model.size(), "Expected two RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with Complete description of all graph paths + * @throws Exception + */ @Test public void testExample3CompleteDescriptionOfAllGraphPaths() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - - String inTriG = """ - @prefix dcterms: . - - { - _:b15, _:b16 ; - dcterms:title "RDF 1.2 XML Syntax" . - - _:b15 . - - _:b16 "Dave Beckett" . 
- } - """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(5, model.size(), "Expected five RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test RDF/XML File Using multiple property elements on a node element + * @throws Exception + */ @Test public void testExample4UsingMultiplePropertyElements() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ + @@ -188,33 +183,19 @@ public void testExample4UsingMultiplePropertyElements() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); } + /** + * Test RDF/XML with Empty property elements + * @throws Exception + */ @Test public void 
testExample5EmptyPropertyElements() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ + @@ -230,32 +211,19 @@ public void testExample5EmptyPropertyElements() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with Replacing property elements with string literal content into property attributes + * @throws Exception + */ @Test public void testExample6ReplacingPropertyElementsWithStringLiteral() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); + Model model = 
parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complete RDF/XML + * @throws Exception + */ @Test public void testExample7CompleteRDFXML() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """
- """.trim(); - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); } + /** + * Test a Complete example of xml:lang + * @throws Exception + */ @Test public void testExample8CompleteExampleXmlLang() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(8, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(6, model.size(), "Expected six RDF statements"); } @Test public void testExample11CompleteExamplerdfDatatype() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new 
CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected four RDF statements"); } + /** + * Test a Complete RDF/XML file with a description of graph using rdf:nodeID + * @throws Exception + */ @Test public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); + Model model = parseRdfXml(rdfXml); + printModel(model); // Assert or inspect the result - assertEquals(6, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + assertEquals(4, model.size(), "Expected five RDF statements"); } + /** + * Test a RDF/XML file with a Complete example using 
rdf:parseType=Resource + * @throws Exception + */ @Test public void testExample13CompleteExampleUsingRdfparseTypeResource() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with a Complete example of property attributes on an empty property element + * @throws Exception + */ @Test public void testExample14CompleteExampleOfPorpertyAttributesOnAnEmptyPropertyElement() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model 
model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(3, model.size(), "Expected three RDF statements"); } + /** + * Test a RDF/XML file with a Complete example with rdf:type + * @throws Exception + */ @Test public void testExample15CompleteExampleWithRdfType() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(2, model.size(), "Expected four RDF statements"); } + /** + * Test a RDF/XML file with a Complete example using a typed node element to replace an rdf:type + * @throws Exception + */ @Test public void testExample16CompleteExampleUsingATypedNodeElementToReplaceAnRdfType() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - 
saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(2, model.size(), "Expected two RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } @Test + /** + * Test a XML/RDF File using rdf:ID and xml:base + */ public void testExample17CompleteExampleUsingRdfIDAndXmlbase() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected one RDF statement"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complex example using RDF list properties + * @throws Exception + */ @Test public void testExample18ComplexExampleUsingRdfListProperties() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ @@ -681,32 +500,18 @@ public void testExample18ComplexExampleUsingRdfListProperties() throws Exception """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = 
SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected three RDF statements"); } + /** + * Test a Complete example using rdf:li + * @throws Exception + */ @Test - public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + public void testExample19CompleteExampleUsingRdfliProperties() throws Exception { + String rdfXml = """ @@ -720,32 +525,18 @@ public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected three RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complete example of a RDF collection + * @throws Exception + */ @Test public void testExample20CompleteExampleOfARdfCollectionOfNodes() throws 
Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(4, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(7, model.size(), "Expected three RDF statements"); } + /** + * Test a Complete example of rdf:ID reifying a property element + * @throws Exception + */ @Test public void testExample21CompleteExampleOfRdfID() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected one RDF statement"); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } } \ No newline 
at end of file From 786eda20d79340bd7debceed4280f6adb68c9296 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 18 Jul 2025 15:32:03 +0200 Subject: [PATCH 10/64] refactor RdfXmlParser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 434 +++++++++--------- 1 file changed, 224 insertions(+), 210 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index 0ac1cfa29..d2f9d65d1 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -5,48 +5,52 @@ import fr.inria.corese.core.next.api.base.parser.RDFFormats; import fr.inria.corese.core.next.api.base.parser.RDFParser; import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.xml.sax.*; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; + import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.InputStream; import java.io.Reader; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.ArrayList; import java.util.Optional; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; - -public class RdfXmlParser extends DefaultHandler implements RDFParser { - - private final Model model; +import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RdfXmlUtils.*; + +/** + * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). + * + *

This parser processes RDF/XML documents using the SAX streaming API. + * It tracks RDF constructs (resources, properties, literals, containers, collections) + * using an explicit stack-based context, and supports features like xml:lang, + * rdf:datatype, rdf:parseType, and property attributes.

+ * + *

The parser adds RDF statements to the provided {@link Model} using + * the supplied {@link ValueFactory}. This parser supports nested nodes, + * blank nodes, typed nodes, and RDF collections.

+ */ +public class RdfXmlParser extends DefaultHandler implements RDFParser { + + /** RDF/XML format identifier for this parser. */ private final RDFFormat format = RDFFormats.RDF_XML; - private final ValueFactory factory; + /** Buffer for accumulating character data between start and end tags. */ private StringBuilder characters = new StringBuilder(); - private String baseURI; - - private Statement statement; - - private final Deque subjectStack = new ArrayDeque<>(); - private final Deque predicateStack = new ArrayDeque<>(); - private final Deque langStack = new ArrayDeque<>(); - private final Deque datatypeStack = new ArrayDeque<>(); - - private boolean inContainer = false; - private int liIndex = 1; + /** Shared state across SAX callbacks. */ + private RdfXmlContext ctx; + /** + * Creates a new parser with a target RDF model and factory. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + */ public RdfXmlParser(Model model, ValueFactory factory) { - this.model = model; - this.factory = factory; - } - - // used for test class and can be removed - public RdfXmlParser() { - this(new CoreseModel(), new CoreseAdaptedValueFactory()); + this.ctx = new RdfXmlContext(model, factory); } @Override @@ -61,7 +65,7 @@ public void parse(InputStream in) { @Override public void parse(InputStream in, String baseURI) { - this.baseURI = baseURI; + ctx.baseURI = baseURI; try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -79,7 +83,7 @@ public void parse(Reader reader) { @Override public void parse(Reader reader, String baseURI) { - this.baseURI = baseURI; + ctx.baseURI = baseURI; try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -91,257 +95,267 @@ public void parse(Reader reader, String baseURI) { } } + @Override + public void characters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + /** + * 
Handles opening of an XML element. + * Identifies node elements, container constructs, properties, + * and special parseType attributes, updating the parsing context accordingly. + */ @Override public void startElement(String uri, String localName, String qName, Attributes attrs) { - // Ignore rdf:RDF root element - if (isRdfRDF(uri, localName)) return; + // Skip the top-level rdf:RDF wrapper element + if (RdfXmlUtils.isRdfRDF(uri, localName)) return; + + // Reset character buffer characters.setLength(0); - // Handle datatype - String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - if (datatype != null) { - datatypeStack.push(datatype); + // Handle xml:base (change base URI dynamically) + String xmlBase = attrs.getValue("xml:base"); + if (xmlBase != null) { + ctx.baseURI = xmlBase; } - // Handle xml:lang String xmlLang = attrs.getValue("xml:lang"); if (xmlLang != null) { - langStack.push(xmlLang); + ctx.langStack.push(xmlLang); } - // Handle xml:base - String xmlBase = attrs.getValue("xml:base"); - if (xmlBase != null) { - baseURI = xmlBase; + // Handle rdf:datatype (applies to property literal values) + String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); + if (datatype != null) { + ctx.datatypeStack.push(datatype); } - // Handle RDF containers - if (isContainer(localName, uri)) { - Resource subject = extractSubject(attrs); - subjectStack.push(subject); - inContainer = true; - liIndex = 1; + // --- RDF Container Element --- + if (RdfXmlUtils.isContainer(localName, uri)) { + Resource subject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.subjectStack.push(subject); + ctx.inContainer = true; + ctx.liIndex = 1; + + IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.model.add(ctx.factory.createStatement(subject, RDF.type.getIRI(), typeIRI)); return; } - // Handle container items: rdf:li → rdf:_n - if (inContainer && RDF.type.getNamespace().equals(uri)) { + // --- 
Container Items (rdf:li, rdf:_n) --- + if (ctx.inContainer && RDF.type.getNamespace().equals(uri)) { String pred = null; if ("li".equals(localName)) { - pred = RDF.type.getNamespace() + "_" + liIndex++; + pred = RDF.type.getNamespace() + "_" + ctx.liIndex++; } else if (localName.matches("_\\d+")) { pred = RDF.type.getNamespace() + localName; } if (pred != null) { - IRI predicate = factory.createIRI(pred); + IRI predicate = ctx.factory.createIRI(pred); String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement( - subjectStack.peek(), + ctx.model.add(ctx.factory.createStatement( + ctx.subjectStack.peek(), predicate, - factory.createIRI(resolveAgainstBase(resource)) + ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) )); } return; } } - // Handle (typed or untyped) - if (isDescription(localName, uri) || isNodeElement(attrs)) { - Resource newSubject = extractSubject(attrs); - // If this or typed element is the object of a property - if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { - Resource parent = subjectStack.peek(); - IRI predicate = predicateStack.pop(); // consume the predicate - model.add(factory.createStatement(parent, predicate, newSubject)); + // --- parseType="Collection" --- + String parseType = attrs.getValue(RDF.type.getNamespace(), "parseType"); + if ("Collection".equals(parseType)) { + IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.predicateStack.push(predicate); + ctx.collectionSubject = ctx.subjectStack.peek(); + ctx.collectionPredicate = predicate; + ctx.collectionBuilder = new ArrayList<>(); + ctx.inCollection = true; + return; + } + + // --- Inside Collection: Collect rdf:Description Items --- + if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { + Resource item = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.collectionBuilder.add(item); + ctx.suppressSubject = true; + return; 
+ } + + // --- Node Element: rdf:Description or typed node --- + boolean isNode = RdfXmlUtils.isDescription(localName, uri) + || (ctx.subjectStack.isEmpty() && RdfXmlUtils.isNodeElement(attrs)); + + if (isNode) { + Resource newSubject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + + // If current node is object of a property + if (!ctx.predicateStack.isEmpty() && !ctx.subjectStack.isEmpty()) { + Resource parent = ctx.subjectStack.peek(); + IRI predicate = ctx.predicateStack.pop(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, newSubject)); } - subjectStack.push(newSubject); + ctx.subjectStack.push(newSubject); - // If it's a typed node (e.g., ), add rdf:type triple - if (!isDescription(localName, uri)) { - IRI typeIRI = factory.createIRI(expandQName(uri, localName, qName)); - model.add(factory.createStatement( - newSubject, - factory.createIRI(RDF.type.getIRI().stringValue()), - typeIRI - )); + // Emit rdf:type for typed node elements + if (!RdfXmlUtils.isDescription(localName, uri)) { + IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.model.add(ctx.factory.createStatement(newSubject, RDF.type.getIRI(), typeIRI)); } - // Handle property attributes + // Handle non-RDF attributes as property triples for (int i = 0; i < attrs.getLength(); i++) { String attrURI = attrs.getURI(i); String attrLocal = attrs.getLocalName(i); String attrQName = attrs.getQName(i); String value = attrs.getValue(i); - if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; + if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); - model.add(factory.createStatement(newSubject, pred, factory.createLiteral(value))); + IRI pred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); + ctx.model.add(ctx.factory.createStatement(newSubject, pred, ctx.factory.createLiteral(value))); } return; } - 
// Handle regular property elements (e.g., ) - IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); - predicateStack.push(predicate); + // --- Property Element (e.g., ) --- + IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.predicateStack.push(predicate); - // Handle rdf:resource object (IRI) - String resource = attrs.getValue("rdf:resource"); - if (resource != null) { - model.add(factory.createStatement( - subjectStack.peek(), - predicate, - factory.createIRI(resolveAgainstBase(resource)) - )); - } - } + // --- Property Resource/Object reference --- + String resource = attrs.getValue(RDF.type.getNamespace(), "resource"); + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - @Override - public void endElement(String uri, String localName, String qName) { - String text = characters.toString().trim(); - characters.setLength(0); + if (resource != null || nodeID != null) { + Resource object = resource != null + ? ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) + : ctx.factory.createBNode("_:" + nodeID); - // Always pop lang/datatype if pushed - if (!langStack.isEmpty()) langStack.pop(); - String datatypeUri = !datatypeStack.isEmpty() ? 
datatypeStack.pop() : null; - - // Property literal - if (!predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - Value literal; - - if (datatypeUri != null && !datatypeUri.isBlank()) { - Optional known = fromURI(datatypeUri); - - if (known.isPresent()) { - // normalized datatype - IRI normalizedDatatype = known.get().getIRI(); - literal = factory.createLiteral(text, normalizedDatatype); - } else { - // fallback datatype - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - IRI fallbackDatatype = factory.createIRI(datatypeUri); - literal = factory.createLiteral(text, fallbackDatatype); - } - } else { - // no datatype – use language tag if any - String lang = langStack.isEmpty() ? null : langStack.peek(); - literal = (lang != null && !lang.equals("__NO_LANG__")) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); - } + ctx.model.add(ctx.factory.createStatement( + ctx.subjectStack.peek(), + predicate, + object + )); - model.add(factory.createStatement(subject, predicate, literal)); + ctx.predicateStack.pop(); // already used return; } - // Clean up stray predicates - if (!predicateStack.isEmpty()) { - predicateStack.pop(); - } - - // Handle containers - if (isContainer(localName, uri)) { - if (!subjectStack.isEmpty()) subjectStack.pop(); - inContainer = false; - liIndex = 1; + // --- parseType="Resource": create blank node --- + if ("Resource".equals(parseType)) { + Resource parent = ctx.subjectStack.peek(); + Resource bnode = ctx.factory.createBNode(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); + ctx.subjectStack.push(bnode); return; } - // Handle end of rdf:Description - if (isDescription(localName, uri)) { - if (!subjectStack.isEmpty()) subjectStack.pop(); + // --- Inline property attributes: Create blank node with attributes --- + boolean hasNonSyntaxAttributes = false; + for (int i = 0; i < attrs.getLength(); i++) { + 
String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + if (!RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) { + hasNonSyntaxAttributes = true; + break; + } } - } - - - @Override - public void characters(char[] ch, int start, int length) { - characters.append(ch, start, length); - } - - private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue(RDF.type.getNamespace(), "about"); - if (about != null) return factory.createIRI(resolveAgainstBase(about)); - - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue(RDF.type.getNamespace(), "ID"); - if (id != null) return factory.createIRI(resolveAgainstBase("#" + id)); + if (hasNonSyntaxAttributes) { + Resource parent = ctx.subjectStack.peek(); + Resource bnode = ctx.factory.createBNode(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - // Default to blank node - return factory.createBNode(); - } + for (int i = 0; i < attrs.getLength(); i++) { + String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + String value = attrs.getValue(i); - private String resolveAgainstBase(String iri) { - if (iri == null) return null; - if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { - // Absolute IRI or no base, return as-is - return iri; - } + if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - try { - return new java.net.URI(baseURI).resolve(iri).toString(); - } catch (Exception e) { - throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + IRI attrPred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); + ctx.model.add(ctx.factory.createStatement(bnode, attrPred, ctx.factory.createLiteral(value))); + } + ctx.predicateStack.pop(); 
// already emitted } } - private boolean isRdfRDF(String uri, String localName) { - return RDF.type.equals(uri) && "RDF".equals(localName); - } - - private boolean isDescription(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); - } - - private boolean isContainer(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && - ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); - } - - private String expandQName(String uri, String localName, String qName) { - return (uri != null && !uri.isEmpty()) ? uri + localName : qName; - } + /** + * Handles the end of an XML element, emitting a literal or cleaning up context stacks. + */ + @Override + public void endElement(String uri, String localName, String qName) { + String text = characters.toString().trim(); + characters.setLength(0); - private boolean isSyntaxAttribute(String uri, String localName, String qName) { - if (uri != null && RDF.type.getNamespace().equals(uri)) { - return switch (localName) { - case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; - default -> false; - }; + if (!ctx.predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = ctx.predicateStack.pop(); + Resource subject = ctx.subjectStack.peek(); + String datatypeUri = ctx.datatypeStack.isEmpty() ? 
null : ctx.datatypeStack.pop(); + emitLiteral(subject, predicate, text, datatypeUri); + return; } - return qName.startsWith("xml:"); - } - - private void emitTripleString(String subject, String predicate, String object) { - System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); + cleanEndElement(uri, localName); } - private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { - this.statement = factory.createStatement(subj, pred, obj, context); - } - - private boolean isNodeElement(Attributes attrs) { - return attrs.getValue(RDF.type.getNamespace(), "about") != null || - attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || - attrs.getValue(RDF.type.getNamespace(), "ID") != null; + /** + * Emits a literal statement (optionally typed or language-tagged). + */ + private void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri) { + Value literal; + if (datatypeUri != null && !datatypeUri.isEmpty()) { + Optional known = RdfXmlUtils.resolveDatatype(datatypeUri); + IRI dtype = known.map(XSD::getIRI).orElseGet(() -> { + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + return ctx.factory.createIRI(datatypeUri); + }); + literal = ctx.factory.createLiteral(text, dtype); + } else { + String lang = ctx.langStack.isEmpty() ? null : ctx.langStack.peek(); + literal = (lang != null && !lang.equals("__NO_LANG__")) + ? ctx.factory.createLiteral(text, lang) + : ctx.factory.createLiteral(text); + } + ctx.model.add(ctx.factory.createStatement(subject, predicate, literal)); } - public Optional fromURI(String uri) { - for (XSD xsd : XSD.values()) { - if (xsd.getIRI().stringValue().equals(uri)) { - return Optional.of(xsd); - } + /** + * Cleans up stacks and handles closing of collections, containers, and resource blocks. 
+ */ + private void cleanEndElement(String uri, String localName) { + if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); + if (!ctx.predicateStack.isEmpty()) ctx.predicateStack.pop(); + if (RdfXmlUtils.isContainer(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + ctx.inContainer = false; + ctx.liIndex = 1; + return; + } + if (ctx.inCollection && localName.equals(ctx.collectionPredicate.getLocalName())) { + Resource listHead = createRdfCollection(ctx.collectionBuilder, ctx.model, ctx.factory); + ctx.model.add(ctx.factory.createStatement(ctx.collectionSubject, ctx.collectionPredicate, listHead)); + ctx.inCollection = false; + ctx.collectionBuilder.clear(); + return; + } + if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + return; + } + if (RdfXmlUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + } + if (!ctx.subjectStack.isEmpty() && !ctx.predicateStack.isEmpty()) { + ctx.predicateStack.pop(); + ctx.subjectStack.pop(); } - return Optional.empty(); } -} +} \ No newline at end of file From 7a34f4c7e0306e088797f1586b17bd80825a0e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Tue, 22 Jul 2025 14:12:27 +0200 Subject: [PATCH 11/64] Add RDFXML support to ParserFactory and remove RdfXmlParserFactory --- .../next/impl/io/parser/ParserFactory.java | 5 ++++ .../io/parser/rdfxml/RdfXmlParserFactory.java | 23 ------------------- 2 files changed, 5 insertions(+), 23 deletions(-) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 8b43d930a..70db3899a 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ 
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.ANTLRNQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser; +import fr.inria.corese.core.next.impl.io.parser.rdfxml.RdfXmlParser; import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser; /** @@ -44,6 +45,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNTriplesParser(model, factory, config); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory, config); + } else if (format == RDFFormat.RDFXML) { + return new RdfXmlParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -65,6 +68,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNTriplesParser(model, factory); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory); + } else if (format == RDFFormat.RDFXML) { + return new RdfXmlParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java deleted file mode 100644 index 5a3f59b46..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java +++ /dev/null @@ -1,23 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.base.parser.RDFFormat; -import fr.inria.corese.core.next.api.base.parser.RDFFormats; -import fr.inria.corese.core.next.api.base.parser.RDFParser; -import 
fr.inria.corese.core.next.api.base.parser.RDFParserFactory; - -public class RdfXmlParserFactory implements RDFParserFactory { - - public RdfXmlParserFactory() { - super(); - } - - @Override - public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) { - if (!format.equals(RDFFormats.RDF_XML)) { - throw new IllegalArgumentException("Unsupported format : " + format); - } - return new RdfXmlParser(model, factory); - } -} \ No newline at end of file From 1e8aad5c51845a9e772b27b7737ac705010d49b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Tue, 22 Jul 2025 14:13:24 +0200 Subject: [PATCH 12/64] Refactor RdfXmlParser to extend AbstractRDFParser and update test cases --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 71 ++++++++++++------- .../io/parser/rdfxml/RdfxmlParserTest.java | 11 +-- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index d2f9d65d1..ce3a6b523 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -1,9 +1,9 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.parser.RDFFormat; -import fr.inria.corese.core.next.api.base.parser.RDFFormats; -import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; @@ -32,10 +32,10 @@ * the supplied 
{@link ValueFactory}. This parser supports nested nodes, * blank nodes, typed nodes, and RDF collections.

*/ -public class RdfXmlParser extends DefaultHandler implements RDFParser { +public class RdfXmlParser extends AbstractRDFParser { /** RDF/XML format identifier for this parser. */ - private final RDFFormat format = RDFFormats.RDF_XML; + private final RDFFormat format = RDFFormat.RDFXML; /** Buffer for accumulating character data between start and end tags. */ private StringBuilder characters = new StringBuilder(); @@ -50,17 +50,24 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { * @param factory the RDF value factory for term creation */ public RdfXmlParser(Model model, ValueFactory factory) { - this.ctx = new RdfXmlContext(model, factory); + this(model, factory, null); } - @Override - public RDFFormat getRDFFormat() { - return format; + /** + * Creates a new parser with a target RDF model, factory, and configuration options. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + * @param config optional configuration options for the parser + */ + public RdfXmlParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + this.ctx = new RdfXmlContext(getModel(), getValueFactory()); } @Override - public void parse(InputStream in) { - parse(in, null); + public RDFFormat getRDFFormat() { + return format; } @Override @@ -70,17 +77,12 @@ public void parse(InputStream in, String baseURI) { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); - saxParser.parse(in, this); + saxParser.parse(in, new RdfXmlSaxHandler()); } catch (Exception e) { throw new RuntimeException("Failed to parse RDF/XML input stream", e); } } - @Override - public void parse(Reader reader) { - parse(reader, null); - } - @Override public void parse(Reader reader, String baseURI) { ctx.baseURI = baseURI; @@ -89,14 +91,37 @@ public void parse(Reader reader, String baseURI) { factory.setNamespaceAware(true); 
SAXParser saxParser = factory.newSAXParser(); InputSource inputSource = new InputSource(reader); - saxParser.parse(inputSource, this); + saxParser.parse(inputSource, new RdfXmlSaxHandler()); } catch (Exception e) { throw new RuntimeException("Failed to parse RDF/XML input stream", e); } } - @Override - public void characters(char[] ch, int start, int length) { + /** + * Internal SAX handler that delegates to the parser's methods + */ + private class RdfXmlSaxHandler extends DefaultHandler { + + @Override + public void characters(char[] ch, int start, int length) { + RdfXmlParser.this.handleCharacters(ch, start, length); + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attrs) { + RdfXmlParser.this.handleStartElement(uri, localName, qName, attrs); + } + + @Override + public void endElement(String uri, String localName, String qName) { + RdfXmlParser.this.handleEndElement(uri, localName, qName); + } + } + + /** + * Handles character data between XML elements + */ + private void handleCharacters(char[] ch, int start, int length) { characters.append(ch, start, length); } @@ -105,8 +130,7 @@ public void characters(char[] ch, int start, int length) { * Identifies node elements, container constructs, properties, * and special parseType attributes, updating the parsing context accordingly. */ - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { + private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { // Skip the top-level rdf:RDF wrapper element if (RdfXmlUtils.isRdfRDF(uri, localName)) return; @@ -291,8 +315,7 @@ public void startElement(String uri, String localName, String qName, Attributes /** * Handles the end of an XML element, emitting a literal or cleaning up context stacks. 
*/ - @Override - public void endElement(String uri, String localName, String qName) { + private void handleEndElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index e1bfa306b..37aeee0a1 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -8,8 +8,6 @@ import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -26,15 +24,10 @@ public class RdfxmlParserTest { private Model parseRdfXml(String rdfXml) throws Exception { Model model = new CoreseModel(); ValueFactory valueFactory = new CoreseAdaptedValueFactory(); - try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - saxParser.parse(inputStream, handler); + RdfXmlParser parser = new RdfXmlParser(model, valueFactory); + parser.parse(inputStream); } - return model; } From 7e29fb9ffd2e918da14e9a2aa73dbf83831e73c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 15:23:11 +0200 Subject: [PATCH 13/64] Remove outdated documentation files and update installation instructions - Deleted the "Getting Started with Corese-Core" documentation as it is no longer needed. 
- Removed the "Configuring and Connecting to Different Storage Systems in Corese" documentation to streamline content. - Deleted the "User Guide" file to consolidate documentation. - Updated the installation instructions in "install.md" to reflect changes in dependency management and removed references to Gradle and RDF4J storage. --- INSTALL.md | 82 -- .../advanced/SPARQL_Service_Extension.md | 545 ----------- .../federated_and_distributed_queries.md | 92 -- .../getting_started_with_corese-core.md | 885 ------------------ docs/source/install.md | 53 +- ..._to_different_storage_systems_in_corese.md | 230 ----- docs/source/user_guide.rst | 31 - 7 files changed, 12 insertions(+), 1906 deletions(-) delete mode 100644 INSTALL.md delete mode 100644 docs/source/advanced/SPARQL_Service_Extension.md delete mode 100644 docs/source/federation/federated_and_distributed_queries.md delete mode 100644 docs/source/getting_started/getting_started_with_corese-core.md delete mode 100644 docs/source/storage/configuring_and_connecting_to_different_storage_systems_in_corese.md delete mode 100644 docs/source/user_guide.rst diff --git a/INSTALL.md b/INSTALL.md deleted file mode 100644 index d3463d8dd..000000000 --- a/INSTALL.md +++ /dev/null @@ -1,82 +0,0 @@ -# Installation Guide for Corese-Core - -This document provides step-by-step instructions to install and build **Corese-Core** from source. - -## Prerequisites - -Before installing Corese-Core, make sure you have the following installed: - -- **Java 21** or higher - → Check with: `java -version` -- **Git** (to clone the repository) -- **Gradle 8+** (optional, recommended) - → If not installed, the Gradle Wrapper (`./gradlew`) will be used. 
-- **Internet access** (to fetch dependencies) - -## Clone the Repository - -```bash -git clone https://github.com/corese-stack/corese-core.git -cd corese-core -``` - -## Build Corese-Core - -You can build the project using the Gradle wrapper: - -```bash -./gradlew build -``` - -This will: - -- Compile all modules -- Run tests -- Generate the main JAR file (in `build/libs/`) -- Publish the library to the local Maven repository if needed - -If you're only interested in building without tests: - -```bash -./gradlew assemble -``` - -## Run Tests - -```bash -./gradlew test -``` - -You can view the test reports in: - -```text -corese-core/build/reports/tests/test/index.html -``` - -## Publish to Local Maven (optional) - -To publish Corese-Core locally for use in other modules (like `corese-gui`, `corese-server`, etc.): - -```bash -./gradlew publishToMavenLocal -``` - -The artifact will be installed under: - -```text -~/.m2/repository/fr/inria/corese/corese-core/ -``` - -## Clean Build - -```bash -./gradlew clean -``` - ---- - -## Troubleshooting - -- *Gradle not found?* → Use `./gradlew` instead of `gradle` -- *Java version too low?* → Corese requires Java 21+. You can install it via SDKMAN, Homebrew, or your package manager. -- *Tests failing due to RDF line endings or hashes?* → Make sure to normalize line endings (`\n`) and verify data hashes if you're running tests on Windows. diff --git a/docs/source/advanced/SPARQL_Service_Extension.md b/docs/source/advanced/SPARQL_Service_Extension.md deleted file mode 100644 index a4e7f3f7c..000000000 --- a/docs/source/advanced/SPARQL_Service_Extension.md +++ /dev/null @@ -1,545 +0,0 @@ -# SPARQL Service Extension - -## Abstract - -This document presents extensions of SPARQL service implemented in Corese. - -## Introduction - -In a SERVICE clause, Corese allows for the addition of URL parameters to the service URL. These parameters are used to tune the behaviour of the service. 
For example, we can specify the format of the query result returned by the service. - -For example, the following query will make Corese write a trace of the execution of the query in its logs: -``` -http://corese.inria.fr/sparql?mode=debug&query=select * {?s ?p ?o} -``` - - - -### Prefix used in this document. - -```turtle -@prefix st: . -@prefix stm: . -@prefix d3: . -``` - - -## Server URL Parameter - -### Standard parameters -In this section, we consider endpoint URL parameters decoded by a SPARQL endpoint receiving an HTTP request. - -Standard endpoint URL parameters are available. - -``` -query -default-graph-uri -named-graph-uri -``` - - -Currently corese does not implement the Update parameters below. Users can use query parameters listed above instead. - -``` -update -using-graph-uri -using-named-graph-uri -``` - -### Shorthand format parameters -`format` specifies query result format when HTTP Accept header cannot be set. For example, `format=json` is equivalent to: `header "Accept: application/sparql-results+json"`. - -For SELECT and ASK queries: -``` -format = json | xml -``` -For SELECT, ASK, DESCRIBE and CONSTRUCT queries: -``` -format = jsonld | rdfxml | turtle -``` - -### Trace parameter -It is possible to specify several kinds of traces of execution. 
- -``` -mode = debug | trace -``` - -For example, for the query `SELECT * { ?s ?p ?o } LIMIT 1` the normal trace is: -``` -[datetime] INFO webservice.SPARQLRestAPI.getTriplesXMLForGet:311 [] - getTriplesXMLForGet -[datetime] INFO webservice.SPARQLResult.getResultFormat:90 [] - Endpoint URL: http://localhost:8080/sparql -[datetime] INFO webservice.SPARQLResult.getResultFormat:96 [] - Query: SELECT * { ?s ?p ?o } LIMIT 1 -[datetime] INFO webservice.EventManager.log:72 [] - Workflow Context: -st:remoteHost : "[0:0:0:0:0:0:0:1]" -st:service : "http://ns.inria.fr/sparql-template/default" -request : "[org.eclipse.jetty.server.Request:Request(GET http://localhost:8080/sparql?query=SELECT%20*%20%7B%20?s%20?p%20?o%20%7D%20LIMIT%201)@429f0053]"^^dt:pointer -url : -user query: true -level: PRIVATE - -[datetime] INFO webservice.EventManager.log:73 [] - {st:count="[Map: size=2]"^^dt:map, st:date="[Map: size=2]"^^dt:map, st:host="[Map: size=2]"^^dt:map, st:hostlite="[Map: size=1]"^^dt:map} -[datetime] INFO webservice.EventManager.log:74 [] - {st:sparql=2, "http://ns.inria.fr/sparql-template/default"=17} -[datetime] INFO webservice.EventManager.log:76 [] - {"127.0.0.1"=11} -``` - -The `debug` parameter change the trace in the following: -``` -[datetime] INFO webservice.SPARQLRestAPI.getTriplesXMLForGet:311 [] - getTriplesXMLForGet -[datetime] INFO webservice.SPARQLResult.getResultFormat:90 [] - Endpoint URL: http://localhost:8080/sparql -[datetime] INFO webservice.SPARQLResult.getResultFormat:96 [] - Query: SELECT * { ?s ?p ?o } LIMIT 1 -[datetime] INFO webservice.EventManager.log:72 [] - Workflow Context: -debug : true -st:remoteHost : "[0:0:0:0:0:0:0:1]" -st:service : "http://ns.inria.fr/sparql-template/default" -mode : "("debug" )"^^dt:list -request : "[org.eclipse.jetty.server.Request:Request(GET http://localhost:8080/sparql?query=SELECT%20*%20%7B%20?s%20?p%20?o%20%7D%20LIMIT%201&mode=debug)@5c192889]"^^dt:pointer -url : -user query: true -level: PRIVATE - -[datetime] INFO 
webservice.EventManager.log:73 [] - {st:count="[Map: size=2]"^^dt:map, st:date="[Map: size=2]"^^dt:map, st:host="[Map: size=2]"^^dt:map, st:hostlite="[Map: size=1]"^^dt:map} -[datetime] INFO webservice.EventManager.log:74 [] - {st:sparql=2, "http://ns.inria.fr/sparql-template/default"=18} -[datetime] INFO webservice.EventManager.log:76 [] - {"127.0.0.1"=11} -select [NODE {?s }, NODE {?p }, NODE {?o }] -QUERY { -AND { - EDGE {?s ?p ?o} } } -[datetime] WARN tool.Message.log:64 [] - Eval: 00 AND { -EDGE {?s ?p ?o} } -[datetime] WARN tool.Message.log:64 [] - Loop: 3 1 -[datetime] INFO webservice.SPARQLResult.getFormat:411 [] - transform: null -``` - -The `trace` parameter change the trace in the following: -``` -[datetime] INFO webservice.SPARQLRestAPI.getTriplesXMLForGet:311 [] - getTriplesXMLForGet -[datetime] INFO webservice.SPARQLResult.getResultFormat:90 [] - Endpoint URL: http://localhost:8080/sparql -[datetime] INFO webservice.SPARQLResult.getResultFormat:96 [] - Query: SELECT * { ?s ?p ?o } LIMIT 1 -Endpoint HTTP Request -header: Accept: */* -header: User-Agent: Wget/1.21.3 -header: Connection: keep-alive -header: Host: localhost:8080 -header: Accept-Encoding: identity -param: query=SELECT * { ?s ?p ?o } LIMIT 1 -param: mode=trace -[datetime] INFO webservice.EventManager.log:72 [] - Workflow Context: -st:remoteHost : "[0:0:0:0:0:0:0:1]" -st:service : "http://ns.inria.fr/sparql-template/default" -mode : "("trace" )"^^dt:list -request : "[org.eclipse.jetty.server.Request:Request(GET http://localhost:8080/sparql?query=SELECT%20*%20%7B%20?s%20?p%20?o%20%7D%20LIMIT%201&mode=trace)@11b799cb]"^^dt:pointer -trace : true -url : -user query: true -level: PRIVATE - -[datetime] INFO webservice.EventManager.log:73 [] - {st:count="[Map: size=2]"^^dt:map, st:date="[Map: size=2]"^^dt:map, st:host="[Map: size=2]"^^dt:map, st:hostlite="[Map: size=1]"^^dt:map} -[datetime] INFO webservice.EventManager.log:74 [] - {st:sparql=2, "http://ns.inria.fr/sparql-template/default"=19} 
-[datetime] INFO webservice.EventManager.log:76 [] - {"127.0.0.1"=11} -SPARQL endpoint -select * -where { - ?s ?p ?o . -} -limit 1 -01 ?s = ; ?p = rdf:type; ?o = owl:Class; - -service result: - - - - - - - - - -http://linkedgeodata.org/ontology/RailwayConstruction -http://www.w3.org/1999/02/22-rdf-syntax-ns#type -http://www.w3.org/2002/07/owl#Class - - - -``` - -## Client URL Parameter - -In this section we consider service URL parameters decoded by SPARQL service interpreter when calling a service. - -``` -service { BGP } -``` -Such parameters are used to modify the way the Corese server will interact with the remote service. For example, we can specify the format of the query result returned by the service. - -### Standard parameters -Standard dataset uri parameters are available. Hence we can specify a dataset for a service. - -``` -default-graph-uri = uri -named-graph-uri = uri -``` - -### HTTP method -`method` specifies HTTP GET or POST method for calling the service. -``` -method = get | post -``` - - -`header` cna be used to specify any HTTP header parameter. -``` -header=name:value -``` -For example, to specify the HTTP Accept header: -```sparql -SELECT * { - ?s ?p ?o . - SERVICE { - ?o a ?c - } -} LIMIT 1 -``` - -This previous query is equivalent to the usage of `format=json`. `format` can be used to specify the HTTP Accept header. -Specify the format of the service query result returned by the endpoint using content negotiation. -``` -format = xml | json -``` - -### Trace mode -`mode` in client URL parameter is equivalent to `mode` in server URL parameter. It specifies the trace mode of the service. It add to the trace trace intermediate results of service, and shows the string result returned by service. -``` -mode = debug | trace -``` - - -The `trap` mode "traps" syntax error when parsing service query results and in case of an error, return a subset of results if possible. 
-``` -mode = trap -``` - -### Bindings - -#### Bindings values transmission -`binding` specifies the syntax used for variable bindings sent with the service. Variable bindings are the results of intermediate statement evaluation that can be passed as argument of the service. - -``` -binding = filter | values -``` - -`binding=filter` generates bindings with the following syntax: -``` -filter (?x = x1 && ?y = y1) -``` - -`binding=values` generates bindings with the following syntax: -``` -values (?x ?y) { (x1 y1) } -``` - -For example, with the following data in the local endpoint: - -```turtle -@prefix : . - -ex:John :name "John" . -``` -if the following query is executed: - -```sparql -PREFIX : -SELECT ?x ?age { - ?x :name ?name . - SERVICE { - ?x :name ?name ; - :age ?age . - } -} -``` - -then the query sent to the remote endpoint will be: - -```sparql -PREFIX : -SELECT * { - VALUES ?name { "John" } - ?x :name ?name ; - :age ?age . -} -``` - -#### Bindings variable selection -`focus` and `skip` specify variables that must or must not be passed as variable bindings to the service. -``` -focus=x -skip=y -``` - -#### Bindings in-scope -In order to have their bindings passed into a service clause, variables must be [in-scope](https://www.w3.org/TR/sparql11-query/#variableScope) in the service statement. When a variable is not in-scope, it is possible to make it in-scope with a values expression in the service statement. -``` -values var { undef } -``` - -#### Binding slicing -Specify the size of the slice of intermediate results variable bindings sent with the service. Default is 20 sets of variable bindings (i.e. from 20 intermediate results). - -``` -slice = integer -``` - - -Specify a limit for the number of results returned by the service. - -``` -limit = integer -``` - - -Specify a timeout in millisecond for the service. - -``` -timeout = 123 -``` - - -### Exotic Extension - -Any parameter value can be a LDScript global variable. 
The SPARQL interpreter evaluates the variable at runtime and replaces it by its value. - -``` -param={?paramValue} -``` - - -Remote server return an RDF document instead of SPARQL query results. Parse the RDF document, create an RDF graph, evaluate service BGP locally on the RDF graph. - -``` -mode=construct -``` - - -Remote server return a document (e.g. JSON) instead of SPARQL query results. Parse the document using a LDScript function that returns an RDF graph. Evaluate service BGP locally on the graph returned by the function. - -``` -wrapper=functionNameURI -``` - - -## Service Log - -Obtain additional information about query execution and query results such as explanation, trace, etc. - -### Log - -Detailed log of federated query execution returned as Linked Result RDF/Turtle document, with source selection, rewritten query, intermediate query results. - -``` -mode=log -``` - - -Query string returned as Linked Result. - -``` -mode=logquery -``` - - -### Explain - -Explain why federated query fail. Save intermediate query and results as Linked Result documents. Return one JSON object that contains the list of URLs of these Linked Results. This mode is processed by corese GUI. - -``` -mode=why -``` - - -Show where query fail: display last executed statement. - -``` -mode=explain -``` - - -### Message - -Return a JSON object message as Linked Result. JSON message contain the Context, the date, execution time. It contains also endpoint exceptions and service that fail in case of federated query. Message is displayed by corese GUI. It is possible to obtain a message systematically by specifying the default mode as such (see below). - -``` -mode=message -``` - - -## Service Extension - -### Endpoint URL Default Parameter - -Define default parameter values for SPARQL endpoint URL in urlprofile.ttl. Mode *, if any, is applied to every service. Parameter "document" is an URL that is added in Query Results "link" tag. 
- -``` -[] st:mode "*" ; -st:param ( -("mode" "message") ("document" ) -) -. -``` - - -Define parameter values associated to specific mode. - -``` -[] st:mode "map" ; -st:param (("mode" "link") ("transform" stm:mapper)) -. -``` - - -Define service URL with predefined parameter values. - -``` - -st:param (("mode" "map")) -. -``` - - -### Federated SPARQL endpoint - -A federated SPARQL endpoint is an endpoint who dispatches a SPARQL query to several endpoints member of a federation. It processes and returns the union of the query results, processing aggregates, if any, on the union of the results. It is equivalent to a query with a union of service clauses on every endpoint of the federation. -A federation is an URL associated to a list of SPARQL endpoints. It is defined using a Turtle format configuration file, as shown below. - -``` - a st:Federation ; -st:definition ( - - - -) -``` - - -The idea is that a federation is hidden behind a single SPARQL endpoint URL. -The provenance parameter returns the URL of the target endpoint for each result. - -``` -http://corese.inria.fr/d2kab/sparql?mode=provenance& -query=select * where { ?s rdfs:label ?l filter regex(?l, "bio") } limit 10 -``` - - -A variant of federated SPARQL endpoint splits and rewrites the SPARQL query with appropriate service clauses. The endpoint URL is defined with /federate instead of /sparql. - -``` -http://corese.inria.fr/d2kab/federate?query=select where {} -``` - - -### Federated endpoint explain mode - -Federated endpoint with mode explain generate Linked Result for source selection query and results, rewritten federated query. Linked Result also for intermediate service call and service results. -It works for federated engine but also for sparql engine with a standard query with services, sent to corese server with /sparql?mode=why. -The interpreter logs intermediate services and results and at the end, in case of mode=why, it generates Linked Results. 
- -Corese GUI display Linked Results in several query panels with their results. -Intermediate service call can be executed again in GUI. -GUI can save and load query results with Linked Result. Hence we can keep track of federated query results during the lifetime of the endpoint because documents are managed on server side in temporary files. - -``` -mode=why -``` - - -### Compiler Service - -Compile a federated query as 1) select source query, 2) federated query with service clauses. Return result of select query. Generate two link href documents for select and federated query. - -``` -mode=compile - - - -``` - - -### Evaluation Report Service - -A report can be generated for federated query. Report is stored in a document and an URL for this document is stored in the link href tag of the query result. - -``` -mode=log - - -``` - - -### Transformation Service - -SPARQL endpoint where the result of the SPARQL query is transformed using an STTL transformation specified using a transform parameter. -There may be several transformation parameters. - -Specific transformation URI for sparql query result format. - -``` -transform=st:xml | st:json | st:rdf | st:all -``` - - -#### Linked Result - -The result of a transformation may be stored in a document and an URL for this document is stored in the link href tag of the query result. - -``` -mode=link - - -``` - - -#### Transformation - -The stm:mapper transformation generates a map when query solution contains variables "location", "lat", and "lon". - -``` -transform=stm:mapper -``` - - -Transformation d3:chart for graphic chart - -``` -transform=d3:chart -``` - - -Transformation d3:hierarchy for class hierarchy, d3:graphic for graph, d3:all for both. - -``` -transform=d3:hierarchy -transform=d3:graphic -transform=d3:all -``` - - -### SHACL Service - -Execution of SHACL shapes and execution of a SPARQL query on the SHACL validation report graph. 
Parameter shacl-shape-url is the URL of a SHACL document that contains the shapes to be evaluated. - -``` -/sparql? -mode=shacl& -uri=shacl-shape-url& -query=select * where { ?report sh:conforms ?b } -``` - - -### Service with Before & After - -Exemple of service where queries are executed before and after the main query. Parameters uri are URL of SPARQL query documents. - -``` -mode=before&uri=url1&mode=after&uri=url2 -``` diff --git a/docs/source/federation/federated_and_distributed_queries.md b/docs/source/federation/federated_and_distributed_queries.md deleted file mode 100644 index 5e0b804f0..000000000 --- a/docs/source/federation/federated_and_distributed_queries.md +++ /dev/null @@ -1,92 +0,0 @@ -# Federated Queries and Federation - -Corese facilitates Federated Queries, enabling users to execute SPARQL queries seamlessly across multiple data sources or endpoints. This document guides you through utilizing Federated Queries and Federation in Corese and details the configuration necessary. - -1. [Federated Queries and Federation](#federated-queries-and-federation) - 1. [1. Federated Queries](#1-federated-queries) - 2. [2. Federations](#2-federations) - -## 1. Federated Queries - -**Basic syntax:** - -Use the `@federate` directive to specify different endpoints. Below is the basic syntax: - -```sparql -@federate … -select * where {?x ?p ?y} -``` - -**Retrieving Provenance Information:** - -To obtain additional details on the origin of the data, include the `@provenance` keyword: - -```sparql -@federate … -@provenance -select * where {?x ?p ?y} -``` - -**Configuration for Corese-Server:** - -In Corese-Server, it's necessary to explicitly specify the list of allowed endpoints. For more details, refer to [Restrict Access to External Endpoints](https://github.com/Wimmics/corese/blob/master/docs/getting%20started/Getting%20Started%20With%20Corese-server.md#4-restrict-access-to-external-endpoints). 
- -For non-public servers, the `-su` option can be used to allow all endpoints: - -```shell -java -jar corese-server.jar -su -``` - -This option executes the server in superuser mode, allowing connections to all endpoints. This setting is not recommended for public servers due to security concerns. - -## 2. Federations - -A Federation in Corese is a named set of endpoints, defined in a file to avoid the repetition of listing endpoints in each query. - -**Defining a Federation:** - -1. Create a `federation.ttl` file with the following content: - -```turtle -# Define a federation - a st:Federation ; - st:definition ( - - - ). -``` - -2. Next, create a `config.properties` file with the following line - -```properties -FEDERATION = /path/to/federation.ttl -``` - -3. Launch Corese using the `config.properties` file: - -```shell -java -jar corese-server.jar -init config.properties -``` - -```shell -java -jar corese-gui.jar -init config.properties -``` - -```shell -echo "" | java -jar corese-command.jar sparql -if turtle -q ./query.rq --init config.properties -``` - -> Note: `echo ""` and `-if` turtle are workaround methods as this command is not designed to function without input. - -4. Finally, execute a federated query using the federation: - -```sparql -@federation -select * where {?x ?p ?y} -``` - - diff --git a/docs/source/getting_started/getting_started_with_corese-core.md b/docs/source/getting_started/getting_started_with_corese-core.md deleted file mode 100644 index 1785df6d5..000000000 --- a/docs/source/getting_started/getting_started_with_corese-core.md +++ /dev/null @@ -1,885 +0,0 @@ -# Getting Started with Corese-Core - -This tutorial shows how to use the Corese-Core library through simple examples of its main features. -We assume basic knowledge of Java programming and the Semantic Web. - -- The first section describes how to create, load, and export a Graph. 
-- The second section shows how to query a graph using [SPARQL](https://www.w3.org/TR/sparql11-query/). -- The third section details how to validate a graph using the [Shapes Constraint Language (SHACL)](https://www.w3.org/TR/shacl/). -- The fourth section shows how to transform a graph using the extension language [SPARQL Template Transformation Language (STTL)](https://files.inria.fr/corese/doc/sttl.html). -- The fifth section details how to apply a set of rules on a graph using the [SPARQL Rule](https://files.inria.fr/corese/doc/rule.html) extension language. -- Finally, the sixth section describes how to define and use functions with the [LDScript](https://files.inria.fr/corese/doc/ldscript.html) extension language. - -## **1. Installation** - -Installation instructions are available on the [installation page](../install.md). - -## **2. Graph** - -This section describes how to create a graph manually, load a graph from a file, and serialize a graph to a file. - -### **2.1. Build a Graph Programmatically** - -The example below shows how to create the following RDF graph: - -```mermaid -graph LR; - classDef IRI fill:#FEAE65,stroke-width:0px - classDef DT fill:#FEFEFE,stroke-width:0px - - iri:EdithPiaf(["ex:EdithPiaf"]) - iri:Singer(["ex:Singer"]) - dt:Edith(["''Edith''"]) - dt:Piaf(["''Piaf''"]) - - class iri:EdithPiaf,iri:Singer IRI - class dt:Edith,dt:Piaf DT - - iri:EdithPiaf--rdf:type-->iri:Singer; - iri:EdithPiaf--ex:firstName-->dt:Edith; - iri:EdithPiaf--ex:lastName-->dt:Piaf; -``` - -This graph represents three RDF statements: - -- Edith Piaf is a singer. -- Edith Piaf's first name is Edith. -- Edith Piaf's last name is Piaf. 
- -```java -import fr.inria.corese.core.kgram.api.core.Node; -import fr.inria.corese.core.logic.RDF; - -public class Example { - public static void main(String[] args) { - // Define the namespace 'ex' - String ex = "http://example.org/"; - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Create and add IRIs to the Graph - Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); - Node singerIRI = graph.addResource(ex + "Singer"); - - // Create and add properties to the Graph - Node rdfTypeProperty = graph.addProperty(RDF.TYPE); - Node firstNameProperty = graph.addProperty(ex + "firstName"); - Node lastNameProperty = graph.addProperty(ex + "lastName"); - - // Create and add literals to the Graph - Node edithLiteral = graph.addLiteral("Edith"); - Node piafLiteral = graph.addLiteral("Piaf"); - - // Add statements to the graph - graph.addEdge(edithPiafIRI, rdfTypeProperty, singerIRI); - graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); - graph.addEdge(edithPiafIRI, lastNameProperty, piafLiteral); - } -} -``` - -### **2.2. Load a Graph from a File** - -This example shows how to load a graph from a file. - -```java -import fr.inria.corese.core.api.Loader; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.load.LoadException; - -public class Example { - public static void main(String[] args) throws LoadException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Create loader and parse file - Load loader = Load.create(graph); - loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); - } -} -``` - -Corese Loader supports the following formats: - -- RDF/XML (`Loader.format.RDFXML_FORMAT`) -- Turtle (`Loader.format.TURTLE_FORMAT`) -- TriG (`Loader.format.TRIG_FORMAT`) -- JSON-LD (`Loader.format.JSONLD_FORMAT`) -- N-Triples (`Loader.format.NT_FORMAT`) -- N-Quads (`Loader.format.NQUADS_FORMAT`) -- RDFa (`Loader.format.RDFA_FORMAT`) - -### **2.3. 
Export a Graph to a File** - -This example shows how to serialize a graph into a file in Turtle format. - -```java -import java.io.FileWriter; -import java.io.IOException; - -import fr.inria.corese.core.kgram.api.core.Node; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.sparql.api.ResultFormatDef; - -public class Example { - public static void main(String[] args) throws IOException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - String ex = "http://example.org/"; - Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); - Node firstNameProperty = graph.addProperty(ex + "firstName"); - Node edithLiteral = graph.addLiteral("Edith"); - graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); - - // Create exporter - ResultFormat exporter = ResultFormat.create(graph, ResultFormatDef.format.TURTLE_FORMAT); - String result = exporter.toString(); - - // Write result to a file - try (FileWriter writer = new FileWriter("output_graph.ttl")) { - writer.write(result); - } - } -} -``` - -Corese can serialize graphs into the following formats: - -- RDF/XML (`ResultFormatDef.format.RDF_XML_FORMAT`) -- Turtle (`ResultFormatDef.format.TURTLE_FORMAT`) -- TriG (`ResultFormatDef.format.TRIG_FORMAT`) -- JSON-LD (`ResultFormatDef.format.JSONLD_FORMAT`) -- N-Triples (`ResultFormatDef.format.NTRIPLES_FORMAT`) -- N-Quads (`ResultFormatDef.format.NQUADS_FORMAT`) -- Canonical RDF SHA-256 (`ResultFormatDef.format.RDFC10_FORMAT`) -- Canonical RDF SHA-384 (`ResultFormatDef.format.RDFC10_SHA384_FORMAT`) - -## **3. SPARQL Queries** - -This section describes how to query a graph using [SPARQL](https://www.w3.org/TR/sparql11-query/) in Corese. - -### **3.1. Executing a SPARQL SELECT Query** - -This example shows how to execute a SPARQL `SELECT` query and retrieve results. 
- -```java -import fr.inria.corese.core.kgram.api.core.Node; -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - String ex = "http://example.org/"; - Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); - Node firstNameProperty = graph.addProperty(ex + "firstName"); - Node edithLiteral = graph.addLiteral("Edith"); - graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); - - // Load and execute SPARQL query - QueryProcess exec = QueryProcess.create(graph); - Mappings map = exec.query("select * where { ?s ?p ?o }"); - - // Print results in Markdown format - System.out.println(ResultFormat.create(map, ResultFormat.format.CSV_FORMAT).toString()); - } -} -``` - -Other supported formats: - -```java -ResultFormat.format.XML_FORMAT -ResultFormat.format.JSON_FORMAT -ResultFormat.format.CSV_FORMAT -ResultFormat.format.TSV_FORMAT -ResultFormat.format.MARKDOWN_FORMAT -``` - -### 3.2. SPARQL ASK Query - -This example shows how to execute a SPARQL `ASK` query and print results. 
- -```java -import fr.inria.corese.core.kgram.api.core.Node; -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - String ex = "http://example.org/"; - Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); - Node firstNameProperty = graph.addProperty(ex + "firstName"); - Node edithLiteral = graph.addLiteral("Edith"); - graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); - - // Load and execute SPARQL query - QueryProcess exec = QueryProcess.create(graph); - Mappings map = exec.query("PREFIX ex: SELECT ?x WHERE { ex:EdithPiaf ex:firstName ?x }"); - - // Print boolean result - // If the mapping is empty, the result is false; otherwise, it is true. - System.out.println(!map.isEmpty()); - } -} -``` - -### **3.3. SPARQL CONSTRUCT Query** - -This example shows how to execute a SPARQL `CONSTRUCT` query and retrieve results. - -```java -import fr.inria.corese.core.api.Loader; -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - Load loader = Load.create(graph); - loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); - - // Load and execute SPARQL query - QueryProcess exec = QueryProcess.create(graph); - Mappings map = exec.query(""" - prefix foaf: - prefix vcard: - - construct { - ?person vcard:FN ?name - } - where { - ?person foaf:name ?name. 
- } - """); - - // Get result graph - Graph resultGraph = (Graph) map.getGraph(); - - // Print results in TriG format - System.out.println(ResultFormat.create(resultGraph, ResultFormat.format.TRIG_FORMAT).toString()); - } -} -``` - -Other supported formats: - -```java -ResultFormat.format.RDF_XML_FORMAT -ResultFormat.format.TURTLE_FORMAT -ResultFormat.format.TRIG_FORMAT -ResultFormat.format.JSONLD_FORMAT -ResultFormat.format.NTRIPLES_FORMAT -ResultFormat.format.NQUADS_FORMAT -ResultFormat.format.RDFC10_FORMAT -ResultFormat.format.RDFC10_SHA384_FORMAT -``` - -### **3.4. SPARQL UPDATE Query** - -This example shows how to execute a SPARQL `UPDATE` query. - -```java -import fr.inria.corese.core.api.Loader; -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException { - - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - Load loader = Load.create(graph); - loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); - - // Load and execute SPARQL query - QueryProcess exec = QueryProcess.create(graph); - exec.query(""" - PREFIX foaf: - PREFIX vcard: - - INSERT { - ?person vcard:FN ?name - } - WHERE { - ?person foaf:name ?name. - } - """); - - // Print updated graph in Turtle format - System.out.println(ResultFormat.create(graph, ResultFormat.format.TURTLE_FORMAT).toString()); - } -} -``` - -Other supported formats: - -```java -ResultFormat.format.RDF_XML_FORMAT -ResultFormat.format.TURTLE_FORMAT -ResultFormat.format.TRIG_FORMAT -ResultFormat.format.JSONLD_FORMAT -ResultFormat.format.NTRIPLES_FORMAT -ResultFormat.format.NQUADS_FORMAT -ResultFormat.format.RDFC10_FORMAT -ResultFormat.format.RDFC10_SHA384_FORMAT -``` - -## **4. 
Shapes Constraint Language (SHACL)** - -This section shows how to validate a graph using [SHACL](https://www.w3.org/TR/shacl/). - -```java -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.load.LoadException; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.shacl.Shacl; -import fr.inria.corese.core.sparql.api.ResultFormatDef; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws LoadException, EngineException { - // Load data graph - Graph dataGraph = Graph.create(); - Load loader = Load.create(dataGraph); - loader.parse("data.ttl"); - - // Load shape graph - Graph shapeGraph = Graph.create(); - loader = Load.create(shapeGraph); - loader.parse("shapes.ttl"); - - // Validate the data - Shacl shacl = new Shacl(dataGraph, shapeGraph); - Graph result = shacl.eval(); - - // Print results - ResultFormat exporter = ResultFormat.create(result, ResultFormatDef.format.TURTLE_FORMAT); - System.out.println(exporter.toString()); - } -} -``` - -**Input graph file path:** - -```turtle -@prefix ex: . -@prefix rdf: . - -ex:Alice ex:ssn "987-65-4323" ; - ex:worksFor ex:Haribo, ex:KitKat ; - rdf:type ex:Person . - -ex:Bob ex:ssn "124-35-6789" ; - ex:worksFor ex:Twitch ; - rdf:type ex:Person . - -ex:Calvin ex:ssn "648-67-6545" ; - ex:worksFor ex:UntypedCompany ; - rdf:type ex:Person . - -ex:Haribo rdf:type ex:Company . -ex:KitKat rdf:type ex:Company . -ex:Twitch rdf:type ex:Company . -ex:UntypedCompany rdf:type ex:Company . -``` - -**Input shape file path:** - -```turtle -@prefix sh: . -@prefix xsd: . -@prefix ex: . -@prefix rdf: . 
- -ex:PersonShape - a sh:NodeShape ; - sh:targetClass ex:Person ; - sh:property [ - sh:path ex:ssn ; - sh:maxCount 1 ; - sh:datatype xsd:string ; - sh:pattern "^\\d{3}-\\d{2}-\\d{4}$" ; - ] ; - sh:property [ - sh:path ex:worksFor ; - sh:class ex:Company ; - sh:nodeKind sh:IRI ; - ] ; - sh:closed true ; - sh:ignoredProperties ( rdf:type ) . -``` - -**Result:** - -```turtle -@prefix xsh: . -@prefix sh: . - -[a sh:ValidationReport ; - sh:conforms true] . -``` - -## 5. SPARQL Template Transformation Language (STTL) - -This section shows how to transform a graph using a subset of the [SPARQL Template Transformation Language (STTL)](https://files.inria.fr/corese/doc/sttl.html). - -### 5.1. Transform a graph into a visual HTML format - -This example details how to load a data graph from a file, transform it into a visual HTML format, and export the result to a file. - -```java -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; - -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.load.LoadException; -import fr.inria.corese.core.print.ResultFormat; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws LoadException, EngineException, IOException { - // Open template file - Path path = Path.of("input template file path"); - String sttl_query = Files.readString(path, StandardCharsets.UTF_8); - - // Load data graph - Graph dataGraph = Graph.create(); - Load ld = Load.create(dataGraph); - ld.parse("input graph file path"); - - // Apply STTL query - QueryProcess exec = QueryProcess.create(dataGraph); - Mappings map = exec.query(sttl_query); - - // Export result - ResultFormat result_xml = ResultFormat.create(map); - result_xml.write("output file path"); - } -} -``` - -**Input template file:** - -```rq 
-template { - format { - "\n\n\n\t\n%s\t
\n\n\n" - group { - format { - "\t\t\n\t\t\t%s\n\t\t\t%s\n\t\t\t%s\n\t\t\n" - ?s ?p ?o - } - } - } -} -where { - ?s ?p ?o -} -order by ?s ?p ?o -``` - -**Result:** - -```html - - - - - - - - - - - - - - - - - - -
http://example.org/EdithPiafhttp://example.org/firstNameEdith
http://example.org/EdithPiafhttp://example.org/lastNamePiaf
http://example.org/EdithPiafhttp://www.w3.org/1999/02/22-rdf-syntax-ns#typehttp://example.org/Singer
- - -``` - -| ?s | ?p | ?o | -| ------------------------------ | ------------------------------------------------- | --------------------------- | -| | | Edith | -| | | Piaf | -| | | | - -## 6. **SPARQL Rule** - -This section details how to apply a set of rules on a graph using the [SPARQL Rule extension language](https://files.inria.fr/corese/doc/rule.html). - -### 6.1. Load rules from a file - -The example below shows the application of two rules (symmetry and transitivity) on a simple graph. - -```java -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.load.LoadException; -import fr.inria.corese.core.load.RuleLoad; -import fr.inria.corese.core.rule.RuleEngine; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws LoadException, EngineException { - // Create and load data in a graph - Graph dataGraph = Graph.create(); - Load dataLoader = Load.create(dataGraph); - dataLoader.parse("input graph file path"); - - // Create and load rules into a rules engine - RuleEngine ruleEngine = RuleEngine.create(dataGraph); - RuleLoad ruleLoader = RuleLoad.create(ruleEngine); - ruleLoader.parse("input rules file path.rul"); - - // Apply rules on the graph - ruleEngine.process(); - } -} -``` - -**Original graph:** - -```mermaid -graph LR; - classDef IRI fill:#FEAE65,stroke-width:0px - - iri:alice(["ex:Alice"]) - iri:bob(["ex:Bob"]) - iri:camille(["ex:Camille"]) - iri:daniel(["ex:Daniel"]) - iri:elise(["ex:Elise"]) - - class iri:alice,iri:bob,iri:camille,iri:daniel,iri:elise IRI - - iri:alice--ex:friend-->iri:bob; - iri:bob--ex:friend-->iri:camille; - iri:daniel--ex:isMarriedTo-->iri:elise; -``` - -**Rules file:** - -- Symmetry: `if X➝Y then Y➝X` -- Transitivity: `if X➝Y➝Z then X➝Z` - -```xml - - - - - - - - construct { - ?x ?p ?z - } - where { - ?p a owl:TransitiveProperty . - ?x ?p ?y . 
- ?y ?p ?z - } - ]]> - - - - - - - construct { - ?y ?p ?x - } - where { - ?p a owl:SymmetricProperty . - ?x ?p ?y . - } - ]]> - - - -``` - -**Result graph:** - -```mermaid -graph LR; - classDef IRI fill:#FEAE65,stroke-width:0px; - - iri_alice["ex:Alice"]; - iri_bob["ex:Bob"]; - iri_camille["ex:Camille"]; - iri_daniel["ex:Daniel"]; - iri_elise["ex:Elise"]; - - class iri_alice,iri_bob,iri_camille,iri_daniel,iri_elise IRI; - - iri_alice -- ex:friend --> iri_bob; - iri_bob -- ex:friend --> iri_camille; - iri_alice -. ex:friend .-> iri_camille; - iri_daniel -- ex:isMarriedTo --> iri_elise; - iri_elise -. ex:isMarriedTo .-> iri_daniel; -``` - -### 6.2. OWL Rules - -The example below shows the application of OWL RL rules. - -```java -import fr.inria.corese.core.api.Loader; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.load.LoadException; -import fr.inria.corese.core.rule.RuleEngine; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws LoadException, EngineException { - // Create a new empty Graph - Graph graph = Graph.create(); - - // Add some triples to the graph - Load loader = Load.create(graph); - loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); - - // Apply rules - RuleEngine engine = RuleEngine.create(graph); - engine.setProfile(RuleEngine.OWL_RL); - engine.process(); - } -} -``` - -## 7. LDScript - -This section describes how to define and use functions with the [LDScript extension language](https://files.inria.fr/corese/doc/ldscript.html). - -### 7.1. Fibonacci function call from Java - -This example shows how to define and compute the twelfth number of the Fibonacci sequence. 
- -```java -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; - -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.api.IDatatype; -import fr.inria.corese.core.sparql.datatype.DatatypeMap; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException, IOException { - // Open LDScript file - Path path = Path.of("input LDScript file path.rq"); - String ldScript = Files.readString(path, StandardCharsets.UTF_8); - - // Compile LDScript - QueryProcess exec = QueryProcess.create(); - exec.compile(ldScript); - - // Compute the twelfth number of the Fibonacci sequence - String name = "http://ns.inria.fr/fibonacci"; - IDatatype dt = exec.funcall(name, DatatypeMap.newInstance(12)); - - // Print result - System.out.println(dt); - } -} -``` - -**Input LDScript file path:** - -```rq -prefix fun: - -@public function fun:fibonacci(n) { - if (n < 0) { - error() - } - else if (n = 0) { - return(0) - } - else if (n = 1) { - return(1) - } - else { - return (fun:fibonacci(n - 1) + fun:fibonacci(n - 2)) - } -} -``` - -### 7.2. LDScript in SPARQL - -This example shows how to call an LDScript function from a SPARQL query. - -```java -import java.io.IOException; - -import fr.inria.corese.core.kgram.core.Mappings; -import fr.inria.corese.core.load.Load; -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException, IOException { - String check_query = """ - prefix ex: - prefix rdf: - prefix fun: - - select ?name ?area - where { - ?city rdf:type ex:city ; - ex:name ?name ; - ex:area ?area . 
- filter(?area > fun:toSquareKm(40)) - } - - # Convert square mile to square kilometer - function fun:toSquareKm(squareMile) { - return (squareMile * 2.59) - } - """; - - // Load graph - Graph graph = Graph.create(); - Load ld = Load.create(graph); - ld.parse("input file path"); - - // SPARQL query - QueryProcess exec = QueryProcess.create(graph); - Mappings map = exec.query(check_query); - } -} -``` - -### 7.3. Advanced Example - -The following Java program computes the percentage of people subscribed to social networks in a city compared to its total number of inhabitants. The data is collected from Wikidata. - -```java -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; - -import fr.inria.corese.core.query.QueryProcess; -import fr.inria.corese.core.sparql.api.IDatatype; -import fr.inria.corese.core.sparql.exceptions.EngineException; - -public class Example { - public static void main(String[] args) throws EngineException, IOException { - // Open LDScript file - Path path = Path.of("input LDScript file path.rq"); - String ldScript = Files.readString(path, StandardCharsets.UTF_8); - - // Compile LDScript - QueryProcess exec = QueryProcess.create(); - exec.compile(ldScript); - - // Execute program - String name = "http://ns.inria.fr/main"; - IDatatype dt = exec.funcall(name); - - // Print result - System.out.println(dt); - } -} -``` - -**Input LDScript File:** - -```rq -prefix fun: - -prefix wd: -prefix wdt: -prefix wikibase: -prefix bd: -prefix rdfs: - -@public -function fun:percentage(sub, total) { - return (sub / total * 100) -} - -@public -function fun:citypopulationsocialmedia() { - query( - select ?city ?citylabel ?population ?socialmediafollower where { - service { - ?city wdt:P31 wd:Q1549591; - wdt:P8687 ?socialmediafollower; - wdt:P1082 ?population. - optional { - ?city rdfs:label ?citylabel - filter (lang(?citylabel) = "en"). 
- } - } - } - order by desc (?socialmediafollower) - limit 100 - ) -} - -@public -function fun:main() { - xt:sort( - maplist ( - function(x) { - let ((citylabel population socialmediafollower) = x) { - return (xt:list(citylabel, fun:percentage(socialmediafollower, population))) - } - }, - fun:citypopulationsocialmedia() - ), - function(x,y) { - let ((x_name, x_value) = x, (y_name, y_value) = y) { - if (x_value < y_value, 1, if(x=y, o, -1)) - } - } - ) -} -``` diff --git a/docs/source/install.md b/docs/source/install.md index 09e3f8f5a..ffc18e9c8 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -1,11 +1,10 @@ + + # Installation -## Corese Core - -[![Badge Maven Central](./_static/logo/badge_maven.svg)](https://central.sonatype.com/artifact/fr.inria.corese/corese-core) [![Badge Github Release](./_static/logo/badge_github.svg)](https://github.com/corese-stack/corese-core/releases) +**Maven:** ```xml - fr.inria.corese corese-core @@ -13,45 +12,17 @@ ``` -```Groovy -// Gradle -implementation 'fr.inria.corese:corese-core:X.Y.Z' // replace X.Y.Z with the version number -``` - -## Storage Modules - -**Jena storage:** - -[![Badge Maven Central](./_static/logo/badge_maven.svg)](https://central.sonatype.com/artifact/fr.inria.corese/corese-jena) [![Badge Github Release](./_static/logo/badge_github.svg)](https://github.com/corese-stack/corese-plugin-storage-jena/releases) - -```xml - - - fr.inria.corese - corese-jena - X.Y.Z - -``` +**Gradle:** ```Groovy -// Gradle -implementation 'fr.inria.corese:corese-jena:X.Y.Z' // replace X.Y.Z with the version number +implementation 'fr.inria.corese:corese-core:X.Y.Z' // replace X.Y.Z with the version number ``` -**RDF4J storage:** - -[![Badge Maven Central](./_static/logo/badge_maven.svg)](https://central.sonatype.com/artifact/fr.inria.corese/corese-rdf4j) [![Badge Github Release](./_static/logo/badge_github.svg)](https://github.com/corese-stack/corese-plugin-storage-rdf4j/releases) - -```xml - - - fr.inria.corese - 
corese-rdf4j - X.Y.Z - -``` +**Jar:** -```Groovy -// Gradle -implementation 'fr.inria.corese:corese-rdf4j:X.Y.Z' // replace X.Y.Z with the version number -``` + + GitHub Release + + + Maven Central + diff --git a/docs/source/storage/configuring_and_connecting_to_different_storage_systems_in_corese.md b/docs/source/storage/configuring_and_connecting_to_different_storage_systems_in_corese.md deleted file mode 100644 index 2ec5da9ab..000000000 --- a/docs/source/storage/configuring_and_connecting_to_different_storage_systems_in_corese.md +++ /dev/null @@ -1,230 +0,0 @@ -# Configuring and Connecting to Different Storage Systems in Corese - -Corese offers the possibility to connect to a range of storage systems for storing and managing your RDF data. In this document, you'll find information on how to use these storage systems with Corese, including instructions on configuring and utilizing them in the GUI, Server, and Library. - -## 1. Introduction - -In Corese versions prior to 4.4, graphs were loaded and manipulated in RAM. With the introduction of Data Manager in version 4.4, the Corese engine and storage systems are now fully decoupled. This offers several advantages such as : - -- The ability to easily add new storage systems -- The use of persistent storage systems -- The simultaneous use of multiple storage systems -- The flexibility to choose a storage system that meets specific needs - -## 2. What is a Data Manager? - -A data manager in Corese is a bridge between the Corese engine and a storage system, enabling the engine to store and manage graph data in a variety of storage systems while abstracting away the underlying details of those systems. - -A data manager is implemented as an interface called `Datamanager`, and concrete implementations such as `JenaTdb1DataManager` and `Rdf4jModelDataManager` are provided for specific storage systems. By implementing the `Datamanager` interface, it is possible to use the Corese engine with any storage structure. 
- -Currently, there are three implementations of `Datamanager` available: - -- `JenaTdb1DataManager` for Jena TDB1 storage -- `Rdf4jModelDataManager` for RDF4J model -- `CoreseGraphDataManager` for Corese graph - -## 3. Available Data Manager Implementations - -### 3.1. Jena TDB1 - -[Jena](https://jena.apache.org/) is an open-source Semantic Web framework written in Java and developed by the [Apache Jena project](https://jena.apache.org/). It provides a set of libraries and tools for building applications that process and manipulate RDF data. - -[TDB](https://jena.apache.org/documentation/tdb/) is a native triple store for Jena, designed to efficiently store and query large amounts of RDF data. It supports the full range of RDF languages and standards. - -The `JenaTdb1DataManager` allows the Corese engine to connect to a TDB1 database. Corese can connect to an existing Jena TDB1 database or create a new one. - -TDB1 is a persistent storage system that supports transactions and native concurrent access. In our larger tests, it has been able to efficiently load and manage graphs with up to 600 million triples. However, it is likely capable of handling even larger graph sizes. - -Here is a summary of TDB1's characteristics: - -| Characteristic | Description | -| ------------------------ | ----------- | -| Persistence of data | Yes | -| Native concurrent access | Yes | - -> You can use all the native tools and [Apache Jena - TDB Command-line Utilities](https://jena.apache.org/documentation/tdb/commands.html) . For example, you can use `tdbloader`, a software, to efficiently create a TDB1 database from serialized RDF data. - -### 3.2. Corese Graph - -Corese Graph is the historical API of Corese for storing and manipulating RDF data in memory. It is stable and optimized, and capable of handling large graphs within the limits of available RAM. - -However, Corese Graph does not offer persistence of data and supports native concurrent access. 
- -Here is a summary of Corese Graph's characteristics: - -| Characteristic | Description | -| ------------------------ | ----------- | -| Persistence of data | No | -| Native concurrent access | Yes | - -### 3.3. RDF4J Model - -[RDF4J](https://rdf4j.org/) is an open-source Java library for working with RDF data. It provides a set of APIs for parsing and serializing RDF, querying with SPARQL, and modeling RDF data with RDFS and OWL. - -[The RDF Model API](https://rdf4j.org/documentation/programming/model/) is a Java interface for storing and manipulating RDF data in memory (It does not store data on disk). This API provides a high-level, abstract representation of an RDF graph. The `Rdf4JModelDataManager` allows the Corese engine to connect to an existing RDF4J `Model` object or create a new one. - -This implementation is not optimized for storing large amounts of data and does not support persistence of data, concurrency, or transactions. It was our first implementation as a proof-of-concept and is not recommended for use in production environments. However, it may still be useful for small-scale testing and development purposes. - -Here is a summary of the RDF4J Model's characteristics: - -| Characteristic | Description | -| ------------------------ | ----------- | -| Persistence of data | No | -| Native concurrent access | No | - -## 4. Configuring Storage Systems in Corese-GUI and Corese-Server - -To configure storage systems in the Corese GUI or Server, it is necessary to create a properties file. This file should include the `STORAGE` configuration property, which specifies the storage systems to use. - -To run Corese-GUI or Corese-Server with a property file, the `-init` option must be used. For instance, the following bash command runs Corese-GUI using the `gui.properties` file: - -```bash -java -jar corese.jar -init "config.properties" -``` - -This will load the storage systems specified in the `STORAGE` property in the `config.properties` file. 
- -> If no configuration file is specified, Corese will use the default configuration, which is to use a single Corese graph storage system in memory. This behavior is the same as in versions prior to 4.4. - -### 4.1. Configuring One Storage System - -To configure a single storage system, you need to specify the type and ID of the system in the `STORAGE` property. You can also include optional parameters for the system. - -```properties -STORAGE = TYPE_BD1,ID_DB1,PARAM_BD1 -``` - -The fields are as follows: - -`TYPE_BD1`: The type of storage system to use. Possible values are `jena_tdb1`, `rdf4j_model`, and `corese_graph`. - -`ID_DB1`: The ID of the storage system. This ID will be used to reference the storage system in SPARQL queries. - -`PARAM_BD1`: (Optional) Constructor parameter for the storage system. - -| DB type | Parameter | -| ------------ | ---------------------------------------------------------------------------- | -| jena_tdb1 | Empty (use JenaTDB in memory) or path of TDB1 database (use JenaTDB in a DB) | -| rdf4j_model | Empty | -| corese_graph | Empty | - -For example, to configure a Jena TDB1 storage system with ID `musicDB` and the `/…/music` directory as the storage location, the following `STORAGE` property should be specified: - -```properties -STORAGE = jena_tdb1,musicDB,/…/music -``` - -### 4.2. Configuring Multiple Storage Systems - -To configure multiple storage systems in Corese, simply separate the configurations for each storage system with a semicolon (`;`). This is similar to configuring a single storage system, as described in the previous section. - -```properties -STORAGE = TYPE_BD1,ID_DB1,PARAM_BD1;TYPE_BD2,ID_DB2,PARAM_BD2;… -``` - -In the case where multiple storage systems are configured, the first storage system listed is the default storage system. It is accessible directly in SPARQL queries, while the other storage systems must be accessed using the `SERVICE` keyword. 
- -For example, given the following configuration: - -```properties -STORAGE = corese_graph,friend;jena_tdb1,mélomane;jena_tdb1,music -``` - -The following SPARQL query retrieves information about a person's friends and the music they like: - -```sparql -PREFIX music: -PREFIX person: -PREFIX foaf: - -SELECT ?friendName ?artistName ?description -WHERE { - # Select Casandra's friends from the "friend" database (default dataset) - person:Casandra foaf:knows ?friend . - - # Retrieve the Casandra's friend's name and the artist they like from the "mélomane" database - SERVICE { - ?friend person:likeMusic ?artist . - ?friend foaf:firstName ?friendName . - } - - # Retrieve the artist's name and a description of their music from the "music" database - SERVICE { - ?artist music:name ?artistName . - ?artist music:description ?description . - } -} -``` - -To execute a query in the GUI, open the Corese GUI and enter the query in the SPARQL Query tab. - -To execute in the Server, send a request to the endpoint `http://localhost:8080/sparql` if you are running it locally. - -### 4.3. [Optional] Assign storage to a specific SPARQL endpoint with Corese Server - -Corese Server allows you to assign storage to a specific SPARQL endpoint by define a properties file (eg: `server.properties`) and a profile file (eg: `profile.ttl`). The properties file defines the storage systems available and their locations, while the profile file assigns a storage system to a specific endpoint. 
- -To create two storage systems, `db1` and `db2`, using the Jena TDB1 storage system and located at `/…/album` and `/…/music`, respectively, you can use the following in the properties file: - -```properties -STORAGE = jena_tdb1,db1,/…/album;jena_tdb1,db2,/…/music -``` - -To assign the `db1` and `db2` storage systems to the `album` (``) and `music` (``) endpoints, respectively, you can use the following in the profile file: - -```turtle -prefix st: - -# Album endpoint, available at http://localhost:8080/album/sparql -<#_1> a st:Server; - st:service "album"; # Assigns the name "album" to this endpoint - st:storage "db1". # Assigns the "db1" storage system to this endpoint - -# Music endpoint, available at http://localhost:8080/music/sparql -<#_2> a st:Server; - st:service "music"; # Assigns the name "music" to this endpoint - st:storage "db2". # Assigns the "db2" storage system to this endpoint -``` - -With this configuration, the endpoint `` will use `db1` data, and `` will use `db2` data. - -To start the server with these configurations, run the following command: - -```bash -java -jar corese-server.jar -init "server.properties" -pp "profile.ttl" -``` - -> You can learn more about profile files here: [Getting Started With corese-server](https://github.com/Wimmics/corese/blob/master/docs/getting%20started/Getting%20Started%20With%20Corese-server.md#4-to-go-deeper) - -## 5. Use Storage Systems in Corese-Library - -To build a `dataManager` using the Corese-Library, you can use a `dataManager` builder class to configure and create the `dataManager`. There are different types of `dataManager` builders available, depending on the type of `dataManager` you want to create. - -For example, the `JenaTdb1DataManagerBuilder` can be used to build a `JenaTdb1DataManager`. 
To build a `JenaTdb1DataManager` with a specific storage path, you can use the following code: - -```java -JenaTdb1DataManagerBuilder builder = new JenaTdb1DataManagerBuilder(); -builder.setStoragePath("storage/path"); -JenaTdb1DataManager dataManager = builder.build(); -``` - -Similarly, you can use the `CoreseGraphDataManagerBuilder` to build a `CoreseGraphDataManager` or the `Rdf4jModelDataManagerBuilder` to build an `Rdf4jModelDataManager`. - -To execute a query on the `dataManager`, you can use the `QueryProcess` class as follows: - -```java -// Create a QueryProcess using the dataManager -QueryProcess exec = QueryProcess.create(dataManager); - -// Execute a SPARQL query and retrieve the result as a Mappings object -Mappings map = exec.query("select * where { ?s ?p ?o }"); - -// Print the results of the query -for (Mapping m : map) { - System.out.println(m); -} -``` - -This will execute the specified SPARQL query on the `dataManager` and print the results. - -> You can learn more about Corese-Library here: [Getting Started With Corese-library]([corese/Getting Started With Corese-library.md at master · Wimmics/corese · GitHub](https://github.com/Wimmics/corese/blob/master/docs/getting%20started/Getting%20Started%20With%20Corese-library.md)) diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst deleted file mode 100644 index d6dfd0bbf..000000000 --- a/docs/source/user_guide.rst +++ /dev/null @@ -1,31 +0,0 @@ -User Guide -========== - -.. toctree:: - :hidden: - - Getting started - Storage systems - Federated and distributed queries - SPARQL service extension - -Welcome to the Corese-Core User Guide! This guide will help you get started with Corese-Core, configure storage systems, perform federated queries, and extend SPARQL functionalities. - -Contents: ---------- - -1. Getting Started with Corese-Core - Learn how to use main features of Corese-Core. - `Getting started with Corese-Core `_ - -2. 
Configuring Storage Systems - Guide to configuring and connecting to different storage systems. - `Configuring and connecting to different storage systems in Corese `_ - -3. Federated Queries - Run distributed and federated queries seamlessly. - `Federated and distributed queries `_ - -4. Advanced SPARQL Features - Explore advanced SPARQL extensions for enhanced query capabilities. - `SPARQL service extension `_ From c5012c1b4c0dfffc15e140d90d4608a0e5488c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 16:26:06 +0200 Subject: [PATCH 14/64] docs: Remove .gitignore and update index and user guide documentation --- docs/.gitignore | 25 -- docs/source/conf.py | 3 +- docs/source/index.rst | 7 +- docs/source/user_guide.md | 885 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 891 insertions(+), 29 deletions(-) delete mode 100644 docs/.gitignore create mode 100644 docs/source/user_guide.md diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 2fa6b261c..000000000 --- a/docs/.gitignore +++ /dev/null @@ -1,25 +0,0 @@ -# Generated files for docs -generated/ -bak/ -build/ - -source/* -!source/*.rst -!source/**/*.rst -!source/**/*.py - -!source/README.md -!source/install.md -!source/Doxyfile - - -!source/rest_api/ -!source/cli_ref/ - -!source/_templates/ -!source/_static/ - -!source/advanced/ -!source/federation/ -!source/getting_started/ -!source/storage/ diff --git a/docs/source/conf.py b/docs/source/conf.py index 74c551ce3..bad2b49da 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -96,7 +96,8 @@ } html_sidebars = { - "install": [], + "install": [], + "user_guide": [], } # -- MySt-parcer extension Options ------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 80a27df4b..e55b53caa 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -58,8 +58,8 @@ Corese-Core is a Java library that provides the core functionalities of 
Corese. Corese offers several interfaces ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * `corese-core `_: Java library to process RDF data and use Corese features via an API. - * `corese-server `_: Tool to easily create, configure and manage SPARQL endpoints. - * `corese-gui `_: Graphical interface that allows an easy and visual use of Corese features. + * `corese-server `_: Tool to create, configure and manage SPARQL endpoints. + * `corese-gui `_: Graphical interface that allows an easy and visual use of Corese features. * `corese-command `_: Command Line Interface for Corese that allows users to interact with Corese features from the terminal. * `corese-python (beta) `_: Python wrapper for accessing and manipulating RDF data with Corese features using py4j. @@ -94,6 +94,7 @@ Home Page .. toctree:: :hidden: - Installation + Accueil + Installation User Guide Java API diff --git a/docs/source/user_guide.md b/docs/source/user_guide.md new file mode 100644 index 000000000..1785df6d5 --- /dev/null +++ b/docs/source/user_guide.md @@ -0,0 +1,885 @@ +# Getting Started with Corese-Core + +This tutorial shows how to use the Corese-Core library through simple examples of its main features. +We assume basic knowledge of Java programming and the Semantic Web. + +- The first section describes how to create, load, and export a Graph. +- The second section shows how to query a graph using [SPARQL](https://www.w3.org/TR/sparql11-query/). +- The third section details how to validate a graph using the [Shapes Constraint Language (SHACL)](https://www.w3.org/TR/shacl/). +- The fourth section shows how to transform a graph using the extension language [SPARQL Template Transformation Language (STTL)](https://files.inria.fr/corese/doc/sttl.html). +- The fifth section details how to apply a set of rules on a graph using the [SPARQL Rule](https://files.inria.fr/corese/doc/rule.html) extension language. 
+- Finally, the sixth section describes how to define and use functions with the [LDScript](https://files.inria.fr/corese/doc/ldscript.html) extension language. + +## **1. Installation** + +Installation instructions are available on the [installation page](install.md). + +## **2. Graph** + +This section describes how to create a graph manually, load a graph from a file, and serialize a graph to a file. + +### **2.1. Build a Graph Programmatically** + +The example below shows how to create the following RDF graph: + +```mermaid +graph LR; + classDef IRI fill:#FEAE65,stroke-width:0px + classDef DT fill:#FEFEFE,stroke-width:0px + + iri:EdithPiaf(["ex:EdithPiaf"]) + iri:Singer(["ex:Singer"]) + dt:Edith(["''Edith''"]) + dt:Piaf(["''Piaf''"]) + + class iri:EdithPiaf,iri:Singer IRI + class dt:Edith,dt:Piaf DT + + iri:EdithPiaf--rdf:type-->iri:Singer; + iri:EdithPiaf--ex:firstName-->dt:Edith; + iri:EdithPiaf--ex:lastName-->dt:Piaf; +``` + +This graph represents three RDF statements: + +- Edith Piaf is a singer. +- Edith Piaf's first name is Edith. +- Edith Piaf's last name is Piaf. 
+ +```java +import fr.inria.corese.core.kgram.api.core.Node; +import fr.inria.corese.core.logic.RDF; + +public class Example { + public static void main(String[] args) { + // Define the namespace 'ex' + String ex = "http://example.org/"; + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Create and add IRIs to the Graph + Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); + Node singerIRI = graph.addResource(ex + "Singer"); + + // Create and add properties to the Graph + Node rdfTypeProperty = graph.addProperty(RDF.TYPE); + Node firstNameProperty = graph.addProperty(ex + "firstName"); + Node lastNameProperty = graph.addProperty(ex + "lastName"); + + // Create and add literals to the Graph + Node edithLiteral = graph.addLiteral("Edith"); + Node piafLiteral = graph.addLiteral("Piaf"); + + // Add statements to the graph + graph.addEdge(edithPiafIRI, rdfTypeProperty, singerIRI); + graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); + graph.addEdge(edithPiafIRI, lastNameProperty, piafLiteral); + } +} +``` + +### **2.2. Load a Graph from a File** + +This example shows how to load a graph from a file. + +```java +import fr.inria.corese.core.api.Loader; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.load.LoadException; + +public class Example { + public static void main(String[] args) throws LoadException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Create loader and parse file + Load loader = Load.create(graph); + loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); + } +} +``` + +Corese Loader supports the following formats: + +- RDF/XML (`Loader.format.RDFXML_FORMAT`) +- Turtle (`Loader.format.TURTLE_FORMAT`) +- TriG (`Loader.format.TRIG_FORMAT`) +- JSON-LD (`Loader.format.JSONLD_FORMAT`) +- N-Triples (`Loader.format.NT_FORMAT`) +- N-Quads (`Loader.format.NQUADS_FORMAT`) +- RDFa (`Loader.format.RDFA_FORMAT`) + +### **2.3. 
Export a Graph to a File** + +This example shows how to serialize a graph into a file in Turtle format. + +```java +import java.io.FileWriter; +import java.io.IOException; + +import fr.inria.corese.core.kgram.api.core.Node; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.sparql.api.ResultFormatDef; + +public class Example { + public static void main(String[] args) throws IOException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + String ex = "http://example.org/"; + Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); + Node firstNameProperty = graph.addProperty(ex + "firstName"); + Node edithLiteral = graph.addLiteral("Edith"); + graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); + + // Create exporter + ResultFormat exporter = ResultFormat.create(graph, ResultFormatDef.format.TURTLE_FORMAT); + String result = exporter.toString(); + + // Write result to a file + try (FileWriter writer = new FileWriter("output_graph.ttl")) { + writer.write(result); + } + } +} +``` + +Corese can serialize graphs into the following formats: + +- RDF/XML (`ResultFormatDef.format.RDF_XML_FORMAT`) +- Turtle (`ResultFormatDef.format.TURTLE_FORMAT`) +- TriG (`ResultFormatDef.format.TRIG_FORMAT`) +- JSON-LD (`ResultFormatDef.format.JSONLD_FORMAT`) +- N-Triples (`ResultFormatDef.format.NTRIPLES_FORMAT`) +- N-Quads (`ResultFormatDef.format.NQUADS_FORMAT`) +- Canonical RDF SHA-256 (`ResultFormatDef.format.RDFC10_FORMAT`) +- Canonical RDF SHA-384 (`ResultFormatDef.format.RDFC10_SHA384_FORMAT`) + +## **3. SPARQL Queries** + +This section describes how to query a graph using [SPARQL](https://www.w3.org/TR/sparql11-query/) in Corese. + +### **3.1. Executing a SPARQL SELECT Query** + +This example shows how to execute a SPARQL `SELECT` query and retrieve results. 
+ +```java +import fr.inria.corese.core.kgram.api.core.Node; +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + String ex = "http://example.org/"; + Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); + Node firstNameProperty = graph.addProperty(ex + "firstName"); + Node edithLiteral = graph.addLiteral("Edith"); + graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); + + // Load and execute SPARQL query + QueryProcess exec = QueryProcess.create(graph); + Mappings map = exec.query("select * where { ?s ?p ?o }"); + + // Print results in CSV format + System.out.println(ResultFormat.create(map, ResultFormat.format.CSV_FORMAT).toString()); + } +} +``` + +Other supported formats: + +```java +ResultFormat.format.XML_FORMAT +ResultFormat.format.JSON_FORMAT +ResultFormat.format.CSV_FORMAT +ResultFormat.format.TSV_FORMAT +ResultFormat.format.MARKDOWN_FORMAT +``` + +### 3.2. SPARQL ASK Query + +This example shows how to execute a SPARQL `ASK` query and print results. 
+ +```java +import fr.inria.corese.core.kgram.api.core.Node; +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + String ex = "http://example.org/"; + Node edithPiafIRI = graph.addResource(ex + "EdithPiaf"); + Node firstNameProperty = graph.addProperty(ex + "firstName"); + Node edithLiteral = graph.addLiteral("Edith"); + graph.addEdge(edithPiafIRI, firstNameProperty, edithLiteral); + + // Load and execute SPARQL query + QueryProcess exec = QueryProcess.create(graph); + Mappings map = exec.query("PREFIX ex: <http://example.org/> SELECT ?x WHERE { ex:EdithPiaf ex:firstName ?x }"); + + // Print boolean result + // If the mapping is empty, the result is false; otherwise, it is true. + System.out.println(!map.isEmpty()); + } +} +``` + +### **3.3. SPARQL CONSTRUCT Query** + +This example shows how to execute a SPARQL `CONSTRUCT` query and retrieve results. + +```java +import fr.inria.corese.core.api.Loader; +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + Load loader = Load.create(graph); + loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); + + // Load and execute SPARQL query + QueryProcess exec = QueryProcess.create(graph); + Mappings map = exec.query(""" + prefix foaf: <http://xmlns.com/foaf/0.1/> + prefix vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> + + construct { + ?person vcard:FN ?name + } + where { + ?person foaf:name ?name. 
+ } + """); + + // Get result graph + Graph resultGraph = (Graph) map.getGraph(); + + // Print results in TriG format + System.out.println(ResultFormat.create(resultGraph, ResultFormat.format.TRIG_FORMAT).toString()); + } +} +``` + +Other supported formats: + +```java +ResultFormat.format.RDF_XML_FORMAT +ResultFormat.format.TURTLE_FORMAT +ResultFormat.format.TRIG_FORMAT +ResultFormat.format.JSONLD_FORMAT +ResultFormat.format.NTRIPLES_FORMAT +ResultFormat.format.NQUADS_FORMAT +ResultFormat.format.RDFC10_FORMAT +ResultFormat.format.RDFC10_SHA384_FORMAT +``` + +### **3.4. SPARQL UPDATE Query** + +This example shows how to execute a SPARQL `UPDATE` query. + +```java +import fr.inria.corese.core.api.Loader; +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException { + + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + Load loader = Load.create(graph); + loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); + + // Load and execute SPARQL query + QueryProcess exec = QueryProcess.create(graph); + exec.query(""" + PREFIX foaf: <http://xmlns.com/foaf/0.1/> + PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> + + INSERT { + ?person vcard:FN ?name + } + WHERE { + ?person foaf:name ?name. + } + """); + + // Print updated graph in Turtle format + System.out.println(ResultFormat.create(graph, ResultFormat.format.TURTLE_FORMAT).toString()); + } +} +``` + +Other supported formats: + +```java +ResultFormat.format.RDF_XML_FORMAT +ResultFormat.format.TURTLE_FORMAT +ResultFormat.format.TRIG_FORMAT +ResultFormat.format.JSONLD_FORMAT +ResultFormat.format.NTRIPLES_FORMAT +ResultFormat.format.NQUADS_FORMAT +ResultFormat.format.RDFC10_FORMAT +ResultFormat.format.RDFC10_SHA384_FORMAT +``` + +## **4. 
Shapes Constraint Language (SHACL)** + +This section shows how to validate a graph using [SHACL](https://www.w3.org/TR/shacl/). + +```java +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.load.LoadException; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.shacl.Shacl; +import fr.inria.corese.core.sparql.api.ResultFormatDef; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws LoadException, EngineException { + // Load data graph + Graph dataGraph = Graph.create(); + Load loader = Load.create(dataGraph); + loader.parse("data.ttl"); + + // Load shape graph + Graph shapeGraph = Graph.create(); + loader = Load.create(shapeGraph); + loader.parse("shapes.ttl"); + + // Validate the data + Shacl shacl = new Shacl(dataGraph, shapeGraph); + Graph result = shacl.eval(); + + // Print results + ResultFormat exporter = ResultFormat.create(result, ResultFormatDef.format.TURTLE_FORMAT); + System.out.println(exporter.toString()); + } +} +``` + +**Input graph file path:** + +```turtle +@prefix ex: . +@prefix rdf: . + +ex:Alice ex:ssn "987-65-4323" ; + ex:worksFor ex:Haribo, ex:KitKat ; + rdf:type ex:Person . + +ex:Bob ex:ssn "124-35-6789" ; + ex:worksFor ex:Twitch ; + rdf:type ex:Person . + +ex:Calvin ex:ssn "648-67-6545" ; + ex:worksFor ex:UntypedCompany ; + rdf:type ex:Person . + +ex:Haribo rdf:type ex:Company . +ex:KitKat rdf:type ex:Company . +ex:Twitch rdf:type ex:Company . +ex:UntypedCompany rdf:type ex:Company . +``` + +**Input shape file path:** + +```turtle +@prefix sh: . +@prefix xsd: . +@prefix ex: . +@prefix rdf: . 
+ +ex:PersonShape + a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:ssn ; + sh:maxCount 1 ; + sh:datatype xsd:string ; + sh:pattern "^\\d{3}-\\d{2}-\\d{4}$" ; + ] ; + sh:property [ + sh:path ex:worksFor ; + sh:class ex:Company ; + sh:nodeKind sh:IRI ; + ] ; + sh:closed true ; + sh:ignoredProperties ( rdf:type ) . +``` + +**Result:** + +```turtle +@prefix xsh: . +@prefix sh: . + +[a sh:ValidationReport ; + sh:conforms true] . +``` + +## 5. SPARQL Template Transformation Language (STTL) + +This section shows how to transform a graph using a subset of the [SPARQL Template Transformation Language (STTL)](https://files.inria.fr/corese/doc/sttl.html). + +### 5.1. Transform a graph into a visual HTML format + +This example details how to load a data graph from a file, transform it into a visual HTML format, and export the result to a file. + +```java +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.load.LoadException; +import fr.inria.corese.core.print.ResultFormat; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws LoadException, EngineException, IOException { + // Open template file + Path path = Path.of("input template file path"); + String sttl_query = Files.readString(path, StandardCharsets.UTF_8); + + // Load data graph + Graph dataGraph = Graph.create(); + Load ld = Load.create(dataGraph); + ld.parse("input graph file path"); + + // Apply STTL query + QueryProcess exec = QueryProcess.create(dataGraph); + Mappings map = exec.query(sttl_query); + + // Export result + ResultFormat result_xml = ResultFormat.create(map); + result_xml.write("output file path"); + } +} +``` + +**Input template file:** + +```rq 
+template { + format { + "\n\n\n\t\n%s\t
\n\n\n" + group { + format { + "\t\t\n\t\t\t%s\n\t\t\t%s\n\t\t\t%s\n\t\t\n" + ?s ?p ?o + } + } + } +} +where { + ?s ?p ?o +} +order by ?s ?p ?o +``` + +**Result:** + +```html + + + + + + + + + + + + + + + + + + +
http://example.org/EdithPiafhttp://example.org/firstNameEdith
http://example.org/EdithPiafhttp://example.org/lastNamePiaf
http://example.org/EdithPiafhttp://www.w3.org/1999/02/22-rdf-syntax-ns#typehttp://example.org/Singer
+ + +``` + +| ?s | ?p | ?o | +| ------------------------------ | ------------------------------------------------- | --------------------------- | +| | | Edith | +| | | Piaf | +| | | | + +## 6. **SPARQL Rule** + +This section details how to apply a set of rules on a graph using the [SPARQL Rule extension language](https://files.inria.fr/corese/doc/rule.html). + +### 6.1. Load rules from a file + +The example below shows the application of two rules (symmetry and transitivity) on a simple graph. + +```java +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.load.LoadException; +import fr.inria.corese.core.load.RuleLoad; +import fr.inria.corese.core.rule.RuleEngine; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws LoadException, EngineException { + // Create and load data in a graph + Graph dataGraph = Graph.create(); + Load dataLoader = Load.create(dataGraph); + dataLoader.parse("input graph file path"); + + // Create and load rules into a rules engine + RuleEngine ruleEngine = RuleEngine.create(dataGraph); + RuleLoad ruleLoader = RuleLoad.create(ruleEngine); + ruleLoader.parse("input rules file path.rul"); + + // Apply rules on the graph + ruleEngine.process(); + } +} +``` + +**Original graph:** + +```mermaid +graph LR; + classDef IRI fill:#FEAE65,stroke-width:0px + + iri:alice(["ex:Alice"]) + iri:bob(["ex:Bob"]) + iri:camille(["ex:Camille"]) + iri:daniel(["ex:Daniel"]) + iri:elise(["ex:Elise"]) + + class iri:alice,iri:bob,iri:camille,iri:daniel,iri:elise IRI + + iri:alice--ex:friend-->iri:bob; + iri:bob--ex:friend-->iri:camille; + iri:daniel--ex:isMarriedTo-->iri:elise; +``` + +**Rules file:** + +- Symmetry: `if X➝Y then Y➝X` +- Transitivity: `if X➝Y➝Z then X➝Z` + +```xml + + + + + + + + construct { + ?x ?p ?z + } + where { + ?p a owl:TransitiveProperty . + ?x ?p ?y . 
+ ?y ?p ?z + } + ]]> + + + + + + + construct { + ?y ?p ?x + } + where { + ?p a owl:SymmetricProperty . + ?x ?p ?y . + } + ]]> + + + +``` + +**Result graph:** + +```mermaid +graph LR; + classDef IRI fill:#FEAE65,stroke-width:0px; + + iri_alice["ex:Alice"]; + iri_bob["ex:Bob"]; + iri_camille["ex:Camille"]; + iri_daniel["ex:Daniel"]; + iri_elise["ex:Elise"]; + + class iri_alice,iri_bob,iri_camille,iri_daniel,iri_elise IRI; + + iri_alice -- ex:friend --> iri_bob; + iri_bob -- ex:friend --> iri_camille; + iri_alice -. ex:friend .-> iri_camille; + iri_daniel -- ex:isMarriedTo --> iri_elise; + iri_elise -. ex:isMarriedTo .-> iri_daniel; +``` + +### 6.2. OWL Rules + +The example below shows the application of OWL RL rules. + +```java +import fr.inria.corese.core.api.Loader; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.load.LoadException; +import fr.inria.corese.core.rule.RuleEngine; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws LoadException, EngineException { + // Create a new empty Graph + Graph graph = Graph.create(); + + // Add some triples to the graph + Load loader = Load.create(graph); + loader.parse("input_graph_file.ttl", Loader.format.TURTLE_FORMAT); + + // Apply rules + RuleEngine engine = RuleEngine.create(graph); + engine.setProfile(RuleEngine.OWL_RL); + engine.process(); + } +} +``` + +## 7. LDScript + +This section describes how to define and use functions with the [LDScript extension language](https://files.inria.fr/corese/doc/ldscript.html). + +### 7.1. Fibonacci function call from Java + +This example shows how to define and compute the twelfth number of the Fibonacci sequence. 
+ +```java +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.api.IDatatype; +import fr.inria.corese.core.sparql.datatype.DatatypeMap; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException, IOException { + // Open LDScript file + Path path = Path.of("input LDScript file path.rq"); + String ldScript = Files.readString(path, StandardCharsets.UTF_8); + + // Compile LDScript + QueryProcess exec = QueryProcess.create(); + exec.compile(ldScript); + + // Compute the twelfth number of the Fibonacci sequence + String name = "http://ns.inria.fr/fibonacci"; + IDatatype dt = exec.funcall(name, DatatypeMap.newInstance(12)); + + // Print result + System.out.println(dt); + } +} +``` + +**Input LDScript file path:** + +```rq +prefix fun: + +@public function fun:fibonacci(n) { + if (n < 0) { + error() + } + else if (n = 0) { + return(0) + } + else if (n = 1) { + return(1) + } + else { + return (fun:fibonacci(n - 1) + fun:fibonacci(n - 2)) + } +} +``` + +### 7.2. LDScript in SPARQL + +This example shows how to call an LDScript function from a SPARQL query. + +```java +import java.io.IOException; + +import fr.inria.corese.core.kgram.core.Mappings; +import fr.inria.corese.core.load.Load; +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException, IOException { + String check_query = """ + prefix ex: + prefix rdf: + prefix fun: + + select ?name ?area + where { + ?city rdf:type ex:city ; + ex:name ?name ; + ex:area ?area . 
+ filter(?area > fun:toSquareKm(40)) + } + + # Convert square mile to square kilometer + function fun:toSquareKm(squareMile) { + return (squareMile * 2.59) + } + """; + + // Load graph + Graph graph = Graph.create(); + Load ld = Load.create(graph); + ld.parse("input file path"); + + // SPARQL query + QueryProcess exec = QueryProcess.create(graph); + Mappings map = exec.query(check_query); + } +} +``` + +### 7.3. Advanced Example + +The following Java program computes the percentage of people subscribed to social networks in a city compared to its total number of inhabitants. The data is collected from Wikidata. + +```java +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import fr.inria.corese.core.query.QueryProcess; +import fr.inria.corese.core.sparql.api.IDatatype; +import fr.inria.corese.core.sparql.exceptions.EngineException; + +public class Example { + public static void main(String[] args) throws EngineException, IOException { + // Open LDScript file + Path path = Path.of("input LDScript file path.rq"); + String ldScript = Files.readString(path, StandardCharsets.UTF_8); + + // Compile LDScript + QueryProcess exec = QueryProcess.create(); + exec.compile(ldScript); + + // Execute program + String name = "http://ns.inria.fr/main"; + IDatatype dt = exec.funcall(name); + + // Print result + System.out.println(dt); + } +} +``` + +**Input LDScript File:** + +```rq +prefix fun: + +prefix wd: +prefix wdt: +prefix wikibase: +prefix bd: +prefix rdfs: + +@public +function fun:percentage(sub, total) { + return (sub / total * 100) +} + +@public +function fun:citypopulationsocialmedia() { + query( + select ?city ?citylabel ?population ?socialmediafollower where { + service { + ?city wdt:P31 wd:Q1549591; + wdt:P8687 ?socialmediafollower; + wdt:P1082 ?population. + optional { + ?city rdfs:label ?citylabel + filter (lang(?citylabel) = "en"). 
+ } + } + } + order by desc (?socialmediafollower) + limit 100 + ) +} + +@public +function fun:main() { + xt:sort( + maplist ( + function(x) { + let ((citylabel population socialmediafollower) = x) { + return (xt:list(citylabel, fun:percentage(socialmediafollower, population))) + } + }, + fun:citypopulationsocialmedia() + ), + function(x,y) { + let ((x_name, x_value) = x, (y_name, y_value) = y) { + if (x_value < y_value, 1, if(x=y, 0, -1)) + } + } + ) +} +``` From 7d9b9dc7cd683181f6aa132bca64a7576d655d0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 16:33:05 +0200 Subject: [PATCH 15/64] feat: Add CSS classes for light and dark logo images in documentation --- docs/source/index.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index e55b53caa..5cb74b78b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -7,6 +7,13 @@ .. image:: _static/logo/corese-core_doc_light.svg :align: center :width: 400px + :class: only-light + +.. image:: _static/logo/corese-core_doc_dark.svg + :align: center + :width: 400px + :class: only-dark + Corese is a software platform that implements and extends the standards of the Semantic Web. It enables users to create, manipulate, parse, serialize, query, reason about, and validate RDF data. From 60d5cc06753a8822e60329d8c64a35228110193d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 17:42:03 +0200 Subject: [PATCH 16/64] docs: Reorder links for Corese tools in documentation for improved clarity --- docs/source/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5cb74b78b..cb760a04c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -65,9 +65,9 @@ Corese-Core is a Java library that provides the core functionalities of Corese. 
Corese offers several interfaces ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * `corese-core `_: Java library to process RDF data and use Corese features via an API. - * `corese-server `_: Tool to create, configure and manage SPARQL endpoints. - * `corese-gui `_: Graphical interface that allows an easy and visual use of Corese features. * `corese-command `_: Command Line Interface for Corese that allows users to interact with Corese features from the terminal. + * `corese-gui `_: Graphical interface that allows an easy and visual use of Corese features. + * `corese-server `_: Tool to create, configure and manage SPARQL endpoints. * `corese-python (beta) `_: Python wrapper for accessing and manipulating RDF data with Corese features using py4j. .. raw:: html From a288a6b457f4c5fb600dcd20f3040d7747a4844e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 17:42:16 +0200 Subject: [PATCH 17/64] fix: Remove theme switcher from navbar configuration for cleaner UI --- docs/source/conf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index bad2b49da..f2da322b9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -82,9 +82,8 @@ "image_light": "_static/logo/corese-core_doc_light.svg", "image_dark": "_static/logo/corese-core_doc_dark.svg" }, - "theme_switcher": True, "navbar_center": [ "navbar-nav" ], - "navbar_end": ["theme-switcher", "navbar-icon-links", "version-switcher"], + "navbar_end": ["navbar-icon-links", "version-switcher"], "icon_links": [ { "name": "GitHub", From 547afbcecba257860816ae90051c586dad385d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 23 Jul 2025 17:42:25 +0200 Subject: [PATCH 18/64] fix: Update version number in installation instructions for Maven and Gradle --- docs/source/install.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/install.md b/docs/source/install.md index 
ffc18e9c8..c0b0bc77e 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -8,14 +8,14 @@ fr.inria.corese corese-core - X.Y.Z + 4.6.4-SNAPSHOT ``` **Gradle:** ```Groovy -implementation 'fr.inria.corese:corese-core:X.Y.Z' // replace X.Y.Z with the version number +implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' ``` **Jar:** From 427a5ced1af916c6df37a0e3e65e8c4417f17684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Thu, 24 Jul 2025 14:41:16 +0200 Subject: [PATCH 19/64] fix: Update version and release numbers in Sphinx configuration --- docs/source/conf.py | 21 ++++++++++++++++++++- docs/switcher_generator.sh | 10 +++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index f2da322b9..fd646a7e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,6 +20,25 @@ project = 'Corese' author = 'Wimmics' +version = "" +release = "" + +def setup(app): + def set_version(app, config): + smv = getattr(app.config, 'smv_current_version', None) + if smv: + config.version = smv + config.release = smv + # Pour le version switcher + html_theme_options["switcher"]["version_match"] = smv + else: + config.version = "dev" + config.release = "dev" + html_theme_options["switcher"]["version_match"] = "dev" + + app.connect("config-inited", set_version) + + # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -91,7 +110,7 @@ "icon": "fab fa-github-square" } ], - "switcher": {"json_url": "https://corese-stack.github.io/corese-core/switcher.json", "version_match": r"v\d+\.\d+\.\d+"} + "switcher": {"json_url": "https://corese-stack.github.io/corese-core/switcher.json", "version_match": version} } html_sidebars = { diff --git a/docs/switcher_generator.sh b/docs/switcher_generator.sh index df73bcb0b..3a94b5adb 100755 --- a/docs/switcher_generator.sh +++ 
b/docs/switcher_generator.sh @@ -53,19 +53,19 @@ for tag in $tags; do is_first=false else preferred="false" - name="$tag (stable)" + name="$tag" fi # Create a JSON object for the tag - json_object=$(cat < Date: Thu, 24 Jul 2025 15:22:16 +0200 Subject: [PATCH 20/64] feat: Add theme switcher to navbar for improved user experience --- docs/source/conf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index fd646a7e0..216fe9b11 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -101,8 +101,9 @@ def set_version(app, config): "image_light": "_static/logo/corese-core_doc_light.svg", "image_dark": "_static/logo/corese-core_doc_dark.svg" }, + "theme_switcher": True, "navbar_center": [ "navbar-nav" ], - "navbar_end": ["navbar-icon-links", "version-switcher"], + "navbar_end": ["theme-switcher", "navbar-icon-links", "version-switcher"], "icon_links": [ { "name": "GitHub", From 50e539a3764a3862948d6e0d51dc3024b3b51313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Thu, 24 Jul 2025 15:34:27 +0200 Subject: [PATCH 21/64] feat: Implement dynamic favicon switching based on system theme --- docs/source/_static/js/favicon-theme.js | 17 ++++ .../_static/logo/corese-core_doc_fav_dark.svg | 87 +++++++++++++++++++ ..._fav.svg => corese-core_doc_fav_light.svg} | 36 ++++---- docs/source/_templates/layout.html | 6 ++ docs/source/conf.py | 4 +- 5 files changed, 132 insertions(+), 18 deletions(-) create mode 100644 docs/source/_static/js/favicon-theme.js create mode 100644 docs/source/_static/logo/corese-core_doc_fav_dark.svg rename docs/source/_static/logo/{corese-core_doc_fav.svg => corese-core_doc_fav_light.svg} (79%) create mode 100644 docs/source/_templates/layout.html diff --git a/docs/source/_static/js/favicon-theme.js b/docs/source/_static/js/favicon-theme.js new file mode 100644 index 000000000..c857941c7 --- /dev/null +++ b/docs/source/_static/js/favicon-theme.js @@ -0,0 +1,17 @@ 
+function setFaviconForSystemTheme(e) { + const prefersDark = e.matches; + const favicon = document.getElementById("favicon"); + + if (favicon) { + favicon.href = prefersDark + ? "_static/logo/corese-core_doc_fav_dark.svg" + : "_static/logo/corese-core_doc_fav_light.svg"; + } +} + +// Initialisation +const matcher = window.matchMedia("(prefers-color-scheme: dark)"); +setFaviconForSystemTheme(matcher); + +// Écoute des changements de thème système +matcher.addEventListener("change", setFaviconForSystemTheme); diff --git a/docs/source/_static/logo/corese-core_doc_fav_dark.svg b/docs/source/_static/logo/corese-core_doc_fav_dark.svg new file mode 100644 index 000000000..b05f3f54d --- /dev/null +++ b/docs/source/_static/logo/corese-core_doc_fav_dark.svg @@ -0,0 +1,87 @@ + + diff --git a/docs/source/_static/logo/corese-core_doc_fav.svg b/docs/source/_static/logo/corese-core_doc_fav_light.svg similarity index 79% rename from docs/source/_static/logo/corese-core_doc_fav.svg rename to docs/source/_static/logo/corese-core_doc_fav_light.svg index 693148118..ac50c2978 100644 --- a/docs/source/_static/logo/corese-core_doc_fav.svg +++ b/docs/source/_static/logo/corese-core_doc_fav_light.svg @@ -5,9 +5,9 @@ width="16px" version="1.1" id="svg7" - sodipodi:docname="corese-core_doc_fav.svg" + sodipodi:docname="corese-core_doc_fav_light.svg" xml:space="preserve" - inkscape:version="1.4 (e7c3feb100, 2024-10-09)" + inkscape:version="1.4.2 (ebf0e940d0, 2025-05-08)" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns="http://www.w3.org/2000/svg" @@ -43,22 +43,18 @@ inkscape:pageopacity="0.0" inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" - inkscape:zoom="22.96875" - inkscape:cx="0.56598639" - inkscape:cy="13.757823" + inkscape:zoom="32.482718" + inkscape:cx="2.2627417" + inkscape:cy="12.329633" inkscape:window-width="3072" inkscape:window-height="1659" inkscape:window-x="0" 
inkscape:window-y="0" inkscape:window-maximized="1" - inkscape:current-layer="g7" /> + sodipodi:nodetypes="ssssss" + style="stroke:#414141;stroke-opacity:1;fill:#414141;fill-opacity:1" /> diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html new file mode 100644 index 000000000..ea787212c --- /dev/null +++ b/docs/source/_templates/layout.html @@ -0,0 +1,6 @@ +{% extends "!layout.html" %} + +{% block extrahead %} + +{{ super() }} +{% endblock %} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 216fe9b11..2258d523c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -85,7 +85,9 @@ def set_version(app, config): html_logo = "_static/logo/corese-core_doc_bar.svg" # Icon to put in the browser tab. -html_favicon = "_static/logo/corese-core_doc_fav.svg" +html_js_files = [ + "js/favicon-theme.js" +] # Modify the title to get good social-media links html_title = "Corese" From 2f820707eb92188677562a334710d9b332a462c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Thu, 24 Jul 2025 15:47:46 +0200 Subject: [PATCH 22/64] feat: Add installation instructions for Maven, Gradle, and direct download --- .../_static/logo/corese-core_doc_fav_dark.svg | 37 ++++++----- .../logo/corese-core_doc_fav_light.svg | 62 ++++++++++--------- docs/source/install.md | 28 --------- docs/source/install.rst | 40 ++++++++++++ 4 files changed, 91 insertions(+), 76 deletions(-) delete mode 100644 docs/source/install.md create mode 100644 docs/source/install.rst diff --git a/docs/source/_static/logo/corese-core_doc_fav_dark.svg b/docs/source/_static/logo/corese-core_doc_fav_dark.svg index b05f3f54d..1ddda212f 100644 --- a/docs/source/_static/logo/corese-core_doc_fav_dark.svg +++ b/docs/source/_static/logo/corese-core_doc_fav_dark.svg @@ -44,7 +44,7 @@ inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:zoom="22.96875" - inkscape:cx="4.0707483" + inkscape:cx="4.1142857" 
inkscape:cy="10.688435" inkscape:window-width="3072" inkscape:window-height="1659" @@ -57,31 +57,30 @@ id="g7"> + style="fill:none;stroke:#e0e0e0;stroke-opacity:1" /> diff --git a/docs/source/_static/logo/corese-core_doc_fav_light.svg b/docs/source/_static/logo/corese-core_doc_fav_light.svg index ac50c2978..9767a0c9c 100644 --- a/docs/source/_static/logo/corese-core_doc_fav_light.svg +++ b/docs/source/_static/logo/corese-core_doc_fav_light.svg @@ -43,42 +43,46 @@ inkscape:pageopacity="0.0" inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" - inkscape:zoom="32.482718" - inkscape:cx="2.2627417" - inkscape:cy="12.329633" + inkscape:zoom="32" + inkscape:cx="4.703125" + inkscape:cy="20.515625" inkscape:window-width="3072" inkscape:window-height="1659" inkscape:window-x="0" inkscape:window-y="0" inkscape:window-maximized="1" - inkscape:current-layer="g6" /> + id="g6" /> diff --git a/docs/source/install.md b/docs/source/install.md deleted file mode 100644 index c0b0bc77e..000000000 --- a/docs/source/install.md +++ /dev/null @@ -1,28 +0,0 @@ - - -# Installation - -**Maven:** - -```xml - - fr.inria.corese - corese-core - 4.6.4-SNAPSHOT - -``` - -**Gradle:** - -```Groovy -implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' -``` - -**Jar:** - - - GitHub Release - - - Maven Central - diff --git a/docs/source/install.rst b/docs/source/install.rst new file mode 100644 index 000000000..d810b1d2f --- /dev/null +++ b/docs/source/install.rst @@ -0,0 +1,40 @@ +.. _installation: + +Installation +============ + +You can use **Corese-Core** with Maven, Gradle, or by downloading the JAR directly from GitHub or Maven Central. + +Maven +----- + +Add the following dependency to your ``pom.xml`` file: + +.. code-block:: xml + + + fr.inria.corese + corese-core + 4.6.4-SNAPSHOT + + +Gradle +------ + +Add this line to the ``dependencies`` section of your ``build.gradle`` file: + +.. 
code-block:: groovy + + implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' + +Direct Download +--------------- + +.. raw:: html + + + GitHub Release + + + Maven Central + From 1cadd613e69bd3df88007b1b95b4ac39fe93777b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Thu, 24 Jul 2025 16:06:50 +0200 Subject: [PATCH 23/64] refactor: Simplify favicon handling by using a script to set the favicon based on system theme --- docs/source/_static/js/favicon-theme.js | 15 +++++---------- docs/source/_templates/layout.html | 12 +++++++++++- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/source/_static/js/favicon-theme.js b/docs/source/_static/js/favicon-theme.js index c857941c7..b7dc10e99 100644 --- a/docs/source/_static/js/favicon-theme.js +++ b/docs/source/_static/js/favicon-theme.js @@ -1,17 +1,12 @@ -function setFaviconForSystemTheme(e) { - const prefersDark = e.matches; +function setFavicon(e) { + const dark = e.matches; const favicon = document.getElementById("favicon"); - if (favicon) { - favicon.href = prefersDark + favicon.href = dark ? 
"_static/logo/corese-core_doc_fav_dark.svg" : "_static/logo/corese-core_doc_fav_light.svg"; } } -// Initialisation -const matcher = window.matchMedia("(prefers-color-scheme: dark)"); -setFaviconForSystemTheme(matcher); - -// Écoute des changements de thème système -matcher.addEventListener("change", setFaviconForSystemTheme); +const mql = window.matchMedia("(prefers-color-scheme: dark)"); +mql.addEventListener("change", setFavicon); diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index ea787212c..4c06da170 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -1,6 +1,16 @@ {% extends "!layout.html" %} {% block extrahead %} - + {{ super() }} {% endblock %} \ No newline at end of file From a2e5deede4392afba9dad898bddeb86aea552242 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 25 Jul 2025 12:54:23 +0200 Subject: [PATCH 24/64] ANTLRTrig --- src/main/antlr/TriG.g4 | 321 ++++++++++++++++++ .../impl/io/parser/trig/ANTLRTrigParser.java | 71 ++++ .../io/parser/trig/TriGListernerImpl.java | 164 +++++++++ .../io/parser/trig/ANTLRTrigParserSpec.java | 64 ++++ 4 files changed, 620 insertions(+) create mode 100644 src/main/antlr/TriG.g4 create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java diff --git a/src/main/antlr/TriG.g4 b/src/main/antlr/TriG.g4 new file mode 100644 index 000000000..f93efea6f --- /dev/null +++ b/src/main/antlr/TriG.g4 @@ -0,0 +1,321 @@ +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + +grammar TriG; + +trigDoc 
+ : ( directive | block )* EOF + ; + +block + : triplesOrGraph + | wrappedGraph + | triples2 + | Graph_w labelOrSubject wrappedGraph + ; + +triplesOrGraph + : labelOrSubject (wrappedGraph | predicateObjectList '.') + ; + +triples2 + : blankNodePropertyList predicateObjectList? '.' + | collection predicateObjectList '.' + ; + +wrappedGraph + : '{' triplesBlock? '}' + ; + +triplesBlock + : triples ('.' triplesBlock?)? + ; + +labelOrSubject + : iri + | blankNode + ; + +directive + : prefixID + | base + | sparqlPrefix + | sparqlBase + ; + +prefixID + : '@prefix' PNAME_NS IRIREF '.' + ; + +base + : '@base' IRIREF '.' + ; + +sparqlPrefix + : Prefix_w PNAME_NS IRIREF + ; + +sparqlBase + : Base_w IRIREF + ; + +triples + : subject predicateObjectList + | blankNodePropertyList predicateObjectList? + ; + +predicateObjectList + : verb objectList (';' (verb objectList)?)* + ; + +objectList + : object (',' object)* + ; + +verb + : predicate + | 'a' + ; + +subject + : iri + | blank + ; + +predicate + : iri + ; + +object + : iri + | blank + | blankNodePropertyList + | literal + ; + +literal + : rDFLiteral + | numericLiteral + | BooleanLiteral + ; + +blank + : blankNode + | collection + ; + +blankNodePropertyList + : '[' predicateObjectList ']' + ; + +collection + : '(' object* ')' + ; + +numericLiteral + : INTEGER + | DECIMAL + | DOUBLE + ; + +rDFLiteral + : string LANGTAG + | string ('^^' iri)? 
+ ; + +string + : STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE + | STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE + ; + +iri + : prefixedName + | IRIREF + ; + +prefixedName + : PNAME_LN + | PNAME_NS + ; + +blankNode + : BLANK_NODE_LABEL + | ANON + ; + +WS + : (('\u0020' | '\u0009' | '\u000A' | '\u000D' ) )+ -> skip + ; + +// Terminals + +Graph_w options { caseInsensitive=true; } + : 'GRAPH' + ; + +Base_w options { caseInsensitive=true; } + : 'BASE' + ; + +Prefix_w options { caseInsensitive=true; } + : 'PREFIX' + ; + +BooleanLiteral + : 'true' + | 'false' + ; + +IRIREF + : '<' (PN_CHARS | '.' | ':' | '#' | '@' | '%' | '&' | '$' | '!' | '\'' | '*' | '+' | '/' | '(' | ')' | '-' | ',' | '?' | '~' | UCHAR)* '>' + ; + +PNAME_NS + : PN_PREFIX? ':' + ; + +PNAME_LN + : PNAME_NS PN_LOCAL + ; + +BLANK_NODE_LABEL + : '_:' (PN_CHARS_U | '0' .. '9') ((PN_CHARS | '.')* PN_CHARS)? + ; + +LANGTAG + : '@' ('a'.. 'z' | 'A' .. 'Z')+ ('-' ('a'.. 'z' | 'A' .. 'Z' | '0' .. '9')* )* + ; + +INTEGER + : ('+' | '-' )? ('0' .. '9')+ + ; + +DECIMAL + : ('+' | '-' )? ('0' .. '9')* '.' ('0' .. '9')+ + ; + +DOUBLE + : ('+' | '-' )? (('0' .. '9')+ '.' ('0' .. '9')* EXPONENT + | '.' ('0' .. '9')+ EXPONENT + | ('0' .. '9')+ EXPONENT) + ; + +EXPONENT + : ('e' | 'E') ('+' | '-' )? ('0' .. '9')+ + ; + +STRING_LITERAL_QUOTE + : '"' ((~[\u0022\u005C\u0010\u0013]) | ECHAR | UCHAR)* '"' + ; + +STRING_LITERAL_SINGLE_QUOTE + : '\'' ((~[\u0027\u005C\u0010\u0013]) | ECHAR | UCHAR)* '\'' + ; + +STRING_LITERAL_LONG_SINGLE_QUOTE + : '\'\'\'' (('\'' | '\'\'')? ( (~['\\] ) | ECHAR | UCHAR))* '\'\'\'' + ; + +STRING_LITERAL_LONG_QUOTE + : '"""' (('"' | '""')? ( (~["'] ) | ECHAR | UCHAR))* '"""' + ; + +UCHAR + : '\\u' HEX HEX HEX HEX + | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX + ; + +ECHAR options { caseInsensitive=true; } + : '\\' [tbnrf"'\\] + ; + +WHITESPACE + : [\u0020\u0009\u000A\u000D] + ; + +ANON + : '[' WHITESPACE* ']' + ; + +PN_CHARS_BASE + : 'A' .. 'Z' + | 'a' .. 'z' + | '\u00C0' .. 
'\u00D6' + | '\u00D8' .. '\u00F6' + | '\u00F8' .. '\u02FF' + | '\u0370' .. '\u037D' + | '\u037F' .. '\u1FFF' + | '\u200C' .. '\u200D' + | '\u2070' .. '\u218F' + | '\u2C00' .. '\u2FEF' + | '\u3001' .. '\uD7FF' + | '\uF900' .. '\uFDCF' + | '\uFDF0' .. '\uFFFD' +// | '\u10000' .. '\uEFFFF' + ; + +PN_CHARS_U + : PN_CHARS_BASE + | '_' + ; + +PN_CHARS + : PN_CHARS_U + | '-' + | [0-9] + | [\u00B7] + | [\u0300-\u036F] + | [\u203F-\u2040] + ; + +PN_PREFIX + : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? + ; + +PN_LOCAL + : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? + ; + +PLX + : PERCENT + | PN_LOCAL_ESC + ; + +PERCENT + : '%' HEX HEX + ; + +HEX + : [0-9a-fA-F] + ; + +PN_LOCAL_ESC + : '\\' ( + '_' + | '~' + | '.' + | '-' + | '!' + | '$' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | ';' + | '=' + | '/' + | '?' + | '#' + | '@' + | '%' + ) + ; + +LC + : '#' ~[\r\n]+ -> channel(HIDDEN) + ; \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java new file mode 100644 index 000000000..4f53923c7 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java @@ -0,0 +1,71 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; + +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.parser.antlr.TriGLexer; +import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import 
org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; + +public class ANTLRTrigParser extends AbstractRDFParser { + + public ANTLRTrigParser(Model model, ValueFactory factory) { super(model, factory); } + + public ANTLRTrigParser(Model model, ValueFactory factory, IOOptions config) {super(model, factory, config);} + + @Override + public RDFFormat getRDFFormat() { + return RDFFormat.TRIG; + } + + @Override + public void setConfig(IOOptions config) {} + + @Override + public void parse(InputStream in) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); + } + + @Override + public void parse(InputStream in, String baseURI) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURI); + } + + @Override + public void parse(Reader reader) throws ParsingErrorException { + parse(reader, null); + } + + @Override + public void parse(Reader reader, String baseURI) throws ParsingErrorException { + try { + CharStream charStream = CharStreams.fromReader(reader); + TriGLexer triGLexer = new TriGLexer(charStream); + CommonTokenStream tokens = new CommonTokenStream(triGLexer); + TriGParser triGParser = new TriGParser(tokens); + ParseTreeWalker walker = new ParseTreeWalker(); + ParseTree tree = triGParser.trigDoc(); + TriGListernerImpl listerner = new TriGListernerImpl(getModel(), getValueFactory(), this.getConfig()); + walker.walk((ParseTreeListener) listerner, tree); + } catch (IOException e) { + throw new ParsingErrorException("Failed to parse TriG RDF: " + e.getMessage(), e); + } catch (Exception e) { + throw new ParsingErrorException("Unexpected error during TriG parsing: " + e.getMessage(), e); + } + } +} \ No newline at end of file diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java new file mode 100644 index 000000000..5f39d55b8 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java @@ -0,0 +1,164 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.parser.antlr.TriGBaseListener; +import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; +import fr.inria.corese.core.next.api.ValueFactory; +import java.util.HashMap; +import java.util.Map; + +public class TriGListernerImpl extends TriGBaseListener { + private final Model model; + private String baseURI; + private final Map prefixMap = new HashMap<>(); + private final ValueFactory factory; + + private Resource currentSubject; + private IRI currentPredicate; + + public TriGListernerImpl(Model model, ValueFactory factory, IOOptions options) { + this.model = model; + this.baseURI = baseURI != null ? 
baseURI : ""; + if (options != null && options instanceof RDFParserBaseIRIOptions); + this.factory = factory; + } + + public void exitPrefixID(TriGParser.PrefixIDContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + String iri = ctx.IRIREF().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + iri = iri.substring(1, iri.length() - 1); + prefixMap.put(prefix, iri); + model.setNamespace(prefix, iri); + } + + public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) { + String iri = ctx.IRIREF().getText(); + baseURI = iri.substring(1, iri.length() - 1); + } + + public void enterTriples(TriGParser.TriplesContext ctx) { + currentSubject = extractSubject(ctx.subject()); + } + + public void enterVerb(TriGParser.VerbContext ctx) { + currentPredicate = extractVerb(ctx); + } + + public void exitObject(TriGParser.ObjectContext ctx) { + Value object = extractObject(ctx); + model.add(currentSubject, currentPredicate, object); + } + + private String resolveIRI(String raw) { + if (raw.startsWith("<") && raw.endsWith(">")) { + return raw.substring(1, raw.length() - 1); + } else if (raw.equals("a")) { + return RDF.type.getIRI().stringValue(); + } else if (raw.contains(":")) { + String[] parts = raw.split(":", 2); + String ns = prefixMap.get(parts[0]); + if (ns != null) { + return ns + parts[1]; + } else { + throw new IllegalArgumentException("Prefix not declared: " + parts[0]); + } + } else { + return baseURI + raw; + } + } + + private String stripQuotes(String text) { + if (text == null || text.length() < 2) return text; + if ((text.startsWith("\"") && text.endsWith("\"")) || + (text.startsWith("'''") && text.endsWith("'''")) || + (text.startsWith("\"\"\"") && text.endsWith("\"\"\""))) { + return text.substring(1, text.length() - 1); + } + return text; + } + + private Literal extractLiteral(TriGParser.LiteralContext ctx) { + String label; + IRI datatype; + String lang; + + if (ctx.rDFLiteral() != null) { + if (ctx.rDFLiteral().iri() != null) { + datatype = 
factory.createIRI(resolveIRI(ctx.rDFLiteral().iri().getText())); + label = ctx.rDFLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label), datatype); + } + if (ctx.rDFLiteral().LANGTAG() != null) { + lang = ctx.rDFLiteral().LANGTAG().getText().substring(1); + label = ctx.rDFLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label), lang); + } + label = ctx.rDFLiteral().string().getText(); + return factory.createLiteral(stripQuotes(label)); + } + + if (ctx.BooleanLiteral() != null) { + label = ctx.BooleanLiteral().getText(); + datatype = XSD.BOOLEAN.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral() != null) { + if (ctx.numericLiteral().DECIMAL() != null) { + label = ctx.numericLiteral().DECIMAL().getText(); + datatype = XSD.DECIMAL.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral().DOUBLE() != null) { + label = ctx.numericLiteral().DOUBLE().getText(); + datatype = XSD.DOUBLE.getIRI(); + return factory.createLiteral(label, datatype); + } + if (ctx.numericLiteral().INTEGER() != null) { + label = ctx.numericLiteral().INTEGER().getText(); + datatype = XSD.INTEGER.getIRI(); + return factory.createLiteral(label, datatype); + } + } + throw new IllegalArgumentException("Unsupported literal type: " + ctx.getText()); + } + + private Value extractObject(TriGParser.ObjectContext ctx) { + if (ctx.iri() != null) { + return factory.createIRI(resolveIRI(ctx.iri().getText())); + } + if (ctx.blank() != null) { + return factory.createBNode(ctx.blank().getText()); + } + if (ctx.literal() != null) { + return extractLiteral(ctx.literal()); + } + throw new RuntimeException("Unsupported object: " + ctx.getText()); + } + + private Resource extractSubject(TriGParser.SubjectContext ctx) { + if (ctx.iri() != null) { + return factory.createIRI(resolveIRI(ctx.iri().getText())); + } + if (ctx.blank() != null) { + return factory.createBNode(ctx.blank().getText()); + } + 
throw new RuntimeException("Unsupported subject: " + ctx.getText()); + } + + private IRI extractPredicate(TriGParser.PredicateContext ctx) { + return factory.createIRI(resolveIRI(ctx.getText())); + } + + private IRI extractVerb(TriGParser.VerbContext ctx) { + if (ctx.predicate() != null) { + return extractPredicate(ctx.predicate()); + } else { + return factory.createIRI(resolveIRI(ctx.getText())); + } + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java new file mode 100644 index 000000000..373855f27 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java @@ -0,0 +1,64 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; + +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ANTLRTrigParserSpec { + + private Model parseFromString(String trigData, String baseURI) throws Exception { + Model model = new CoreseModel(); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser parser = new ANTLRTrigParser(model, factory); + parser.parse(new StringReader(trigData), baseURI); + return model; + } + + @Test + public void testNamedGraphParsing() throws Exception { + String trig = "@prefix ex: .\n" + + "ex:Graph1 {\n" + + " ex:Alice ex:knows ex:Bob .\n" + + "}"; + + Model model = parseFromString(trig, null); + + assertEquals(1, model.size()); + + assertEquals(1, model.getNamespaces().size()); + + assertEquals(1, model.contexts().size()); + } + + @Test + public void 
testDocumentThatContainsOneGraphExample1() throws Exception { + String trig = """ + # This document encodes one graph. + @prefix ex: . + @prefix : . + + :G1 { :Monica a ex:Person ; + ex:name "Monica Murphy" ; + ex:homepage ; + ex:email ; + ex:hasSkill ex:Management , + ex:Programming . } + """.trim(); + + Model model = parseFromString(trig, null); + + assertEquals(6, model.size()); + + assertEquals(2, model.getNamespaces().size()); + + assertEquals(1, model.contexts().size()); + } + +} From 41779894934fcdb3723e973bc5a68ae5c1513f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Thu, 24 Jul 2025 17:00:39 +0200 Subject: [PATCH 25/64] fix: Update badge sizes for GitHub and Maven Central links in installation documentation --- docs/source/_static/logo/badge_github.svg | 18 ++-- docs/source/_static/logo/badge_maven.svg | 14 +++- .../_static/logo/corese-core_doc_bar.svg | 82 ------------------- docs/source/conf.py | 11 +-- docs/source/index.rst | 1 - docs/source/install.rst | 15 ++-- docs/switcher_generator.sh | 2 +- 7 files changed, 35 insertions(+), 108 deletions(-) delete mode 100644 docs/source/_static/logo/corese-core_doc_bar.svg diff --git a/docs/source/_static/logo/badge_github.svg b/docs/source/_static/logo/badge_github.svg index c6571839a..8e7e0b737 100644 --- a/docs/source/_static/logo/badge_github.svg +++ b/docs/source/_static/logo/badge_github.svg @@ -7,7 +7,7 @@ id="svg13" sodipodi:docname="badge_github.svg" xml:space="preserve" - inkscape:version="1.4 (e7c3feb100, 2024-10-09)" + inkscape:version="1.4.2 (ebf0e940d0, 2025-05-08)" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns="http://www.w3.org/2000/svg" @@ -20,15 +20,15 @@ inkscape:pageopacity="0.0" inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" - inkscape:zoom="2.5190679" - inkscape:cx="106.78553" - inkscape:cy="67.485279" + inkscape:zoom="1.78125" + inkscape:cx="-20.491228" + 
inkscape:cy="159.7193" inkscape:window-width="3072" inkscape:window-height="1659" inkscape:window-x="0" inkscape:window-y="0" inkscape:window-maximized="1" - inkscape:current-layer="g3" /> + style="fill:#ffffff" /> diff --git a/docs/source/_static/logo/badge_maven.svg b/docs/source/_static/logo/badge_maven.svg index b15bb1f5c..c8e9a20b1 100644 --- a/docs/source/_static/logo/badge_maven.svg +++ b/docs/source/_static/logo/badge_maven.svg @@ -7,7 +7,7 @@ id="svg13" sodipodi:docname="badge_maven.svg" xml:space="preserve" - inkscape:version="1.4 (e7c3feb100, 2024-10-09)" + inkscape:version="1.4.2 (ebf0e940d0, 2025-05-08)" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns="http://www.w3.org/2000/svg" @@ -21,8 +21,8 @@ inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:zoom="2.5190679" - inkscape:cx="52.797306" - inkscape:cy="119.48864" + inkscape:cx="52.995793" + inkscape:cy="119.68713" inkscape:window-width="3072" inkscape:window-height="1659" inkscape:window-x="0" @@ -271,4 +271,10 @@ class="cls-1" d="m 293.8,31.2 c 0.3,0.1 0.8,0.2 1.1,0.2 0.9,0 1.5,-0.2 1.9,-1 l 0.5,-1.1 -5.4,-13.6 h 3.8 l 3.5,9.3 3.5,-9.3 h 3.8 l -6.3,15.6 c -1,2.5 -2.8,3.2 -5.1,3.2 -0.4,0 -1.3,0 -1.8,-0.2 l 0.5,-3.2 z" id="path22" - style="fill:#ffffff" /> + style="fill:#ffffff" /> diff --git a/docs/source/_static/logo/corese-core_doc_bar.svg b/docs/source/_static/logo/corese-core_doc_bar.svg deleted file mode 100644 index 693148118..000000000 --- a/docs/source/_static/logo/corese-core_doc_bar.svg +++ /dev/null @@ -1,82 +0,0 @@ - - diff --git a/docs/source/conf.py b/docs/source/conf.py index 2258d523c..fae03ba86 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -29,7 +29,6 @@ def set_version(app, config): if smv: config.version = smv config.release = smv - # Pour le version switcher html_theme_options["switcher"]["version_match"] = smv else: config.version = "dev" @@ -82,7 +81,7 @@ def 
set_version(app, config): html_js_files = [] # Project logo, to place at the top of the sidebar. -html_logo = "_static/logo/corese-core_doc_bar.svg" +html_logo = "_static/logo/corese-core.svg" # Icon to put in the browser tab. html_js_files = [ @@ -99,9 +98,9 @@ def set_version(app, config): # documentation. html_theme_options = { "logo": { - "image_relative": "_static/logo/corese-core_doc_light.svg", - "image_light": "_static/logo/corese-core_doc_light.svg", - "image_dark": "_static/logo/corese-core_doc_dark.svg" + "image_relative": "_static/logo/corese-core.svg", + "image_light": "_static/logo/corese-core.svg", + "image_dark": "_static/logo/corese-core.svg" }, "theme_switcher": True, "navbar_center": [ "navbar-nav" ], @@ -171,8 +170,6 @@ def set_version(app, config): # Tell sphinx what the pygments highlight language should be. highlight_language = 'java' -# Setup the sphinx.ext.todo extension - # Set to false in the final version todo_include_todos = True diff --git a/docs/source/index.rst b/docs/source/index.rst index cb760a04c..2add6f954 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -101,7 +101,6 @@ Home Page .. toctree:: :hidden: - Accueil Installation User Guide Java API diff --git a/docs/source/install.rst b/docs/source/install.rst index d810b1d2f..3856d2644 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -31,10 +31,11 @@ Direct Download --------------- .. 
raw:: html - - - GitHub Release - - - Maven Central - + diff --git a/docs/switcher_generator.sh b/docs/switcher_generator.sh index 3a94b5adb..63f69306d 100755 --- a/docs/switcher_generator.sh +++ b/docs/switcher_generator.sh @@ -10,7 +10,7 @@ fi json_output_file="$1" html_output_file="$2" -# Set minimal version (before which no documentation would be generated in a compatible way) +# Set minimal version (before which no documentation would be generated in a compatible way) minimal_version="4.6.0" # Get all Git tags and filter by the form vX.Y.Z (semantic versioning) From 15a366c76899b2a4870a1631419f0c0bdac3cd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Fri, 25 Jul 2025 16:06:03 +0200 Subject: [PATCH 26/64] style: Update README formatting by removing emojis and enhancing download section --- README.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 86dc04f96..d4993f6d0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ [![License: CECILL-C](https://img.shields.io/badge/License-CECILL--C-blue.svg)](https://cecill.info/licences/Licence_CeCILL-C_V1-en.html) [![Discussions](https://img.shields.io/badge/Discussions-GitHub-blue)](https://github.com/orgs/corese-stack/discussions) -## ✨ Features +## Features - Manipulate RDF graphs (parse, serialize, transform) - Execute SPARQL 1.1 queries and updates @@ -21,7 +21,7 @@ - Apply logic-based rules with SPARQL Rules - Extend functionality and scripting with LDScript -## 🚀 Getting Started +## Getting Started Integrate Corese-Core into your Java project using your preferred build tool. 
@@ -45,10 +45,14 @@ implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' Download the latest `.jar` file from: -- [🔗 GitHub Releases](https://github.com/corese-stack/corese-core/releases) -- [📦 Maven Central](https://central.sonatype.com/artifact/fr.inria.corese/corese-core) + + Get it on GitHub + + + Get it on Maven Central + -## 📖 Documentation +## Documentation - [Corese-Core api documentation](https://corese-stack.github.io/corese-core/v4.6.3/java_api/library_root.html) @@ -67,7 +71,7 @@ Download the latest `.jar` file from: - [SPARQL Rule Engine](https://files.inria.fr/corese/doc/rule.html) - [LDScript Reference](https://files.inria.fr/corese/doc/ldscript.html) -## 🤝 Contributing +## Contributing We welcome contributions! Here’s how to get involved: @@ -75,7 +79,7 @@ We welcome contributions! Here’s how to get involved: - [Issue Tracker](https://github.com/corese-stack/corese-core/issues) - [Pull Requests](https://github.com/corese-stack/corese-core/pulls) -## 🔗 Useful Links +## Useful Links - [Corese Website](https://corese-stack.github.io/corese-core) - Mailing List: From 3acaede2899cd510ed0ec108c21b4d5f4512427f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Fri, 25 Jul 2025 16:07:40 +0200 Subject: [PATCH 27/64] feat: Update pre-release workflow to use shadowJar and include build timestamp in release notes --- .github/workflows/publish-prerelease.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish-prerelease.yml b/.github/workflows/publish-prerelease.yml index ef7278521..b23520a34 100644 --- a/.github/workflows/publish-prerelease.yml +++ b/.github/workflows/publish-prerelease.yml @@ -38,7 +38,11 @@ jobs: ${{ runner.os }}-gradle- - name: Build project - run: ./gradlew build + run: ./gradlew shadowJar + + - name: Generate timestamp + id: timestamp + run: echo "timestamp=$(date '+%d %B %Y, %H:%M:%S')" >> $GITHUB_OUTPUT - name: Publish single pre-release uses: 
ncipollo/release-action@v1 @@ -49,6 +53,12 @@ jobs: body: | 🚧 This is the latest build from the `develop` branch. Not intended for production use. + + **Last Updated:** ${{ steps.timestamp.outputs.timestamp }} + **Build Info:** + - Branch: `${{ github.ref_name }}` + - Commit: `${{ github.sha }}` + - Build Time: `${{ steps.timestamp.outputs.timestamp }}` prerelease: true allowUpdates: true replacesArtifacts: true From efd937bf06f041960b74eb830d61dc1d5c9951af Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 25 Jul 2025 16:58:24 +0200 Subject: [PATCH 28/64] Fixing 181 bug on IRIs --- .../core/next/impl/common/util/IRIUtils.java | 34 ++++++++++++------- .../core/next/impl/temp/CoreseIRITest.java | 15 ++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index bb1f83146..c530dc6ba 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -10,9 +10,10 @@ */ public class IRIUtils { - private static final Pattern IRI_PATTERN = Pattern.compile("^(([\\w\\-]+:\\/\\/([\\w\\-_:]+\\.)*[\\w\\-_:]*)(\\/([\\w\\-\\._\\:]+\\/)*))([\\w\\-\\._\\:]+)?(\\?[\\w\\-_\\:\\?\\=]+)?((\\#)?([\\w\\-_]+))?$"); + private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?([\\w\\-_]+))?)?$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); + /** * Prevent instantiation of the utility class. 
*/ @@ -29,15 +30,24 @@ public static String guessNamespace(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if((matcher.group(8) == null) || (matcher.group(6) == null && matcher.group(9) == null) ) { // If the IRI has no fragment or ends with a slash - - return matcher.group(1); - } else { - // 1: Domain and path ending with a slash, 6: final path element without slash, 9: final # if there is a fragment - return matcher.group(1) + matcher.group(6) + matcher.group(9); + if(matcher.group("protocol") != null && matcher.group("protocol").equals("_")) { + return ""; + } + StringBuilder namespace = new StringBuilder(); + namespace.append(matcher.group("protocol")).append(":"); + if(matcher.group("dblSlashes") != null) { + namespace.append(matcher.group("dblSlashes")); } + namespace.append(matcher.group("domain")); + if(matcher.group("path") != null) { + namespace.append(matcher.group("path")); + } + if(matcher.group("fragment") != null) { + namespace.append(matcher.group("finalPath")).append("#"); + } + return namespace.toString(); } else { - return ""; + throw new IllegalStateException("No namespace found for the given IRI: " + iri + "."); } } catch (IllegalStateException e) { return ""; @@ -54,10 +64,10 @@ public static String guessLocalName(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if(matcher.group(10) != null){ // If the IRI has a fragment - return matcher.group(10); - } else if(matcher.group(6) != null ) { // If the IRI has no fragment but do not ends with a slash - return matcher.group(6); + if(matcher.group("fragment") != null){ // If the IRI has a fragment + return matcher.group("fragment"); + } else if(matcher.group("finalPath") != null ) { // If the IRI has no fragment but do not ends with a slash + return matcher.group("finalPath"); } else { // If the URI ends with a slash return ""; } diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java 
b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java index 9519cd4f0..77bfc928c 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java @@ -30,6 +30,21 @@ public void constructorStringTest() { assertEquals("test", coreseIRI.getLocalName()); } + @Test + public void constructorStringTest_otherURIS() { + CoreseIRI coreseIRI_noSlash = new CoreseIRI("http://www.monicamurphy.org"); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.stringValue()); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.getCoreseNode().getLabel()); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.getNamespace()); + assertEquals("", coreseIRI_noSlash.getLocalName()); + + CoreseIRI coreseIRI_email = new CoreseIRI("mailto:monica@monicamurphy.org"); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.stringValue()); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.getCoreseNode().getLabel()); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.getNamespace()); + assertEquals("", coreseIRI_email.getLocalName()); + } + @Test public void constructorIriTest() { CoreseIRI coreseIRI = new CoreseIRI("http://example.org/test"); From d587cbac3a26281605dec0a205d72070bd19a0b2 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 25 Jul 2025 17:02:07 +0200 Subject: [PATCH 29/64] minor fix --- .../fr/inria/corese/core/next/impl/common/util/IRIUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index c530dc6ba..7c4a819b3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -42,7 +42,7 @@ public static String guessNamespace(String iri) 
{ if(matcher.group("path") != null) { namespace.append(matcher.group("path")); } - if(matcher.group("fragment") != null) { + if(matcher.group("fragment") != null && matcher.group("finalPath") != null) { namespace.append(matcher.group("finalPath")).append("#"); } return namespace.toString(); From d0a685d00ba7c4cc959798b1511e8246fac60285 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 25 Jul 2025 17:23:35 +0200 Subject: [PATCH 30/64] ANTLRTrig parser --- .../impl/io/parser/trig/ANTLRTrigParser.java | 27 +- .../impl/io/parser/trig/TriGListerner.java | 242 ++++++++++++++++++ .../io/parser/trig/TriGListernerImpl.java | 164 ------------ .../io/parser/trig/ANTLRTrigParserSpec.java | 93 ++++++- .../io/parser/trig/TriGListenerImplTest.java | 97 +++++++ 5 files changed, 457 insertions(+), 166 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java index 4f53923c7..cddf9168c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParser.java @@ -22,10 +22,28 @@ import java.io.Reader; import java.nio.charset.StandardCharsets; +/** + * An ANTLR4-based parser for Trig format. + * This parser uses an ANTLR grammar to tokenize and parse Trig documents, + * then a listener to build the RDF model. + */ public class ANTLRTrigParser extends AbstractRDFParser { + /** + * Constructor for the ANTLRTrigParser. + * + * @param model The RDF model to populate. + * @param factory The ValueFactory for creating RDF resources. 
+ */ public ANTLRTrigParser(Model model, ValueFactory factory) { super(model, factory); } + /** + * Constructor for the ANTLRTrigParser with configuration options. + * + * @param model The RDF model to populate. + * @param factory The ValueFactory for creating RDF resources. + * @param config The configuration options for parsing. + */ public ANTLRTrigParser(Model model, ValueFactory factory, IOOptions config) {super(model, factory, config);} @Override @@ -51,6 +69,13 @@ public void parse(Reader reader) throws ParsingErrorException { parse(reader, null); } + /** + * Parses Trig data from a Reader using ANTLR4. + * + * @param reader The Reader to read RDF data from. + * @param baseURI The base URI. + * @throws ParsingErrorException if a parsing or I/O error occurs. + */ @Override public void parse(Reader reader, String baseURI) throws ParsingErrorException { try { @@ -60,7 +85,7 @@ public void parse(Reader reader, String baseURI) throws ParsingErrorException { TriGParser triGParser = new TriGParser(tokens); ParseTreeWalker walker = new ParseTreeWalker(); ParseTree tree = triGParser.trigDoc(); - TriGListernerImpl listerner = new TriGListernerImpl(getModel(), getValueFactory(), this.getConfig()); + TriGListerner listerner = new TriGListerner(getModel(), getValueFactory(), this.getConfig()); walker.walk((ParseTreeListener) listerner, tree); } catch (IOException e) { throw new ParsingErrorException("Failed to parse TriG RDF: " + e.getMessage(), e); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java new file mode 100644 index 000000000..96193ed7c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java @@ -0,0 +1,242 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.io.IOOptions; +import 
fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.parser.antlr.TriGBaseListener; +import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; +import fr.inria.corese.core.next.api.ValueFactory; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class TriGListerner extends TriGBaseListener { + private final Model model; + private String baseURI; + private final Map prefixMap = new HashMap<>(); + private final ValueFactory factory; + + private Resource currentSubject; + private IRI currentPredicate; + private Resource currentGraph; + + + /** + * Constructor for the TriGListerner. + * + * @param model The RDF model to populate. + * @param factory The ValueFactory for creating RDF resources. + * @param options IOOptions for configuration (if any). + */ + public TriGListerner(Model model, ValueFactory factory, IOOptions options) { + this.model = model; + this.baseURI = baseURI != null ? baseURI : ""; + if (options != null && options instanceof RDFParserBaseIRIOptions); + this.factory = factory; + } + + public void exitPrefixID(TriGParser.PrefixIDContext ctx) { + String prefix = ctx.PNAME_NS().getText(); + String iri = ctx.IRIREF().getText(); + prefix = prefix.substring(0, prefix.length() - 1); + iri = iri.substring(1, iri.length() - 1); + prefixMap.put(prefix, iri); + model.setNamespace(prefix, iri); + } + + public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) { + baseURI = ctx.IRIREF().getText().replaceAll("^<|>$", ""); + } + + public void enterBlock(TriGParser.BlockContext ctx) { + currentGraph = ctx.Graph_w() != null && ctx.labelOrSubject() != null + ? 
extractLabelOrSubject(ctx.labelOrSubject()) + : null; + } + + public void exitBlock(TriGParser.BlockContext ctx) { + currentGraph = null; + } + + public void enterTriplesOrGraph(TriGParser.TriplesOrGraphContext ctx) { + if (ctx.labelOrSubject() != null && ctx.predicateObjectList() != null) { + currentSubject = extractLabelOrSubject(ctx.labelOrSubject()); + processPredicateObjectList(ctx.predicateObjectList()); + } + } + + public void enterTriples(TriGParser.TriplesContext ctx) { + currentSubject = extractSubject(ctx.subject()); + processPredicateObjectList(ctx.predicateObjectList()); + } + + /** + * Processes a PredicateObjectList context, extracting verbs and corresponding object lists, + * and adding triples to the model for the current subject and graph. + * + * @param ctx the predicate-object list context from the parser + */ + private void processPredicateObjectList(TriGParser.PredicateObjectListContext ctx) { + List verbs = ctx.verb(); + List objLists = ctx.objectList(); + + for (int i = 0; i < verbs.size(); i++) { + currentPredicate = extractVerb(verbs.get(i)); + List objects = objLists.get(i).object(); + for (TriGParser.ObjectContext objCtx : objects) { + Value object = extractObject(objCtx); + model.add(currentSubject, currentPredicate, object, currentGraph); + } + } + } + + /** + * Extracts an RDF object from the ObjectContext. + * Supports IRIs, blank nodes, literals, and inline blank node property lists. 
+ * + * @param ctx the object context + * @return the extracted RDF Value + */ + private Value extractObject(TriGParser.ObjectContext ctx) { + if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); + if (ctx.blank() != null) return extractBlank(ctx.blank()); + if (ctx.literal() != null) return extractLiteral(ctx.literal()); + if (ctx.blankNodePropertyList() != null) return processBlankNodePropertyList(ctx.blankNodePropertyList()); + throw new RuntimeException("Unsupported object: " + ctx.getText()); + } + + /** + * Processes an inline blank node with its property list, returning the blank node as a Resource. + * Temporarily updates the current subject to the new blank node during processing. + * + * @param ctx the blank node property list context + * @return the new blank node resource + */ + private Resource processBlankNodePropertyList(TriGParser.BlankNodePropertyListContext ctx) { + Resource bnode = factory.createBNode(); + Resource savedSubject = currentSubject; + currentSubject = bnode; + processPredicateObjectList(ctx.predicateObjectList()); + currentSubject = savedSubject; + return bnode; + } + + /** + * Extracts a subject from a SubjectContext, which can be an IRI or a blank node. + * + * @param ctx the subject context + * @return the extracted subject as a Resource + */ + private Resource extractSubject(TriGParser.SubjectContext ctx) { + if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); + if (ctx.blank() != null) return extractBlank(ctx.blank()); + throw new RuntimeException("Unsupported subject: " + ctx.getText()); + } + + /** + * Extracts a blank node from a BlankContext, supporting labeled (_:b) and anonymous ([]) forms. 
+ * + * @param ctx the blank context + * @return the blank node as a Resource + */ + private Resource extractBlank(TriGParser.BlankContext ctx) { + TriGParser.BlankNodeContext node = ctx.blankNode(); + if (node != null) { + if (node.BLANK_NODE_LABEL() != null) + return factory.createBNode(node.BLANK_NODE_LABEL().getText()); + if (node.ANON() != null) + return factory.createBNode(); + } + throw new RuntimeException("Unsupported blank node structure: " + ctx.getText()); + } + + /** + * Extracts a graph label or subject from a LabelOrSubjectContext. + * Supports IRI and blank node. + * + * @param ctx the label or subject context + * @return the extracted resource + */ + private Resource extractLabelOrSubject(TriGParser.LabelOrSubjectContext ctx) { + if (ctx.iri() != null) return factory.createIRI(resolveIRI(ctx.iri().getText())); + if (ctx.blankNode() != null) return factory.createBNode(ctx.blankNode().getText()); + throw new RuntimeException("Unsupported labelOrSubject: " + ctx.getText()); + } + + /** + * Extracts a predicate IRI from a VerbContext. + * Handles the special keyword 'a' as rdf:type. + * + * @param ctx the verb context + * @return the extracted IRI + */ + private IRI extractVerb(TriGParser.VerbContext ctx) { + return factory.createIRI(resolveIRI(ctx.getText())); + } + + /** + * Extracts a Literal from a LiteralContext, handling typed, language-tagged, boolean, and numeric literals. 
+ * + * @param ctx the literal context + * @return the extracted Literal + */ + private Literal extractLiteral(TriGParser.LiteralContext ctx) { + if (ctx.rDFLiteral() != null) { + String label = stripQuotes(ctx.rDFLiteral().string().getText()); + if (ctx.rDFLiteral().LANGTAG() != null) + return factory.createLiteral(label, ctx.rDFLiteral().LANGTAG().getText().substring(1)); + if (ctx.rDFLiteral().iri() != null) + return factory.createLiteral(label, factory.createIRI(resolveIRI(ctx.rDFLiteral().iri().getText()))); + return factory.createLiteral(label); + } + if (ctx.BooleanLiteral() != null) + return factory.createLiteral(ctx.BooleanLiteral().getText(), XSD.BOOLEAN.getIRI()); + if (ctx.numericLiteral() != null) { + if (ctx.numericLiteral().INTEGER() != null) + return factory.createLiteral(ctx.numericLiteral().INTEGER().getText(), XSD.INTEGER.getIRI()); + if (ctx.numericLiteral().DECIMAL() != null) + return factory.createLiteral(ctx.numericLiteral().DECIMAL().getText(), XSD.DECIMAL.getIRI()); + if (ctx.numericLiteral().DOUBLE() != null) + return factory.createLiteral(ctx.numericLiteral().DOUBLE().getText(), XSD.DOUBLE.getIRI()); + } + throw new RuntimeException("Unsupported literal: " + ctx.getText()); + } + + /** + * Resolves an IRI or QName into a full URI string. + * Handles full IRIs in angle brackets, QNames using prefixes, and special case "a". 
+ * + * @param raw the raw string + * @return the resolved URI string + */ + private String resolveIRI(String raw) { + raw = raw.trim(); + if (raw.startsWith("<") && raw.endsWith(">")) return raw.substring(1, raw.length() - 1); + if (raw.equals("a")) return RDF.type.getIRI().stringValue(); + if (raw.contains(":")) { + String[] parts = raw.split(":", 2); + String ns = prefixMap.get(parts[0]); + if (ns != null) return ns + parts[1]; + throw new IllegalArgumentException("Undeclared prefix: " + parts[0]); + } + return baseURI + raw; + } + + /** + * Strips surrounding quotes from a string literal, including single, double, and multi-line forms. + * + * @param text the quoted string + * @return the unquoted string + */ + private String stripQuotes(String text) { + if (text == null || text.length() < 2) return text; + if ((text.startsWith("\"") && text.endsWith("\"")) || + (text.startsWith("\"\"\"") && text.endsWith("\"\"\"")) || + (text.startsWith("'''") && text.endsWith("'''"))) { + return text.substring(1, text.length() - 1); + } + return text; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java deleted file mode 100644 index 5f39d55b8..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListernerImpl.java +++ /dev/null @@ -1,164 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.trig; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; -import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.parser.antlr.TriGBaseListener; -import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; -import fr.inria.corese.core.next.api.ValueFactory; -import java.util.HashMap; -import java.util.Map; 
- -public class TriGListernerImpl extends TriGBaseListener { - private final Model model; - private String baseURI; - private final Map prefixMap = new HashMap<>(); - private final ValueFactory factory; - - private Resource currentSubject; - private IRI currentPredicate; - - public TriGListernerImpl(Model model, ValueFactory factory, IOOptions options) { - this.model = model; - this.baseURI = baseURI != null ? baseURI : ""; - if (options != null && options instanceof RDFParserBaseIRIOptions); - this.factory = factory; - } - - public void exitPrefixID(TriGParser.PrefixIDContext ctx) { - String prefix = ctx.PNAME_NS().getText(); - String iri = ctx.IRIREF().getText(); - prefix = prefix.substring(0, prefix.length() - 1); - iri = iri.substring(1, iri.length() - 1); - prefixMap.put(prefix, iri); - model.setNamespace(prefix, iri); - } - - public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) { - String iri = ctx.IRIREF().getText(); - baseURI = iri.substring(1, iri.length() - 1); - } - - public void enterTriples(TriGParser.TriplesContext ctx) { - currentSubject = extractSubject(ctx.subject()); - } - - public void enterVerb(TriGParser.VerbContext ctx) { - currentPredicate = extractVerb(ctx); - } - - public void exitObject(TriGParser.ObjectContext ctx) { - Value object = extractObject(ctx); - model.add(currentSubject, currentPredicate, object); - } - - private String resolveIRI(String raw) { - if (raw.startsWith("<") && raw.endsWith(">")) { - return raw.substring(1, raw.length() - 1); - } else if (raw.equals("a")) { - return RDF.type.getIRI().stringValue(); - } else if (raw.contains(":")) { - String[] parts = raw.split(":", 2); - String ns = prefixMap.get(parts[0]); - if (ns != null) { - return ns + parts[1]; - } else { - throw new IllegalArgumentException("Prefix not declared: " + parts[0]); - } - } else { - return baseURI + raw; - } - } - - private String stripQuotes(String text) { - if (text == null || text.length() < 2) return text; - if ((text.startsWith("\"") && 
text.endsWith("\"")) || - (text.startsWith("'''") && text.endsWith("'''")) || - (text.startsWith("\"\"\"") && text.endsWith("\"\"\""))) { - return text.substring(1, text.length() - 1); - } - return text; - } - - private Literal extractLiteral(TriGParser.LiteralContext ctx) { - String label; - IRI datatype; - String lang; - - if (ctx.rDFLiteral() != null) { - if (ctx.rDFLiteral().iri() != null) { - datatype = factory.createIRI(resolveIRI(ctx.rDFLiteral().iri().getText())); - label = ctx.rDFLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label), datatype); - } - if (ctx.rDFLiteral().LANGTAG() != null) { - lang = ctx.rDFLiteral().LANGTAG().getText().substring(1); - label = ctx.rDFLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label), lang); - } - label = ctx.rDFLiteral().string().getText(); - return factory.createLiteral(stripQuotes(label)); - } - - if (ctx.BooleanLiteral() != null) { - label = ctx.BooleanLiteral().getText(); - datatype = XSD.BOOLEAN.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral() != null) { - if (ctx.numericLiteral().DECIMAL() != null) { - label = ctx.numericLiteral().DECIMAL().getText(); - datatype = XSD.DECIMAL.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral().DOUBLE() != null) { - label = ctx.numericLiteral().DOUBLE().getText(); - datatype = XSD.DOUBLE.getIRI(); - return factory.createLiteral(label, datatype); - } - if (ctx.numericLiteral().INTEGER() != null) { - label = ctx.numericLiteral().INTEGER().getText(); - datatype = XSD.INTEGER.getIRI(); - return factory.createLiteral(label, datatype); - } - } - throw new IllegalArgumentException("Unsupported literal type: " + ctx.getText()); - } - - private Value extractObject(TriGParser.ObjectContext ctx) { - if (ctx.iri() != null) { - return factory.createIRI(resolveIRI(ctx.iri().getText())); - } - if (ctx.blank() != null) { - return 
factory.createBNode(ctx.blank().getText()); - } - if (ctx.literal() != null) { - return extractLiteral(ctx.literal()); - } - throw new RuntimeException("Unsupported object: " + ctx.getText()); - } - - private Resource extractSubject(TriGParser.SubjectContext ctx) { - if (ctx.iri() != null) { - return factory.createIRI(resolveIRI(ctx.iri().getText())); - } - if (ctx.blank() != null) { - return factory.createBNode(ctx.blank().getText()); - } - throw new RuntimeException("Unsupported subject: " + ctx.getText()); - } - - private IRI extractPredicate(TriGParser.PredicateContext ctx) { - return factory.createIRI(resolveIRI(ctx.getText())); - } - - private IRI extractVerb(TriGParser.VerbContext ctx) { - if (ctx.predicate() != null) { - return extractPredicate(ctx.predicate()); - } else { - return factory.createIRI(resolveIRI(ctx.getText())); - } - } -} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java index 373855f27..970741aaf 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java @@ -1,6 +1,8 @@ package fr.inria.corese.core.next.impl.io.parser.trig; +import fr.inria.corese.core.next.api.Literal; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Value; import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.io.parser.RDFParser; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; @@ -21,6 +23,35 @@ private Model parseFromString(String trigData, String baseURI) throws Exception return model; } + /** + * Helper method to print the model. 
+ * @param model + */ + private void printModel(Model model) { + model.stream().forEach(stmt -> { + Value obj = stmt.getObject(); + if (obj instanceof Literal literal) { + if (literal.getLanguage().isPresent()) { + System.out.printf("(%s, %s, \"%s\"@%s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel(), + literal.getLanguage().get()); + } else { + System.out.printf("(%s, %s, \"%s\")%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel()); + } + } else { + System.out.printf("(%s, %s, %s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + obj.stringValue()); + } + }); + } + @Test public void testNamedGraphParsing() throws Exception { String trig = "@prefix ex: .\n" + @@ -29,7 +60,7 @@ public void testNamedGraphParsing() throws Exception { "}"; Model model = parseFromString(trig, null); - + printModel(model); assertEquals(1, model.size()); assertEquals(1, model.getNamespaces().size()); @@ -53,6 +84,7 @@ public void testDocumentThatContainsOneGraphExample1() throws Exception { """.trim(); Model model = parseFromString(trig, null); + printModel(model); assertEquals(6, model.size()); @@ -61,4 +93,63 @@ public void testDocumentThatContainsOneGraphExample1() throws Exception { assertEquals(1, model.contexts().size()); } + @Test + public void testDocumentThatContainsTwoGraphExample() throws Exception { + String trig = """ + # This document contains a same data as the + # previous example. + + @prefix rdf: . + @prefix dc: . + @prefix foaf: . + + # default graph - no {} used. + dc:publisher "Bob" . + dc:publisher "Alice" . + + # GRAPH keyword to highlight a named graph + # Abbreviation of triples using ; + GRAPH + { + [] foaf:name "Bob" ; + foaf:mbox ; + foaf:knows _:b . 
+ } + + GRAPH + { + _:b foaf:name "Alice" ; + foaf:mbox + } + """.trim(); + + Model model = parseFromString(trig, null); + printModel(model); + + assertEquals(7, model.size()); + + assertEquals(3, model.getNamespaces().size()); + + assertEquals(3, model.contexts().size()); + } + + @Test + public void testNestedBlankNodesWithSharedIdentifiers() throws Exception { + String trig = """ + @prefix ex: . + + GRAPH ex:graph1 { + ex:Alice ex:knows [ + ex:name "Bob" ; + ex:knows [ + ex:name "Charlie" + ] + ] ; + ex:email "alice@example.org" . + } + """.trim(); + Model model = parseFromString(trig, null); + printModel(model); + } + } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java new file mode 100644 index 000000000..88e01228f --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java @@ -0,0 +1,97 @@ +package fr.inria.corese.core.next.impl.io.parser.trig; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.parser.antlr.TriGLexer; +import fr.inria.corese.core.next.impl.parser.antlr.TriGParser; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.junit.jupiter.api.Test; + +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.*; + +public class TriGListenerImplTest { + private Model parseTrig(String trigData) throws Exception { + ValueFactory factory = new CoreseAdaptedValueFactory(); + + CharStream input = CharStreams.fromReader(new StringReader(trigData)); + TriGLexer lexer 
= new TriGLexer(input); + CommonTokenStream tokens = new CommonTokenStream(lexer); + TriGParser parser = new TriGParser(tokens); + ParseTree tree = parser.trigDoc(); + + Model model = new CoreseModel(); + TriGListerner listener = new TriGListerner(model, factory, null); + ParseTreeWalker.DEFAULT.walk(listener, tree); + + return model; + } + + @Test + public void testSimpleNamedGraph() throws Exception { + String trig = """ + @prefix ex: . + + GRAPH ex:graph { + ex:subject ex:predicate "Hello" . + } + """; + + Model model = parseTrig(trig); + assertEquals(1, model.size()); + assertEquals(1, model.contexts().size()); + } + + @Test + public void testBlankNodeWithProperties() throws Exception { + String trig = """ + @prefix ex: . + GRAPH ex:graph { + ex:Bob ex:knows [ ex:name "Charlie" ] . + } + """; + + Model model = parseTrig(trig); + assertEquals(2, model.size()); + } + + @Test + public void testMultipleGraphsAndBase() throws Exception { + String trig = """ + @base . + @prefix dc: . + @prefix ex: . + + dc:creator "Bob" . + + GRAPH ex:other { + dc:creator "Alice" . + } + """; + + Model model = parseTrig(trig); + assertEquals(2, model.contexts().size()); + assertEquals(2, model.size()); + } + + @Test + public void testTypedLiteralsAndLang() throws Exception { + String trig = """ + @prefix ex: . + @prefix xsd: . + + ex:subject ex:age "30"^^xsd:integer ; + ex:name "Jean"@fr . 
+ """; + + Model model = parseTrig(trig); + assertEquals(2, model.size()); + } +} From 4d2da0b6bf89e62259ee124087665ec23c54dbae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Fri, 25 Jul 2025 18:24:55 +0200 Subject: [PATCH 31/64] fix: Update favicon paths and add missing SVG files for light and dark modes --- docs/source/_static/js/favicon-theme.js | 4 ++-- .../{corese-core_doc_fav_dark.svg => corese_fav_dark.svg} | 0 .../{corese-core_doc_fav_light.svg => corese_fav_light.svg} | 0 docs/source/_templates/layout.html | 4 ++-- docs/source/conf.py | 4 ++-- docs/source/index.rst | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename docs/source/_static/logo/{corese-core_doc_fav_dark.svg => corese_fav_dark.svg} (100%) rename docs/source/_static/logo/{corese-core_doc_fav_light.svg => corese_fav_light.svg} (100%) diff --git a/docs/source/_static/js/favicon-theme.js b/docs/source/_static/js/favicon-theme.js index b7dc10e99..d534a7d29 100644 --- a/docs/source/_static/js/favicon-theme.js +++ b/docs/source/_static/js/favicon-theme.js @@ -3,8 +3,8 @@ function setFavicon(e) { const favicon = document.getElementById("favicon"); if (favicon) { favicon.href = dark - ? "_static/logo/corese-core_doc_fav_dark.svg" - : "_static/logo/corese-core_doc_fav_light.svg"; + ? 
"_static/logo/corese_fav_dark.svg" + : "_static/logo/corese_fav_light.svg"; } } diff --git a/docs/source/_static/logo/corese-core_doc_fav_dark.svg b/docs/source/_static/logo/corese_fav_dark.svg similarity index 100% rename from docs/source/_static/logo/corese-core_doc_fav_dark.svg rename to docs/source/_static/logo/corese_fav_dark.svg diff --git a/docs/source/_static/logo/corese-core_doc_fav_light.svg b/docs/source/_static/logo/corese_fav_light.svg similarity index 100% rename from docs/source/_static/logo/corese-core_doc_fav_light.svg rename to docs/source/_static/logo/corese_fav_light.svg diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index 4c06da170..3fe1e248c 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -8,8 +8,8 @@ fav.rel = "icon"; fav.type = "image/svg+xml"; fav.href = darkMode - ? "_static/logo/corese-core_doc_fav_dark.svg" - : "_static/logo/corese-core_doc_fav_light.svg"; + ? "_static/logo/corese_fav_dark.svg" + : "_static/logo/corese_fav_light.svg"; document.head.appendChild(fav); {{ super() }} diff --git a/docs/source/conf.py b/docs/source/conf.py index fae03ba86..b959c5025 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -89,8 +89,8 @@ def set_version(app, config): ] # Modify the title to get good social-media links -html_title = "Corese" -html_short_title = "Corese" +html_title = "Corese-core Documentation" +html_short_title = "Corese-core Docs" # -- Theme Options ----------------------------------------------------------- # Theme options are theme-specific and customize the look and feel of a theme diff --git a/docs/source/index.rst b/docs/source/index.rst index 2add6f954..2cc8a5c68 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -78,7 +78,7 @@ Corese-Core is a Java library that provides the core functionalities of Corese. .. _issue reports: https://github.com/corese-stack/corese-core/issues .. 
_pull requests: https://github.com/corese-stack/corese-core/pulls -For support questions, comments, and any ideas for improvements you`d like to discuss, please use our `discussion forum`_. We welcome everyone to contribute to `issue reports`_, suggest new features, and create `pull requests`_. +For any questions, comments, or improvement ideas, please use our `discussion forum`_. We welcome everyone to contribute via `issue reports`_, suggest new features, and create `pull requests`_. .. ############################################################################# From ed5bdd9eae08f2c739be33cc85edbb2f74b0c2e3 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 25 Jul 2025 16:58:24 +0200 Subject: [PATCH 32/64] Fixing 181 bug on IRIs --- .../core/next/impl/common/util/IRIUtils.java | 34 ++++++++++++------- .../core/next/impl/temp/CoreseIRITest.java | 15 ++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index bb1f83146..c530dc6ba 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -10,9 +10,10 @@ */ public class IRIUtils { - private static final Pattern IRI_PATTERN = Pattern.compile("^(([\\w\\-]+:\\/\\/([\\w\\-_:]+\\.)*[\\w\\-_:]*)(\\/([\\w\\-\\._\\:]+\\/)*))([\\w\\-\\._\\:]+)?(\\?[\\w\\-_\\:\\?\\=]+)?((\\#)?([\\w\\-_]+))?$"); + private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?([\\w\\-_]+))?)?$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); + /** * Prevent instantiation of the utility class. 
*/ @@ -29,15 +30,24 @@ public static String guessNamespace(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if((matcher.group(8) == null) || (matcher.group(6) == null && matcher.group(9) == null) ) { // If the IRI has no fragment or ends with a slash - - return matcher.group(1); - } else { - // 1: Domain and path ending with a slash, 6: final path element without slash, 9: final # if there is a fragment - return matcher.group(1) + matcher.group(6) + matcher.group(9); + if(matcher.group("protocol") != null && matcher.group("protocol").equals("_")) { + return ""; + } + StringBuilder namespace = new StringBuilder(); + namespace.append(matcher.group("protocol")).append(":"); + if(matcher.group("dblSlashes") != null) { + namespace.append(matcher.group("dblSlashes")); } + namespace.append(matcher.group("domain")); + if(matcher.group("path") != null) { + namespace.append(matcher.group("path")); + } + if(matcher.group("fragment") != null) { + namespace.append(matcher.group("finalPath")).append("#"); + } + return namespace.toString(); } else { - return ""; + throw new IllegalStateException("No namespace found for the given IRI: " + iri + "."); } } catch (IllegalStateException e) { return ""; @@ -54,10 +64,10 @@ public static String guessLocalName(String iri) { Matcher matcher = IRI_PATTERN.matcher(iri); if(matcher.matches()) { - if(matcher.group(10) != null){ // If the IRI has a fragment - return matcher.group(10); - } else if(matcher.group(6) != null ) { // If the IRI has no fragment but do not ends with a slash - return matcher.group(6); + if(matcher.group("fragment") != null){ // If the IRI has a fragment + return matcher.group("fragment"); + } else if(matcher.group("finalPath") != null ) { // If the IRI has no fragment but do not ends with a slash + return matcher.group("finalPath"); } else { // If the URI ends with a slash return ""; } diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java 
b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java index 9519cd4f0..77bfc928c 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java @@ -30,6 +30,21 @@ public void constructorStringTest() { assertEquals("test", coreseIRI.getLocalName()); } + @Test + public void constructorStringTest_otherURIS() { + CoreseIRI coreseIRI_noSlash = new CoreseIRI("http://www.monicamurphy.org"); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.stringValue()); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.getCoreseNode().getLabel()); + assertEquals("http://www.monicamurphy.org", coreseIRI_noSlash.getNamespace()); + assertEquals("", coreseIRI_noSlash.getLocalName()); + + CoreseIRI coreseIRI_email = new CoreseIRI("mailto:monica@monicamurphy.org"); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.stringValue()); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.getCoreseNode().getLabel()); + assertEquals("mailto:monica@monicamurphy.org", coreseIRI_email.getNamespace()); + assertEquals("", coreseIRI_email.getLocalName()); + } + @Test public void constructorIriTest() { CoreseIRI coreseIRI = new CoreseIRI("http://example.org/test"); From 0d6c95b079c7297d1de087493b9725a3b7975aa4 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 25 Jul 2025 17:02:07 +0200 Subject: [PATCH 33/64] minor fix --- .../fr/inria/corese/core/next/impl/common/util/IRIUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index c530dc6ba..7c4a819b3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -42,7 +42,7 @@ public static String guessNamespace(String iri) 
{ if(matcher.group("path") != null) { namespace.append(matcher.group("path")); } - if(matcher.group("fragment") != null) { + if(matcher.group("fragment") != null && matcher.group("finalPath") != null) { namespace.append(matcher.group("finalPath")).append("#"); } return namespace.toString(); From bddf0ee35d1c0178e7c421af8794fd64ec1d4672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 09:57:22 +0200 Subject: [PATCH 34/64] fix: Update Sonatype credentials in publishing configuration for improved security --- .github/workflows/release-maven-central.yml | 4 ++-- build.gradle.kts | 16 +++------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/.github/workflows/release-maven-central.yml b/.github/workflows/release-maven-central.yml index aad043336..62ad78ef4 100644 --- a/.github/workflows/release-maven-central.yml +++ b/.github/workflows/release-maven-central.yml @@ -31,8 +31,8 @@ jobs: - name: Build and publish to Sonatype (staging only) env: - OSSRH_USERNAME: ${{ secrets.OSSRH_USERNAME }} - OSSRH_PASSWORD: ${{ secrets.OSSRH_PASSWORD }} + ORG_GRADLE_PROJECT_sonatypeUsername: ${{ secrets.SONATYPE_USERNAME }} + ORG_GRADLE_PROJECT_sonatypePassword: ${{ secrets.SONATYPE_PASSWORD }} GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} GPG_SIGNING_PASSPHRASE: ${{ secrets.GPG_SIGNING_PASSPHRASE }} run: ./gradlew publishToSonatype diff --git a/build.gradle.kts b/build.gradle.kts index b8bfa7ca6..52479ce6d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -180,20 +180,10 @@ signing { // Configure Nexus publishing and credentials nexusPublishing { repositories { - // Configure Sonatype OSSRH repository for publishing. sonatype { - // Retrieve Sonatype OSSRH credentials from environment variables. - val ossrhUsername = providers.environmentVariable("OSSRH_USERNAME") - val ossrhPassword = providers.environmentVariable("OSSRH_PASSWORD") - - // Set the credentials for Sonatype OSSRH if they are available. 
- if (ossrhUsername.isPresent && ossrhPassword.isPresent) { - username.set(ossrhUsername.get()) - password.set(ossrhPassword.get()) - } - - // Define the package group for this publication, typically following the group ID. - packageGroup.set(Meta.groupId) + // Set the URLs for the Nexus repository and snapshot repository. + nexusUrl.set(uri("https://ossrh-staging-api.central.sonatype.com/service/local/")) + snapshotRepositoryUrl.set(uri("https://central.sonatype.com/repository/maven-snapshots/")) } } } From 8960366020d0433b34fe24e530f5d19db22f4e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 10:01:16 +0200 Subject: [PATCH 35/64] fix: Remove Sonatype OSSRH publishing settings from project metadata --- build.gradle.kts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 52479ce6d..eddfbb4c9 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -29,10 +29,6 @@ object Meta { // License information const val license = "CeCILL-C License" const val licenseUrl = "https://opensource.org/licenses/CeCILL-C" - - // Sonatype OSSRH publishing settings - const val release = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" - const val snapshot = "https://oss.sonatype.org/content/repositories/snapshots/" } //////////////////////// From 0bb0e9d4ff5d8c59797b61db2df2e509bc64d678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 10:42:06 +0200 Subject: [PATCH 36/64] fix: Update version from 4.6.4-SNAPSHOT to 4.6.4 in changelog, README, installation guide, and CoreseInfo class --- CHANGELOG.md | 2 +- README.md | 6 +++--- build.gradle.kts | 2 +- docs/source/install.rst | 4 ++-- src/main/java/fr/inria/corese/core/util/CoreseInfo.java | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b033eff2..52c165495 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Corese Changelog -## 
Version 4.6.4-SNAPSHOT – +## Version 4.6.4 – ### Changed diff --git a/README.md b/README.md index d4993f6d0..9ad3bc1a3 100644 --- a/README.md +++ b/README.md @@ -31,14 +31,14 @@ Integrate Corese-Core into your Java project using your preferred build tool. fr.inria.corese corese-core - 4.6.4-SNAPSHOT + 4.6.4 ``` ### Gradle ```groovy -implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' +implementation 'fr.inria.corese:corese-core:4.6.4' ``` ### Manual JAR @@ -54,7 +54,7 @@ Download the latest `.jar` file from: ## Documentation -- [Corese-Core api documentation](https://corese-stack.github.io/corese-core/v4.6.3/java_api/library_root.html) +- [Corese-Core api documentation](https://corese-stack.github.io/corese-core/v4.6.4/java_api/library_root.html) **W3C Standards:** diff --git a/build.gradle.kts b/build.gradle.kts index eddfbb4c9..3ece3884e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -20,7 +20,7 @@ object Meta { // Project coordinates const val groupId = "fr.inria.corese" const val artifactId = "corese-core" - const val version = "4.6.4-SNAPSHOT" + const val version = "4.6.4" // Project description const val desc = "Corese is a Semantic Web Factory (triple store and SPARQL endpoint) implementing RDF, RDFS, SPARQL 1.1 Query and Update, Shacl. STTL. LDScript." diff --git a/docs/source/install.rst b/docs/source/install.rst index 3856d2644..66ebff30f 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -15,7 +15,7 @@ Add the following dependency to your ``pom.xml`` file: fr.inria.corese corese-core - 4.6.4-SNAPSHOT + 4.6.4 Gradle @@ -25,7 +25,7 @@ Add this line to the ``dependencies`` section of your ``build.gradle`` file: .. 
code-block:: groovy - implementation 'fr.inria.corese:corese-core:4.6.4-SNAPSHOT' + implementation 'fr.inria.corese:corese-core:4.6.4' Direct Download --------------- diff --git a/src/main/java/fr/inria/corese/core/util/CoreseInfo.java b/src/main/java/fr/inria/corese/core/util/CoreseInfo.java index 16fb462b7..c202ac361 100644 --- a/src/main/java/fr/inria/corese/core/util/CoreseInfo.java +++ b/src/main/java/fr/inria/corese/core/util/CoreseInfo.java @@ -6,7 +6,7 @@ */ public class CoreseInfo { - private static final String VERSION = "4.6.4-SNAPSHOT"; + private static final String VERSION = "4.6.4"; /** * Retrieves the current version of the Corese application. From 5c9812b6e856a56514c66b318432ef557598df7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 13:20:59 +0200 Subject: [PATCH 37/64] fix: Define package group for Nexus publication in build configuration --- build.gradle.kts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.gradle.kts b/build.gradle.kts index 3ece3884e..4974091ab 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -180,6 +180,9 @@ nexusPublishing { // Set the URLs for the Nexus repository and snapshot repository. nexusUrl.set(uri("https://ossrh-staging-api.central.sonatype.com/service/local/")) snapshotRepositoryUrl.set(uri("https://central.sonatype.com/repository/maven-snapshots/")) + + // Define the package group for this publication, typically following the group ID. 
+ packageGroup.set(Meta.groupId) } } } From c8a5f1bfbaa27b6d0c3c4122b862732bab665d18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 15:23:21 +0200 Subject: [PATCH 38/64] feat: Enhance Maven Central release workflow with automatic promotion and tag input --- .github/workflows/release-maven-central.yml | 24 +++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-maven-central.yml b/.github/workflows/release-maven-central.yml index 62ad78ef4..cac882976 100644 --- a/.github/workflows/release-maven-central.yml +++ b/.github/workflows/release-maven-central.yml @@ -5,6 +5,12 @@ on: push: tags: - "v*.*.*" + workflow_dispatch: + inputs: + tag: + description: 'Tag to publish (e.g. v4.6.4)' + required: true + jobs: release-maven-central: @@ -13,6 +19,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag || github.ref }} - name: Set up JDK 11 uses: actions/setup-java@v4 @@ -37,5 +45,17 @@ jobs: GPG_SIGNING_PASSPHRASE: ${{ secrets.GPG_SIGNING_PASSPHRASE }} run: ./gradlew publishToSonatype - - name: Manual Release Reminder - run: echo "The artifacts have been staged on Sonatype. Please log in to Sonatype Nexus to close and release the staging repository manually." + - name: Promote staging to Central Portal (automatic close+release) + env: + SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} + SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} + run: | + echo "Promoting artifacts to Maven Central portal..." 
+ + ENCODED_CREDS=$(echo -n "$SONATYPE_USERNAME:$SONATYPE_PASSWORD" | base64) + NAMESPACE="fr.inria.corese" + + curl -X POST \ + -H "Authorization: Bearer $ENCODED_CREDS" \ + "https://central.sonatype.com/service/local/manual/upload/defaultRepository/$NAMESPACE?publishing_type=automatic" \ + --fail --silent --show-error From 5aa98a1b899cf9c7948d00c76591dcda27287053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 16:39:36 +0200 Subject: [PATCH 39/64] refactor: Update Maven publishing configuration and streamline release workflow --- .github/workflows/release-maven-central.yml | 30 ++-- build.gradle.kts | 147 +++++++------------- 2 files changed, 57 insertions(+), 120 deletions(-) diff --git a/.github/workflows/release-maven-central.yml b/.github/workflows/release-maven-central.yml index cac882976..3ea594ea8 100644 --- a/.github/workflows/release-maven-central.yml +++ b/.github/workflows/release-maven-central.yml @@ -11,7 +11,6 @@ on: description: 'Tag to publish (e.g. 
v4.6.4)' required: true - jobs: release-maven-central: runs-on: [self-hosted, corese-stack-ubuntu-24] @@ -37,25 +36,14 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - - name: Build and publish to Sonatype (staging only) + - name: Publish to Maven Central via Vanniktech env: - ORG_GRADLE_PROJECT_sonatypeUsername: ${{ secrets.SONATYPE_USERNAME }} - ORG_GRADLE_PROJECT_sonatypePassword: ${{ secrets.SONATYPE_PASSWORD }} - GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} - GPG_SIGNING_PASSPHRASE: ${{ secrets.GPG_SIGNING_PASSPHRASE }} - run: ./gradlew publishToSonatype + ORG_GRADLE_PROJECT_mavenCentralUsername: ${{ secrets.SONATYPE_USERNAME }} + ORG_GRADLE_PROJECT_mavenCentralPassword: ${{ secrets.SONATYPE_PASSWORD }} + ORG_GRADLE_PROJECT_signingInMemoryKey: ${{ secrets.GPG_SIGNING_KEY }} + ORG_GRADLE_PROJECT_signingInMemoryKeyPassword: ${{ secrets.GPG_SIGNING_PASSPHRASE }} + run: ./gradlew publishToMavenCentral + + - name: Manual Release Reminder + run: echo "Upload complete. Please finalize the deployment at https://central.sonatype.com/publishing/deployments" - - name: Promote staging to Central Portal (automatic close+release) - env: - SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} - SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} - run: | - echo "Promoting artifacts to Maven Central portal..." 
- - ENCODED_CREDS=$(echo -n "$SONATYPE_USERNAME:$SONATYPE_PASSWORD" | base64) - NAMESPACE="fr.inria.corese" - - curl -X POST \ - -H "Authorization: Bearer $ENCODED_CREDS" \ - "https://central.sonatype.com/service/local/manual/upload/defaultRepository/$NAMESPACE?publishing_type=automatic" \ - --fail --silent --show-error diff --git a/build.gradle.kts b/build.gradle.kts index 4974091ab..15924b3f8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -5,7 +5,7 @@ plugins { // Publishing plugins signing // Signs artifacts for Maven Central `maven-publish` // Enables publishing to Maven repositories - id("io.github.gradle-nexus.publish-plugin") version "2.0.0" // Automates Nexus publishing + id("com.vanniktech.maven.publish") version "0.34.0" // Automates Maven publishing tasks // Tooling plugins `jacoco` // For code coverage reports @@ -82,109 +82,58 @@ dependencies { // Publishing settings // ///////////////////////// -// Publication configuration for Maven repositories -publishing { - publications { - create("mavenJava") { - - // Configure the publication to include JAR, sources, and Javadoc - from(components["java"]) - - // Configures version mapping to control how dependency versions are resolved - // for different usage contexts (API and runtime). - versionMapping { - // Defines version mapping for Java API usage. - // Sets the version to be resolved from the runtimeClasspath configuration. - usage("java-api") { - fromResolutionOf("runtimeClasspath") - } - - // Defines version mapping for Java runtime usage. - // Uses the result of dependency resolution to determine the version. 
- usage("java-runtime") { - fromResolutionResult() - } +mavenPublishing { + coordinates(Meta.groupId, Meta.artifactId, Meta.version) + + pom { + name.set(Meta.artifactId) + description.set(Meta.desc) + url.set("https://github.com/${Meta.githubRepo}") + licenses { + license { + name.set(Meta.license) + url.set(Meta.licenseUrl) + distribution.set("repo") } - - // Configure the publication metadata - groupId = Meta.groupId - artifactId = Meta.artifactId - version = Meta.version - - pom { - name.set(Meta.artifactId) - description.set(Meta.desc) - url.set("https://github.com/${Meta.githubRepo}") - licenses { - license { - name.set(Meta.license) - url.set(Meta.licenseUrl) - } - } - developers { - developer { - id.set("OlivierCorby") - name.set("Olivier Corby") - email.set("olivier.corby@inria.fr") - url.set("http://www-sop.inria.fr/members/Olivier.Corby") - organization.set("Inria") - organizationUrl.set("http://www.inria.fr/") - } - developer { - id.set("remiceres") - name.set("Rémi Cérès") - email.set("remi.ceres@inria.fr") - url.set("http://www-sop.inria.fr/members/Remi.Ceres") - organization.set("Inria") - organizationUrl.set("http://www.inria.fr/") - } - developer { - id.set("pierremaillot") - name.set("Pierre Maillot") - email.set("pierre.maillot@inria.fr") - url.set("https://maillpierre.github.io/personal-page/") - organization.set("Inria") - organizationUrl.set("http://www.inria.fr/") - } - } - scm { - url.set("https://github.com/${Meta.githubRepo}.git") - connection.set("scm:git:git://github.com/${Meta.githubRepo}.git") - developerConnection.set("scm:git:git://github.com/${Meta.githubRepo}.git") - } - issueManagement { - url.set("https://github.com/${Meta.githubRepo}/issues") - } + } + developers { + developer { + id.set("OlivierCorby") + name.set("Olivier Corby") + email.set("olivier.corby@inria.fr") + url.set("http://www-sop.inria.fr/members/Olivier.Corby") + organization.set("Inria") + organizationUrl.set("http://www.inria.fr/") + } + developer { + 
id.set("remiceres") + name.set("Rémi Cérès") + email.set("remi.ceres@inria.fr") + url.set("http://www-sop.inria.fr/members/Remi.Ceres") + organization.set("Inria") + organizationUrl.set("http://www.inria.fr/") + } + developer { + id.set("pierremaillot") + name.set("Pierre Maillot") + email.set("pierre.maillot@inria.fr") + url.set("https://maillpierre.github.io/personal-page/") + organization.set("Inria") + organizationUrl.set("http://www.inria.fr/") } } - } -} - -// Configure artifact signing -signing { - // Retrieve the GPG signing key and passphrase from environment variables for secure access. - val signingKey = providers.environmentVariable("GPG_SIGNING_KEY") - val signingPassphrase = providers.environmentVariable("GPG_SIGNING_PASSPHRASE") - - // Sign the publications if the GPG signing key and passphrase are available. - if (signingKey.isPresent && signingPassphrase.isPresent) { - useInMemoryPgpKeys(signingKey.get(), signingPassphrase.get()) - sign(publishing.publications) - } -} - -// Configure Nexus publishing and credentials -nexusPublishing { - repositories { - sonatype { - // Set the URLs for the Nexus repository and snapshot repository. - nexusUrl.set(uri("https://ossrh-staging-api.central.sonatype.com/service/local/")) - snapshotRepositoryUrl.set(uri("https://central.sonatype.com/repository/maven-snapshots/")) - - // Define the package group for this publication, typically following the group ID. 
- packageGroup.set(Meta.groupId) + scm { + url.set("https://github.com/${Meta.githubRepo}/") + connection.set("scm:git:git://github.com/${Meta.githubRepo}.git") + developerConnection.set("scm:git:ssh://git@github.com/${Meta.githubRepo}.git") + } + issueManagement { + url.set("https://github.com/${Meta.githubRepo}/issues") } } + + publishToMavenCentral() + signAllPublications() } ///////////////////////// From 90c6c6adb16251c8f7dfdc65aba3a7785462b33c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 16:59:01 +0200 Subject: [PATCH 40/64] refactor: Update workflow names and descriptions for clarity in Maven Central and Sphinx documentation processes --- .github/workflows/release-maven-central.yml | 9 ++++----- .github/workflows/sphinx-dev.yml | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/release-maven-central.yml b/.github/workflows/release-maven-central.yml index 3ea594ea8..6ae32e4e1 100644 --- a/.github/workflows/release-maven-central.yml +++ b/.github/workflows/release-maven-central.yml @@ -1,5 +1,5 @@ name: Publish Maven Central -run-name: ${{ github.actor }} triggered Maven Central release on ${{ github.ref_name }} +run-name: ${{ github.actor }} triggered Maven Central release on ${{ github.event.inputs.ref || github.ref_name }} on: push: @@ -7,8 +7,8 @@ on: - "v*.*.*" workflow_dispatch: inputs: - tag: - description: 'Tag to publish (e.g. v4.6.4)' + ref: + description: 'Branch or tag to publish (e.g. develop or v4.6.4)' required: true jobs: @@ -19,7 +19,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - ref: ${{ github.event.inputs.tag || github.ref }} + ref: ${{ github.event.inputs.ref || github.ref }} - name: Set up JDK 11 uses: actions/setup-java@v4 @@ -46,4 +46,3 @@ jobs: - name: Manual Release Reminder run: echo "Upload complete. 
Please finalize the deployment at https://central.sonatype.com/publishing/deployments" - diff --git a/.github/workflows/sphinx-dev.yml b/.github/workflows/sphinx-dev.yml index fd90e6fc7..09c066e96 100644 --- a/.github/workflows/sphinx-dev.yml +++ b/.github/workflows/sphinx-dev.yml @@ -1,5 +1,5 @@ -name: "Sphinx: Render development docs" -run-name: ${{ github.actor }} has launched CI process on ${{ github.ref_name }} +name: "Docs: Render and Deploy Development Documentation" +run-name: ${{ github.actor }} triggered a documentation build on branch ${{ github.ref_name }} on: push: From bf5632c815f00c55a6ed74c6ac8108ba28271173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 17:21:59 +0200 Subject: [PATCH 41/64] feat: Add publication task dependency for Gradle 8 compatibility --- build.gradle.kts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/build.gradle.kts b/build.gradle.kts index 15924b3f8..001ef1273 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -209,3 +209,10 @@ tasks.withType().configureEach { tasks.withType().configureEach { dependsOn(tasks.withType()) } + +// Configure publication task dependency for Gradle 8 compatibility +afterEvaluate { + tasks.named("generateMetadataFileForMavenPublication") { + dependsOn(tasks.named("plainJavadocJar")) + } +} From f67d5269f8d51732b6b9657222ecb15406e128e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Mon, 28 Jul 2025 17:52:12 +0200 Subject: [PATCH 42/64] refactor: Update Javadoc task configuration to disable doclint warnings and streamline setup --- build.gradle.kts | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 001ef1273..da5dca007 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -148,11 +148,14 @@ tasks.withType() { // Configure Javadoc tasks with UTF-8 encoding and disable failure on error. 
// This ensures that Javadoc generation won't fail due to minor issues. -tasks.withType() { +tasks.withType().configureEach { options.encoding = "UTF-8" isFailOnError = false + // Configure Javadoc tasks to disable doclint warnings. + (options as CoreJavadocOptions).addBooleanOption("Xdoclint:none", true) } + // Configure the shadow JAR task to include dependencies in the output JAR. // This creates a single JAR file with all dependencies bundled. // The JAR file is named with the classifier "jar-with-dependencies". @@ -162,15 +165,6 @@ tasks { } } -// Configure Javadoc tasks to disable doclint warnings. -tasks { - javadoc { - options { - (this as CoreJavadocOptions).addBooleanOption("Xdoclint:none", true) - } - } -} - // Configure the build task to depend on the shadow JAR task. // This ensures that the shadow JAR is built when the project is built. tasks.build { @@ -209,10 +203,3 @@ tasks.withType().configureEach { tasks.withType().configureEach { dependsOn(tasks.withType()) } - -// Configure publication task dependency for Gradle 8 compatibility -afterEvaluate { - tasks.named("generateMetadataFileForMavenPublication") { - dependsOn(tasks.named("plainJavadocJar")) - } -} From 29f7aed8890a4880928cba7df6319db7881af150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Tue, 29 Jul 2025 09:31:41 +0200 Subject: [PATCH 43/64] refactor: Update Java compilation settings and conditional signing for Maven publications --- build.gradle.kts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index da5dca007..c43685d0d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -37,8 +37,7 @@ object Meta { // Java compilation settings java { - withJavadocJar() // Include Javadoc JAR in publications - withSourcesJar() // Include sources JAR in publications + // Note: withJavadocJar() and withSourcesJar() are handled by com.vanniktech.maven.publish plugin sourceCompatibility = 
JavaVersion.VERSION_11 // Configure minimum Java version } @@ -133,7 +132,11 @@ mavenPublishing { } publishToMavenCentral() - signAllPublications() + + // Only sign publications when GPG keys are available (CI environment) + if (project.hasProperty("signingInMemoryKey") || project.hasProperty("signing.keyId")) { + signAllPublications() + } } ///////////////////////// From 34518890f75487678607e19be4cec5580366726f Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 29 Jul 2025 09:47:17 +0200 Subject: [PATCH 44/64] comments and code cleaning --- .../impl/io/parser/trig/TriGListerner.java | 13 +++- ...rserSpec.java => ANTLRTrigParserTest.java} | 76 +++++++++++++------ .../io/parser/trig/TriGListenerImplTest.java | 15 ++-- 3 files changed, 74 insertions(+), 30 deletions(-) rename src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/{ANTLRTrigParserSpec.java => ANTLRTrigParserTest.java} (68%) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java index 96193ed7c..a7f199392 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListerner.java @@ -12,6 +12,11 @@ import java.util.List; import java.util.Map; +/** + * Listener for the ANTLR4 generated parser for TriG. + * This listener traverses the parse tree and builds the RDF model, + * supporting named graphs. It includes unescaping logic for URIs and literals. 
+ */ public class TriGListerner extends TriGBaseListener { private final Model model; private String baseURI; @@ -37,6 +42,7 @@ public TriGListerner(Model model, ValueFactory factory, IOOptions options) { this.factory = factory; } + @Override public void exitPrefixID(TriGParser.PrefixIDContext ctx) { String prefix = ctx.PNAME_NS().getText(); String iri = ctx.IRIREF().getText(); @@ -46,20 +52,24 @@ public void exitPrefixID(TriGParser.PrefixIDContext ctx) { model.setNamespace(prefix, iri); } + @Override public void exitSparqlBase(TriGParser.SparqlBaseContext ctx) { baseURI = ctx.IRIREF().getText().replaceAll("^<|>$", ""); } + @Override public void enterBlock(TriGParser.BlockContext ctx) { currentGraph = ctx.Graph_w() != null && ctx.labelOrSubject() != null ? extractLabelOrSubject(ctx.labelOrSubject()) : null; } + @Override public void exitBlock(TriGParser.BlockContext ctx) { currentGraph = null; } + @Override public void enterTriplesOrGraph(TriGParser.TriplesOrGraphContext ctx) { if (ctx.labelOrSubject() != null && ctx.predicateObjectList() != null) { currentSubject = extractLabelOrSubject(ctx.labelOrSubject()); @@ -67,6 +77,7 @@ public void enterTriplesOrGraph(TriGParser.TriplesOrGraphContext ctx) { } } + @Override public void enterTriples(TriGParser.TriplesContext ctx) { currentSubject = extractSubject(ctx.subject()); processPredicateObjectList(ctx.predicateObjectList()); @@ -239,4 +250,4 @@ private String stripQuotes(String text) { } return text; } -} +} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java similarity index 68% rename from src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java index 970741aaf..8a9cc7bec 100644 --- 
a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserSpec.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/ANTLRTrigParserTest.java @@ -8,12 +8,31 @@ import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.StringReader; import static org.junit.jupiter.api.Assertions.assertEquals; -public class ANTLRTrigParserSpec { +/** + * Unit tests for the ANTLRTrigParser class. + * These tests verify the parser's ability to correctly parse Trig + * and interact with the Model and ValueFactory, including error handling + * and unescaping of IRIs and literals, and named graphs. + */ +class ANTLRTrigParserTest { + + private static final Logger logger = LoggerFactory.getLogger(ANTLRTrigParserTest.class); + + /** + * helper method to parse trig data into corese model + * + * @param trigData a string of rdf data in trig format + * @param baseURI the base uri + * @return Corese rdf model + * @throws Exception + */ private Model parseFromString(String trigData, String baseURI) throws Exception { Model model = new CoreseModel(); @@ -30,34 +49,41 @@ private Model parseFromString(String trigData, String baseURI) throws Exception private void printModel(Model model) { model.stream().forEach(stmt -> { Value obj = stmt.getObject(); + String subjectString = stmt.getSubject().stringValue(); + String predicateString = stmt.getPredicate().stringValue(); + if (obj instanceof Literal literal) { - if (literal.getLanguage().isPresent()) { - System.out.printf("(%s, %s, \"%s\"@%s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel(), - literal.getLanguage().get()); + String label = String.valueOf(literal.getLabel()); + String languageTag = literal.getLanguage().orElse(null); + + if (languageTag != null) { + logger.debug("({}, {}, 
\"{}\"@{})", + subjectString, + predicateString, + label, + languageTag); } else { - System.out.printf("(%s, %s, \"%s\")%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel()); + logger.debug("({}, {}, \"{}\")", + subjectString, + predicateString, + label); } } else { - System.out.printf("(%s, %s, %s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), + logger.debug("({}, {}, {})", + subjectString, + predicateString, obj.stringValue()); } }); } @Test - public void testNamedGraphParsing() throws Exception { - String trig = "@prefix ex: .\n" + - "ex:Graph1 {\n" + - " ex:Alice ex:knows ex:Bob .\n" + - "}"; + void testNamedGraphParsing() throws Exception { + String trig = """ + @prefix ex: + ex:Graph1 { + ex:Alice ex:knows ex:Bob . + }""".trim(); Model model = parseFromString(trig, null); printModel(model); @@ -69,7 +95,7 @@ public void testNamedGraphParsing() throws Exception { } @Test - public void testDocumentThatContainsOneGraphExample1() throws Exception { + void testDocumentThatContainsOneGraphExample1() throws Exception { String trig = """ # This document encodes one graph. @prefix ex: . @@ -94,7 +120,7 @@ public void testDocumentThatContainsOneGraphExample1() throws Exception { } @Test - public void testDocumentThatContainsTwoGraphExample() throws Exception { + void testDocumentThatContainsTwoGraphExample() throws Exception { String trig = """ # This document contains a same data as the # previous example. @@ -134,7 +160,7 @@ public void testDocumentThatContainsTwoGraphExample() throws Exception { } @Test - public void testNestedBlankNodesWithSharedIdentifiers() throws Exception { + void testNestedBlankNodesWithSharedIdentifiers() throws Exception { String trig = """ @prefix ex: . 
@@ -150,6 +176,8 @@ public void testNestedBlankNodesWithSharedIdentifiers() throws Exception { """.trim(); Model model = parseFromString(trig, null); printModel(model); - } -} + assertEquals(5, model.size()); + + } +} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java index 88e01228f..869f5ff5f 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/trig/TriGListenerImplTest.java @@ -17,7 +17,12 @@ import static org.junit.jupiter.api.Assertions.*; -public class TriGListenerImplTest { +/** + * Unit tests for the TriGListenerImpl class. + * These tests verify that the listener correctly processes ANTLR parse tree contexts + * to extract and unescape RDF terms (IRIs, Blank Nodes, Literals) and add them to the model. + */ +class TriGListenerImplTest { private Model parseTrig(String trigData) throws Exception { ValueFactory factory = new CoreseAdaptedValueFactory(); @@ -35,7 +40,7 @@ private Model parseTrig(String trigData) throws Exception { } @Test - public void testSimpleNamedGraph() throws Exception { + void testSimpleNamedGraph() throws Exception { String trig = """ @prefix ex: . @@ -50,7 +55,7 @@ public void testSimpleNamedGraph() throws Exception { } @Test - public void testBlankNodeWithProperties() throws Exception { + void testBlankNodeWithProperties() throws Exception { String trig = """ @prefix ex: . GRAPH ex:graph { @@ -63,7 +68,7 @@ public void testBlankNodeWithProperties() throws Exception { } @Test - public void testMultipleGraphsAndBase() throws Exception { + void testMultipleGraphsAndBase() throws Exception { String trig = """ @base . @prefix dc: . 
@@ -82,7 +87,7 @@ public void testMultipleGraphsAndBase() throws Exception { } @Test - public void testTypedLiteralsAndLang() throws Exception { + void testTypedLiteralsAndLang() throws Exception { String trig = """ @prefix ex: . @prefix xsd: . From 68030d5d20646c53d19390e5b3345105702e4ab2 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 30 Jul 2025 14:44:07 +0200 Subject: [PATCH 45/64] fixing missing factory for trig parser --- .../inria/corese/core/next/impl/io/parser/ParserFactory.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 8b43d930a..2dcccb3a2 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.ANTLRNQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser; +import fr.inria.corese.core.next.impl.io.parser.trig.ANTLRTrigParser; import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser; /** @@ -44,6 +45,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNTriplesParser(model, factory, config); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory, config); + } else if (format == RDFFormat.TRIG) { + return new ANTLRTrigParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -65,6 +68,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNTriplesParser(model, factory); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory); + } else if (format == RDFFormat.TRIG) { + return new 
ANTLRTrigParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } From 8056835dbd887cab38825687a3d1ad70de54df3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 30 Jul 2025 17:28:16 +0200 Subject: [PATCH 46/64] update: change JDK version from 11 to 21 in GitHub workflows --- .github/workflows/publish-prerelease.yml | 4 ++-- .github/workflows/release-github.yml | 4 ++-- .github/workflows/release-maven-central.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-prerelease.yml b/.github/workflows/publish-prerelease.yml index b23520a34..16861ada5 100644 --- a/.github/workflows/publish-prerelease.yml +++ b/.github/workflows/publish-prerelease.yml @@ -22,11 +22,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 21 uses: actions/setup-java@v4 with: distribution: 'temurin' - java-version: '11' + java-version: '21' cache: 'gradle' - name: Cache Gradle diff --git a/.github/workflows/release-github.yml b/.github/workflows/release-github.yml index 65b807c81..d5d6a5b14 100644 --- a/.github/workflows/release-github.yml +++ b/.github/workflows/release-github.yml @@ -23,11 +23,11 @@ jobs: with: ref: ${{ github.event.inputs.ref || github.ref_name }} - - name: Set up JDK 11 + - name: Set up JDK 21 uses: actions/setup-java@v4 with: distribution: 'temurin' - java-version: '11' + java-version: '21' cache: 'gradle' - name: Cache Gradle diff --git a/.github/workflows/release-maven-central.yml b/.github/workflows/release-maven-central.yml index 6ae32e4e1..849cc4f8a 100644 --- a/.github/workflows/release-maven-central.yml +++ b/.github/workflows/release-maven-central.yml @@ -21,11 +21,11 @@ jobs: with: ref: ${{ github.event.inputs.ref || github.ref }} - - name: Set up JDK 11 + - name: Set up JDK 21 uses: actions/setup-java@v4 with: distribution: 'temurin' - java-version: '11' + java-version: '21' cache: 'gradle' - 
name: Cache Gradle From f4ca82a004aee6f2bd30ebc47e2e4e7f79ae4d78 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 1 Jul 2025 15:47:13 +0200 Subject: [PATCH 47/64] factory --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 81 +++++++++++++++++++ .../io/parser/rdfxml/RdfXmlParserFactory.java | 23 ++++++ 2 files changed, 104 insertions(+) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java new file mode 100644 index 000000000..b923f125c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -0,0 +1,81 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.InputStream; +import java.io.Reader; + +public class RdfXmlParser extends DefaultHandler implements RDFParser { + + private final Model model; + private final RDFFormat format = RDFFormats.RDF_XML; + private final ValueFactory factory; + private String baseURI; + + public RdfXmlParser(Model model, ValueFactory factory) { + this.model = model; + this.factory = factory; + } + + @Override + public RDFFormat getRDFFormat() { + return format; + } + + @Override + public void parse(InputStream in) { + parse(in, null); + } + + @Override + public void parse(InputStream in, String baseURI) { + this.baseURI = 
baseURI; + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(in, this); + } catch (Exception e) { + throw new RuntimeException("Failed to parse RDF/XML input stream", e); + } + } + + @Override + public void parse(Reader reader) { + parse(reader, null); + } + + @Override + public void parse(Reader reader, String baseURI) { + this.baseURI = baseURI; + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + InputSource inputSource = new InputSource(reader); + saxParser.parse(inputSource, this); + } catch (Exception e) { + throw new RuntimeException("Failed to parse RDF/XML input stream", e); + } + } + + // SAX: element start + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) { + + } + + // SAX: element end + @Override + public void endElement(String uri, String localName, String qName) { + System.out.println("End: " + qName); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java new file mode 100644 index 000000000..5a3f59b46 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java @@ -0,0 +1,23 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.parser.RDFFormat; +import fr.inria.corese.core.next.api.base.parser.RDFFormats; +import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.api.base.parser.RDFParserFactory; + +public class RdfXmlParserFactory implements RDFParserFactory { + + public RdfXmlParserFactory() { + super(); + } + 
+ @Override + public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) { + if (!format.equals(RDFFormats.RDF_XML)) { + throw new IllegalArgumentException("Unsupported format : " + format); + } + return new RdfXmlParser(model, factory); + } +} \ No newline at end of file From 40ca925bbbc17a891ea791bc22502e7f1f385675 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 8 Jul 2025 14:02:50 +0200 Subject: [PATCH 48/64] xml parser setup --- .../util/SerializationConstants.java | 99 +++++++++++++++++++ .../impl/io/parser/rdfxml/RdfXmlParser.java | 95 ++++++++++++++++-- .../io/parser/rdfxml/RdfxmlParserTest.java | 35 +++++++ 3 files changed, 222 insertions(+), 7 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java new file mode 100644 index 000000000..58c0090c0 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java @@ -0,0 +1,99 @@ +package fr.inria.corese.core.next.impl.common.serializer.util; + +/** + * Provides common constants used throughout the RDF serialization process. + * This includes URIs for common RDF, RDFS, XSD, and OWL vocabularies, + * as well as various special characters and strings used in serialization formats + * like Turtle, N-Triples, and N-Quads. 
+ */ +public final class SerializationConstants { + + private SerializationConstants() { + // Private constructor to prevent instantiation + } + + // --- Standard RDF/RDFS/XSD/OWL URIs --- + public static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public static final String RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"; + public static final String XSD_NS = "http://www.w3.org/2001/XMLSchema#"; + public static final String OWL_NS = "http://www.w3.org/2002/07/owl#"; + + public static final String RDF_TYPE = RDF_NS + "type"; + public static final String RDF_FIRST = RDF_NS + "first"; + public static final String RDF_REST = RDF_NS + "rest"; + public static final String RDF_NIL = RDF_NS + "nil"; + + public static final String XSD_STRING = XSD_NS + "string"; + public static final String XSD_INTEGER = XSD_NS + "integer"; + public static final String XSD_DECIMAL = XSD_NS + "decimal"; + public static final String XSD_DOUBLE = XSD_NS + "double"; + public static final String XSD_BOOLEAN = XSD_NS + "boolean"; + public static final String XSD_DATETIME = XSD_NS + "dateTime"; + + // Nouveau namespace FOAF + public static final String FOAF_NS = "http://xmlns.com/foaf/0.1/"; + + + // --- Common Delimiters and Special Characters in Serialization --- + public static final String SPACE = " "; + public static final String TAB = "\t"; + public static final String LINE_FEED = "\n"; + public static final String CARRIAGE_RETURN = "\r"; + public static final String NEWLINE = LINE_FEED; + + public static final String POINT = "."; + public static final String SEMICOLON = ";"; + public static final String COMMA = ","; + public static final String AT_SIGN = "@"; + public static final String CARET = "^"; + public static final String LT = "<"; // Less than + public static final String GT = ">"; // Greater than + public static final String QUOTE = "\""; + public static final String COLON = ":"; + public static final String BACK_SLASH = "\\"; + + // Nouveaux 
délimiteurs + public static final String HASH = "#"; + public static final String SLASH = "/"; + + + // Turtle-specific + public static final String RDF_TYPE_SHORTCUT = "a"; + public static final String BNODE_PREFIX = "_:"; + public static final String DATATYPE_SEPARATOR = "^^"; + public static final String BLANK_NODE_START = "["; + public static final String BLANK_NODE_END = "]"; + + public static final String OPEN_PARENTHESIS = "("; + public static final String CLOSE_PARENTHESIS = ")"; + + // --- Default Values for Configuration --- + public static final String DEFAULT_INDENTATION = " "; // Two spaces + public static final String DEFAULT_LINE_ENDING = "\n"; // Unix-style + + public static final String EMPTY_STRING = ""; + + // TriG-specific + public static final String OPEN_BRACE = "{"; + public static final String CLOSE_BRACE = "}"; + + // XML-specific constants + public static final String XML_DECLARATION_START = ""; + public static final String RDF_ROOT_START = " statementStack = new ArrayDeque<>(); public RdfXmlParser(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } + // used for my play test class + public RdfXmlParser() { + this(new CoreseModel(), new CoreseAdaptedValueFactory()); + } + @Override public RDFFormat getRDFFormat() { return format; @@ -67,15 +81,82 @@ public void parse(Reader reader, String baseURI) { } } - // SAX: element start @Override - public void startElement(String uri, String localName, String qName, Attributes attributes) { + public void startElement(String uri, String localName, String qName, Attributes attrs) { + characters.setLength(0); + + if (isRdfRDF(uri, localName)) { + return; // skip root element + } + + // nodeElement + if (currentSubject == null) { + currentSubject = extractSubject(attrs); + return; + } + + // propertyElement → create statement and push it + IRI predicate = factory.createIRI(qName); // TODO: resolve properly + var resourceAttr = attrs.getValue(RDF.type.getNamespace(), 
"resource"); + + if (resourceAttr != null) { + Value object = factory.createLiteral(resourceAttr); + Statement stmt = factory.createStatement(currentSubject, predicate, object); + model.add(stmt); + return; + } + // literal content will be handled in endElement + Statement stub = factory.createStatement(currentSubject, predicate, null); + statementStack.push(stub); } - // SAX: element end @Override public void endElement(String uri, String localName, String qName) { - System.out.println("End: " + qName); + if (!statementStack.isEmpty()) { + Statement stmt = statementStack.pop(); + String content = characters.toString().trim(); + if (!content.isEmpty()) { + Value literal = factory.createLiteral(content); + Statement complete = factory.createStatement( + stmt.getSubject(), stmt.getPredicate(), literal + ); + model.add(complete); + } + } else { + // end of nodeElement + currentSubject = null; + } + } + + @Override + public void characters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + private Resource extractSubject(Attributes attrs) { + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(about); + + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + if (nodeID != null) return factory.createBNode("_:" + nodeID); + + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI("#" + id); + + return factory.createBNode(); + } + + private boolean isRdfRDF(String uri, String localName) { + return RDF.type.getNamespace().equals(uri) && "RDF".equals(localName); + } + + private void emitTripleString(String subject, String predicate, String object) { + System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); + } + + + private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { + this.statement = factory.createStatement(subj, pred, obj, context); } } diff --git 
a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java new file mode 100644 index 000000000..825997f25 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -0,0 +1,35 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import org.junit.jupiter.api.Test; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; + +public class RdfxmlParserTest { + + @Test + public void testBasicRdfParsing() throws Exception { + String rdfXml = "" + + "" + + "" + + " " + + " John Smith" + + " 2025-07-07" + + " " + + ""; + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + RdfXmlParser handler = new RdfXmlParser(); + + saxParser.parse(inputStream, handler); + } + + +} From 479e7cdbd4574838c5b4398486c9877055eac7ea Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 15:07:30 +0200 Subject: [PATCH 49/64] rdf xml parser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 159 ++++++++++++++---- 1 file changed, 122 insertions(+), 37 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index 61c4d6065..d3f0718bd 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -14,7 +14,6 @@ import java.io.Reader; import java.util.ArrayDeque; import java.util.Deque; -import java.util.Stack; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; 
public class RdfXmlParser extends DefaultHandler implements RDFParser { @@ -22,19 +21,27 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private final Model model; private final RDFFormat format = RDFFormats.RDF_XML; private final ValueFactory factory; + + private StringBuilder characters = new StringBuilder(); + private String baseURI; private Resource currentSubject; private Statement statement; - private StringBuilder characters = new StringBuilder(); private final Deque statementStack = new ArrayDeque<>(); + private final Deque subjectStack = new ArrayDeque<>(); + private final Deque predicateStack = new ArrayDeque<>(); + + + private boolean inContainer = false; + private int liIndex = 1; public RdfXmlParser(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } - // used for my play test class + // used for test class and can be removed public RdfXmlParser() { this(new CoreseModel(), new CoreseAdaptedValueFactory()); } @@ -85,47 +92,103 @@ public void parse(Reader reader, String baseURI) { public void startElement(String uri, String localName, String qName, Attributes attrs) { characters.setLength(0); - if (isRdfRDF(uri, localName)) { - return; // skip root element - } + // Ignore rdf:RDF + if (isRdfRDF(uri, localName)) return; - // nodeElement - if (currentSubject == null) { - currentSubject = extractSubject(attrs); + // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt + if (isContainer(localName, uri)) { + Resource subject = extractSubject(attrs); + subjectStack.push(subject); + inContainer = true; + liIndex = 1; return; } - // propertyElement → create statement and push it - IRI predicate = factory.createIRI(qName); // TODO: resolve properly - var resourceAttr = attrs.getValue(RDF.type.getNamespace(), "resource"); + // Handle container children: rdf:li → rdf:_n + if (inContainer && RDF.type.getNamespace().equals(uri)) { + String pred = null; + if ("li".equals(localName)) { + pred = 
RDF.type.getNamespace() + "_" + liIndex++; + } else if (localName.matches("_\\d+")) { + pred = RDF.type.getNamespace() + localName; + } + + if (pred != null) { + IRI predicate = factory.createIRI(pred); + String resource = attrs.getValue("rdf:resource"); + if (resource != null) { + model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + } + return; + } + } + + // Handle + if (isDescription(localName, uri)) { + Resource subject = extractSubject(attrs); + + if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { + Resource parent = subjectStack.peek(); + IRI predicate = predicateStack.peek(); + model.add(factory.createStatement(parent, predicate, subject)); + } + + subjectStack.push(subject); + + for (int i = 0; i < attrs.getLength(); i++) { + String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + String value = attrs.getValue(i); + + if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) { + continue; // skip core syntax attributes + } + + IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); + model.add(factory.createStatement(subject, pred, factory.createLiteral(value))); + } - if (resourceAttr != null) { - Value object = factory.createLiteral(resourceAttr); - Statement stmt = factory.createStatement(currentSubject, predicate, object); - model.add(stmt); return; } - // literal content will be handled in endElement - Statement stub = factory.createStatement(currentSubject, predicate, null); - statementStack.push(stub); + + // Handle regular property elements + IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); + predicateStack.push(predicate); + + // Check for rdf:resource + String resource = attrs.getValue("rdf:resource"); + if (resource != null) { + model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + } } @Override public void endElement(String uri, String localName, 
String qName) { - if (!statementStack.isEmpty()) { - Statement stmt = statementStack.pop(); - String content = characters.toString().trim(); - if (!content.isEmpty()) { - Value literal = factory.createLiteral(content); - Statement complete = factory.createStatement( - stmt.getSubject(), stmt.getPredicate(), literal - ); - model.add(complete); - } - } else { - // end of nodeElement - currentSubject = null; + String text = characters.toString().trim(); + characters.setLength(0); + + if (isContainer(localName, uri)) { + subjectStack.pop(); + inContainer = false; + liIndex = 1; + return; + } + + if (isDescription(localName, uri)) { + subjectStack.pop(); + return; + } + + // Closing a property element with literal content + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + model.add(factory.createStatement(subject, predicate, factory.createLiteral(text))); + + } else if (!predicateStack.isEmpty()) { + predicateStack.pop(); // still clean up } } @@ -135,27 +198,49 @@ public void characters(char[] ch, int start, int length) { } private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue(RDF.type.getNamespace(), "about"); + String about = attrs.getValue("rdf:about"); if (about != null) return factory.createIRI(about); - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + String nodeID = attrs.getValue("rdf:nodeID"); if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + String id = attrs.getValue("rdf:ID"); if (id != null) return factory.createIRI("#" + id); return factory.createBNode(); } private boolean isRdfRDF(String uri, String localName) { - return RDF.type.getNamespace().equals(uri) && "RDF".equals(localName); + return RDF.type.equals(uri) && "RDF".equals(localName); + } + + private boolean isDescription(String localName, String uri) { + return 
RDF.type.getNamespace().equals(uri) && "Description".equals(localName); + } + + private boolean isContainer(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && + ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); + } + + private String expandQName(String uri, String localName, String qName) { + return (uri != null && !uri.isEmpty()) ? uri + localName : qName; + } + + private boolean isSyntaxAttribute(String uri, String localName, String qName) { + if (uri != null && RDF.type.getNamespace().equals(uri)) { + return switch (localName) { + case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; + default -> false; + }; + } + return qName.startsWith("xml:"); } private void emitTripleString(String subject, String predicate, String object) { System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); } - private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, context); } From 1288b1b1ae79ed8f3b6b60070ef678d5cccec1fa Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 16:50:49 +0200 Subject: [PATCH 50/64] rdf xml parser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index d3f0718bd..f9c24658f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -31,7 +31,7 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private final Deque statementStack = new ArrayDeque<>(); private final Deque subjectStack = new ArrayDeque<>(); private final Deque predicateStack = new ArrayDeque<>(); - + private final Deque 
langStack = new ArrayDeque<>(); private boolean inContainer = false; private int liIndex = 1; @@ -95,6 +95,16 @@ public void startElement(String uri, String localName, String qName, Attributes // Ignore rdf:RDF if (isRdfRDF(uri, localName)) return; + // Handle xml:lang + String xmlLang = attrs.getValue("xml:lang"); + if (xmlLang != null) { + // "" means no language + langStack.push(xmlLang.isEmpty() ? null : xmlLang); + } else { + // Inherit from parent + langStack.push(langStack.peek()); + } + // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt if (isContainer(localName, uri)) { Resource subject = extractSubject(attrs); @@ -169,29 +179,46 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); + // Handle language cleanup + if (!langStack.isEmpty()) { + langStack.pop(); + } + + // End of a container (rdf:Seq, rdf:Bag, rdf:Alt) if (isContainer(localName, uri)) { - subjectStack.pop(); + if (!subjectStack.isEmpty()) { + subjectStack.pop(); + } inContainer = false; liIndex = 1; return; } + // End of rdf:Description if (isDescription(localName, uri)) { - subjectStack.pop(); + if (!subjectStack.isEmpty()) { + subjectStack.pop(); + } return; } - // Closing a property element with literal content - if (!predicateStack.isEmpty() && !text.isEmpty()) { + // Closing a property element with text content + if (!predicateStack.isEmpty()) { IRI predicate = predicateStack.pop(); Resource subject = subjectStack.peek(); - model.add(factory.createStatement(subject, predicate, factory.createLiteral(text))); - } else if (!predicateStack.isEmpty()) { - predicateStack.pop(); // still clean up + if (!text.isEmpty()) { + String lang = langStack.peek(); + Value literal = (lang != null) + ? 
factory.createLiteral(text, lang) + : factory.createLiteral(text); + + model.add(factory.createStatement(subject, predicate, literal)); + } } } + @Override public void characters(char[] ch, int start, int length) { characters.append(ch, start, length); From 87aabfd37372e5e05c275e2bd550a9253bb719ae Mon Sep 17 00:00:00 2001 From: pierrerene Date: Thu, 10 Jul 2025 17:47:00 +0200 Subject: [PATCH 51/64] rdf xml parser test --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 85 +++++++++++++------ .../io/parser/rdfxml/RdfxmlParserTest.java | 37 +++++--- 2 files changed, 83 insertions(+), 39 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index f9c24658f..b1e6d08bd 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -4,6 +4,7 @@ import fr.inria.corese.core.next.api.base.parser.RDFFormat; import fr.inria.corese.core.next.api.base.parser.RDFFormats; import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.xml.sax.*; @@ -14,6 +15,8 @@ import java.io.Reader; import java.util.ArrayDeque; import java.util.Deque; +import java.util.Optional; + import fr.inria.corese.core.next.impl.common.vocabulary.RDF; public class RdfXmlParser extends DefaultHandler implements RDFParser { @@ -25,13 +28,13 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { private StringBuilder characters = new StringBuilder(); private String baseURI; - private Resource currentSubject; + private Statement statement; - private final Deque statementStack = new ArrayDeque<>(); private final Deque subjectStack = new ArrayDeque<>(); 
private final Deque predicateStack = new ArrayDeque<>(); private final Deque langStack = new ArrayDeque<>(); + private final Deque datatypeStack = new ArrayDeque<>(); private boolean inContainer = false; private int liIndex = 1; @@ -92,6 +95,9 @@ public void parse(Reader reader, String baseURI) { public void startElement(String uri, String localName, String qName, Attributes attrs) { characters.setLength(0); + String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); + datatypeStack.push(datatype); + // Ignore rdf:RDF if (isRdfRDF(uri, localName)) return; @@ -179,43 +185,57 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); - // Handle language cleanup - if (!langStack.isEmpty()) { - langStack.pop(); + if (!langStack.isEmpty()) langStack.pop(); + if (!datatypeStack.isEmpty()) { + String datatypeUri = datatypeStack.pop(); + + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + + Value literal; + + if (datatypeUri != null) { + Optional known = fromURI(datatypeUri); + + if (known.isPresent()) { + // normalized + IRI normalizedDatatype = known.get().getIRI(); + literal = factory.createLiteral(text, normalizedDatatype); + } else { + // unknown datatype – fallback or warning + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + IRI fallbackDatatype = factory.createIRI(datatypeUri); + literal = factory.createLiteral(text, fallbackDatatype); + } + } else { + // no datatype – use xml:lang if set + String lang = langStack.peek(); + literal = (lang != null) + ? 
factory.createLiteral(text, lang) + : factory.createLiteral(text); + } + + model.add(factory.createStatement(subject, predicate, literal)); + } else if (!predicateStack.isEmpty()) { + predicateStack.pop(); // cleanup + } + + return; } - // End of a container (rdf:Seq, rdf:Bag, rdf:Alt) + // other cases (containers, descriptions, etc.) if (isContainer(localName, uri)) { - if (!subjectStack.isEmpty()) { - subjectStack.pop(); - } + if (!subjectStack.isEmpty()) subjectStack.pop(); inContainer = false; liIndex = 1; return; } - // End of rdf:Description if (isDescription(localName, uri)) { - if (!subjectStack.isEmpty()) { - subjectStack.pop(); - } + if (!subjectStack.isEmpty()) subjectStack.pop(); return; } - - // Closing a property element with text content - if (!predicateStack.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - if (!text.isEmpty()) { - String lang = langStack.peek(); - Value literal = (lang != null) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); - - model.add(factory.createStatement(subject, predicate, literal)); - } - } } @@ -271,4 +291,13 @@ private void emitTripleString(String subject, String predicate, String object) { private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, context); } + + public Optional fromURI(String uri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(uri)) { + return Optional.of(xsd); + } + } + return Optional.empty(); + } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index 825997f25..a78a94193 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,5 +1,6 @@ package 
fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; @@ -11,24 +12,38 @@ public class RdfxmlParserTest { @Test public void testBasicRdfParsing() throws Exception { - String rdfXml = "" + - "" + - "" + - " " + - " John Smith" + - " 2025-07-07" + - " " + - ""; + String rdfXml = """ + + + + John Smith + 2025-07-07 + + + """; + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + // Set up the parser SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - RdfXmlParser handler = new RdfXmlParser(); + // Provide an explicit model + CoreseModel model = new CoreseModel(); + RdfXmlParser handler = new RdfXmlParser(model); + // Parse the input saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected two RDF statements"); + + model.statements().forEach(stmt -> { + System.out.println(stmt); + }); } From f0e6483d2ae8702240e4290d17652c21bcf17030 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Wed, 16 Jul 2025 10:14:16 +0200 Subject: [PATCH 52/64] abstractstatement rdfxml updates --- .../api/base/model/AbstractStatement.java | 8 +- .../impl/io/parser/rdfxml/RdfXmlParser.java | 172 ++-- .../io/parser/rdfxml/RdfxmlParserTest.java | 784 +++++++++++++++++- 3 files changed, 891 insertions(+), 73 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java index 19707d6c7..3b5a5749a 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java +++ 
b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractStatement.java @@ -59,10 +59,10 @@ public int hashCode() { @Override public String toString() { return "(" - + getSubject() - + ", " + getPredicate() - + ", " + getObject() - + (getContext() == null ? "" : ", " + getContext()) + + getSubject().stringValue() + + ", " + getPredicate().stringValue() + + ", " + getObject().stringValue() + + (getContext() == null ? "" : ", " + getContext().stringValue()) + ")"; } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index b1e6d08bd..0ac1cfa29 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -93,25 +93,30 @@ public void parse(Reader reader, String baseURI) { @Override public void startElement(String uri, String localName, String qName, Attributes attrs) { + // Ignore rdf:RDF root element + if (isRdfRDF(uri, localName)) return; characters.setLength(0); + // Handle datatype String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - datatypeStack.push(datatype); + if (datatype != null) { + datatypeStack.push(datatype); + } - // Ignore rdf:RDF - if (isRdfRDF(uri, localName)) return; // Handle xml:lang String xmlLang = attrs.getValue("xml:lang"); if (xmlLang != null) { - // "" means no language - langStack.push(xmlLang.isEmpty() ? 
null : xmlLang); - } else { - // Inherit from parent - langStack.push(langStack.peek()); + langStack.push(xmlLang); + } + + // Handle xml:base + String xmlBase = attrs.getValue("xml:base"); + if (xmlBase != null) { + baseURI = xmlBase; } - // Handle container elements: rdf:Seq, rdf:Bag, rdf:Alt + // Handle RDF containers if (isContainer(localName, uri)) { Resource subject = extractSubject(attrs); subjectStack.push(subject); @@ -120,7 +125,7 @@ public void startElement(String uri, String localName, String qName, Attributes return; } - // Handle container children: rdf:li → rdf:_n + // Handle container items: rdf:li → rdf:_n if (inContainer && RDF.type.getNamespace().equals(uri)) { String pred = null; if ("li".equals(localName)) { @@ -133,50 +138,66 @@ public void startElement(String uri, String localName, String qName, Attributes IRI predicate = factory.createIRI(pred); String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + model.add(factory.createStatement( + subjectStack.peek(), + predicate, + factory.createIRI(resolveAgainstBase(resource)) + )); } return; } } - // Handle - if (isDescription(localName, uri)) { - Resource subject = extractSubject(attrs); - + // Handle (typed or untyped) + if (isDescription(localName, uri) || isNodeElement(attrs)) { + Resource newSubject = extractSubject(attrs); + // If this or typed element is the object of a property if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { Resource parent = subjectStack.peek(); - IRI predicate = predicateStack.peek(); - model.add(factory.createStatement(parent, predicate, subject)); + IRI predicate = predicateStack.pop(); // consume the predicate + model.add(factory.createStatement(parent, predicate, newSubject)); } - subjectStack.push(subject); + subjectStack.push(newSubject); + + // If it's a typed node (e.g., ), add rdf:type triple + if (!isDescription(localName, uri)) { + 
IRI typeIRI = factory.createIRI(expandQName(uri, localName, qName)); + model.add(factory.createStatement( + newSubject, + factory.createIRI(RDF.type.getIRI().stringValue()), + typeIRI + )); + } + // Handle property attributes for (int i = 0; i < attrs.getLength(); i++) { String attrURI = attrs.getURI(i); String attrLocal = attrs.getLocalName(i); String attrQName = attrs.getQName(i); String value = attrs.getValue(i); - if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) { - continue; // skip core syntax attributes - } + if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); - model.add(factory.createStatement(subject, pred, factory.createLiteral(value))); + model.add(factory.createStatement(newSubject, pred, factory.createLiteral(value))); } return; } - - // Handle regular property elements + // Handle regular property elements (e.g., ) IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); predicateStack.push(predicate); - // Check for rdf:resource + // Handle rdf:resource object (IRI) String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement(subjectStack.peek(), predicate, factory.createIRI(resource))); + model.add(factory.createStatement( + subjectStack.peek(), + predicate, + factory.createIRI(resolveAgainstBase(resource)) + )); } } @@ -185,46 +206,48 @@ public void endElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); + // Always pop lang/datatype if pushed if (!langStack.isEmpty()) langStack.pop(); - if (!datatypeStack.isEmpty()) { - String datatypeUri = datatypeStack.pop(); - - if (!predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - Value literal; - - if (datatypeUri != null) { - Optional known = fromURI(datatypeUri); - - if (known.isPresent()) { - // 
normalized - IRI normalizedDatatype = known.get().getIRI(); - literal = factory.createLiteral(text, normalizedDatatype); - } else { - // unknown datatype – fallback or warning - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - IRI fallbackDatatype = factory.createIRI(datatypeUri); - literal = factory.createLiteral(text, fallbackDatatype); - } + String datatypeUri = !datatypeStack.isEmpty() ? datatypeStack.pop() : null; + + // Property literal + if (!predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = predicateStack.pop(); + Resource subject = subjectStack.peek(); + + Value literal; + + if (datatypeUri != null && !datatypeUri.isBlank()) { + Optional known = fromURI(datatypeUri); + + if (known.isPresent()) { + // normalized datatype + IRI normalizedDatatype = known.get().getIRI(); + literal = factory.createLiteral(text, normalizedDatatype); } else { - // no datatype – use xml:lang if set - String lang = langStack.peek(); - literal = (lang != null) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); + // fallback datatype + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + IRI fallbackDatatype = factory.createIRI(datatypeUri); + literal = factory.createLiteral(text, fallbackDatatype); } - - model.add(factory.createStatement(subject, predicate, literal)); - } else if (!predicateStack.isEmpty()) { - predicateStack.pop(); // cleanup + } else { + // no datatype – use language tag if any + String lang = langStack.isEmpty() ? null : langStack.peek(); + literal = (lang != null && !lang.equals("__NO_LANG__")) + ? factory.createLiteral(text, lang) + : factory.createLiteral(text); } + model.add(factory.createStatement(subject, predicate, literal)); return; } - // other cases (containers, descriptions, etc.) 
+ // Clean up stray predicates + if (!predicateStack.isEmpty()) { + predicateStack.pop(); + } + + // Handle containers if (isContainer(localName, uri)) { if (!subjectStack.isEmpty()) subjectStack.pop(); inContainer = false; @@ -232,9 +255,9 @@ public void endElement(String uri, String localName, String qName) { return; } + // Handle end of rdf:Description if (isDescription(localName, uri)) { if (!subjectStack.isEmpty()) subjectStack.pop(); - return; } } @@ -245,18 +268,33 @@ public void characters(char[] ch, int start, int length) { } private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue("rdf:about"); - if (about != null) return factory.createIRI(about); + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(resolveAgainstBase(about)); - String nodeID = attrs.getValue("rdf:nodeID"); + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue("rdf:ID"); - if (id != null) return factory.createIRI("#" + id); + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI(resolveAgainstBase("#" + id)); + // Default to blank node return factory.createBNode(); } + private String resolveAgainstBase(String iri) { + if (iri == null) return null; + if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { + // Absolute IRI or no base, return as-is + return iri; + } + + try { + return new java.net.URI(baseURI).resolve(iri).toString(); + } catch (Exception e) { + throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + } + } + private boolean isRdfRDF(String uri, String localName) { return RDF.type.equals(uri) && "RDF".equals(localName); } @@ -292,6 +330,12 @@ private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { this.statement = factory.createStatement(subj, pred, obj, 
context); } + private boolean isNodeElement(Attributes attrs) { + return attrs.getValue(RDF.type.getNamespace(), "about") != null || + attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || + attrs.getValue(RDF.type.getNamespace(), "ID") != null; + } + public Optional fromURI(String uri) { for (XSD xsd : XSD.values()) { if (xsd.getIRI().stringValue().equals(uri)) { diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index a78a94193..e0cd7e2dd 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,6 +1,10 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.apache.jena.base.Sys; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; @@ -8,10 +12,58 @@ import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class RdfxmlParserTest { + @Test + public void testNodeElementsWithIRIs() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + + """; + + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // 
Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected two RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + + } @Test public void testBasicRdfParsing() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ { - System.out.println(stmt); + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample3CompleteDescriptionOfAllGraphPaths() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + + + + Dave Beckett + + + + + + RDF 1.2 XML Syntax + + + """.trim(); + + String inTriG = """ + @prefix dcterms: . + + { + _:b15, _:b16 ; + dcterms:title "RDF 1.2 XML Syntax" . + + _:b15 . + + _:b16 "Dave Beckett" . + } + """; + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected five RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample4UsingMultiplePropertyElements() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + Dave Beckett + + + RDF 1.2 XML Syntax + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new 
ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample5EmptyPropertyElements() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + Dave Beckett + + + RDF 1.2 XML Syntax + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample6ReplacingPropertyElementsWithStringLiteral() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + 
factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample7CompleteRDFXML() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(5, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample8CompleteExampleXmlLang() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + RDF 1.2 XML Syntax + RDF 1.2 XML Syntax + RDF 1.2 XML Syntax + + + + Der Baum + Das Buch ist außergewöhnlich + The Tree + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + 
RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(8, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample11CompleteExamplerdfDatatype() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + 123 + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); }); } + @Test + public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); -} + // Assert or inspect the result + assertEquals(6, model.size(), "Expected four RDF 
statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample13CompleteExampleUsingRdfparseTypeResource() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + Dave Beckett + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(4, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample14CompleteExampleOfPorpertyAttributesOnAnEmptyPropertyElement() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample15CompleteExampleWithRdfType() 
throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + A marvelous thing + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample16CompleteExampleUsingATypedNodeElementToReplaceAnRdfType() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + A marvelous thing + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample17CompleteExampleUsingRdfIDAndXmlbase() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + 
""".trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(2, model.size(), "Expected four RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample18ComplexExampleUsingRdfListProperties() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = 
SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample20CompleteExampleOfARdfCollectionOfNodes() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + + + + + + + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(4, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } + + @Test + public void testExample21CompleteExampleOfRdfID() throws Exception { + Model model = new CoreseModel(); + ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ + + + + blah + + + + """.trim(); + + // Prepare input stream + ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); + + // Set up the parser + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + + // Provide an explicit model + RdfXmlParser handler = new RdfXmlParser(model, 
valueFactory); + + // Parse the input + saxParser.parse(inputStream, handler); + + // Assert or inspect the result + assertEquals(3, model.size(), "Expected three RDF statements"); + + model.stream().forEach(stmt -> { + System.out.println(stmt.toString()); + }); + } +} \ No newline at end of file From 6e4dfa23cc4bf141ff048ce9f02a0ec4f9f210ee Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 18 Jul 2025 15:31:45 +0200 Subject: [PATCH 53/64] adding RDF-XML Context and Utils --- .../impl/io/parser/rdfxml/RdfXmlUtils.java | 117 ++++ .../parser/rdfxml/context/RdfXmlContext.java | 83 +++ .../io/parser/rdfxml/RdfxmlParserTest.java | 613 ++++++------------ 3 files changed, 386 insertions(+), 427 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java new file mode 100644 index 000000000..fc52f588a --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java @@ -0,0 +1,117 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import org.xml.sax.*; + +import java.util.List; +import java.util.Optional; + + +public class RdfXmlUtils { + private RdfXmlUtils() { + } + + public static String expandQName(String uri, String localName, String qName) { + return (uri != null && !uri.isEmpty()) ? 
uri + localName : qName; + } + + public static Optional resolveDatatype(String datatypeUri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(datatypeUri)) return Optional.of(xsd); + } + return Optional.empty(); + } + + public static Resource extractSubject(Attributes attrs, ValueFactory factory, String baseURI) { + String about = attrs.getValue(RDF.type.getNamespace(), "about"); + if (about != null) return factory.createIRI(resolveAgainstBase(about, baseURI)); + + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + if (nodeID != null) return factory.createBNode("_:" + nodeID); + + String id = attrs.getValue(RDF.type.getNamespace(), "ID"); + if (id != null) return factory.createIRI(resolveAgainstBase("#" + id, baseURI)); + + // Default to blank node + return factory.createBNode(); + } + + public static String resolveAgainstBase(String iri, String baseURI) { + if (iri == null) return null; + if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { + // Absolute IRI or no base, return as-is + return iri; + } + + try { + return new java.net.URI(baseURI).resolve(iri).toString(); + } catch (Exception e) { + throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + } + } + + public static boolean isDescription(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); + } + + public static boolean isNodeElement(Attributes attrs) { + return attrs.getValue(RDF.type.getNamespace(), "about") != null || + attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || + attrs.getValue(RDF.type.getNamespace(), "ID") != null; + } + + public static boolean isSyntaxAttribute(String uri, String localName, String qName) { + if (uri != null && RDF.type.getNamespace().equals(uri)) { + return switch (localName) { + case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; + default -> false; + }; + } + return 
qName.startsWith("xml:"); + } + + public static Optional fromURI(String uri) { + for (XSD xsd : XSD.values()) { + if (xsd.getIRI().stringValue().equals(uri)) { + return Optional.of(xsd); + } + } + return Optional.empty(); + } + + public static boolean isRdfRDF(String uri, String localName) { + return RDF.type.equals(uri) && "RDF".equals(localName); + } + + public static boolean isContainer(String localName, String uri) { + return RDF.type.getNamespace().equals(uri) && + ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); + } + + public static Resource createRdfCollection(List items, Model model, ValueFactory factory) { + Resource head = factory.createBNode(); + Resource current = head; + + for (int i = 0; i < items.size(); i++) { + Resource next = (i < items.size() - 1) + ? factory.createBNode() + : RDF.nil.getIRI(); // rdf:nil + + model.add(factory.createStatement(current, + RDF.first.getIRI(), + items.get(i))); + + model.add(factory.createStatement(current, + RDF.rest.getIRI(), + next)); + + current = next; + } + + return head; + } + + +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java new file mode 100644 index 000000000..c542ff4ea --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java @@ -0,0 +1,83 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml.context; + +import fr.inria.corese.core.next.api.*; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; + +/** + * Holds shared parsing state during RDF/XML parsing. + * + *

This class acts as a context holder for the SAX-based RDF/XML parser, + * allowing multiple elements and handlers to share and manipulate parsing state. + * It stores stacks for subjects, predicates, datatypes, and languages, + * as well as temporary collections used during the construction of RDF lists and containers.

+ * + *

This context is typically instantiated once per parsing session and passed + * throughout the parsing logic.

+ */ +public class RdfXmlContext { + + /** The RDF model to which parsed triples will be added. */ + public Model model; + + /** The factory used to create IRIs, literals, blank nodes, and statements. */ + public ValueFactory factory; + + /** The base URI against which relative IRIs are resolved. */ + public String baseURI; + + /** A single statement buffer (optional use). */ + public Statement statement; + + /** Builder list for rdf:parseType="Collection" elements. */ + public List collectionBuilder = new ArrayList<>(); + + /** The subject associated with the current RDF collection. */ + public Resource collectionSubject; + + /** The predicate that connects the collection subject to the list head. */ + public IRI collectionPredicate; + + /** Stack of subject resources to manage nesting of elements. */ + public final Deque subjectStack = new ArrayDeque<>(); + + /** Stack of predicates for tracking current RDF properties. */ + public final Deque predicateStack = new ArrayDeque<>(); + + /** Stack for xml:lang values scoped by element depth. */ + public final Deque langStack = new ArrayDeque<>(); + + /** Stack for rdf:datatype URIs associated with literals. */ + public final Deque datatypeStack = new ArrayDeque<>(); + + /** Temporary holder for RDF collection items (unused or optional). */ + public final Deque collectionItems = new ArrayDeque<>(); + + /** Whether the parser is currently inside an RDF container (rdf:Seq, rdf:Bag, rdf:Alt). */ + public boolean inContainer = false; + + /** Whether the parser is currently inside an RDF collection (rdf:parseType="Collection"). */ + public boolean inCollection = false; + + /** If true, skips pushing a subject onto the stack (used for collection items). */ + public boolean suppressSubject = false; + + /** Counter for rdf:li to rdf:_n expansion. */ + public int liIndex = 1; + + /** Optional parseType value for the current element. */ + public String parseType; + + /** + * Constructs a new context for RDF/XML parsing. 
+ * + * @param model the RDF model to populate with triples + * @param factory the value factory used to create RDF terms + */ + public RdfXmlContext(Model model, ValueFactory factory) { + this.model = model; + this.factory = factory; + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index e0cd7e2dd..e1bfa306b 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,24 +1,79 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; +import fr.inria.corese.core.next.api.Literal; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Value; import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.apache.jena.base.Sys; import org.junit.jupiter.api.Test; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import static org.junit.jupiter.api.Assertions.assertEquals; public class RdfxmlParserTest { - @Test - public void testNodeElementsWithIRIs() throws Exception { + /** + * Helper method to parse the RDF/XML String + * @param rdfXml + * @return model + * @throws Exception + */ + private Model parseRdfXml(String rdfXml) throws Exception { Model model = new CoreseModel(); ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + + try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + RdfXmlParser handler = new 
RdfXmlParser(model, valueFactory); + saxParser.parse(inputStream, handler); + } + + return model; + } + + /** + * Helper method to print the model. + * @param model + */ + private void printModel(Model model) { + model.stream().forEach(stmt -> { + Value obj = stmt.getObject(); + if (obj instanceof Literal literal) { + if (literal.getLanguage().isPresent()) { + System.out.printf("(%s, %s, \"%s\"@%s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel(), + literal.getLanguage().get()); + } else { + System.out.printf("(%s, %s, \"%s\")%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel()); + } + } else { + System.out.printf("(%s, %s, %s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + obj.stringValue()); + } + }); + } + + + /** + * Test node elements with IRIs + * @throws Exception + */ + @Test + public void testNodeElementsWithIRIs() throws Exception { String rdfXml = """ - + @@ -36,34 +91,18 @@ public void testNodeElementsWithIRIs() throws Exception { """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(2, model.size(), "Expected two RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); - } + /** + * Test a basic RDF/XML file + * @throws Exception + */ @Test public void testBasicRdfParsing() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new 
CoreseAdaptedValueFactory(); String rdfXml = """ """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(2, model.size(), "Expected two RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with Complete description of all graph paths + * @throws Exception + */ @Test public void testExample3CompleteDescriptionOfAllGraphPaths() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - - String inTriG = """ - @prefix dcterms: . - - { - _:b15, _:b16 ; - dcterms:title "RDF 1.2 XML Syntax" . - - _:b15 . - - _:b16 "Dave Beckett" . 
- } - """; - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(5, model.size(), "Expected five RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test RDF/XML File Using multiple property elements on a node element + * @throws Exception + */ @Test public void testExample4UsingMultiplePropertyElements() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ + @@ -188,33 +183,19 @@ public void testExample4UsingMultiplePropertyElements() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); } + /** + * Test RDF/XML with Empty property elements + * @throws Exception + */ @Test public void 
testExample5EmptyPropertyElements() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ + @@ -230,32 +211,19 @@ public void testExample5EmptyPropertyElements() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with Replacing property elements with string literal content into property attributes + * @throws Exception + */ @Test public void testExample6ReplacingPropertyElementsWithStringLiteral() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); + Model model = 
parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complete RDF/XML + * @throws Exception + */ @Test public void testExample7CompleteRDFXML() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """
-
""".trim(); - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(5, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected four RDF statements"); } + /** + * Test a Complete example of xml:lang + * @throws Exception + */ @Test public void testExample8CompleteExampleXmlLang() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(8, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(6, model.size(), "Expected six RDF statements"); } @Test public void testExample11CompleteExamplerdfDatatype() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new 
CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected four RDF statements"); } + /** + * Test a Complete RDF/XML file with a description of graph using rdf:nodeID + * @throws Exception + */ @Test public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); + Model model = parseRdfXml(rdfXml); + printModel(model); // Assert or inspect the result - assertEquals(6, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + assertEquals(4, model.size(), "Expected five RDF statements"); } + /** + * Test a RDF/XML file with a Complete example using 
rdf:parseType=Resource + * @throws Exception + */ @Test public void testExample13CompleteExampleUsingRdfparseTypeResource() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result + Model model = parseRdfXml(rdfXml); + printModel(model); assertEquals(4, model.size(), "Expected four RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a RDF/XML file with a Complete example of property attributes on an empty property element + * @throws Exception + */ @Test public void testExample14CompleteExampleOfPorpertyAttributesOnAnEmptyPropertyElement() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model 
model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(3, model.size(), "Expected three RDF statements"); } + /** + * Test a RDF/XML file with a Complete example with rdf:type + * @throws Exception + */ @Test public void testExample15CompleteExampleWithRdfType() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected four RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(2, model.size(), "Expected four RDF statements"); } + /** + * Test a RDF/XML file with a Complete example using a typed node element to replace an rdf:type + * @throws Exception + */ @Test public void testExample16CompleteExampleUsingATypedNodeElementToReplaceAnRdfType() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - 
saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(2, model.size(), "Expected two RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } @Test + /** + * Test a XML/RDF File using rdf:ID and xml:base + */ public void testExample17CompleteExampleUsingRdfIDAndXmlbase() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(2, model.size(), "Expected four RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected one RDF statement"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complex example using RDF list properties + * @throws Exception + */ @Test public void testExample18ComplexExampleUsingRdfListProperties() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + String rdfXml = """ @@ -681,32 +500,18 @@ public void testExample18ComplexExampleUsingRdfListProperties() throws Exception """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = 
SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected three RDF statements"); } + /** + * Test a Complete example using rdf:li + * @throws Exception + */ @Test - public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + public void testExample19CompleteExampleUsingRdfliProperties() throws Exception { + String rdfXml = """ @@ -720,32 +525,18 @@ public void testExample19ComplexExampleUsingRdfliProperties() throws Exception { """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(4, model.size(), "Expected three RDF statements"); - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } + /** + * Test a Complete example of a RDF collection + * @throws Exception + */ @Test public void testExample20CompleteExampleOfARdfCollectionOfNodes() throws 
Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(4, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(7, model.size(), "Expected three RDF statements"); } + /** + * Test a Complete example of rdf:ID reifying a property element + * @throws Exception + */ @Test public void testExample21CompleteExampleOfRdfID() throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); String rdfXml = """ """.trim(); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertEquals(1, model.size(), "Expected one RDF statement"); - // Prepare input stream - ByteArrayInputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8)); - - // Set up the parser - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - - // Provide an explicit model - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - - // Parse the input - saxParser.parse(inputStream, handler); - - // Assert or inspect the result - assertEquals(3, model.size(), "Expected three RDF statements"); - - model.stream().forEach(stmt -> { - System.out.println(stmt.toString()); - }); } } \ No newline 
at end of file From 203fb932dd7c048aa40a42dc0468e7a72ae24034 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 18 Jul 2025 15:32:03 +0200 Subject: [PATCH 54/64] refactor RdfXmlParser --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 434 +++++++++--------- 1 file changed, 224 insertions(+), 210 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index 0ac1cfa29..d2f9d65d1 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -5,48 +5,52 @@ import fr.inria.corese.core.next.api.base.parser.RDFFormats; import fr.inria.corese.core.next.api.base.parser.RDFParser; import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.xml.sax.*; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; + import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.InputStream; import java.io.Reader; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.ArrayList; import java.util.Optional; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; - -public class RdfXmlParser extends DefaultHandler implements RDFParser { - - private final Model model; +import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RdfXmlUtils.*; + +/** + * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). + * + *

This parser processes RDF/XML documents using the SAX streaming API. + * It tracks RDF constructs (resources, properties, literals, containers, collections) + * using an explicit stack-based context, and supports features like xml:lang, + * rdf:datatype, rdf:parseType, and property attributes.

+ * + *

The parser adds RDF statements to the provided {@link Model} using + * the supplied {@link ValueFactory}. This parser supports nested nodes, + * blank nodes, typed nodes, and RDF collections.

+ */ +public class RdfXmlParser extends DefaultHandler implements RDFParser { + + /** RDF/XML format identifier for this parser. */ private final RDFFormat format = RDFFormats.RDF_XML; - private final ValueFactory factory; + /** Buffer for accumulating character data between start and end tags. */ private StringBuilder characters = new StringBuilder(); - private String baseURI; - - private Statement statement; - - private final Deque subjectStack = new ArrayDeque<>(); - private final Deque predicateStack = new ArrayDeque<>(); - private final Deque langStack = new ArrayDeque<>(); - private final Deque datatypeStack = new ArrayDeque<>(); - - private boolean inContainer = false; - private int liIndex = 1; + /** Shared state across SAX callbacks. */ + private RdfXmlContext ctx; + /** + * Creates a new parser with a target RDF model and factory. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + */ public RdfXmlParser(Model model, ValueFactory factory) { - this.model = model; - this.factory = factory; - } - - // used for test class and can be removed - public RdfXmlParser() { - this(new CoreseModel(), new CoreseAdaptedValueFactory()); + this.ctx = new RdfXmlContext(model, factory); } @Override @@ -61,7 +65,7 @@ public void parse(InputStream in) { @Override public void parse(InputStream in, String baseURI) { - this.baseURI = baseURI; + ctx.baseURI = baseURI; try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -79,7 +83,7 @@ public void parse(Reader reader) { @Override public void parse(Reader reader, String baseURI) { - this.baseURI = baseURI; + ctx.baseURI = baseURI; try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -91,257 +95,267 @@ public void parse(Reader reader, String baseURI) { } } + @Override + public void characters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + /** + * 
Handles opening of an XML element. + * Identifies node elements, container constructs, properties, + * and special parseType attributes, updating the parsing context accordingly. + */ @Override public void startElement(String uri, String localName, String qName, Attributes attrs) { - // Ignore rdf:RDF root element - if (isRdfRDF(uri, localName)) return; + // Skip the top-level rdf:RDF wrapper element + if (RdfXmlUtils.isRdfRDF(uri, localName)) return; + + // Reset character buffer characters.setLength(0); - // Handle datatype - String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - if (datatype != null) { - datatypeStack.push(datatype); + // Handle xml:base (change base URI dynamically) + String xmlBase = attrs.getValue("xml:base"); + if (xmlBase != null) { + ctx.baseURI = xmlBase; } - // Handle xml:lang String xmlLang = attrs.getValue("xml:lang"); if (xmlLang != null) { - langStack.push(xmlLang); + ctx.langStack.push(xmlLang); } - // Handle xml:base - String xmlBase = attrs.getValue("xml:base"); - if (xmlBase != null) { - baseURI = xmlBase; + // Handle rdf:datatype (applies to property literal values) + String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); + if (datatype != null) { + ctx.datatypeStack.push(datatype); } - // Handle RDF containers - if (isContainer(localName, uri)) { - Resource subject = extractSubject(attrs); - subjectStack.push(subject); - inContainer = true; - liIndex = 1; + // --- RDF Container Element --- + if (RdfXmlUtils.isContainer(localName, uri)) { + Resource subject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.subjectStack.push(subject); + ctx.inContainer = true; + ctx.liIndex = 1; + + IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.model.add(ctx.factory.createStatement(subject, RDF.type.getIRI(), typeIRI)); return; } - // Handle container items: rdf:li → rdf:_n - if (inContainer && RDF.type.getNamespace().equals(uri)) { + // --- 
Container Items (rdf:li, rdf:_n) --- + if (ctx.inContainer && RDF.type.getNamespace().equals(uri)) { String pred = null; if ("li".equals(localName)) { - pred = RDF.type.getNamespace() + "_" + liIndex++; + pred = RDF.type.getNamespace() + "_" + ctx.liIndex++; } else if (localName.matches("_\\d+")) { pred = RDF.type.getNamespace() + localName; } if (pred != null) { - IRI predicate = factory.createIRI(pred); + IRI predicate = ctx.factory.createIRI(pred); String resource = attrs.getValue("rdf:resource"); if (resource != null) { - model.add(factory.createStatement( - subjectStack.peek(), + ctx.model.add(ctx.factory.createStatement( + ctx.subjectStack.peek(), predicate, - factory.createIRI(resolveAgainstBase(resource)) + ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) )); } return; } } - // Handle (typed or untyped) - if (isDescription(localName, uri) || isNodeElement(attrs)) { - Resource newSubject = extractSubject(attrs); - // If this or typed element is the object of a property - if (!predicateStack.isEmpty() && !subjectStack.isEmpty()) { - Resource parent = subjectStack.peek(); - IRI predicate = predicateStack.pop(); // consume the predicate - model.add(factory.createStatement(parent, predicate, newSubject)); + // --- parseType="Collection" --- + String parseType = attrs.getValue(RDF.type.getNamespace(), "parseType"); + if ("Collection".equals(parseType)) { + IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.predicateStack.push(predicate); + ctx.collectionSubject = ctx.subjectStack.peek(); + ctx.collectionPredicate = predicate; + ctx.collectionBuilder = new ArrayList<>(); + ctx.inCollection = true; + return; + } + + // --- Inside Collection: Collect rdf:Description Items --- + if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { + Resource item = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.collectionBuilder.add(item); + ctx.suppressSubject = true; + return; 
+ } + + // --- Node Element: rdf:Description or typed node --- + boolean isNode = RdfXmlUtils.isDescription(localName, uri) + || (ctx.subjectStack.isEmpty() && RdfXmlUtils.isNodeElement(attrs)); + + if (isNode) { + Resource newSubject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + + // If current node is object of a property + if (!ctx.predicateStack.isEmpty() && !ctx.subjectStack.isEmpty()) { + Resource parent = ctx.subjectStack.peek(); + IRI predicate = ctx.predicateStack.pop(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, newSubject)); } - subjectStack.push(newSubject); + ctx.subjectStack.push(newSubject); - // If it's a typed node (e.g., ), add rdf:type triple - if (!isDescription(localName, uri)) { - IRI typeIRI = factory.createIRI(expandQName(uri, localName, qName)); - model.add(factory.createStatement( - newSubject, - factory.createIRI(RDF.type.getIRI().stringValue()), - typeIRI - )); + // Emit rdf:type for typed node elements + if (!RdfXmlUtils.isDescription(localName, uri)) { + IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.model.add(ctx.factory.createStatement(newSubject, RDF.type.getIRI(), typeIRI)); } - // Handle property attributes + // Handle non-RDF attributes as property triples for (int i = 0; i < attrs.getLength(); i++) { String attrURI = attrs.getURI(i); String attrLocal = attrs.getLocalName(i); String attrQName = attrs.getQName(i); String value = attrs.getValue(i); - if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; + if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); - model.add(factory.createStatement(newSubject, pred, factory.createLiteral(value))); + IRI pred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); + ctx.model.add(ctx.factory.createStatement(newSubject, pred, ctx.factory.createLiteral(value))); } return; } - 
// Handle regular property elements (e.g., ) - IRI predicate = factory.createIRI(expandQName(uri, localName, qName)); - predicateStack.push(predicate); + // --- Property Element (e.g., ) --- + IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); + ctx.predicateStack.push(predicate); - // Handle rdf:resource object (IRI) - String resource = attrs.getValue("rdf:resource"); - if (resource != null) { - model.add(factory.createStatement( - subjectStack.peek(), - predicate, - factory.createIRI(resolveAgainstBase(resource)) - )); - } - } + // --- Property Resource/Object reference --- + String resource = attrs.getValue(RDF.type.getNamespace(), "resource"); + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - @Override - public void endElement(String uri, String localName, String qName) { - String text = characters.toString().trim(); - characters.setLength(0); + if (resource != null || nodeID != null) { + Resource object = resource != null + ? ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) + : ctx.factory.createBNode("_:" + nodeID); - // Always pop lang/datatype if pushed - if (!langStack.isEmpty()) langStack.pop(); - String datatypeUri = !datatypeStack.isEmpty() ? 
datatypeStack.pop() : null; - - // Property literal - if (!predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = predicateStack.pop(); - Resource subject = subjectStack.peek(); - - Value literal; - - if (datatypeUri != null && !datatypeUri.isBlank()) { - Optional known = fromURI(datatypeUri); - - if (known.isPresent()) { - // normalized datatype - IRI normalizedDatatype = known.get().getIRI(); - literal = factory.createLiteral(text, normalizedDatatype); - } else { - // fallback datatype - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - IRI fallbackDatatype = factory.createIRI(datatypeUri); - literal = factory.createLiteral(text, fallbackDatatype); - } - } else { - // no datatype – use language tag if any - String lang = langStack.isEmpty() ? null : langStack.peek(); - literal = (lang != null && !lang.equals("__NO_LANG__")) - ? factory.createLiteral(text, lang) - : factory.createLiteral(text); - } + ctx.model.add(ctx.factory.createStatement( + ctx.subjectStack.peek(), + predicate, + object + )); - model.add(factory.createStatement(subject, predicate, literal)); + ctx.predicateStack.pop(); // already used return; } - // Clean up stray predicates - if (!predicateStack.isEmpty()) { - predicateStack.pop(); - } - - // Handle containers - if (isContainer(localName, uri)) { - if (!subjectStack.isEmpty()) subjectStack.pop(); - inContainer = false; - liIndex = 1; + // --- parseType="Resource": create blank node --- + if ("Resource".equals(parseType)) { + Resource parent = ctx.subjectStack.peek(); + Resource bnode = ctx.factory.createBNode(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); + ctx.subjectStack.push(bnode); return; } - // Handle end of rdf:Description - if (isDescription(localName, uri)) { - if (!subjectStack.isEmpty()) subjectStack.pop(); + // --- Inline property attributes: Create blank node with attributes --- + boolean hasNonSyntaxAttributes = false; + for (int i = 0; i < attrs.getLength(); i++) { + 
String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + if (!RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) { + hasNonSyntaxAttributes = true; + break; + } } - } - - - @Override - public void characters(char[] ch, int start, int length) { - characters.append(ch, start, length); - } - - private Resource extractSubject(Attributes attrs) { - String about = attrs.getValue(RDF.type.getNamespace(), "about"); - if (about != null) return factory.createIRI(resolveAgainstBase(about)); - - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - if (nodeID != null) return factory.createBNode("_:" + nodeID); - String id = attrs.getValue(RDF.type.getNamespace(), "ID"); - if (id != null) return factory.createIRI(resolveAgainstBase("#" + id)); + if (hasNonSyntaxAttributes) { + Resource parent = ctx.subjectStack.peek(); + Resource bnode = ctx.factory.createBNode(); + ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - // Default to blank node - return factory.createBNode(); - } + for (int i = 0; i < attrs.getLength(); i++) { + String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + String value = attrs.getValue(i); - private String resolveAgainstBase(String iri) { - if (iri == null) return null; - if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { - // Absolute IRI or no base, return as-is - return iri; - } + if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - try { - return new java.net.URI(baseURI).resolve(iri).toString(); - } catch (Exception e) { - throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + IRI attrPred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); + ctx.model.add(ctx.factory.createStatement(bnode, attrPred, ctx.factory.createLiteral(value))); + } + ctx.predicateStack.pop(); 
// already emitted } } - private boolean isRdfRDF(String uri, String localName) { - return RDF.type.equals(uri) && "RDF".equals(localName); - } - - private boolean isDescription(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); - } - - private boolean isContainer(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && - ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); - } - - private String expandQName(String uri, String localName, String qName) { - return (uri != null && !uri.isEmpty()) ? uri + localName : qName; - } + /** + * Handles the end of an XML element, emitting a literal or cleaning up context stacks. + */ + @Override + public void endElement(String uri, String localName, String qName) { + String text = characters.toString().trim(); + characters.setLength(0); - private boolean isSyntaxAttribute(String uri, String localName, String qName) { - if (uri != null && RDF.type.getNamespace().equals(uri)) { - return switch (localName) { - case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; - default -> false; - }; + if (!ctx.predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = ctx.predicateStack.pop(); + Resource subject = ctx.subjectStack.peek(); + String datatypeUri = ctx.datatypeStack.isEmpty() ? 
null : ctx.datatypeStack.pop(); + emitLiteral(subject, predicate, text, datatypeUri); + return; } - return qName.startsWith("xml:"); - } - - private void emitTripleString(String subject, String predicate, String object) { - System.out.printf("Triple: <%s> <%s> %s%n", subject, predicate, object); + cleanEndElement(uri, localName); } - private void emitTriple(Resource subj, IRI pred, Value obj, Resource context) { - this.statement = factory.createStatement(subj, pred, obj, context); - } - - private boolean isNodeElement(Attributes attrs) { - return attrs.getValue(RDF.type.getNamespace(), "about") != null || - attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || - attrs.getValue(RDF.type.getNamespace(), "ID") != null; + /** + * Emits a literal statement (optionally typed or language-tagged). + */ + private void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri) { + Value literal; + if (datatypeUri != null && !datatypeUri.isEmpty()) { + Optional known = RdfXmlUtils.resolveDatatype(datatypeUri); + IRI dtype = known.map(XSD::getIRI).orElseGet(() -> { + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + return ctx.factory.createIRI(datatypeUri); + }); + literal = ctx.factory.createLiteral(text, dtype); + } else { + String lang = ctx.langStack.isEmpty() ? null : ctx.langStack.peek(); + literal = (lang != null && !lang.equals("__NO_LANG__")) + ? ctx.factory.createLiteral(text, lang) + : ctx.factory.createLiteral(text); + } + ctx.model.add(ctx.factory.createStatement(subject, predicate, literal)); } - public Optional fromURI(String uri) { - for (XSD xsd : XSD.values()) { - if (xsd.getIRI().stringValue().equals(uri)) { - return Optional.of(xsd); - } + /** + * Cleans up stacks and handles closing of collections, containers, and resource blocks. 
+ */ + private void cleanEndElement(String uri, String localName) { + if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); + if (!ctx.predicateStack.isEmpty()) ctx.predicateStack.pop(); + if (RdfXmlUtils.isContainer(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + ctx.inContainer = false; + ctx.liIndex = 1; + return; + } + if (ctx.inCollection && localName.equals(ctx.collectionPredicate.getLocalName())) { + Resource listHead = createRdfCollection(ctx.collectionBuilder, ctx.model, ctx.factory); + ctx.model.add(ctx.factory.createStatement(ctx.collectionSubject, ctx.collectionPredicate, listHead)); + ctx.inCollection = false; + ctx.collectionBuilder.clear(); + return; + } + if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + return; + } + if (RdfXmlUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + } + if (!ctx.subjectStack.isEmpty() && !ctx.predicateStack.isEmpty()) { + ctx.predicateStack.pop(); + ctx.subjectStack.pop(); } - return Optional.empty(); } -} +} \ No newline at end of file From 12ca855bab3bc29097d46a6e2cfe8f29c7db8fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Tue, 22 Jul 2025 14:12:27 +0200 Subject: [PATCH 55/64] Add RDFXML support to ParserFactory and remove RdfXmlParserFactory --- .../io/parser/rdfxml/RdfXmlParserFactory.java | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java deleted file mode 100644 index 5a3f59b46..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParserFactory.java +++ /dev/null @@ -1,23 +0,0 @@ -package 
fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.base.parser.RDFFormat; -import fr.inria.corese.core.next.api.base.parser.RDFFormats; -import fr.inria.corese.core.next.api.base.parser.RDFParser; -import fr.inria.corese.core.next.api.base.parser.RDFParserFactory; - -public class RdfXmlParserFactory implements RDFParserFactory { - - public RdfXmlParserFactory() { - super(); - } - - @Override - public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) { - if (!format.equals(RDFFormats.RDF_XML)) { - throw new IllegalArgumentException("Unsupported format : " + format); - } - return new RdfXmlParser(model, factory); - } -} \ No newline at end of file From f14740a1475a4ca68643d57081f89fe629b41016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Tue, 22 Jul 2025 14:13:24 +0200 Subject: [PATCH 56/64] Refactor RdfXmlParser to extend AbstractRDFParser and update test cases --- .../impl/io/parser/rdfxml/RdfXmlParser.java | 71 ++++++++++++------- .../io/parser/rdfxml/RdfxmlParserTest.java | 11 +-- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java index d2f9d65d1..ce3a6b523 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java @@ -1,9 +1,9 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.parser.RDFFormat; -import fr.inria.corese.core.next.api.base.parser.RDFFormats; -import fr.inria.corese.core.next.api.base.parser.RDFParser; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import 
fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; @@ -32,10 +32,10 @@ * the supplied {@link ValueFactory}. This parser supports nested nodes, * blank nodes, typed nodes, and RDF collections.

*/ -public class RdfXmlParser extends DefaultHandler implements RDFParser { +public class RdfXmlParser extends AbstractRDFParser { /** RDF/XML format identifier for this parser. */ - private final RDFFormat format = RDFFormats.RDF_XML; + private final RDFFormat format = RDFFormat.RDFXML; /** Buffer for accumulating character data between start and end tags. */ private StringBuilder characters = new StringBuilder(); @@ -50,17 +50,24 @@ public class RdfXmlParser extends DefaultHandler implements RDFParser { * @param factory the RDF value factory for term creation */ public RdfXmlParser(Model model, ValueFactory factory) { - this.ctx = new RdfXmlContext(model, factory); + this(model, factory, null); } - @Override - public RDFFormat getRDFFormat() { - return format; + /** + * Creates a new parser with a target RDF model, factory, and configuration options. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + * @param config optional configuration options for the parser + */ + public RdfXmlParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + this.ctx = new RdfXmlContext(getModel(), getValueFactory()); } @Override - public void parse(InputStream in) { - parse(in, null); + public RDFFormat getRDFFormat() { + return format; } @Override @@ -70,17 +77,12 @@ public void parse(InputStream in, String baseURI) { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); - saxParser.parse(in, this); + saxParser.parse(in, new RdfXmlSaxHandler()); } catch (Exception e) { throw new RuntimeException("Failed to parse RDF/XML input stream", e); } } - @Override - public void parse(Reader reader) { - parse(reader, null); - } - @Override public void parse(Reader reader, String baseURI) { ctx.baseURI = baseURI; @@ -89,14 +91,37 @@ public void parse(Reader reader, String baseURI) { factory.setNamespaceAware(true); 
SAXParser saxParser = factory.newSAXParser(); InputSource inputSource = new InputSource(reader); - saxParser.parse(inputSource, this); + saxParser.parse(inputSource, new RdfXmlSaxHandler()); } catch (Exception e) { throw new RuntimeException("Failed to parse RDF/XML input stream", e); } } - @Override - public void characters(char[] ch, int start, int length) { + /** + * Internal SAX handler that delegates to the parser's methods + */ + private class RdfXmlSaxHandler extends DefaultHandler { + + @Override + public void characters(char[] ch, int start, int length) { + RdfXmlParser.this.handleCharacters(ch, start, length); + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attrs) { + RdfXmlParser.this.handleStartElement(uri, localName, qName, attrs); + } + + @Override + public void endElement(String uri, String localName, String qName) { + RdfXmlParser.this.handleEndElement(uri, localName, qName); + } + } + + /** + * Handles character data between XML elements + */ + private void handleCharacters(char[] ch, int start, int length) { characters.append(ch, start, length); } @@ -105,8 +130,7 @@ public void characters(char[] ch, int start, int length) { * Identifies node elements, container constructs, properties, * and special parseType attributes, updating the parsing context accordingly. */ - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { + private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { // Skip the top-level rdf:RDF wrapper element if (RdfXmlUtils.isRdfRDF(uri, localName)) return; @@ -291,8 +315,7 @@ public void startElement(String uri, String localName, String qName, Attributes /** * Handles the end of an XML element, emitting a literal or cleaning up context stacks. 
*/ - @Override - public void endElement(String uri, String localName, String qName) { + private void handleEndElement(String uri, String localName, String qName) { String text = characters.toString().trim(); characters.setLength(0); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index e1bfa306b..37aeee0a1 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -8,8 +8,6 @@ import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -26,15 +24,10 @@ public class RdfxmlParserTest { private Model parseRdfXml(String rdfXml) throws Exception { Model model = new CoreseModel(); ValueFactory valueFactory = new CoreseAdaptedValueFactory(); - try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - RdfXmlParser handler = new RdfXmlParser(model, valueFactory); - saxParser.parse(inputStream, handler); + RdfXmlParser parser = new RdfXmlParser(model, valueFactory); + parser.parse(inputStream); } - return model; } From 6099cb27938b42e334710a845e8c0622457d295a Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 25 Jul 2025 10:48:07 +0200 Subject: [PATCH 57/64] Parser RDFXML refactoring --- .../next/impl/io/parser/ParserFactory.java | 10 +- .../impl/io/parser/rdfxml/RDFXMLParser.java | 370 +++++++++++++++++ .../parser/rdfxml/RDFXMLStatementEmitter.java | 76 ++++ .../{RdfXmlUtils.java => RDFXMLUtils.java} | 13 +- 
.../impl/io/parser/rdfxml/RdfXmlParser.java | 384 ------------------ .../io/parser/rdfxml/RdfxmlParserTest.java | 8 +- 6 files changed, 462 insertions(+), 399 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java rename src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/{RdfXmlUtils.java => RDFXMLUtils.java} (95%) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 2dcccb3a2..f49af0569 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,7 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.ANTLRNQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser; -import fr.inria.corese.core.next.impl.io.parser.trig.ANTLRTrigParser; +import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser; /** @@ -45,8 +45,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new ANTLRNTriplesParser(model, factory, config); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory, config); - } else if (format == RDFFormat.TRIG) { - return new ANTLRTrigParser(model, factory, config); + } else if (format == RDFFormat.RDFXML) { + return new RDFXMLParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -68,8 +68,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, 
ValueFactory fac return new ANTLRNTriplesParser(model, factory); } else if (format == RDFFormat.NQUADS) { return new ANTLRNQuadsParser(model, factory); - } else if (format == RDFFormat.TRIG) { - return new ANTLRTrigParser(model, factory); + } else if (format == RDFFormat.RDFXML) { + return new RDFXMLParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java new file mode 100644 index 000000000..657bcf0b4 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -0,0 +1,370 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; + +import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLUtils.*; + +/** + * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). + * + *

This parser processes RDF/XML documents using the SAX streaming API. + * It tracks RDF constructs (resources, properties, literals, containers, collections) + * using an explicit stack-based context, and supports features like xml:lang, + * rdf:datatype, rdf:parseType, and property attributes.

+ * + *

The parser adds RDF statements to the provided {@link Model} using + * the supplied {@link ValueFactory}. This parser supports nested nodes, + * blank nodes, typed nodes, and RDF collections.

+ */ +public class RDFXMLParser extends AbstractRDFParser { + + /** RDF/XML format identifier for this parser. */ + private final RDFFormat format = RDFFormat.RDFXML; + + /** Buffer for accumulating character data between start and end tags. */ + private StringBuilder characters = new StringBuilder(); + + /** Shared state across SAX callbacks. */ + private RdfXmlContext ctx; + + private final RDFXMLStatementEmitter emitter; + + /** + * Creates a new parser with a target RDF model and factory. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + */ + public RDFXMLParser(Model model, ValueFactory factory) { + this(model, factory, null); + } + + /** + * Creates a new parser with a target RDF model, factory, and configuration options. + * + * @param model the RDF model to populate + * @param factory the RDF value factory for term creation + * @param config optional configuration options for the parser + */ + public RDFXMLParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + this.ctx = new RdfXmlContext(getModel(), getValueFactory()); + this.emitter = new RDFXMLStatementEmitter(model, factory); + } + + @Override + public RDFFormat getRDFFormat() { + return format; + } + + @Override + public void parse(InputStream in) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); + } + + @Override + public void parse(InputStream in, String baseURI) throws ParsingErrorException { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURI); + } + + public void parse(Reader reader) throws ParsingErrorException { + parse(reader, null); + } + + @Override + public void parse(Reader reader, String baseURI) throws ParsingErrorException { + ctx.baseURI = baseURI; + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + InputSource inputSource = 
new InputSource(reader); + saxParser.parse(inputSource, new RdfXmlSaxHandler()); + } + catch (IOException e) { + throw new ParsingErrorException("Failed to parse RDF/XML input stream: " + e.getMessage() , e); + } catch (Exception e) { + throw new ParsingErrorException("Unexpected error during RDF/XML parsing: " + e.getMessage(), e); + } + } + + /** + * Internal SAX handler that delegates to the parser's methods + */ + private class RdfXmlSaxHandler extends DefaultHandler { + + @Override + public void characters(char[] ch, int start, int length) { + RDFXMLParser.this.handleCharacters(ch, start, length); + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attrs) { + RDFXMLParser.this.handleStartElement(uri, localName, qName, attrs); + } + + @Override + public void endElement(String uri, String localName, String qName) { + RDFXMLParser.this.handleEndElement(uri, localName, qName); + } + } + + /** + * Handles character data between XML elements + */ + private void handleCharacters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + /** + * Handles opening of an XML element. + * Identifies node elements, container constructs, properties, + * and special parseType attributes, updating the parsing context accordingly. 
+ */ + private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { + // Skip the top-level rdf:RDF wrapper element + if (RDFXMLUtils.isRdfRDF(uri, localName)) return; + + // Reset character buffer + characters.setLength(0); + + // Handle xml:base (change base URI dynamically) + updateBase(attrs); + + // Handle xml:lang + updateLang(attrs); + + // Handle rdf:datatype (applies to property literal values) + updateDatatype(attrs); + + if (processContainerElement(localName, uri, qName, attrs)) return; + if (processCollectionStart(localName, uri, qName, attrs)) return; + if (processCollectionItem(localName, uri, attrs)) return; + if (processNodeElement(localName, uri, qName, attrs)) return; + processPropertyElement(localName, uri, qName, attrs); + } + + /** + * Handles the end of an XML element, emitting a literal or cleaning up context stacks. + */ + private void handleEndElement(String uri, String localName, String qName) { + String text = characters.toString().trim(); + characters.setLength(0); + + if (!ctx.predicateStack.isEmpty() && !text.isEmpty()) { + IRI predicate = ctx.predicateStack.pop(); + Resource subject = ctx.subjectStack.peek(); + String datatypeUri = ctx.datatypeStack.isEmpty() ? null : ctx.datatypeStack.pop(); + //emitLiteral(subject, predicate, text, datatypeUri); + String lang = ctx.langStack.isEmpty() ? 
null : ctx.langStack.peek(); + emitter.emitLiteral(subject, predicate, text, datatypeUri, lang); + return; + } + cleanEndElement(uri, localName); + } + + private void updateBase(Attributes attrs) { + String xmlBase = attrs.getValue("xml:base"); + if (xmlBase != null) ctx.baseURI = xmlBase; + } + + private void updateLang(Attributes attrs) { + String xmlLang = attrs.getValue("xml:lang"); + if (xmlLang != null) ctx.langStack.push(xmlLang); + } + + private void updateDatatype(Attributes attrs) { + String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); + if (datatype != null) { + ctx.datatypeStack.push(datatype); + } + } + + private boolean processCollectionStart(String localName, String uri, String qName, Attributes attrs) { + if (!"Collection".equals(getParseType(attrs))) return false; + IRI predicate = ctx.factory.createIRI(RDFXMLUtils.expandQName(uri, localName, qName)); + prepareCollection(predicate); + return true; + } + + private void prepareCollection(IRI predicate) { + ctx.predicateStack.push(predicate); + ctx.collectionSubject = ctx.subjectStack.peek(); + ctx.collectionPredicate = predicate; + ctx.collectionBuilder = new ArrayList<>(); + ctx.inCollection = true; + } + + private boolean processCollectionItem(String localName, String uri, Attributes attrs) { + if (!ctx.inCollection || !RDFXMLUtils.isDescription(localName, uri)) return false; + + Resource item = extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.collectionBuilder.add(item); + ctx.suppressSubject = true; + + return true; + } + + private boolean processContainerElement(String localName, String uri, String qName, Attributes attrs) { + // --- RDF Container Element --- + if (isContainer(localName, uri)) { + Resource subject = extractSubject(attrs, ctx.factory, ctx.baseURI); + ctx.subjectStack.push(subject); + ctx.inContainer = true; + ctx.liIndex = 1; + emitter.emitType(subject, expandQName(uri, localName, qName)); + + return true; + } + + // --- Container Items (rdf:li, rdf:_n) 
--- + + if (ctx.inContainer && RDF.type.getNamespace().equals(uri)) { + String pred = switch (localName) { + case "li" -> RDF.type.getNamespace() + "_" + ctx.liIndex++; + default -> localName.matches("_\\d+") ? RDF.type.getNamespace() + localName : null; + }; + + if (pred != null) { + IRI predicate = ctx.factory.createIRI(pred); + String resource = attrs.getValue("rdf:resource"); + if (resource != null) { + emitter.emitResourceTriple(ctx.subjectStack.peek(), predicate, resource, ctx.baseURI); + } + return true; + } + } + return false; + } + + private boolean processNodeElement(String localName, String uri, String qName, Attributes attrs) { + boolean isNode = isDescription(localName, uri) + || (ctx.subjectStack.isEmpty() && RDFXMLUtils.isNodeElement(attrs)); + + if (!isNode) return false; + + Resource newSubject = RDFXMLUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); + + // Add triple if nested in another node as object + if (!ctx.predicateStack.isEmpty() && !ctx.subjectStack.isEmpty()) { + Resource parent = ctx.subjectStack.peek(); + IRI predicate = ctx.predicateStack.pop(); + emitter.emitTriple(parent, predicate, newSubject); + } + + ctx.subjectStack.push(newSubject); + + // Emit rdf:type if typed node + if (!isDescription(localName, uri)) { + emitter.emitType(newSubject, expandQName(uri, localName, qName)); + } + + // Handle non-syntax attributes + emitter.emitPropertyAttributes(newSubject, attrs); + return true; + } + + private void processPropertyElement(String localName, String uri, String qName, Attributes attrs) { + IRI predicate = ctx.factory.createIRI(RDFXMLUtils.expandQName(uri, localName, qName)); + ctx.predicateStack.push(predicate); + + String resource = attrs.getValue(RDF.type.getNamespace(), "resource"); + String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); + + if (resource != null) { + emitter.emitResourceTriple(ctx.subjectStack.peek(), predicate, resource, ctx.baseURI); + ctx.predicateStack.pop(); + return; + } + + if (nodeID 
!= null) { + emitter.emitBNodeTriple(ctx.subjectStack.peek(), predicate, nodeID); + ctx.predicateStack.pop(); + return; + } + + // parseType="Resource" + String parseType = getParseType(attrs); + if ("Resource".equals(parseType)) { + Resource bnode = emitBnodePredicateObject(predicate); + ctx.subjectStack.push(bnode); + return; + } + + // Inline attributes + if (hasNonSyntaxAttributes(attrs)) { + Resource bnode = emitBnodePredicateObject(predicate); + emitter.emitPropertyAttributes(bnode, attrs); + ctx.predicateStack.pop(); + } + } + + private boolean hasNonSyntaxAttributes(Attributes attrs) { + for (int i = 0; i < attrs.getLength(); i++) { + if (!isSyntaxAttribute(attrs.getURI(i), attrs.getLocalName(i), attrs.getQName(i))) { + return true; + } + } + return false; + } + + private Resource emitBnodePredicateObject(IRI predicate) { + Resource parent = ctx.subjectStack.peek(); + Resource bnode = ctx.factory.createBNode(); + emitter.emitTriple(parent, predicate, bnode); + return bnode; + } + + + + /** + * Cleans up stacks and handles closing of collections, containers, and resource blocks. 
+ */ + private void cleanEndElement(String uri, String localName) { + if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); + if (!ctx.predicateStack.isEmpty()) ctx.predicateStack.pop(); + if (RDFXMLUtils.isContainer(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + ctx.inContainer = false; + ctx.liIndex = 1; + return; + } + if (ctx.inCollection && localName.equals(ctx.collectionPredicate.getLocalName())) { + Resource listHead = createRdfCollection(ctx.collectionBuilder, ctx.model, ctx.factory); + ctx.model.add(ctx.factory.createStatement(ctx.collectionSubject, ctx.collectionPredicate, listHead)); + ctx.inCollection = false; + ctx.collectionBuilder.clear(); + return; + } + if (ctx.inCollection && RDFXMLUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + return; + } + if (RDFXMLUtils.isDescription(localName, uri)) { + if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); + } + if (!ctx.subjectStack.isEmpty() && !ctx.predicateStack.isEmpty()) { + ctx.subjectStack.pop(); + ctx.predicateStack.pop(); + } + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java new file mode 100644 index 000000000..07b844209 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java @@ -0,0 +1,76 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import org.xml.sax.Attributes; + +import java.util.Optional; + +import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLUtils.*; + +public class RDFXMLStatementEmitter { + + private final Model model; + private final ValueFactory factory; + + public 
RDFXMLStatementEmitter(Model model, ValueFactory factory) { + this.model = model; + this.factory = factory; + } + + public void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri, String lang) { + Value literal; + if (datatypeUri != null && !datatypeUri.isEmpty()) { + Optional known = RDFXMLUtils.resolveDatatype(datatypeUri); + IRI dtype = known.map(XSD::getIRI).orElseGet(() -> { + System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); + return factory.createIRI(datatypeUri); + }); + literal = factory.createLiteral(text, dtype); + } else if (lang != null && !lang.equals("__NO_LANG__")) { + literal = factory.createLiteral(text, lang); + } else { + literal = factory.createLiteral(text); + } + model.add(factory.createStatement(subject, predicate, literal)); + } + + public void emitType(Resource subject, String expandedQName) { + model.add(factory.createStatement(subject, RDF.type.getIRI(), factory.createIRI(expandedQName))); + } + + public void emitPropertyAttributes(Resource subject, Attributes attrs) { + for (int i = 0; i < attrs.getLength(); i++) { + String attrURI = attrs.getURI(i); + String attrLocal = attrs.getLocalName(i); + String attrQName = attrs.getQName(i); + String value = attrs.getValue(i); + + if (isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; + + IRI pred = factory.createIRI(expandQName(attrURI, attrLocal, attrQName)); + model.add(factory.createStatement(subject, pred, factory.createLiteral(value))); + } + } + + public void emitResourceTriple(Resource subject, IRI predicate, String resource, String baseURI) { + model.add(factory.createStatement( + subject, + predicate, + factory.createIRI(resolveAgainstBase(resource, baseURI)) + )); + } + + public void emitBNodeTriple(Resource subject, IRI predicate, String nodeID) { + model.add(factory.createStatement( + subject, + predicate, + factory.createBNode("_:" + nodeID) + )); + } + + public void emitTriple(Resource subject, IRI predicate, Resource object) { 
+ model.add(factory.createStatement(subject, predicate, object)); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java similarity index 95% rename from src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java index fc52f588a..163662def 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java @@ -9,8 +9,8 @@ import java.util.Optional; -public class RdfXmlUtils { - private RdfXmlUtils() { +public class RDFXMLUtils { + private RDFXMLUtils() { } public static String expandQName(String uri, String localName, String qName) { @@ -62,6 +62,10 @@ public static boolean isNodeElement(Attributes attrs) { attrs.getValue(RDF.type.getNamespace(), "ID") != null; } + public static String getParseType(Attributes attrs) { + return attrs.getValue(RDF.type.getNamespace(), "parseType"); + } + public static boolean isSyntaxAttribute(String uri, String localName, String qName) { if (uri != null && RDF.type.getNamespace().equals(uri)) { return switch (localName) { @@ -109,9 +113,6 @@ public static Resource createRdfCollection(List items, Model model, Va current = next; } - return head; } - - -} +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java deleted file mode 100644 index ce3a6b523..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ /dev/null @@ -1,384 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.io.RDFFormat; -import 
fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; -import org.xml.sax.Attributes; -import org.xml.sax.InputSource; -import org.xml.sax.helpers.DefaultHandler; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; -import java.io.InputStream; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Optional; - -import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RdfXmlUtils.*; - -/** - * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). - * - *

This parser processes RDF/XML documents using the SAX streaming API. - * It tracks RDF constructs (resources, properties, literals, containers, collections) - * using an explicit stack-based context, and supports features like xml:lang, - * rdf:datatype, rdf:parseType, and property attributes.

- * - *

The parser adds RDF statements to the provided {@link Model} using - * the supplied {@link ValueFactory}. This parser supports nested nodes, - * blank nodes, typed nodes, and RDF collections.

- */ -public class RdfXmlParser extends AbstractRDFParser { - - /** RDF/XML format identifier for this parser. */ - private final RDFFormat format = RDFFormat.RDFXML; - - /** Buffer for accumulating character data between start and end tags. */ - private StringBuilder characters = new StringBuilder(); - - /** Shared state across SAX callbacks. */ - private RdfXmlContext ctx; - - /** - * Creates a new parser with a target RDF model and factory. - * - * @param model the RDF model to populate - * @param factory the RDF value factory for term creation - */ - public RdfXmlParser(Model model, ValueFactory factory) { - this(model, factory, null); - } - - /** - * Creates a new parser with a target RDF model, factory, and configuration options. - * - * @param model the RDF model to populate - * @param factory the RDF value factory for term creation - * @param config optional configuration options for the parser - */ - public RdfXmlParser(Model model, ValueFactory factory, IOOptions config) { - super(model, factory, config); - this.ctx = new RdfXmlContext(getModel(), getValueFactory()); - } - - @Override - public RDFFormat getRDFFormat() { - return format; - } - - @Override - public void parse(InputStream in, String baseURI) { - ctx.baseURI = baseURI; - try { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - saxParser.parse(in, new RdfXmlSaxHandler()); - } catch (Exception e) { - throw new RuntimeException("Failed to parse RDF/XML input stream", e); - } - } - - @Override - public void parse(Reader reader, String baseURI) { - ctx.baseURI = baseURI; - try { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - InputSource inputSource = new InputSource(reader); - saxParser.parse(inputSource, new RdfXmlSaxHandler()); - } catch (Exception e) { - throw new RuntimeException("Failed to parse 
RDF/XML input stream", e); - } - } - - /** - * Internal SAX handler that delegates to the parser's methods - */ - private class RdfXmlSaxHandler extends DefaultHandler { - - @Override - public void characters(char[] ch, int start, int length) { - RdfXmlParser.this.handleCharacters(ch, start, length); - } - - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { - RdfXmlParser.this.handleStartElement(uri, localName, qName, attrs); - } - - @Override - public void endElement(String uri, String localName, String qName) { - RdfXmlParser.this.handleEndElement(uri, localName, qName); - } - } - - /** - * Handles character data between XML elements - */ - private void handleCharacters(char[] ch, int start, int length) { - characters.append(ch, start, length); - } - - /** - * Handles opening of an XML element. - * Identifies node elements, container constructs, properties, - * and special parseType attributes, updating the parsing context accordingly. 
- */ - private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { - // Skip the top-level rdf:RDF wrapper element - if (RdfXmlUtils.isRdfRDF(uri, localName)) return; - - // Reset character buffer - characters.setLength(0); - - // Handle xml:base (change base URI dynamically) - String xmlBase = attrs.getValue("xml:base"); - if (xmlBase != null) { - ctx.baseURI = xmlBase; - } - - // Handle xml:lang - String xmlLang = attrs.getValue("xml:lang"); - if (xmlLang != null) { - ctx.langStack.push(xmlLang); - } - - // Handle rdf:datatype (applies to property literal values) - String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - if (datatype != null) { - ctx.datatypeStack.push(datatype); - } - - // --- RDF Container Element --- - if (RdfXmlUtils.isContainer(localName, uri)) { - Resource subject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - ctx.subjectStack.push(subject); - ctx.inContainer = true; - ctx.liIndex = 1; - - IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.model.add(ctx.factory.createStatement(subject, RDF.type.getIRI(), typeIRI)); - return; - } - - // --- Container Items (rdf:li, rdf:_n) --- - if (ctx.inContainer && RDF.type.getNamespace().equals(uri)) { - String pred = null; - if ("li".equals(localName)) { - pred = RDF.type.getNamespace() + "_" + ctx.liIndex++; - } else if (localName.matches("_\\d+")) { - pred = RDF.type.getNamespace() + localName; - } - - if (pred != null) { - IRI predicate = ctx.factory.createIRI(pred); - String resource = attrs.getValue("rdf:resource"); - if (resource != null) { - ctx.model.add(ctx.factory.createStatement( - ctx.subjectStack.peek(), - predicate, - ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) - )); - } - return; - } - } - - // --- parseType="Collection" --- - String parseType = attrs.getValue(RDF.type.getNamespace(), "parseType"); - if ("Collection".equals(parseType)) { - IRI 
predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.predicateStack.push(predicate); - ctx.collectionSubject = ctx.subjectStack.peek(); - ctx.collectionPredicate = predicate; - ctx.collectionBuilder = new ArrayList<>(); - ctx.inCollection = true; - return; - } - - // --- Inside Collection: Collect rdf:Description Items --- - if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { - Resource item = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - ctx.collectionBuilder.add(item); - ctx.suppressSubject = true; - return; - } - - // --- Node Element: rdf:Description or typed node --- - boolean isNode = RdfXmlUtils.isDescription(localName, uri) - || (ctx.subjectStack.isEmpty() && RdfXmlUtils.isNodeElement(attrs)); - - if (isNode) { - Resource newSubject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - - // If current node is object of a property - if (!ctx.predicateStack.isEmpty() && !ctx.subjectStack.isEmpty()) { - Resource parent = ctx.subjectStack.peek(); - IRI predicate = ctx.predicateStack.pop(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, newSubject)); - } - - ctx.subjectStack.push(newSubject); - - // Emit rdf:type for typed node elements - if (!RdfXmlUtils.isDescription(localName, uri)) { - IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.model.add(ctx.factory.createStatement(newSubject, RDF.type.getIRI(), typeIRI)); - } - - // Handle non-RDF attributes as property triples - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - String value = attrs.getValue(i); - - if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - - IRI pred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); - ctx.model.add(ctx.factory.createStatement(newSubject, pred, 
ctx.factory.createLiteral(value))); - } - - return; - } - - // --- Property Element (e.g., ) --- - IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.predicateStack.push(predicate); - - // --- Property Resource/Object reference --- - String resource = attrs.getValue(RDF.type.getNamespace(), "resource"); - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - - if (resource != null || nodeID != null) { - Resource object = resource != null - ? ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) - : ctx.factory.createBNode("_:" + nodeID); - - ctx.model.add(ctx.factory.createStatement( - ctx.subjectStack.peek(), - predicate, - object - )); - - ctx.predicateStack.pop(); // already used - return; - } - - // --- parseType="Resource": create blank node --- - if ("Resource".equals(parseType)) { - Resource parent = ctx.subjectStack.peek(); - Resource bnode = ctx.factory.createBNode(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - ctx.subjectStack.push(bnode); - return; - } - - // --- Inline property attributes: Create blank node with attributes --- - boolean hasNonSyntaxAttributes = false; - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - if (!RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) { - hasNonSyntaxAttributes = true; - break; - } - } - - if (hasNonSyntaxAttributes) { - Resource parent = ctx.subjectStack.peek(); - Resource bnode = ctx.factory.createBNode(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - String value = attrs.getValue(i); - - if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - - IRI attrPred = 
ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); - ctx.model.add(ctx.factory.createStatement(bnode, attrPred, ctx.factory.createLiteral(value))); - } - ctx.predicateStack.pop(); // already emitted - } - } - - /** - * Handles the end of an XML element, emitting a literal or cleaning up context stacks. - */ - private void handleEndElement(String uri, String localName, String qName) { - String text = characters.toString().trim(); - characters.setLength(0); - - if (!ctx.predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = ctx.predicateStack.pop(); - Resource subject = ctx.subjectStack.peek(); - String datatypeUri = ctx.datatypeStack.isEmpty() ? null : ctx.datatypeStack.pop(); - emitLiteral(subject, predicate, text, datatypeUri); - return; - } - cleanEndElement(uri, localName); - } - - /** - * Emits a literal statement (optionally typed or language-tagged). - */ - private void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri) { - Value literal; - if (datatypeUri != null && !datatypeUri.isEmpty()) { - Optional known = RdfXmlUtils.resolveDatatype(datatypeUri); - IRI dtype = known.map(XSD::getIRI).orElseGet(() -> { - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - return ctx.factory.createIRI(datatypeUri); - }); - literal = ctx.factory.createLiteral(text, dtype); - } else { - String lang = ctx.langStack.isEmpty() ? null : ctx.langStack.peek(); - literal = (lang != null && !lang.equals("__NO_LANG__")) - ? ctx.factory.createLiteral(text, lang) - : ctx.factory.createLiteral(text); - } - ctx.model.add(ctx.factory.createStatement(subject, predicate, literal)); - } - - /** - * Cleans up stacks and handles closing of collections, containers, and resource blocks. 
- */ - private void cleanEndElement(String uri, String localName) { - if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); - if (!ctx.predicateStack.isEmpty()) ctx.predicateStack.pop(); - if (RdfXmlUtils.isContainer(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - ctx.inContainer = false; - ctx.liIndex = 1; - return; - } - if (ctx.inCollection && localName.equals(ctx.collectionPredicate.getLocalName())) { - Resource listHead = createRdfCollection(ctx.collectionBuilder, ctx.model, ctx.factory); - ctx.model.add(ctx.factory.createStatement(ctx.collectionSubject, ctx.collectionPredicate, listHead)); - ctx.inCollection = false; - ctx.collectionBuilder.clear(); - return; - } - if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - return; - } - if (RdfXmlUtils.isDescription(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - } - if (!ctx.subjectStack.isEmpty() && !ctx.predicateStack.isEmpty()) { - ctx.predicateStack.pop(); - ctx.subjectStack.pop(); - } - } -} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index 37aeee0a1..6df1ca120 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -25,7 +25,7 @@ private Model parseRdfXml(String rdfXml) throws Exception { Model model = new CoreseModel(); ValueFactory valueFactory = new CoreseAdaptedValueFactory(); try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { - RdfXmlParser parser = new RdfXmlParser(model, valueFactory); + RDFXMLParser parser = new RDFXMLParser(model, valueFactory); parser.parse(inputStream); } return model; @@ -311,7 +311,7 @@ public void 
testExample11CompleteExamplerdfDatatype() throws Exception { """.trim(); Model model = parseRdfXml(rdfXml); printModel(model); - assertEquals(1, model.size(), "Expected four RDF statements"); + assertEquals(1, model.size(), "Expected one RDF statement"); } /** @@ -343,7 +343,7 @@ public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { printModel(model); // Assert or inspect the result - assertEquals(4, model.size(), "Expected five RDF statements"); + assertEquals(4, model.size(), "Expected four RDF statements"); } /** @@ -421,7 +421,7 @@ public void testExample15CompleteExampleWithRdfType() throws Exception { Model model = parseRdfXml(rdfXml); printModel(model); - assertEquals(2, model.size(), "Expected four RDF statements"); + assertEquals(2, model.size(), "Expected two RDF statements"); } /** From 6edb354f62f3309cd2fc7f8c314aa5c52bfbc18f Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 25 Jul 2025 11:39:24 +0200 Subject: [PATCH 58/64] build exemple --- build.gradle.kts | 6 +++--- .../core/next/impl/io/parser/rdfxml/RDFXMLParser.java | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index aa41701f3..ea6331670 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -92,9 +92,9 @@ object Meta { java { withJavadocJar() // Include Javadoc JAR in publications withSourcesJar() // Include sources JAR in publications - toolchain { - languageVersion.set(JavaLanguageVersion.of(21)) - } + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } } ///////////////////////// diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index 657bcf0b4..3d7d5da45 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -9,8 +9,6 @@ import 
fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; import org.xml.sax.Attributes; import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; From 5255aa820c7bab2574c67fa2fa36219dde97c956 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 25 Jul 2025 13:01:40 +0200 Subject: [PATCH 59/64] ANTLRTrig --- .../corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index 3d7d5da45..7730da651 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; import org.xml.sax.Attributes; import org.xml.sax.InputSource; +import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; From ca66b5bf618e2c1db0508f4f087e13b929197670 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 29 Jul 2025 09:20:19 +0200 Subject: [PATCH 60/64] new test suit --- .../impl/io/parser/rdfxml/RDFXMLParser.java | 1 + .../parser/rdfxml/RDFXMLStatementEmitter.java | 2 +- .../impl/io/parser/rdfxml/RDFXMLUtils.java | 6 +- .../parser/rdfxml/context/RdfXmlContext.java | 3 - .../rdfxml/RDFXMLStatementEmitterTest.java | 123 +++++++++++ .../io/parser/rdfxml/RDFXMLUtilsTest.java | 91 ++++++++ .../io/parser/rdfxml/RdfxmlParserTest.java | 203 +++++++++++++++++- 7 files changed, 418 insertions(+), 11 deletions(-) create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java create mode 100644 
src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index 7730da651..47caf270c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -140,6 +140,7 @@ private void handleCharacters(char[] ch, int start, int length) { * and special parseType attributes, updating the parsing context accordingly. */ private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { + // Skip the top-level rdf:RDF wrapper element if (RDFXMLUtils.isRdfRDF(uri, localName)) return; diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java index 07b844209..1bcd1cea7 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java @@ -73,4 +73,4 @@ public void emitBNodeTriple(Resource subject, IRI predicate, String nodeID) { public void emitTriple(Resource subject, IRI predicate, Resource object) { model.add(factory.createStatement(subject, predicate, object)); } -} +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java index 163662def..d9faf9cb7 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java @@ -3,6 +3,7 @@ import fr.inria.corese.core.next.api.*; import 
fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.IncorrectFormatException; import org.xml.sax.*; import java.util.List; @@ -40,15 +41,14 @@ public static Resource extractSubject(Attributes attrs, ValueFactory factory, St public static String resolveAgainstBase(String iri, String baseURI) { if (iri == null) return null; + if (iri.isEmpty()) return baseURI; if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { - // Absolute IRI or no base, return as-is return iri; } - try { return new java.net.URI(baseURI).resolve(iri).toString(); } catch (Exception e) { - throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); + throw new IncorrectFormatException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java index c542ff4ea..051b24485 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java @@ -67,9 +67,6 @@ public class RdfXmlContext { /** Counter for rdf:li to rdf:_n expansion. */ public int liIndex = 1; - /** Optional parseType value for the current element. */ - public String parseType; - /** * Constructs a new context for RDF/XML parsing. 
* diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java new file mode 100644 index 000000000..56bb2cff8 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java @@ -0,0 +1,123 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.xml.sax.helpers.AttributesImpl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class RDFXMLStatementEmitterTest { + + private Model model; + private ValueFactory factory; + private RDFXMLStatementEmitter emitter; + + @BeforeEach + public void setUp() { + model = new CoreseModel(); + factory = new CoreseAdaptedValueFactory(); + emitter = new RDFXMLStatementEmitter(model, factory); + } + + @Test + public void testEmitLiteral_plain() { + Literal literal = factory.createLiteral("hello"); + Resource subject = factory.createBNode(); + IRI predicate = factory.createIRI("http://example.org/predicate"); + emitter.emitLiteral(subject, predicate, "hello", null, null); + assertEquals(1, model.size()); + Iterable statements = model.getStatements(subject, predicate, literal); + boolean found = false; + for (Statement stmt : statements) { + if (stmt.getSubject().equals(subject) && + stmt.getPredicate().equals(predicate) && + stmt.getObject().stringValue().equals(literal.stringValue())) { + found = true; + break; + } + } + + assertTrue(found, "Expected statement not found in model"); + 
} + + @Test + public void testEmitLiteral_withLang() { + Resource subject = factory.createBNode(); + IRI predicate = factory.createIRI("http://example.org/predicate"); + emitter.emitLiteral(subject, predicate, "bonjour", null, "fr"); + + Value obj = model.objects().iterator().next(); + assertTrue(obj.isLiteral()); + assertEquals("bonjour", obj.stringValue()); + } + + @Test + public void testEmitLiteral_withDatatype() { + Resource subject = factory.createBNode(); + IRI predicate = factory.createIRI("http://example.org/age"); + emitter.emitLiteral(subject, predicate, "42", XSD.INTEGER.getIRI().stringValue(), null); + + Value obj = model.objects().iterator().next(); + assertTrue(obj.isLiteral()); + assertEquals("42", obj.stringValue()); + } + + @Test + public void testEmitType() { + Resource subject = factory.createIRI("http://example.org/Alice"); + emitter.emitType(subject, "http://example.org/Person"); + + assertTrue(model.contains(subject, RDF.type.getIRI(), factory.createIRI("http://example.org/Person"))); + } + + @Test + public void testEmitResourceTriple() { + Resource subject = factory.createIRI("http://example.org/Alice"); + IRI predicate = factory.createIRI("http://example.org/knows"); + emitter.emitResourceTriple(subject, predicate, "Bob", "http://example.org/"); + + assertTrue(model.contains(subject, predicate, factory.createIRI("http://example.org/Bob"))); + } + + @Test + public void testEmitBNodeTriple() { + Resource subject = factory.createIRI("http://example.org/Alice"); + IRI predicate = factory.createIRI("http://example.org/knows"); + emitter.emitBNodeTriple(subject, predicate, "b123"); + + assertTrue(model.size() == 1); + Value obj = model.objects().iterator().next(); + assertTrue(obj.stringValue().contains("_:b123")); + } + + @Test + public void testEmitTriple() { + Resource s = factory.createIRI("http://example.org/s"); + IRI p = factory.createIRI("http://example.org/p"); + Resource o = factory.createIRI("http://example.org/o"); + + 
emitter.emitTriple(s, p, o); + + assertTrue(model.contains(s, p, o)); + } + + @Test + public void testEmitPropertyAttributes() { + Resource s = factory.createIRI("http://example.org/thing"); + AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute("http://example.org/", "foo", "ex:foo", "CDATA", "val1"); + attrs.addAttribute("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about", "rdf:about", "CDATA", "ignored"); + + emitter.emitPropertyAttributes(s, attrs); + + assertEquals(1, model.size()); + Value object = model.objects().iterator().next(); + assertEquals("val1", object.stringValue()); + } +} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java new file mode 100644 index 000000000..e79654630 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java @@ -0,0 +1,91 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfxml; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.common.literal.XSD; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; +import org.xml.sax.helpers.AttributesImpl; + +import java.util.List; +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; + +public class RDFXMLUtilsTest { + + private final ValueFactory factory = new CoreseAdaptedValueFactory(); + + @Test + public void testExpandQName() { + assertEquals("http://example.org/test", RDFXMLUtils.expandQName("http://example.org/", "test", "ex:test")); + assertEquals("ex:test", RDFXMLUtils.expandQName(null, null, "ex:test")); + } + + @Test + public void 
testResolveDatatype() { + assertEquals(Optional.of(XSD.STRING), RDFXMLUtils.resolveDatatype(XSD.STRING.getIRI().stringValue())); + assertTrue(RDFXMLUtils.resolveDatatype("http://nonexistentdatatype").isEmpty()); + } + + @Test + public void testExtractSubjectWithAbout() { + AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute(RDF.type.getNamespace(), "about", "", "CDATA", "http://example.org/subject"); + Resource subject = RDFXMLUtils.extractSubject(attrs, factory, null); + assertEquals("http://example.org/subject", subject.stringValue()); + } + + @Test + public void testExtractSubjectWithNodeID() { + AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute(RDF.type.getNamespace(), "nodeID", "", "CDATA", "b123"); + Resource subject = RDFXMLUtils.extractSubject(attrs, factory, null); + assertTrue(subject.stringValue().contains("_:b123")); + } + + @Test + public void testExtractSubjectWithID() { + AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute(RDF.type.getNamespace(), "ID", "", "CDATA", "id123"); + Resource subject = RDFXMLUtils.extractSubject(attrs, factory, "http://example.org/"); + assertEquals("http://example.org/id123", subject.stringValue()); + } + + @Test + public void testResolveAgainstBase() { + assertEquals("http://base.org/path", RDFXMLUtils.resolveAgainstBase("path", "http://base.org/")); + } + + @Test + public void testIsSyntaxAttribute() { + assertTrue(RDFXMLUtils.isSyntaxAttribute(RDF.type.getNamespace(), "about", "rdf:about")); + assertTrue(RDFXMLUtils.isSyntaxAttribute(null, "lang", "xml:lang")); + assertFalse(RDFXMLUtils.isSyntaxAttribute("http://example.org/", "type", "ex:type")); + } + + @Test + public void testIsContainer() { + assertTrue(RDFXMLUtils.isContainer("Bag", RDF.type.getNamespace())); + assertFalse(RDFXMLUtils.isContainer("notAContainer", "http://example.org/")); + } + + @Test + public void testCreateRdfCollection() { + Model model = new CoreseModel(); + Resource r1 = 
factory.createIRI("http://example.org/A"); + Resource r2 = factory.createIRI("http://example.org/B"); + Resource head = RDFXMLUtils.createRdfCollection(List.of(r1, r2), model, factory); + + assertNotNull(head); + assertTrue(model.size() > 0); + assertTrue(model.contains(null, RDF.first.getIRI(), r1)); + assertTrue(model.contains(null, RDF.rest.getIRI(), RDF.nil.getIRI())); + } + + + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java index 6df1ca120..e9442a792 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java @@ -1,20 +1,22 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; -import fr.inria.corese.core.next.api.Literal; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; +import org.semarglproject.vocab.rdfa.RDFa; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; public class RdfxmlParserTest { + private final ValueFactory factory = new CoreseAdaptedValueFactory(); + /** * Helper method to parse the RDF/XML String * @param rdfXml @@ -573,4 +575,197 @@ public void testExample21CompleteExampleOfRdfID() throws Exception { assertEquals(1, model.size(), "Expected one RDF statement"); } + + /** + * Test the resolving uri algorithm, simple exemple + * @throws Exception + */ + 
@Test + public void testResolvingIRITest0001() throws Exception { + + String rdfXml = """ + + + + + + + + + + """.trim(); + Model model = parseRdfXml(rdfXml); + printModel(model); + assertTrue(model.contains( factory.createIRI("http://example.org/dir/file#frag") , factory.createIRI("http://example.org/value"), factory.createLiteral("v") )); + + } + + /** + * Test the resolving uri algorithm, exemple with blank node + * @throws Exception + */ + @Test + public void testResolveIRITest0002() throws Exception { + String rdfXml = """ + + + + + + + + v + + + + """.trim(); + + Model model = parseRdfXml(rdfXml); + printModel(model); + model.contains(factory.createIRI("http://example.org/value")); + } + + /** + * Test the resolving uri algorithm, exemple with "" + * @throws Exception + */ + @Test + public void testResolveIRITest008() throws Exception { + String rdfXml = + """ + + + + + + + + + + """.trim(); + Model model = parseRdfXml(rdfXml); + model.contains(factory.createIRI("http://example.org/dir/file")); + } + + @Test + public void testResolveIRITest013() throws Exception { + String rdfXml = + """ + + + + + + + + + + + + + """.trim(); + Model model = parseRdfXml(rdfXml); + printModel(model); + } + + @Test + public void testResolveIRITest011() throws Exception { + String rdfXml = + """ + + + + + + + + + + """.trim(); + Model model = parseRdfXml(rdfXml); + printModel(model); + } } \ No newline at end of file From d17a65c65c0ea41771b40bc8e88d1251de5dbbe8 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 1 Aug 2025 17:31:17 +0200 Subject: [PATCH 61/64] comments and code cleaning --- .../impl/io/parser/rdfxml/RDFXMLParser.java | 99 ++++++++++++++++++- .../parser/rdfxml/RDFXMLStatementEmitter.java | 2 + ...lParserTest.java => RDFXMLParserTest.java} | 50 +++++++--- .../rdfxml/RDFXMLStatementEmitterTest.java | 41 ++++++++ .../io/parser/rdfxml/RDFXMLUtilsTest.java | 41 +++++++- .../parser/turtle/ANTLRTurtleParserTest.java | 6 ++ 6 files changed, 214 insertions(+), 25 
deletions(-) rename src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/{RdfxmlParserTest.java => RDFXMLParserTest.java} (95%) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index 47caf270c..a50a4d060 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -182,16 +182,34 @@ private void handleEndElement(String uri, String localName, String qName) { cleanEndElement(uri, localName); } + /** + * Updates the base URI for IRI resolution using the xml:base attribute if present. + * + * @param attrs the XML attributes of the current element + */ private void updateBase(Attributes attrs) { String xmlBase = attrs.getValue("xml:base"); if (xmlBase != null) ctx.baseURI = xmlBase; } + /** + * Updates the language context using the xml:lang attribute if present. + * The language value is pushed onto a stack to support nested scope. + * + * @param attrs the XML attributes of the current element + */ private void updateLang(Attributes attrs) { String xmlLang = attrs.getValue("xml:lang"); if (xmlLang != null) ctx.langStack.push(xmlLang); } + + /** + * Updates the datatype context using the rdf:datatype attribute if present. + * The datatype URI is pushed onto a stack to support nested scope. + * + * @param attrs the XML attributes of the current element + */ private void updateDatatype(Attributes attrs) { String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); if (datatype != null) { @@ -199,6 +217,16 @@ private void updateDatatype(Attributes attrs) { } } + /** + * Processes the start of an RDF collection indicated by parseType="Collection". + * Initializes the internal collection structures and returns true if this is a collection. 
+ * + * @param localName the local name of the element + * @param uri the namespace URI + * @param qName the qualified name + * @param attrs the attributes of the element + * @return true if this element starts a collection, false otherwise + */ private boolean processCollectionStart(String localName, String uri, String qName, Attributes attrs) { if (!"Collection".equals(getParseType(attrs))) return false; IRI predicate = ctx.factory.createIRI(RDFXMLUtils.expandQName(uri, localName, qName)); @@ -206,6 +234,11 @@ private boolean processCollectionStart(String localName, String uri, String qNam return true; } + /** + * Prepares internal context to collect RDF list elements for a collection. + * + * @param predicate the predicate that points to the collection + */ private void prepareCollection(IRI predicate) { ctx.predicateStack.push(predicate); ctx.collectionSubject = ctx.subjectStack.peek(); @@ -214,6 +247,14 @@ private void prepareCollection(IRI predicate) { ctx.inCollection = true; } + /** + * Processes an item inside an RDF collection. Adds the extracted subject to the collection list. + * + * @param localName the local name of the element + * @param uri the namespace URI + * @param attrs the attributes of the element + * @return true if the element is processed as a collection item, false otherwise + */ private boolean processCollectionItem(String localName, String uri, Attributes attrs) { if (!ctx.inCollection || !RDFXMLUtils.isDescription(localName, uri)) return false; @@ -224,6 +265,16 @@ private boolean processCollectionItem(String localName, String uri, Attributes a return true; } + /** + * Processes RDF container elements like rdf:Bag, rdf:Seq, and code rdf:Alt, + * as well as container items like rdf:li and rdf:_n. 
+ * + * @param localName the local name of the element + * @param uri the namespace URI + * @param qName the qualified name + * @param attrs the attributes of the element + * @return true if the element is a container or container item, false otherwise + */ private boolean processContainerElement(String localName, String uri, String qName, Attributes attrs) { // --- RDF Container Element --- if (isContainer(localName, uri)) { @@ -256,6 +307,16 @@ private boolean processContainerElement(String localName, String uri, String qNa return false; } + /** + * Processes an RDF node element such as rdf:Description or a typed node. + * Handles subject creation, optional rdf:type triple emission, and property attributes. + * + * @param localName the local name of the element + * @param uri the namespace URI + * @param qName the qualified name + * @param attrs the element's attributes + * @return true if the element is processed as an RDF node, false otherwise + */ private boolean processNodeElement(String localName, String uri, String qName, Attributes attrs) { boolean isNode = isDescription(localName, uri) || (ctx.subjectStack.isEmpty() && RDFXMLUtils.isNodeElement(attrs)); @@ -283,7 +344,19 @@ private boolean processNodeElement(String localName, String uri, String qName, A return true; } - private void processPropertyElement(String localName, String uri, String qName, Attributes attrs) { + /** + * Processes an RDF property element and emits triples accordingly. + * Handles {@code rdf:resource}, {@code rdf:nodeID}, {@code parseType="Resource"}, + * and inline property attributes. 
+ * + * @param localName the local name of the property element + * @param uri the namespace URI + * @param qName the qualified name + * @param attrs the element's attributes + * + * @return true if the element is processed as an RDF property element, false otherwise + */ + private boolean processPropertyElement(String localName, String uri, String qName, Attributes attrs) { IRI predicate = ctx.factory.createIRI(RDFXMLUtils.expandQName(uri, localName, qName)); ctx.predicateStack.push(predicate); @@ -293,13 +366,13 @@ private void processPropertyElement(String localName, String uri, String qName, if (resource != null) { emitter.emitResourceTriple(ctx.subjectStack.peek(), predicate, resource, ctx.baseURI); ctx.predicateStack.pop(); - return; + return true; } if (nodeID != null) { emitter.emitBNodeTriple(ctx.subjectStack.peek(), predicate, nodeID); ctx.predicateStack.pop(); - return; + return true; } // parseType="Resource" @@ -307,7 +380,7 @@ private void processPropertyElement(String localName, String uri, String qName, if ("Resource".equals(parseType)) { Resource bnode = emitBnodePredicateObject(predicate); ctx.subjectStack.push(bnode); - return; + return true; } // Inline attributes @@ -315,9 +388,17 @@ private void processPropertyElement(String localName, String uri, String qName, Resource bnode = emitBnodePredicateObject(predicate); emitter.emitPropertyAttributes(bnode, attrs); ctx.predicateStack.pop(); + return true; } + return false; } + /** + * Checks if the given attributes contain any non-syntax (i.e., user-defined) attributes. 
+ * + * @param attrs the XML attributes to inspect + * @return true if at least one attribute is not a reserved RDF or XML syntax attribute + */ private boolean hasNonSyntaxAttributes(Attributes attrs) { for (int i = 0; i < attrs.getLength(); i++) { if (!isSyntaxAttribute(attrs.getURI(i), attrs.getLocalName(i), attrs.getQName(i))) { @@ -327,6 +408,12 @@ private boolean hasNonSyntaxAttributes(Attributes attrs) { return false; } + /** + * Emits a blank node as the object of the current predicate and links it to the subject. + * + * @param predicate the predicate of the triple + * @return the newly created blank node + */ private Resource emitBnodePredicateObject(IRI predicate) { Resource parent = ctx.subjectStack.peek(); Resource bnode = ctx.factory.createBNode(); @@ -337,7 +424,9 @@ private Resource emitBnodePredicateObject(IRI predicate) { /** - * Cleans up stacks and handles closing of collections, containers, and resource blocks. + * Cleans up parsing context stacks when an XML end element is encountered. 
+ * @param uri the namespace URI of the element + * @param localName the local name of the element */ private void cleanEndElement(String uri, String localName) { if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java index 1bcd1cea7..52f897686 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java @@ -70,6 +70,8 @@ public void emitBNodeTriple(Resource subject, IRI predicate, String nodeID) { )); } + + public void emitTriple(Resource subject, IRI predicate, Resource object) { model.add(factory.createStatement(subject, predicate, object)); } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java similarity index 95% rename from src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java index e9442a792..4275f80c2 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java @@ -1,11 +1,11 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; -import org.semarglproject.vocab.rdfa.RDFa; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.InputStream; @@ 
-14,7 +14,15 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -public class RdfxmlParserTest { +/** + * Unit tests for the RDFXMLParser class. + * These tests verify the parser's ability to correctly parse RDF/XML + * and interact with the Model and ValueFactory, including error handling + * and unescaping of IRIs and literals, and named graphs. + */ +public class RDFXMLParserTest { + private static final Logger logger = LoggerFactory.getLogger(RDFXMLParserTest.class); + private final ValueFactory factory = new CoreseAdaptedValueFactory(); /** @@ -33,6 +41,10 @@ private Model parseRdfXml(String rdfXml) throws Exception { return model; } + /** + * Helper method to print the model. + * @param model + */ /** * Helper method to print the model. * @param model @@ -40,23 +52,29 @@ private Model parseRdfXml(String rdfXml) throws Exception { private void printModel(Model model) { model.stream().forEach(stmt -> { Value obj = stmt.getObject(); + String subjectString = stmt.getSubject().stringValue(); + String predicateString = stmt.getPredicate().stringValue(); + if (obj instanceof Literal literal) { - if (literal.getLanguage().isPresent()) { - System.out.printf("(%s, %s, \"%s\"@%s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel(), - literal.getLanguage().get()); + String label = String.valueOf(literal.getLabel()); + String languageTag = literal.getLanguage().orElse(null); + + if (languageTag != null) { + logger.debug("({}, {}, \"{}\"@{})", + subjectString, + predicateString, + label, + languageTag); } else { - System.out.printf("(%s, %s, \"%s\")%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel()); + logger.debug("({}, {}, \"{}\")", + subjectString, + predicateString, + label); } } else { - System.out.printf("(%s, %s, %s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), + 
logger.debug("({}, {}, {})", + subjectString, + predicateString, obj.stringValue()); } }); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java index 56bb2cff8..62a5d9671 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitterTest.java @@ -12,6 +12,19 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +/** + * Unit tests for the RDFXMLStatementEmitter class. + * + * This test suite verifies that the emitter correctly adds RDF statements to the provided + * Model based on various RDF/XML constructs including: + * - Plain literals + * - Typed literals + * - Language-tagged literals + * - Resource IRIs + * - Blank nodes + * - RDF types + * - Property attributes + */ public class RDFXMLStatementEmitterTest { private Model model; @@ -25,6 +38,10 @@ public void setUp() { emitter = new RDFXMLStatementEmitter(model, factory); } + /** + * Test emitting a plain literal statement without language or datatype. + * Asserts that the triple is added to the model correctly. + */ @Test public void testEmitLiteral_plain() { Literal literal = factory.createLiteral("hello"); @@ -46,6 +63,10 @@ public void testEmitLiteral_plain() { assertTrue(found, "Expected statement not found in model"); } + /** + * Test emitting a literal with a language tag. + * Verifies that the correct literal is added to the model. + */ @Test public void testEmitLiteral_withLang() { Resource subject = factory.createBNode(); @@ -57,6 +78,10 @@ public void testEmitLiteral_withLang() { assertEquals("bonjour", obj.stringValue()); } + /** + * Test emitting a literal with a datatype IRI. + * Verifies that the correct typed literal is added to the model. 
+ */ @Test public void testEmitLiteral_withDatatype() { Resource subject = factory.createBNode(); @@ -68,6 +93,10 @@ public void testEmitLiteral_withDatatype() { assertEquals("42", obj.stringValue()); } + /** + * Test emitting a rdf:type statement for a subject. + * Verifies that the rdf:type triple is correctly created. + */ @Test public void testEmitType() { Resource subject = factory.createIRI("http://example.org/Alice"); @@ -76,6 +105,9 @@ public void testEmitType() { assertTrue(model.contains(subject, RDF.type.getIRI(), factory.createIRI("http://example.org/Person"))); } + /** + * Test emitting a triple where the object is a resource IRI resolved against a base. + */ @Test public void testEmitResourceTriple() { Resource subject = factory.createIRI("http://example.org/Alice"); @@ -85,6 +117,9 @@ public void testEmitResourceTriple() { assertTrue(model.contains(subject, predicate, factory.createIRI("http://example.org/Bob"))); } + /** + * Test emitting a triple where the object is a blank node identified by nodeID. + */ @Test public void testEmitBNodeTriple() { Resource subject = factory.createIRI("http://example.org/Alice"); @@ -96,6 +131,9 @@ public void testEmitBNodeTriple() { assertTrue(obj.stringValue().contains("_:b123")); } + /** + * Test emitting a generic triple with subject, predicate, and object resources. + */ @Test public void testEmitTriple() { Resource s = factory.createIRI("http://example.org/s"); @@ -107,6 +145,9 @@ public void testEmitTriple() { assertTrue(model.contains(s, p, o)); } + /** + * Test emitting triples from XML attributes. 
+ */ @Test public void testEmitPropertyAttributes() { Resource s = factory.createIRI("http://example.org/thing"); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java index e79654630..89321f2ee 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtilsTest.java @@ -15,22 +15,39 @@ import static org.junit.jupiter.api.Assertions.*; +/** + * Unit tests for the RDFXMLUtils utility class. + * + * This test suite validates the correct behavior of various utility methods + * related to RDF/XML parsing, including QName expansion, datatype resolution, + * subject extraction, IRI resolution, container detection, syntax attribute recognition, + * and RDF collection creation. + */ public class RDFXMLUtilsTest { private final ValueFactory factory = new CoreseAdaptedValueFactory(); + /** + * Tests expansion of QNames into full IRIs using provided namespace and local name. + */ @Test public void testExpandQName() { assertEquals("http://example.org/test", RDFXMLUtils.expandQName("http://example.org/", "test", "ex:test")); assertEquals("ex:test", RDFXMLUtils.expandQName(null, null, "ex:test")); } + /** + * Tests resolution of known and unknown datatype URIs. + */ @Test public void testResolveDatatype() { assertEquals(Optional.of(XSD.STRING), RDFXMLUtils.resolveDatatype(XSD.STRING.getIRI().stringValue())); assertTrue(RDFXMLUtils.resolveDatatype("http://nonexistentdatatype").isEmpty()); } + /** + * Tests subject extraction using the rdf:about attribute. 
+ */ @Test public void testExtractSubjectWithAbout() { AttributesImpl attrs = new AttributesImpl(); @@ -39,6 +56,10 @@ public void testExtractSubjectWithAbout() { assertEquals("http://example.org/subject", subject.stringValue()); } + + /** + * Tests subject extraction using the rdf:nodeID attribute. + */ @Test public void testExtractSubjectWithNodeID() { AttributesImpl attrs = new AttributesImpl(); @@ -47,6 +68,9 @@ public void testExtractSubjectWithNodeID() { assertTrue(subject.stringValue().contains("_:b123")); } + /** + * Tests subject extraction using the rdf:ID attribute with base URI resolution. + */ @Test public void testExtractSubjectWithID() { AttributesImpl attrs = new AttributesImpl(); @@ -55,11 +79,17 @@ public void testExtractSubjectWithID() { assertEquals("http://example.org/id123", subject.stringValue()); } + /** + * Tests resolving a relative IRI against a base URI. + */ @Test public void testResolveAgainstBase() { assertEquals("http://base.org/path", RDFXMLUtils.resolveAgainstBase("path", "http://base.org/")); } + /** + * Tests recognition of RDF/XML syntax attributes. + */ @Test public void testIsSyntaxAttribute() { assertTrue(RDFXMLUtils.isSyntaxAttribute(RDF.type.getNamespace(), "about", "rdf:about")); @@ -67,12 +97,18 @@ public void testIsSyntaxAttribute() { assertFalse(RDFXMLUtils.isSyntaxAttribute("http://example.org/", "type", "ex:type")); } + /** + * Tests detection of RDF container types (Bag, Seq, Alt). + */ @Test public void testIsContainer() { assertTrue(RDFXMLUtils.isContainer("Bag", RDF.type.getNamespace())); assertFalse(RDFXMLUtils.isContainer("notAContainer", "http://example.org/")); } + /** + * Tests creation of an RDF collection using rdf:first, rdf:rest, and rdf:nil. 
+ */ @Test public void testCreateRdfCollection() { Model model = new CoreseModel(); @@ -85,7 +121,4 @@ public void testCreateRdfCollection() { assertTrue(model.contains(null, RDF.first.getIRI(), r1)); assertTrue(model.contains(null, RDF.rest.getIRI(), RDF.nil.getIRI())); } - - - -} +} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserTest.java index 13a245645..bdf67298b 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/turtle/ANTLRTurtleParserTest.java @@ -11,6 +11,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +/** + * Unit tests for the ANTLRTurtle class. + * These tests verify the parser's ability to correctly parse Turtle + * and interact with the Model and ValueFactory, including error handling + * and unescaping of IRIs and literals, and named graphs. 
+ */ public class ANTLRTurtleParserTest { private Model parseFromString(String turtleData, String baseURI) throws Exception { Model model = new CoreseModel(); From 249b769659bb5b33e4fb85c7fce54afccf7f1547 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Fri, 1 Aug 2025 17:53:46 +0200 Subject: [PATCH 62/64] comments and code cleaning --- .../impl/io/parser/rdfxml/RDFXMLParser.java | 10 +- .../parser/rdfxml/RDFXMLStatementEmitter.java | 56 +- .../impl/io/parser/rdfxml/RDFXMLUtils.java | 100 ++- .../impl/io/parser/rdfxml/RdfXmlParser.java | 384 ------------ .../impl/io/parser/rdfxml/RdfXmlUtils.java | 117 ---- ...{RdfXmlContext.java => RDFXMLContext.java} | 4 +- .../io/parser/rdfxml/RDFXMLParserTest.java | 247 +------- .../io/parser/rdfxml/RdfxmlParserTest.java | 576 ------------------ 8 files changed, 180 insertions(+), 1314 deletions(-) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java rename src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/{RdfXmlContext.java => RDFXMLContext.java} (96%) delete mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java index a50a4d060..917fa0793 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParser.java @@ -6,7 +6,7 @@ import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; -import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; +import 
fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RDFXMLContext; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; @@ -23,7 +23,7 @@ import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLUtils.*; /** - * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). + * SAX-based RDF/XML parser using a shared parsing context ({@link RDFXMLContext}). * *

This parser processes RDF/XML documents using the SAX streaming API. * It tracks RDF constructs (resources, properties, literals, containers, collections) @@ -43,7 +43,7 @@ public class RDFXMLParser extends AbstractRDFParser { private StringBuilder characters = new StringBuilder(); /** Shared state across SAX callbacks. */ - private RdfXmlContext ctx; + private RDFXMLContext ctx; private final RDFXMLStatementEmitter emitter; @@ -66,7 +66,7 @@ public RDFXMLParser(Model model, ValueFactory factory) { */ public RDFXMLParser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); - this.ctx = new RdfXmlContext(getModel(), getValueFactory()); + this.ctx = new RDFXMLContext(getModel(), getValueFactory()); this.emitter = new RDFXMLStatementEmitter(model, factory); } @@ -336,7 +336,7 @@ private boolean processNodeElement(String localName, String uri, String qName, A // Emit rdf:type if typed node if (!isDescription(localName, uri)) { - emitter.emitType(newSubject, expandQName(uri, localName, qName)); + emitter.emitType(newSubject, expandQName(uri, localName, qName)); } // Handle non-syntax attributes diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java index 52f897686..b8e502446 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLStatementEmitter.java @@ -9,16 +9,35 @@ import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLUtils.*; +/** + * Emits RDF statements from parsed RDF/XML constructs using a given RDF Model + * and ValueFactory. + */ public class RDFXMLStatementEmitter { private final Model model; private final ValueFactory factory; + /** + * Constructs a new emitter for the given RDF model and value factory. 
+ * + * @param model the RDF model where statements will be added + * @param factory the RDF value factory used to create RDF terms + */ public RDFXMLStatementEmitter(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } + /** + * Emits a literal statement with optional datatype or language. + * + * @param subject the subject of the statement + * @param predicate the predicate of the statement + * @param text the literal value + * @param datatypeUri the datatype URI (optional, may be null) + * @param lang the language tag (optional, may be null) + */ public void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri, String lang) { Value literal; if (datatypeUri != null && !datatypeUri.isEmpty()) { @@ -36,10 +55,23 @@ public void emitLiteral(Resource subject, IRI predicate, String text, String dat model.add(factory.createStatement(subject, predicate, literal)); } + + /** + * Emits a rdf:type statement for the given subject and type URI. + * + * @param subject the subject resource + * @param expandedQName the fully expanded IRI for the type + */ public void emitType(Resource subject, String expandedQName) { model.add(factory.createStatement(subject, RDF.type.getIRI(), factory.createIRI(expandedQName))); } + /** + * Emits RDF statements for non-syntax XML attributes as predicate-object pairs. + * + * @param subject the subject resource + * @param attrs the XML attributes associated with the element + */ public void emitPropertyAttributes(Resource subject, Attributes attrs) { for (int i = 0; i < attrs.getLength(); i++) { String attrURI = attrs.getURI(i); @@ -54,6 +86,14 @@ public void emitPropertyAttributes(Resource subject, Attributes attrs) { } } + /** + * Emits a triple where the object is an IRI resolved against the base URI. 
+ * + * @param subject the subject of the triple + * @param predicate the predicate of the triple + * @param resource the relative or absolute IRI string + * @param baseURI the base URI used to resolve the resource + */ public void emitResourceTriple(Resource subject, IRI predicate, String resource, String baseURI) { model.add(factory.createStatement( subject, @@ -62,6 +102,13 @@ public void emitResourceTriple(Resource subject, IRI predicate, String resource, )); } + /** + * Emits a triple where the object is a blank node identified by node ID. + * + * @param subject the subject of the triple + * @param predicate the predicate of the triple + * @param nodeID the blank node identifier + */ public void emitBNodeTriple(Resource subject, IRI predicate, String nodeID) { model.add(factory.createStatement( subject, @@ -70,8 +117,13 @@ public void emitBNodeTriple(Resource subject, IRI predicate, String nodeID) { )); } - - + /** + * Emits a triple with a resource as object. + * + * @param subject the subject of the triple + * @param predicate the predicate of the triple + * @param object the object resource of the triple + */ public void emitTriple(Resource subject, IRI predicate, Resource object) { model.add(factory.createStatement(subject, predicate, object)); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java index d9faf9cb7..e31e09eff 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLUtils.java @@ -9,15 +9,38 @@ import java.util.List; import java.util.Optional; - +/** + * Utility methods for processing RDF/XML constructs. + *

+ * This class provides helpers for handling RDF/XML syntax attributes, + * QName expansion, datatype resolution, subject extraction, and RDF collections. + *

+ */ public class RDFXMLUtils { private RDFXMLUtils() { + // Utility class; no instantiation. } + + /** + * Expands a QName using the given namespace URI and local name. + * + * @param uri the namespace URI + * @param localName the local name + * @param qName the qualified name (used as fallback) + * @return the expanded IRI, or the qName if the URI is null or empty + */ public static String expandQName(String uri, String localName, String qName) { return (uri != null && !uri.isEmpty()) ? uri + localName : qName; } + + /** + * Resolves a datatype URI to a known XSD enum constant. + * + * @param datatypeUri the datatype URI + * @return an Optional containing the matching XSD type if found + */ public static Optional resolveDatatype(String datatypeUri) { for (XSD xsd : XSD.values()) { if (xsd.getIRI().stringValue().equals(datatypeUri)) return Optional.of(xsd); @@ -25,6 +48,15 @@ public static Optional resolveDatatype(String datatypeUri) { return Optional.empty(); } + /** + * Extracts a subject resource from RDF/XML attributes. + * Supports rdf:about, rdf:nodeID, rdf:ID. + * + * @param attrs the XML attributes + * @param factory the value factory + * @param baseURI the base URI for resolving relative IRIs + * @return a Resource representing the subject + */ public static Resource extractSubject(Attributes attrs, ValueFactory factory, String baseURI) { String about = attrs.getValue(RDF.type.getNamespace(), "about"); if (about != null) return factory.createIRI(resolveAgainstBase(about, baseURI)); @@ -39,6 +71,14 @@ public static Resource extractSubject(Attributes attrs, ValueFactory factory, St return factory.createBNode(); } + /** + * Resolves a relative IRI against a base URI. 
+ * + * @param iri the relative or absolute IRI + * @param baseURI the base URI + * @return the resolved IRI + * @throws IncorrectFormatException if URI resolution fails + */ public static String resolveAgainstBase(String iri, String baseURI) { if (iri == null) return null; if (iri.isEmpty()) return baseURI; @@ -52,20 +92,50 @@ public static String resolveAgainstBase(String iri, String baseURI) { } } + /** + * Determines whether the element is a rdf:Description. + * + * @param localName the local name of the element + * @param uri the namespace URI + * @return {@code true} if it's an RDF description element + */ public static boolean isDescription(String localName, String uri) { return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); } + + /** + * Checks if the attributes define a subject node (via about, nodeID, or ID). + * + * @param attrs the attributes to check + * @return true if any node-identifying attribute is present + */ public static boolean isNodeElement(Attributes attrs) { return attrs.getValue(RDF.type.getNamespace(), "about") != null || attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || attrs.getValue(RDF.type.getNamespace(), "ID") != null; } + + /** + * Retrieves the value of rdf:parseType from attributes. + * + * @param attrs the attributes + * @return the parseType value, or null if not present + */ public static String getParseType(Attributes attrs) { return attrs.getValue(RDF.type.getNamespace(), "parseType"); } + + /** + * Determines whether a given attribute is an RDF/XML syntax attribute. 
+ * + * @param uri the namespace URI + * @param localName the local name + * @param qName the qualified name + * @return true if the attribute is considered syntax-related + */ public static boolean isSyntaxAttribute(String uri, String localName, String qName) { if (uri != null && RDF.type.getNamespace().equals(uri)) { return switch (localName) { @@ -76,6 +146,12 @@ public static boolean isSyntaxAttribute(String uri, String localName, String qNa return qName.startsWith("xml:"); } + /** + * Resolves an XSD datatype from a URI. + * + * @param uri the datatype URI + * @return an Optional containing the XSD constant if matched + */ public static Optional fromURI(String uri) { for (XSD xsd : XSD.values()) { if (xsd.getIRI().stringValue().equals(uri)) { @@ -85,15 +161,37 @@ public static Optional fromURI(String uri) { return Optional.empty(); } + /** + * Checks if an element is the top-level rdf:RDF wrapper. + * + * @param uri the namespace URI + * @param localName the local name + * @return true if the element is rdf:RDF + */ public static boolean isRdfRDF(String uri, String localName) { return RDF.type.equals(uri) && "RDF".equals(localName); } + /** + * Determines if an element is a recognized RDF container: Bag, Seq, or Alt. + * + * @param localName the local name + * @param uri the namespace URI + * @return true if the element is a container type + */ public static boolean isContainer(String localName, String uri) { return RDF.type.getNamespace().equals(uri) && ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); } + /** + * Creates a linked RDF collection using rdf:first and rdf:rest. 
+ * + * @param items the list of resource items + * @param model the RDF model to populate + * @param factory the RDF value factory + * @return the head resource of the RDF collection + */ public static Resource createRdfCollection(List items, Model model, ValueFactory factory) { Resource head = factory.createBNode(); Resource current = head; diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java deleted file mode 100644 index ce3a6b523..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlParser.java +++ /dev/null @@ -1,384 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.io.RDFFormat; -import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.io.parser.rdfxml.context.RdfXmlContext; -import org.xml.sax.Attributes; -import org.xml.sax.InputSource; -import org.xml.sax.helpers.DefaultHandler; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; -import java.io.InputStream; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Optional; - -import static fr.inria.corese.core.next.impl.io.parser.rdfxml.RdfXmlUtils.*; - -/** - * SAX-based RDF/XML parser using a shared parsing context ({@link RdfXmlContext}). - * - *

This parser processes RDF/XML documents using the SAX streaming API. - * It tracks RDF constructs (resources, properties, literals, containers, collections) - * using an explicit stack-based context, and supports features like xml:lang, - * rdf:datatype, rdf:parseType, and property attributes.

- * - *

The parser adds RDF statements to the provided {@link Model} using - * the supplied {@link ValueFactory}. This parser supports nested nodes, - * blank nodes, typed nodes, and RDF collections.

- */ -public class RdfXmlParser extends AbstractRDFParser { - - /** RDF/XML format identifier for this parser. */ - private final RDFFormat format = RDFFormat.RDFXML; - - /** Buffer for accumulating character data between start and end tags. */ - private StringBuilder characters = new StringBuilder(); - - /** Shared state across SAX callbacks. */ - private RdfXmlContext ctx; - - /** - * Creates a new parser with a target RDF model and factory. - * - * @param model the RDF model to populate - * @param factory the RDF value factory for term creation - */ - public RdfXmlParser(Model model, ValueFactory factory) { - this(model, factory, null); - } - - /** - * Creates a new parser with a target RDF model, factory, and configuration options. - * - * @param model the RDF model to populate - * @param factory the RDF value factory for term creation - * @param config optional configuration options for the parser - */ - public RdfXmlParser(Model model, ValueFactory factory, IOOptions config) { - super(model, factory, config); - this.ctx = new RdfXmlContext(getModel(), getValueFactory()); - } - - @Override - public RDFFormat getRDFFormat() { - return format; - } - - @Override - public void parse(InputStream in, String baseURI) { - ctx.baseURI = baseURI; - try { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - saxParser.parse(in, new RdfXmlSaxHandler()); - } catch (Exception e) { - throw new RuntimeException("Failed to parse RDF/XML input stream", e); - } - } - - @Override - public void parse(Reader reader, String baseURI) { - ctx.baseURI = baseURI; - try { - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - SAXParser saxParser = factory.newSAXParser(); - InputSource inputSource = new InputSource(reader); - saxParser.parse(inputSource, new RdfXmlSaxHandler()); - } catch (Exception e) { - throw new RuntimeException("Failed to parse 
RDF/XML input stream", e); - } - } - - /** - * Internal SAX handler that delegates to the parser's methods - */ - private class RdfXmlSaxHandler extends DefaultHandler { - - @Override - public void characters(char[] ch, int start, int length) { - RdfXmlParser.this.handleCharacters(ch, start, length); - } - - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { - RdfXmlParser.this.handleStartElement(uri, localName, qName, attrs); - } - - @Override - public void endElement(String uri, String localName, String qName) { - RdfXmlParser.this.handleEndElement(uri, localName, qName); - } - } - - /** - * Handles character data between XML elements - */ - private void handleCharacters(char[] ch, int start, int length) { - characters.append(ch, start, length); - } - - /** - * Handles opening of an XML element. - * Identifies node elements, container constructs, properties, - * and special parseType attributes, updating the parsing context accordingly. 
- */ - private void handleStartElement(String uri, String localName, String qName, Attributes attrs) { - // Skip the top-level rdf:RDF wrapper element - if (RdfXmlUtils.isRdfRDF(uri, localName)) return; - - // Reset character buffer - characters.setLength(0); - - // Handle xml:base (change base URI dynamically) - String xmlBase = attrs.getValue("xml:base"); - if (xmlBase != null) { - ctx.baseURI = xmlBase; - } - - // Handle xml:lang - String xmlLang = attrs.getValue("xml:lang"); - if (xmlLang != null) { - ctx.langStack.push(xmlLang); - } - - // Handle rdf:datatype (applies to property literal values) - String datatype = attrs.getValue(RDF.type.getNamespace(), "datatype"); - if (datatype != null) { - ctx.datatypeStack.push(datatype); - } - - // --- RDF Container Element --- - if (RdfXmlUtils.isContainer(localName, uri)) { - Resource subject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - ctx.subjectStack.push(subject); - ctx.inContainer = true; - ctx.liIndex = 1; - - IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.model.add(ctx.factory.createStatement(subject, RDF.type.getIRI(), typeIRI)); - return; - } - - // --- Container Items (rdf:li, rdf:_n) --- - if (ctx.inContainer && RDF.type.getNamespace().equals(uri)) { - String pred = null; - if ("li".equals(localName)) { - pred = RDF.type.getNamespace() + "_" + ctx.liIndex++; - } else if (localName.matches("_\\d+")) { - pred = RDF.type.getNamespace() + localName; - } - - if (pred != null) { - IRI predicate = ctx.factory.createIRI(pred); - String resource = attrs.getValue("rdf:resource"); - if (resource != null) { - ctx.model.add(ctx.factory.createStatement( - ctx.subjectStack.peek(), - predicate, - ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) - )); - } - return; - } - } - - // --- parseType="Collection" --- - String parseType = attrs.getValue(RDF.type.getNamespace(), "parseType"); - if ("Collection".equals(parseType)) { - IRI 
predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.predicateStack.push(predicate); - ctx.collectionSubject = ctx.subjectStack.peek(); - ctx.collectionPredicate = predicate; - ctx.collectionBuilder = new ArrayList<>(); - ctx.inCollection = true; - return; - } - - // --- Inside Collection: Collect rdf:Description Items --- - if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { - Resource item = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - ctx.collectionBuilder.add(item); - ctx.suppressSubject = true; - return; - } - - // --- Node Element: rdf:Description or typed node --- - boolean isNode = RdfXmlUtils.isDescription(localName, uri) - || (ctx.subjectStack.isEmpty() && RdfXmlUtils.isNodeElement(attrs)); - - if (isNode) { - Resource newSubject = RdfXmlUtils.extractSubject(attrs, ctx.factory, ctx.baseURI); - - // If current node is object of a property - if (!ctx.predicateStack.isEmpty() && !ctx.subjectStack.isEmpty()) { - Resource parent = ctx.subjectStack.peek(); - IRI predicate = ctx.predicateStack.pop(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, newSubject)); - } - - ctx.subjectStack.push(newSubject); - - // Emit rdf:type for typed node elements - if (!RdfXmlUtils.isDescription(localName, uri)) { - IRI typeIRI = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.model.add(ctx.factory.createStatement(newSubject, RDF.type.getIRI(), typeIRI)); - } - - // Handle non-RDF attributes as property triples - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - String value = attrs.getValue(i); - - if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - - IRI pred = ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); - ctx.model.add(ctx.factory.createStatement(newSubject, pred, 
ctx.factory.createLiteral(value))); - } - - return; - } - - // --- Property Element (e.g., ) --- - IRI predicate = ctx.factory.createIRI(RdfXmlUtils.expandQName(uri, localName, qName)); - ctx.predicateStack.push(predicate); - - // --- Property Resource/Object reference --- - String resource = attrs.getValue(RDF.type.getNamespace(), "resource"); - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - - if (resource != null || nodeID != null) { - Resource object = resource != null - ? ctx.factory.createIRI(RdfXmlUtils.resolveAgainstBase(resource, ctx.baseURI)) - : ctx.factory.createBNode("_:" + nodeID); - - ctx.model.add(ctx.factory.createStatement( - ctx.subjectStack.peek(), - predicate, - object - )); - - ctx.predicateStack.pop(); // already used - return; - } - - // --- parseType="Resource": create blank node --- - if ("Resource".equals(parseType)) { - Resource parent = ctx.subjectStack.peek(); - Resource bnode = ctx.factory.createBNode(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - ctx.subjectStack.push(bnode); - return; - } - - // --- Inline property attributes: Create blank node with attributes --- - boolean hasNonSyntaxAttributes = false; - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - if (!RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) { - hasNonSyntaxAttributes = true; - break; - } - } - - if (hasNonSyntaxAttributes) { - Resource parent = ctx.subjectStack.peek(); - Resource bnode = ctx.factory.createBNode(); - ctx.model.add(ctx.factory.createStatement(parent, predicate, bnode)); - - for (int i = 0; i < attrs.getLength(); i++) { - String attrURI = attrs.getURI(i); - String attrLocal = attrs.getLocalName(i); - String attrQName = attrs.getQName(i); - String value = attrs.getValue(i); - - if (RdfXmlUtils.isSyntaxAttribute(attrURI, attrLocal, attrQName)) continue; - - IRI attrPred = 
ctx.factory.createIRI(RdfXmlUtils.expandQName(attrURI, attrLocal, attrQName)); - ctx.model.add(ctx.factory.createStatement(bnode, attrPred, ctx.factory.createLiteral(value))); - } - ctx.predicateStack.pop(); // already emitted - } - } - - /** - * Handles the end of an XML element, emitting a literal or cleaning up context stacks. - */ - private void handleEndElement(String uri, String localName, String qName) { - String text = characters.toString().trim(); - characters.setLength(0); - - if (!ctx.predicateStack.isEmpty() && !text.isEmpty()) { - IRI predicate = ctx.predicateStack.pop(); - Resource subject = ctx.subjectStack.peek(); - String datatypeUri = ctx.datatypeStack.isEmpty() ? null : ctx.datatypeStack.pop(); - emitLiteral(subject, predicate, text, datatypeUri); - return; - } - cleanEndElement(uri, localName); - } - - /** - * Emits a literal statement (optionally typed or language-tagged). - */ - private void emitLiteral(Resource subject, IRI predicate, String text, String datatypeUri) { - Value literal; - if (datatypeUri != null && !datatypeUri.isEmpty()) { - Optional known = RdfXmlUtils.resolveDatatype(datatypeUri); - IRI dtype = known.map(XSD::getIRI).orElseGet(() -> { - System.err.printf("[Warning] Unknown datatype: %s%n", datatypeUri); - return ctx.factory.createIRI(datatypeUri); - }); - literal = ctx.factory.createLiteral(text, dtype); - } else { - String lang = ctx.langStack.isEmpty() ? null : ctx.langStack.peek(); - literal = (lang != null && !lang.equals("__NO_LANG__")) - ? ctx.factory.createLiteral(text, lang) - : ctx.factory.createLiteral(text); - } - ctx.model.add(ctx.factory.createStatement(subject, predicate, literal)); - } - - /** - * Cleans up stacks and handles closing of collections, containers, and resource blocks. 
- */ - private void cleanEndElement(String uri, String localName) { - if (!ctx.langStack.isEmpty()) ctx.langStack.pop(); - if (!ctx.predicateStack.isEmpty()) ctx.predicateStack.pop(); - if (RdfXmlUtils.isContainer(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - ctx.inContainer = false; - ctx.liIndex = 1; - return; - } - if (ctx.inCollection && localName.equals(ctx.collectionPredicate.getLocalName())) { - Resource listHead = createRdfCollection(ctx.collectionBuilder, ctx.model, ctx.factory); - ctx.model.add(ctx.factory.createStatement(ctx.collectionSubject, ctx.collectionPredicate, listHead)); - ctx.inCollection = false; - ctx.collectionBuilder.clear(); - return; - } - if (ctx.inCollection && RdfXmlUtils.isDescription(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - return; - } - if (RdfXmlUtils.isDescription(localName, uri)) { - if (!ctx.subjectStack.isEmpty()) ctx.subjectStack.pop(); - } - if (!ctx.subjectStack.isEmpty() && !ctx.predicateStack.isEmpty()) { - ctx.predicateStack.pop(); - ctx.subjectStack.pop(); - } - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java deleted file mode 100644 index fc52f588a..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfXmlUtils.java +++ /dev/null @@ -1,117 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.literal.XSD; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import org.xml.sax.*; - -import java.util.List; -import java.util.Optional; - - -public class RdfXmlUtils { - private RdfXmlUtils() { - } - - public static String expandQName(String uri, String localName, String qName) { - return (uri != null && !uri.isEmpty()) ? 
uri + localName : qName; - } - - public static Optional resolveDatatype(String datatypeUri) { - for (XSD xsd : XSD.values()) { - if (xsd.getIRI().stringValue().equals(datatypeUri)) return Optional.of(xsd); - } - return Optional.empty(); - } - - public static Resource extractSubject(Attributes attrs, ValueFactory factory, String baseURI) { - String about = attrs.getValue(RDF.type.getNamespace(), "about"); - if (about != null) return factory.createIRI(resolveAgainstBase(about, baseURI)); - - String nodeID = attrs.getValue(RDF.type.getNamespace(), "nodeID"); - if (nodeID != null) return factory.createBNode("_:" + nodeID); - - String id = attrs.getValue(RDF.type.getNamespace(), "ID"); - if (id != null) return factory.createIRI(resolveAgainstBase("#" + id, baseURI)); - - // Default to blank node - return factory.createBNode(); - } - - public static String resolveAgainstBase(String iri, String baseURI) { - if (iri == null) return null; - if (baseURI == null || iri.matches("^[a-zA-Z][a-zA-Z0-9+.-]*:.*")) { - // Absolute IRI or no base, return as-is - return iri; - } - - try { - return new java.net.URI(baseURI).resolve(iri).toString(); - } catch (Exception e) { - throw new RuntimeException("Failed to resolve IRI: " + iri + " against base: " + baseURI, e); - } - } - - public static boolean isDescription(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && "Description".equals(localName); - } - - public static boolean isNodeElement(Attributes attrs) { - return attrs.getValue(RDF.type.getNamespace(), "about") != null || - attrs.getValue(RDF.type.getNamespace(), "nodeID") != null || - attrs.getValue(RDF.type.getNamespace(), "ID") != null; - } - - public static boolean isSyntaxAttribute(String uri, String localName, String qName) { - if (uri != null && RDF.type.getNamespace().equals(uri)) { - return switch (localName) { - case "about", "ID", "nodeID", "resource", "parseType", "datatype" -> true; - default -> false; - }; - } - return 
qName.startsWith("xml:"); - } - - public static Optional fromURI(String uri) { - for (XSD xsd : XSD.values()) { - if (xsd.getIRI().stringValue().equals(uri)) { - return Optional.of(xsd); - } - } - return Optional.empty(); - } - - public static boolean isRdfRDF(String uri, String localName) { - return RDF.type.equals(uri) && "RDF".equals(localName); - } - - public static boolean isContainer(String localName, String uri) { - return RDF.type.getNamespace().equals(uri) && - ("Seq".equals(localName) || "Bag".equals(localName) || "Alt".equals(localName)); - } - - public static Resource createRdfCollection(List items, Model model, ValueFactory factory) { - Resource head = factory.createBNode(); - Resource current = head; - - for (int i = 0; i < items.size(); i++) { - Resource next = (i < items.size() - 1) - ? factory.createBNode() - : RDF.nil.getIRI(); // rdf:nil - - model.add(factory.createStatement(current, - RDF.first.getIRI(), - items.get(i))); - - model.add(factory.createStatement(current, - RDF.rest.getIRI(), - next)); - - current = next; - } - - return head; - } - - -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RDFXMLContext.java similarity index 96% rename from src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RDFXMLContext.java index 051b24485..4807977d3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RdfXmlContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/context/RDFXMLContext.java @@ -17,7 +17,7 @@ *

This context is typically instantiated once per parsing session and passed * throughout the parsing logic.

*/ -public class RdfXmlContext { +public class RDFXMLContext { /** The RDF model to which parsed triples will be added. */ public Model model; @@ -73,7 +73,7 @@ public class RdfXmlContext { * @param model the RDF model to populate with triples * @param factory the value factory used to create RDF terms */ - public RdfXmlContext(Model model, ValueFactory factory) { + public RDFXMLContext(Model model, ValueFactory factory) { this.model = model; this.factory = factory; } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java index 4275f80c2..f4677cdab 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RDFXMLParserTest.java @@ -1,18 +1,18 @@ package fr.inria.corese.core.next.impl.io.parser.rdfxml; -import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Value; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * Unit tests for the RDFXMLParser class. @@ -21,10 +21,6 @@ * and unescaping of IRIs and literals, and named graphs. 
*/ public class RDFXMLParserTest { - private static final Logger logger = LoggerFactory.getLogger(RDFXMLParserTest.class); - - private final ValueFactory factory = new CoreseAdaptedValueFactory(); - /** * Helper method to parse the RDF/XML String * @param rdfXml @@ -41,10 +37,6 @@ private Model parseRdfXml(String rdfXml) throws Exception { return model; } - /** - * Helper method to print the model. - * @param model - */ /** * Helper method to print the model. * @param model @@ -52,29 +44,23 @@ private Model parseRdfXml(String rdfXml) throws Exception { private void printModel(Model model) { model.stream().forEach(stmt -> { Value obj = stmt.getObject(); - String subjectString = stmt.getSubject().stringValue(); - String predicateString = stmt.getPredicate().stringValue(); - if (obj instanceof Literal literal) { - String label = String.valueOf(literal.getLabel()); - String languageTag = literal.getLanguage().orElse(null); - - if (languageTag != null) { - logger.debug("({}, {}, \"{}\"@{})", - subjectString, - predicateString, - label, - languageTag); + if (literal.getLanguage().isPresent()) { + System.out.printf("(%s, %s, \"%s\"@%s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel(), + literal.getLanguage().get()); } else { - logger.debug("({}, {}, \"{}\")", - subjectString, - predicateString, - label); + System.out.printf("(%s, %s, \"%s\")%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), + literal.getLabel()); } } else { - logger.debug("({}, {}, {})", - subjectString, - predicateString, + System.out.printf("(%s, %s, %s)%n", + stmt.getSubject().stringValue(), + stmt.getPredicate().stringValue(), obj.stringValue()); } }); @@ -331,7 +317,7 @@ public void testExample11CompleteExamplerdfDatatype() throws Exception { """.trim(); Model model = parseRdfXml(rdfXml); printModel(model); - assertEquals(1, model.size(), "Expected one RDF statement"); + assertEquals(1, model.size(), "Expected four RDF 
statements"); } /** @@ -363,7 +349,7 @@ public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { printModel(model); // Assert or inspect the result - assertEquals(4, model.size(), "Expected four RDF statements"); + assertEquals(4, model.size(), "Expected five RDF statements"); } /** @@ -441,7 +427,7 @@ public void testExample15CompleteExampleWithRdfType() throws Exception { Model model = parseRdfXml(rdfXml); printModel(model); - assertEquals(2, model.size(), "Expected two RDF statements"); + assertEquals(2, model.size(), "Expected four RDF statements"); } /** @@ -593,197 +579,4 @@ public void testExample21CompleteExampleOfRdfID() throws Exception { assertEquals(1, model.size(), "Expected one RDF statement"); } - - /** - * Test the resolving uri algorithm, simple exemple - * @throws Exception - */ - @Test - public void testResolvingIRITest0001() throws Exception { - - String rdfXml = """ - - - - - - - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - assertTrue(model.contains( factory.createIRI("http://example.org/dir/file#frag") , factory.createIRI("http://example.org/value"), factory.createLiteral("v") )); - - } - - /** - * Test the resolving uri algorithm, exemple with blank node - * @throws Exception - */ - @Test - public void testResolveIRITest0002() throws Exception { - String rdfXml = """ - - - - - - - - v - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - model.contains(factory.createIRI("http://example.org/value")); - } - - /** - * Test the resolving uri algorithm, exemple with "" - * @throws Exception - */ - @Test - public void testResolveIRITest008() throws Exception { - String rdfXml = - """ - - - - - - - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - model.contains(factory.createIRI("http://example.org/dir/file")); - } - - @Test - public void testResolveIRITest013() throws Exception { - String rdfXml = - """ - - - - - - - - - - - - - """.trim(); - Model model = 
parseRdfXml(rdfXml); - printModel(model); - } - - @Test - public void testResolveIRITest011() throws Exception { - String rdfXml = - """ - - - - - - - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - } } \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java deleted file mode 100644 index 37aeee0a1..000000000 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfxml/RdfxmlParserTest.java +++ /dev/null @@ -1,576 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfxml; - -import fr.inria.corese.core.next.api.Literal; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class RdfxmlParserTest { - /** - * Helper method to parse the RDF/XML String - * @param rdfXml - * @return model - * @throws Exception - */ - private Model parseRdfXml(String rdfXml) throws Exception { - Model model = new CoreseModel(); - ValueFactory valueFactory = new CoreseAdaptedValueFactory(); - try (InputStream inputStream = new ByteArrayInputStream(rdfXml.getBytes(StandardCharsets.UTF_8))) { - RdfXmlParser parser = new RdfXmlParser(model, valueFactory); - parser.parse(inputStream); - } - return model; - } - - /** - * Helper method to print the model. 
- * @param model - */ - private void printModel(Model model) { - model.stream().forEach(stmt -> { - Value obj = stmt.getObject(); - if (obj instanceof Literal literal) { - if (literal.getLanguage().isPresent()) { - System.out.printf("(%s, %s, \"%s\"@%s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel(), - literal.getLanguage().get()); - } else { - System.out.printf("(%s, %s, \"%s\")%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - literal.getLabel()); - } - } else { - System.out.printf("(%s, %s, %s)%n", - stmt.getSubject().stringValue(), - stmt.getPredicate().stringValue(), - obj.stringValue()); - } - }); - } - - - /** - * Test node elements with IRIs - * @throws Exception - */ - @Test - public void testNodeElementsWithIRIs() throws Exception { - String rdfXml = """ - - - - - - - - - - - - - - """; - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(2, model.size(), "Expected two RDF statements"); - - } - - /** - * Test a basic RDF/XML file - * @throws Exception - */ - @Test - public void testBasicRdfParsing() throws Exception { - String rdfXml = """ - - - - John Smith - 2025-07-07 - - - """; - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(2, model.size(), "Expected two RDF statements"); - } - - /** - * Test a RDF/XML file with Complete description of all graph paths - * @throws Exception - */ - @Test - public void testExample3CompleteDescriptionOfAllGraphPaths() throws Exception { - String rdfXml = """ - - - - - - - - - - - - - - - - Dave Beckett - - - - - - RDF 1.2 XML Syntax - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(5, model.size(), "Expected five RDF statements"); - } - - /** - * Test RDF/XML File Using multiple property elements on a node element - * @throws Exception - */ - @Test - public void testExample4UsingMultiplePropertyElements() throws Exception { - String rdfXml = """ - - - - - - 
- - - - Dave Beckett - - - RDF 1.2 XML Syntax - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected four RDF statements"); - } - - /** - * Test RDF/XML with Empty property elements - * @throws Exception - */ - @Test - public void testExample5EmptyPropertyElements() throws Exception { - String rdfXml = """ - - - - - - - Dave Beckett - - - RDF 1.2 XML Syntax - - - """.trim(); - - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected four RDF statements"); - - } - - /** - * Test a RDF/XML file with Replacing property elements with string literal content into property attributes - * @throws Exception - */ - @Test - public void testExample6ReplacingPropertyElementsWithStringLiteral() throws Exception { - String rdfXml = """ - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected four RDF statements"); - - } - - /** - * Test a Complete RDF/XML - * @throws Exception - */ - @Test - public void testExample7CompleteRDFXML() throws Exception { - String rdfXml = """ - - - - - - - - - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected four RDF statements"); - } - - /** - * Test a Complete example of xml:lang - * @throws Exception - */ - @Test - public void testExample8CompleteExampleXmlLang() throws Exception { - String rdfXml = """ - - - - - RDF 1.2 XML Syntax - RDF 1.2 XML Syntax - RDF 1.2 XML Syntax - - - - Der Baum - Das Buch ist außergewöhnlich - The Tree - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(6, model.size(), "Expected six RDF statements"); - } - - @Test - public void testExample11CompleteExamplerdfDatatype() throws Exception { - String rdfXml = """ - - - - - 123 - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(1, 
model.size(), "Expected four RDF statements"); - } - - /** - * Test a Complete RDF/XML file with a description of graph using rdf:nodeID - * @throws Exception - */ - @Test - public void testExample12CompleteRDFXMLUsingRdfNodeID() throws Exception { - - String rdfXml = """ - - - - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - - // Assert or inspect the result - assertEquals(4, model.size(), "Expected five RDF statements"); - } - - /** - * Test a RDF/XML file with a Complete example using rdf:parseType=Resource - * @throws Exception - */ - @Test - public void testExample13CompleteExampleUsingRdfparseTypeResource() throws Exception { - String rdfXml = """ - - - - - Dave Beckett - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected four RDF statements"); - - } - - /** - * Test a RDF/XML file with a Complete example of property attributes on an empty property element - * @throws Exception - */ - @Test - public void testExample14CompleteExampleOfPorpertyAttributesOnAnEmptyPropertyElement() throws Exception { - - String rdfXml = """ - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(3, model.size(), "Expected three RDF statements"); - } - - /** - * Test a RDF/XML file with a Complete example with rdf:type - * @throws Exception - */ - @Test - public void testExample15CompleteExampleWithRdfType() throws Exception { - String rdfXml = """ - - - - - - A marvelous thing - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(2, model.size(), "Expected four RDF statements"); - } - - /** - * Test a RDF/XML file with a Complete example using a typed node element to replace an rdf:type - * @throws Exception - */ - @Test - public void testExample16CompleteExampleUsingATypedNodeElementToReplaceAnRdfType() throws Exception { - String rdfXml = """ - - - - - A marvelous thing - - - 
- """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(2, model.size(), "Expected two RDF statements"); - - } - - @Test - /** - * Test a XML/RDF File using rdf:ID and xml:base - */ - public void testExample17CompleteExampleUsingRdfIDAndXmlbase() throws Exception { - String rdfXml = """ - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(1, model.size(), "Expected one RDF statement"); - - } - - /** - * Test a Complex example using RDF list properties - * @throws Exception - */ - @Test - public void testExample18ComplexExampleUsingRdfListProperties() throws Exception { - - String rdfXml = """ - - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected three RDF statements"); - } - - /** - * Test a Complete example using rdf:li - * @throws Exception - */ - @Test - public void testExample19CompleteExampleUsingRdfliProperties() throws Exception { - - String rdfXml = """ - - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(4, model.size(), "Expected three RDF statements"); - - } - - /** - * Test a Complete example of a RDF collection - * @throws Exception - */ - @Test - public void testExample20CompleteExampleOfARdfCollectionOfNodes() throws Exception { - String rdfXml = """ - - - - - - - - - - - - - """.trim(); - - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(7, model.size(), "Expected three RDF statements"); - } - - /** - * Test a Complete example of rdf:ID reifying a property element - * @throws Exception - */ - @Test - public void testExample21CompleteExampleOfRdfID() throws Exception { - String rdfXml = """ - - - - blah - - - - """.trim(); - Model model = parseRdfXml(rdfXml); - printModel(model); - assertEquals(1, model.size(), "Expected one RDF statement"); - - } -} \ No newline at end of file From 
4206d90c0e9d810efe904abb43610f64512e7ac6 Mon Sep 17 00:00:00 2001 From: pierrerene Date: Tue, 2 Sep 2025 16:22:26 +0200 Subject: [PATCH 63/64] remove duplicated SerializationConstants.java --- .../util/SerializationConstants.java | 110 ------------------ 1 file changed, 110 deletions(-) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java deleted file mode 100644 index dda97db5f..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java +++ /dev/null @@ -1,110 +0,0 @@ -package fr.inria.corese.core.next.impl.io.serialization.util; - -import fr.inria.corese.core.next.impl.common.vocabulary.OWL; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.common.vocabulary.RDFS; -import fr.inria.corese.core.next.impl.common.vocabulary.XSD; - - -/** - * Provides common constants used throughout the RDF serialization process. - * This includes URIs for common RDF, RDFS, XSD, and OWL vocabularies, - * as well as various special characters and strings used in serialization formats - * like Turtle, N-Triples, and N-Quads. 
- */ -public final class SerializationConstants { - - private SerializationConstants() { - // Private constructor to prevent instantiation - } - - // --- Standard RDF/RDFS/XSD/OWL URIs --- - public static final String RDF_NS = RDF.HTML.getNamespace(); - public static final String RDF_TYPE = RDF.type.getIRI().stringValue(); - public static final String RDF_FIRST = RDF.first.getIRI().stringValue(); - public static final String RDF_REST = RDF.rest.getIRI().stringValue(); - public static final String RDF_NIL = RDF.nil.getIRI().stringValue(); - - - public static final String RDFS_NS = RDFS.Resource.getNamespace(); - - public static final String XSD_NS = XSD.xsdString.getNamespace(); - - public static final String OWL_NS = OWL.NS; - - public static final String XSD_STRING = XSD_NS + "string"; - public static final String XSD_INTEGER = XSD_NS + "integer"; - public static final String XSD_DECIMAL = XSD_NS + "decimal"; - public static final String XSD_DOUBLE = XSD_NS + "double"; - public static final String XSD_BOOLEAN = XSD_NS + "boolean"; - public static final String XSD_DATETIME = XSD_NS + "dateTime"; - - // Nouveau namespace FOAF - public static final String FOAF_NS = "http://xmlns.com/foaf/0.1/"; - - - // --- Common Delimiters and Special Characters in Serialization --- - public static final String SPACE = " "; - public static final String TAB = "\t"; - public static final String LINE_FEED = "\n"; - public static final String CARRIAGE_RETURN = "\r"; - public static final String NEWLINE = LINE_FEED; - - public static final String POINT = "."; - public static final String SEMICOLON = ";"; - public static final String COMMA = ","; - public static final String AT_SIGN = "@"; - public static final String CARET = "^"; - public static final String LT = "<"; // Less than - public static final String GT = ">"; // Greater than - public static final String QUOTE = "\""; - public static final String COLON = ":"; - public static final String BACK_SLASH = "\\"; - - // Nouveaux 
délimiteurs - public static final String HASH = "#"; - public static final String SLASH = "/"; - - - // Turtle-specific - public static final String RDF_TYPE_SHORTCUT = "a"; - public static final String BNODE_PREFIX = "_:"; - public static final String DATATYPE_SEPARATOR = "^^"; - public static final String BLANK_NODE_START = "["; - public static final String BLANK_NODE_END = "]"; - - public static final String OPEN_PARENTHESIS = "("; - public static final String CLOSE_PARENTHESIS = ")"; - - // --- Default Values for Configuration --- - public static final String DEFAULT_INDENTATION = " "; // Two spaces - public static final String DEFAULT_LINE_ENDING = "\n"; // Unix-style - - public static final String EMPTY_STRING = ""; - - // TriG-specific - public static final String OPEN_BRACE = "{"; - public static final String CLOSE_BRACE = "}"; - - // XML-specific constants - public static final String XML_DECLARATION_START = ""; - public static final String RDF_ROOT_START = " Date: Tue, 2 Sep 2025 17:08:51 +0200 Subject: [PATCH 64/64] undo SerializationConstants.java deletation and delete the right class --- .../util/SerializationConstants.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) rename src/main/java/fr/inria/corese/core/next/impl/{common/serializer => io/serialization}/util/SerializationConstants.java (79%) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java similarity index 79% rename from src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java rename to src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java index 58c0090c0..dda97db5f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serializer/util/SerializationConstants.java +++ 
b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java @@ -1,4 +1,10 @@ -package fr.inria.corese.core.next.impl.common.serializer.util; +package fr.inria.corese.core.next.impl.io.serialization.util; + +import fr.inria.corese.core.next.impl.common.vocabulary.OWL; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.RDFS; +import fr.inria.corese.core.next.impl.common.vocabulary.XSD; + /** * Provides common constants used throughout the RDF serialization process. @@ -13,15 +19,18 @@ private SerializationConstants() { } // --- Standard RDF/RDFS/XSD/OWL URIs --- - public static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - public static final String RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"; - public static final String XSD_NS = "http://www.w3.org/2001/XMLSchema#"; - public static final String OWL_NS = "http://www.w3.org/2002/07/owl#"; + public static final String RDF_NS = RDF.HTML.getNamespace(); + public static final String RDF_TYPE = RDF.type.getIRI().stringValue(); + public static final String RDF_FIRST = RDF.first.getIRI().stringValue(); + public static final String RDF_REST = RDF.rest.getIRI().stringValue(); + public static final String RDF_NIL = RDF.nil.getIRI().stringValue(); + - public static final String RDF_TYPE = RDF_NS + "type"; - public static final String RDF_FIRST = RDF_NS + "first"; - public static final String RDF_REST = RDF_NS + "rest"; - public static final String RDF_NIL = RDF_NS + "nil"; + public static final String RDFS_NS = RDFS.Resource.getNamespace(); + + public static final String XSD_NS = XSD.xsdString.getNamespace(); + + public static final String OWL_NS = OWL.NS; public static final String XSD_STRING = XSD_NS + "string"; public static final String XSD_INTEGER = XSD_NS + "integer"; @@ -96,4 +105,6 @@ private SerializationConstants() { public static final String QUOT_ENTITY = """; public static final 
String APOS_ENTITY = "'"; -} \ No newline at end of file + public static final String DEFAULT_GRAPH_IRI = "http://ns.inria.fr/corese/default-graph"; + +}