From effe53894f958b7316b134c94bff9404b39c2ef5 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 28 Nov 2025 10:13:24 +0100 Subject: [PATCH 01/13] RDFa vocabulary --- .../next/impl/common/vocabulary/RDFa.java | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/common/vocabulary/RDFa.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/vocabulary/RDFa.java b/src/main/java/fr/inria/corese/core/next/impl/common/vocabulary/RDFa.java new file mode 100644 index 000000000..a25b74f26 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/vocabulary/RDFa.java @@ -0,0 +1,63 @@ +package fr.inria.corese.core.next.impl.common.vocabulary; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.impl.common.BasicIRI; +import fr.inria.corese.core.next.impl.exception.IncorrectFormatException; + +public enum RDFa implements Vocabulary { + + PGClass("PGClass"), + Pattern("Pattern"), + PrefixOrTermMapping("PrefixOrTermMapping"), + DocumentError("DocumentError"), + Info("Info"), + PrefixRedefinition("PrefixRedefinition"), + UnresolvedCURIE("UnresolvedCURIE"), + UnresolvedTerm("UnresolvedTerm"), + VocabReferenceError("VocabReferenceError"), + context("context"), + copy("copy"), + prefix("prefix"), + term("term"), + uri("uri"), + usesVocabulary("usesVocabulary"), + vocabulary("vocabulary"), + Error("Error"), + PrefixMapping("PrefixMapping"), + TermMapping("TermMapping"), + Warning("Warning"); + + private final IRI iri; + + /** + * Constructor for the RDFa vocabulary enum. 
+ * + * @param localName the local name of the IRI + * @throws IncorrectFormatException if the namespace and the local name do not form a correct IRI + */ + RDFa(String localName) { + this.iri = new BasicIRI(getNamespace(), localName); + } + @Override + public IRI getIRI() { + return this.iri; + } + + @Override + public String getNamespace() { + return getVocabularyNamespace(); + } + + @Override + public String getPreferredPrefix() { + return getVocabularyPreferredPrefix(); + } + + public static String getVocabularyNamespace() { + return "http://www.w3.org/ns/rdfa#"; + } + + public static String getVocabularyPreferredPrefix() { + return "rdfa"; + } +} From 6aa3c6fe0dd07f98a355d5803f39f15fc5ba3cba Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 4 Dec 2025 10:20:53 +0100 Subject: [PATCH 02/13] RDF11 processing first pass --- .../inria/corese/core/next/api/Namespace.java | 2 +- .../core/next/api/base/io/RDFFormat.java | 19 +- .../next/impl/io/parser/ParserFactory.java | 10 +- .../io/parser/rdfa/AbstractRDFaParser.java | 80 +++ .../{RDFaParser.java => RDFa10Parser.java} | 209 +++---- .../impl/io/parser/rdfa/RDFa11Parser.java | 559 ++++++++++++++++++ .../io/parser/rdfa/RDFaEvaluationContext.java | 176 ------ .../model/AbstractRDFaEvaluationContext.java | 160 +++++ .../rdfa/model/RDFa10EvaluationContext.java | 51 ++ .../rdfa/model/RDFa11EvaluationContext.java | 101 ++++ .../io/parser/rdfa/model/RDFaAttributes.java | 29 + .../rdfa/model/RDFaEvaluationContext.java | 38 ++ .../rdfa/model/RDFaIncompleteStatement.java | 3 +- .../rdfa/model/RDFaInitialPrefixes.java | 121 ++++ .../core/next/api/base/io/RDFFormatTest.java | 2 +- ...aParserTest.java => RDFa10ParserTest.java} | 16 +- .../impl/io/parser/rdfa/RDFa11ParserTest.java | 27 + 17 files changed, 1266 insertions(+), 337 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java rename 
src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/{RDFaParser.java => RDFa10Parser.java} (59%) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java rename src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/{RDFaParserTest.java => RDFa10ParserTest.java} (96%) create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/api/Namespace.java b/src/main/java/fr/inria/corese/core/next/api/Namespace.java index 76a7f3a52..42c5bde5c 100644 --- a/src/main/java/fr/inria/corese/core/next/api/Namespace.java +++ b/src/main/java/fr/inria/corese/core/next/api/Namespace.java @@ -5,7 +5,7 @@ /** * Represents a namespace with a prefix and the start of an IRI as its name. */ -public interface Namespace extends Serializable, Comparable { +public interface Namespace extends Serializable { /** * @return The prefix of the namespace. 
diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index 14970afa1..f2b8ab814 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ -47,7 +47,7 @@ public class RDFFormat extends FileFormat { public static final RDFFormat RDFXML = new RDFFormat( "RDF/XML", List.of("rdf", "xml"), - List.of("application/rdf+xml"), + List.of("application/rdf+xml", "application/xml", "text/xml"), true, false); @@ -65,10 +65,17 @@ public class RDFFormat extends FileFormat { false, true); - public static final RDFFormat RDFa = new RDFFormat( - "RDFa", - List.of("html", "xhtml"), - List.of("text/html", "application/xhtml+xml"), + public static final RDFFormat RDFa_1_0 = new RDFFormat( + "RDFa 1.0", + List.of("html"), + List.of("text/html"), + true, + false); + + public static final RDFFormat RDFa_1_1 = new RDFFormat( + "RDFa 1.1", + List.of("xhtml", "svg", "xml"), + List.of("application/xhtml+xml", "image/svg+xml", "application/xml", "text/xml"), true, false); @@ -158,7 +165,7 @@ public static Optional byMimeType(String mimeType) { * @return An unmodifiable List of all RdfFormat constants. 
*/ public static List all() { - return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa); + return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa_1_0, RDFa_1_1); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 29f5b11fb..7812e44d3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,7 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.NQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.NTriplesParser; -import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFaParser; +import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFa10Parser; import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.TurtleParser; import fr.inria.corese.core.next.impl.io.parser.trig.TriGParser; @@ -53,8 +53,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new TriGParser(model, factory, config); } else if(format == RDFFormat.RDFC_1_0) { return new NQuadsParser(model, factory, config); - } else if (format == RDFFormat.RDFa) { - return new RDFaParser(model, factory, config); + } else if (format == RDFFormat.RDFa_1_0) { + return new RDFa10Parser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -80,8 +80,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new RDFXMLParser(model, factory); } else if (format == RDFFormat.TRIG) { return new TriGParser(model, factory); - } else if (format == RDFFormat.RDFa) { - return new RDFaParser(model, factory); + } else if (format == RDFFormat.RDFa_1_0) { + return new 
RDFa10Parser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java new file mode 100644 index 000000000..d9fee9f6f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java @@ -0,0 +1,80 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.impl.common.util.IRIUtils; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; + +import java.util.Optional; + +public abstract class AbstractRDFaParser extends AbstractRDFParser { + + protected RDFaEvaluationContext currentContext; + + protected AbstractRDFaParser(Model model, ValueFactory factory) { + super(model, factory); + } + + protected AbstractRDFaParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + } + + /** + * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI + * + * @param stringResource the resource as stored in the attribute of the HTML element + * @param context the context of the element evalation + * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise + */ + protected Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { + String resultString = stringResource; + if (resultString.startsWith("[") && resultString.endsWith("]")) { + 
resultString = resultString.replaceFirst("\\[", ""); + resultString = resultString.replaceFirst("]", ""); + } + + + if (stringUriIsCURIE(resultString)) { // CURIE + int colonIndex = resultString.indexOf(":"); + String prefixString = resultString.substring(0, colonIndex); + String localNameString = resultString.substring(colonIndex + 1); + // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri + if (context.hasIriMapping(prefixString)) { + IRI namespaceIRI = context.getIriMapping(prefixString); + + return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); + } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI + return Optional.of(this.getValueFactory().createIRI(context.getBaseIri().stringValue(), localNameString)); + } else { + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); + } + } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI + return Optional.of(this.getValueFactory().createIRI(resultString)); + + } else if (resultString.startsWith("_:")) { // Blank Node + int colonIndex = resultString.indexOf(":"); + String localNameString = resultString.substring(colonIndex + 1); + return Optional.of(this.getValueFactory().createBNode(localNameString)); + } else if (IRIUtils.isStandardIRI(context.getBaseIri().stringValue() + resultString)) { + String concatenatedRelativeUri = context.getBaseIri().stringValue() + resultString; + return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); + } + return Optional.empty(); + } + + /** + * Equivalent to test if it has a colon, and it is not a blank node + * + * @param stringIri + * @return + */ + protected boolean stringUriIsCURIE(String stringIri) { + int colonIndex = stringIri.indexOf(":"); + return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); + } +} diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java similarity index 59% rename from src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java index c601825e5..689c15ba9 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java @@ -9,15 +9,15 @@ import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.io.common.IOConstants; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaAttributes; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; -import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; import org.apache.commons.io.input.ReaderInputStream; import org.jsoup.Jsoup; import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.InputStream; import java.io.InputStreamReader; @@ -28,35 +28,23 @@ /** * RDFa parser. This parser will load the RDF data stored as RDFa in an HTML page. Its inner implementation is based on the jsoup library. It loads the html page as DOM and process it following the recommended algorithm in the RDFa recommendation. 
*/ -public class RDFaParser extends AbstractRDFParser { +public class RDFa10Parser extends AbstractRDFaParser { private static final String BASE_TAG = "base"; - private static final String REL_ATTR = "rel"; - private static final String REV_ATTR = "rev"; - private static final String CONTENT_ATTR = "content"; - private static final String HREF_ATTR = "href"; - private static final String SRC_ATTR = "src"; - private static final String ABOUT_ATTR = "about"; - private static final String PROPERTY_ATTR = "property"; - private static final String RESOURCE_ATTR = "resource"; - private static final String DATATYPE_ATTR = "datatype"; - private static final String TYPEOF_ATTR = "typeof"; - private static final String LANG_ATTR = "xml:lang"; - private static final String XMLNS_PREFIX = "xmlns"; - public RDFaParser(Model model, ValueFactory factory) { + public RDFa10Parser(Model model, ValueFactory factory) { this(model, factory, new RDFaParserOptions.Builder().build()); } - public RDFaParser(Model model, ValueFactory factory, IOOptions config) { + public RDFa10Parser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); } @Override public RDFFormat getRDFFormat() { - return RDFFormat.RDFa; + return RDFFormat.RDFa_1_0; } @Override @@ -96,7 +84,7 @@ private void processDocument(Document document, IRI baseIri) { Iterator baseElementIterator = document.stream().filter(element -> element.nameIs(BASE_TAG)).iterator(); while (baseElementIterator.hasNext()) { Element baseElement = baseElementIterator.next(); - Attribute baseElementHrefAttribute = baseElement.attribute(HREF_ATTR); + Attribute baseElementHrefAttribute = baseElement.attribute(RDFaAttributes.HREF.getName()); if (baseElementHrefAttribute != null) { String baseIriString = baseElementHrefAttribute.getValue(); baseIriFromXml = getValueFactory().createIRI(baseIriString); @@ -107,7 +95,7 @@ private void processDocument(Document document, IRI baseIri) { } for (Element element : 
document.children()) { - processElement(element, new RDFaEvaluationContext(baseIri)); + processElement(element, new RDFa10EvaluationContext(baseIri)); } } @@ -118,96 +106,93 @@ private void processDocument(Document document, IRI baseIri) { * @param skipElement Flag thet indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. * @see RDFa processing in details */ - private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) { + private void processElement(Element element, fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext context, boolean recursive, boolean skipElement) { // 1. First, the local values are initialized Resource newSubject = null; Resource currentObject = null; Literal currentObjectLiteral = null; - Map currentMappings = context.uriMappings(); + Map currentMappings = context.getIriMappings(); Set incompleteStatementSet = new HashSet<>(); String language = context.getLanguage(); // 2. Next the [current element] is parsed for [URI mapping]s and these are added to the [local list of URI mappings]. 
Note that a [URI mapping] will simply overwrite any current mapping in the list that has the same name; // Looking for namespace declarations // Namespace declaration are done using the XML namespace declaration mechanism, that can be seen as an attributes prefixed by "xmlns" and looks like this: "xmlns:prefix=namespace" - Iterator itAttribute = element.attributes().iterator(); - while(itAttribute.hasNext()) { - Attribute attribute = itAttribute.next(); + for (Attribute attribute : element.attributes()) { if (attribute.getKey().startsWith(XMLNS_PREFIX)) { String prefixName = attribute.localName(); IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), ""); - context.addUriMapping(prefixName, prefixNamespace); + context.addIriMapping(prefixName, prefixNamespace); } } // 3. The [current element] is also parsed for any language information, and if present, [current language] is set accordingly; - if (element.attribute(LANG_ATTR) != null) { - String langString = element.attr(LANG_ATTR); - language = langString; + if (element.attribute(RDFaAttributes.LANG.getName()) != null) { + language = element.attr(RDFaAttributes.LANG.getName()); } // 4. If the [current element] contains no @rel or @rev attribute, then the next step is to establish a value for [new subject]. 
Any of the attributes that can carry a resource can set [new subject]; - if(element.attribute(REL_ATTR) == null && element.attribute(REV_ATTR) == null) { + if(element.attribute(RDFaAttributes.REL.getName()) == null && element.attribute(RDFaAttributes.REV.getName()) == null) { // [new subject] is set to the URI obtained from the first match from the following rules: - if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if (element.attribute(RDFaAttributes.ABOUT.getName()) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.ABOUT.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } - } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. - Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + } else if (element.attribute(RDFaAttributes.SRC.getName()) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. 
+ Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.SRC.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } - } else if (element.attribute(RESOURCE_ATTR) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + } else if (element.attribute(RDFaAttributes.RESOURCE.getName()) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } - } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. - Optional newSubjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context); + } else if (element.attribute(RDFaAttributes.HREF.getName()) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.HREF.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; - newSubject = context.baseIri(); - } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. 
+ newSubject = context.getBaseIri(); + } else if (element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. newSubject = this.getValueFactory().createBNode(); - } else if (context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to the value of [parent object]. Additionally, if @property is not present then the [skip element] flag is set to 'true'; - newSubject = context.parentObjectResource(); - if(element.attribute(PROPERTY_ATTR) == null) { + } else if (context.getParentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to the value of [parent object]. Additionally, if @property is not present then the [skip element] flag is set to 'true'; + newSubject = context.getParentObjectResource(); + if(element.attribute(RDFaAttributes.PROPERTY.getName()) == null) { skipElement = true; } } } else { // [new subject] is set to the URI obtained from the first match from the following rules: - if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if (element.attribute(RDFaAttributes.ABOUT.getName()) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.ABOUT.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } - } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. 
- Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + } else if (element.attribute(RDFaAttributes.SRC.getName()) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.SRC.getName(), context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); } } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; - newSubject = context.baseIri(); - } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. + newSubject = context.getBaseIri(); + } else if (element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. newSubject = this.getValueFactory().createBNode(); - } else if(context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to that. - newSubject = context.parentObjectResource(); + } else if(context.getParentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to that. 
+ newSubject = context.getParentObjectResource(); } // Then the [current object resource] is set to the URI obtained from the first match from the following rules: - if (element.attribute(RESOURCE_ATTR) != null) { // by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; - Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (element.attribute(RDFaAttributes.RESOURCE.getName()) != null) { // by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newObjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); if (newObjectResource.isPresent()) { currentObject = newObjectResource.get(); } - } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. - Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + } else if (element.attribute(RDFaAttributes.HREF.getName()) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. + Optional newObjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); if (newObjectResource.isPresent()) { currentObject = newObjectResource.get(); } @@ -216,28 +201,28 @@ private void processElement(Element element, RDFaEvaluationContext context, bool // 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; if(newSubject != null) { - if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. 
If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: - Optional typeIri = getResourceFromElementAttribute(element, TYPEOF_ATTR, context); + if(element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: + Optional typeIri = getResourceFromElementAttribute(element, RDFaAttributes.TYPEOF.getName(), context); if (typeIri.isPresent()) { Statement stat = this.getValueFactory().createStatement(newSubject, RDF.type.getIRI(), typeIri.get()); this.getModel().add(stat); } else { - throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(TYPEOF_ATTR)); + throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(RDFaAttributes.TYPEOF.getName())); } } } // 7. 
If in any of the previous steps a [current object resource] was set to a non-null value, it is now used to generate triples: - if (currentObject != null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { - if(element.attribute(REL_ATTR) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if (currentObject != null && (element.attribute(RDFaAttributes.REL.getName()) != null || element.attribute(RDFaAttributes.REV.getName()) != null)) { + if(element.attribute(RDFaAttributes.REL.getName()) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI) propertyOpt.get(); this.getModel().add(newSubject, property, currentObject); } } - if(element.attribute(REV_ATTR) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(element.attribute(RDFaAttributes.REV.getName()) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { IRI property = (IRI) propertyOpt.get(); this.getModel().add(currentObject, property, newSubject); @@ -246,18 +231,18 @@ private void processElement(Element element, RDFaEvaluationContext context, bool } // 8. If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object. 
Also, [current object resource] should be set to a newly created [bnode]; - if (currentObject == null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { + if (currentObject == null && (element.attribute(RDFaAttributes.REL.getName()) != null || element.attribute(RDFaAttributes.REV.getName()) != null)) { currentObject = getValueFactory().createBNode(); - if(element.attribute(REL_ATTR) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(element.attribute(RDFaAttributes.REL.getName()) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI) propertyOpt.get(); RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); incompleteStatementSet.add(statement); } } - if(element.attribute(REV_ATTR) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(element.attribute(RDFaAttributes.REV.getName()) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { IRI property = (IRI) propertyOpt.get(); RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); @@ -268,21 +253,21 @@ private void processElement(Element element, RDFaEvaluationContext context, bool } // 9. The next step of the iteration is to establish any [current object literal]; - if(element.attribute(PROPERTY_ATTR) != null) { // Predicates for the [current object literal] can be set by using @property. 
If present, one or more URIs are obtained according to the section on CURIE and URI Processing, and then the actual literal value is obtained as follows: - Optional propertyOpt = getResourceFromElementAttribute(element, PROPERTY_ATTR, context); + if(element.attribute(RDFaAttributes.PROPERTY.getName()) != null) { // Predicates for the [current object literal] can be set by using @property. If present, one or more URIs are obtained according to the section on CURIE and URI Processing, and then the actual literal value is obtained as follows: + Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.PROPERTY.getName(), context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI)propertyOpt.get(); IRI datatype = null; - if(element.attribute(DATATYPE_ATTR) != null && ! element.attr(DATATYPE_ATTR).isEmpty()) { - Optional datatypeOpt = getResourceFromElementAttribute(element, DATATYPE_ATTR, context); + if(element.attribute(RDFaAttributes.DATATYPE.getName()) != null && ! element.attr(RDFaAttributes.DATATYPE.getName()).isEmpty()) { + Optional datatypeOpt = getResourceFromElementAttribute(element, RDFaAttributes.DATATYPE.getName(), context); if(datatypeOpt.isPresent() && datatypeOpt.get().isIRI() && ! 
datatypeOpt.get().equals(RDF.XMLLiteral.getIRI())) { datatype = (IRI) datatypeOpt.get(); } } String value = element.text(); - if(element.attribute(CONTENT_ATTR) != null) { - value = element.attr(CONTENT_ATTR); + if(element.attribute(RDFaAttributes.CONTENT.getName()) != null) { + value = element.attr(RDFaAttributes.CONTENT.getName()); } if(datatype != null) { currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype); @@ -301,29 +286,29 @@ private void processElement(Element element, RDFaEvaluationContext context, bool while(itStat.hasNext()) { RDFaIncompleteStatement statement = itStat.next(); if(statement.isForward()) { - this.getModel().add(context.parentSubjectResource(), statement.getPredicate(), newSubject); + this.getModel().add(context.getParentSubjectResource(), statement.getPredicate(), newSubject); } else if (statement.isBackward()){ - this.getModel().add(newSubject, statement.getPredicate(), context.parentSubjectResource()); + this.getModel().add(newSubject, statement.getPredicate(), context.getParentSubjectResource()); } } // 11. 
If the [recurse] flag is 'true', all elements that are children of the [current element] are processed using the rules described here, using a new [evaluation context], if(recursive) { if(skipElement) { - RDFaEvaluationContext newContext = new RDFaEvaluationContext(context); + RDFa10EvaluationContext newContext = new fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext(context); newContext.setLanguage(language); - newContext.uriMappings(currentMappings); + newContext.setIriMappings(currentMappings); context = newContext; } else { - context = new RDFaEvaluationContext(context.baseIri()); + context = new RDFa10EvaluationContext(context.getBaseIri()); if(newSubject != null) { - context.parentObjectResource(newSubject); + context.setParentObjectResource(newSubject); } if(currentObject != null) { - context.parentObjectResource(currentObject); + context.setParentObjectResource(currentObject); } - context.uriMappings(currentMappings); - context.incompleteStatements(incompleteStatementSet); + context.setIriMappings(currentMappings); + context.setIncompleteStatements(incompleteStatementSet); context.setLanguage(language); } @@ -339,7 +324,7 @@ private void processElement(Element element, RDFaEvaluationContext context, bool * @param element HTML element * @param context current evaluation context */ - private void processElement(Element element, RDFaEvaluationContext context) { + private void processElement(Element element, fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext context) { processElement(element, context, true, false); } @@ -349,60 +334,6 @@ public void parse(Reader reader, String baseURI) { parse(inputStream , baseURI); } - /** - * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI - * - * @param stringResource the resource as stored in the attribute of the HTML element - * @param context the context of the element evalation - * @return the full 
IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise - */ - private Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { - String resultString = stringResource; - if (resultString.startsWith("[") && resultString.endsWith("]")) { - resultString = resultString.replaceFirst("\\[", ""); - resultString = resultString.replaceFirst("]", ""); - } - - - if (stringUriIsCURIE(resultString)) { // CURIE - int colonIndex = resultString.indexOf(":"); - String prefixString = resultString.substring(0, colonIndex); - String localNameString = resultString.substring(colonIndex + 1); - // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri - if (context.hasUriMapping(prefixString)) { - IRI namespaceIRI = context.uriMapping(prefixString); - - return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); - } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI - return Optional.of(this.getValueFactory().createIRI(context.baseIri().stringValue(), localNameString)); - } else { - throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); - } - } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI - return Optional.of(this.getValueFactory().createIRI(resultString)); - - } else if (resultString.startsWith("_:")) { // Blank Node - int colonIndex = resultString.indexOf(":"); - String localNameString = resultString.substring(colonIndex + 1); - return Optional.of(this.getValueFactory().createBNode(localNameString)); - } else if (IRIUtils.isStandardIRI(context.baseIri().stringValue() + resultString)) { - String concatenatedRelativeUri = context.baseIri().stringValue() + resultString; - return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); - } - return Optional.empty(); - } - - /** - * Equivalent to test if it has a colon, and it is not a blank node - * - * @param stringIri - * @return - */ - private boolean 
stringUriIsCURIE(String stringIri) { - int colonIndex = stringIri.indexOf(":"); - return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); - } - private Optional getResourceFromElementAttribute(Element element, String attributeName, RDFaEvaluationContext context) { if (element.attribute(attributeName) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; String newSubjectString = element.attr(attributeName); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java new file mode 100644 index 000000000..7ab3e26c6 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java @@ -0,0 +1,559 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.RDFS; +import fr.inria.corese.core.next.impl.common.vocabulary.RDFa; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa11EvaluationContext; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaAttributes; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaInitialPrefixes; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXParseException; +import 
org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.*; + +/** + * SAX-based RDFa 1.1 parser. + * + *

This parser processes XML+RDFa documents (XHTMl, SVG, etc.) using the SAX streaming API. + * It follows the W3C recommendation, using buffers to replace DOM traversal.

+ * + *

+ * This parser does NOT support vocabulary expansion + *

+ */ +public class RDFa11Parser extends AbstractRDFaParser { + + private static final Logger logger = LoggerFactory.getLogger(RDFa11Parser.class); + + private static final String BASE_TAG = "base"; + private static final String XMLNS_PREFIX = "xmlns"; + + private RDFa11EvaluationContext currentContext = null; + + /** + * Buffer for accumulating character data between start and end tags. + */ + private StringBuilder characters = new StringBuilder(); + + // Local context + private boolean skipElement = false; + private Resource newSubject = null; + private Resource currentObjectResource = null; + private Resource typedResource = null; + private Map localIRIMappings = null; + private Set localIncompleteStatements = null; + private Map> localListMappings = null; + private String currentLanguage = null; + private Map localTermMappings = null; + private String localDefaultVocabulary = null; + + private boolean isRootElement = true; + private Attributes currentElementAttributes = null; + + private Model parsingModel = new CoreseModel(); + + public RDFa11Parser(Model model, ValueFactory factory) { + this(model, factory, new RDFaParserOptions.Builder().build()); + } + + public RDFa11Parser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + } + + + @Override + public void parse(InputStream in) { + if (getConfig() instanceof BaseIRIOptions baseIRIOptions) { + String baseIRI = baseIRIOptions.getBaseIRI(); + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI); + } else { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); + } + } + + @Override + public void parse(InputStream in, String baseURIString) { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURIString); + } + + @Override + public RDFFormat getRDFFormat() { + return RDFFormat.RDFa_1_1; + } + + @Override + public void parse(Reader reader, String baseURI) { + try { + this.currentContext = new 
RDFa11EvaluationContext(getValueFactory().createIRI(baseURI)); + this.currentContext.setParentSubjectResource(this.currentContext.getBaseIri()); + this.currentContext.setParentObjectResource(null); + this.currentContext.setLanguage(null); + + // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance + for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { + currentContext.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); + } + + // https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined terms mappings for RDFa contexts. + this.currentContext.addTermMapping("describedby", getValueFactory().createIRI("http://www.w3.org/2007/05/powder-s#describedby")); + this.currentContext.addTermMapping("license", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#license")); + this.currentContext.addTermMapping("role", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#role")); + + this.currentContext.setDefaultVocabulary(null); + + + skipElement = false; + newSubject = null; + currentObjectResource = null; + typedResource = null; + localIRIMappings = new HashMap<>(); + localIncompleteStatements = new HashSet<>(); + localListMappings = this.currentContext.getListMappings(); + currentLanguage = this.currentContext.getLanguage(); + localTermMappings = this.currentContext.getTermMappings(); + localDefaultVocabulary = this.currentContext.getDefaultVocabulary(); + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + SAXParser saxParser = factory.newSAXParser(); + InputSource inputSource = new InputSource(reader); + saxParser.parse(inputSource, new XMLSaxHandler()); + } catch (IOException e) { + throw new ParsingErrorException("Failed to parse XML+RDFa input stream: " + e.getMessage(), e); + } catch (Exception e) { + throw new ParsingErrorException("Unexpected error during 
XML+RDFa parsing: " + e.getMessage(), e); + } + } + + private void addPrefix(String prefix, String uri) { + IRI prefixIRI = getValueFactory().createIRI(uri); + this.currentContext.addIriMapping(prefix, prefixIRI); + } + + /** + * Handles character data between XML elements + */ + private void handleCharacters(char[] ch, int start, int length) { + characters.append(ch, start, length); + } + + private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { + this.currentElementAttributes = attrs; + } + + private void endProcessElement(String uri, String localName, String qName) { + String currentElementName = qName; + + // The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). + if (this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()) != null + && !this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()).isEmpty()) { + String vocabValue = this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()); + localDefaultVocabulary = vocabValue; + parsingModel.add(this.currentContext.getBaseIri(), RDFa.usesVocabulary.getIRI(), getValueFactory().createLiteral(vocabValue)); + } + + // The current element is examined for IRI mappings and these are added to the local list of IRI mappings. 
Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; + for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { + String attribute = this.currentElementAttributes.getQName(i); + if (attribute.startsWith(XMLNS_PREFIX)) { + String attributeValue = this.currentElementAttributes.getValue(i); + String prefixName = this.currentElementAttributes.getLocalName(i); + IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); + localIRIMappings.put(prefixName, prefixNamespace); + } + } + if (this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()) != null + && !this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()).isEmpty()) { + String prefixDeclaration = this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()); + String prefixName = getPrefixFromDeclaration(prefixDeclaration); + IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); + localIRIMappings.put(prefixName, prefixIRI); + } + + // If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. 
+ if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) == null + && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) == null) { + // If the current element contains the @property attribute, but does not contain either the @content or @datatype attributes, then + if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null + && !this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()).isEmpty() + && this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) == null + && this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) == null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); + } else if (isRootElement) { + this.newSubject = this.currentContext.getBaseIri(); + } else if (this.currentContext.getParentObjectResource() != null) { + this.newSubject = this.currentContext.getParentObjectResource(); + } + if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { + this.typedResource = this.newSubject; + } else if (isRootElement) { + this.typedResource = resolveStringResource("", this.currentContext).get(); + } else { + if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.HREF); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.SRC); + } else { + this.typedResource = getValueFactory().createBNode(); + } + this.currentObjectResource = 
this.typedResource; + } + } + // otherwise: + } else { + if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.ABOUT); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.RESOURCE); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.HREF); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.SRC); + } + } else { + if (isRootElement) { + this.newSubject = resolveStringResource("", this.currentContext).get(); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + this.newSubject = getValueFactory().createBNode(); + } else if (this.currentContext.getParentObjectResource() != null) { + this.newSubject = this.currentContext.getParentObjectResource(); + if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) == null) { + skipElement = true; + } + } + if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + this.typedResource = this.newSubject; + } + } + } + } + + // If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: + if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null + && 
this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { + this.newSubject = getResourceFromElementAttribute( RDFaAttributes.ABOUT); + } + if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + this.typedResource = this.newSubject; + } + if (this.newSubject == null) { + if (isRootElement) { + this.typedResource = resolveStringResource("", this.currentContext).get(); + } else if (this.currentContext.getParentObjectResource() != null) { + this.newSubject = this.currentContext.getParentObjectResource(); + } + } + if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.RESOURCE); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { + this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.HREF); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { + this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.SRC); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null) { + this.currentObjectResource = this.getValueFactory().createBNode(); + } + if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null + && (this.currentObjectResource == null || this.currentObjectResource.isResource())) { + this.typedResource = (Resource) this.currentObjectResource; + } + } + + // If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; + if (this.typedResource != null) { + Resource typeIri = 
getResourceFromElementAttribute( RDFaAttributes.TYPEOF); + this.getModel().add(this.typedResource, RDF.type.getIRI(), typeIri); + } + + // If in any of the previous steps a new subject was set to a non-null value different from the parent object; + if (this.newSubject != null && this.newSubject != this.currentContext.getParentObjectResource()) { + this.localListMappings = new HashMap<>(); + } + + // If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: + if (this.currentObjectResource != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { + IRI relResource = (IRI) getResourceFromElementAttribute( RDFaAttributes.REL); + if (!localListMappings.containsKey(relResource)) { + this.localListMappings.put(relResource, new HashSet<>()); + } + this.localListMappings.get(relResource).add(this.currentObjectResource); + } + if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) == null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { + Resource relResource = getResourceFromElementAttribute( RDFaAttributes.REL); + if (relResource.isIRI()) { + this.getModel().add(newSubject, (IRI) relResource, currentObjectResource); + } else { + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); + } + } + if (this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { + Resource revResource = getResourceFromElementAttribute( RDFaAttributes.REV); + if (!revResource.isIRI()) { + throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); + } + if 
(!currentObjectResource.isResource()) { + throw new ParsingErrorException("object resource expected to be a resource but was " + currentObjectResource); + } + this.getModel().add((Resource) currentObjectResource, (IRI) revResource, newSubject); + } + } + } + + // If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); + if (this.currentObjectResource == null) { + this.currentObjectResource = getValueFactory().createBNode(); + if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { + if(! getResourceFromElementAttribute( RDFaAttributes.REL).isIRI()) { + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); + } + IRI relIRI = (IRI) getResourceFromElementAttribute( RDFaAttributes.REL); + if(this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { // TODO: Step to be double checked, standard unclear + if (!localListMappings.containsKey(relIRI)) { + this.localListMappings.put(relIRI, new HashSet<>()); + } + this.localListMappings.get(relIRI).add(this.currentObjectResource); + } else { + this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); + } + } else if (this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { + if(! 
getResourceFromElementAttribute( RDFaAttributes.REV).isIRI()) { + throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); + } + IRI revIRI = (IRI) getResourceFromElementAttribute( RDFaAttributes.REV); + this.localIncompleteStatements.add(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); + } + } + + // The next step of the iteration is to establish any current property value; + if(this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null) { + IRI propertyIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.PROPERTY); + Value currentPropertyValue = null; + if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null + && getResourceFromElementAttribute(RDFaAttributes.DATATYPE).isIRI() + && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI()) { + IRI datatypeIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.DATATYPE); + if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { + String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); + currentPropertyValue = getValueFactory().createLiteral(contentString, datatypeIRI); + } else { + String contentString = this.characters.toString().trim(); + currentPropertyValue = getValueFactory().createLiteral(contentString); + this.characters = new StringBuilder(); + } + } else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()).isEmpty()) { + if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { + String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); + currentPropertyValue = getValueFactory().createLiteral(contentString); + } else { + String 
contentString = this.characters.toString().trim(); + currentPropertyValue = getValueFactory().createLiteral(contentString); + this.characters = new StringBuilder(); + } + //} else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null + // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE).isIRI() + // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { + } else if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { + String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); + currentPropertyValue = getValueFactory().createLiteral(contentString); + } else if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) == null + && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) == null + && this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) == null) { + if(this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); + } else if(this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { + currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.HREF); + } else if(this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { + currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.SRC); + } + } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null + && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null) { + currentPropertyValue = typedResource; + } else { + String contentString = this.characters.toString().trim(); + if(this.currentLanguage != null + && ! 
this.currentLanguage.isEmpty()) { + currentPropertyValue = getValueFactory().createLiteral(contentString, this.currentLanguage); + } else { + currentPropertyValue = getValueFactory().createLiteral(contentString); + } + this.characters = new StringBuilder(); + } + + if(this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { + if(! this.localListMappings.containsKey(propertyIRI)) { + this.localListMappings.put(propertyIRI, new HashSet<>()); + } + this.localListMappings.get(propertyIRI).add(currentPropertyValue); + } else { + this.getModel().add(this.newSubject, propertyIRI, currentPropertyValue); + } + } + + // If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: + if(! skipElement + && newSubject != null) { + for(RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { + if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { + localListMappings.get(incompleteStatement.getPredicate()).add(newSubject); + } else if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { + this.getModel().add(currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), newSubject); + } else if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { + this.getModel().add(newSubject, incompleteStatement.getPredicate(), currentContext.getParentSubjectResource()); + } + } + } + + // Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: + Map> oldListMappings = currentContext.getListMappings(); + if(skipElement) { + currentContext = new RDFa11EvaluationContext(this.currentContext); + currentContext.setLanguage(currentLanguage); + currentContext.setIriMappings(localIRIMappings); + } else { + Resource oldParentSubject = 
this.currentContext.getParentSubjectResource(); + currentContext = new RDFa11EvaluationContext(this.currentContext.getBaseIri()); + currentContext.setParentSubjectResource(newSubject); + if(currentObjectResource != null) { + currentContext.setParentObjectResource(currentObjectResource); + } if (newSubject != null) { + currentContext.setParentObjectResource(newSubject); + } else { + currentContext.setParentObjectResource(oldParentSubject); + } + currentContext.setIriMappings(localIRIMappings); + currentContext.setIncompleteStatements(localIncompleteStatements); + currentContext.setListMappings(localListMappings); + currentContext.setLanguage(currentLanguage); + currentContext.setDefaultVocabulary(localDefaultVocabulary); + } + + // Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: + for(Map.Entry> listMapping : localListMappings.entrySet()) { + IRI propertyIRI = listMapping.getKey(); + Set propertyList = listMapping.getValue(); + + if(!oldListMappings.containsKey(propertyIRI)) { + if(propertyList.isEmpty()) { + getModel().add(newSubject, propertyIRI, RDF.nil.getIRI()); + } else { + ArrayList bnodes = new ArrayList<>(); + for(int i = 0; i < propertyList.size(); i++) { + bnodes.add(getValueFactory().createBNode()); + } + int bnodeIndex = 0; + for(Value listElement : propertyList) { + BNode elementNode = bnodes.get(bnodeIndex); + Resource nextElementNode = RDF.nil.getIRI(); + if(bnodeIndex < bnodes.size() - 1) { + nextElementNode = bnodes.get(bnodeIndex + 1); + } + getModel().add(elementNode, RDF.first.getIRI(), listElement); + getModel().add(elementNode, RDF.rest.getIRI(), nextElementNode); + + bnodeIndex++; + } + getModel().add(newSubject, propertyIRI, bnodes.getFirst()); + } + } + } + + isRootElement = false; + } + + /** + * Internal SAX handler that delegates to the parser's methods + */ + private class XMLSaxHandler extends DefaultHandler { + @Override + public void characters(char[] ch, int start, int 
length) { + RDFa11Parser.this.handleCharacters(ch, start, length); + } + + @Override + public void startPrefixMapping(String prefix, String uri) { + RDFa11Parser.this.addPrefix(prefix, uri); + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attrs) { + startProcessElement(uri, localName, qName, attrs); + } + + @Override + public void endElement(String uri, String localName, String qName) { + endProcessElement(uri, localName, qName); + } + + @Override + public void error(SAXParseException e) { + throw new ParsingErrorException("Failed to parse XML+RDFa: " + e.getMessage(), e); + } + + @Override + public void fatalError(SAXParseException e) { + throw new ParsingErrorException("Failed to parse XML+RDFa: " + e.getMessage(), e); + } + + @Override + public void warning(SAXParseException e) { + logger.warn("Warning during parsing of XML+RDFa: ", e); + } + } + + private String getPrefixFromDeclaration(String declaration) { + String[] prefixArray = declaration.split(": "); + if (prefixArray.length != 2) { + throw new ParsingErrorException("Error during prefix extraction of " + declaration); + } + return prefixArray[0].toLowerCase(); + } + + private IRI getPrefixIriFromDeclaration(String declaration) { + String[] prefixArray = declaration.split(": "); + if (prefixArray.length != 2) { + throw new ParsingErrorException("Error during prefix extraction of " + declaration); + } + return getValueFactory().createIRI(prefixArray[1].toLowerCase()); + } + + private Resource getResourceFromElementAttribute(RDFaAttributes attribute) { + String attributeValue = this.currentElementAttributes.getValue(attribute.getName()); + if (resolveStringResource(attributeValue, this.currentContext).isPresent()) { + return resolveStringResource(attributeValue, this.currentContext).get(); + } else { + throw new ParsingErrorException("Could not parse @" + attribute.getName() + " value: " + attributeValue); + } + } +} diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java deleted file mode 100644 index 088cf6c6a..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java +++ /dev/null @@ -1,176 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; - -import java.util.*; - -/** - * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. - * @see RDFa recommandation - */ -public class RDFaEvaluationContext { - - /** - * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. - */ - private IRI baseIri; - - /** - * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. - */ - private Resource parentSubjectResource ; - - /** - * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. - */ - private Resource parentObjectResource = null; - - /** - * An index of locally defined IRI prefixes - */ - private Map uriMappings = new HashMap<>(); - - /** - * Set of statement in the process of building. 
- */ - private Set incompleteStatement = new HashSet<>(); - - /** - * The language of the document. Note that there is no default language. - */ - private String language = null; - - public RDFaEvaluationContext(IRI baseIri) { - this.baseIri = baseIri; - this.parentSubjectResource = baseIri; - } - - public RDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) { - this.baseIri = baseIri; - this.parentSubjectResource = parentSubjectResource; - } - - public RDFaEvaluationContext(RDFaEvaluationContext context) { - this.baseIri = context.baseIri; - this.parentSubjectResource = context.parentSubjectResource; - this.parentObjectResource = context.parentObjectResource; - this.uriMappings = new HashMap<>(context.uriMappings); - this.incompleteStatement = new HashSet<>(context.incompleteStatement); - this.language = context.language; - } - - public IRI baseIri() { - return baseIri; - } - - public RDFaEvaluationContext baseIri(IRI baseIri) { - this.baseIri = baseIri; - return this; - } - - public RDFaEvaluationContext incompleteStatements(Set incompleteStatement) { - this.incompleteStatement = new HashSet<>(incompleteStatement); - return this; - } - - public Iterator getIncompleteStatementIterator() { - return this.incompleteStatement.iterator(); - } - - public RDFaEvaluationContext addStatementWithoutSubject(IRI property, Value object) { - RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); - newStatement.setObject(object); - this.incompleteStatement.add(newStatement); - return this; - } - - public RDFaEvaluationContext addStatementWithoutObject(Resource subject, IRI property) { - RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); - newStatement.setSubject(subject); - this.incompleteStatement.add(newStatement); - return this; - } - - public void clearIncompleteStatements() { - this.incompleteStatement.clear(); - } - - public Resource parentSubjectResource() { - return parentSubjectResource; - } - - public 
RDFaEvaluationContext parentSubjectResource(Resource parentSubjectResource) { - this.parentSubjectResource = parentSubjectResource; - return this; - } - - public Resource parentObjectResource() { - return parentObjectResource; - } - - public RDFaEvaluationContext parentObjectResource(Resource parentObjectResource) { - this.parentObjectResource = parentObjectResource; - return this; - } - - public Map uriMappings() { - return uriMappings; - } - - public RDFaEvaluationContext uriMappings(Map uriMappings) { - this.uriMappings = uriMappings; - return this; - } - - public boolean hasUriMapping(String prefix) { - return this.uriMappings.containsKey(prefix); - } - - /** - * @param prefix the prefix WITHOUT ":" - * @return the IRI associated to the prefix in this context - */ - public IRI uriMapping(String prefix) { - return this.uriMappings.get(prefix); - } - - public void addUriMapping(String prefix, IRI prefixIri) { - this.uriMappings.put(prefix, prefixIri); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append("BaseURI: ").append(this.baseIri.stringValue()).append(" "); - sb.append("Mappings: ["); - this.uriMappings.forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); - sb.append("] "); - if(this.parentSubjectResource != null) { - sb.append("Subject:").append(this.parentSubjectResource.stringValue()).append(" "); - } else { - sb.append("Subject:").append((Object) null).append(" "); - } - if(this.parentObjectResource != null) { - sb.append("Object: ").append(this.parentObjectResource.stringValue()).append(" "); - } else { - sb.append("Object: ").append((Object) null).append(" "); - } - if(! 
this.incompleteStatement.isEmpty()) { - sb.append(this.incompleteStatement.size()).append(" incomplete statements."); - } - - return sb.toString(); - } - - public String getLanguage() { - return language; - } - - public void setLanguage(String language) { - this.language = language; - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java new file mode 100644 index 000000000..59e6ffd9a --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java @@ -0,0 +1,160 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +import java.util.*; + +public abstract class AbstractRDFaEvaluationContext implements RDFaEvaluationContext { + + /** + * The base. This will usually be the IRI of the document being processed, but it could be some other IRI, set by some other mechanism, such as the (X)HTML base element. The important thing is that it establishes an IRI against which relative paths can be resolved. + */ + private IRI baseIri; + + /** + * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. + */ + private Resource parentSubjectResource ; + + /** + * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. 
+ */ + private Resource parentObjectResource = null; + + /** + * An index of locally defined IRI prefixes + */ + private Map iriMappings = new HashMap<>(); + + /** + * Set of statement in the process of building. + */ + private Set incompleteStatement = new HashSet<>(); + + /** + * The language of the document. Note that there is no default language. + */ + private String language = null; + + protected AbstractRDFaEvaluationContext(IRI baseIri) { + this(baseIri, baseIri); + } + + protected AbstractRDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) { + this.baseIri = baseIri; + this.parentSubjectResource = parentSubjectResource; + } + + protected AbstractRDFaEvaluationContext(AbstractRDFaEvaluationContext context) { + this.baseIri = context.baseIri; + this.parentSubjectResource = context.parentSubjectResource; + this.parentObjectResource = context.parentObjectResource; + this.iriMappings = new HashMap<>(context.iriMappings); + this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.language = context.language; + } + + @Override + public IRI getBaseIri() { + return baseIri; + } + + @Override + public void setBaseIri(IRI baseIri) { + this.baseIri = baseIri; + } + + @Override + public Resource getParentSubjectResource() { + return parentSubjectResource; + } + + @Override + public void setParentSubjectResource(Resource parentSubjectResource) { + this.parentSubjectResource = parentSubjectResource; + } + + @Override + public Resource getParentObjectResource() { + return parentObjectResource; + } + + @Override + public void setParentObjectResource(Resource parentObjectResource) { + this.parentObjectResource = parentObjectResource; + } + + @Override + public Map getIriMappings() { + return iriMappings; + } + + @Override + public void setIriMappings(Map iriMappings) { + this.iriMappings = iriMappings; + } + + @Override + public boolean hasIriMapping(String prefix) { + return this.iriMappings.containsKey(prefix); + } + + /** + * @param prefix 
the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + @Override + public IRI getIriMapping(String prefix) { + return this.iriMappings.get(prefix); + } + + @Override + public void addIriMapping(String prefix, IRI prefixIri) { + this.iriMappings.put(prefix, prefixIri); + } + + @Override + public Set getIncompleteStatement() { + return incompleteStatement; + } + + @Override + public void setIncompleteStatements(Set incompleteStatement) { + this.incompleteStatement = incompleteStatement; + } + + @Override + public Iterator getIncompleteStatementIterator() { + return this.incompleteStatement.iterator(); + } + + @Override + public void addStatementWithoutSubject(IRI property, Value object) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setObject(object); + this.incompleteStatement.add(newStatement); + } + + @Override + public void addStatementWithoutObject(Resource subject, IRI property) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setSubject(subject); + this.incompleteStatement.add(newStatement); + } + + public void clearIncompleteStatements() { + this.incompleteStatement.clear(); + } + + @Override + public String getLanguage() { + return language; + } + + @Override + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java new file mode 100644 index 000000000..ca9406762 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java @@ -0,0 +1,51 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +import 
java.util.*; + +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommandation + */ +public class RDFa10EvaluationContext extends AbstractRDFaEvaluationContext { + + public RDFa10EvaluationContext(IRI baseIri) { + this(baseIri, baseIri); + } + + public RDFa10EvaluationContext(IRI baseIri, IRI parentSubjectResource) { + super(baseIri, parentSubjectResource); + } + + public RDFa10EvaluationContext(RDFa10EvaluationContext context) { + super(context); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); + sb.append("Mappings: ["); + this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append("] "); + if(this.getParentSubjectResource() != null) { + sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); + } else { + sb.append("Subject:").append((Object) null).append(" "); + } + if(this.getParentObjectResource() != null) { + sb.append("Object: ").append(this.getParentObjectResource().stringValue()).append(" "); + } else { + sb.append("Object: ").append((Object) null).append(" "); + } + if(! 
this.getIncompleteStatement().isEmpty()) { + sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); + } + + return sb.toString(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java new file mode 100644 index 000000000..36c3c82d0 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java @@ -0,0 +1,101 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +import java.util.*; + +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommandation + */ +public class RDFa11EvaluationContext extends AbstractRDFaEvaluationContext { + + /** + * A list mapping that associates IRIs with lists. + */ + private Map> listMappings = new HashMap<>(); + /** + * The language. Note that there is no default language. + */ + private String language = null; + /** + * The term mappings, a list of terms and their associated IRIs. This specification does not define an initial list. Host Languages MAY define an initial list. + */ + private Map termMappings = new HashMap<>(); + + /** + * The default vocabulary, a value to use as the prefix IRI when a term unknown to the RDFa Processor is used. This specification does not define an initial setting for the default vocabulary. Host Languages MAY define an initial setting. 
+ */ + private String defaultVocabulary = null; + + + public RDFa11EvaluationContext(IRI baseIri) { + this(baseIri, baseIri); + } + + public RDFa11EvaluationContext(IRI baseIri, IRI parentSubjectResource) { + super(baseIri, parentSubjectResource); + } + + public RDFa11EvaluationContext(RDFa11EvaluationContext context) { + super(context); + this.listMappings = new HashMap<>(context.listMappings); + this.termMappings = new HashMap<>(context.termMappings); + this.defaultVocabulary = context.defaultVocabulary;; + } + + public String getDefaultVocabulary() { + return defaultVocabulary; + } + + public void setDefaultVocabulary(String defaultVocabulary) { + this.defaultVocabulary = defaultVocabulary; + } + + public void addTermMapping(String term, IRI iri) { + this.termMappings.put(term, iri); + } + + public IRI getTermMapping(String term) { + return this.termMappings.get(term); + } + + public Map getTermMappings() { + return this.termMappings; + } + + public Map> getListMappings() { + return listMappings; + } + + public void setListMappings(Map> listMappings) { + this.listMappings = listMappings; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); + sb.append("Mappings: ["); + this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append("] "); + if(this.getParentSubjectResource() != null) { + sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); + } else { + sb.append("Subject:").append((Object) null).append(" "); + } + if(this.getParentObjectResource() != null) { + sb.append("Object: ").append(this.getParentObjectResource().stringValue()).append(" "); + } else { + sb.append("Object: ").append((Object) null).append(" "); + } + if(! 
this.getIncompleteStatement().isEmpty()) { + sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); + } + + return sb.toString(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java new file mode 100644 index 000000000..6d23908b6 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java @@ -0,0 +1,29 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +public enum RDFaAttributes { + ABOUT("about"), + CONTENT("content"), + DATATYPE("datatype"), + HREF("href"), + INLIST("inlist"), + PREFIX("prefix"), + PROPERTY("property"), + REL("rel"), + RESOURCE("resource"), + REV("rev"), + SRC("src"), + TYPEOF("typeof"), + VOCAB("vocab"), + LANG("lang"), + LANG_ALT("xml:lang"); + + private final String name; + + RDFaAttributes(String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java new file mode 100644 index 000000000..438f70537 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -0,0 +1,38 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +public interface RDFaEvaluationContext { + + IRI getBaseIri(); + void setBaseIri(IRI baseIri); + + void setParentSubjectResource(Resource parentSubjectResource); + Resource getParentSubjectResource(); + + void setParentObjectResource(Resource parentObjectResource); + Resource 
getParentObjectResource(); + + void setIncompleteStatements(Set incompleteStatement); + Set getIncompleteStatement(); + Iterator getIncompleteStatementIterator(); + void addStatementWithoutSubject(IRI property, Value object); + void addStatementWithoutObject(Resource subject, IRI property); + void clearIncompleteStatements(); + + boolean hasIriMapping(String prefix); + IRI getIriMapping(String prefix); + Map getIriMappings(); + void addIriMapping(String prefix, IRI prefixIri); + void setIriMappings(Map iriMappings); + + String getLanguage(); + void setLanguage(String language); + +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java index d30a7fe54..524491a8f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java @@ -13,7 +13,8 @@ public class RDFaIncompleteStatement { public enum Direction { FORWARD, - BACKWARD + BACKWARD, + NONE } private Resource subject = null; diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java new file mode 100644 index 000000000..6dc5ce248 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java @@ -0,0 +1,121 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.Namespace; + +/** + * https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined prefixes for RDFa contexts. 
+ */ +public enum RDFaInitialPrefixes implements Namespace { + + // as "https://www.w3.org/ns/activitystreams#" + AS("as", "https://www.w3.org/ns/activitystreams#"), + // cc "http://creativecommons.org/ns#" + CC("cc", "http://creativecommons.org/ns#"), + // csvw "http://www.w3.org/ns/csvw#" + CSVW("csvw", "http://www.w3.org/ns/csvw#"), + // ctag "http://commontag.org/ns#" + CTAG("ctag", "http://commontag.org/ns#"), + // dc "http://purl.org/dc/terms/" + DC("dc", "http://purl.org/dc/terms/"), + // dc11 "http://purl.org/dc/elements/1.1/" + DC11("dc11", "http://purl.org/dc/elements/1.1/"), + // dcat "http://www.w3.org/ns/dcat#" + DCAT("dcat", "http://www.w3.org/ns/dcat#"), + // dcterms "http://purl.org/dc/terms/" + DCTERMS("dcterms", "http://purl.org/dc/terms/"), + // dqv "http://www.w3.org/ns/dqv#" + DQV("dqv", "http://www.w3.org/ns/dqv#"), + // duv "https://www.w3.org/ns/duv#" + DUV("duv", "https://www.w3.org/ns/duv#"), + // foaf "http://xmlns.com/foaf/0.1/" + FOAF(fr.inria.corese.core.next.impl.common.vocabulary.FOAF.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.FOAF.getVocabularyNamespace()), + // gr "http://purl.org/goodrelations/v1#" + GR("gr", "http://purl.org/goodrelations/v1#"), + // grddl "http://www.w3.org/2003/g/data-view#" + GRDDL("grddl", "http://www.w3.org/2003/g/data-view#"), + // ical "http://www.w3.org/2002/12/cal/icaltzd#" + ICAL("ical", "http://www.w3.org/2002/12/cal/icaltzd#"), + // jsonld "http://www.w3.org/ns/json-ld#" + JSONLD("jsonld", "http://www.w3.org/ns/json-ld#"), + // ldp "http://www.w3.org/ns/ldp#" + LDP("ldp", "http://www.w3.org/ns/ldp#"), + // ma "http://www.w3.org/ns/ma-ont#" + MA("ma", "http://www.w3.org/ns/ma-ont#"), + // oa "http://www.w3.org/ns/oa#" + OA("oa", "http://www.w3.org/ns/oa#"), + // odrl "http://www.w3.org/ns/odrl/2/" + ODRL("odrl", "http://www.w3.org/ns/odrl/2/"), + // og "http://ogp.me/ns#" + OG("og", "http://ogp.me/ns#"), + // org "http://www.w3.org/ns/org#" + ORG("org",
"http://www.w3.org/ns/org#"), + // owl "http://www.w3.org/2002/07/owl#" + OWL(fr.inria.corese.core.next.impl.common.vocabulary.OWL.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.OWL.getVocabularyNamespace()), + // prov "http://www.w3.org/ns/prov#" + PROV("prov", "http://www.w3.org/ns/prov#"), + // qb "http://purl.org/linked-data/cube#" + QB("qb", "http://purl.org/linked-data/cube#"), + // rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + RDF(fr.inria.corese.core.next.impl.common.vocabulary.RDF.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.RDF.getVocabularyNamespace()), + // rdfa "http://www.w3.org/ns/rdfa#" + RDFA(fr.inria.corese.core.next.impl.common.vocabulary.RDFa.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.RDFa.getVocabularyNamespace()), + // rdfs "http://www.w3.org/2000/01/rdf-schema#" + RDFS(fr.inria.corese.core.next.impl.common.vocabulary.RDFS.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.RDFS.getVocabularyNamespace()), + // rev "http://purl.org/stuff/rev#" + REV("rev", "http://purl.org/stuff/rev#"), + // rif "http://www.w3.org/2007/rif#" + RIF("rif", "http://www.w3.org/2007/rif#"), + // rr "http://www.w3.org/ns/r2rml#" + RR("rr", "http://www.w3.org/ns/r2rml#"), + // schema "http://schema.org/" + SCHEMA("schema", "http://schema.org/"), + // sd "http://www.w3.org/ns/sparql-service-description#" + SD("sd", "http://www.w3.org/ns/sparql-service-description#"), + // sioc "http://rdfs.org/sioc/ns#" + SIOC("sioc", "http://rdfs.org/sioc/ns#"), + // skos "http://www.w3.org/2004/02/skos/core#" + SKOS("skos", "http://www.w3.org/2004/02/skos/core#"), + // skosxl "http://www.w3.org/2008/05/skos-xl#" + SKOSXL("skosxl", "http://www.w3.org/2008/05/skos-xl#"), + // sosa "http://www.w3.org/ns/sosa/" + SOSA("sosa", "http://www.w3.org/ns/sosa/"), + // ssn "http://www.w3.org/ns/ssn/" + SSN("ssn", "http://www.w3.org/ns/ssn/"), + // time
"http://www.w3.org/2006/time#" + TIME("time", "http://www.w3.org/2006/time#"), + // v "http://rdf.data-vocabulary.org/#" + V("v", "http://rdf.data-vocabulary.org/#"), + // vcard "http://www.w3.org/2006/vcard/ns#" + VCARD("vcard", "http://www.w3.org/2006/vcard/ns#"), + // void "http://rdfs.org/ns/void#" + VOID("void", "http://rdfs.org/ns/void#"), + // wdr "http://www.w3.org/2007/05/powder#" + WDR("wdr", "http://www.w3.org/2007/05/powder#"), + // wdrs "http://www.w3.org/2007/05/powder-s#" + WDRS("wdrs", "http://www.w3.org/2007/05/powder-s#"), + // xhv "http://www.w3.org/1999/xhtml/vocab#" + XHV("xhv", "http://www.w3.org/1999/xhtml/vocab#"), + // xml "http://www.w3.org/XML/1998/namespace" + XML("xml", "http://www.w3.org/XML/1998/namespace"), + // xsd "http://www.w3.org/2001/XMLSchema#" + XSD(fr.inria.corese.core.next.impl.common.vocabulary.XSD.getVocabularyPreferredPrefix(), fr.inria.corese.core.next.impl.common.vocabulary.XSD.getVocabularyNamespace()), + ; + + private final String prefix; + private final String name; + + RDFaInitialPrefixes(String prefix, String name) { + this.prefix = prefix; // short label as written by the constants, e.g. "rdf" + this.name = name; // namespace IRI the label expands to + } + + @Override + public String getPrefix() { + return this.prefix; + } + + @Override + public String getName() { + return this.name; + } +} diff --git a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java index 4d4c1bfe3..540f8d847 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java @@ -306,7 +306,7 @@ void allFormats() { assertTrue(allFormats.contains(RDFFormat.JSONLD)); assertTrue(allFormats.contains(RDFFormat.RDFXML)); assertTrue(allFormats.contains(RDFFormat.TRIG)); - assertTrue(allFormats.contains(RDFFormat.RDFa)); + assertTrue(allFormats.contains(RDFFormat.RDFa_1_0)); assertTrue(allFormats.contains(RDFFormat.RDFC_1_0));
assertThrows(UnsupportedOperationException.class, () -> allFormats.add(RDFFormat.TURTLE), diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java similarity index 96% rename from src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java rename to src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java index 2458067d9..3ee8a809c 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java @@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -public class RDFaParserTest { +public class RDFa10ParserTest { private static final ValueFactory factory = new CoreseAdaptedValueFactory(); @@ -45,11 +45,11 @@ public void basicBaseTest() { referenceModel.add(subject, predicate, object); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes())); - assertEquals(RDFFormat.RDFa, parser.getRDFFormat()); + assertEquals(RDFFormat.RDFa_1_0, parser.getRDFFormat()); assertEquals(referenceModel.size(), testModel.size()); Iterator itStatementRef = referenceModel.iterator(); Iterator itStatementTest = testModel.iterator(); @@ -79,7 +79,7 @@ public void aboutTest() { Model testModel = new CoreseModel(); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); @@ -108,7 +108,7 @@ public void 
basicIRItoIRITest() { Model testModel = new CoreseModel(); Model referenceModel = new CoreseModel(); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); @@ -143,7 +143,7 @@ public void basicIRItoStringTest() { Model testModel = new CoreseModel(); Model referenceModel = new CoreseModel(); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); @@ -179,7 +179,7 @@ public void basicIRItoTypedLiteralTest() { Model testModel = new CoreseModel(); Model referenceModel = new CoreseModel(); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); @@ -226,7 +226,7 @@ public void basicChainTest() { Model testModel = new CoreseModel(); Model referenceModel = new CoreseModel(); - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java new file mode 100644 index 000000000..538fa6b13 --- /dev/null +++ 
b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java @@ -0,0 +1,27 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class RDFa11ParserTest { + + + @Test + void getRDFFormat() { + Model model = new CoreseModel(); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser parser = new RDFa11Parser(model, factory); + assertEquals(RDFFormat.RDFa_1_1, parser.getRDFFormat()); + } + + @Test + void parse() { + } +} \ No newline at end of file From a9ca6798b0b2532dfbd6fe64e950b99d8f977d54 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 4 Dec 2025 15:01:03 +0100 Subject: [PATCH 03/13] removing namespace comparison --- .../next/api/base/model/AbstractNamespace.java | 16 ---------------- .../next/impl/common/prefix/PrefixHandler.java | 2 +- .../core/next/api/base/AbstractModelTest.java | 11 ----------- 3 files changed, 1 insertion(+), 28 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java index 795c7959b..f6e2d49ad 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java @@ -19,22 +19,6 @@ public abstract class AbstractNamespace implements Namespace { @Serial private static final long serialVersionUID = 1L; - /** - * Comparator that orders namespaces by prefix, then by URI. - * Null values are ordered first. 
- */ - private static final Comparator ORDERING = Comparator.nullsFirst( - Comparator.comparing(Namespace::getPrefix) - .thenComparing(Namespace::getName)); - - /** - * Compares this namespace to another based on prefix and name. - */ - @Override - public int compareTo(Namespace other) { - return ORDERING.compare(this, other); - } - /** * Checks equality based on prefix and name. */ diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java b/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java index 9e2722fcd..6115e136c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java @@ -444,8 +444,8 @@ public String getPrefix() { public String getName() { return name; } + @SuppressWarnings("NullableProblems") - @Override public int compareTo(Namespace o) { Objects.requireNonNull(o); int cmp = this.name.compareTo(o.getName()); diff --git a/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java b/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java index 92552b1f1..fa7f118c6 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java @@ -692,16 +692,5 @@ public String getPrefix() { public String getName() { return name; } - - @Override - public int compareTo(Namespace other) { - - int prefixComparison = this.getPrefix().compareTo(other.getPrefix()); - if (prefixComparison != 0) { - return prefixComparison; - } - - return this.getName().compareTo(other.getName()); - } } } \ No newline at end of file From ede15484f1112f162ac45dfc7a5fcaa94d8338c1 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 4 Dec 2025 15:01:35 +0100 Subject: [PATCH 04/13] Moving RDFa to only the 1.1 version, removing the old one --- .../core/next/api/base/io/RDFFormat.java | 15 +- 
.../next/impl/io/parser/ParserFactory.java | 10 +- .../io/parser/rdfa/AbstractRDFaParser.java | 80 ---- .../impl/io/parser/rdfa/RDFa10Parser.java | 344 ------------------ .../{RDFa11Parser.java => RDFaParser.java} | 248 ++++++++----- .../model/AbstractRDFaEvaluationContext.java | 160 -------- .../rdfa/model/RDFa10EvaluationContext.java | 51 --- .../rdfa/model/RDFa11EvaluationContext.java | 101 ----- .../rdfa/model/RDFaEvaluationContext.java | 225 ++++++++++-- .../rdfa/model/RDFaInitialPrefixes.java | 2 +- .../core/next/api/base/io/RDFFormatTest.java | 2 +- .../impl/io/parser/rdfa/RDFa10ParserTest.java | 227 ------------ .../impl/io/parser/rdfa/RDFa11ParserTest.java | 27 -- .../impl/io/parser/rdfa/RDFaParserTest.java | 337 +++++++++++++++++ 14 files changed, 715 insertions(+), 1114 deletions(-) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java rename src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/{RDFa11Parser.java => RDFaParser.java} (73%) delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java delete mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java delete mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index f2b8ab814..46606499a 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ 
-65,17 +65,10 @@ public class RDFFormat extends FileFormat { false, true); - public static final RDFFormat RDFa_1_0 = new RDFFormat( - "RDFa 1.0", - List.of("html"), - List.of("text/html"), - true, - false); - - public static final RDFFormat RDFa_1_1 = new RDFFormat( + public static final RDFFormat RDFA = new RDFFormat( "RDFa 1.1", - List.of("xhtml", "svg", "xml"), - List.of("application/xhtml+xml", "image/svg+xml", "application/xml", "text/xml"), + List.of("xhtml", "svg", "html"), + List.of("application/xhtml+xml", "image/svg+xml", "application/xml", "text/xml", "text/html"), true, false); @@ -165,7 +158,7 @@ public static Optional byMimeType(String mimeType) { * @return An unmodifiable List of all RdfFormat constants. */ public static List all() { - return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa_1_0, RDFa_1_1); + return List.of(JSONLD, NQUADS, NTRIPLES, RDFA, RDFC_1_0, RDFXML, TRIG, TURTLE); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 7812e44d3..0293f1ff0 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,7 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.NQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.NTriplesParser; -import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFa10Parser; +import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFaParser; import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.TurtleParser; import fr.inria.corese.core.next.impl.io.parser.trig.TriGParser; @@ -53,8 +53,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new TriGParser(model, 
factory, config); } else if(format == RDFFormat.RDFC_1_0) { return new NQuadsParser(model, factory, config); - } else if (format == RDFFormat.RDFa_1_0) { - return new RDFa10Parser(model, factory, config); + } else if (format == RDFFormat.RDFA) { + return new RDFaParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -80,8 +80,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new RDFXMLParser(model, factory); } else if (format == RDFFormat.TRIG) { return new TriGParser(model, factory); - } else if (format == RDFFormat.RDFa_1_0) { - return new RDFa10Parser(model, factory); + } else if (format == RDFFormat.RDFA) { + return new RDFaParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java deleted file mode 100644 index d9fee9f6f..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/AbstractRDFaParser.java +++ /dev/null @@ -1,80 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.impl.common.util.IRIUtils; -import fr.inria.corese.core.next.impl.exception.ParsingErrorException; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; - -import java.util.Optional; - -public abstract class AbstractRDFaParser extends AbstractRDFParser { - - protected RDFaEvaluationContext currentContext; - - protected AbstractRDFaParser(Model model, ValueFactory factory) { - 
super(model, factory); - } - - protected AbstractRDFaParser(Model model, ValueFactory factory, IOOptions config) { - super(model, factory, config); - } - - /** - * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI - * - * @param stringResource the resource as stored in the attribute of the HTML element - * @param context the context of the element evalation - * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise - */ - protected Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { - String resultString = stringResource; - if (resultString.startsWith("[") && resultString.endsWith("]")) { - resultString = resultString.replaceFirst("\\[", ""); - resultString = resultString.replaceFirst("]", ""); - } - - - if (stringUriIsCURIE(resultString)) { // CURIE - int colonIndex = resultString.indexOf(":"); - String prefixString = resultString.substring(0, colonIndex); - String localNameString = resultString.substring(colonIndex + 1); - // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri - if (context.hasIriMapping(prefixString)) { - IRI namespaceIRI = context.getIriMapping(prefixString); - - return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); - } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI - return Optional.of(this.getValueFactory().createIRI(context.getBaseIri().stringValue(), localNameString)); - } else { - throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); - } - } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI - return Optional.of(this.getValueFactory().createIRI(resultString)); - - } else if (resultString.startsWith("_:")) { // Blank Node - int colonIndex = resultString.indexOf(":"); - String localNameString = resultString.substring(colonIndex + 1); - return 
Optional.of(this.getValueFactory().createBNode(localNameString)); - } else if (IRIUtils.isStandardIRI(context.getBaseIri().stringValue() + resultString)) { - String concatenatedRelativeUri = context.getBaseIri().stringValue() + resultString; - return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); - } - return Optional.empty(); - } - - /** - * Equivalent to test if it has a colon, and it is not a blank node - * - * @param stringIri - * @return - */ - protected boolean stringUriIsCURIE(String stringIri) { - int colonIndex = stringIri.indexOf(":"); - return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java deleted file mode 100644 index 689c15ba9..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10Parser.java +++ /dev/null @@ -1,344 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.io.RDFFormat; -import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; -import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; -import fr.inria.corese.core.next.impl.common.util.IRIUtils; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.exception.ParsingErrorException; -import fr.inria.corese.core.next.impl.io.common.IOConstants; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaAttributes; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; -import 
org.apache.commons.io.input.ReaderInputStream; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Attribute; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; - -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.util.*; - -/** - * RDFa parser. This parser will load the RDF data stored as RDFa in an HTML page. Its inner implementation is based on the jsoup library. It loads the html page as DOM and process it following the recommended algorithm in the RDFa recommendation. - */ -public class RDFa10Parser extends AbstractRDFaParser { - - private static final String BASE_TAG = "base"; - - private static final String XMLNS_PREFIX = "xmlns"; - - public RDFa10Parser(Model model, ValueFactory factory) { - this(model, factory, new RDFaParserOptions.Builder().build()); - } - - public RDFa10Parser(Model model, ValueFactory factory, IOOptions config) { - super(model, factory, config); - } - - @Override - public RDFFormat getRDFFormat() { - return RDFFormat.RDFa_1_0; - } - - @Override - public void parse(InputStream in) { - if(getConfig() instanceof BaseIRIOptions baseIRIOptions) { - String baseIRI = baseIRIOptions.getBaseIRI(); - parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI); - } else { - parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); - } - } - - @Override - public void parse(InputStream in, String baseURIString) { - try { - Document document = Jsoup.parse(in, null, baseURIString); - - IRI baseIri = getValueFactory().createIRI(baseURIString); - processDocument(document, baseIri); - } catch (Exception e) { - throw new ParsingErrorException("Error during parsing of HTML document", e); - } - } - - /** - * Intermediary function to configure the processing of a document using some basic HTML traversal to determine if a baseIri has been defined in the document. 
- * If the baseIri in argument is the Corese default base IRI, the value stored in the document is used instead. - * - * @param document Jsoup HTML document to be processed - * @param baseIri An IRI object - */ - private void processDocument(Document document, IRI baseIri) { - // If the base Iri in argument is not the default baseIri, then we take it, else we use the one in the document - if (baseIri.stringValue().equals(IOConstants.getDefaultBaseURI())) { - // Looking for the node in the document - IRI baseIriFromXml = baseIri; - Iterator baseElementIterator = document.stream().filter(element -> element.nameIs(BASE_TAG)).iterator(); - while (baseElementIterator.hasNext()) { - Element baseElement = baseElementIterator.next(); - Attribute baseElementHrefAttribute = baseElement.attribute(RDFaAttributes.HREF.getName()); - if (baseElementHrefAttribute != null) { - String baseIriString = baseElementHrefAttribute.getValue(); - baseIriFromXml = getValueFactory().createIRI(baseIriString); - } - } - - baseIri = this.getValueFactory().createIRI(baseIriFromXml.stringValue()); - } - - for (Element element : document.children()) { - processElement(element, new RDFa10EvaluationContext(baseIri)); - } - } - - /** - * @param element Current element - * @param context Active context - * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. - * @param skipElement Flag thet indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. 
- * @see RDFa processing in details - */ - private void processElement(Element element, fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext context, boolean recursive, boolean skipElement) { - - // 1. First, the local values are initialized - Resource newSubject = null; - Resource currentObject = null; - Literal currentObjectLiteral = null; - Map currentMappings = context.getIriMappings(); - Set incompleteStatementSet = new HashSet<>(); - String language = context.getLanguage(); - - // 2. Next the [current element] is parsed for [URI mapping]s and these are added to the [local list of URI mappings]. Note that a [URI mapping] will simply overwrite any current mapping in the list that has the same name; - // Looking for namespace declarations - // Namespace declaration are done using the XML namespace declaration mechanism, that can be seen as an attributes prefixed by "xmlns" and looks like this: "xmlns:prefix=namespace" - for (Attribute attribute : element.attributes()) { - if (attribute.getKey().startsWith(XMLNS_PREFIX)) { - String prefixName = attribute.localName(); - IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), ""); - context.addIriMapping(prefixName, prefixNamespace); - } - } - - // 3. The [current element] is also parsed for any language information, and if present, [current language] is set accordingly; - if (element.attribute(RDFaAttributes.LANG.getName()) != null) { - language = element.attr(RDFaAttributes.LANG.getName()); - } - - // 4. If the [current element] contains no @rel or @rev attribute, then the next step is to establish a value for [new subject]. 
Any of the attributes that can carry a resource can set [new subject]; - if(element.attribute(RDFaAttributes.REL.getName()) == null && element.attribute(RDFaAttributes.REV.getName()) == null) { - // [new subject] is set to the URI obtained from the first match from the following rules: - if (element.attribute(RDFaAttributes.ABOUT.getName()) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.ABOUT.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.attribute(RDFaAttributes.SRC.getName()) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. - Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.SRC.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.attribute(RDFaAttributes.RESOURCE.getName()) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.attribute(RDFaAttributes.HREF.getName()) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. 
- Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.HREF.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; - newSubject = context.getBaseIri(); - } else if (element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. - newSubject = this.getValueFactory().createBNode(); - } else if (context.getParentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to the value of [parent object]. Additionally, if @property is not present then the [skip element] flag is set to 'true'; - newSubject = context.getParentObjectResource(); - if(element.attribute(RDFaAttributes.PROPERTY.getName()) == null) { - skipElement = true; - } - } - } else { - // [new subject] is set to the URI obtained from the first match from the following rules: - if (element.attribute(RDFaAttributes.ABOUT.getName()) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; - Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.ABOUT.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.attribute(RDFaAttributes.SRC.getName()) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. 
- Optional newSubjectResource = getResourceFromElementAttribute(element, RDFaAttributes.SRC.getName(), context); - if (newSubjectResource.isPresent()) { - newSubject = newSubjectResource.get(); - } - } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; - newSubject = context.getBaseIri(); - } else if (element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. - newSubject = this.getValueFactory().createBNode(); - } else if(context.getParentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to that. - newSubject = context.getParentObjectResource(); - } - - // Then the [current object resource] is set to the URI obtained from the first match from the following rules: - if (element.attribute(RDFaAttributes.RESOURCE.getName()) != null) { // by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; - Optional newObjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); - if (newObjectResource.isPresent()) { - currentObject = newObjectResource.get(); - } - } else if (element.attribute(RDFaAttributes.HREF.getName()) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. - Optional newObjectResource = getResourceFromElementAttribute(element, RDFaAttributes.RESOURCE.getName(), context); - if (newObjectResource.isPresent()) { - currentObject = newObjectResource.get(); - } - } - } - - // 6. 
If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; - if(newSubject != null) { - if(element.attribute(RDFaAttributes.TYPEOF.getName()) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: - Optional typeIri = getResourceFromElementAttribute(element, RDFaAttributes.TYPEOF.getName(), context); - if (typeIri.isPresent()) { - Statement stat = this.getValueFactory().createStatement(newSubject, RDF.type.getIRI(), typeIri.get()); - this.getModel().add(stat); - } else { - throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(RDFaAttributes.TYPEOF.getName())); - } - } - } - - // 7. If in any of the previous steps a [current object resource] was set to a non-null value, it is now used to generate triples: - if (currentObject != null && (element.attribute(RDFaAttributes.REL.getName()) != null || element.attribute(RDFaAttributes.REV.getName()) != null)) { - if(element.attribute(RDFaAttributes.REL.getName()) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { - IRI property = (IRI) propertyOpt.get(); - this.getModel().add(newSubject, property, currentObject); - } - } - if(element.attribute(RDFaAttributes.REV.getName()) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { - IRI property = (IRI) propertyOpt.get(); - this.getModel().add(currentObject, property, newSubject); - } - } - } - - // 8. 
If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object. Also, [current object resource] should be set to a newly created [bnode]; - if (currentObject == null && (element.attribute(RDFaAttributes.REL.getName()) != null || element.attribute(RDFaAttributes.REV.getName()) != null)) { - currentObject = getValueFactory().createBNode(); - if(element.attribute(RDFaAttributes.REL.getName()) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { - IRI property = (IRI) propertyOpt.get(); - RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); - incompleteStatementSet.add(statement); - } - } - if(element.attribute(RDFaAttributes.REV.getName()) != null) { - Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.REL.getName(), context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { - IRI property = (IRI) propertyOpt.get(); - RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); - statement.setBackward(); - incompleteStatementSet.add(statement); - } - } - } - - // 9. The next step of the iteration is to establish any [current object literal]; - if(element.attribute(RDFaAttributes.PROPERTY.getName()) != null) { // Predicates for the [current object literal] can be set by using @property. 
If present, one or more URIs are obtained according to the section on CURIE and URI Processing, and then the actual literal value is obtained as follows: - Optional propertyOpt = getResourceFromElementAttribute(element, RDFaAttributes.PROPERTY.getName(), context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { - IRI property = (IRI)propertyOpt.get(); - - IRI datatype = null; - if(element.attribute(RDFaAttributes.DATATYPE.getName()) != null && ! element.attr(RDFaAttributes.DATATYPE.getName()).isEmpty()) { - Optional datatypeOpt = getResourceFromElementAttribute(element, RDFaAttributes.DATATYPE.getName(), context); - if(datatypeOpt.isPresent() && datatypeOpt.get().isIRI() && ! datatypeOpt.get().equals(RDF.XMLLiteral.getIRI())) { - datatype = (IRI) datatypeOpt.get(); - } - } - String value = element.text(); - if(element.attribute(RDFaAttributes.CONTENT.getName()) != null) { - value = element.attr(RDFaAttributes.CONTENT.getName()); - } - if(datatype != null) { - currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype); - recursive = false; - } else if(language != null) { - currentObjectLiteral = this.getValueFactory().createLiteral(value, language); - } else { - currentObjectLiteral = this.getValueFactory().createLiteral(value); - } - this.getModel().add(newSubject, property, currentObjectLiteral); - } - } - - // 10. If the [skip element] flag is 'false', and [new subject] was set to a non-null value, then any [incomplete triple]s within the current context should be completed: - Iterator itStat = context.getIncompleteStatementIterator(); - while(itStat.hasNext()) { - RDFaIncompleteStatement statement = itStat.next(); - if(statement.isForward()) { - this.getModel().add(context.getParentSubjectResource(), statement.getPredicate(), newSubject); - } else if (statement.isBackward()){ - this.getModel().add(newSubject, statement.getPredicate(), context.getParentSubjectResource()); - } - } - - // 11. 
If the [recurse] flag is 'true', all elements that are children of the [current element] are processed using the rules described here, using a new [evaluation context], - if(recursive) { - if(skipElement) { - RDFa10EvaluationContext newContext = new fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext(context); - newContext.setLanguage(language); - newContext.setIriMappings(currentMappings); - context = newContext; - } else { - context = new RDFa10EvaluationContext(context.getBaseIri()); - if(newSubject != null) { - context.setParentObjectResource(newSubject); - } - if(currentObject != null) { - context.setParentObjectResource(currentObject); - } - context.setIriMappings(currentMappings); - context.setIncompleteStatements(incompleteStatementSet); - context.setLanguage(language); - } - - for (Element child : element.children()) { - processElement(child, context, recursive, skipElement); - } - } - } - - /** - * Surcharge function that initialize the flags and subject and objet to their initial values for processing - * - * @param element HTML element - * @param context current evaluation context - */ - private void processElement(Element element, fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa10EvaluationContext context) { - processElement(element, context, true, false); - } - - @Override - public void parse(Reader reader, String baseURI) { - InputStream inputStream = new ReaderInputStream(reader, StandardCharsets.UTF_8); - parse(inputStream , baseURI); - } - - private Optional getResourceFromElementAttribute(Element element, String attributeName, RDFaEvaluationContext context) { - if (element.attribute(attributeName) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; - String newSubjectString = element.attr(attributeName); - return resolveStringResource(newSubjectString, context); - } - return Optional.empty(); - } -} diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java similarity index 73% rename from src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 7ab3e26c6..7dc79c6c9 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11Parser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -2,17 +2,16 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; +import fr.inria.corese.core.next.impl.common.util.IRIUtils; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.common.vocabulary.RDFS; -import fr.inria.corese.core.next.impl.common.vocabulary.RDFa; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFa11EvaluationContext; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaAttributes; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaInitialPrefixes; -import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; @@ -39,14 +38,14 @@ * This parser does NOT support vocabulary expansion *

*/ -public class RDFa11Parser extends AbstractRDFaParser { +public class RDFaParser extends AbstractRDFParser { - private static final Logger logger = LoggerFactory.getLogger(RDFa11Parser.class); + private static final Logger logger = LoggerFactory.getLogger(RDFaParser.class); private static final String BASE_TAG = "base"; private static final String XMLNS_PREFIX = "xmlns"; - private RDFa11EvaluationContext currentContext = null; + private RDFaEvaluationContext currentContext = null; /** * Buffer for accumulating character data between start and end tags. @@ -68,13 +67,11 @@ public class RDFa11Parser extends AbstractRDFaParser { private boolean isRootElement = true; private Attributes currentElementAttributes = null; - private Model parsingModel = new CoreseModel(); - - public RDFa11Parser(Model model, ValueFactory factory) { + public RDFaParser(Model model, ValueFactory factory) { this(model, factory, new RDFaParserOptions.Builder().build()); } - public RDFa11Parser(Model model, ValueFactory factory, IOOptions config) { + public RDFaParser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); } @@ -96,30 +93,19 @@ public void parse(InputStream in, String baseURIString) { @Override public RDFFormat getRDFFormat() { - return RDFFormat.RDFa_1_1; + return RDFFormat.RDFA; } @Override public void parse(Reader reader, String baseURI) { try { - this.currentContext = new RDFa11EvaluationContext(getValueFactory().createIRI(baseURI)); + this.currentContext = getNewContext(getValueFactory().createIRI(baseURI)); this.currentContext.setParentSubjectResource(this.currentContext.getBaseIri()); this.currentContext.setParentObjectResource(null); this.currentContext.setLanguage(null); - // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance - for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { - currentContext.addIriMapping(prefixObject.getPrefix(), 
getValueFactory().createIRI(prefixObject.getName())); - } - - //
https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined terms mappings for RDFa contexts. - this.currentContext.addTermMapping("describedby", getValueFactory().createIRI("http://www.w3.org/2007/05/powder-s#describedby")); - this.currentContext.addTermMapping("license", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#license")); - this.currentContext.addTermMapping("role", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#role")); - this.currentContext.setDefaultVocabulary(null); - skipElement = false; newSubject = null; currentObjectResource = null; @@ -165,9 +151,7 @@ private void endProcessElement(String uri, String localName, String qName) { // The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). if (this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()) != null && !this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()).isEmpty()) { - String vocabValue = this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()); - localDefaultVocabulary = vocabValue; - parsingModel.add(this.currentContext.getBaseIri(), RDFa.usesVocabulary.getIRI(), getValueFactory().createLiteral(vocabValue)); + localDefaultVocabulary = this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()); } // The current element is examined for IRI mappings and these are added to the local list of IRI mappings. 
Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; @@ -207,14 +191,19 @@ private void endProcessElement(String uri, String localName, String qName) { if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { this.typedResource = this.newSubject; } else if (isRootElement) { - this.typedResource = resolveStringResource("", this.currentContext).get(); + Optional emptyAboutResource = resolveStringResource(""); + if (emptyAboutResource.isPresent()) { + this.typedResource = emptyAboutResource.get(); + } else { + throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); + } } else { if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { this.newSubject = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.HREF); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.HREF); } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.SRC); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.SRC); } else { this.typedResource = getValueFactory().createBNode(); } @@ -228,17 +217,22 @@ private void endProcessElement(String uri, String localName, String qName) { && this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null && this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.ABOUT); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); } else if 
(this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.RESOURCE); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.HREF); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.HREF); } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.SRC); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.SRC); } } else { if (isRootElement) { - this.newSubject = resolveStringResource("", this.currentContext).get(); + Optional emptyAboutResource = resolveStringResource(""); + if (emptyAboutResource.isPresent()) { + this.newSubject = emptyAboutResource.get(); + } else { + throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); + } } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { this.newSubject = getValueFactory().createBNode(); } else if (this.currentContext.getParentObjectResource() != null) { @@ -258,24 +252,29 @@ private void endProcessElement(String uri, String localName, String qName) { if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { - this.newSubject = getResourceFromElementAttribute( RDFaAttributes.ABOUT); + this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); } if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { this.typedResource = this.newSubject; } if (this.newSubject == null) { if (isRootElement) { - 
this.typedResource = resolveStringResource("", this.currentContext).get(); + Optional emptyAboutResource = resolveStringResource(""); + if (emptyAboutResource.isPresent()) { + this.typedResource = emptyAboutResource.get(); + } else { + throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); + } } else if (this.currentContext.getParentObjectResource() != null) { this.newSubject = this.currentContext.getParentObjectResource(); } } if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.RESOURCE); + this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.HREF); + this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.HREF); } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute( RDFaAttributes.SRC); + this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.SRC); } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null) { this.currentObjectResource = this.getValueFactory().createBNode(); @@ -289,7 +288,7 @@ private void endProcessElement(String uri, String localName, String qName) { // If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; if (this.typedResource != null) { - Resource typeIri = getResourceFromElementAttribute( RDFaAttributes.TYPEOF); + Resource typeIri = getResourceFromElementAttribute(RDFaAttributes.TYPEOF); this.getModel().add(this.typedResource, RDF.type.getIRI(), typeIri); 
} @@ -302,7 +301,7 @@ private void endProcessElement(String uri, String localName, String qName) { if (this.currentObjectResource != null) { if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null && this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - IRI relResource = (IRI) getResourceFromElementAttribute( RDFaAttributes.REL); + IRI relResource = (IRI) getResourceFromElementAttribute(RDFaAttributes.REL); if (!localListMappings.containsKey(relResource)) { this.localListMappings.put(relResource, new HashSet<>()); } @@ -310,7 +309,7 @@ private void endProcessElement(String uri, String localName, String qName) { } if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) == null) { if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - Resource relResource = getResourceFromElementAttribute( RDFaAttributes.REL); + Resource relResource = getResourceFromElementAttribute(RDFaAttributes.REL); if (relResource.isIRI()) { this.getModel().add(newSubject, (IRI) relResource, currentObjectResource); } else { @@ -318,7 +317,7 @@ private void endProcessElement(String uri, String localName, String qName) { } } if (this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { - Resource revResource = getResourceFromElementAttribute( RDFaAttributes.REV); + Resource revResource = getResourceFromElementAttribute(RDFaAttributes.REV); if (!revResource.isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); } @@ -334,29 +333,29 @@ private void endProcessElement(String uri, String localName, String qName) { if (this.currentObjectResource == null) { this.currentObjectResource = getValueFactory().createBNode(); if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - if(! 
getResourceFromElementAttribute( RDFaAttributes.REL).isIRI()) { + if (!getResourceFromElementAttribute(RDFaAttributes.REL).isIRI()) { throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); } - IRI relIRI = (IRI) getResourceFromElementAttribute( RDFaAttributes.REL); - if(this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { // TODO: Step to be double checked, standard unclear + IRI relIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.REL); + if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { if (!localListMappings.containsKey(relIRI)) { this.localListMappings.put(relIRI, new HashSet<>()); } - this.localListMappings.get(relIRI).add(this.currentObjectResource); + this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.NONE)); } else { this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); } } else if (this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { - if(! 
getResourceFromElementAttribute( RDFaAttributes.REV).isIRI()) { + if (!getResourceFromElementAttribute(RDFaAttributes.REV).isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); } - IRI revIRI = (IRI) getResourceFromElementAttribute( RDFaAttributes.REV); + IRI revIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.REV); this.localIncompleteStatements.add(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); } } // The next step of the iteration is to establish any current property value; - if(this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null) { IRI propertyIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.PROPERTY); Value currentPropertyValue = null; if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null @@ -381,20 +380,20 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge currentPropertyValue = getValueFactory().createLiteral(contentString); this.characters = new StringBuilder(); } - //} else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null - // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE).isIRI() - // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { + //} else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null + // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE).isIRI() + // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { } else if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); currentPropertyValue = 
getValueFactory().createLiteral(contentString); } else if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) == null && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) == null && this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) == null) { - if(this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { + if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); - } else if(this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { + } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.HREF); - } else if(this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { + } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.SRC); } } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null @@ -402,8 +401,8 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge currentPropertyValue = typedResource; } else { String contentString = this.characters.toString().trim(); - if(this.currentLanguage != null - && ! this.currentLanguage.isEmpty()) { + if (this.currentLanguage != null + && !this.currentLanguage.isEmpty()) { currentPropertyValue = getValueFactory().createLiteral(contentString, this.currentLanguage); } else { currentPropertyValue = getValueFactory().createLiteral(contentString); @@ -411,8 +410,8 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge this.characters = new StringBuilder(); } - if(this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { - if(! 
this.localListMappings.containsKey(propertyIRI)) { + if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { + if (!this.localListMappings.containsKey(propertyIRI)) { this.localListMappings.put(propertyIRI, new HashSet<>()); } this.localListMappings.get(propertyIRI).add(currentPropertyValue); @@ -422,14 +421,14 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge } // If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: - if(! skipElement + if (!skipElement && newSubject != null) { - for(RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { - if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { + for (RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { + if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { localListMappings.get(incompleteStatement.getPredicate()).add(newSubject); - } else if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { + } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { this.getModel().add(currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), newSubject); - } else if(incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { + } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { this.getModel().add(newSubject, incompleteStatement.getPredicate(), currentContext.getParentSubjectResource()); } } @@ -437,22 +436,26 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge // Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: Map> 
oldListMappings = currentContext.getListMappings(); - if(skipElement) { - currentContext = new RDFa11EvaluationContext(this.currentContext); + if (skipElement) { + currentContext = new RDFaEvaluationContext(this.currentContext); + currentContext.clearIriMappings(); + initializeNewContext(currentContext); currentContext.setLanguage(currentLanguage); - currentContext.setIriMappings(localIRIMappings); + currentContext.addIriMappings(localIRIMappings); } else { Resource oldParentSubject = this.currentContext.getParentSubjectResource(); - currentContext = new RDFa11EvaluationContext(this.currentContext.getBaseIri()); + currentContext = new RDFaEvaluationContext(this.currentContext.getBaseIri()); + initializeNewContext(currentContext); currentContext.setParentSubjectResource(newSubject); - if(currentObjectResource != null) { + if (currentObjectResource != null) { currentContext.setParentObjectResource(currentObjectResource); - } if (newSubject != null) { + } + if (newSubject != null) { currentContext.setParentObjectResource(newSubject); } else { currentContext.setParentObjectResource(oldParentSubject); } - currentContext.setIriMappings(localIRIMappings); + currentContext.addIriMappings(localIRIMappings); currentContext.setIncompleteStatements(localIncompleteStatements); currentContext.setListMappings(localListMappings); currentContext.setLanguage(currentLanguage); @@ -460,23 +463,23 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge } // Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: - for(Map.Entry> listMapping : localListMappings.entrySet()) { + for (Map.Entry> listMapping : localListMappings.entrySet()) { IRI propertyIRI = listMapping.getKey(); Set propertyList = listMapping.getValue(); - if(!oldListMappings.containsKey(propertyIRI)) { - if(propertyList.isEmpty()) { + if (!oldListMappings.containsKey(propertyIRI)) { + if (propertyList.isEmpty()) { getModel().add(newSubject, 
propertyIRI, RDF.nil.getIRI()); } else { ArrayList bnodes = new ArrayList<>(); - for(int i = 0; i < propertyList.size(); i++) { + for (int i = 0; i < propertyList.size(); i++) { bnodes.add(getValueFactory().createBNode()); } int bnodeIndex = 0; - for(Value listElement : propertyList) { + for (Value listElement : propertyList) { BNode elementNode = bnodes.get(bnodeIndex); Resource nextElementNode = RDF.nil.getIRI(); - if(bnodeIndex < bnodes.size() - 1) { + if (bnodeIndex < bnodes.size() - 1) { nextElementNode = bnodes.get(bnodeIndex + 1); } getModel().add(elementNode, RDF.first.getIRI(), listElement); @@ -498,12 +501,12 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge private class XMLSaxHandler extends DefaultHandler { @Override public void characters(char[] ch, int start, int length) { - RDFa11Parser.this.handleCharacters(ch, start, length); + RDFaParser.this.handleCharacters(ch, start, length); } @Override public void startPrefixMapping(String prefix, String uri) { - RDFa11Parser.this.addPrefix(prefix, uri); + RDFaParser.this.addPrefix(prefix, uri); } @Override @@ -550,10 +553,89 @@ private IRI getPrefixIriFromDeclaration(String declaration) { private Resource getResourceFromElementAttribute(RDFaAttributes attribute) { String attributeValue = this.currentElementAttributes.getValue(attribute.getName()); - if (resolveStringResource(attributeValue, this.currentContext).isPresent()) { - return resolveStringResource(attributeValue, this.currentContext).get(); + Optional resourceResolution = resolveStringResource(attributeValue); + if (resourceResolution.isPresent()) { + return resourceResolution.get(); } else { throw new ParsingErrorException("Could not parse @" + attribute.getName() + " value: " + attributeValue); } } + + + /** + * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI + * + * @param stringResource the resource as stored in the attribute of the HTML 
element + * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise + */ + protected Optional resolveStringResource(String stringResource) { + String resultString = stringResource; + if (resultString.startsWith("[") && resultString.endsWith("]")) { + resultString = resultString.replaceFirst("\\[", ""); + resultString = resultString.replaceFirst("]", ""); + } + + + if (stringUriIsCURIE(resultString)) { // CURIE + int colonIndex = resultString.indexOf(":"); + String prefixString = resultString.substring(0, colonIndex); + String localNameString = resultString.substring(colonIndex + 1); + // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri + if (currentContext.hasIriMapping(prefixString)) { + IRI namespaceIRI = currentContext.getIriMapping(prefixString); + + return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); + } else if (localIRIMappings.containsKey(prefixString)) { + IRI namespaceIRI = localIRIMappings.get(prefixString); + + return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); + } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI + return Optional.of(this.getValueFactory().createIRI(currentContext.getBaseIri().stringValue(), localNameString)); + } else { + logger.info("{} context mappings", currentContext.getIriMappings().size()); + logger.info("{} local mappings", localIRIMappings.size()); + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); + } + } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI + return Optional.of(this.getValueFactory().createIRI(resultString)); + + } else if (resultString.startsWith("_:")) { // Blank Node + int colonIndex = resultString.indexOf(":"); + String localNameString = resultString.substring(colonIndex + 1); + return Optional.of(this.getValueFactory().createBNode(localNameString)); + } else if 
(IRIUtils.isStandardIRI(currentContext.getBaseIri().stringValue() + resultString)) { + String concatenatedRelativeUri = currentContext.getBaseIri().stringValue() + resultString; + return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); + } + return Optional.empty(); + } + + /** + * Equivalent to test if it contains a colon, and it is not a blank node + * + * @param stringIri Attribute or text value + * @return true if it is a valid CURIE + */ + protected boolean stringUriIsCURIE(String stringIri) { + int colonIndex = stringIri.indexOf(":"); + return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); + } + + private RDFaEvaluationContext getNewContext(IRI baseIRI) { + RDFaEvaluationContext result = new RDFaEvaluationContext(baseIRI); + initializeNewContext(result); + return result; + } + + private void initializeNewContext(RDFaEvaluationContext context) { + // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance + for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { + context.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); + } + + // https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined terms mappings for RDFa contexts. 
+ context.addTermMapping("describedby", getValueFactory().createIRI("http://www.w3.org/2007/05/powder-s#describedby")); + context.addTermMapping("license", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#license")); + context.addTermMapping("role", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#role")); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java deleted file mode 100644 index 59e6ffd9a..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/AbstractRDFaEvaluationContext.java +++ /dev/null @@ -1,160 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa.model; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; - -import java.util.*; - -public abstract class AbstractRDFaEvaluationContext implements RDFaEvaluationContext { - - /** - * The base. This will usually be the IRI of the document being processed, but it could be some other IRI, set by some other mechanism, such as the (X)HTML base element. The important thing is that it establishes an IRI against which relative paths can be resolved. - */ - private IRI baseIri; - - /** - * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. - */ - private Resource parentSubjectResource ; - - /** - * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. 
- */ - private Resource parentObjectResource = null; - - /** - * An index of locally defined IRI prefixes - */ - private Map iriMappings = new HashMap<>(); - - /** - * Set of statement in the process of building. - */ - private Set incompleteStatement = new HashSet<>(); - - /** - * The language of the document. Note that there is no default language. - */ - private String language = null; - - protected AbstractRDFaEvaluationContext(IRI baseIri) { - this(baseIri, baseIri); - } - - protected AbstractRDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) { - this.baseIri = baseIri; - this.parentSubjectResource = parentSubjectResource; - } - - protected AbstractRDFaEvaluationContext(AbstractRDFaEvaluationContext context) { - this.baseIri = context.baseIri; - this.parentSubjectResource = context.parentSubjectResource; - this.parentObjectResource = context.parentObjectResource; - this.iriMappings = new HashMap<>(context.iriMappings); - this.incompleteStatement = new HashSet<>(context.incompleteStatement); - this.language = context.language; - } - - @Override - public IRI getBaseIri() { - return baseIri; - } - - @Override - public void setBaseIri(IRI baseIri) { - this.baseIri = baseIri; - } - - @Override - public Resource getParentSubjectResource() { - return parentSubjectResource; - } - - @Override - public void setParentSubjectResource(Resource parentSubjectResource) { - this.parentSubjectResource = parentSubjectResource; - } - - @Override - public Resource getParentObjectResource() { - return parentObjectResource; - } - - @Override - public void setParentObjectResource(Resource parentObjectResource) { - this.parentObjectResource = parentObjectResource; - } - - @Override - public Map getIriMappings() { - return iriMappings; - } - - @Override - public void setIriMappings(Map iriMappings) { - this.iriMappings = iriMappings; - } - - @Override - public boolean hasIriMapping(String prefix) { - return this.iriMappings.containsKey(prefix); - } - - /** - * @param prefix 
the prefix WITHOUT ":" - * @return the IRI associated to the prefix in this context - */ - @Override - public IRI getIriMapping(String prefix) { - return this.iriMappings.get(prefix); - } - - @Override - public void addIriMapping(String prefix, IRI prefixIri) { - this.iriMappings.put(prefix, prefixIri); - } - - @Override - public Set getIncompleteStatement() { - return incompleteStatement; - } - - @Override - public void setIncompleteStatements(Set incompleteStatement) { - this.incompleteStatement = incompleteStatement; - } - - @Override - public Iterator getIncompleteStatementIterator() { - return this.incompleteStatement.iterator(); - } - - @Override - public void addStatementWithoutSubject(IRI property, Value object) { - RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); - newStatement.setObject(object); - this.incompleteStatement.add(newStatement); - } - - @Override - public void addStatementWithoutObject(Resource subject, IRI property) { - RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); - newStatement.setSubject(subject); - this.incompleteStatement.add(newStatement); - } - - public void clearIncompleteStatements() { - this.incompleteStatement.clear(); - } - - @Override - public String getLanguage() { - return language; - } - - @Override - public void setLanguage(String language) { - this.language = language; - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java deleted file mode 100644 index ca9406762..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa10EvaluationContext.java +++ /dev/null @@ -1,51 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa.model; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; - -import 
java.util.*; - -/** - * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. - * @see RDFa recommandation - */ -public class RDFa10EvaluationContext extends AbstractRDFaEvaluationContext { - - public RDFa10EvaluationContext(IRI baseIri) { - this(baseIri, baseIri); - } - - public RDFa10EvaluationContext(IRI baseIri, IRI parentSubjectResource) { - super(baseIri, parentSubjectResource); - } - - public RDFa10EvaluationContext(RDFa10EvaluationContext context) { - super(context); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); - sb.append("Mappings: ["); - this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); - sb.append("] "); - if(this.getParentSubjectResource() != null) { - sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); - } else { - sb.append("Subject:").append((Object) null).append(" "); - } - if(this.getParentObjectResource() != null) { - sb.append("Object: ").append(this.getParentObjectResource().stringValue()).append(" "); - } else { - sb.append("Object: ").append((Object) null).append(" "); - } - if(! 
this.getIncompleteStatement().isEmpty()) { - sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); - } - - return sb.toString(); - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java deleted file mode 100644 index 36c3c82d0..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFa11EvaluationContext.java +++ /dev/null @@ -1,101 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa.model; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Value; - -import java.util.*; - -/** - * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. - * @see RDFa recommandation - */ -public class RDFa11EvaluationContext extends AbstractRDFaEvaluationContext { - - /** - * A list mapping that associates IRIs with lists. - */ - private Map> listMappings = new HashMap<>(); - /** - * The language. Note that there is no default language. - */ - private String language = null; - /** - * The term mappings, a list of terms and their associated IRIs. This specification does not define an initial list. Host Languages MAY define an initial list. - */ - private Map termMappings = new HashMap<>(); - - /** - * The default vocabulary, a value to use as the prefix IRI when a term unknown to the RDFa Processor is used. This specification does not define an initial setting for the default vocabulary. Host Languages MAY define an initial setting. 
- */ - private String defaultVocabulary = null; - - - public RDFa11EvaluationContext(IRI baseIri) { - this(baseIri, baseIri); - } - - public RDFa11EvaluationContext(IRI baseIri, IRI parentSubjectResource) { - super(baseIri, parentSubjectResource); - } - - public RDFa11EvaluationContext(RDFa11EvaluationContext context) { - super(context); - this.listMappings = new HashMap<>(context.listMappings); - this.termMappings = new HashMap<>(context.termMappings); - this.defaultVocabulary = context.defaultVocabulary;; - } - - public String getDefaultVocabulary() { - return defaultVocabulary; - } - - public void setDefaultVocabulary(String defaultVocabulary) { - this.defaultVocabulary = defaultVocabulary; - } - - public void addTermMapping(String term, IRI iri) { - this.termMappings.put(term, iri); - } - - public IRI getTermMapping(String term) { - return this.termMappings.get(term); - } - - public Map getTermMappings() { - return this.termMappings; - } - - public Map> getListMappings() { - return listMappings; - } - - public void setListMappings(Map> listMappings) { - this.listMappings = listMappings; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); - sb.append("Mappings: ["); - this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); - sb.append("] "); - if(this.getParentSubjectResource() != null) { - sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); - } else { - sb.append("Subject:").append((Object) null).append(" "); - } - if(this.getParentObjectResource() != null) { - sb.append("Object: ").append(this.getParentObjectResource().stringValue()).append(" "); - } else { - sb.append("Object: ").append((Object) null).append(" "); - } - if(! 
this.getIncompleteStatement().isEmpty()) { - sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); - } - - return sb.toString(); - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 438f70537..6a11a4809 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -4,35 +4,230 @@ import fr.inria.corese.core.next.api.Resource; import fr.inria.corese.core.next.api.Value; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; +import java.util.*; -public interface RDFaEvaluationContext { +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommendation + */ +public class RDFaEvaluationContext { + /** + * The base. This will usually be the IRI of the document being processed, but it could be some other IRI, set by some other mechanism, such as the (X)HTML base element. The important thing is that it establishes an IRI against which relative paths can be resolved. + */ + private IRI baseIri; - IRI getBaseIri(); - void setBaseIri(IRI baseIri); + /** + * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. + */ + private Resource parentSubjectResource ; - void setParentSubjectResource(Resource parentSubjectResource); - Resource getParentSubjectResource(); + /** + * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode.
This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. + */ + private Resource parentObjectResource = null; - void setParentObjectResource(Resource parentObjectResource); - Resource getParentObjectResource(); + /** + * An index of locally defined IRI prefixes + */ + private Map iriMappings = new HashMap<>(); - void setIncompleteStatements(Set incompleteStatement); - Set getIncompleteStatement(); - Iterator getIncompleteStatementIterator(); - void addStatementWithoutSubject(IRI property, Value object); - void addStatementWithoutObject(Resource subject, IRI property); - void clearIncompleteStatements(); + /** + * Set of statements that are still being built. + */ + private Set incompleteStatement = new HashSet<>(); - boolean hasIriMapping(String prefix); - IRI getIriMapping(String prefix); - Map getIriMappings(); - void addIriMapping(String prefix, IRI prefixIri); - void setIriMappings(Map iriMappings); + /** + * The language of the document. Note that there is no default language. + */ + private String language = null; + /** + * A list mapping that associates IRIs with lists. + */ + private Map> listMappings = new HashMap<>(); + /** + * The term mappings, a list of terms and their associated IRIs. This specification does not define an initial list. Host Languages MAY define an initial list. + */ + private Map termMappings = new HashMap<>(); - String getLanguage(); - void setLanguage(String language); + /** + * The default vocabulary, a value to use as the prefix IRI when a term unknown to the RDFa Processor is used. This specification does not define an initial setting for the default vocabulary. Host Languages MAY define an initial setting.
+ */ + private String defaultVocabulary = null; + + /** + * Creates the initial context of a document. The parent subject starts as the base, as described on the parent subject field. + * @param baseIri the base IRI, also used as the initial parent subject + */ + public RDFaEvaluationContext(IRI baseIri) { + this.baseIri = baseIri; + this.parentSubjectResource = baseIri; + } + + /** + * Copy constructor. The maps and the set are copied into new containers; the objects they hold are shared with the source context. + * @param context the context to copy + */ + public RDFaEvaluationContext(RDFaEvaluationContext context) { + this.baseIri = context.baseIri; + this.parentSubjectResource = context.parentSubjectResource; + this.parentObjectResource = context.parentObjectResource; + this.iriMappings = new HashMap<>(context.iriMappings); + this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.language = context.language; + this.listMappings = new HashMap<>(context.listMappings); + this.termMappings = new HashMap<>(context.termMappings); + this.defaultVocabulary = context.defaultVocabulary; + } + + public IRI getBaseIri() { + return baseIri; + } + + public void setBaseIri(IRI baseIri) { + this.baseIri = baseIri; + } + + public Resource getParentSubjectResource() { + return parentSubjectResource; + } + + public void setParentSubjectResource(Resource parentSubjectResource) { + this.parentSubjectResource = parentSubjectResource; + } + + public Resource getParentObjectResource() { + return parentObjectResource; + } + + public void setParentObjectResource(Resource parentObjectResource) { + this.parentObjectResource = parentObjectResource; + } + + public Map getIriMappings() { + return iriMappings; + } + + public void setIriMappings(Map iriMappings) { + this.iriMappings = iriMappings; + } + + public boolean hasIriMapping(String prefix) { + return this.iriMappings.containsKey(prefix); + } + + /** + * @param prefix the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + public IRI getIriMapping(String prefix) { + return this.iriMappings.get(prefix); + } + + public void addIriMapping(String prefix, IRI prefixIri) { + this.iriMappings.put(prefix, prefixIri); + } + + public void addIriMappings(Map otherMappings) { + this.iriMappings.putAll(otherMappings); + } + + public void clearIriMappings() { + this.iriMappings.clear(); + } + + public Set
getIncompleteStatement() { + return incompleteStatement; + } + + public void setIncompleteStatements(Set incompleteStatement) { + this.incompleteStatement = incompleteStatement; + } + + public Iterator getIncompleteStatementIterator() { + return this.incompleteStatement.iterator(); + } + + /** + * Records an incomplete statement that has a property and an object but no subject yet. + */ + public void addStatementWithoutSubject(IRI property, Value object) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setObject(object); + this.incompleteStatement.add(newStatement); + } + + /** + * Records an incomplete statement that has a subject and a property but no object yet. + */ + public void addStatementWithoutObject(Resource subject, IRI property) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setSubject(subject); + this.incompleteStatement.add(newStatement); + } + + public void clearIncompleteStatements() { + this.incompleteStatement.clear(); + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + + public String getDefaultVocabulary() { + return defaultVocabulary; + } + + public void setDefaultVocabulary(String defaultVocabulary) { + this.defaultVocabulary = defaultVocabulary; + } + + public void addTermMapping(String term, IRI iri) { + this.termMappings.put(term, iri); + } + + public IRI getTermMapping(String term) { + return this.termMappings.get(term); + } + + public Map getTermMappings() { + return this.termMappings; + } + + public Map> getListMappings() { + return listMappings; + } + + public void setListMappings(Map> listMappings) { + this.listMappings = listMappings; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); + sb.append("Mappings: ["); + this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append("] "); + if(this.getParentSubjectResource() != null) { +
sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); + } else { + sb.append("Subject:").append((Object) null).append(" "); + } + if(this.getParentObjectResource() != null) { + sb.append("Object: ").append(this.getParentObjectResource().stringValue()).append(" "); + } else { + sb.append("Object: ").append((Object) null).append(" "); + } + if(! this.getIncompleteStatement().isEmpty()) { + sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); + } + + return sb.toString(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java index 6dc5ce248..814b06ed1 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java @@ -104,7 +104,7 @@ public enum RDFaInitialPrefixes implements Namespace { private final String prefix; private final String name; - RDFaInitialPrefixes(String name, String prefix) { + RDFaInitialPrefixes(String prefix, String name) { this.name = name; this.prefix = prefix; } diff --git a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java index 540f8d847..949c0408c 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java @@ -306,7 +306,7 @@ void allFormats() { assertTrue(allFormats.contains(RDFFormat.JSONLD)); assertTrue(allFormats.contains(RDFFormat.RDFXML)); assertTrue(allFormats.contains(RDFFormat.TRIG)); - assertTrue(allFormats.contains(RDFFormat.RDFa_1_0)); + assertTrue(allFormats.contains(RDFFormat.RDFA)); assertTrue(allFormats.contains(RDFFormat.RDFC_1_0)); assertThrows(UnsupportedOperationException.class, () ->
allFormats.add(RDFFormat.TURTLE), diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java index 3ee8a809c..47394b257 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java @@ -20,231 +20,4 @@ public class RDFa10ParserTest { private static final ValueFactory factory = new CoreseAdaptedValueFactory(); - @Test - public void basicBaseTest() { - String testDataString = """ - - - - - - Test 0001 - - -

This photo was taken by Mark Birbeck.

- -"""; - - Model testModel = new CoreseModel(); - Model referenceModel = new CoreseModel(); - - IRI subject = factory.createIRI("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/photo1.jpg"); - IRI predicate = factory.createIRI("http://purl.org/dc/elements/1.1/creator"); - Literal object = factory.createLiteral("Mark Birbeck"); - - referenceModel.add(subject, predicate, object); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes())); - - assertEquals(RDFFormat.RDFa_1_0, parser.getRDFFormat()); - assertEquals(referenceModel.size(), testModel.size()); - Iterator itStatementRef = referenceModel.iterator(); - Iterator itStatementTest = testModel.iterator(); - while(itStatementRef.hasNext() && itStatementTest.hasNext()) { - Statement statementRef = itStatementRef.next(); - Statement statementTest = itStatementTest.next(); - assertEquals(statementRef.getSubject(), statementTest.getSubject()); - assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); - assertEquals(statementRef.getObject(), statementTest.getObject()); - assertEquals(statementRef.getContext(), statementTest.getContext()); - } - assertTrue(testModel.contains(subject, predicate, object)); - } - - @Test - public void aboutTest() { - String testDataString = """ - - - - -

- Hello, I'm Pierre. -

- - """; - - Model testModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); - - IRI subject = factory.createIRI("http://w3id.org/people/pierre-maillot"); - IRI object = factory.createIRI("http://xmlns.com/foaf/0.1/Person"); - - assertEquals(1, testModel.size()); - assertTrue(testModel.contains(subject, RDF.type.getIRI(), object)); - } - - @Test - public void basicIRItoIRITest() { - String testDataString = """ - - - - - -
-
-
- - - """; - - Model testModel = new CoreseModel(); - Model referenceModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); - - IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); - IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); - IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); - - Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); - - referenceModel.add(aeBirthPlaceStatement); - - assertEquals(1, testModel.size()); - assertEquals(referenceModel, testModel); - assertTrue(referenceModel.containsAll(testModel)); - } - - @Test - public void basicIRItoStringTest() { - String testDataString = """ - - - - - -
- Albert Einstein -
- - - """; - - Model testModel = new CoreseModel(); - Model referenceModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); - - IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); - IRI foafName = factory.createIRI("http://xmlns.com/foaf/0.1/name"); - Literal aeName = factory.createLiteral("Albert Einstein"); - - Statement aeNameStatement = factory.createStatement(albertEinstein, foafName, aeName); - - referenceModel.add(aeNameStatement); - - assertEquals(1, testModel.size()); - assertEquals(referenceModel, testModel); - assertTrue(referenceModel.containsAll(testModel)); - - } - - @Test - public void basicIRItoTypedLiteralTest() { - String testDataString = """ - - - - - -
- 1879-03-14 -
- - - """; - - Model testModel = new CoreseModel(); - Model referenceModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); - - IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); - IRI dateOfBirth = factory.createIRI("http://dbpedia.org/property/dateOfBirth"); - Literal aeDateOfBirth = factory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); - - Statement aeDateOfBirthStatement = factory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); - - referenceModel.add(aeDateOfBirthStatement); - - assertEquals(1, testModel.size()); - assertEquals(referenceModel.size(), testModel.size()); - Iterator itStatementRef = referenceModel.iterator(); - Iterator itStatementTest = testModel.iterator(); - while(itStatementRef.hasNext() && itStatementTest.hasNext()) { - Statement statementRef = itStatementRef.next(); - Statement statementTest = itStatementTest.next(); - assertEquals(statementRef.getSubject(), statementTest.getSubject()); - assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); - assertEquals(statementRef.getObject(), statementTest.getObject()); - assertEquals(statementRef.getContext(), statementTest.getContext()); - } - assertTrue(referenceModel.containsAll(testModel)); - } - - @Test - public void basicChainTest() { - String testDataString = """ - - - - - -
-
- Federal Republic of Germany -
- - - """; - - Model testModel = new CoreseModel(); - Model referenceModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa_1_0, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); - - IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); - IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); - IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); - IRI conventionalLongName = factory.createIRI("http://dbpedia.org/property/conventionalLongName"); - Literal gerLongName = factory.createLiteral("Federal Republic of Germany"); - - Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); - Statement germanyNameStatement = factory.createStatement(germany, conventionalLongName, gerLongName); - - referenceModel.add(aeBirthPlaceStatement); - referenceModel.add(germanyNameStatement); - - assertEquals(2, testModel.size()); - assertEquals(referenceModel, testModel); - assertTrue(referenceModel.containsAll(testModel)); - - } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java deleted file mode 100644 index 538fa6b13..000000000 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa11ParserTest.java +++ /dev/null @@ -1,27 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.api.base.io.RDFFormat; -import fr.inria.corese.core.next.api.io.parser.RDFParser; -import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; 
- -class RDFa11ParserTest { - - - @Test - void getRDFFormat() { - Model model = new CoreseModel(); - ValueFactory factory = new CoreseAdaptedValueFactory(); - RDFParser parser = new RDFa11Parser(model, factory); - assertEquals(RDFFormat.RDFa_1_1, parser.getRDFFormat()); - } - - @Test - void parse() { - } -} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java new file mode 100644 index 000000000..90ea19d90 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -0,0 +1,337 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.XSD; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.*; +/** Tests of the RDFa parser; most cases parse a markup snippet into a model and check the resulting statements. */ +class RDFaParserTest { + + private ParserFactory parserFactory = new ParserFactory(); + private ValueFactory valueFactory = new CoreseAdaptedValueFactory(); + private final String defaultTurtlePrefixes = """ + @prefix bibo: . + @prefix cc: . + @prefix dbp: . + @prefix dbp-owl: . + @prefix dbr: . + @prefix dc: . + @prefix ex: . + @prefix foaf: . + @prefix owl: . + @prefix rdf: . + @prefix rdfa: . + @prefix rdfs: . + @prefix xhv: . + @prefix xsd: .
+ """; + /** Checks that a directly constructed parser reports the RDFA format. */ + @Test + void getRDFFormat() { + Model model = new CoreseModel(); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser parser = new RDFaParser(model, factory); + assertEquals(RDFFormat.RDFA, parser.getRDFFormat()); + } + /** Compares the model parsed from the XHTML snippet with the model parsed from the expected Turtle. NOTE(review): the pairwise loop assumes both models iterate in the same order - confirm. */ + @Test + void parseCurrentSubjectCreatorHead() { + String currentSubjectXHTML = """ + + + Jo's Friends and Family Blog + + + + + ... + + + """; + String currentSubjectNTriples = defaultTurtlePrefixes + """ + <> foaf:primaryTopic <#bbq> . + <> dc:creator "Jo" . + """; + + Model parsedModel = new CoreseModel(); + Model resultModel = new CoreseModel(); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser testedParser = new RDFaParser(parsedModel, factory); + RDFParser resultParser = parserFactory.createRDFParser(RDFFormat.TURTLE, resultModel, valueFactory); + + assertEquals(RDFFormat.RDFA, testedParser.getRDFFormat()); + + resultParser.parse(new ByteArrayInputStream(currentSubjectNTriples.getBytes())); + testedParser.parse(new ByteArrayInputStream(currentSubjectXHTML.getBytes())); + + assertEquals(resultModel.size(), parsedModel.size()); + Iterator itStatementRef = resultModel.iterator(); + Iterator itStatementTest = parsedModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + } + /** NOTE(review): only the two fixture strings are declared here; nothing is parsed or asserted yet. */ + @Test + void parseCurrentSubjectCreatorMiddle() { + String currentSubjectXHTML = """ + + + Jo's Blog + + +

Jo's blog

+

+ Welcome to my blog. +

+ + + """; + String currentSubjectNTriples = """ + <> dc:creator "Jo" . + """; + } + /** Expects exactly the dc:creator statement about photo1.jpg; parse is called without a base IRI argument. */ + @Test + public void basicBaseTest() { + String testDataString = """ + + + + + + Test 0001 + + +

This photo was taken by Mark Birbeck.

+ +"""; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + IRI subject = valueFactory.createIRI("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/photo1.jpg"); + IRI predicate = valueFactory.createIRI("http://purl.org/dc/elements/1.1/creator"); + Literal object = valueFactory.createLiteral("Mark Birbeck"); + + referenceModel.add(subject, predicate, object); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + + assertEquals(RDFFormat.RDFA, parser.getRDFFormat()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(testModel.contains(subject, predicate, object)); + } + /** Expects a single rdf:type foaf:Person statement about the w3id.org IRI, not about the base passed to parse. */ + @Test + public void aboutTest() { + String testDataString = """ + + + + +

+ Hello, I'm Pierre. +

+ + """; + + Model testModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI subject = valueFactory.createIRI("http://w3id.org/people/pierre-maillot"); + IRI object = valueFactory.createIRI("http://xmlns.com/foaf/0.1/Person"); + + assertEquals(1, testModel.size()); + assertTrue(testModel.contains(subject, RDF.type.getIRI(), object)); + } + /** Expects one statement whose subject and object are both IRIs. */ + @Test + public void basicIRItoIRITest() { + String testDataString = """ + + + + + +
+
+
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = valueFactory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI birthPlace = valueFactory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = valueFactory.createIRI("http://dbpedia.org/resource/Germany"); + + Statement aeBirthPlaceStatement = valueFactory.createStatement(albertEinstein, birthPlace, germany); + + referenceModel.add(aeBirthPlaceStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + } + /** Expects one statement whose object is a literal created from a string. */ + @Test + public void basicIRItoStringTest() { + String testDataString = """ + + + + + +
+ Albert Einstein +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = valueFactory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI foafName = valueFactory.createIRI("http://xmlns.com/foaf/0.1/name"); + Literal aeName = valueFactory.createLiteral("Albert Einstein"); + + Statement aeNameStatement = valueFactory.createStatement(albertEinstein, foafName, aeName); + + referenceModel.add(aeNameStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + + } + /** Expects one statement whose object is a literal typed with XSD.xsdDate. */ + @Test + public void basicIRItoTypedLiteralTest() { + String testDataString = """ + + + + + +
+ 1879-03-14 +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = valueFactory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI dateOfBirth = valueFactory.createIRI("http://dbpedia.org/property/dateOfBirth"); + Literal aeDateOfBirth = valueFactory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); + + Statement aeDateOfBirthStatement = valueFactory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); + + referenceModel.add(aeDateOfBirthStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(referenceModel.containsAll(testModel)); + } + /** Expects two chained statements: the object of the first is the subject of the second. */ + @Test + public void basicChainTest() { + String testDataString = """ + + + + + +
+
+ Federal Republic of Germany +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = valueFactory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI birthPlace = valueFactory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = valueFactory.createIRI("http://dbpedia.org/resource/Germany"); + IRI conventionalLongName = valueFactory.createIRI("http://dbpedia.org/property/conventionalLongName"); + Literal gerLongName = valueFactory.createLiteral("Federal Republic of Germany"); + + Statement aeBirthPlaceStatement = valueFactory.createStatement(albertEinstein, birthPlace, germany); + Statement germanyNameStatement = valueFactory.createStatement(germany, conventionalLongName, gerLongName); + + referenceModel.add(aeBirthPlaceStatement); + referenceModel.add(germanyNameStatement); + + assertEquals(2, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + + } +} \ No newline at end of file From fbe7c1fd4d69040527676fdd1a20184c5ed35559 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 7 Jan 2026 17:28:03 +0100 Subject: [PATCH 05/13] attribute retrieval fix --- .../next/impl/io/parser/rdfa/RDFaParser.java | 436 +++++++++++------- .../io/parser/rdfa/model/RDFaAttributes.java | 1 + .../io/serialization/SerializerFactory.java | 1 + .../base/AbstractGraphSerializer.java | 2 +- .../impl/io/parser/rdfa/RDFa10ParserTest.java | 23 - .../impl/io/parser/rdfa/RDFaParserTest.java | 105 ++++- 6 files changed, 362 insertions(+), 206 deletions(-) delete mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 7dc79c6c9..5b298cd38 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -61,6 +61,7 @@ public class RDFaParser extends AbstractRDFParser { private Set localIncompleteStatements = null; private Map> localListMappings = null; private String currentLanguage = null; + private Value currentPropertyValue = null; private Map localTermMappings = null; private String localDefaultVocabulary = null; @@ -106,19 +107,7 @@ public void parse(Reader reader, String baseURI) { this.currentContext.setDefaultVocabulary(null); - skipElement = false; - newSubject = null; - currentObjectResource = null; - typedResource = null; - localIRIMappings = new HashMap<>(); - localIncompleteStatements = new HashSet<>(); - localListMappings = this.currentContext.getListMappings(); - currentLanguage = this.currentContext.getLanguage(); - localTermMappings = this.currentContext.getTermMappings(); - localDefaultVocabulary = this.currentContext.getDefaultVocabulary(); - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); InputSource inputSource = new InputSource(reader); saxParser.parse(inputSource, new XMLSaxHandler()); @@ -143,53 +132,89 @@ private void handleCharacters(char[] ch, int start, int length) { private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { this.currentElementAttributes = attrs; - } - - private void endProcessElement(String uri, String localName, String qName) { - String currentElementName = qName; + logger.info("{} {}", qName, debugAttributesToString()); + + this.characters = new StringBuilder(); + + this.skipElement = false; + this.newSubject = null; + this.currentObjectResource = null; + this.typedResource = null; + this.localIRIMappings = 
this.currentContext.getIriMappings(); + this.localIncompleteStatements = null; + this.localListMappings = this.currentContext.getListMappings(); + this.currentLanguage = this.currentContext.getLanguage(); + this.localTermMappings = this.currentContext.getTermMappings(); + this.localDefaultVocabulary = this.currentContext.getDefaultVocabulary(); + + // HTML-specific base element + if (qName.equals(BASE_TAG) + && isAttributePresent(RDFaAttributes.HREF)) { + Resource resourceBase = getAttributeResourceValue(RDFaAttributes.HREF); + if (resourceBase.isIRI()) { + this.currentContext.setBaseIri((IRI) resourceBase); + } + } - // The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). - if (this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()) != null - && !this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()).isEmpty()) { - localDefaultVocabulary = this.currentElementAttributes.getValue(RDFaAttributes.VOCAB.getName()); + // 2. The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). + if (isAttributePresent(RDFaAttributes.VOCAB) + && !getAttributeStringValue(RDFaAttributes.VOCAB).isEmpty()) { + this.localDefaultVocabulary = getAttributeStringValue(RDFaAttributes.VOCAB); } - // The current element is examined for IRI mappings and these are added to the local list of IRI mappings. 
Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; + // 3. The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { String attribute = this.currentElementAttributes.getQName(i); if (attribute.startsWith(XMLNS_PREFIX)) { String attributeValue = this.currentElementAttributes.getValue(i); - String prefixName = this.currentElementAttributes.getLocalName(i); + String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); - localIRIMappings.put(prefixName, prefixNamespace); + this.localIRIMappings.put(prefixName, prefixNamespace); } } - if (this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()) != null - && !this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()).isEmpty()) { - String prefixDeclaration = this.currentElementAttributes.getValue(RDFaAttributes.PREFIX.getName()); + if (isAttributePresent(RDFaAttributes.PREFIX) + && !getAttributeStringValue(RDFaAttributes.PREFIX).isEmpty()) { + String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); String prefixName = getPrefixFromDeclaration(prefixDeclaration); IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); - localIRIMappings.put(prefixName, prefixIRI); + this.localIRIMappings.put(prefixName, prefixIRI); + } + + // 4. The current element is also parsed for any language information, and if present, current language is set accordingly; + // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). 
+ if (isAttributePresent(RDFaAttributes.LANG_ALT) + && !getAttributeStringValue(RDFaAttributes.LANG_ALT).isEmpty()) { + this.currentLanguage = getAttributeStringValue(RDFaAttributes.LANG_ALT); } - // If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. - if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) == null - && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) == null) { - // If the current element contains the @property attribute, but does not contain either the @content or @datatype attributes, then - if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null - && !this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()).isEmpty() - && this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) == null - && this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) == null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); + // 5. If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. + if (!isAttributePresent(RDFaAttributes.REL) + && !isAttributePresent(RDFaAttributes.REV)) { + // 5.1. 
If the current element contains the @property attribute, but does not contain either the @content or @datatype attributes, then + if (isAttributePresent(RDFaAttributes.PROPERTY) + && !getAttributeStringValue(RDFaAttributes.PROPERTY).isEmpty() + && !isAttributePresent(RDFaAttributes.CONTENT) + && !isAttributePresent(RDFaAttributes.DATATYPE) + && (isAttributePresent(RDFaAttributes.ABOUT) + || isRootElement + || this.currentContext.getParentObjectResource() != null)) { + // new subject is set to the resource obtained from the first match from the following rule: + // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; + if (isAttributePresent(RDFaAttributes.ABOUT)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); + // otherwise, if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; } else if (isRootElement) { this.newSubject = this.currentContext.getBaseIri(); + // otherwise, if parent object is present, new subject is set to the value of parent object. 
} else if (this.currentContext.getParentObjectResource() != null) { this.newSubject = this.currentContext.getParentObjectResource(); } - if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { + // If @typeof is present then typed resource is set to the resource obtained from the first match from the following rules: + if (isAttributePresent(RDFaAttributes.TYPEOF)) { + // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; + if (isAttributePresent(RDFaAttributes.ABOUT)) { this.typedResource = this.newSubject; + // otherwise, if the element is the root element of the document, then act as if there is an empty @about present and process it according to the previous rule; } else if (isRootElement) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { @@ -197,33 +222,46 @@ private void endProcessElement(String uri, String localName, String qName) { } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } + // otherwise, } else { - if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.HREF); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.SRC); + // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; + if (isAttributePresent(RDFaAttributes.RESOURCE)) { + this.typedResource = getAttributeResourceValue(RDFaAttributes.RESOURCE); + // otherwise, by using the IRI from 
@href, if present, obtained according to the section on CURIE and IRI Processing; + } else if (isAttributePresent(RDFaAttributes.HREF)) { + this.typedResource = getAttributeResourceValue(RDFaAttributes.HREF); + // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing; + } else if (isAttributePresent(RDFaAttributes.SRC)) { + this.typedResource = getAttributeResourceValue(RDFaAttributes.SRC); + // otherwise, the value of typed resource is set to a newly created bnode. } else { this.typedResource = getValueFactory().createBNode(); } + // The value of the current object resource is then set to the value of typed resource. this.currentObjectResource = this.typedResource; } } - // otherwise: + logger.info("{}", this.newSubject); + // 5.2. otherwise: } else { - if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.HREF); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.SRC); + if (isAttributePresent(RDFaAttributes.ABOUT) + || isAttributePresent(RDFaAttributes.HREF) + || isAttributePresent(RDFaAttributes.SRC) + || 
isAttributePresent(RDFaAttributes.RESOURCE)) { + if (isAttributePresent(RDFaAttributes.ABOUT)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); + logger.info("{}", this.newSubject); + } else if (isAttributePresent(RDFaAttributes.RESOURCE)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.RESOURCE); + logger.info("{}", this.newSubject); + } else if (isAttributePresent(RDFaAttributes.HREF)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.HREF); + logger.info("{}", this.newSubject); + } else if (isAttributePresent(RDFaAttributes.SRC)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.SRC); + logger.info("{}", this.newSubject); + } else { + logger.info("No subject retrieved"); } } else { if (isRootElement) { @@ -233,28 +271,30 @@ private void endProcessElement(String uri, String localName, String qName) { } else { throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); } - } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + } else if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.newSubject = getValueFactory().createBNode(); + logger.info("{}", this.newSubject); } else if (this.currentContext.getParentObjectResource() != null) { this.newSubject = this.currentContext.getParentObjectResource(); - if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) == null) { + if (!isAttributePresent(RDFaAttributes.PROPERTY)) { skipElement = true; } } - if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { - this.typedResource = this.newSubject; - } + } + if (isAttributePresent(RDFaAttributes.TYPEOF)) { + this.typedResource = this.newSubject; } } } - // If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: - if 
(this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) != null) { - this.newSubject = getResourceFromElementAttribute(RDFaAttributes.ABOUT); + // 6. If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: + if (isAttributePresent(RDFaAttributes.REL) + || isAttributePresent(RDFaAttributes.REV)) { + if (isAttributePresent(RDFaAttributes.ABOUT)) { + this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); + logger.info("{}", this.newSubject); } - if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null) { + if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.typedResource = this.newSubject; } if (this.newSubject == null) { @@ -269,57 +309,58 @@ private void endProcessElement(String uri, String localName, String qName) { this.newSubject = this.currentContext.getParentObjectResource(); } } - if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.HREF); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - this.currentObjectResource = getResourceFromElementAttribute(RDFaAttributes.SRC); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null) { + if (isAttributePresent(RDFaAttributes.RESOURCE)) { + this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.RESOURCE); + } else if 
(isAttributePresent(RDFaAttributes.HREF)) { + this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.HREF); + } else if (isAttributePresent(RDFaAttributes.SRC)) { + this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.SRC); + } else if (isAttributePresent(RDFaAttributes.TYPEOF) + && !isAttributePresent(RDFaAttributes.ABOUT)) { this.currentObjectResource = this.getValueFactory().createBNode(); } - if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null + if (isAttributePresent(RDFaAttributes.TYPEOF) + && !isAttributePresent(RDFaAttributes.ABOUT) && (this.currentObjectResource == null || this.currentObjectResource.isResource())) { - this.typedResource = (Resource) this.currentObjectResource; + this.typedResource = this.currentObjectResource; } } + logger.info("{} : subject {}", qName, this.newSubject); - // If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; + // 7. If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; if (this.typedResource != null) { - Resource typeIri = getResourceFromElementAttribute(RDFaAttributes.TYPEOF); + Resource typeIri = getAttributeResourceValue(RDFaAttributes.TYPEOF); this.getModel().add(this.typedResource, RDF.type.getIRI(), typeIri); } - // If in any of the previous steps a new subject was set to a non-null value different from the parent object; + // 8. 
If in any of the previous steps a new subject was set to a non-null value different from the parent object; if (this.newSubject != null && this.newSubject != this.currentContext.getParentObjectResource()) { this.localListMappings = new HashMap<>(); } - // If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: + // 9. If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: if (this.currentObjectResource != null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - IRI relResource = (IRI) getResourceFromElementAttribute(RDFaAttributes.REL); + if (isAttributePresent(RDFaAttributes.INLIST) + && isAttributePresent(RDFaAttributes.REL)) { + IRI relResource = (IRI) getAttributeResourceValue(RDFaAttributes.REL); if (!localListMappings.containsKey(relResource)) { this.localListMappings.put(relResource, new HashSet<>()); } this.localListMappings.get(relResource).add(this.currentObjectResource); } - if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) == null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - Resource relResource = getResourceFromElementAttribute(RDFaAttributes.REL); + if (!isAttributePresent(RDFaAttributes.INLIST)) { + if (isAttributePresent(RDFaAttributes.REL)) { + Resource relResource = getAttributeResourceValue(RDFaAttributes.REL); if (relResource.isIRI()) { this.getModel().add(newSubject, (IRI) relResource, currentObjectResource); } else { throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); } } - if 
(this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { - Resource revResource = getResourceFromElementAttribute(RDFaAttributes.REV); + if (isAttributePresent(RDFaAttributes.REV)) { + Resource revResource = getAttributeResourceValue(RDFaAttributes.REV); if (!revResource.isIRI()) { - throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); + throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + getAttributeStringValue(RDFaAttributes.REV)); } if (!currentObjectResource.isResource()) { throw new ParsingErrorException("object resource expected to be a resource but was " + currentObjectResource); @@ -329,15 +370,18 @@ private void endProcessElement(String uri, String localName, String qName) { } } - // If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); + // 10. If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. 
Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); if (this.currentObjectResource == null) { + if(this.localIncompleteStatements == null) { + this.localIncompleteStatements = new HashSet<>(); + } this.currentObjectResource = getValueFactory().createBNode(); - if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) != null) { - if (!getResourceFromElementAttribute(RDFaAttributes.REL).isIRI()) { + if (isAttributePresent(RDFaAttributes.REL)) { + if (!getAttributeResourceValue(RDFaAttributes.REL).isIRI()) { throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); } - IRI relIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.REL); - if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { + IRI relIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REL); + if (isAttributePresent(RDFaAttributes.INLIST)) { if (!localListMappings.containsKey(relIRI)) { this.localListMappings.put(relIRI, new HashSet<>()); } @@ -345,34 +389,37 @@ private void endProcessElement(String uri, String localName, String qName) { } else { this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); } - } else if (this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) != null) { - if (!getResourceFromElementAttribute(RDFaAttributes.REV).isIRI()) { + } else if (isAttributePresent(RDFaAttributes.REV)) { + if (!getAttributeResourceValue(RDFaAttributes.REV).isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); } - IRI revIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.REV); + IRI revIRI = (IRI) 
getAttributeResourceValue(RDFaAttributes.REV); this.localIncompleteStatements.add(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); } } - // The next step of the iteration is to establish any current property value; - if (this.currentElementAttributes.getValue(RDFaAttributes.PROPERTY.getName()) != null) { - IRI propertyIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.PROPERTY); - Value currentPropertyValue = null; - if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null - && getResourceFromElementAttribute(RDFaAttributes.DATATYPE).isIRI() - && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI()) { - IRI datatypeIRI = (IRI) getResourceFromElementAttribute(RDFaAttributes.DATATYPE); - if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { - String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); + // 11. The next step of the iteration is to establish any current property value; + if (isAttributePresent(RDFaAttributes.PROPERTY)) { + IRI propertyIRI = (IRI) getAttributeResourceValue(RDFaAttributes.PROPERTY); + // as a typed literal if @datatype is present, does not have an empty value according to the section on CURIE and IRI Processing, and is not set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. + // The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. The final string includes the datatype IRI, as described in [RDF-SYNTAX-GRAMMAR], which will have been obtained according to the section on CURIE and IRI Processing. 
+ if (isAttributePresent(RDFaAttributes.DATATYPE) + && getAttributeResourceValue(RDFaAttributes.DATATYPE).isIRI() + && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI()) { + IRI datatypeIRI = (IRI) getAttributeResourceValue(RDFaAttributes.DATATYPE); + if (isAttributePresent(RDFaAttributes.CONTENT)) { + String contentString = getAttributeStringValue(RDFaAttributes.CONTENT); currentPropertyValue = getValueFactory().createLiteral(contentString, datatypeIRI); } else { String contentString = this.characters.toString().trim(); currentPropertyValue = getValueFactory().createLiteral(contentString); this.characters = new StringBuilder(); } - } else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()).isEmpty()) { - if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { + // otherwise, as a plain literal if @datatype is present but has an empty value according to the section on CURIE and IRI Processing. + // The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. + } else if (isAttributePresent(RDFaAttributes.DATATYPE) + && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { + if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); currentPropertyValue = getValueFactory().createLiteral(contentString); } else { @@ -380,27 +427,41 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge currentPropertyValue = getValueFactory().createLiteral(contentString); this.characters = new StringBuilder(); } + // otherwise, as an XML literal if @datatype is present and is set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. 
+ // The value of the XML literal is a string created by serializing to text, all nodes that are descendants of the current element, i.e., not including the element itself, and giving it a datatype of XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. The format of the resulting serialized content is as defined in Exclusive XML Canonicalization Version 1.0 [XML-EXC-C14N]. //} else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null - // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE).isIRI() - // && getResourceFromElementAttribute( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { - } else if (this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) != null) { + // && getAttributeResourceValue( RDFaAttributes.DATATYPE).isIRI() + // && getAttributeResourceValue( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { + // otherwise, as a plain literal using the value of @content if @content is present. 
+ } else if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); currentPropertyValue = getValueFactory().createLiteral(contentString); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.REL.getName()) == null - && this.currentElementAttributes.getValue(RDFaAttributes.REV.getName()) == null - && this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()) == null) { - if (this.currentElementAttributes.getValue(RDFaAttributes.RESOURCE.getName()) != null) { - currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.RESOURCE); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.HREF.getName()) != null) { - currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.HREF); - } else if (this.currentElementAttributes.getValue(RDFaAttributes.SRC.getName()) != null) { - currentPropertyValue = getResourceFromElementAttribute(RDFaAttributes.SRC); + // otherwise, if the @rel, @rev, and @content attributes are not present, as a resource obtained from one of the following: + // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; + // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; + // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing. 
+ } else if (!isAttributePresent(RDFaAttributes.REL) + && !isAttributePresent(RDFaAttributes.REV) + && !isAttributePresent(RDFaAttributes.CONTENT) + && (isAttributePresent(RDFaAttributes.RESOURCE) + || isAttributePresent(RDFaAttributes.HREF) + || isAttributePresent(RDFaAttributes.SRC) + )) { + if (isAttributePresent(RDFaAttributes.RESOURCE)) { + currentPropertyValue = getAttributeResourceValue(RDFaAttributes.RESOURCE); + } else if (isAttributePresent(RDFaAttributes.HREF)) { + currentPropertyValue = getAttributeResourceValue(RDFaAttributes.HREF); + } else if (isAttributePresent(RDFaAttributes.SRC)) { + currentPropertyValue = getAttributeResourceValue(RDFaAttributes.SRC); } - } else if (this.currentElementAttributes.getValue(RDFaAttributes.TYPEOF.getName()) != null - && this.currentElementAttributes.getValue(RDFaAttributes.ABOUT.getName()) == null) { + // otherwise, if @typeof is present and @about is not, the value of typed resource. + } else if (isAttributePresent(RDFaAttributes.TYPEOF) + && !isAttributePresent(RDFaAttributes.ABOUT)) { currentPropertyValue = typedResource; + // otherwise as a plain literal. } else { String contentString = this.characters.toString().trim(); + // Additionally, if there is a value for current language then the value of the plain literal should include this language information, as described in [RDF-SYNTAX-GRAMMAR]. The actual literal is either the value of @content (if present) or a string created by concatenating the text content of each of the descendant elements of the current element in document order. 
if (this.currentLanguage != null && !this.currentLanguage.isEmpty()) { currentPropertyValue = getValueFactory().createLiteral(contentString, this.currentLanguage); @@ -410,66 +471,91 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge this.characters = new StringBuilder(); } - if (this.currentElementAttributes.getValue(RDFaAttributes.INLIST.getName()) != null) { + // The current property value is then used with each predicate as follows: + // If the element also includes the @inlist attribute, the current property value is added to the local list mapping as follows: + if (isAttributePresent(RDFaAttributes.INLIST)) { + // if the local list mapping does not contain a list associated with the predicate IRI, instantiate a new list and add to local list mappings if (!this.localListMappings.containsKey(propertyIRI)) { this.localListMappings.put(propertyIRI, new HashSet<>()); } + // add the current property value to the list associated with the predicate IRI in the local list mapping this.localListMappings.get(propertyIRI).add(currentPropertyValue); + // Otherwise the current property value is used to generate a triple as follows: + // subject new subject + // predicate full IRI + // object current property value } else { - this.getModel().add(this.newSubject, propertyIRI, currentPropertyValue); + this.getModel().add(this.newSubject, propertyIRI, this.currentPropertyValue); } } - // If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: - if (!skipElement - && newSubject != null) { + // 12. 
If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: + if (!this.skipElement + && this.newSubject != null) { + if(this.localIncompleteStatements == null) { + this.localIncompleteStatements = new HashSet<>(); + } for (RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { - localListMappings.get(incompleteStatement.getPredicate()).add(newSubject); + localListMappings.get(incompleteStatement.getPredicate()).add(this.newSubject); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { - this.getModel().add(currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), newSubject); + this.getModel().add(this.currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), this.newSubject); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { - this.getModel().add(newSubject, incompleteStatement.getPredicate(), currentContext.getParentSubjectResource()); + this.getModel().add(this.newSubject, incompleteStatement.getPredicate(), this.currentContext.getParentSubjectResource()); } } } - // Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: - Map> oldListMappings = currentContext.getListMappings(); - if (skipElement) { - currentContext = new RDFaEvaluationContext(this.currentContext); - currentContext.clearIriMappings(); - initializeNewContext(currentContext); - currentContext.setLanguage(currentLanguage); - currentContext.addIriMappings(localIRIMappings); + // 13. 
Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: + // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; + if (this.skipElement) { + this.currentContext = new RDFaEvaluationContext(this.currentContext); + this.currentContext.clearIriMappings(); + initializeNewContext(this.currentContext); + this.currentContext.setLanguage(this.currentLanguage); + this.currentContext.addIriMappings(this.localIRIMappings); + // Otherwise, the values are: } else { Resource oldParentSubject = this.currentContext.getParentSubjectResource(); - currentContext = new RDFaEvaluationContext(this.currentContext.getBaseIri()); - initializeNewContext(currentContext); - currentContext.setParentSubjectResource(newSubject); - if (currentObjectResource != null) { - currentContext.setParentObjectResource(currentObjectResource); - } - if (newSubject != null) { - currentContext.setParentObjectResource(newSubject); + // the base is set to the base value of the current evaluation context; + this.currentContext = new RDFaEvaluationContext(this.currentContext.getBaseIri()); + initializeNewContext(this.currentContext); + // the parent subject is set to the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; + this.currentContext.setParentSubjectResource(this.newSubject); + // the parent object is set to value of current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; + if (this.currentObjectResource != null) { + this.currentContext.setParentObjectResource(this.currentObjectResource); + } else if (this.newSubject != null) { + 
this.currentContext.setParentObjectResource(this.newSubject); } else { - currentContext.setParentObjectResource(oldParentSubject); + this.currentContext.setParentObjectResource(oldParentSubject); } - currentContext.addIriMappings(localIRIMappings); - currentContext.setIncompleteStatements(localIncompleteStatements); - currentContext.setListMappings(localListMappings); - currentContext.setLanguage(currentLanguage); - currentContext.setDefaultVocabulary(localDefaultVocabulary); + // the list of IRI mappings is set to the local list of IRI mappings; + this.currentContext.addIriMappings(this.localIRIMappings); + // the list of incomplete triples is set to the local list of incomplete triples; + this.currentContext.setIncompleteStatements(this.localIncompleteStatements); + // the list mapping is set to the local list mapping; + this.currentContext.setListMappings(this.localListMappings); + // language is set to the value of current language. + this.currentContext.setLanguage(this.currentLanguage); + // the default vocabulary is set to the value of the local default vocabulary. + this.currentContext.setDefaultVocabulary(this.localDefaultVocabulary); } - // Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: - for (Map.Entry> listMapping : localListMappings.entrySet()) { + this.isRootElement = false; + } + + private void endProcessElement(String uri, String localName, String qName) { + Map> oldListMappings = this.currentContext.getListMappings(); + + // 14. 
Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: + for (Map.Entry> listMapping : this.localListMappings.entrySet()) { IRI propertyIRI = listMapping.getKey(); Set propertyList = listMapping.getValue(); if (!oldListMappings.containsKey(propertyIRI)) { if (propertyList.isEmpty()) { - getModel().add(newSubject, propertyIRI, RDF.nil.getIRI()); + getModel().add(this.newSubject, propertyIRI, RDF.nil.getIRI()); } else { ArrayList bnodes = new ArrayList<>(); for (int i = 0; i < propertyList.size(); i++) { @@ -487,12 +573,11 @@ && getResourceFromElementAttribute(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.ge bnodeIndex++; } - getModel().add(newSubject, propertyIRI, bnodes.getFirst()); + getModel().add(this.newSubject, propertyIRI, bnodes.getFirst()); } } } - isRootElement = false; } /** @@ -551,7 +636,7 @@ private IRI getPrefixIriFromDeclaration(String declaration) { return getValueFactory().createIRI(prefixArray[1].toLowerCase()); } - private Resource getResourceFromElementAttribute(RDFaAttributes attribute) { + private Resource getAttributeResourceValue(RDFaAttributes attribute) { String attributeValue = this.currentElementAttributes.getValue(attribute.getName()); Optional resourceResolution = resolveStringResource(attributeValue); if (resourceResolution.isPresent()) { @@ -561,6 +646,13 @@ private Resource getResourceFromElementAttribute(RDFaAttributes attribute) { } } + private boolean isAttributePresent(RDFaAttributes attribute) { + return this.currentElementAttributes.getValue(attribute.getName()) != null; + } + + private String getAttributeStringValue(RDFaAttributes attribute) { + return this.currentElementAttributes.getValue(attribute.getName()); + } /** * Resolves the string representation of a resource found in attributes of an element, be it an IRI,
CURIE or relative URI @@ -605,7 +697,7 @@ protected Optional resolveStringResource(String stringResource) { return Optional.of(this.getValueFactory().createBNode(localNameString)); } else if (IRIUtils.isStandardIRI(currentContext.getBaseIri().stringValue() + resultString)) { String concatenatedRelativeUri = currentContext.getBaseIri().stringValue() + resultString; - return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); + return Optional.of(this.getValueFactory().createIRI(concatenatedRelativeUri)); } return Optional.empty(); } @@ -638,4 +730,18 @@ private void initializeNewContext(RDFaEvaluationContext context) { context.addTermMapping("license", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#license")); context.addTermMapping("role", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#role")); } + + private String debugAttributesToString() { + StringBuilder sb = new StringBuilder(); + + if (this.currentElementAttributes != null) { + for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { + String attributeLocalName = this.currentElementAttributes.getQName(i); + String attributeValue = this.currentElementAttributes.getValue(i); + sb.append(attributeLocalName).append(" : ").append(attributeValue).append(" "); + } + } + + return sb.toString(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java index 6d23908b6..265712e3d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaAttributes.java @@ -2,6 +2,7 @@ public enum RDFaAttributes { ABOUT("about"), + BASE("base"), CONTENT("content"), DATATYPE("datatype"), HREF("href"), diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/SerializerFactory.java 
b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/SerializerFactory.java index 8693e1d5e..4468b8a74 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/SerializerFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/SerializerFactory.java @@ -92,6 +92,7 @@ public SerializerFactory() { RDFFormat.RDFC_1_0.getName() ); }); + tempDefaultRegistry.put(RDFFormat.RDFC_1_0, model -> { RDFC10SerializerOptions defaultConfig = RDFC10SerializerOptions.defaultConfig(); RDFC10Canonicalizer rdfc10Canonicalizer = new RDFC10Canonicalizer( diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java index 6e247f9ca..a25b83e4f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/base/AbstractGraphSerializer.java @@ -129,7 +129,7 @@ protected void writeHeader(Writer writer) throws IOException { /** * Collects all namespaces used in the model and attempts to assign prefixes to them - * if auto-declaration is enabled and they are not already mapped. + * if auto-declaration is enabled, and they are not already mapped. 
*/ protected Set collectUsedNamespaces() { Set namespaces = model.stream() diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java deleted file mode 100644 index 47394b257..000000000 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFa10ParserTest.java +++ /dev/null @@ -1,23 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.api.base.io.RDFFormat; -import fr.inria.corese.core.next.api.io.parser.RDFParser; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; -import fr.inria.corese.core.next.impl.common.vocabulary.XSD; -import fr.inria.corese.core.next.impl.io.parser.ParserFactory; -import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; -import fr.inria.corese.core.next.impl.temp.CoreseModel; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.util.Iterator; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class RDFa10ParserTest { - - private static final ValueFactory factory = new CoreseAdaptedValueFactory(); - -} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index 90ea19d90..9db341d31 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -3,6 +3,8 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serializer.RDFSerializer; +import fr.inria.corese.core.next.impl.io.serialization.SerializerFactory; 
import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.common.vocabulary.XSD; import fr.inria.corese.core.next.impl.io.parser.ParserFactory; @@ -11,12 +13,17 @@ import org.junit.jupiter.api.Test; import java.io.ByteArrayInputStream; +import java.io.StringWriter; import java.util.Iterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.junit.jupiter.api.Assertions.*; class RDFaParserTest { + private static final Logger logger = LoggerFactory.getLogger(RDFaParserTest.class); + private ParserFactory parserFactory = new ParserFactory(); private ValueFactory valueFactory = new CoreseAdaptedValueFactory(); private final String defaultTurtlePrefixes = """ @@ -59,8 +66,8 @@ void parseCurrentSubjectCreatorHead() { """; String currentSubjectNTriples = defaultTurtlePrefixes + """ - <> foaf:primaryTopic <#bbq> . - <> dc:creator "Jo" . + foaf:primaryTopic . + dc:creator "Jo" . """; Model parsedModel = new CoreseModel(); @@ -71,9 +78,10 @@ void parseCurrentSubjectCreatorHead() { assertEquals(RDFFormat.RDFA, testedParser.getRDFFormat()); - resultParser.parse(new ByteArrayInputStream(currentSubjectNTriples.getBytes())); - testedParser.parse(new ByteArrayInputStream(currentSubjectXHTML.getBytes())); + resultParser.parse(new ByteArrayInputStream(currentSubjectNTriples.getBytes()), "http://example.org/"); + testedParser.parse(new ByteArrayInputStream(currentSubjectXHTML.getBytes()), "http://example.org/"); + logModelContent(parsedModel); assertEquals(resultModel.size(), parsedModel.size()); Iterator itStatementRef = resultModel.iterator(); Iterator itStatementTest = parsedModel.iterator(); @@ -114,13 +122,13 @@ public void basicBaseTest() { - - - Test 0001 - - -

This photo was taken by Mark Birbeck.

- + + + Test 0001 + + +

This photo was taken by Mark Birbeck.

+ """; Model testModel = new CoreseModel(); @@ -137,6 +145,8 @@ public void basicBaseTest() { parser.parse(new ByteArrayInputStream(testDataString.getBytes())); assertEquals(RDFFormat.RDFA, parser.getRDFFormat()); + logModelContent(referenceModel); + logModelContent(testModel); assertEquals(referenceModel.size(), testModel.size()); Iterator itStatementRef = referenceModel.iterator(); Iterator itStatementTest = testModel.iterator(); @@ -173,6 +183,8 @@ public void aboutTest() { IRI subject = valueFactory.createIRI("http://w3id.org/people/pierre-maillot"); IRI object = valueFactory.createIRI("http://xmlns.com/foaf/0.1/Person"); + logModelContent(testModel); + assertEquals(1, testModel.size()); assertTrue(testModel.contains(subject, RDF.type.getIRI(), object)); } @@ -185,9 +197,10 @@ public void basicIRItoIRITest() { -
-
-
+
+
+
+
"""; @@ -207,6 +220,9 @@ public void basicIRItoIRITest() { referenceModel.add(aeBirthPlaceStatement); + logModelContent(referenceModel); + logModelContent(testModel); + assertEquals(1, testModel.size()); assertEquals(referenceModel, testModel); assertTrue(referenceModel.containsAll(testModel)); @@ -256,9 +272,9 @@ public void basicIRItoTypedLiteralTest() { -
- 1879-03-14 -
+
+ 1879-03-14 +
"""; @@ -334,4 +350,59 @@ public void basicChainTest() { assertTrue(referenceModel.containsAll(testModel)); } + + @Test + public void inheritSubjectTest() { + String testDataString = """ + + + + Test 0020 + + +
+ this photo was taken by + Mark Birbeck + +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://inria.fr/"); + + IRI photo1 = valueFactory.createIRI("http://inria.fr/photo1.jpg"); + IRI creator = valueFactory.createIRI("http://purl.org/dc/elements/1.1/creator"); + Literal name = valueFactory.createLiteral("Mark Birbeck"); + + Statement aeDateOfBirthStatement = valueFactory.createStatement(photo1, creator, name); + + referenceModel.add(aeDateOfBirthStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(referenceModel.containsAll(testModel)); + } + + private static void logModelContent(Model model) { + StringWriter outWriter = new StringWriter(); + RDFSerializer serializer = (new SerializerFactory()).createSerializer(RDFFormat.TURTLE, model); + serializer.write(outWriter); + logger.info("{}", outWriter.toString()); + } } \ No newline at end of file From fdb130e9b4cac0af5db4292d3496e98b705b8c63 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 8 Jan 2026 17:17:48 +0100 Subject: [PATCH 06/13] creating local value pile to emulate DOM in SAX --- .../next/impl/io/parser/rdfa/RDFaParser.java | 312 ++++++++++-------- .../rdfa/model/RDFaEvaluationContext.java | 4 +- 
.../io/parser/rdfa/model/RDFaLocalValues.java | 173 ++++++++++ 3 files changed, 348 insertions(+), 141 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 5b298cd38..e9b9294c9 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -8,10 +8,7 @@ import fr.inria.corese.core.next.impl.common.util.IRIUtils; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaAttributes; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaEvaluationContext; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaInitialPrefixes; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; @@ -47,24 +44,16 @@ public class RDFaParser extends AbstractRDFParser { private RDFaEvaluationContext currentContext = null; + /** + * Buffer/Pile of local value to adapt the parsing algorithm to SAX processing + */ + private final LinkedList localValuePile = new LinkedList<>(); + /** * Buffer for accumulating character data between start and end tags. 
*/ private StringBuilder characters = new StringBuilder(); - // Local context - private boolean skipElement = false; - private Resource newSubject = null; - private Resource currentObjectResource = null; - private Resource typedResource = null; - private Map localIRIMappings = null; - private Set localIncompleteStatements = null; - private Map> localListMappings = null; - private String currentLanguage = null; - private Value currentPropertyValue = null; - private Map localTermMappings = null; - private String localDefaultVocabulary = null; - private boolean isRootElement = true; private Attributes currentElementAttributes = null; @@ -132,20 +121,11 @@ private void handleCharacters(char[] ch, int start, int length) { private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { this.currentElementAttributes = attrs; - logger.info("{} {}", qName, debugAttributesToString()); - this.characters = new StringBuilder(); + // 1 First, the local values are initialized + this.localValuePile.add(new RDFaLocalValues(this.currentContext)); - this.skipElement = false; - this.newSubject = null; - this.currentObjectResource = null; - this.typedResource = null; - this.localIRIMappings = this.currentContext.getIriMappings(); - this.localIncompleteStatements = null; - this.localListMappings = this.currentContext.getListMappings(); - this.currentLanguage = this.currentContext.getLanguage(); - this.localTermMappings = this.currentContext.getTermMappings(); - this.localDefaultVocabulary = this.currentContext.getDefaultVocabulary(); + logger.info("START {} {}", qName, this.localValuePile.getFirst()); // HTML-specific base element if (qName.equals(BASE_TAG) @@ -159,34 +139,43 @@ && isAttributePresent(RDFaAttributes.HREF)) { // 2. The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. 
If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). if (isAttributePresent(RDFaAttributes.VOCAB) && !getAttributeStringValue(RDFaAttributes.VOCAB).isEmpty()) { - this.localDefaultVocabulary = getAttributeStringValue(RDFaAttributes.VOCAB); + this.localValuePile.getFirst().setDefaultVocabulary(getAttributeStringValue(RDFaAttributes.VOCAB)); } + logger.info("2 {} {}", qName, this.localValuePile.getFirst()); + // 3. The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { String attribute = this.currentElementAttributes.getQName(i); + logger.info("3 {} attribute: {}", qName, attribute); if (attribute.startsWith(XMLNS_PREFIX)) { String attributeValue = this.currentElementAttributes.getValue(i); String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); + logger.info("3 {} {} : {}", qName, prefixName, attributeValue); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); - this.localIRIMappings.put(prefixName, prefixNamespace); + this.localValuePile.getFirst().addIRIMappings(prefixName, prefixNamespace); } } + logger.info("3: {} local uri mappings {}", qName, this.localValuePile.getFirst().getIRIMappings()); if (isAttributePresent(RDFaAttributes.PREFIX) && !getAttributeStringValue(RDFaAttributes.PREFIX).isEmpty()) { String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); String prefixName = getPrefixFromDeclaration(prefixDeclaration); IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); - this.localIRIMappings.put(prefixName, prefixIRI); + this.localValuePile.getFirst().addIRIMappings(prefixName, prefixIRI); } + logger.info("3 {} {}", qName, this.localValuePile.getFirst()); + // 4. 
The current element is also parsed for any language information, and if present, current language is set accordingly; // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). if (isAttributePresent(RDFaAttributes.LANG_ALT) && !getAttributeStringValue(RDFaAttributes.LANG_ALT).isEmpty()) { - this.currentLanguage = getAttributeStringValue(RDFaAttributes.LANG_ALT); + this.localValuePile.getFirst().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG_ALT)); } + logger.info("4 {} {}", qName, this.localValuePile.getFirst()); + // 5. If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. if (!isAttributePresent(RDFaAttributes.REL) && !isAttributePresent(RDFaAttributes.REV)) { @@ -201,24 +190,27 @@ && isAttributePresent(RDFaAttributes.HREF)) { // new subject is set to the resource obtained from the first match from the following rule: // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("5.1 About {}", this.localValuePile.getFirst().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; } else if (isRootElement) { - this.newSubject = this.currentContext.getBaseIri(); + this.localValuePile.getFirst().setNewSubject(this.currentContext.getBaseIri()); + logger.info("5.1 Root element {}", this.localValuePile.getFirst().getNewSubject()); // otherwise, if parent object is present, new subject is set to the value of 
parent object. } else if (this.currentContext.getParentObjectResource() != null) { - this.newSubject = this.currentContext.getParentObjectResource(); + this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); + logger.info("5.1 context parent object {}", this.localValuePile.getFirst().getNewSubject()); } // If @typeof is present then typed resource is set to the resource obtained from the first match from the following rules: if (isAttributePresent(RDFaAttributes.TYPEOF)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.typedResource = this.newSubject; + this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present and process it according to the previous rule; } else if (isRootElement) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.typedResource = emptyAboutResource.get(); + this.localValuePile.getFirst().setTypedResource(emptyAboutResource.get()); } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } @@ -226,133 +218,156 @@ && isAttributePresent(RDFaAttributes.HREF)) { } else { // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.typedResource = getAttributeResourceValue(RDFaAttributes.RESOURCE); + this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.typedResource = 
getAttributeResourceValue(RDFaAttributes.HREF); + this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.HREF)); // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.typedResource = getAttributeResourceValue(RDFaAttributes.SRC); + this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.SRC)); // otherwise, the value of typed resource is set to a newly created bnode. } else { - this.typedResource = getValueFactory().createBNode(); + this.localValuePile.getFirst().setTypedResource(getValueFactory().createBNode()); } // The value of the current object resource is then set to the value of typed resource. - this.currentObjectResource = this.typedResource; + this.localValuePile.getFirst().setCurrentObjectResource(this.localValuePile.getFirst().getTypedResource()); } } - logger.info("{}", this.newSubject); // 5.2. 
otherwise: } else { + // If the element contains an @about, @href, @src, or @resource attribute, new subject is set to the resource obtained as follows: if (isAttributePresent(RDFaAttributes.ABOUT) || isAttributePresent(RDFaAttributes.HREF) || isAttributePresent(RDFaAttributes.SRC) || isAttributePresent(RDFaAttributes.RESOURCE)) { + // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); - logger.info("{}", this.newSubject); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("5.2 about {}", this.localValuePile.getFirst().getNewSubject()); + // otherwise, by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.RESOURCE); - logger.info("{}", this.newSubject); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + logger.info("5.2 resource {}", this.localValuePile.getFirst().getNewSubject()); + // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.HREF); - logger.info("{}", this.newSubject); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.HREF)); + logger.info("5.2 href {}", this.localValuePile.getFirst().getNewSubject()); + // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing. 
} else if (isAttributePresent(RDFaAttributes.SRC)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.SRC); - logger.info("{}", this.newSubject); - } else { - logger.info("No subject retrieved"); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.SRC)); + logger.info("5.2 src {}", this.localValuePile.getFirst().getNewSubject()); } + // otherwise, if no resource is provided by a resource attribute, then the first match from the following rules will apply: } else { + // if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; if (isRootElement) { + logger.info("RootElement {}", qName); Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.newSubject = emptyAboutResource.get(); + this.localValuePile.getFirst().setNewSubject(emptyAboutResource.get()); + logger.info("5.2 rootElement {}", this.localValuePile.getFirst()); } else { throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); } + // otherwise, if @typeof is present, then new subject is set to be a newly created bnode; } else if (isAttributePresent(RDFaAttributes.TYPEOF)) { - this.newSubject = getValueFactory().createBNode(); - logger.info("{}", this.newSubject); + this.localValuePile.getFirst().setNewSubject(getValueFactory().createBNode()); + logger.info("5.2 typeOf {}", this.localValuePile.getFirst()); + // otherwise, if parent object is present, new subject is set to the value of parent object. Additionally, if @property is not present then the skip element flag is set to 'true'. 
} else if (this.currentContext.getParentObjectResource() != null) { - this.newSubject = this.currentContext.getParentObjectResource(); + this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); + logger.info("5.2 parent object resource {}", this.localValuePile.getFirst()); if (!isAttributePresent(RDFaAttributes.PROPERTY)) { - skipElement = true; + this.localValuePile.getFirst().setSkipElement(true); } } } + // Finally, if @typeof is present, set the typed resource to the value of new subject. if (isAttributePresent(RDFaAttributes.TYPEOF)) { - this.typedResource = this.newSubject; + this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); } } } + logger.info("5 {} {}", qName, this.localValuePile.getFirst()); + // 6. If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: if (isAttributePresent(RDFaAttributes.REL) || isAttributePresent(RDFaAttributes.REV)) { if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.newSubject = getAttributeResourceValue(RDFaAttributes.ABOUT); - logger.info("{}", this.newSubject); + this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("6 about newSubject: {}", this.localValuePile.getFirst().getNewSubject()); } if (isAttributePresent(RDFaAttributes.TYPEOF)) { - this.typedResource = this.newSubject; + this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); + logger.info("6 typeof newSubject: {}", this.localValuePile.getFirst().getNewSubject()); } - if (this.newSubject == null) { + if (this.localValuePile.getFirst().getNewSubject() == null) { if (isRootElement) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.typedResource = emptyAboutResource.get(); + 
this.localValuePile.getFirst().setTypedResource(emptyAboutResource.get()); } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } + logger.info("6 root element typed resource: {}", this.localValuePile.getFirst().getTypedResource()); } else if (this.currentContext.getParentObjectResource() != null) { - this.newSubject = this.currentContext.getParentObjectResource(); + this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); + logger.info("6 parent object resource not null: {}", this.currentContext.getParentObjectResource()); } } if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.RESOURCE); + this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + logger.info("6 resource CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.HREF); + this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.HREF)); + logger.info("6 href CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.currentObjectResource = getAttributeResourceValue(RDFaAttributes.SRC); + this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.SRC)); + logger.info("6 src CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { - this.currentObjectResource = this.getValueFactory().createBNode(); + this.localValuePile.getFirst().setCurrentObjectResource(this.getValueFactory().createBNode()); + logger.info("6 typeof CurrentObjectResource: {}", 
this.localValuePile.getFirst().getCurrentObjectResource()); } if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT) - && (this.currentObjectResource == null || this.currentObjectResource.isResource())) { - this.typedResource = this.currentObjectResource; + && (this.localValuePile.getFirst().getCurrentObjectResource() == null + || this.localValuePile.getFirst().getCurrentObjectResource().isResource())) { + this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getCurrentObjectResource()); + logger.info("6 typed resource: {}", this.localValuePile.getFirst().getTypedResource()); } } - logger.info("{} : subject {}", qName, this.newSubject); + + logger.info("6 {} {}", qName, this.localValuePile.getFirst()); // 7. If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; - if (this.typedResource != null) { + if (this.localValuePile.getFirst().getTypedResource() != null) { Resource typeIri = getAttributeResourceValue(RDFaAttributes.TYPEOF); - this.getModel().add(this.typedResource, RDF.type.getIRI(), typeIri); + this.getModel().add(this.localValuePile.getFirst().getTypedResource(), RDF.type.getIRI(), typeIri); } + logger.info("7 {} {}", qName, this.localValuePile.getFirst()); + // 8. If in any of the previous steps a new subject was set to a non-null value different from the parent object; - if (this.newSubject != null && this.newSubject != this.currentContext.getParentObjectResource()) { - this.localListMappings = new HashMap<>(); + if (this.localValuePile.getFirst().getNewSubject() != null && this.localValuePile.getFirst().getNewSubject() != this.currentContext.getParentObjectResource()) { + this.localValuePile.getFirst().setListMappings(new HashMap<>()); } + logger.info("8 {} {}", qName, this.localValuePile.getFirst()); + // 9. 
If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: - if (this.currentObjectResource != null) { + if (this.localValuePile.getFirst().getCurrentObjectResource() != null) { if (isAttributePresent(RDFaAttributes.INLIST) && isAttributePresent(RDFaAttributes.REL)) { IRI relResource = (IRI) getAttributeResourceValue(RDFaAttributes.REL); - if (!localListMappings.containsKey(relResource)) { - this.localListMappings.put(relResource, new HashSet<>()); - } - this.localListMappings.get(relResource).add(this.currentObjectResource); + this.localValuePile.getFirst().addListMapping(relResource, this.localValuePile.getFirst().getCurrentObjectResource()); } if (!isAttributePresent(RDFaAttributes.INLIST)) { if (isAttributePresent(RDFaAttributes.REL)) { Resource relResource = getAttributeResourceValue(RDFaAttributes.REL); if (relResource.isIRI()) { - this.getModel().add(newSubject, (IRI) relResource, currentObjectResource); + this.getModel().add(this.localValuePile.getFirst().getNewSubject(), (IRI) relResource, this.localValuePile.getFirst().getCurrentObjectResource()); } else { throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); } @@ -362,42 +377,48 @@ && isAttributePresent(RDFaAttributes.REL)) { if (!revResource.isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + getAttributeStringValue(RDFaAttributes.REV)); } - if (!currentObjectResource.isResource()) { - throw new ParsingErrorException("object resource expected to be a resource but was " + currentObjectResource); + if (!this.localValuePile.getFirst().getCurrentObjectResource().isResource()) { + throw new ParsingErrorException("object resource expected to be a resource but was " + this.localValuePile.getFirst().getCurrentObjectResource()); } - 
this.getModel().add((Resource) currentObjectResource, (IRI) revResource, newSubject); + this.getModel().add((Resource) this.localValuePile.getFirst().getCurrentObjectResource(), (IRI) revResource, this.localValuePile.getFirst().getNewSubject()); } } } + logger.info("9 {} {}", qName, this.localValuePile.getFirst()); + // 10. If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); - if (this.currentObjectResource == null) { - if(this.localIncompleteStatements == null) { - this.localIncompleteStatements = new HashSet<>(); + if (this.localValuePile.getFirst().getCurrentObjectResource() == null + && (isAttributePresent(RDFaAttributes.REL) + ) || isAttributePresent(RDFaAttributes.REV)) { + if(this.localValuePile.getFirst().getIncompleteStatements() == null) { + this.localValuePile.getFirst().setIncompleteStatements(new HashSet<>()); } - this.currentObjectResource = getValueFactory().createBNode(); + this.localValuePile.getFirst().setCurrentObjectResource(getValueFactory().createBNode()); if (isAttributePresent(RDFaAttributes.REL)) { if (!getAttributeResourceValue(RDFaAttributes.REL).isIRI()) { throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); } IRI relIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REL); if (isAttributePresent(RDFaAttributes.INLIST)) { - if (!localListMappings.containsKey(relIRI)) { - this.localListMappings.put(relIRI, new HashSet<>()); + if (!this.localValuePile.getFirst().getListMappings().containsKey(relIRI)) { + this.localValuePile.getFirst().addListMappings(relIRI, new HashSet<>()); } - 
this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.NONE)); + this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.NONE)); } else { - this.localIncompleteStatements.add(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); + this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); } } else if (isAttributePresent(RDFaAttributes.REV)) { if (!getAttributeResourceValue(RDFaAttributes.REV).isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); } IRI revIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REV); - this.localIncompleteStatements.add(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); + this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); } } + logger.info("10 {} {}", qName, this.localValuePile.getFirst()); + // 11. 
The next step of the iteration is to establish any current property value; if (isAttributePresent(RDFaAttributes.PROPERTY)) { IRI propertyIRI = (IRI) getAttributeResourceValue(RDFaAttributes.PROPERTY); @@ -409,10 +430,10 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() IRI datatypeIRI = (IRI) getAttributeResourceValue(RDFaAttributes.DATATYPE); if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = getAttributeStringValue(RDFaAttributes.CONTENT); - currentPropertyValue = getValueFactory().createLiteral(contentString, datatypeIRI); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); } else { String contentString = this.characters.toString().trim(); - currentPropertyValue = getValueFactory().createLiteral(contentString); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); this.characters = new StringBuilder(); } // otherwise, as a plain literal if @datatype is present but has an empty value according to the section on CURIE and IRI Processing. 
@@ -421,10 +442,10 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); - currentPropertyValue = getValueFactory().createLiteral(contentString); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } else { String contentString = this.characters.toString().trim(); - currentPropertyValue = getValueFactory().createLiteral(contentString); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); this.characters = new StringBuilder(); } // otherwise, as an XML literal if @datatype is present and is set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. @@ -435,7 +456,7 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { // otherwise, as a plain literal using the value of @content if @content is present. 
} else if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); - currentPropertyValue = getValueFactory().createLiteral(contentString); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); // otherwise, if the @rel, @rev, and @content attributes are not present, as a resource obtained from one of the following: // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; @@ -448,25 +469,25 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { || isAttributePresent(RDFaAttributes.SRC) )) { if (isAttributePresent(RDFaAttributes.RESOURCE)) { - currentPropertyValue = getAttributeResourceValue(RDFaAttributes.RESOURCE); + this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.RESOURCE)); } else if (isAttributePresent(RDFaAttributes.HREF)) { - currentPropertyValue = getAttributeResourceValue(RDFaAttributes.HREF); + this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.HREF)); } else if (isAttributePresent(RDFaAttributes.SRC)) { - currentPropertyValue = getAttributeResourceValue(RDFaAttributes.SRC); + this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.SRC)); } // otherwise, if @typeof is present and @about is not, the value of typed resource. } else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { - currentPropertyValue = typedResource; + this.localValuePile.getFirst().setCurrentPropertyValue(this.localValuePile.getFirst().getTypedResource()); // otherwise as a plain literal. 
} else { String contentString = this.characters.toString().trim(); // Additionally, if there is a value for current language then the value of the plain literal should include this language information, as described in [RDF-SYNTAX-GRAMMAR]. The actual literal is either the value of @content (if present) or a string created by concatenating the text content of each of the descendant elements of the current element in document order. - if (this.currentLanguage != null - && !this.currentLanguage.isEmpty()) { - currentPropertyValue = getValueFactory().createLiteral(contentString, this.currentLanguage); + if (this.localValuePile.getFirst().getCurrentLanguage() != null + && !this.localValuePile.getFirst().getCurrentLanguage().isEmpty()) { + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, this.localValuePile.getFirst().getCurrentLanguage())); } else { - currentPropertyValue = getValueFactory().createLiteral(contentString); + this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } this.characters = new StringBuilder(); } @@ -475,45 +496,49 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { // If the element also includes the @inlist attribute, the current property value is added to the local list mapping as follows: if (isAttributePresent(RDFaAttributes.INLIST)) { // if the local list mapping does not contain a list associated with the predicate IRI, instantiate a new list and add to local list mappings - if (!this.localListMappings.containsKey(propertyIRI)) { - this.localListMappings.put(propertyIRI, new HashSet<>()); + if (!this.localValuePile.getFirst().getListMappings().containsKey(propertyIRI)) { + this.localValuePile.getFirst().addListMappings(propertyIRI, new HashSet<>()); } // add the current property value to the list associated with the predicate IRI in the local list mapping - this.localListMappings.get(propertyIRI).add(currentPropertyValue); 
+ this.localValuePile.getFirst().addListMapping(propertyIRI, this.localValuePile.getFirst().getCurrentPropertyValue()); // Otherwise the current property value is used to generate a triple as follows: // subject new subject // predicate full IRI // object current property value } else { - this.getModel().add(this.newSubject, propertyIRI, this.currentPropertyValue); + this.getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, this.localValuePile.getFirst().getCurrentPropertyValue()); } } + logger.info("11 {} {}", qName, this.localValuePile.getFirst()); + // 12. If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: - if (!this.skipElement - && this.newSubject != null) { - if(this.localIncompleteStatements == null) { - this.localIncompleteStatements = new HashSet<>(); + if (!this.localValuePile.getFirst().isSkipElement() + && this.localValuePile.getFirst().getNewSubject() != null) { + if(this.localValuePile.getFirst().getIncompleteStatements() == null) { + this.localValuePile.getFirst().setIncompleteStatements(new HashSet<>()); } for (RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { - localListMappings.get(incompleteStatement.getPredicate()).add(this.newSubject); + this.localValuePile.getFirst().addListMapping(incompleteStatement.getPredicate(), this.localValuePile.getFirst().getNewSubject()); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { - this.getModel().add(this.currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), this.newSubject); + this.getModel().add(this.currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), this.localValuePile.getFirst().getNewSubject()); } else if (incompleteStatement.getDirection() == 
RDFaIncompleteStatement.Direction.BACKWARD) { - this.getModel().add(this.newSubject, incompleteStatement.getPredicate(), this.currentContext.getParentSubjectResource()); + this.getModel().add(this.localValuePile.getFirst().getNewSubject(), incompleteStatement.getPredicate(), this.currentContext.getParentSubjectResource()); } } } + logger.info("12 {} {}", qName, this.localValuePile.getFirst()); + // 13. Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; - if (this.skipElement) { + if (this.localValuePile.getFirst().isSkipElement()) { this.currentContext = new RDFaEvaluationContext(this.currentContext); this.currentContext.clearIriMappings(); initializeNewContext(this.currentContext); - this.currentContext.setLanguage(this.currentLanguage); - this.currentContext.addIriMappings(this.localIRIMappings); + this.currentContext.setLanguage(this.localValuePile.getFirst().getCurrentLanguage()); + this.currentContext.setIriMappings(this.localValuePile.getFirst().getIRIMappings()); // Otherwise, the values are: } else { Resource oldParentSubject = this.currentContext.getParentSubjectResource(); @@ -521,27 +546,32 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { this.currentContext = new RDFaEvaluationContext(this.currentContext.getBaseIri()); initializeNewContext(this.currentContext); // the parent subject is set to the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; - this.currentContext.setParentSubjectResource(this.newSubject); + this.currentContext.setParentSubjectResource(this.localValuePile.getFirst().getNewSubject()); // the parent object is set to value of 
current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; - if (this.currentObjectResource != null) { - this.currentContext.setParentObjectResource(this.currentObjectResource); - } else if (this.newSubject != null) { - this.currentContext.setParentObjectResource(this.newSubject); + if (this.localValuePile.getFirst().getCurrentObjectResource() != null) { + logger.info("13 parent object resource = current object resource {}", this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentContext.setParentObjectResource(this.localValuePile.getFirst().getCurrentObjectResource()); + } else if (this.localValuePile.getFirst().getNewSubject() != null) { + this.currentContext.setParentObjectResource(this.localValuePile.getFirst().getNewSubject()); + logger.info("13 parent object resource = new subject {}", this.localValuePile.getFirst().getNewSubject()); } else { this.currentContext.setParentObjectResource(oldParentSubject); } + logger.info("13 context parent object resource: {}", this.currentContext.getParentObjectResource()); // the list of IRI mappings is set to the local list of IRI mappings; - this.currentContext.addIriMappings(this.localIRIMappings); + this.currentContext.setIriMappings(this.localValuePile.getFirst().getIRIMappings()); // the list of incomplete triples is set to the local list of incomplete triples; - this.currentContext.setIncompleteStatements(this.localIncompleteStatements); + this.currentContext.setIncompleteStatements(this.localValuePile.getFirst().getIncompleteStatements()); // the list mapping is set to the local list mapping; - this.currentContext.setListMappings(this.localListMappings); + this.currentContext.setListMappings(this.localValuePile.getFirst().getListMappings()); // language is set to the value of current language. 
- this.currentContext.setLanguage(this.currentLanguage); + this.currentContext.setLanguage(this.localValuePile.getFirst().getCurrentLanguage()); // the default vocabulary is set to the value of the local default vocabulary. - this.currentContext.setDefaultVocabulary(this.localDefaultVocabulary); + this.currentContext.setDefaultVocabulary(this.localValuePile.getFirst().getDefaultVocabulary()); } + logger.info("13 {} {}", qName, this.localValuePile.getFirst()); + this.isRootElement = false; } @@ -549,13 +579,13 @@ private void endProcessElement(String uri, String localName, String qName) { Map> oldListMappings = this.currentContext.getListMappings(); // 14. Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: - for (Map.Entry> listMapping : this.localListMappings.entrySet()) { + for (Map.Entry> listMapping : this.localValuePile.getFirst().getListMappings().entrySet()) { IRI propertyIRI = listMapping.getKey(); Set propertyList = listMapping.getValue(); if (!oldListMappings.containsKey(propertyIRI)) { if (propertyList.isEmpty()) { - getModel().add(this.newSubject, propertyIRI, RDF.nil.getIRI()); + getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, RDF.nil.getIRI()); } else { ArrayList bnodes = new ArrayList<>(); for (int i = 0; i < propertyList.size(); i++) { @@ -573,11 +603,15 @@ private void endProcessElement(String uri, String localName, String qName) { bnodeIndex++; } - getModel().add(this.newSubject, propertyIRI, bnodes.getFirst()); + getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, bnodes.getFirst()); } } } + logger.info("14 {} newSubject: {}, currentObjectResource: {}, currentPropertyValue: {}, typedResource: {}, skip: {}", qName, this.localValuePile.getFirst().getNewSubject(), this.localValuePile.getFirst().getCurrentObjectResource(), this.localValuePile.getFirst().getCurrentPropertyValue(), this.localValuePile.getFirst().getTypedResource(), 
this.localValuePile.getFirst().isSkipElement()); + + this.localValuePile.pop(); + } /** @@ -677,16 +711,14 @@ protected Optional resolveStringResource(String stringResource) { IRI namespaceIRI = currentContext.getIriMapping(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); - } else if (localIRIMappings.containsKey(prefixString)) { - IRI namespaceIRI = localIRIMappings.get(prefixString); + } else if (this.localValuePile.getFirst().getIRIMappings().containsKey(prefixString)) { + IRI namespaceIRI = this.localValuePile.getFirst().getIRIMappings().get(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI return Optional.of(this.getValueFactory().createIRI(currentContext.getBaseIri().stringValue(), localNameString)); } else { - logger.info("{} context mappings", currentContext.getIriMappings().size()); - logger.info("{} local mappings", localIRIMappings.size()); - throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix among " + this.currentContext.getIriMappings() + " and " + this.localValuePile.getFirst().getIRIMappings()); } } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI return Optional.of(this.getValueFactory().createIRI(resultString)); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 6a11a4809..43e1a2be0 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -119,7 +119,9 @@ public void addIriMapping(String prefix, IRI prefixIri) { } public 
void addIriMappings(Map otherMappings) { - this.iriMappings.putAll(otherMappings); + if(otherMappings != null) { + this.iriMappings.putAll(otherMappings); + } } public void clearIriMappings() { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java new file mode 100644 index 000000000..58b5f6070 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java @@ -0,0 +1,173 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class RDFaLocalValues { + + // Local context + private boolean skipElement = false; + private Resource newSubject = null; + private Resource currentObjectResource = null; + private Resource typedResource = null; + private Map iRIMappings = new HashMap<>(); + private Set incompleteStatements = null; + private Map> listMappings = new HashMap<>(); + private String currentLanguage = null; + private Value currentPropertyValue = null; + private String defaultVocabulary = null; + + public RDFaLocalValues() { + } + + /** + * Constructor to be used in step 1 of RDFa processing + * @param context + */ + public RDFaLocalValues(RDFaEvaluationContext context) { + this.skipElement = false; + this.newSubject = null; + this.currentObjectResource = null; + this.typedResource = null; + this.iRIMappings = context.getIriMappings(); + this.incompleteStatements = context.getIncompleteStatement(); + this.listMappings = context.getListMappings(); + this.currentLanguage = context.getLanguage(); + this.defaultVocabulary = context.getDefaultVocabulary(); + } + + public RDFaLocalValues(RDFaLocalValues other) { + this.skipElement = 
other.skipElement; + this.newSubject = other.newSubject; + this.currentObjectResource = other.currentObjectResource; + this.typedResource = other.typedResource; + this.iRIMappings = other.iRIMappings; + this.incompleteStatements = other.incompleteStatements; + this.listMappings = other.listMappings; + this.currentLanguage = other.currentLanguage; + this.currentPropertyValue = other.currentPropertyValue; + this.defaultVocabulary = other.defaultVocabulary; + } + + public boolean isSkipElement() { + return skipElement; + } + + public void setSkipElement(boolean skipElement) { + this.skipElement = skipElement; + } + + public Resource getNewSubject() { + return newSubject; + } + + public void setNewSubject(Resource newSubject) { + this.newSubject = newSubject; + } + + public Resource getCurrentObjectResource() { + return currentObjectResource; + } + + public void setCurrentObjectResource(Resource currentObjectResource) { + this.currentObjectResource = currentObjectResource; + } + + public Resource getTypedResource() { + return typedResource; + } + + public void setTypedResource(Resource typedResource) { + this.typedResource = typedResource; + } + + public Map getIRIMappings() { + return iRIMappings; + } + + public void setIRIMappings(Map iRIMappings) { + this.iRIMappings = iRIMappings; + } + + public void addIRIMappings(String key, IRI value) { + if(this.iRIMappings == null) { + this.iRIMappings = new HashMap<>(); + } + this.iRIMappings.put(key, value); + } + + public Set getIncompleteStatements() { + return incompleteStatements; + } + + public void setIncompleteStatements(Set incompleteStatements) { + this.incompleteStatements = incompleteStatements; + } + + public void addIncompleteStatement(RDFaIncompleteStatement statement) { + this.incompleteStatements.add(statement); + } + + public Map> getListMappings() { + return listMappings; + } + + public void setListMappings(Map> listMappings) { + this.listMappings = listMappings; + } + + public void addListMapping(IRI key, 
Value value) { + if(! this.listMappings.containsKey(key)) { + this.listMappings.put(key, new HashSet<>()); + } + this.listMappings.get(key).add(value); + } + + public void addListMappings(IRI key, Set objects) { + this.listMappings.put(key, objects); + } + + public String getCurrentLanguage() { + return currentLanguage; + } + + public void setCurrentLanguage(String currentLanguage) { + this.currentLanguage = currentLanguage; + } + + public Value getCurrentPropertyValue() { + return currentPropertyValue; + } + + public void setCurrentPropertyValue(Value currentPropertyValue) { + this.currentPropertyValue = currentPropertyValue; + } + + public String getDefaultVocabulary() { + return defaultVocabulary; + } + + public void setDefaultVocabulary(String defaultVocabulary) { + this.defaultVocabulary = defaultVocabulary; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("newSubject: ").append(this.newSubject).append(" "); + sb.append("currentObjectResource: ").append(this.currentObjectResource).append(" "); + sb.append("typedResource: ").append(this.typedResource).append(" "); + sb.append("currentLanguage: ").append(this.currentLanguage).append(" "); + sb.append("currentPropertyValue: ").append(this.currentPropertyValue).append(" "); + sb.append("defaultVocabulary: ").append(this.defaultVocabulary).append(" "); + + return sb.toString(); + } +} From 65d8cfea1a62409ff6cdaf40760451ca3dc6f837 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Tue, 13 Jan 2026 14:36:46 +0100 Subject: [PATCH 07/13] Full storage of the contexts --- .../next/impl/io/parser/rdfa/RDFaParser.java | 492 +++++++++--------- .../rdfa/model/RDFaEvaluationContext.java | 3 +- ...Values.java => RDFaProcessingContext.java} | 61 ++- .../impl/io/parser/rdfa/RDFaParserTest.java | 2 +- 4 files changed, 316 insertions(+), 242 deletions(-) rename src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/{RDFaLocalValues.java => 
RDFaProcessingContext.java} (73%) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index e9b9294c9..8671fd80d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.*; +import fr.inria.corese.core.next.impl.io.serialization.util.SerializationConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; @@ -42,20 +43,12 @@ public class RDFaParser extends AbstractRDFParser { private static final String BASE_TAG = "base"; private static final String XMLNS_PREFIX = "xmlns"; - private RDFaEvaluationContext currentContext = null; + private String baseIri = SerializationConstants.getDefaultBaseURI(); /** * Buffer/Pile of local value to adapt the parsing algorithm to SAX processing */ - private final LinkedList localValuePile = new LinkedList<>(); - - /** - * Buffer for accumulating character data between start and end tags. 
- */ - private StringBuilder characters = new StringBuilder(); - - private boolean isRootElement = true; - private Attributes currentElementAttributes = null; + private final LinkedList processingContexts = new LinkedList<>(); public RDFaParser(Model model, ValueFactory factory) { this(model, factory, new RDFaParserOptions.Builder().build()); @@ -63,21 +56,20 @@ public RDFaParser(Model model, ValueFactory factory) { public RDFaParser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); + if (getConfig() instanceof BaseIRIOptions baseIRIOptions) { + this.baseIri = baseIRIOptions.getBaseIRI(); + } } @Override public void parse(InputStream in) { - if (getConfig() instanceof BaseIRIOptions baseIRIOptions) { - String baseIRI = baseIRIOptions.getBaseIRI(); - parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI); - } else { - parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); - } + parse(new InputStreamReader(in, StandardCharsets.UTF_8), this.baseIri); } @Override public void parse(InputStream in, String baseURIString) { + this.baseIri = baseURIString; parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURIString); } @@ -89,12 +81,7 @@ public RDFFormat getRDFFormat() { @Override public void parse(Reader reader, String baseURI) { try { - this.currentContext = getNewContext(getValueFactory().createIRI(baseURI)); - this.currentContext.setParentSubjectResource(this.currentContext.getBaseIri()); - this.currentContext.setParentObjectResource(null); - this.currentContext.setLanguage(null); - - this.currentContext.setDefaultVocabulary(null); + this.baseIri = baseURI; SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser saxParser = factory.newSAXParser(); @@ -109,72 +96,133 @@ public void parse(Reader reader, String baseURI) { private void addPrefix(String prefix, String uri) { IRI prefixIRI = getValueFactory().createIRI(uri); - this.currentContext.addIriMapping(prefix, prefixIRI); + 
currentProcessingContext().getEvaluationContext().addIriMapping(prefix, prefixIRI); } /** * Handles character data between XML elements + * Accumulate the characters in all local values in the pile */ private void handleCharacters(char[] ch, int start, int length) { - characters.append(ch, start, length); + for (RDFaProcessingContext value : this.processingContexts) { + value.addCharacters(ch, start, length); + } } private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { - this.currentElementAttributes = attrs; + + if(this.processingContexts.size() > 1) { // Not a root element + RDFaProcessingContext processingContext = new RDFaProcessingContext(currentProcessingContext().getEvaluationContext()); + processingContext.setCurrentElementAttributes(attrs); + processingContext.setRootElement(false); + // 13. Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: + // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; + if (this.currentProcessingContext().isSkipElement()) { + currentProcessingContext().setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext())); + currentProcessingContext().getEvaluationContext().clearIriMappings(); + initializeNewContext(currentProcessingContext().getEvaluationContext()); + currentProcessingContext().getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); + currentProcessingContext().getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); + // Otherwise, the values are: + } else { + Resource oldParentSubject = currentProcessingContext().getEvaluationContext().getParentSubjectResource(); + // the base is set to the 
base value of the current evaluation context; + currentProcessingContext().setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext().getBaseIri())); + initializeNewContext(currentProcessingContext().getEvaluationContext()); + // the parent subject is set to the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; + currentProcessingContext().getEvaluationContext().setParentSubjectResource(this.currentProcessingContext().getNewSubject()); + // the parent object is set to value of current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; + if (this.currentProcessingContext().getCurrentObjectResource() != null) { + logger.info("13 parent object resource = current object resource {}", this.currentProcessingContext().getCurrentObjectResource()); + currentProcessingContext().getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getCurrentObjectResource()); + } else if (this.currentProcessingContext().getNewSubject() != null) { + currentProcessingContext().getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getNewSubject()); + logger.info("13 parent object resource = new subject {}", this.currentProcessingContext().getNewSubject()); + } else { + currentProcessingContext().getEvaluationContext().setParentObjectResource(oldParentSubject); + } + logger.info("13 context parent object resource: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); + // the list of IRI mappings is set to the local list of IRI mappings; + currentProcessingContext().getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); + // the list of incomplete triples is set to the local list of incomplete triples; + 
currentProcessingContext().getEvaluationContext().setIncompleteStatements(this.currentProcessingContext().getIncompleteStatements()); + // the list mapping is set to the local list mapping; + currentProcessingContext().getEvaluationContext().setListMappings(this.currentProcessingContext().getListMappings()); + // language is set to the value of current language. + currentProcessingContext().getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); + // the default vocabulary is set to the value of the local default vocabulary. + currentProcessingContext().getEvaluationContext().setDefaultVocabulary(this.currentProcessingContext().getDefaultVocabulary()); + } + this.processingContexts.add(processingContext); + + logger.info("13 {} {}", qName, this.currentProcessingContext()); + } else { + RDFaEvaluationContext startingContext = getNewContext(getValueFactory().createIRI(this.baseIri)); + startingContext.setParentSubjectResource(startingContext.getBaseIri()); + startingContext.setParentObjectResource(null); + startingContext.setLanguage(null); + startingContext.setDefaultVocabulary(null); + RDFaProcessingContext processingContext = new RDFaProcessingContext(startingContext); + processingContext.setCurrentElementAttributes(attrs); + processingContext.setRootElement(true); + this.processingContexts.add(processingContext); + logger.info("START {} {}", qName, this.currentProcessingContext()); + } // 1 First, the local values are initialized - this.localValuePile.add(new RDFaLocalValues(this.currentContext)); - logger.info("START {} {}", qName, this.localValuePile.getFirst()); + // HTML-specific base element if (qName.equals(BASE_TAG) && isAttributePresent(RDFaAttributes.HREF)) { Resource resourceBase = getAttributeResourceValue(RDFaAttributes.HREF); if (resourceBase.isIRI()) { - this.currentContext.setBaseIri((IRI) resourceBase); + currentProcessingContext().getEvaluationContext().setBaseIri((IRI) resourceBase); } } // 2. 
The current element is examined for any change to the default vocabulary via @vocab. If @vocab is present and contains a value, the local default vocabulary is updated according to the section on CURIE and IRI Processing. If the value is empty, then the local default vocabulary MUST be reset to the Host Language defined default (if any). if (isAttributePresent(RDFaAttributes.VOCAB) && !getAttributeStringValue(RDFaAttributes.VOCAB).isEmpty()) { - this.localValuePile.getFirst().setDefaultVocabulary(getAttributeStringValue(RDFaAttributes.VOCAB)); + this.currentProcessingContext().setDefaultVocabulary(getAttributeStringValue(RDFaAttributes.VOCAB)); } - logger.info("2 {} {}", qName, this.localValuePile.getFirst()); + logger.info("2 {} {}", qName, this.currentProcessingContext()); // 3. The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; - for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { - String attribute = this.currentElementAttributes.getQName(i); + for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { + String attribute = this.currentElementAttributes().getQName(i); logger.info("3 {} attribute: {}", qName, attribute); if (attribute.startsWith(XMLNS_PREFIX)) { - String attributeValue = this.currentElementAttributes.getValue(i); + String attributeValue = this.currentElementAttributes().getValue(i); String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); logger.info("3 {} {} : {}", qName, prefixName, attributeValue); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); - this.localValuePile.getFirst().addIRIMappings(prefixName, prefixNamespace); + this.currentProcessingContext().addIRIMappings(prefixName, prefixNamespace); } } - logger.info("3: {} local uri mappings {}", qName, this.localValuePile.getFirst().getIRIMappings()); if 
(isAttributePresent(RDFaAttributes.PREFIX) && !getAttributeStringValue(RDFaAttributes.PREFIX).isEmpty()) { String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); String prefixName = getPrefixFromDeclaration(prefixDeclaration); IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); - this.localValuePile.getFirst().addIRIMappings(prefixName, prefixIRI); + this.currentProcessingContext().addIRIMappings(prefixName, prefixIRI); } - logger.info("3 {} {}", qName, this.localValuePile.getFirst()); + logger.info("3 {} {}", qName, this.currentProcessingContext()); // 4. The current element is also parsed for any language information, and if present, current language is set accordingly; // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). if (isAttributePresent(RDFaAttributes.LANG_ALT) && !getAttributeStringValue(RDFaAttributes.LANG_ALT).isEmpty()) { - this.localValuePile.getFirst().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG_ALT)); + this.currentProcessingContext().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG_ALT)); + } else if (isAttributePresent(RDFaAttributes.LANG) + && !getAttributeStringValue(RDFaAttributes.LANG).isEmpty()) { + this.currentProcessingContext().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG)); } - logger.info("4 {} {}", qName, this.localValuePile.getFirst()); + logger.info("4 {} {}", qName, this.currentProcessingContext()); // 5. If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. 
if (!isAttributePresent(RDFaAttributes.REL) @@ -185,32 +233,32 @@ && isAttributePresent(RDFaAttributes.HREF)) { && !isAttributePresent(RDFaAttributes.CONTENT) && !isAttributePresent(RDFaAttributes.DATATYPE) && (isAttributePresent(RDFaAttributes.ABOUT) - || isRootElement - || this.currentContext.getParentObjectResource() != null)) { + || this.currentProcessingContext().isRootElement() + || currentProcessingContext().getEvaluationContext().getParentObjectResource() != null)) { // new subject is set to the resource obtained from the first match from the following rule: // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("5.1 About {}", this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("5.1 About {}", this.currentProcessingContext().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; - } else if (isRootElement) { - this.localValuePile.getFirst().setNewSubject(this.currentContext.getBaseIri()); - logger.info("5.1 Root element {}", this.localValuePile.getFirst().getNewSubject()); + } else if (this.currentProcessingContext().isRootElement()) { + this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getBaseIri()); + logger.info("5.1 Root element {}", this.currentProcessingContext().getNewSubject()); // otherwise, if parent object is present, new subject is set to the value of parent object. 
- } else if (this.currentContext.getParentObjectResource() != null) { - this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); - logger.info("5.1 context parent object {}", this.localValuePile.getFirst().getNewSubject()); + } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { + this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); + logger.info("5.1 context parent object {}", this.currentProcessingContext().getNewSubject()); } // If @typeof is present then typed resource is set to the resource obtained from the first match from the following rules: if (isAttributePresent(RDFaAttributes.TYPEOF)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present and process it according to the previous rule; - } else if (isRootElement) { + } else if (this.currentProcessingContext().isRootElement()) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.localValuePile.getFirst().setTypedResource(emptyAboutResource.get()); + this.currentProcessingContext().setTypedResource(emptyAboutResource.get()); } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } @@ -218,19 +266,19 @@ && isAttributePresent(RDFaAttributes.HREF)) { } else { // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.RESOURCE)) { - 
this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + this.currentProcessingContext().setTypedResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.HREF)); + this.currentProcessingContext().setTypedResource(getAttributeResourceValue(RDFaAttributes.HREF)); // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.localValuePile.getFirst().setTypedResource(getAttributeResourceValue(RDFaAttributes.SRC)); + this.currentProcessingContext().setTypedResource(getAttributeResourceValue(RDFaAttributes.SRC)); // otherwise, the value of typed resource is set to a newly created bnode. } else { - this.localValuePile.getFirst().setTypedResource(getValueFactory().createBNode()); + this.currentProcessingContext().setTypedResource(getValueFactory().createBNode()); } // The value of the current object resource is then set to the value of typed resource. - this.localValuePile.getFirst().setCurrentObjectResource(this.localValuePile.getFirst().getTypedResource()); + this.currentProcessingContext().setCurrentObjectResource(this.currentProcessingContext().getTypedResource()); } } // 5.2. 
otherwise: @@ -242,134 +290,134 @@ && isAttributePresent(RDFaAttributes.HREF)) { || isAttributePresent(RDFaAttributes.RESOURCE)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("5.2 about {}", this.localValuePile.getFirst().getNewSubject()); - // otherwise, by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; + this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("5.2 about {}", this.currentProcessingContext().getNewSubject()); + // otherwise, by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.RESOURCE)); - logger.info("5.2 resource {}", this.localValuePile.getFirst().getNewSubject()); - // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; + this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + logger.info("5.2 resource {}", this.currentProcessingContext().getNewSubject()); + // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.HREF)); - logger.info("5.2 href {}", this.localValuePile.getFirst().getNewSubject()); - // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing. 
+ this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.HREF)); + logger.info("5.2 href {}", this.currentProcessingContext().getNewSubject()); + // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing. } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.SRC)); - logger.info("5.2 src {}", this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.SRC)); + logger.info("5.2 src {}", this.currentProcessingContext().getNewSubject()); } - // otherwise, if no resource is provided by a resource attribute, then the first match from the following rules will apply: + // otherwise, if no resource is provided by a resource attribute, then the first match from the following rules will apply: } else { // if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; - if (isRootElement) { - logger.info("RootElement {}", qName); + if (this.currentProcessingContext().isRootElement()) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.localValuePile.getFirst().setNewSubject(emptyAboutResource.get()); - logger.info("5.2 rootElement {}", this.localValuePile.getFirst()); + this.currentProcessingContext().setNewSubject(emptyAboutResource.get()); + logger.info("5.2 rootElement {} {}", qName, this.currentProcessingContext()); } else { throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); } - // otherwise, if @typeof is present, then new subject is set to be a newly created bnode; + // otherwise, if @typeof is present, then new subject is set to be a newly created bnode; } else if (isAttributePresent(RDFaAttributes.TYPEOF)) { - 
this.localValuePile.getFirst().setNewSubject(getValueFactory().createBNode()); - logger.info("5.2 typeOf {}", this.localValuePile.getFirst()); - // otherwise, if parent object is present, new subject is set to the value of parent object. Additionally, if @property is not present then the skip element flag is set to 'true'. - } else if (this.currentContext.getParentObjectResource() != null) { - this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); - logger.info("5.2 parent object resource {}", this.localValuePile.getFirst()); + this.currentProcessingContext().setNewSubject(getValueFactory().createBNode()); + logger.info("5.2 typeOf {}", this.currentProcessingContext()); + // otherwise, if parent object is present, new subject is set to the value of parent object. Additionally, if @property is not present then the skip element flag is set to 'true'. + } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { + this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); + logger.info("5.2 parent object resource {}", this.currentProcessingContext()); if (!isAttributePresent(RDFaAttributes.PROPERTY)) { - this.localValuePile.getFirst().setSkipElement(true); + this.currentProcessingContext().setSkipElement(true); } } } // Finally, if @typeof is present, set the typed resource to the value of new subject. if (isAttributePresent(RDFaAttributes.TYPEOF)) { - this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getNewSubject()); } } } - logger.info("5 {} {}", qName, this.localValuePile.getFirst()); + logger.info("5 {} {}", qName, this.currentProcessingContext()); // 6. 
If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: if (isAttributePresent(RDFaAttributes.REL) || isAttributePresent(RDFaAttributes.REV)) { if (isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("6 about newSubject: {}", this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); + logger.info("6 about newSubject: {}", this.currentProcessingContext().getNewSubject()); } if (isAttributePresent(RDFaAttributes.TYPEOF)) { - this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getNewSubject()); - logger.info("6 typeof newSubject: {}", this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getNewSubject()); + logger.info("6 typeof newSubject: {}", this.currentProcessingContext().getNewSubject()); } - if (this.localValuePile.getFirst().getNewSubject() == null) { - if (isRootElement) { + if (this.currentProcessingContext().getNewSubject() == null) { + if (this.currentProcessingContext().isRootElement()) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { - this.localValuePile.getFirst().setTypedResource(emptyAboutResource.get()); + this.currentProcessingContext().setTypedResource(emptyAboutResource.get()); } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } - logger.info("6 root element typed resource: {}", this.localValuePile.getFirst().getTypedResource()); - } else if (this.currentContext.getParentObjectResource() != null) { - this.localValuePile.getFirst().setNewSubject(this.currentContext.getParentObjectResource()); - logger.info("6 parent object resource not null: 
{}", this.currentContext.getParentObjectResource()); + logger.info("6 root element typed resource: {}", this.currentProcessingContext().getTypedResource()); + } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { + this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); + logger.info("6 parent object resource not null: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); } } if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); - logger.info("6 resource CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + logger.info("6 resource CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.HREF)); - logger.info("6 href CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.HREF)); + logger.info("6 href CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.localValuePile.getFirst().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.SRC)); - logger.info("6 src CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.SRC)); + logger.info("6 src CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } 
else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setCurrentObjectResource(this.getValueFactory().createBNode()); - logger.info("6 typeof CurrentObjectResource: {}", this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentProcessingContext().setCurrentObjectResource(this.getValueFactory().createBNode()); + logger.info("6 typeof CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT) - && (this.localValuePile.getFirst().getCurrentObjectResource() == null - || this.localValuePile.getFirst().getCurrentObjectResource().isResource())) { - this.localValuePile.getFirst().setTypedResource(this.localValuePile.getFirst().getCurrentObjectResource()); - logger.info("6 typed resource: {}", this.localValuePile.getFirst().getTypedResource()); + && (this.currentProcessingContext().getCurrentObjectResource() == null + || this.currentProcessingContext().getCurrentObjectResource().isResource())) { + this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getCurrentObjectResource()); + logger.info("6 typed resource: {}", this.currentProcessingContext().getTypedResource()); } } - logger.info("6 {} {}", qName, this.localValuePile.getFirst()); + logger.info("6 {} {}", qName, this.currentProcessingContext()); // 7. 
If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; - if (this.localValuePile.getFirst().getTypedResource() != null) { + if (this.currentProcessingContext().getTypedResource() != null + && isAttributePresent(RDFaAttributes.TYPEOF)) { Resource typeIri = getAttributeResourceValue(RDFaAttributes.TYPEOF); - this.getModel().add(this.localValuePile.getFirst().getTypedResource(), RDF.type.getIRI(), typeIri); + this.getModel().add(this.currentProcessingContext().getTypedResource(), RDF.type.getIRI(), typeIri); } - logger.info("7 {} {}", qName, this.localValuePile.getFirst()); + logger.info("7 {} {}", qName, this.currentProcessingContext()); // 8. If in any of the previous steps a new subject was set to a non-null value different from the parent object; - if (this.localValuePile.getFirst().getNewSubject() != null && this.localValuePile.getFirst().getNewSubject() != this.currentContext.getParentObjectResource()) { - this.localValuePile.getFirst().setListMappings(new HashMap<>()); + if (this.currentProcessingContext().getNewSubject() != null && this.currentProcessingContext().getNewSubject() != currentProcessingContext().getEvaluationContext().getParentObjectResource()) { + this.currentProcessingContext().setListMappings(new HashMap<>()); } - logger.info("8 {} {}", qName, this.localValuePile.getFirst()); + logger.info("8 {} {}", qName, this.currentProcessingContext()); // 9. 
If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: - if (this.localValuePile.getFirst().getCurrentObjectResource() != null) { + if (this.currentProcessingContext().getCurrentObjectResource() != null) { if (isAttributePresent(RDFaAttributes.INLIST) && isAttributePresent(RDFaAttributes.REL)) { IRI relResource = (IRI) getAttributeResourceValue(RDFaAttributes.REL); - this.localValuePile.getFirst().addListMapping(relResource, this.localValuePile.getFirst().getCurrentObjectResource()); + this.currentProcessingContext().addListMapping(relResource, this.currentProcessingContext().getCurrentObjectResource()); } if (!isAttributePresent(RDFaAttributes.INLIST)) { if (isAttributePresent(RDFaAttributes.REL)) { Resource relResource = getAttributeResourceValue(RDFaAttributes.REL); if (relResource.isIRI()) { - this.getModel().add(this.localValuePile.getFirst().getNewSubject(), (IRI) relResource, this.localValuePile.getFirst().getCurrentObjectResource()); + this.getModel().add(this.currentProcessingContext().getNewSubject(), (IRI) relResource, this.currentProcessingContext().getCurrentObjectResource()); } else { - throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REL.getName())); } } if (isAttributePresent(RDFaAttributes.REV)) { @@ -377,47 +425,47 @@ && isAttributePresent(RDFaAttributes.REL)) { if (!revResource.isIRI()) { throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + getAttributeStringValue(RDFaAttributes.REV)); } - if (!this.localValuePile.getFirst().getCurrentObjectResource().isResource()) { - throw new ParsingErrorException("object resource expected to be a resource 
but was " + this.localValuePile.getFirst().getCurrentObjectResource()); + if (!this.currentProcessingContext().getCurrentObjectResource().isResource()) { + throw new ParsingErrorException("object resource expected to be a resource but was " + this.currentProcessingContext().getCurrentObjectResource()); } - this.getModel().add((Resource) this.localValuePile.getFirst().getCurrentObjectResource(), (IRI) revResource, this.localValuePile.getFirst().getNewSubject()); + this.getModel().add(this.currentProcessingContext().getCurrentObjectResource(), (IRI) revResource, this.currentProcessingContext().getNewSubject()); } } } - logger.info("9 {} {}", qName, this.localValuePile.getFirst()); + logger.info("9 {} {}", qName, this.currentProcessingContext()); // 10. If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); - if (this.localValuePile.getFirst().getCurrentObjectResource() == null + if (this.currentProcessingContext().getCurrentObjectResource() == null && (isAttributePresent(RDFaAttributes.REL) - ) || isAttributePresent(RDFaAttributes.REV)) { - if(this.localValuePile.getFirst().getIncompleteStatements() == null) { - this.localValuePile.getFirst().setIncompleteStatements(new HashSet<>()); + ) || isAttributePresent(RDFaAttributes.REV)) { + if (this.currentProcessingContext().getIncompleteStatements() == null) { + this.currentProcessingContext().setIncompleteStatements(new HashSet<>()); } - this.localValuePile.getFirst().setCurrentObjectResource(getValueFactory().createBNode()); + this.currentProcessingContext().setCurrentObjectResource(getValueFactory().createBNode()); if (isAttributePresent(RDFaAttributes.REL)) { if 
(!getAttributeResourceValue(RDFaAttributes.REL).isIRI()) { - throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REL.getName())); + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REL.getName())); } IRI relIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REL); if (isAttributePresent(RDFaAttributes.INLIST)) { - if (!this.localValuePile.getFirst().getListMappings().containsKey(relIRI)) { - this.localValuePile.getFirst().addListMappings(relIRI, new HashSet<>()); + if (!this.currentProcessingContext().getListMappings().containsKey(relIRI)) { + this.currentProcessingContext().addListMappings(relIRI, new HashSet<>()); } - this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.NONE)); + this.currentProcessingContext().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.NONE)); } else { - this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); + this.currentProcessingContext().addIncompleteStatement(new RDFaIncompleteStatement(relIRI, RDFaIncompleteStatement.Direction.FORWARD)); } } else if (isAttributePresent(RDFaAttributes.REV)) { if (!getAttributeResourceValue(RDFaAttributes.REV).isIRI()) { - throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes.getValue(RDFaAttributes.REV.getName())); + throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REV.getName())); } IRI revIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REV); - this.localValuePile.getFirst().addIncompleteStatement(new RDFaIncompleteStatement(revIRI, 
RDFaIncompleteStatement.Direction.BACKWARD)); + this.currentProcessingContext().addIncompleteStatement(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); } } - logger.info("10 {} {}", qName, this.localValuePile.getFirst()); + logger.info("10 {} {}", qName, this.currentProcessingContext()); // 11. The next step of the iteration is to establish any current property value; if (isAttributePresent(RDFaAttributes.PROPERTY)) { @@ -430,23 +478,21 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() IRI datatypeIRI = (IRI) getAttributeResourceValue(RDFaAttributes.DATATYPE); if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = getAttributeStringValue(RDFaAttributes.CONTENT); - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); } else { - String contentString = this.characters.toString().trim(); - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); - this.characters = new StringBuilder(); + String contentString = this.currentProcessingContext().getCharacters().trim(); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } // otherwise, as a plain literal if @datatype is present but has an empty value according to the section on CURIE and IRI Processing. // The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. 
} else if (isAttributePresent(RDFaAttributes.DATATYPE) && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { if (isAttributePresent(RDFaAttributes.CONTENT)) { - String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } else { - String contentString = this.characters.toString().trim(); - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); - this.characters = new StringBuilder(); + String contentString = this.currentProcessingContext().getCharacters().trim(); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } // otherwise, as an XML literal if @datatype is present and is set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. // The value of the XML literal is a string created by serializing to text, all nodes that are descendants of the current element, i.e., not including the element itself, and giving it a datatype of XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. The format of the resulting serialized content is as defined in Exclusive XML Canonicalization Version 1.0 [XML-EXC-C14N]. @@ -455,8 +501,8 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { // && getAttributeResourceValue( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { // otherwise, as a plain literal using the value of @content if @content is present. 
} else if (isAttributePresent(RDFaAttributes.CONTENT)) { - String contentString = this.currentElementAttributes.getValue(RDFaAttributes.CONTENT.getName()); - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); // otherwise, if the @rel, @rev, and @content attributes are not present, as a resource obtained from one of the following: // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; @@ -469,123 +515,76 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { || isAttributePresent(RDFaAttributes.SRC) )) { if (isAttributePresent(RDFaAttributes.RESOURCE)) { - this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.RESOURCE)); + this.currentProcessingContext().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.RESOURCE)); } else if (isAttributePresent(RDFaAttributes.HREF)) { - this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.HREF)); + this.currentProcessingContext().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.HREF)); } else if (isAttributePresent(RDFaAttributes.SRC)) { - this.localValuePile.getFirst().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.SRC)); + this.currentProcessingContext().setCurrentPropertyValue(getAttributeResourceValue(RDFaAttributes.SRC)); } // otherwise, if @typeof is present and @about is not, the value of typed resource. 
} else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { - this.localValuePile.getFirst().setCurrentPropertyValue(this.localValuePile.getFirst().getTypedResource()); + this.currentProcessingContext().setCurrentPropertyValue(this.currentProcessingContext().getTypedResource()); // otherwise as a plain literal. } else { - String contentString = this.characters.toString().trim(); + String contentString = this.currentProcessingContext().getCharacters().trim(); // Additionally, if there is a value for current language then the value of the plain literal should include this language information, as described in [RDF-SYNTAX-GRAMMAR]. The actual literal is either the value of @content (if present) or a string created by concatenating the text content of each of the descendant elements of the current element in document order. - if (this.localValuePile.getFirst().getCurrentLanguage() != null - && !this.localValuePile.getFirst().getCurrentLanguage().isEmpty()) { - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, this.localValuePile.getFirst().getCurrentLanguage())); + if (this.currentProcessingContext().getCurrentLanguage() != null + && !this.currentProcessingContext().getCurrentLanguage().isEmpty()) { + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, this.currentProcessingContext().getCurrentLanguage())); } else { - this.localValuePile.getFirst().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } - this.characters = new StringBuilder(); } // The current property value is then used with each predicate as follows: // If the element also includes the @inlist attribute, the current property value is added to the local list mapping as follows: if (isAttributePresent(RDFaAttributes.INLIST)) { // if the 
local list mapping does not contain a list associated with the predicate IRI, instantiate a new list and add to local list mappings - if (!this.localValuePile.getFirst().getListMappings().containsKey(propertyIRI)) { - this.localValuePile.getFirst().addListMappings(propertyIRI, new HashSet<>()); + if (!this.currentProcessingContext().getListMappings().containsKey(propertyIRI)) { + this.currentProcessingContext().addListMappings(propertyIRI, new HashSet<>()); } // add the current property value to the list associated with the predicate IRI in the local list mapping - this.localValuePile.getFirst().addListMapping(propertyIRI, this.localValuePile.getFirst().getCurrentPropertyValue()); + this.currentProcessingContext().addListMapping(propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); // Otherwise the current property value is used to generate a triple as follows: // subject new subject // predicate full IRI // object current property value } else { - this.getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, this.localValuePile.getFirst().getCurrentPropertyValue()); + this.getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); } } - logger.info("11 {} {}", qName, this.localValuePile.getFirst()); + logger.info("11 {} {}", qName, this.currentProcessingContext()); // 12. 
If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: - if (!this.localValuePile.getFirst().isSkipElement() - && this.localValuePile.getFirst().getNewSubject() != null) { - if(this.localValuePile.getFirst().getIncompleteStatements() == null) { - this.localValuePile.getFirst().setIncompleteStatements(new HashSet<>()); + if (!this.currentProcessingContext().isSkipElement() + && this.currentProcessingContext().getNewSubject() != null) { + if (this.currentProcessingContext().getIncompleteStatements() == null) { + this.currentProcessingContext().setIncompleteStatements(new HashSet<>()); } - for (RDFaIncompleteStatement incompleteStatement : this.currentContext.getIncompleteStatement()) { + for (RDFaIncompleteStatement incompleteStatement : currentProcessingContext().getEvaluationContext().getIncompleteStatement()) { if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { - this.localValuePile.getFirst().addListMapping(incompleteStatement.getPredicate(), this.localValuePile.getFirst().getNewSubject()); + this.currentProcessingContext().addListMapping(incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { - this.getModel().add(this.currentContext.getParentSubjectResource(), incompleteStatement.getPredicate(), this.localValuePile.getFirst().getNewSubject()); + this.getModel().add(currentProcessingContext().getEvaluationContext().getParentSubjectResource(), incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { - this.getModel().add(this.localValuePile.getFirst().getNewSubject(), incompleteStatement.getPredicate(), this.currentContext.getParentSubjectResource()); + 
this.getModel().add(this.currentProcessingContext().getNewSubject(), incompleteStatement.getPredicate(), currentProcessingContext().getEvaluationContext().getParentSubjectResource()); } } } - logger.info("12 {} {}", qName, this.localValuePile.getFirst()); - - // 13. Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: - // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; - if (this.localValuePile.getFirst().isSkipElement()) { - this.currentContext = new RDFaEvaluationContext(this.currentContext); - this.currentContext.clearIriMappings(); - initializeNewContext(this.currentContext); - this.currentContext.setLanguage(this.localValuePile.getFirst().getCurrentLanguage()); - this.currentContext.setIriMappings(this.localValuePile.getFirst().getIRIMappings()); - // Otherwise, the values are: - } else { - Resource oldParentSubject = this.currentContext.getParentSubjectResource(); - // the base is set to the base value of the current evaluation context; - this.currentContext = new RDFaEvaluationContext(this.currentContext.getBaseIri()); - initializeNewContext(this.currentContext); - // the parent subject is set to the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; - this.currentContext.setParentSubjectResource(this.localValuePile.getFirst().getNewSubject()); - // the parent object is set to value of current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; - if (this.localValuePile.getFirst().getCurrentObjectResource() != null) { - logger.info("13 parent object resource = current object resource {}", 
this.localValuePile.getFirst().getCurrentObjectResource()); - this.currentContext.setParentObjectResource(this.localValuePile.getFirst().getCurrentObjectResource()); - } else if (this.localValuePile.getFirst().getNewSubject() != null) { - this.currentContext.setParentObjectResource(this.localValuePile.getFirst().getNewSubject()); - logger.info("13 parent object resource = new subject {}", this.localValuePile.getFirst().getNewSubject()); - } else { - this.currentContext.setParentObjectResource(oldParentSubject); - } - logger.info("13 context parent object resource: {}", this.currentContext.getParentObjectResource()); - // the list of IRI mappings is set to the local list of IRI mappings; - this.currentContext.setIriMappings(this.localValuePile.getFirst().getIRIMappings()); - // the list of incomplete triples is set to the local list of incomplete triples; - this.currentContext.setIncompleteStatements(this.localValuePile.getFirst().getIncompleteStatements()); - // the list mapping is set to the local list mapping; - this.currentContext.setListMappings(this.localValuePile.getFirst().getListMappings()); - // language is set to the value of current language. - this.currentContext.setLanguage(this.localValuePile.getFirst().getCurrentLanguage()); - // the default vocabulary is set to the value of the local default vocabulary. - this.currentContext.setDefaultVocabulary(this.localValuePile.getFirst().getDefaultVocabulary()); - } - - logger.info("13 {} {}", qName, this.localValuePile.getFirst()); - - this.isRootElement = false; - } - - private void endProcessElement(String uri, String localName, String qName) { - Map> oldListMappings = this.currentContext.getListMappings(); + logger.info("12 {} {}", qName, this.currentProcessingContext()); + Map> oldListMappings = currentProcessingContext().getEvaluationContext().getListMappings(); // 14. 
Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: - for (Map.Entry> listMapping : this.localValuePile.getFirst().getListMappings().entrySet()) { + for (Map.Entry> listMapping : this.currentProcessingContext().getListMappings().entrySet()) { IRI propertyIRI = listMapping.getKey(); Set propertyList = listMapping.getValue(); if (!oldListMappings.containsKey(propertyIRI)) { if (propertyList.isEmpty()) { - getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, RDF.nil.getIRI()); + getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, RDF.nil.getIRI()); } else { ArrayList bnodes = new ArrayList<>(); for (int i = 0; i < propertyList.size(); i++) { @@ -603,15 +602,18 @@ private void endProcessElement(String uri, String localName, String qName) { bnodeIndex++; } - getModel().add(this.localValuePile.getFirst().getNewSubject(), propertyIRI, bnodes.getFirst()); + getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, bnodes.getFirst()); } } } - logger.info("14 {} newSubject: {}, currentObjectResource: {}, currentPropertyValue: {}, typedResource: {}, skip: {}", qName, this.localValuePile.getFirst().getNewSubject(), this.localValuePile.getFirst().getCurrentObjectResource(), this.localValuePile.getFirst().getCurrentPropertyValue(), this.localValuePile.getFirst().getTypedResource(), this.localValuePile.getFirst().isSkipElement()); + logger.info("14 {} newSubject: {}, currentObjectResource: {}, currentPropertyValue: {}, typedResource: {}, skip: {}", qName, this.currentProcessingContext().getNewSubject(), this.currentProcessingContext().getCurrentObjectResource(), this.currentProcessingContext().getCurrentPropertyValue(), this.currentProcessingContext().getTypedResource(), this.currentProcessingContext().isSkipElement()); - this.localValuePile.pop(); + } + + private void endProcessElement(String uri, String localName, String qName) { + 
this.processingContexts.pop(); } /** @@ -671,7 +673,7 @@ private IRI getPrefixIriFromDeclaration(String declaration) { } private Resource getAttributeResourceValue(RDFaAttributes attribute) { - String attributeValue = this.currentElementAttributes.getValue(attribute.getName()); + String attributeValue = this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()); Optional resourceResolution = resolveStringResource(attributeValue); if (resourceResolution.isPresent()) { return resourceResolution.get(); @@ -681,11 +683,29 @@ private Resource getAttributeResourceValue(RDFaAttributes attribute) { } private boolean isAttributePresent(RDFaAttributes attribute) { - return this.currentElementAttributes.getValue(attribute.getName()) != null; + return this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()) != null; } private String getAttributeStringValue(RDFaAttributes attribute) { - return this.currentElementAttributes.getValue(attribute.getName()); + return this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()); + } + + /** + * Convenience accessor to the top of the processing contexts pile + * + * @return + */ + private RDFaProcessingContext currentProcessingContext() { + return this.processingContexts.getFirst(); + } + + /** + * Convenience accessor to the HTML attributes at the top of the local values pile + * + * @return + */ + private Attributes currentElementAttributes() { + return currentProcessingContext().getCurrentElementAttributes(); } /** @@ -707,18 +727,18 @@ protected Optional resolveStringResource(String stringResource) { String prefixString = resultString.substring(0, colonIndex); String localNameString = resultString.substring(colonIndex + 1); // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri - if (currentContext.hasIriMapping(prefixString)) { - IRI namespaceIRI = currentContext.getIriMapping(prefixString); + if 
(currentProcessingContext().getEvaluationContext().hasIriMapping(prefixString)) { + IRI namespaceIRI = currentProcessingContext().getEvaluationContext().getIriMapping(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); - } else if (this.localValuePile.getFirst().getIRIMappings().containsKey(prefixString)) { - IRI namespaceIRI = this.localValuePile.getFirst().getIRIMappings().get(prefixString); + } else if (this.currentProcessingContext().getIRIMappings().containsKey(prefixString)) { + IRI namespaceIRI = this.currentProcessingContext().getIRIMappings().get(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI - return Optional.of(this.getValueFactory().createIRI(currentContext.getBaseIri().stringValue(), localNameString)); + return Optional.of(this.getValueFactory().createIRI(currentProcessingContext().getEvaluationContext().getBaseIri().stringValue(), localNameString)); } else { - throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix among " + this.currentContext.getIriMappings() + " and " + this.localValuePile.getFirst().getIRIMappings()); + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix among " + this.currentProcessingContext().getEvaluationContext().getIriMappings() + " and " + this.currentProcessingContext().getIRIMappings()); } } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI return Optional.of(this.getValueFactory().createIRI(resultString)); @@ -727,8 +747,8 @@ protected Optional resolveStringResource(String stringResource) { int colonIndex = resultString.indexOf(":"); String localNameString = resultString.substring(colonIndex + 1); return Optional.of(this.getValueFactory().createBNode(localNameString)); - } else if (IRIUtils.isStandardIRI(currentContext.getBaseIri().stringValue() + 
resultString)) { - String concatenatedRelativeUri = currentContext.getBaseIri().stringValue() + resultString; + } else if (IRIUtils.isStandardIRI(currentProcessingContext().getEvaluationContext().getBaseIri().stringValue() + resultString)) { + String concatenatedRelativeUri = currentProcessingContext().getEvaluationContext().getBaseIri().stringValue() + resultString; return Optional.of(this.getValueFactory().createIRI(concatenatedRelativeUri)); } return Optional.empty(); @@ -766,10 +786,10 @@ private void initializeNewContext(RDFaEvaluationContext context) { private String debugAttributesToString() { StringBuilder sb = new StringBuilder(); - if (this.currentElementAttributes != null) { - for (int i = 0; i < this.currentElementAttributes.getLength(); i++) { - String attributeLocalName = this.currentElementAttributes.getQName(i); - String attributeValue = this.currentElementAttributes.getValue(i); + if (this.currentElementAttributes() != null) { + for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { + String attributeLocalName = this.currentElementAttributes().getQName(i); + String attributeValue = this.currentElementAttributes().getValue(i); sb.append(attributeLocalName).append(" : ").append(attributeValue).append(" "); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 43e1a2be0..5c1f8f557 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -198,7 +198,8 @@ public String toString() { sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); sb.append("Mappings: ["); - this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); +// 
this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append(this.iriMappings.size()); sb.append("] "); if(this.getParentSubjectResource() != null) { sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java similarity index 73% rename from src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java rename to src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index 58b5f6070..8a622559f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaLocalValues.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -3,13 +3,17 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.Resource; import fr.inria.corese.core.next.api.Value; +import org.xml.sax.Attributes; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; -public class RDFaLocalValues { +/** + * Corresponds to the local values for the valuation of an element and the current context at the moment of its evaluation + */ +public class RDFaProcessingContext { // Local context private boolean skipElement = false; @@ -22,15 +26,25 @@ public class RDFaLocalValues { private String currentLanguage = null; private Value currentPropertyValue = null; private String defaultVocabulary = null; + private Attributes currentElementAttributes = null; - public RDFaLocalValues() { + /** + * Buffer for accumulating character data between start and end tags. 
+ */ + private StringBuilder characters = new StringBuilder(); + + private boolean isRootElement = true; + + private RDFaEvaluationContext evaluationContext = null; + + public RDFaProcessingContext() { } /** * Constructor to be used in step 1 of RDFa processing * @param context */ - public RDFaLocalValues(RDFaEvaluationContext context) { + public RDFaProcessingContext(RDFaEvaluationContext context) { this.skipElement = false; this.newSubject = null; this.currentObjectResource = null; @@ -40,9 +54,10 @@ public RDFaLocalValues(RDFaEvaluationContext context) { this.listMappings = context.getListMappings(); this.currentLanguage = context.getLanguage(); this.defaultVocabulary = context.getDefaultVocabulary(); + this.evaluationContext = context; } - public RDFaLocalValues(RDFaLocalValues other) { + public RDFaProcessingContext(RDFaProcessingContext other) { this.skipElement = other.skipElement; this.newSubject = other.newSubject; this.currentObjectResource = other.currentObjectResource; @@ -167,7 +182,45 @@ public String toString() { sb.append("currentLanguage: ").append(this.currentLanguage).append(" "); sb.append("currentPropertyValue: ").append(this.currentPropertyValue).append(" "); sb.append("defaultVocabulary: ").append(this.defaultVocabulary).append(" "); + sb.append("characters: ").append(this.getCharacters()).append(" "); + sb.append("Evaluation context: ").append(this.getEvaluationContext()).append(" "); return sb.toString(); } + + public String getCharacters() { + return characters.toString(); + } + + public void setCharacters(StringBuilder characters) { + this.characters = characters; + } + + public void addCharacters(char[] ch, int start, int length) { + this.characters.append(ch, start, length); + } + + public Attributes getCurrentElementAttributes() { + return currentElementAttributes; + } + + public void setCurrentElementAttributes(Attributes currentElementAttributes) { + this.currentElementAttributes = currentElementAttributes; + } + + public boolean 
isRootElement() { + return isRootElement; + } + + public void setRootElement(boolean rootElement) { + isRootElement = rootElement; + } + + public RDFaEvaluationContext getEvaluationContext() { + return evaluationContext; + } + + public void setEvaluationContext(RDFaEvaluationContext evaluationContext) { + this.evaluationContext = evaluationContext; + } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index 9db341d31..a5da1dad5 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -142,7 +142,7 @@ public void basicBaseTest() { RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); - parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/"); assertEquals(RDFFormat.RDFA, parser.getRDFFormat()); logModelContent(referenceModel); From df78d2190ae291ceffd9de088b65a02357df6828 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 14 Jan 2026 14:29:50 +0100 Subject: [PATCH 08/13] Fix subject passing --- .../next/impl/io/parser/rdfa/RDFaParser.java | 258 +++++++++--------- .../rdfa/model/RDFaEvaluationContext.java | 8 +- .../rdfa/model/RDFaProcessingContext.java | 23 +- 3 files changed, 155 insertions(+), 134 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 8671fd80d..ddf780af8 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -110,68 +110,69 @@ private void handleCharacters(char[] 
ch, int start, int length) { } private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { + logger.info("startProcessElement {}", qName); + // 1 First, the local values are initialized + RDFaProcessingContext processingContext = null; if(this.processingContexts.size() > 1) { // Not a root element - RDFaProcessingContext processingContext = new RDFaProcessingContext(currentProcessingContext().getEvaluationContext()); - processingContext.setCurrentElementAttributes(attrs); + processingContext = new RDFaProcessingContext(currentProcessingContext().getEvaluationContext()); + processingContext.setElementAttributes(attrs); processingContext.setRootElement(false); // 13. Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; if (this.currentProcessingContext().isSkipElement()) { - currentProcessingContext().setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext())); - currentProcessingContext().getEvaluationContext().clearIriMappings(); - initializeNewContext(currentProcessingContext().getEvaluationContext()); - currentProcessingContext().getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); - currentProcessingContext().getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); + processingContext.setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext())); + processingContext.getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); + 
processingContext.getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); // Otherwise, the values are: } else { Resource oldParentSubject = currentProcessingContext().getEvaluationContext().getParentSubjectResource(); // the base is set to the base value of the current evaluation context; - currentProcessingContext().setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext().getBaseIri())); - initializeNewContext(currentProcessingContext().getEvaluationContext()); + processingContext.setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext().getBaseIri())); // the parent subject is set to the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; - currentProcessingContext().getEvaluationContext().setParentSubjectResource(this.currentProcessingContext().getNewSubject()); + processingContext.getEvaluationContext().setParentSubjectResource(this.currentProcessingContext().getNewSubject()); // the parent object is set to value of current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; if (this.currentProcessingContext().getCurrentObjectResource() != null) { logger.info("13 parent object resource = current object resource {}", this.currentProcessingContext().getCurrentObjectResource()); - currentProcessingContext().getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getCurrentObjectResource()); + processingContext.getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getCurrentObjectResource()); } else if (this.currentProcessingContext().getNewSubject() != null) { - currentProcessingContext().getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getNewSubject()); + 
processingContext.getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getNewSubject()); logger.info("13 parent object resource = new subject {}", this.currentProcessingContext().getNewSubject()); } else { - currentProcessingContext().getEvaluationContext().setParentObjectResource(oldParentSubject); + processingContext.getEvaluationContext().setParentObjectResource(oldParentSubject); } logger.info("13 context parent object resource: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); // the list of IRI mappings is set to the local list of IRI mappings; - currentProcessingContext().getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); + processingContext.getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); // the list of incomplete triples is set to the local list of incomplete triples; - currentProcessingContext().getEvaluationContext().setIncompleteStatements(this.currentProcessingContext().getIncompleteStatements()); + processingContext.getEvaluationContext().setIncompleteStatements(this.currentProcessingContext().getIncompleteStatements()); // the list mapping is set to the local list mapping; - currentProcessingContext().getEvaluationContext().setListMappings(this.currentProcessingContext().getListMappings()); + processingContext.getEvaluationContext().setListMappings(this.currentProcessingContext().getListMappings()); // language is set to the value of current language. - currentProcessingContext().getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); + processingContext.getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); // the default vocabulary is set to the value of the local default vocabulary. 
- currentProcessingContext().getEvaluationContext().setDefaultVocabulary(this.currentProcessingContext().getDefaultVocabulary()); + processingContext.getEvaluationContext().setDefaultVocabulary(this.currentProcessingContext().getDefaultVocabulary()); } - this.processingContexts.add(processingContext); logger.info("13 {} {}", qName, this.currentProcessingContext()); } else { + // This is the start of the document RDFaEvaluationContext startingContext = getNewContext(getValueFactory().createIRI(this.baseIri)); + initializeEvaluationContextMappings(startingContext); startingContext.setParentSubjectResource(startingContext.getBaseIri()); startingContext.setParentObjectResource(null); startingContext.setLanguage(null); startingContext.setDefaultVocabulary(null); - RDFaProcessingContext processingContext = new RDFaProcessingContext(startingContext); - processingContext.setCurrentElementAttributes(attrs); + processingContext = new RDFaProcessingContext(startingContext); + processingContext.setElementAttributes(attrs); processingContext.setRootElement(true); - this.processingContexts.add(processingContext); - logger.info("START {} {}", qName, this.currentProcessingContext()); } - - // 1 First, the local values are initialized - - + processingContext.setElementName(qName); + this.processingContexts.addFirst(processingContext); + if(! this.currentProcessingContext().getElementName().equals(qName)) { + throw new ParsingErrorException("Start process element "+ qName +" is not paired with the right context" + this.currentProcessingContext()); + } + logger.info("START {} {}", qName, this.currentProcessingContext()); // HTML-specific base element if (qName.equals(BASE_TAG) @@ -188,16 +189,16 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setDefaultVocabulary(getAttributeStringValue(RDFaAttributes.VOCAB)); } - logger.info("2 {} {}", qName, this.currentProcessingContext()); +// logger.info("2 {}", this.currentProcessingContext()); // 3. 
The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { String attribute = this.currentElementAttributes().getQName(i); - logger.info("3 {} attribute: {}", qName, attribute); +// logger.info("3 {} attribute: {}", qName, attribute); if (attribute.startsWith(XMLNS_PREFIX)) { String attributeValue = this.currentElementAttributes().getValue(i); String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); - logger.info("3 {} {} : {}", qName, prefixName, attributeValue); +// logger.info("3 {} {} : {}", prefixName, attributeValue); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); this.currentProcessingContext().addIRIMappings(prefixName, prefixNamespace); } @@ -210,7 +211,7 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().addIRIMappings(prefixName, prefixIRI); } - logger.info("3 {} {}", qName, this.currentProcessingContext()); +// logger.info("3 {} {}", qName, this.currentProcessingContext()); // 4. The current element is also parsed for any language information, and if present, current language is set accordingly; // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). @@ -222,7 +223,7 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG)); } - logger.info("4 {} {}", qName, this.currentProcessingContext()); +// logger.info("4 {} {}", qName, this.currentProcessingContext()); // 5. If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. 
if (!isAttributePresent(RDFaAttributes.REL) @@ -232,22 +233,23 @@ && isAttributePresent(RDFaAttributes.HREF)) { && !getAttributeStringValue(RDFaAttributes.PROPERTY).isEmpty() && !isAttributePresent(RDFaAttributes.CONTENT) && !isAttributePresent(RDFaAttributes.DATATYPE) - && (isAttributePresent(RDFaAttributes.ABOUT) - || this.currentProcessingContext().isRootElement() - || currentProcessingContext().getEvaluationContext().getParentObjectResource() != null)) { + && ((isAttributePresent(RDFaAttributes.ABOUT) + && ! getAttributeStringValue(RDFaAttributes.ABOUT).isEmpty()) + || this.currentProcessingContext().isRootElement() + || currentProcessingContext().getEvaluationContext().getParentObjectResource() != null)) { // new subject is set to the resource obtained from the first match from the following rule: // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("5.1 About {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.1 About {}", this.currentProcessingContext().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; } else if (this.currentProcessingContext().isRootElement()) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getBaseIri()); - logger.info("5.1 Root element {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.1 Root element {}", this.currentProcessingContext().getNewSubject()); // otherwise, if parent object is present, new subject is set to the value of parent object. 
} else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); - logger.info("5.1 context parent object {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.1 context parent object {}", this.currentProcessingContext().getNewSubject()); } // If @typeof is present then typed resource is set to the resource obtained from the first match from the following rules: if (isAttributePresent(RDFaAttributes.TYPEOF)) { @@ -291,19 +293,19 @@ && isAttributePresent(RDFaAttributes.HREF)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("5.2 about {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.2 about {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.RESOURCE)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.RESOURCE)); - logger.info("5.2 resource {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.2 resource {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.HREF)); - logger.info("5.2 href {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.2 href {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the IRI from @src, 
if present, obtained according to the section on CURIE and IRI Processing. } else if (isAttributePresent(RDFaAttributes.SRC)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.SRC)); - logger.info("5.2 src {}", this.currentProcessingContext().getNewSubject()); +// logger.info("5.2 src {}", this.currentProcessingContext().getNewSubject()); } // otherwise, if no resource is provided by a resource attribute, then the first match from the following rules will apply: } else { @@ -312,18 +314,18 @@ && isAttributePresent(RDFaAttributes.HREF)) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { this.currentProcessingContext().setNewSubject(emptyAboutResource.get()); - logger.info("5.2 rootElement {} {}", qName, this.currentProcessingContext()); +// logger.info("5.2 rootElement {} {}", qName, this.currentProcessingContext()); } else { throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); } // otherwise, if @typeof is present, then new subject is set to be a newly created bnode; } else if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.currentProcessingContext().setNewSubject(getValueFactory().createBNode()); - logger.info("5.2 typeOf {}", this.currentProcessingContext()); +// logger.info("5.2 typeOf {}", this.currentProcessingContext()); // otherwise, if parent object is present, new subject is set to the value of parent object. Additionally, if @property is not present then the skip element flag is set to 'true'. 
} else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); - logger.info("5.2 parent object resource {}", this.currentProcessingContext()); +// logger.info("5.2 parent object resource {}", this.currentProcessingContext()); if (!isAttributePresent(RDFaAttributes.PROPERTY)) { this.currentProcessingContext().setSkipElement(true); } @@ -336,18 +338,18 @@ && isAttributePresent(RDFaAttributes.HREF)) { } } - logger.info("5 {} {}", qName, this.currentProcessingContext()); +// logger.info("5 {} {}", qName, this.currentProcessingContext()); // 6. If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: if (isAttributePresent(RDFaAttributes.REL) || isAttributePresent(RDFaAttributes.REV)) { if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); - logger.info("6 about newSubject: {}", this.currentProcessingContext().getNewSubject()); +// logger.info("6 about newSubject: {}", this.currentProcessingContext().getNewSubject()); } if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getNewSubject()); - logger.info("6 typeof newSubject: {}", this.currentProcessingContext().getNewSubject()); +// logger.info("6 typeof newSubject: {}", this.currentProcessingContext().getNewSubject()); } if (this.currentProcessingContext().getNewSubject() == null) { if (this.currentProcessingContext().isRootElement()) { @@ -357,36 +359,36 @@ && isAttributePresent(RDFaAttributes.HREF)) { } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } - logger.info("6 root element typed resource: {}", 
this.currentProcessingContext().getTypedResource()); +// logger.info("6 root element typed resource: {}", this.currentProcessingContext().getTypedResource()); } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); - logger.info("6 parent object resource not null: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); +// logger.info("6 parent object resource not null: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); } } if (isAttributePresent(RDFaAttributes.RESOURCE)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); - logger.info("6 resource CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); +// logger.info("6 resource CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.HREF)); - logger.info("6 href CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); +// logger.info("6 href CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.SRC)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.SRC)); - logger.info("6 src CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); +// logger.info("6 src CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { 
this.currentProcessingContext().setCurrentObjectResource(this.getValueFactory().createBNode()); - logger.info("6 typeof CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); +// logger.info("6 typeof CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT) && (this.currentProcessingContext().getCurrentObjectResource() == null || this.currentProcessingContext().getCurrentObjectResource().isResource())) { this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getCurrentObjectResource()); - logger.info("6 typed resource: {}", this.currentProcessingContext().getTypedResource()); +// logger.info("6 typed resource: {}", this.currentProcessingContext().getTypedResource()); } } - logger.info("6 {} {}", qName, this.currentProcessingContext()); +// logger.info("6 {} {}", qName, this.currentProcessingContext()); // 7. If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; if (this.currentProcessingContext().getTypedResource() != null @@ -395,14 +397,14 @@ && isAttributePresent(RDFaAttributes.TYPEOF)) { this.getModel().add(this.currentProcessingContext().getTypedResource(), RDF.type.getIRI(), typeIri); } - logger.info("7 {} {}", qName, this.currentProcessingContext()); +// logger.info("7 {} {}", qName, this.currentProcessingContext()); // 8. 
If in any of the previous steps a new subject was set to a non-null value different from the parent object; if (this.currentProcessingContext().getNewSubject() != null && this.currentProcessingContext().getNewSubject() != currentProcessingContext().getEvaluationContext().getParentObjectResource()) { this.currentProcessingContext().setListMappings(new HashMap<>()); } - logger.info("8 {} {}", qName, this.currentProcessingContext()); +// logger.info("8 {} {}", qName, this.currentProcessingContext()); // 9. If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: if (this.currentProcessingContext().getCurrentObjectResource() != null) { @@ -433,7 +435,7 @@ && isAttributePresent(RDFaAttributes.REL)) { } } - logger.info("9 {} {}", qName, this.currentProcessingContext()); +// logger.info("9 {} {}", qName, this.currentProcessingContext()); // 10. If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); if (this.currentProcessingContext().getCurrentObjectResource() == null @@ -465,10 +467,74 @@ && isAttributePresent(RDFaAttributes.REL)) { } } - logger.info("10 {} {}", qName, this.currentProcessingContext()); +// logger.info("10 {} {}", qName, this.currentProcessingContext()); + + + + // 12. 
If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: + if (!this.currentProcessingContext().isSkipElement() + && this.currentProcessingContext().getNewSubject() != null) { + if (this.currentProcessingContext().getIncompleteStatements() == null) { + this.currentProcessingContext().setIncompleteStatements(new HashSet<>()); + } + for (RDFaIncompleteStatement incompleteStatement : currentProcessingContext().getEvaluationContext().getIncompleteStatement()) { + if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { + this.currentProcessingContext().addListMapping(incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); + } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { + this.getModel().add(currentProcessingContext().getEvaluationContext().getParentSubjectResource(), incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); + } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.BACKWARD) { + this.getModel().add(this.currentProcessingContext().getNewSubject(), incompleteStatement.getPredicate(), currentProcessingContext().getEvaluationContext().getParentSubjectResource()); + } + } + } + +// logger.info("12 {} {}", qName, this.currentProcessingContext()); + Map> oldListMappings = currentProcessingContext().getEvaluationContext().getListMappings(); + // 14. 
Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: + for (Map.Entry> listMapping : this.currentProcessingContext().getListMappings().entrySet()) { + IRI propertyIRI = listMapping.getKey(); + Set propertyList = listMapping.getValue(); + + if (!oldListMappings.containsKey(propertyIRI)) { + if (propertyList.isEmpty()) { + getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, RDF.nil.getIRI()); + } else { + ArrayList bnodes = new ArrayList<>(); + for (int i = 0; i < propertyList.size(); i++) { + bnodes.add(getValueFactory().createBNode()); + } + int bnodeIndex = 0; + for (Value listElement : propertyList) { + BNode elementNode = bnodes.get(bnodeIndex); + Resource nextElementNode = RDF.nil.getIRI(); + if (bnodeIndex < bnodes.size() - 1) { + nextElementNode = bnodes.get(bnodeIndex + 1); + } + getModel().add(elementNode, RDF.first.getIRI(), listElement); + getModel().add(elementNode, RDF.rest.getIRI(), nextElementNode); + + bnodeIndex++; + } + getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, bnodes.getFirst()); + } + } + } + +// logger.info("14 {}", this.currentProcessingContext()); + + } + + private void endProcessElement(String uri, String localName, String qName) { + logger.info("endProcessElement {}", qName); + if(! this.currentProcessingContext().getElementName().equals(qName)) { + throw new ParsingErrorException("End process element "+ qName +" is not paired with the right context" + this.currentProcessingContext()); + } + + logger.info("11 Cont. START {}", this.currentProcessingContext()); // 11. The next step of the iteration is to establish any current property value; if (isAttributePresent(RDFaAttributes.PROPERTY)) { + logger.info("11 Cont. 
Attribute property found {}", this.currentProcessingContext()); IRI propertyIRI = (IRI) getAttributeResourceValue(RDFaAttributes.PROPERTY); // as a typed literal if @datatype is present, does not have an empty value according to the section on CURIE and IRI Processing, and is not set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. // The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. The final string includes the datatype IRI, as described in [RDF-SYNTAX-GRAMMAR], which will have been obtained according to the section on CURIE and IRI Processing. @@ -555,63 +621,7 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { } } - logger.info("11 {} {}", qName, this.currentProcessingContext()); - - // 12. If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: - if (!this.currentProcessingContext().isSkipElement() - && this.currentProcessingContext().getNewSubject() != null) { - if (this.currentProcessingContext().getIncompleteStatements() == null) { - this.currentProcessingContext().setIncompleteStatements(new HashSet<>()); - } - for (RDFaIncompleteStatement incompleteStatement : currentProcessingContext().getEvaluationContext().getIncompleteStatement()) { - if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { - this.currentProcessingContext().addListMapping(incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); - } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { - this.getModel().add(currentProcessingContext().getEvaluationContext().getParentSubjectResource(), incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); - } else if (incompleteStatement.getDirection() == 
RDFaIncompleteStatement.Direction.BACKWARD) { - this.getModel().add(this.currentProcessingContext().getNewSubject(), incompleteStatement.getPredicate(), currentProcessingContext().getEvaluationContext().getParentSubjectResource()); - } - } - } - - logger.info("12 {} {}", qName, this.currentProcessingContext()); - Map> oldListMappings = currentProcessingContext().getEvaluationContext().getListMappings(); - - // 14. Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: - for (Map.Entry> listMapping : this.currentProcessingContext().getListMappings().entrySet()) { - IRI propertyIRI = listMapping.getKey(); - Set propertyList = listMapping.getValue(); - - if (!oldListMappings.containsKey(propertyIRI)) { - if (propertyList.isEmpty()) { - getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, RDF.nil.getIRI()); - } else { - ArrayList bnodes = new ArrayList<>(); - for (int i = 0; i < propertyList.size(); i++) { - bnodes.add(getValueFactory().createBNode()); - } - int bnodeIndex = 0; - for (Value listElement : propertyList) { - BNode elementNode = bnodes.get(bnodeIndex); - Resource nextElementNode = RDF.nil.getIRI(); - if (bnodeIndex < bnodes.size() - 1) { - nextElementNode = bnodes.get(bnodeIndex + 1); - } - getModel().add(elementNode, RDF.first.getIRI(), listElement); - getModel().add(elementNode, RDF.rest.getIRI(), nextElementNode); - - bnodeIndex++; - } - getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, bnodes.getFirst()); - } - } - } - - logger.info("14 {} newSubject: {}, currentObjectResource: {}, currentPropertyValue: {}, typedResource: {}, skip: {}", qName, this.currentProcessingContext().getNewSubject(), this.currentProcessingContext().getCurrentObjectResource(), this.currentProcessingContext().getCurrentPropertyValue(), this.currentProcessingContext().getTypedResource(), this.currentProcessingContext().isSkipElement()); - - } - - private void 
endProcessElement(String uri, String localName, String qName) { + logger.info("11 Cont. END {}", this.currentProcessingContext()); this.processingContexts.pop(); } @@ -673,7 +683,7 @@ private IRI getPrefixIriFromDeclaration(String declaration) { } private Resource getAttributeResourceValue(RDFaAttributes attribute) { - String attributeValue = this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()); + String attributeValue = this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()); Optional resourceResolution = resolveStringResource(attributeValue); if (resourceResolution.isPresent()) { return resourceResolution.get(); @@ -683,11 +693,11 @@ private Resource getAttributeResourceValue(RDFaAttributes attribute) { } private boolean isAttributePresent(RDFaAttributes attribute) { - return this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()) != null; + return this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()) != null; } private String getAttributeStringValue(RDFaAttributes attribute) { - return this.currentProcessingContext().getCurrentElementAttributes().getValue(attribute.getName()); + return this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()); } /** @@ -705,7 +715,7 @@ private RDFaProcessingContext currentProcessingContext() { * @return */ private Attributes currentElementAttributes() { - return currentProcessingContext().getCurrentElementAttributes(); + return currentProcessingContext().getElementAttributes(); } /** @@ -750,6 +760,8 @@ protected Optional resolveStringResource(String stringResource) { } else if (IRIUtils.isStandardIRI(currentProcessingContext().getEvaluationContext().getBaseIri().stringValue() + resultString)) { String concatenatedRelativeUri = currentProcessingContext().getEvaluationContext().getBaseIri().stringValue() + resultString; return 
Optional.of(this.getValueFactory().createIRI(concatenatedRelativeUri)); + } else if(this.currentProcessingContext().getEvaluationContext().getTermMapping(resultString) != null) { + return Optional.of(this.currentProcessingContext().getEvaluationContext().getTermMapping(resultString)); } return Optional.empty(); } @@ -767,11 +779,11 @@ protected boolean stringUriIsCURIE(String stringIri) { private RDFaEvaluationContext getNewContext(IRI baseIRI) { RDFaEvaluationContext result = new RDFaEvaluationContext(baseIRI); - initializeNewContext(result); + initializeEvaluationContextMappings(result); return result; } - private void initializeNewContext(RDFaEvaluationContext context) { + private void initializeEvaluationContextMappings(RDFaEvaluationContext context) { // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { context.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 5c1f8f557..87f2bfb51 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -60,14 +60,14 @@ public RDFaEvaluationContext(IRI baseIri) { public RDFaEvaluationContext(RDFaEvaluationContext context) { this.baseIri = context.baseIri; - this.parentSubjectResource = context.parentSubjectResource; - this.parentObjectResource = context.parentObjectResource; - this.iriMappings = new HashMap<>(context.iriMappings); + this.defaultVocabulary = context.defaultVocabulary; this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.iriMappings = new 
HashMap<>(context.iriMappings); this.language = context.language; this.listMappings = new HashMap<>(context.listMappings); + this.parentObjectResource = context.parentObjectResource; + this.parentSubjectResource = context.parentSubjectResource; this.termMappings = new HashMap<>(context.termMappings); - this.defaultVocabulary = context.defaultVocabulary;; } public IRI getBaseIri() { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index 8a622559f..136735f10 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -16,6 +16,7 @@ public class RDFaProcessingContext { // Local context + private String elementName = null; private boolean skipElement = false; private Resource newSubject = null; private Resource currentObjectResource = null; @@ -26,7 +27,7 @@ public class RDFaProcessingContext { private String currentLanguage = null; private Value currentPropertyValue = null; private String defaultVocabulary = null; - private Attributes currentElementAttributes = null; + private Attributes elementAttributes = null; /** * Buffer for accumulating character data between start and end tags. 
@@ -175,14 +176,14 @@ public void setDefaultVocabulary(String defaultVocabulary) { @Override public String toString() { StringBuilder sb = new StringBuilder(); - + sb.append(this.elementName).append(" "); sb.append("newSubject: ").append(this.newSubject).append(" "); sb.append("currentObjectResource: ").append(this.currentObjectResource).append(" "); sb.append("typedResource: ").append(this.typedResource).append(" "); sb.append("currentLanguage: ").append(this.currentLanguage).append(" "); sb.append("currentPropertyValue: ").append(this.currentPropertyValue).append(" "); sb.append("defaultVocabulary: ").append(this.defaultVocabulary).append(" "); - sb.append("characters: ").append(this.getCharacters()).append(" "); + sb.append("characters: ").append(this.getCharacters().trim()).append(" "); sb.append("Evaluation context: ").append(this.getEvaluationContext()).append(" "); return sb.toString(); @@ -200,12 +201,12 @@ public void addCharacters(char[] ch, int start, int length) { this.characters.append(ch, start, length); } - public Attributes getCurrentElementAttributes() { - return currentElementAttributes; + public Attributes getElementAttributes() { + return elementAttributes; } - public void setCurrentElementAttributes(Attributes currentElementAttributes) { - this.currentElementAttributes = currentElementAttributes; + public void setElementAttributes(Attributes elementAttributes) { + this.elementAttributes = elementAttributes; } public boolean isRootElement() { @@ -223,4 +224,12 @@ public RDFaEvaluationContext getEvaluationContext() { public void setEvaluationContext(RDFaEvaluationContext evaluationContext) { this.evaluationContext = evaluationContext; } + + public String getElementName() { + return elementName; + } + + public void setElementName(String elementName) { + this.elementName = elementName; + } } From f193abdf04d5a4444e1a40fa74bc37a3a2119c7a Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 14 Jan 2026 14:46:48 +0100 Subject: [PATCH 09/13] Fix 
prefix mappings --- .../next/impl/io/parser/rdfa/RDFaParser.java | 78 ++++++++++++------- .../rdfa/model/RDFaEvaluationContext.java | 44 ----------- .../rdfa/model/RDFaProcessingContext.java | 18 ----- .../impl/io/parser/rdfa/RDFaParserTest.java | 2 +- 4 files changed, 52 insertions(+), 90 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index ddf780af8..ec0f9bae6 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -45,6 +45,11 @@ public class RDFaParser extends AbstractRDFParser { private String baseIri = SerializationConstants.getDefaultBaseURI(); + /** + * An index of IRI prefixes + */ + private Map iriMappings = new HashMap<>(); + /** * Buffer/Pile of local value to adapt the parsing algorithm to SAX processing */ @@ -58,6 +63,11 @@ public RDFaParser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); if (getConfig() instanceof BaseIRIOptions baseIRIOptions) { this.baseIri = baseIRIOptions.getBaseIRI(); + + } + // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance + for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { + this.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); } } @@ -96,7 +106,7 @@ public void parse(Reader reader, String baseURI) { private void addPrefix(String prefix, String uri) { IRI prefixIRI = getValueFactory().createIRI(uri); - currentProcessingContext().getEvaluationContext().addIriMapping(prefix, prefixIRI); + this.addIriMapping(prefix, prefixIRI); } /** @@ -118,12 +128,12 @@ private void startProcessElement(String uri, String localName, String qName, Att processingContext = new 
RDFaProcessingContext(currentProcessingContext().getEvaluationContext()); processingContext.setElementAttributes(attrs); processingContext.setRootElement(false); + this.setIriMappings(this.getIriMappings()); // 13. Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: // If the skip element flag is 'true' then the new evaluation context is a copy of the current context that was passed in to this level of processing, with the language and list of IRI mappings values replaced with the local values; if (this.currentProcessingContext().isSkipElement()) { processingContext.setEvaluationContext(new RDFaEvaluationContext(currentProcessingContext().getEvaluationContext())); processingContext.getEvaluationContext().setLanguage(this.currentProcessingContext().getCurrentLanguage()); - processingContext.getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); // Otherwise, the values are: } else { Resource oldParentSubject = currentProcessingContext().getEvaluationContext().getParentSubjectResource(); @@ -142,8 +152,6 @@ private void startProcessElement(String uri, String localName, String qName, Att processingContext.getEvaluationContext().setParentObjectResource(oldParentSubject); } logger.info("13 context parent object resource: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); - // the list of IRI mappings is set to the local list of IRI mappings; - processingContext.getEvaluationContext().setIriMappings(this.currentProcessingContext().getIRIMappings()); // the list of incomplete triples is set to the local list of incomplete triples; processingContext.getEvaluationContext().setIncompleteStatements(this.currentProcessingContext().getIncompleteStatements()); // the list mapping is set to the local list mapping; @@ -194,13 +202,13 @@ && isAttributePresent(RDFaAttributes.HREF)) { // 3. 
The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { String attribute = this.currentElementAttributes().getQName(i); -// logger.info("3 {} attribute: {}", qName, attribute); + logger.info("3 {} attribute: {}", qName, attribute); if (attribute.startsWith(XMLNS_PREFIX)) { String attributeValue = this.currentElementAttributes().getValue(i); String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); -// logger.info("3 {} {} : {}", prefixName, attributeValue); + logger.info("3 {} {} : {}", qName, prefixName, attributeValue); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); - this.currentProcessingContext().addIRIMappings(prefixName, prefixNamespace); + this.addIriMapping(prefixName, prefixNamespace); } } if (isAttributePresent(RDFaAttributes.PREFIX) @@ -208,10 +216,10 @@ && isAttributePresent(RDFaAttributes.HREF)) { String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); String prefixName = getPrefixFromDeclaration(prefixDeclaration); IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); - this.currentProcessingContext().addIRIMappings(prefixName, prefixIRI); + this.addIriMapping(prefixName, prefixIRI); } -// logger.info("3 {} {}", qName, this.currentProcessingContext()); + logger.info("3 {} {}", qName, this.currentProcessingContext()); // 4. The current element is also parsed for any language information, and if present, current language is set accordingly; // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). 
@@ -737,18 +745,18 @@ protected Optional resolveStringResource(String stringResource) { String prefixString = resultString.substring(0, colonIndex); String localNameString = resultString.substring(colonIndex + 1); // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri - if (currentProcessingContext().getEvaluationContext().hasIriMapping(prefixString)) { - IRI namespaceIRI = currentProcessingContext().getEvaluationContext().getIriMapping(prefixString); + if (this.hasIriMapping(prefixString)) { + IRI namespaceIRI = this.getIriMapping(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); - } else if (this.currentProcessingContext().getIRIMappings().containsKey(prefixString)) { - IRI namespaceIRI = this.currentProcessingContext().getIRIMappings().get(prefixString); + } else if (this.getIriMappings().containsKey(prefixString)) { + IRI namespaceIRI = this.getIriMappings().get(prefixString); return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI return Optional.of(this.getValueFactory().createIRI(currentProcessingContext().getEvaluationContext().getBaseIri().stringValue(), localNameString)); } else { - throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix among " + this.currentProcessingContext().getEvaluationContext().getIriMappings() + " and " + this.currentProcessingContext().getIRIMappings()); + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix among " + this.getIriMappings().keySet() + " and " + this.getIriMappings().keySet()); } } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI return Optional.of(this.getValueFactory().createIRI(resultString)); @@ -784,10 +792,6 @@ private RDFaEvaluationContext getNewContext(IRI baseIRI) { } private void 
initializeEvaluationContextMappings(RDFaEvaluationContext context) { - // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance - for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { - context.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); - } // https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined terms mappings for RDFa contexts. context.addTermMapping("describedby", getValueFactory().createIRI("http://www.w3.org/2007/05/powder-s#describedby")); @@ -795,17 +799,37 @@ private void initializeEvaluationContextMappings(RDFaEvaluationContext context) context.addTermMapping("role", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#role")); } - private String debugAttributesToString() { - StringBuilder sb = new StringBuilder(); + private Map getIriMappings() { + return iriMappings; + } - if (this.currentElementAttributes() != null) { - for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { - String attributeLocalName = this.currentElementAttributes().getQName(i); - String attributeValue = this.currentElementAttributes().getValue(i); - sb.append(attributeLocalName).append(" : ").append(attributeValue).append(" "); - } + private void setIriMappings(Map iriMappings) { + this.iriMappings = iriMappings; + } + + private boolean hasIriMapping(String prefix) { + return this.iriMappings.containsKey(prefix); + } + + /** + * @param prefix the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + private IRI getIriMapping(String prefix) { + return this.iriMappings.get(prefix); + } + + private void addIriMapping(String prefix, IRI prefixIri) { + this.iriMappings.put(prefix, prefixIri); + } + + private void addIriMappings(Map otherMappings) { + if(otherMappings != null) { + this.iriMappings.putAll(otherMappings); } + } - return sb.toString(); + private void 
clearIriMappings() { + this.iriMappings.clear(); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 87f2bfb51..05c0e34ca 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -26,11 +26,6 @@ public class RDFaEvaluationContext { */ private Resource parentObjectResource = null; - /** - * An index of locally defined IRI prefixes - */ - private Map iriMappings = new HashMap<>(); - /** * Set of statement in the process of building. */ @@ -62,7 +57,6 @@ public RDFaEvaluationContext(RDFaEvaluationContext context) { this.baseIri = context.baseIri; this.defaultVocabulary = context.defaultVocabulary; this.incompleteStatement = new HashSet<>(context.incompleteStatement); - this.iriMappings = new HashMap<>(context.iriMappings); this.language = context.language; this.listMappings = new HashMap<>(context.listMappings); this.parentObjectResource = context.parentObjectResource; @@ -94,40 +88,6 @@ public void setParentObjectResource(Resource parentObjectResource) { this.parentObjectResource = parentObjectResource; } - public Map getIriMappings() { - return iriMappings; - } - - public void setIriMappings(Map iriMappings) { - this.iriMappings = iriMappings; - } - - public boolean hasIriMapping(String prefix) { - return this.iriMappings.containsKey(prefix); - } - - /** - * @param prefix the prefix WITHOUT ":" - * @return the IRI associated to the prefix in this context - */ - public IRI getIriMapping(String prefix) { - return this.iriMappings.get(prefix); - } - - public void addIriMapping(String prefix, IRI prefixIri) { - this.iriMappings.put(prefix, prefixIri); - } - - public void addIriMappings(Map otherMappings) { - if(otherMappings != null) { - 
this.iriMappings.putAll(otherMappings); - } - } - - public void clearIriMappings() { - this.iriMappings.clear(); - } - public Set getIncompleteStatement() { return incompleteStatement; } @@ -197,10 +157,6 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append("BaseURI: ").append(this.getBaseIri().stringValue()).append(" "); - sb.append("Mappings: ["); -// this.getIriMappings().forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); - sb.append(this.iriMappings.size()); - sb.append("] "); if(this.getParentSubjectResource() != null) { sb.append("Subject:").append(this.getParentSubjectResource().stringValue()).append(" "); } else { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index 136735f10..41785f30e 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -21,7 +21,6 @@ public class RDFaProcessingContext { private Resource newSubject = null; private Resource currentObjectResource = null; private Resource typedResource = null; - private Map iRIMappings = new HashMap<>(); private Set incompleteStatements = null; private Map> listMappings = new HashMap<>(); private String currentLanguage = null; @@ -50,7 +49,6 @@ public RDFaProcessingContext(RDFaEvaluationContext context) { this.newSubject = null; this.currentObjectResource = null; this.typedResource = null; - this.iRIMappings = context.getIriMappings(); this.incompleteStatements = context.getIncompleteStatement(); this.listMappings = context.getListMappings(); this.currentLanguage = context.getLanguage(); @@ -63,7 +61,6 @@ public RDFaProcessingContext(RDFaProcessingContext other) { this.newSubject = other.newSubject; 
this.currentObjectResource = other.currentObjectResource; this.typedResource = other.typedResource; - this.iRIMappings = other.iRIMappings; this.incompleteStatements = other.incompleteStatements; this.listMappings = other.listMappings; this.currentLanguage = other.currentLanguage; @@ -103,21 +100,6 @@ public void setTypedResource(Resource typedResource) { this.typedResource = typedResource; } - public Map getIRIMappings() { - return iRIMappings; - } - - public void setIRIMappings(Map iRIMappings) { - this.iRIMappings = iRIMappings; - } - - public void addIRIMappings(String key, IRI value) { - if(this.iRIMappings == null) { - this.iRIMappings = new HashMap<>(); - } - this.iRIMappings.put(key, value); - } - public Set getIncompleteStatements() { return incompleteStatements; } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index a5da1dad5..468edd0af 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -194,7 +194,7 @@ public void basicIRItoIRITest() { String testDataString = """ - +
From 15e279c74e1c939cfc5de047a6a8464cdd516ec8 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 14 Jan 2026 15:15:27 +0100 Subject: [PATCH 10/13] fixing property value null --- .../next/impl/io/parser/rdfa/RDFaParser.java | 59 ++++++++++++------- .../rdfa/model/RDFaProcessingContext.java | 4 +- .../impl/io/parser/rdfa/RDFaParserTest.java | 3 + 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index ec0f9bae6..cbe15c0c8 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -119,6 +119,12 @@ private void handleCharacters(char[] ch, int start, int length) { } } + private void clearAllCharactersBuffers() { + for (RDFaProcessingContext value : this.processingContexts) { + value.clearCharacters(); + } + } + private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { logger.info("startProcessElement {}", qName); @@ -555,7 +561,10 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); } else { String contentString = this.currentProcessingContext().getCharacters().trim(); - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + if(! contentString.isEmpty()) { + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.clearAllCharactersBuffers(); + } } // otherwise, as a plain literal if @datatype is present but has an empty value according to the section on CURIE and IRI Processing. 
// The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. @@ -565,8 +574,11 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); } else { - String contentString = this.currentProcessingContext().getCharacters().trim(); - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + String contentString = this.currentProcessingContext().getCharacters().trim(); + if(! contentString.isEmpty()) { + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.clearAllCharactersBuffers(); + } } // otherwise, as an XML literal if @datatype is present and is set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. // The value of the XML literal is a string created by serializing to text, all nodes that are descendants of the current element, i.e., not including the element itself, and giving it a datatype of XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. The format of the resulting serialized content is as defined in Exclusive XML Canonicalization Version 1.0 [XML-EXC-C14N]. @@ -602,30 +614,35 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { // otherwise as a plain literal. } else { String contentString = this.currentProcessingContext().getCharacters().trim(); - // Additionally, if there is a value for current language then the value of the plain literal should include this language information, as described in [RDF-SYNTAX-GRAMMAR]. 
The actual literal is either the value of @content (if present) or a string created by concatenating the text content of each of the descendant elements of the current element in document order. - if (this.currentProcessingContext().getCurrentLanguage() != null - && !this.currentProcessingContext().getCurrentLanguage().isEmpty()) { - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, this.currentProcessingContext().getCurrentLanguage())); - } else { - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + if(! contentString.isEmpty()) { + // Additionally, if there is a value for current language then the value of the plain literal should include this language information, as described in [RDF-SYNTAX-GRAMMAR]. The actual literal is either the value of @content (if present) or a string created by concatenating the text content of each of the descendant elements of the current element in document order. 
+ if (this.currentProcessingContext().getCurrentLanguage() != null + && !this.currentProcessingContext().getCurrentLanguage().isEmpty()) { + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, this.currentProcessingContext().getCurrentLanguage())); + } else { + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + } + this.clearAllCharactersBuffers(); } } // The current property value is then used with each predicate as follows: // If the element also includes the @inlist attribute, the current property value is added to the local list mapping as follows: - if (isAttributePresent(RDFaAttributes.INLIST)) { - // if the local list mapping does not contain a list associated with the predicate IRI, instantiate a new list and add to local list mappings - if (!this.currentProcessingContext().getListMappings().containsKey(propertyIRI)) { - this.currentProcessingContext().addListMappings(propertyIRI, new HashSet<>()); + if(this.currentProcessingContext().getCurrentPropertyValue() != null) { + if (isAttributePresent(RDFaAttributes.INLIST)) { + // if the local list mapping does not contain a list associated with the predicate IRI, instantiate a new list and add to local list mappings + if (!this.currentProcessingContext().getListMappings().containsKey(propertyIRI)) { + this.currentProcessingContext().addListMappings(propertyIRI, new HashSet<>()); + } + // add the current property value to the list associated with the predicate IRI in the local list mapping + this.currentProcessingContext().addListMapping(propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); + // Otherwise the current property value is used to generate a triple as follows: + // subject new subject + // predicate full IRI + // object current property value + } else { + this.getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, 
this.currentProcessingContext().getCurrentPropertyValue()); } - // add the current property value to the list associated with the predicate IRI in the local list mapping - this.currentProcessingContext().addListMapping(propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); - // Otherwise the current property value is used to generate a triple as follows: - // subject new subject - // predicate full IRI - // object current property value - } else { - this.getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index 41785f30e..7780d09db 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -175,8 +175,8 @@ public String getCharacters() { return characters.toString(); } - public void setCharacters(StringBuilder characters) { - this.characters = characters; + public void clearCharacters() { + this.characters = new StringBuilder(); } public void addCharacters(char[] ch, int start, int length) { diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index 468edd0af..cd4cfbc1f 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -384,6 +384,9 @@ public void inheritSubjectTest() { referenceModel.add(aeDateOfBirthStatement); + logModelContent(referenceModel); + logModelContent(testModel); + assertEquals(1, testModel.size()); assertEquals(referenceModel.size(), testModel.size()); 
Iterator itStatementRef = referenceModel.iterator(); From 86d93553aa9f1c2a638cfa88aa6205a3e3a23361 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 14 Jan 2026 15:23:08 +0100 Subject: [PATCH 11/13] Fix typed literal --- .../corese/core/next/impl/io/parser/rdfa/RDFaParser.java | 7 ++++--- .../core/next/impl/io/parser/rdfa/RDFaParserTest.java | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index cbe15c0c8..d916deaae 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -562,7 +562,7 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() } else { String contentString = this.currentProcessingContext().getCharacters().trim(); if(! contentString.isEmpty()) { - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); this.clearAllCharactersBuffers(); } } @@ -570,13 +570,14 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() // The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. 
} else if (isAttributePresent(RDFaAttributes.DATATYPE) && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { + IRI datatypeIRI = (IRI) getAttributeResourceValue(RDFaAttributes.DATATYPE); if (isAttributePresent(RDFaAttributes.CONTENT)) { String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); } else { String contentString = this.currentProcessingContext().getCharacters().trim(); if(! contentString.isEmpty()) { - this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); + this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); this.clearAllCharactersBuffers(); } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index cd4cfbc1f..43aa98c79 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -303,6 +303,10 @@ public void basicIRItoTypedLiteralTest() { Statement statementTest = itStatementTest.next(); assertEquals(statementRef.getSubject(), statementTest.getSubject()); assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject().isLiteral(), statementTest.getObject().isLiteral()); + if(statementRef.getObject().isLiteral()) { + assertEquals(((Literal) statementRef.getObject()).getDatatype(), ((Literal) statementTest.getObject()).getDatatype()); + } assertEquals(statementRef.getObject(), statementTest.getObject()); assertEquals(statementRef.getContext(), statementTest.getContext()); } 
From 89a55210e97c1ee128748f4073a39f17f51a9b55 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 14 Jan 2026 18:07:37 +0100 Subject: [PATCH 12/13] all unit test pass and code commented --- .../next/impl/io/parser/rdfa/RDFaParser.java | 107 ++++-------------- .../rdfa/model/RDFaProcessingContext.java | 14 +-- 2 files changed, 29 insertions(+), 92 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index d916deaae..53020e5ab 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -125,14 +125,17 @@ private void clearAllCharactersBuffers() { } } + /* + * The algorithm in W3C recommendation is based on DOM processing, but this implementation is made in SAX. + * To reconcile both approaches, the "local values" are stored in a pile of ProcessingContext. The IRI mapping are shared independently. + * All operations except the ones that create literals are done ine this function. + */ private void startProcessElement(String uri, String localName, String qName, Attributes attrs) { - logger.info("startProcessElement {}", qName); // 1 First, the local values are initialized RDFaProcessingContext processingContext = null; if(this.processingContexts.size() > 1) { // Not a root element processingContext = new RDFaProcessingContext(currentProcessingContext().getEvaluationContext()); - processingContext.setElementAttributes(attrs); processingContext.setRootElement(false); this.setIriMappings(this.getIriMappings()); // 13. 
Next, all elements that are children of the current element are processed using the rules described here, using a new evaluation context, initialized as follows: @@ -149,15 +152,12 @@ private void startProcessElement(String uri, String localName, String qName, Att processingContext.getEvaluationContext().setParentSubjectResource(this.currentProcessingContext().getNewSubject()); // the parent object is set to value of current object resource, if non-null, or the value of new subject, if non-null, or the value of the parent subject of the current evaluation context; if (this.currentProcessingContext().getCurrentObjectResource() != null) { - logger.info("13 parent object resource = current object resource {}", this.currentProcessingContext().getCurrentObjectResource()); processingContext.getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getCurrentObjectResource()); } else if (this.currentProcessingContext().getNewSubject() != null) { processingContext.getEvaluationContext().setParentObjectResource(this.currentProcessingContext().getNewSubject()); - logger.info("13 parent object resource = new subject {}", this.currentProcessingContext().getNewSubject()); } else { processingContext.getEvaluationContext().setParentObjectResource(oldParentSubject); } - logger.info("13 context parent object resource: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); // the list of incomplete triples is set to the local list of incomplete triples; processingContext.getEvaluationContext().setIncompleteStatements(this.currentProcessingContext().getIncompleteStatements()); // the list mapping is set to the local list mapping; @@ -167,8 +167,6 @@ private void startProcessElement(String uri, String localName, String qName, Att // the default vocabulary is set to the value of the local default vocabulary. 
processingContext.getEvaluationContext().setDefaultVocabulary(this.currentProcessingContext().getDefaultVocabulary()); } - - logger.info("13 {} {}", qName, this.currentProcessingContext()); } else { // This is the start of the document RDFaEvaluationContext startingContext = getNewContext(getValueFactory().createIRI(this.baseIri)); @@ -178,15 +176,14 @@ private void startProcessElement(String uri, String localName, String qName, Att startingContext.setLanguage(null); startingContext.setDefaultVocabulary(null); processingContext = new RDFaProcessingContext(startingContext); - processingContext.setElementAttributes(attrs); processingContext.setRootElement(true); } processingContext.setElementName(qName); + processingContext.setElementAttributes(attrs); this.processingContexts.addFirst(processingContext); if(! this.currentProcessingContext().getElementName().equals(qName)) { throw new ParsingErrorException("Start process element "+ qName +" is not paired with the right context" + this.currentProcessingContext()); } - logger.info("START {} {}", qName, this.currentProcessingContext()); // HTML-specific base element if (qName.equals(BASE_TAG) @@ -203,20 +200,14 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setDefaultVocabulary(getAttributeStringValue(RDFaAttributes.VOCAB)); } -// logger.info("2 {}", this.currentProcessingContext()); - // 3. The current element is examined for IRI mappings and these are added to the local list of IRI mappings. 
Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; - for (int i = 0; i < this.currentElementAttributes().getLength(); i++) { - String attribute = this.currentElementAttributes().getQName(i); - logger.info("3 {} attribute: {}", qName, attribute); + this.currentProcessingContext().getElementAttributes().forEach((String attribute, String attributeValue) -> { if (attribute.startsWith(XMLNS_PREFIX)) { - String attributeValue = this.currentElementAttributes().getValue(i); String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); - logger.info("3 {} {} : {}", qName, prefixName, attributeValue); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); this.addIriMapping(prefixName, prefixNamespace); } - } + }); if (isAttributePresent(RDFaAttributes.PREFIX) && !getAttributeStringValue(RDFaAttributes.PREFIX).isEmpty()) { String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); @@ -225,8 +216,6 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.addIriMapping(prefixName, prefixIRI); } - logger.info("3 {} {}", qName, this.currentProcessingContext()); - // 4. The current element is also parsed for any language information, and if present, current language is set accordingly; // Host Languages that incorporate RDFa MAY provide a mechanism for specifying the natural language of an element and its contents (e.g., XML provides the general-purpose XML attribute @xml:lang). if (isAttributePresent(RDFaAttributes.LANG_ALT) @@ -237,8 +226,6 @@ && isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setCurrentLanguage(getAttributeStringValue(RDFaAttributes.LANG)); } -// logger.info("4 {} {}", qName, this.currentProcessingContext()); - // 5. If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. This step has two possible alternatives. 
if (!isAttributePresent(RDFaAttributes.REL) && !isAttributePresent(RDFaAttributes.REV)) { @@ -255,15 +242,12 @@ && isAttributePresent(RDFaAttributes.HREF)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); -// logger.info("5.1 About {}", this.currentProcessingContext().getNewSubject()); // otherwise, if the element is the root element of the document, then act as if there is an empty @about present, and process it according to the rule for @about, above; } else if (this.currentProcessingContext().isRootElement()) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getBaseIri()); -// logger.info("5.1 Root element {}", this.currentProcessingContext().getNewSubject()); // otherwise, if parent object is present, new subject is set to the value of parent object. 
} else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); -// logger.info("5.1 context parent object {}", this.currentProcessingContext().getNewSubject()); } // If @typeof is present then typed resource is set to the resource obtained from the first match from the following rules: if (isAttributePresent(RDFaAttributes.TYPEOF)) { @@ -307,19 +291,15 @@ && isAttributePresent(RDFaAttributes.HREF)) { // by using the resource from @about, if present, obtained according to the section on CURIE and IRI Processing; if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); -// logger.info("5.2 about {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.RESOURCE)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.RESOURCE)); -// logger.info("5.2 resource {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the IRI from @href, if present, obtained according to the section on CURIE and IRI Processing; } else if (isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.HREF)); -// logger.info("5.2 href {}", this.currentProcessingContext().getNewSubject()); // otherwise, by using the IRI from @src, if present, obtained according to the section on CURIE and IRI Processing. 
} else if (isAttributePresent(RDFaAttributes.SRC)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.SRC)); -// logger.info("5.2 src {}", this.currentProcessingContext().getNewSubject()); } // otherwise, if no resource is provided by a resource attribute, then the first match from the following rules will apply: } else { @@ -328,18 +308,15 @@ && isAttributePresent(RDFaAttributes.HREF)) { Optional emptyAboutResource = resolveStringResource(""); if (emptyAboutResource.isPresent()) { this.currentProcessingContext().setNewSubject(emptyAboutResource.get()); -// logger.info("5.2 rootElement {} {}", qName, this.currentProcessingContext()); } else { throw new ParsingErrorException("Expected to be able to generate newSubject from empty CURIE"); } // otherwise, if @typeof is present, then new subject is set to be a newly created bnode; } else if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.currentProcessingContext().setNewSubject(getValueFactory().createBNode()); -// logger.info("5.2 typeOf {}", this.currentProcessingContext()); // otherwise, if parent object is present, new subject is set to the value of parent object. Additionally, if @property is not present then the skip element flag is set to 'true'. } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); -// logger.info("5.2 parent object resource {}", this.currentProcessingContext()); if (!isAttributePresent(RDFaAttributes.PROPERTY)) { this.currentProcessingContext().setSkipElement(true); } @@ -352,18 +329,14 @@ && isAttributePresent(RDFaAttributes.HREF)) { } } -// logger.info("5 {} {}", qName, this.currentProcessingContext()); - // 6. 
If the current element does contain a @rel or @rev attribute, then the next step is to establish both a value for new subject and a value for current object resource: if (isAttributePresent(RDFaAttributes.REL) || isAttributePresent(RDFaAttributes.REV)) { if (isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setNewSubject(getAttributeResourceValue(RDFaAttributes.ABOUT)); -// logger.info("6 about newSubject: {}", this.currentProcessingContext().getNewSubject()); } if (isAttributePresent(RDFaAttributes.TYPEOF)) { this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getNewSubject()); -// logger.info("6 typeof newSubject: {}", this.currentProcessingContext().getNewSubject()); } if (this.currentProcessingContext().getNewSubject() == null) { if (this.currentProcessingContext().isRootElement()) { @@ -373,37 +346,28 @@ && isAttributePresent(RDFaAttributes.HREF)) { } else { throw new ParsingErrorException("Expected to be able to generate typedResource from empty CURIE"); } -// logger.info("6 root element typed resource: {}", this.currentProcessingContext().getTypedResource()); } else if (currentProcessingContext().getEvaluationContext().getParentObjectResource() != null) { this.currentProcessingContext().setNewSubject(currentProcessingContext().getEvaluationContext().getParentObjectResource()); -// logger.info("6 parent object resource not null: {}", currentProcessingContext().getEvaluationContext().getParentObjectResource()); } } if (isAttributePresent(RDFaAttributes.RESOURCE)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.RESOURCE)); -// logger.info("6 resource CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.HREF)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.HREF)); -// logger.info("6 href CurrentObjectResource: {}", 
this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.SRC)) { this.currentProcessingContext().setCurrentObjectResource(getAttributeResourceValue(RDFaAttributes.SRC)); -// logger.info("6 src CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } else if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT)) { this.currentProcessingContext().setCurrentObjectResource(this.getValueFactory().createBNode()); -// logger.info("6 typeof CurrentObjectResource: {}", this.currentProcessingContext().getCurrentObjectResource()); } if (isAttributePresent(RDFaAttributes.TYPEOF) && !isAttributePresent(RDFaAttributes.ABOUT) && (this.currentProcessingContext().getCurrentObjectResource() == null || this.currentProcessingContext().getCurrentObjectResource().isResource())) { this.currentProcessingContext().setTypedResource(this.currentProcessingContext().getCurrentObjectResource()); -// logger.info("6 typed resource: {}", this.currentProcessingContext().getTypedResource()); } } -// logger.info("6 {} {}", qName, this.currentProcessingContext()); - // 7. If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; if (this.currentProcessingContext().getTypedResource() != null && isAttributePresent(RDFaAttributes.TYPEOF)) { @@ -411,15 +375,11 @@ && isAttributePresent(RDFaAttributes.TYPEOF)) { this.getModel().add(this.currentProcessingContext().getTypedResource(), RDF.type.getIRI(), typeIri); } -// logger.info("7 {} {}", qName, this.currentProcessingContext()); - // 8. 
If in any of the previous steps a new subject was set to a non-null value different from the parent object; if (this.currentProcessingContext().getNewSubject() != null && this.currentProcessingContext().getNewSubject() != currentProcessingContext().getEvaluationContext().getParentObjectResource()) { this.currentProcessingContext().setListMappings(new HashMap<>()); } -// logger.info("8 {} {}", qName, this.currentProcessingContext()); - // 9. If in any of the previous steps a current object resource was set to a non-null value, it is now used to generate triples and add entries to the local list mapping: if (this.currentProcessingContext().getCurrentObjectResource() != null) { if (isAttributePresent(RDFaAttributes.INLIST) @@ -433,7 +393,7 @@ && isAttributePresent(RDFaAttributes.REL)) { if (relResource.isIRI()) { this.getModel().add(this.currentProcessingContext().getNewSubject(), (IRI) relResource, this.currentProcessingContext().getCurrentObjectResource()); } else { - throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REL.getName())); + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentProcessingContext().getElementAttributes().get(RDFaAttributes.REL.getName())); } } if (isAttributePresent(RDFaAttributes.REV)) { @@ -449,8 +409,6 @@ && isAttributePresent(RDFaAttributes.REL)) { } } -// logger.info("9 {} {}", qName, this.currentProcessingContext()); - // 10. If however current object resource was set to null, but there are predicates present, then they must be stored as incomplete triples, pending the discovery of a subject that can be used as the object. 
Also, current object resource should be set to a newly created bnode (so that the incomplete triples have a subject to connect to if they are ultimately turned into triples); if (this.currentProcessingContext().getCurrentObjectResource() == null && (isAttributePresent(RDFaAttributes.REL) @@ -461,7 +419,7 @@ && isAttributePresent(RDFaAttributes.REL)) { this.currentProcessingContext().setCurrentObjectResource(getValueFactory().createBNode()); if (isAttributePresent(RDFaAttributes.REL)) { if (!getAttributeResourceValue(RDFaAttributes.REL).isIRI()) { - throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REL.getName())); + throw new ParsingErrorException("Value of attribute @rel expected to be an IRI but was " + this.currentProcessingContext().getElementAttributes().get(RDFaAttributes.REL.getName())); } IRI relIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REL); if (isAttributePresent(RDFaAttributes.INLIST)) { @@ -474,17 +432,13 @@ && isAttributePresent(RDFaAttributes.REL)) { } } else if (isAttributePresent(RDFaAttributes.REV)) { if (!getAttributeResourceValue(RDFaAttributes.REV).isIRI()) { - throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentElementAttributes().getValue(RDFaAttributes.REV.getName())); + throw new ParsingErrorException("Value of attribute @rev expected to be an IRI but was " + this.currentProcessingContext().getElementAttributes().get(RDFaAttributes.REV.getName())); } IRI revIRI = (IRI) getAttributeResourceValue(RDFaAttributes.REV); this.currentProcessingContext().addIncompleteStatement(new RDFaIncompleteStatement(revIRI, RDFaIncompleteStatement.Direction.BACKWARD)); } } -// logger.info("10 {} {}", qName, this.currentProcessingContext()); - - - // 12. 
If the skip element flag is 'false', and new subject was set to a non-null value, then any incomplete triples within the current context should be completed: if (!this.currentProcessingContext().isSkipElement() && this.currentProcessingContext().getNewSubject() != null) { @@ -502,7 +456,6 @@ && isAttributePresent(RDFaAttributes.REL)) { } } -// logger.info("12 {} {}", qName, this.currentProcessingContext()); Map> oldListMappings = currentProcessingContext().getEvaluationContext().getListMappings(); // 14. Finally, if there is one or more mapping in the local list mapping, list triples are generated as follows: @@ -535,20 +488,18 @@ && isAttributePresent(RDFaAttributes.REL)) { } } -// logger.info("14 {}", this.currentProcessingContext()); - } + /* + * This function will apply the operations for the creation of literals using the character buffer and remove the current top processing context from the pile. + */ private void endProcessElement(String uri, String localName, String qName) { - logger.info("endProcessElement {}", qName); if(! this.currentProcessingContext().getElementName().equals(qName)) { throw new ParsingErrorException("End process element "+ qName +" is not paired with the right context" + this.currentProcessingContext()); } - logger.info("11 Cont. START {}", this.currentProcessingContext()); // 11. The next step of the iteration is to establish any current property value; if (isAttributePresent(RDFaAttributes.PROPERTY)) { - logger.info("11 Cont. Attribute property found {}", this.currentProcessingContext()); IRI propertyIRI = (IRI) getAttributeResourceValue(RDFaAttributes.PROPERTY); // as a typed literal if @datatype is present, does not have an empty value according to the section on CURIE and IRI Processing, and is not set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#.
// The actual literal is either the value of @content (if present) or a string created by concatenating the value of all descendant text nodes, of the current element in turn. The final string includes the datatype IRI, as described in [RDF-SYNTAX-GRAMMAR], which will have been obtained according to the section on CURIE and IRI Processing. @@ -572,7 +523,7 @@ && getAttributeResourceValue(RDFaAttributes.DATATYPE) != RDF.XMLLiteral.getIRI() && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { IRI datatypeIRI = (IRI) getAttributeResourceValue(RDFaAttributes.DATATYPE); if (isAttributePresent(RDFaAttributes.CONTENT)) { - String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); + String contentString = this.currentProcessingContext().getElementAttributes().get(RDFaAttributes.CONTENT.getName()); this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString, datatypeIRI)); } else { String contentString = this.currentProcessingContext().getCharacters().trim(); @@ -583,12 +534,10 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { } // otherwise, as an XML literal if @datatype is present and is set to XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. // The value of the XML literal is a string created by serializing to text, all nodes that are descendants of the current element, i.e., not including the element itself, and giving it a datatype of XMLLiteral in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. The format of the resulting serialized content is as defined in Exclusive XML Canonicalization Version 1.0 [XML-EXC-C14N]. 
- //} else if (this.currentElementAttributes.getValue(RDFaAttributes.DATATYPE.getName()) != null - // && getAttributeResourceValue( RDFaAttributes.DATATYPE).isIRI() - // && getAttributeResourceValue( RDFaAttributes.DATATYPE) == RDF.XMLLiteral.getIRI()) { + // otherwise, as a plain literal using the value of @content if @content is present. } else if (isAttributePresent(RDFaAttributes.CONTENT)) { - String contentString = this.currentElementAttributes().getValue(RDFaAttributes.CONTENT.getName()); + String contentString = this.currentProcessingContext().getElementAttributes().get(RDFaAttributes.CONTENT.getName()); this.currentProcessingContext().setCurrentPropertyValue(getValueFactory().createLiteral(contentString)); // otherwise, if the @rel, @rev, and @content attributes are not present, as a resource obtained from one of the following: // by using the resource from @resource, if present, obtained according to the section on CURIE and IRI Processing; @@ -642,13 +591,12 @@ && getAttributeStringValue(RDFaAttributes.DATATYPE).isEmpty()) { // predicate full IRI // object current property value } else { - this.getModel().add(this.currentProcessingContext().getNewSubject(), propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); + Statement statement = getValueFactory().createStatement(this.currentProcessingContext().getNewSubject(), propertyIRI, this.currentProcessingContext().getCurrentPropertyValue()); + this.getModel().add(statement); } } } - logger.info("11 Cont. 
END {}", this.currentProcessingContext()); - this.processingContexts.pop(); } @@ -709,7 +657,7 @@ private IRI getPrefixIriFromDeclaration(String declaration) { } private Resource getAttributeResourceValue(RDFaAttributes attribute) { - String attributeValue = this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()); + String attributeValue = this.currentProcessingContext().getElementAttributes().get(attribute.getName()); Optional resourceResolution = resolveStringResource(attributeValue); if (resourceResolution.isPresent()) { return resourceResolution.get(); @@ -719,31 +667,21 @@ private Resource getAttributeResourceValue(RDFaAttributes attribute) { } private boolean isAttributePresent(RDFaAttributes attribute) { - return this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()) != null; + return this.currentProcessingContext().getElementAttributes().get(attribute.getName()) != null; } private String getAttributeStringValue(RDFaAttributes attribute) { - return this.currentProcessingContext().getElementAttributes().getValue(attribute.getName()); + return this.currentProcessingContext().getElementAttributes().get(attribute.getName()); } /** * Convenience accessor to the top of the processing contexts pile * - * @return */ private RDFaProcessingContext currentProcessingContext() { return this.processingContexts.getFirst(); } - /** - * Convenience accessor to the HTML attributes at the top of the local values pile - * - * @return - */ - private Attributes currentElementAttributes() { - return currentProcessingContext().getElementAttributes(); - } - /** * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI * @@ -810,7 +748,6 @@ private RDFaEvaluationContext getNewContext(IRI baseIRI) { } private void initializeEvaluationContextMappings(RDFaEvaluationContext context) { - // https://www.w3.org/2011/rdfa-context/rdfa-1.1 sets a list of predefined terms 
mappings for RDFa contexts. context.addTermMapping("describedby", getValueFactory().createIRI("http://www.w3.org/2007/05/powder-s#describedby")); context.addTermMapping("license", getValueFactory().createIRI("http://www.w3.org/1999/xhtml/vocab#license")); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index 7780d09db..a3e4e9898 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -11,7 +11,7 @@ import java.util.Set; /** - * Corresponds to the local values for the valuation of an element and the current context at the moment of its evaluation + * Corresponds to the local values for the processing of an element and the current context at the moment of its evaluation */ public class RDFaProcessingContext { @@ -26,7 +26,7 @@ public class RDFaProcessingContext { private String currentLanguage = null; private Value currentPropertyValue = null; private String defaultVocabulary = null; - private Attributes elementAttributes = null; + private Map elementAttributes = new HashMap<>(); /** * Buffer for accumulating character data between start and end tags. 
@@ -37,9 +37,6 @@ public class RDFaProcessingContext { private RDFaEvaluationContext evaluationContext = null; - public RDFaProcessingContext() { - } - /** * Constructor to be used in step 1 of RDFa processing * @param context @@ -167,6 +164,7 @@ public String toString() { sb.append("defaultVocabulary: ").append(this.defaultVocabulary).append(" "); sb.append("characters: ").append(this.getCharacters().trim()).append(" "); sb.append("Evaluation context: ").append(this.getEvaluationContext()).append(" "); + sb.append("Attributes: ").append(this.elementAttributes.keySet()).append(" "); return sb.toString(); } @@ -183,12 +181,14 @@ public void addCharacters(char[] ch, int start, int length) { this.characters.append(ch, start, length); } - public Attributes getElementAttributes() { + public Map getElementAttributes() { return elementAttributes; } public void setElementAttributes(Attributes elementAttributes) { - this.elementAttributes = elementAttributes; + for(int i = 0; i < elementAttributes.getLength(); i++) { + this.elementAttributes.put(elementAttributes.getQName(i), elementAttributes.getValue(i)); + } } public boolean isRootElement() { From 895c7487dcd2dd87559c6132e3184b6f10fb0e5e Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Tue, 27 Jan 2026 14:09:15 +0100 Subject: [PATCH 13/13] Review fix --- .../inria/corese/core/next/api/Namespace.java | 2 +- .../next/api/base/model/AbstractModel.java | 2 +- .../api/base/model/AbstractNamespace.java | 7 +- .../impl/common/prefix/PrefixHandler.java | 6 +- .../next/impl/io/parser/rdfa/RDFaParser.java | 35 +++-- .../rdfa/model/RDFaEvaluationContext.java | 24 +-- .../rdfa/model/RDFaIncompleteStatement.java | 8 +- .../rdfa/model/RDFaInitialPrefixes.java | 12 +- .../rdfa/model/RDFaProcessingContext.java | 146 ++++++++++++++++-- .../core/next/impl/temp/ModelNamespace.java | 2 +- .../core/next/api/base/AbstractModelTest.java | 16 +- .../impl/io/parser/rdfa/RDFaParserTest.java | 72 ++++++++- 
.../core/next/impl/temp/CoreseModelTest.java | 6 +- 13 files changed, 265 insertions(+), 73 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/Namespace.java b/src/main/java/fr/inria/corese/core/next/api/Namespace.java index 42c5bde5c..28e62fa9c 100644 --- a/src/main/java/fr/inria/corese/core/next/api/Namespace.java +++ b/src/main/java/fr/inria/corese/core/next/api/Namespace.java @@ -15,7 +15,7 @@ public interface Namespace extends Serializable { /** * @return The name of the namespace, which is the start of an IRI. */ - String getName(); + String getNamespace(); /** * @param o diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java index b0b94d275..3da788dc3 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java @@ -34,7 +34,7 @@ public Model unmodifiable() { public Namespace setNamespace(String prefix, String name) { Optional existing = getNamespace(prefix); - if (!existing.isPresent() || !existing.get().getName().equals(name)) { + if (!existing.isPresent() || !existing.get().getNamespace().equals(name)) { Namespace namespace = new ModelNamespace(prefix, name); setNamespace(namespace); return namespace; diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java index f6e2d49ad..8c15b1b4d 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractNamespace.java @@ -1,7 +1,6 @@ package fr.inria.corese.core.next.api.base.model; import java.io.Serial; -import java.util.Comparator; import java.util.Objects; import fr.inria.corese.core.next.api.Namespace; @@ -32,7 +31,7 @@ public boolean equals(Object object) { } Namespace ns 
= (Namespace) object; return Objects.equals(getPrefix(), ns.getPrefix()) - && Objects.equals(getName(), ns.getName()); + && Objects.equals(getNamespace(), ns.getNamespace()); } /** @@ -40,7 +39,7 @@ public boolean equals(Object object) { */ @Override public int hashCode() { - return Objects.hash(getPrefix(), getName()); + return Objects.hash(getPrefix(), getNamespace()); } /** @@ -48,6 +47,6 @@ public int hashCode() { */ @Override public String toString() { - return getPrefix() + " :: " + getName(); + return getPrefix() + " :: " + getNamespace(); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java b/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java index 6115e136c..a862494ae 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/prefix/PrefixHandler.java @@ -118,7 +118,7 @@ public void setNamespace(Namespace namespace) { if (namespace == null) { throw new IllegalArgumentException("Namespace cannot be null"); } - setPrefix(namespace.getPrefix(), namespace.getName()); + setPrefix(namespace.getPrefix(), namespace.getNamespace()); } /** @@ -441,14 +441,14 @@ public String getPrefix() { } @Override - public String getName() { + public String getNamespace() { return name; } @SuppressWarnings("NullableProblems") public int compareTo(Namespace o) { Objects.requireNonNull(o); - int cmp = this.name.compareTo(o.getName()); + int cmp = this.name.compareTo(o.getNamespace()); if (cmp != 0) { return cmp; } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 53020e5ab..03cd432e3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -67,7 +67,7 @@ public RDFaParser(Model model, 
ValueFactory factory, IOOptions config) { } // Initializing the iri mappings with the default prefixes as defined by https://www.w3.org/TR/rdfa-core/#xmlrdfaconformance for (RDFaInitialPrefixes prefixObject : RDFaInitialPrefixes.values()) { - this.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getName())); + this.addIriMapping(prefixObject.getPrefix(), getValueFactory().createIRI(prefixObject.getNamespace())); } } @@ -202,7 +202,7 @@ && isAttributePresent(RDFaAttributes.HREF)) { // 3. The current element is examined for IRI mappings and these are added to the local list of IRI mappings. Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name; this.currentProcessingContext().getElementAttributes().forEach((String attribute, String attributeValue) -> { - if (attribute.startsWith(XMLNS_PREFIX)) { + if (attribute.startsWith(XMLNS_PREFIX + ":")) { String prefixName = attribute.replace(XMLNS_PREFIX + ":", ""); IRI prefixNamespace = getValueFactory().createIRI(attributeValue, ""); this.addIriMapping(prefixName, prefixNamespace); @@ -211,9 +211,7 @@ && isAttributePresent(RDFaAttributes.HREF)) { if (isAttributePresent(RDFaAttributes.PREFIX) && !getAttributeStringValue(RDFaAttributes.PREFIX).isEmpty()) { String prefixDeclaration = getAttributeStringValue(RDFaAttributes.PREFIX); - String prefixName = getPrefixFromDeclaration(prefixDeclaration); - IRI prefixIRI = getPrefixIriFromDeclaration(prefixDeclaration); - this.addIriMapping(prefixName, prefixIRI); + this.addIriMappings(getPrefixesFromDeclaration(prefixDeclaration)); } // 4. 
The current element is also parsed for any language information, and if present, current language is set accordingly; @@ -445,7 +443,7 @@ && isAttributePresent(RDFaAttributes.REL)) { if (this.currentProcessingContext().getIncompleteStatements() == null) { this.currentProcessingContext().setIncompleteStatements(new HashSet<>()); } - for (RDFaIncompleteStatement incompleteStatement : currentProcessingContext().getEvaluationContext().getIncompleteStatement()) { + for (RDFaIncompleteStatement incompleteStatement : currentProcessingContext().getEvaluationContext().getIncompleteStatements()) { if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.NONE) { this.currentProcessingContext().addListMapping(incompleteStatement.getPredicate(), this.currentProcessingContext().getNewSubject()); } else if (incompleteStatement.getDirection() == RDFaIncompleteStatement.Direction.FORWARD) { @@ -640,20 +638,25 @@ public void warning(SAXParseException e) { } } - private String getPrefixFromDeclaration(String declaration) { - String[] prefixArray = declaration.split(": "); - if (prefixArray.length != 2) { + private Map getPrefixesFromDeclaration(String declaration) { + String[] prefixArray = declaration.split(" "); + HashMap result = new HashMap<>(); + // prefix array should contain an even number of elements corresponding to prefix/namespace pairs + if (prefixArray.length % 2 != 0) { throw new ParsingErrorException("Error during prefix extraction of " + declaration); } - return prefixArray[0].toLowerCase(); - } + int numberOfPairs = prefixArray.length / 2; + for(int pairNumber = 0; pairNumber < numberOfPairs; pairNumber++) { + String prefix = prefixArray[pairNumber*2]; + if(! 
prefix.endsWith(":")) { + throw new ParsingErrorException("Expecting namespace prefix declaration to end with \":\", got " + prefix + " in declaration " + declaration); + } + prefix = prefix.replaceAll(":$", ""); // Removing trailing : + IRI namespace = getValueFactory().createIRI(prefixArray[pairNumber*2 +1]); - private IRI getPrefixIriFromDeclaration(String declaration) { - String[] prefixArray = declaration.split(": "); - if (prefixArray.length != 2) { - throw new ParsingErrorException("Error during prefix extraction of " + declaration); + result.put(prefix, namespace); } - return getValueFactory().createIRI(prefixArray[1].toLowerCase()); + return result; } private Resource getAttributeResourceValue(RDFaAttributes attribute) { diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java index 05c0e34ca..8071de201 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaEvaluationContext.java @@ -27,9 +27,9 @@ public class RDFaEvaluationContext { private Resource parentObjectResource = null; /** - * Set of statement in the process of building. + * Set of statements in the process of building. */ - private Set incompleteStatement = new HashSet<>(); + private Set incompleteStatements = new HashSet<>(); /** * The language of the document. Note that there is no default language. 
@@ -56,7 +56,7 @@ public RDFaEvaluationContext(IRI baseIri) { public RDFaEvaluationContext(RDFaEvaluationContext context) { this.baseIri = context.baseIri; this.defaultVocabulary = context.defaultVocabulary; - this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.incompleteStatements = new HashSet<>(context.incompleteStatements); this.language = context.language; this.listMappings = new HashMap<>(context.listMappings); this.parentObjectResource = context.parentObjectResource; @@ -88,32 +88,32 @@ public void setParentObjectResource(Resource parentObjectResource) { this.parentObjectResource = parentObjectResource; } - public Set getIncompleteStatement() { - return incompleteStatement; + public Set getIncompleteStatements() { + return incompleteStatements; } public void setIncompleteStatements(Set incompleteStatement) { - this.incompleteStatement = incompleteStatement; + this.incompleteStatements = incompleteStatement; } public Iterator getIncompleteStatementIterator() { - return this.incompleteStatement.iterator(); + return this.incompleteStatements.iterator(); } public void addStatementWithoutSubject(IRI property, Value object) { RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); newStatement.setObject(object); - this.incompleteStatement.add(newStatement); + this.incompleteStatements.add(newStatement); } public void addStatementWithoutObject(Resource subject, IRI property) { RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); newStatement.setSubject(subject); - this.incompleteStatement.add(newStatement); + this.incompleteStatements.add(newStatement); } public void clearIncompleteStatements() { - this.incompleteStatement.clear(); + this.incompleteStatements.clear(); } public String getLanguage() { @@ -167,8 +167,8 @@ public String toString() { } else { sb.append("Object: ").append((Object) null).append(" "); } - if(! 
this.getIncompleteStatement().isEmpty()) { - sb.append(this.getIncompleteStatement().size()).append(" incomplete statements."); + if(! this.getIncompleteStatements().isEmpty()) { + sb.append(this.getIncompleteStatements().size()).append(" incomplete statements."); } return sb.toString(); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java index 524491a8f..d023a6e7d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java @@ -125,10 +125,16 @@ public String toString() { @Override public boolean equals(Object o) { + if (this == o) { + return true; + } if(! (o instanceof RDFaIncompleteStatement oStat)) { return false; } - return oStat.getSubject() == this.getSubject() && oStat.getPredicate() == this.getPredicate() && oStat.getObject() == this.getObject() && oStat.getDirection() == this.getDirection(); + return oStat.getSubject().equals(this.getSubject()) + && oStat.getPredicate().equals(this.getPredicate()) + && oStat.getObject().equals(this.getObject()) + && oStat.getDirection().equals(this.getDirection()); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java index 814b06ed1..64b81e987 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaInitialPrefixes.java @@ -12,7 +12,7 @@ public enum RDFaInitialPrefixes implements Namespace { // cc "http://creativecommons.org/ns#" CC("cc", "http://creativecommons.org/ns#"), // csvw "http://www.w3.org/ns/csvw#" - CSVW("cc", "http://www.w3.org/ns/csvw#"), 
+ CSVW("csvw", "http://www.w3.org/ns/csvw#"), // ctag "http://commontag.org/ns#" CTAG("ctag", "http://commontag.org/ns#"), // dc "http://purl.org/dc/terms/" @@ -102,10 +102,10 @@ public enum RDFaInitialPrefixes implements Namespace { ; private final String prefix; - private final String name; + private final String namespace; - RDFaInitialPrefixes(String prefix, String name) { - this.name = name; + RDFaInitialPrefixes(String prefix, String namespaceString) { + this.namespace = namespaceString; this.prefix = prefix; } @@ -115,7 +115,7 @@ public String getPrefix() { } @Override - public String getName() { - return this.name; + public String getNamespace() { + return this.namespace; } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java index a3e4e9898..9fe7801bb 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaProcessingContext.java @@ -46,108 +46,180 @@ public RDFaProcessingContext(RDFaEvaluationContext context) { this.newSubject = null; this.currentObjectResource = null; this.typedResource = null; - this.incompleteStatements = context.getIncompleteStatement(); + this.incompleteStatements = context.getIncompleteStatements(); this.listMappings = context.getListMappings(); this.currentLanguage = context.getLanguage(); this.defaultVocabulary = context.getDefaultVocabulary(); this.evaluationContext = context; } - public RDFaProcessingContext(RDFaProcessingContext other) { - this.skipElement = other.skipElement; - this.newSubject = other.newSubject; - this.currentObjectResource = other.currentObjectResource; - this.typedResource = other.typedResource; - this.incompleteStatements = other.incompleteStatements; - this.listMappings = other.listMappings; - this.currentLanguage = 
other.currentLanguage; - this.currentPropertyValue = other.currentPropertyValue; - this.defaultVocabulary = other.defaultVocabulary; - } - + /** + * + * @return The skip element flag, which indicates whether the current element can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. + */ public boolean isSkipElement() { return skipElement; } + /** + * + * @param skipElement The skip element flag, which indicates whether the current element can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. + */ public void setSkipElement(boolean skipElement) { this.skipElement = skipElement; } + /** + * + * @return A new subject value, which once calculated will set the parent subject in an evaluation context, as well as being used to complete any incomplete triples, as described in the next section. + */ public Resource getNewSubject() { return newSubject; } + /** + * + * @param newSubject A new subject value, which once calculated will set the parent subject in an evaluation context, as well as being used to complete any incomplete triples, as described in the next section. + */ public void setNewSubject(Resource newSubject) { this.newSubject = newSubject; } + /** + * + * @return A value for the current object resource, the resource to use when creating triples that have a resource object. + */ public Resource getCurrentObjectResource() { return currentObjectResource; } + /** + * + * @param currentObjectResource A value for the current object resource, the resource to use when creating triples that have a resource object. + */ public void setCurrentObjectResource(Resource currentObjectResource) { this.currentObjectResource = currentObjectResource; } + /** + * + * @return A value for the typed resource, the source for creating rdf:type relationships to types specified in @typeof. 
+ */ public Resource getTypedResource() { return typedResource; } + /** + * + * @param typedResource A value for the typed resource, the source for creating rdf:type relationships to types specified in @typeof. + */ public void setTypedResource(Resource typedResource) { this.typedResource = typedResource; } + /** + * + * @return A list of incomplete triples. A triple can be incomplete when no object resource is provided alongside a predicate that requires a resource (i.e., @rel or @rev). The triples can be completed when a resource becomes available, which will be when the next subject is specified (part of the process called chaining). + */ public Set getIncompleteStatements() { return incompleteStatements; } + /** + * + * @param incompleteStatements A list of incomplete triples. A triple can be incomplete when no object resource is provided alongside a predicate that requires a resource (i.e., @rel or @rev). The triples can be completed when a resource becomes available, which will be when the next subject is specified (part of the process called chaining). + */ public void setIncompleteStatements(Set incompleteStatements) { this.incompleteStatements = incompleteStatements; } + /** + * + * @param statement An incomplete triples. A triple can be incomplete when no object resource is provided alongside a predicate that requires a resource (i.e., @rel or @rev). The triples can be completed when a resource becomes available, which will be when the next subject is specified (part of the process called chaining). + */ public void addIncompleteStatement(RDFaIncompleteStatement statement) { this.incompleteStatements.add(statement); } + /** + * + * @return A list mapping that associates IRIs with lists. + */ public Map> getListMappings() { return listMappings; } + /** + * + * @param listMappings A list mapping that associates IRIs with lists. 
+ */ public void setListMappings(Map> listMappings) { this.listMappings = listMappings; } + /** + * + * @param key The IRI of the list + * @param value The resource associated to this list + */ public void addListMapping(IRI key, Value value) { - if(! this.listMappings.containsKey(key)) { - this.listMappings.put(key, new HashSet<>()); - } + this.listMappings.computeIfAbsent(key,k -> new HashSet<>()); this.listMappings.get(key).add(value); } + /** + * + * @param key The IRI of the list + * @param objects The resources associated to this list + */ public void addListMappings(IRI key, Set objects) { this.listMappings.put(key, objects); } + /** + * + * @return The language. Note that there is no default language. + */ public String getCurrentLanguage() { return currentLanguage; } + /** + * + * @param currentLanguage The language. Note that there is no default language. + */ public void setCurrentLanguage(String currentLanguage) { this.currentLanguage = currentLanguage; } + /** + * + * @return A value for the current property value, the literal to use when creating triples that have a literal object, or IRI-s in the absence of @rel or @rev. + */ public Value getCurrentPropertyValue() { return currentPropertyValue; } + /** + * + * @param currentPropertyValue A value for the current property value, the literal to use when creating triples that have a literal object, or IRI-s in the absence of @rel or @rev. + */ public void setCurrentPropertyValue(Value currentPropertyValue) { this.currentPropertyValue = currentPropertyValue; } + /** + * + * @return The default vocabulary, a value to use as the prefix IRI when a term unknown to the RDFa Processor is used. + */ public String getDefaultVocabulary() { return defaultVocabulary; } + /** + * + * @param defaultVocabulary The default vocabulary, a value to use as the prefix IRI when a term unknown to the RDFa Processor is used. 
+ */ public void setDefaultVocabulary(String defaultVocabulary) { this.defaultVocabulary = defaultVocabulary; } @@ -169,48 +241,90 @@ public String toString() { return sb.toString(); } + /** + * + * @return The string created by concatenating the text content of each of the descendant elements of the current element in document order. + */ public String getCharacters() { return characters.toString(); } + /** + * Clear the current character buffer + */ public void clearCharacters() { this.characters = new StringBuilder(); } + /** + * Adds characters to the character buffer + */ public void addCharacters(char[] ch, int start, int length) { this.characters.append(ch, start, length); } + /** + * + * @return The map of the XML attribute of the current element + */ public Map getElementAttributes() { return elementAttributes; } + /** + * + * @param elementAttributes The map of the XML attribute of the current element + */ public void setElementAttributes(Attributes elementAttributes) { for(int i = 0; i < elementAttributes.getLength(); i++) { this.elementAttributes.put(elementAttributes.getQName(i), elementAttributes.getValue(i)); } } + /** + * + * @return The flag that indicates that the current element is at the root of the document + */ public boolean isRootElement() { return isRootElement; } + /** + * + * @param rootElement The flag that indicates that the current element is at the root of the document + */ public void setRootElement(boolean rootElement) { isRootElement = rootElement; } + /** + * + * @return The evaluation context that is used to evaluate the current element + */ public RDFaEvaluationContext getEvaluationContext() { return evaluationContext; } + /** + * + * @param evaluationContext The evaluation context that is used to evaluate the current element + */ public void setEvaluationContext(RDFaEvaluationContext evaluationContext) { this.evaluationContext = evaluationContext; } + /** + * + * @return The name ot the current element + */ public String 
getElementName() { return elementName; } + /** + * + * @param elementName The name ot the current element + */ public void setElementName(String elementName) { this.elementName = elementName; } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/ModelNamespace.java b/src/main/java/fr/inria/corese/core/next/impl/temp/ModelNamespace.java index a5178316c..518355474 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/ModelNamespace.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/ModelNamespace.java @@ -29,7 +29,7 @@ public String getPrefix() { } @Override - public String getName() { + public String getNamespace() { return namespaceURI; } } \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java b/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java index fa7f118c6..50745cbdf 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/AbstractModelTest.java @@ -75,16 +75,16 @@ void testSetNamespaceWithStringArgs() { Optional ns = model.getNamespace("ex"); assertTrue(ns.isPresent()); assertEquals("ex", ns.get().getPrefix()); - assertEquals("http://example.org/ns/", ns.get().getName()); + assertEquals("http://example.org/ns/", ns.get().getNamespace()); Namespace existingNs = model.setNamespace("ex", "http://example.org/ns/"); - assertEquals("http://example.org/ns/", existingNs.getName()); + assertEquals("http://example.org/ns/", existingNs.getNamespace()); assertEquals(1, model.getNamespaces().size()); model.setNamespace("ex", "http://example.com/newns/"); Optional updatedNs = model.getNamespace("ex"); assertTrue(updatedNs.isPresent()); - assertEquals("http://example.com/newns/", updatedNs.get().getName()); + assertEquals("http://example.com/newns/", updatedNs.get().getNamespace()); assertEquals(1, model.getNamespaces().size()); } @@ -99,7 +99,7 @@ void 
testSetNamespaceWithNamespaceObject() { Optional fetchedNs = model.getNamespace("geosparql"); assertTrue(fetchedNs.isPresent()); - assertEquals("http://example.org/ont/geosparql#", fetchedNs.get().getName()); + assertEquals("http://example.org/ont/geosparql#", fetchedNs.get().getNamespace()); assertEquals(1, model.getNamespaces().size()); } @@ -115,8 +115,8 @@ void testGetNamespaces() { Set namespaces = model.getNamespaces(); assertEquals(2, namespaces.size()); - assertTrue(namespaces.stream().anyMatch(n -> n.getPrefix().equals("ex1") && n.getName().equals("http://example.org/ns1/"))); - assertTrue(namespaces.stream().anyMatch(n -> n.getPrefix().equals("ex2") && n.getName().equals("http://example.org/ns2/"))); + assertTrue(namespaces.stream().anyMatch(n -> n.getPrefix().equals("ex1") && n.getNamespace().equals("http://example.org/ns1/"))); + assertTrue(namespaces.stream().anyMatch(n -> n.getPrefix().equals("ex2") && n.getNamespace().equals("http://example.org/ns2/"))); } /** @@ -134,7 +134,7 @@ void testRemoveNamespace() { // Comparison assertEquals(testNs.getPrefix(), removedNs.get().getPrefix()); - assertEquals(testNs.getName(), removedNs.get().getName()); + assertEquals(testNs.getNamespace(), removedNs.get().getNamespace()); assertEquals(0, model.getNamespaces().size()); assertFalse(model.getNamespace("ex").isPresent()); @@ -689,7 +689,7 @@ public String getPrefix() { } @Override - public String getName() { + public String getNamespace() { return name; } } diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index 43aa98c79..68ffd7b56 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -110,9 +110,33 @@ void parseCurrentSubjectCreatorMiddle() { """; - String currentSubjectNTriples = """ + String 
currentSubjectNTriples = defaultTurtlePrefixes + """ <> dc:creator "Jo" . """; + + Model parsedModel = new CoreseModel(); + Model resultModel = new CoreseModel(); + ValueFactory factory = new CoreseAdaptedValueFactory(); + RDFParser testedParser = new RDFaParser(parsedModel, factory); + RDFParser resultParser = parserFactory.createRDFParser(RDFFormat.TURTLE, resultModel, valueFactory); + + assertEquals(RDFFormat.RDFA, testedParser.getRDFFormat()); + + resultParser.parse(new ByteArrayInputStream(currentSubjectNTriples.getBytes()), "http://example.org/"); + testedParser.parse(new ByteArrayInputStream(currentSubjectXHTML.getBytes()), "http://example.org/"); + + logModelContent(parsedModel); + assertEquals(resultModel.size(), parsedModel.size()); + Iterator itStatementRef = resultModel.iterator(); + Iterator itStatementTest = parsedModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } } @Test @@ -406,6 +430,52 @@ public void inheritSubjectTest() { assertTrue(referenceModel.containsAll(testModel)); } + @Test + public void multiplePrefixDeclaration() { + String testDataString = """ + + + + Test 0020 + + +
+ this photo was taken by + Mark Birbeck + and John Doe + +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFA, testModel, valueFactory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://inria.fr/"); + + IRI photo1 = valueFactory.createIRI("http://inria.fr/photo1.jpg"); + IRI creator1 = valueFactory.createIRI("http://purl.org/dc/elements/1.1/creator"); + Literal name1 = valueFactory.createLiteral("Mark Birbeck"); + IRI creator2 = valueFactory.createIRI("https://schema.org/creator"); + Literal name2 = valueFactory.createLiteral("John Doe"); + + Statement stat1 = valueFactory.createStatement(photo1, creator1, name1); + Statement stat2 = valueFactory.createStatement(photo1, creator2, name2); + + referenceModel.add(stat1); + referenceModel.add(stat2); + + logModelContent(referenceModel); + logModelContent(testModel); + + assertEquals(2, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + } + private static void logModelContent(Model model) { StringWriter outWriter = new StringWriter(); RDFSerializer serializer = (new SerializerFactory()).createSerializer(RDFFormat.TURTLE, model); diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseModelTest.java b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseModelTest.java index ebcded5d4..04d2d381f 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseModelTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseModelTest.java @@ -163,7 +163,7 @@ void testFullConstructor() { Set testNamespaces = new HashSet<>(); Namespace ns1 = mock(Namespace.class); when(ns1.getPrefix()).thenReturn("ex"); - when(ns1.getName()).thenReturn("http://example.org/"); + when(ns1.getNamespace()).thenReturn("http://example.org/"); testNamespaces.add(ns1); CoreseModel newModel = new CoreseModel(mockCoreseGraph, testNamespaces); @@ -183,12 +183,12 @@ 
void testConstructor_FromNamespaces() { Set initialNamespaces = new HashSet<>(); Namespace ns1 = mock(Namespace.class); when(ns1.getPrefix()).thenReturn("prefix1"); - when(ns1.getName()).thenReturn("http://example.org/ns1#"); + when(ns1.getNamespace()).thenReturn("http://example.org/ns1#"); initialNamespaces.add(ns1); Namespace ns2 = mock(Namespace.class); when(ns2.getPrefix()).thenReturn("prefix2"); - when(ns2.getName()).thenReturn("http://example.org/ns2#"); + when(ns2.getNamespace()).thenReturn("http://example.org/ns2#"); initialNamespaces.add(ns2); CoreseModel newModel = new CoreseModel(initialNamespaces);